]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.c
Churn as I finish email address migration kth.se -> bsd.lv.
[mandoc.git] / mandoc.c
1 /* $Id: mandoc.c,v 1.19 2010/06/19 20:46:28 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <time.h>
29
30 #include "mandoc.h"
31 #include "libmandoc.h"
32
33 static int a2time(time_t *, const char *, const char *);
34 static int spec_norm(char *, int);
35
36
37 /*
38 * "Normalise" a special string by converting its ASCII_HYPH entries
39 * into actual hyphens.
40 */
41 static int
42 spec_norm(char *p, int sz)
43 {
44 int i;
45
46 for (i = 0; i < sz; i++)
47 if (ASCII_HYPH == p[i])
48 p[i] = '-';
49
50 return(sz);
51 }
52
53
54 int
55 mandoc_special(char *p)
56 {
57 int terminator; /* Terminator for \s. */
58 int lim; /* Limit for N in \s. */
59 int c, i;
60 char *sv;
61
62 sv = p;
63
64 if ('\\' != *p++)
65 return(spec_norm(sv, 0));
66
67 switch (*p) {
68 case ('\''):
69 /* FALLTHROUGH */
70 case ('`'):
71 /* FALLTHROUGH */
72 case ('q'):
73 /* FALLTHROUGH */
74 case (ASCII_HYPH):
75 /* FALLTHROUGH */
76 case ('-'):
77 /* FALLTHROUGH */
78 case ('~'):
79 /* FALLTHROUGH */
80 case ('^'):
81 /* FALLTHROUGH */
82 case ('%'):
83 /* FALLTHROUGH */
84 case ('0'):
85 /* FALLTHROUGH */
86 case (' '):
87 /* FALLTHROUGH */
88 case ('}'):
89 /* FALLTHROUGH */
90 case ('|'):
91 /* FALLTHROUGH */
92 case ('&'):
93 /* FALLTHROUGH */
94 case ('.'):
95 /* FALLTHROUGH */
96 case (':'):
97 /* FALLTHROUGH */
98 case ('c'):
99 /* FALLTHROUGH */
100 case ('e'):
101 return(spec_norm(sv, 2));
102 case ('s'):
103 if ('\0' == *++p)
104 return(spec_norm(sv, 2));
105
106 c = 2;
107 terminator = 0;
108 lim = 1;
109
110 if (*p == '\'') {
111 lim = 0;
112 terminator = 1;
113 ++p;
114 ++c;
115 } else if (*p == '[') {
116 lim = 0;
117 terminator = 2;
118 ++p;
119 ++c;
120 } else if (*p == '(') {
121 lim = 2;
122 terminator = 3;
123 ++p;
124 ++c;
125 }
126
127 if (*p == '+' || *p == '-') {
128 ++p;
129 ++c;
130 }
131
132 if (*p == '\'') {
133 if (terminator)
134 return(spec_norm(sv, 0));
135 lim = 0;
136 terminator = 1;
137 ++p;
138 ++c;
139 } else if (*p == '[') {
140 if (terminator)
141 return(spec_norm(sv, 0));
142 lim = 0;
143 terminator = 2;
144 ++p;
145 ++c;
146 } else if (*p == '(') {
147 if (terminator)
148 return(spec_norm(sv, 0));
149 lim = 2;
150 terminator = 3;
151 ++p;
152 ++c;
153 }
154
155 /* TODO: needs to handle floating point. */
156
157 if ( ! isdigit((u_char)*p))
158 return(spec_norm(sv, 0));
159
160 for (i = 0; isdigit((u_char)*p); i++) {
161 if (lim && i >= lim)
162 break;
163 ++p;
164 ++c;
165 }
166
167 if (terminator && terminator < 3) {
168 if (1 == terminator && *p != '\'')
169 return(spec_norm(sv, 0));
170 if (2 == terminator && *p != ']')
171 return(spec_norm(sv, 0));
172 ++p;
173 ++c;
174 }
175
176 return(spec_norm(sv, c));
177 case ('f'):
178 /* FALLTHROUGH */
179 case ('F'):
180 /* FALLTHROUGH */
181 case ('*'):
182 if ('\0' == *++p || isspace((u_char)*p))
183 return(spec_norm(sv, 0));
184 switch (*p) {
185 case ('('):
186 if ('\0' == *++p || isspace((u_char)*p))
187 return(spec_norm(sv, 0));
188 return(spec_norm(sv, 4));
189 case ('['):
190 for (c = 3, p++; *p && ']' != *p; p++, c++)
191 if (isspace((u_char)*p))
192 break;
193 return(spec_norm(sv, *p == ']' ? c : 0));
194 default:
195 break;
196 }
197 return(spec_norm(sv, 3));
198 case ('('):
199 if ('\0' == *++p || isspace((u_char)*p))
200 return(spec_norm(sv, 0));
201 if ('\0' == *++p || isspace((u_char)*p))
202 return(spec_norm(sv, 0));
203 return(spec_norm(sv, 4));
204 case ('['):
205 break;
206 default:
207 return(spec_norm(sv, 0));
208 }
209
210 for (c = 3, p++; *p && ']' != *p; p++, c++)
211 if (isspace((u_char)*p))
212 break;
213
214 return(spec_norm(sv, *p == ']' ? c : 0));
215 }
216
217
/*
 * calloc(3) wrapper that never hands NULL back to the caller: when
 * calloc() returns NULL the error is reported with perror(3) and the
 * process exits with EXIT_FAILURE.
 */
void *
mandoc_calloc(size_t num, size_t size)
{
	void	*mem = calloc(num, size);

	if (NULL != mem)
		return(mem);

	perror(NULL);
	exit(EXIT_FAILURE);
}
231
232
/*
 * malloc(3) wrapper that never hands NULL back to the caller: when
 * malloc() returns NULL the error is reported with perror(3) and the
 * process exits with EXIT_FAILURE.
 */
void *
mandoc_malloc(size_t size)
{
	void	*mem = malloc(size);

	if (NULL != mem)
		return(mem);

	perror(NULL);
	exit(EXIT_FAILURE);
}
246
247
/*
 * realloc(3) wrapper that never hands NULL back to the caller: when
 * realloc() returns NULL the error is reported with perror(3) and
 * the process exits with EXIT_FAILURE.
 */
void *
mandoc_realloc(void *ptr, size_t size)
{
	void	*resized = realloc(ptr, size);

	if (NULL != resized)
		return(resized);

	perror(NULL);
	exit(EXIT_FAILURE);
}
260
261
/*
 * strdup(3) wrapper that never hands NULL back to the caller: when
 * strdup() returns NULL the error is reported with perror(3) and the
 * process exits with EXIT_FAILURE.  The copy belongs to the caller.
 */
char *
mandoc_strdup(const char *ptr)
{
	char	*copy = strdup(ptr);

	if (NULL != copy)
		return(copy);

	perror(NULL);
	exit(EXIT_FAILURE);
}
275
276
/*
 * Parse `p' with the strptime(3) format `fmt'.  The parse succeeds
 * only if strptime() consumes the whole string; in that case the
 * broken-down time is converted with mktime(3), stored in `*t', and
 * 1 is returned.  Otherwise `*t' is left alone and 0 is returned.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	const char	*rest;

	memset(&parsed, 0, sizeof(parsed));

	rest = strptime(p, fmt, &parsed);
	if (NULL == rest || '\0' != *rest)
		return(0);

	*t = mktime(&parsed);
	return(1);
}
293
294
295 /*
296 * Convert from a manual date string (see mdoc(7) and man(7)) into a
297 * date according to the stipulated date type.
298 */
299 time_t
300 mandoc_a2time(int flags, const char *p)
301 {
302 time_t t;
303
304 if (MTIME_MDOCDATE & flags) {
305 if (0 == strcmp(p, "$" "Mdocdate$"))
306 return(time(NULL));
307 if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p))
308 return(t);
309 }
310
311 if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags)
312 if (a2time(&t, "%b %d, %Y", p))
313 return(t);
314
315 if (MTIME_ISO_8601 & flags)
316 if (a2time(&t, "%Y-%m-%d", p))
317 return(t);
318
319 if (MTIME_REDUCED & flags) {
320 if (a2time(&t, "%d, %Y", p))
321 return(t);
322 if (a2time(&t, "%Y", p))
323 return(t);
324 }
325
326 return(0);
327 }
328
329
/*
 * Decide whether the `sz' bytes at `p' end a sentence.
 *
 * The buffer is scanned from its last byte backward.  Closing quotes,
 * brackets and parentheses are transparent, which lets end-of-sentence
 * punctuation in front of them propagate outward.  Returns 1 if the
 * first other byte met is `!', `?' or an unescaped `.', and 0 in all
 * other cases, including an empty buffer.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	const char	*cp;

	for (cp = p + sz; cp > p; cp--) {
		switch (cp[-1]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			/* Transparent: look at the preceding byte. */
			continue;
		case ('.'):
			/* An escaped period does not end a sentence. */
			if (cp - p > 1 && '\\' == cp[-2])
				return(0);
			return(1);
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			return(1);
		default:
			return(0);
		}
	}

	return(0);
}
369
370
/*
 * Decide whether a line may be broken at the hyphen character `c'
 * inside the buffer beginning at `start'.  A break is allowed only
 * when the character stands free within a word.  Returns 1 if the
 * break is allowed and 0 otherwise.
 */
int
mandoc_hyph(const char *start, const char *c)
{
	char		 before, after;

	/* Never at the first byte of the buffer... */
	if (c == start)
		return(0);

	/* ...nor at the last one. */
	after = c[1];
	if ('\0' == after)
		return(0);

	before = c[-1];

	switch (after) {
	case ('\t'):
		/* FALLTHROUGH */
	case (' '):
		/* Last character of a word. */
		return(0);
	case ('-'):
		/* First of a run of hyphens. */
		return(0);
	default:
		break;
	}

	switch (before) {
	case ('\t'):
		/* FALLTHROUGH */
	case (' '):
		/* First character of a word. */
		return(0);
	case ('-'):
		/* Later member of a run of hyphens. */
		return(0);
	case ('\\'):
		/* Part of an escape sequence. */
		return(0);
	default:
		break;
	}

	return(1);
}