]> git.cameronkatri.com Git - mandoc.git/blob - mandoc.c
When in a <PRE>, don't print out the <BR> before lines that have leading
[mandoc.git] / mandoc.c
1 /* $Id: mandoc.c,v 1.36 2011/01/03 22:42:37 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <string.h>
29 #include <time.h>
30
31 #include "mandoc.h"
32 #include "libmandoc.h"
33
34 static int a2time(time_t *, const char *, const char *);
35
36
37 int
38 mandoc_special(char *p)
39 {
40 int len, i;
41 char term;
42 char *sv;
43
44 len = 0;
45 term = '\0';
46 sv = p;
47
48 assert('\\' == *p);
49 p++;
50
51 switch (*p++) {
52 #if 0
53 case ('Z'):
54 /* FALLTHROUGH */
55 case ('X'):
56 /* FALLTHROUGH */
57 case ('x'):
58 /* FALLTHROUGH */
59 case ('S'):
60 /* FALLTHROUGH */
61 case ('R'):
62 /* FALLTHROUGH */
63 case ('N'):
64 /* FALLTHROUGH */
65 case ('l'):
66 /* FALLTHROUGH */
67 case ('L'):
68 /* FALLTHROUGH */
69 case ('H'):
70 /* FALLTHROUGH */
71 case ('h'):
72 /* FALLTHROUGH */
73 case ('D'):
74 /* FALLTHROUGH */
75 case ('C'):
76 /* FALLTHROUGH */
77 case ('b'):
78 /* FALLTHROUGH */
79 case ('B'):
80 /* FALLTHROUGH */
81 case ('a'):
82 /* FALLTHROUGH */
83 case ('A'):
84 if (*p++ != '\'')
85 return(0);
86 term = '\'';
87 break;
88 #endif
89 case ('h'):
90 /* FALLTHROUGH */
91 case ('v'):
92 /* FALLTHROUGH */
93 case ('s'):
94 if (ASCII_HYPH == *p)
95 *p = '-';
96
97 i = 0;
98 if ('+' == *p || '-' == *p) {
99 p++;
100 i = 1;
101 }
102
103 switch (*p++) {
104 case ('('):
105 len = 2;
106 break;
107 case ('['):
108 term = ']';
109 break;
110 case ('\''):
111 term = '\'';
112 break;
113 case ('0'):
114 i = 1;
115 /* FALLTHROUGH */
116 default:
117 len = 1;
118 p--;
119 break;
120 }
121
122 if (ASCII_HYPH == *p)
123 *p = '-';
124 if ('+' == *p || '-' == *p) {
125 if (i)
126 return(0);
127 p++;
128 }
129
130 /* Handle embedded numerical subexp or escape. */
131
132 if ('(' == *p) {
133 while (*p && ')' != *p)
134 if ('\\' == *p++) {
135 i = mandoc_special(--p);
136 if (0 == i)
137 return(0);
138 p += i;
139 }
140
141 if (')' == *p++)
142 break;
143
144 return(0);
145 } else if ('\\' == *p) {
146 if (0 == (i = mandoc_special(p)))
147 return(0);
148 p += i;
149 }
150
151 break;
152 #if 0
153 case ('Y'):
154 /* FALLTHROUGH */
155 case ('V'):
156 /* FALLTHROUGH */
157 case ('$'):
158 /* FALLTHROUGH */
159 case ('n'):
160 /* FALLTHROUGH */
161 #endif
162 case ('k'):
163 /* FALLTHROUGH */
164 case ('M'):
165 /* FALLTHROUGH */
166 case ('m'):
167 /* FALLTHROUGH */
168 case ('f'):
169 /* FALLTHROUGH */
170 case ('F'):
171 /* FALLTHROUGH */
172 case ('*'):
173 switch (*p++) {
174 case ('('):
175 len = 2;
176 break;
177 case ('['):
178 term = ']';
179 break;
180 default:
181 len = 1;
182 p--;
183 break;
184 }
185 break;
186 case ('('):
187 len = 2;
188 break;
189 case ('['):
190 term = ']';
191 break;
192 case ('z'):
193 len = 1;
194 if ('\\' == *p) {
195 if (0 == (i = mandoc_special(p)))
196 return(0);
197 p += i;
198 return(*p ? (int)(p - sv) : 0);
199 }
200 break;
201 case ('o'):
202 /* FALLTHROUGH */
203 case ('w'):
204 if ('\'' == *p++) {
205 term = '\'';
206 break;
207 }
208 /* FALLTHROUGH */
209 default:
210 len = 1;
211 p--;
212 break;
213 }
214
215 if (term) {
216 for ( ; *p && term != *p; p++)
217 if (ASCII_HYPH == *p)
218 *p = '-';
219 return(*p ? (int)(p - sv) : 0);
220 }
221
222 for (i = 0; *p && i < len; i++, p++)
223 if (ASCII_HYPH == *p)
224 *p = '-';
225 return(i == len ? (int)(p - sv) : 0);
226 }
227
228
229 void *
230 mandoc_calloc(size_t num, size_t size)
231 {
232 void *ptr;
233
234 ptr = calloc(num, size);
235 if (NULL == ptr) {
236 perror(NULL);
237 exit((int)MANDOCLEVEL_SYSERR);
238 }
239
240 return(ptr);
241 }
242
243
244 void *
245 mandoc_malloc(size_t size)
246 {
247 void *ptr;
248
249 ptr = malloc(size);
250 if (NULL == ptr) {
251 perror(NULL);
252 exit((int)MANDOCLEVEL_SYSERR);
253 }
254
255 return(ptr);
256 }
257
258
259 void *
260 mandoc_realloc(void *ptr, size_t size)
261 {
262
263 ptr = realloc(ptr, size);
264 if (NULL == ptr) {
265 perror(NULL);
266 exit((int)MANDOCLEVEL_SYSERR);
267 }
268
269 return(ptr);
270 }
271
272
273 char *
274 mandoc_strdup(const char *ptr)
275 {
276 char *p;
277
278 p = strdup(ptr);
279 if (NULL == p) {
280 perror(NULL);
281 exit((int)MANDOCLEVEL_SYSERR);
282 }
283
284 return(p);
285 }
286
287 /*
288 * Parse a quoted or unquoted roff-style request or macro argument.
289 * Return a pointer to the parsed argument, which is either the original
290 * pointer or advanced by one byte in case the argument is quoted.
291 * Null-terminate the argument in place.
292 * Collapse pairs of quotes inside quoted arguments.
293 * Advance the argument pointer to the next argument,
294 * or to the null byte terminating the argument line.
295 */
296 char *
297 mandoc_getarg(char **cpp, mandocmsg msg, void *data, int ln, int *pos)
298 {
299 char *start, *cp;
300 int quoted, pairs, white;
301
302 /* Quoting can only start with a new word. */
303 start = *cpp;
304 if ('"' == *start) {
305 quoted = 1;
306 start++;
307 } else
308 quoted = 0;
309
310 pairs = 0;
311 white = 0;
312 for (cp = start; '\0' != *cp; cp++) {
313 /* Move left after quoted quotes and escaped backslashes. */
314 if (pairs)
315 cp[-pairs] = cp[0];
316 if ('\\' == cp[0]) {
317 if ('\\' == cp[1]) {
318 /* Poor man's copy mode. */
319 pairs++;
320 cp++;
321 } else if (0 == quoted && ' ' == cp[1])
322 /* Skip escaped blanks. */
323 cp++;
324 } else if (0 == quoted) {
325 if (' ' == cp[0]) {
326 /* Unescaped blanks end unquoted args. */
327 white = 1;
328 break;
329 }
330 } else if ('"' == cp[0]) {
331 if ('"' == cp[1]) {
332 /* Quoted quotes collapse. */
333 pairs++;
334 cp++;
335 } else {
336 /* Unquoted quotes end quoted args. */
337 quoted = 2;
338 break;
339 }
340 }
341 }
342
343 /* Quoted argument without a closing quote. */
344 if (1 == quoted && msg)
345 (*msg)(MANDOCERR_BADQUOTE, data, ln, *pos, NULL);
346
347 /* Null-terminate this argument and move to the next one. */
348 if (pairs)
349 cp[-pairs] = '\0';
350 if ('\0' != *cp) {
351 *cp++ = '\0';
352 while (' ' == *cp)
353 cp++;
354 }
355 *pos += (cp - start) + (quoted ? 1 : 0);
356 *cpp = cp;
357
358 if ('\0' == *cp && msg && (white || ' ' == cp[-1]))
359 (*msg)(MANDOCERR_EOLNSPACE, data, ln, *pos, NULL);
360
361 return(start);
362 }
363
364
/*
 * Parse the string `p' according to the strptime(3) format `fmt'.
 * The parse only counts as successful if the whole string is consumed.
 * On success, store the mktime(3) conversion in `*t' and return 1;
 * otherwise leave `*t' untouched and return 0.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	const char	*rest;

	memset(&parsed, 0, sizeof(parsed));

	rest = strptime(p, fmt, &parsed);
	if (NULL == rest || '\0' != *rest)
		return(0);

	*t = mktime(&parsed);
	return(1);
}
381
382
383 /*
384 * Convert from a manual date string (see mdoc(7) and man(7)) into a
385 * date according to the stipulated date type.
386 */
387 time_t
388 mandoc_a2time(int flags, const char *p)
389 {
390 time_t t;
391
392 if (MTIME_MDOCDATE & flags) {
393 if (0 == strcmp(p, "$" "Mdocdate$"))
394 return(time(NULL));
395 if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p))
396 return(t);
397 }
398
399 if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags)
400 if (a2time(&t, "%b %d, %Y", p))
401 return(t);
402
403 if (MTIME_ISO_8601 & flags)
404 if (a2time(&t, "%Y-%m-%d", p))
405 return(t);
406
407 if (MTIME_REDUCED & flags) {
408 if (a2time(&t, "%d, %Y", p))
409 return(t);
410 if (a2time(&t, "%Y", p))
411 return(t);
412 }
413
414 return(0);
415 }
416
417
/*
 * Decide whether the `sz' bytes at `p' end a sentence.
 *
 * The buffer is scanned backwards from its last byte.  Closing
 * delimiters (double quote, single quote, `]', `)') seen before any
 * end-of-sentence punctuation mark the text as enclosed; `.', `!' and
 * `?' count as end-of-sentence punctuation.  The scan stops at the
 * first byte that is neither.
 *
 * p:        buffer to inspect; need not be NUL-terminated.
 * sz:       number of bytes in the buffer; 0 always yields 0.
 * enclosed: non-zero if the caller already considers the text enclosed.
 *
 * Returns 1 if punctuation was found and the text is either not
 * enclosed or the punctuation directly follows an alphanumeric
 * character; returns 0 otherwise.
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	size_t	 left;	/* number of bytes not yet inspected */
	int	 found;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	found = 0;

	/*
	 * Count down with an index rather than a pointer, so that no
	 * pointer before the start of the buffer is ever formed and
	 * the size is never narrowed to int.
	 */
	for (left = sz; left > 0; left--) {
		switch (p[left - 1]) {
		case '\"':
			/* FALLTHROUGH */
		case '\'':
			/* FALLTHROUGH */
		case ']':
			/* FALLTHROUGH */
		case ')':
			if (0 == found)
				enclosed = 1;
			break;
		case '.':
			/* FALLTHROUGH */
		case '!':
			/* FALLTHROUGH */
		case '?':
			found = 1;
			break;
		default:
			return(found && (!enclosed ||
			    isalnum((unsigned char)p[left - 1])));
		}
	}

	/* Nothing but punctuation and closing delimiters. */
	return(found && !enclosed);
}
460
461
/*
 * Choose whether a line may be broken at the character `c', which
 * lies inside the NUL-terminated buffer beginning at `start'.
 * Breaking is only allowed when the character is free-standing
 * within a word; the character at `c' itself is not inspected.
 * Returns 1 if a break is acceptable, 0 otherwise.
 */
int
mandoc_hyph(const char *start, const char *c)
{
	char	 before, after;

	/* Skip first/last character of buffer. */
	if (c == start)
		return(0);
	after = c[1];
	if ('\0' == after)
		return(0);
	before = c[-1];

	switch (after) {
	case '\t':
		/* FALLTHROUGH */
	case ' ':
		/* Last character of a word. */
		return(0);
	case '-':
		/* Double invocation. */
		return(0);
	default:
		break;
	}

	switch (before) {
	case '\t':
		/* FALLTHROUGH */
	case ' ':
		/* First character of a word. */
		return(0);
	case '-':
		/* Double invocation. */
		return(0);
	case '\\':
		/* Part of an escape. */
		return(0);
	default:
		break;
	}

	return(1);
}