]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
Merging patch by Ingo Schwarze.
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.119 2010/04/03 13:02:35 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "libmdoc.h"
31 #include "libmandoc.h"
32
33 const char *const __mdoc_merrnames[MERRMAX] = {
34 "trailing whitespace", /* ETAILWS */
35 "unexpected quoted parameter", /* EQUOTPARM */
36 "unterminated quoted parameter", /* EQUOTTERM */
37 "argument parameter suggested", /* EARGVAL */
38 "macro disallowed in prologue", /* EBODYPROL */
39 "macro disallowed in body", /* EPROLBODY */
40 "text disallowed in prologue", /* ETEXTPROL */
41 "blank line disallowed", /* ENOBLANK */
42 "text parameter too long", /* ETOOLONG */
43 "invalid escape sequence", /* EESCAPE */
44 "invalid character", /* EPRINT */
45 "document has no body", /* ENODAT */
46 "document has no prologue", /* ENOPROLOGUE */
47 "expected line arguments", /* ELINE */
48 "invalid AT&T argument", /* EATT */
49 "default name not yet set", /* ENAME */
50 "missing list type", /* ELISTTYPE */
51 "missing display type", /* EDISPTYPE */
52 "too many display types", /* EMULTIDISP */
53 "too many list types", /* EMULTILIST */
54 "NAME section must be first", /* ESECNAME */
55 "badly-formed NAME section", /* ENAMESECINC */
56 "argument repeated", /* EARGREP */
57 "expected boolean parameter", /* EBOOL */
58 "inconsistent column syntax", /* ECOLMIS */
59 "nested display invalid", /* ENESTDISP */
60 "width argument missing", /* EMISSWIDTH */
61 "invalid section for this manual section", /* EWRONGMSEC */
62 "section out of conventional order", /* ESECOOO */
63 "section repeated", /* ESECREP */
64 "invalid standard argument", /* EBADSTAND */
65 "multi-line arguments discouraged", /* ENOMULTILINE */
66 "multi-line arguments suggested", /* EMULTILINE */
67 "line arguments discouraged", /* ENOLINE */
68 "prologue macro out of conventional order", /* EPROLOOO */
69 "prologue macro repeated", /* EPROLREP */
70 "invalid manual section", /* EBADMSEC */
71 "invalid section", /* EBADSEC */
72 "invalid font mode", /* EFONT */
73 "invalid date syntax", /* EBADDATE */
74 "invalid number format", /* ENUMFMT */
75 "superfluous width argument", /* ENOWIDTH */
76 "system: utsname error", /* EUTSNAME */
77 "obsolete macro", /* EOBS */
78 "end-of-line scope violation", /* EIMPBRK */
79 "empty macro ignored", /* EIGNE */
80 "unclosed explicit scope", /* EOPEN */
81 "unterminated quoted phrase", /* EQUOTPHR */
82 "closure macro without prior context", /* ENOCTX */
83 "no description found for library", /* ELIB */
84 "bad child for parent context", /* EBADCHILD */
85 "list arguments preceding type", /* ENOTYPE */
86 };
87
88 const char *const __mdoc_macronames[MDOC_MAX] = {
89 "Ap", "Dd", "Dt", "Os",
90 "Sh", "Ss", "Pp", "D1",
91 "Dl", "Bd", "Ed", "Bl",
92 "El", "It", "Ad", "An",
93 "Ar", "Cd", "Cm", "Dv",
94 "Er", "Ev", "Ex", "Fa",
95 "Fd", "Fl", "Fn", "Ft",
96 "Ic", "In", "Li", "Nd",
97 "Nm", "Op", "Ot", "Pa",
98 "Rv", "St", "Va", "Vt",
99 /* LINTED */
100 "Xr", "%A", "%B", "%D",
101 /* LINTED */
102 "%I", "%J", "%N", "%O",
103 /* LINTED */
104 "%P", "%R", "%T", "%V",
105 "Ac", "Ao", "Aq", "At",
106 "Bc", "Bf", "Bo", "Bq",
107 "Bsx", "Bx", "Db", "Dc",
108 "Do", "Dq", "Ec", "Ef",
109 "Em", "Eo", "Fx", "Ms",
110 "No", "Ns", "Nx", "Ox",
111 "Pc", "Pf", "Po", "Pq",
112 "Qc", "Ql", "Qo", "Qq",
113 "Re", "Rs", "Sc", "So",
114 "Sq", "Sm", "Sx", "Sy",
115 "Tn", "Ux", "Xc", "Xo",
116 "Fo", "Fc", "Oo", "Oc",
117 "Bk", "Ek", "Bt", "Hf",
118 "Fr", "Ud", "Lb", "Lp",
119 "Lk", "Mt", "Brq", "Bro",
120 /* LINTED */
121 "Brc", "%C", "Es", "En",
122 /* LINTED */
123 "Dx", "%Q", "br", "sp",
124 /* LINTED */
125 "%U"
126 };
127
128 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
129 "split", "nosplit", "ragged",
130 "unfilled", "literal", "file",
131 "offset", "bullet", "dash",
132 "hyphen", "item", "enum",
133 "tag", "diag", "hang",
134 "ohang", "inset", "column",
135 "width", "compact", "std",
136 "filled", "words", "emphasis",
137 "symbolic", "nested", "centered"
138 };
139
140 const char * const *mdoc_macronames = __mdoc_macronames;
141 const char * const *mdoc_argnames = __mdoc_argnames;
142
/* Forward declarations of the file-local helpers defined below. */

/* Per-parse state management. */
static	void		  mdoc_alloc1(struct mdoc *mdoc);
static	void		  mdoc_free1(struct mdoc *mdoc);

/* Node construction. */
static	struct mdoc_node *node_alloc(struct mdoc *m, int line, int pos,
				enum mdoct tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  pstring(struct mdoc *m, int line, int pos,
				const char *p, size_t len);

/* Line parsers. */
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln, const char *buf);
154
155 const struct mdoc_node *
156 mdoc_node(const struct mdoc *m)
157 {
158
159 return(MDOC_HALT & m->flags ? NULL : m->first);
160 }
161
162
163 const struct mdoc_meta *
164 mdoc_meta(const struct mdoc *m)
165 {
166
167 return(MDOC_HALT & m->flags ? NULL : &m->meta);
168 }
169
170
171 /*
172 * Frees volatile resources (parse tree, meta-data, fields).
173 */
174 static void
175 mdoc_free1(struct mdoc *mdoc)
176 {
177
178 if (mdoc->first)
179 mdoc_node_freelist(mdoc->first);
180 if (mdoc->meta.title)
181 free(mdoc->meta.title);
182 if (mdoc->meta.os)
183 free(mdoc->meta.os);
184 if (mdoc->meta.name)
185 free(mdoc->meta.name);
186 if (mdoc->meta.arch)
187 free(mdoc->meta.arch);
188 if (mdoc->meta.vol)
189 free(mdoc->meta.vol);
190 }
191
192
193 /*
194 * Allocate all volatile resources (parse tree, meta-data, fields).
195 */
196 static void
197 mdoc_alloc1(struct mdoc *mdoc)
198 {
199
200 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
201 mdoc->flags = 0;
202 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
203 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
204 mdoc->first = mdoc->last;
205 mdoc->last->type = MDOC_ROOT;
206 mdoc->next = MDOC_NEXT_CHILD;
207 }
208
209
/*
 * Prepare the parser for another document: drop the per-parse state
 * via mdoc_free1() and rebuild it via mdoc_alloc1().  Fields that
 * neither helper touches (callbacks, user data, parse flags) survive
 * the reset.
 */
void
mdoc_reset(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);	/* Tear down the old tree and meta-data. */
	mdoc_alloc1(mdoc);	/* Start over with an empty root. */
}
223
224
/*
 * Destroy a parser: release the per-parse resources, then the parser
 * structure itself.  The pointer must not be used afterwards.
 */
void
mdoc_free(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);	/* Node tree and meta-data strings. */
	free(mdoc);		/* The parser object itself. */
}
236
237
238 /*
239 * Allocate volatile and non-volatile parse resources.
240 */
241 struct mdoc *
242 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
243 {
244 struct mdoc *p;
245
246 p = mandoc_calloc(1, sizeof(struct mdoc));
247
248 if (cb)
249 memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
250
251 p->data = data;
252 p->pflags = pflags;
253
254 mdoc_hash_init();
255 mdoc_alloc1(p);
256 return(p);
257 }
258
259
260 /*
261 * Climb back up the parse tree, validating open scopes. Mostly calls
262 * through to macro_end() in macro.c.
263 */
264 int
265 mdoc_endparse(struct mdoc *m)
266 {
267
268 if (MDOC_HALT & m->flags)
269 return(0);
270 else if (mdoc_macroend(m))
271 return(1);
272 m->flags |= MDOC_HALT;
273 return(0);
274 }
275
276
277 /*
278 * Main parse routine. Parses a single line -- really just hands off to
279 * the macro (parsemacro()) or text parser (parsetext()).
280 */
281 int
282 mdoc_parseln(struct mdoc *m, int ln, char *buf)
283 {
284
285 if (MDOC_HALT & m->flags)
286 return(0);
287
288 return('.' == *buf ? parsemacro(m, ln, buf) :
289 parsetext(m, ln, buf));
290 }
291
292
293 int
294 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
295 const char *fmt, ...)
296 {
297 char buf[256];
298 va_list ap;
299
300 if (NULL == mdoc->cb.mdoc_err)
301 return(0);
302
303 va_start(ap, fmt);
304 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
305 va_end(ap);
306
307 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
308 }
309
310
311 int
312 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
313 {
314 char buf[256];
315 va_list ap;
316
317 if (NULL == mdoc->cb.mdoc_warn)
318 return(0);
319
320 va_start(ap, fmt);
321 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
322 va_end(ap);
323
324 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
325 }
326
327
328 int
329 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
330 {
331 const char *p;
332
333 p = __mdoc_merrnames[(int)type];
334 assert(p);
335
336 if (iserr)
337 return(mdoc_verr(m, line, pos, p));
338
339 return(mdoc_vwarn(m, line, pos, p));
340 }
341
342
343 int
344 mdoc_macro(struct mdoc *m, enum mdoct tok,
345 int ln, int pp, int *pos, char *buf)
346 {
347
348 assert(tok < MDOC_MAX);
349 /*
350 * If we're in the prologue, deny "body" macros. Similarly, if
351 * we're in the body, deny prologue calls.
352 */
353 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
354 MDOC_PBODY & m->flags)
355 return(mdoc_perr(m, ln, pp, EPROLBODY));
356 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
357 ! (MDOC_PBODY & m->flags))
358 return(mdoc_perr(m, ln, pp, EBODYPROL));
359
360 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
361 }
362
363
364 static int
365 node_append(struct mdoc *mdoc, struct mdoc_node *p)
366 {
367
368 assert(mdoc->last);
369 assert(mdoc->first);
370 assert(MDOC_ROOT != p->type);
371
372 switch (mdoc->next) {
373 case (MDOC_NEXT_SIBLING):
374 mdoc->last->next = p;
375 p->prev = mdoc->last;
376 p->parent = mdoc->last->parent;
377 break;
378 case (MDOC_NEXT_CHILD):
379 mdoc->last->child = p;
380 p->parent = mdoc->last;
381 break;
382 default:
383 abort();
384 /* NOTREACHED */
385 }
386
387 p->parent->nchild++;
388
389 if ( ! mdoc_valid_pre(mdoc, p))
390 return(0);
391 if ( ! mdoc_action_pre(mdoc, p))
392 return(0);
393
394 switch (p->type) {
395 case (MDOC_HEAD):
396 assert(MDOC_BLOCK == p->parent->type);
397 p->parent->head = p;
398 break;
399 case (MDOC_TAIL):
400 assert(MDOC_BLOCK == p->parent->type);
401 p->parent->tail = p;
402 break;
403 case (MDOC_BODY):
404 assert(MDOC_BLOCK == p->parent->type);
405 p->parent->body = p;
406 break;
407 default:
408 break;
409 }
410
411 mdoc->last = p;
412
413 switch (p->type) {
414 case (MDOC_TEXT):
415 if ( ! mdoc_valid_post(mdoc))
416 return(0);
417 if ( ! mdoc_action_post(mdoc))
418 return(0);
419 break;
420 default:
421 break;
422 }
423
424 return(1);
425 }
426
427
428 static struct mdoc_node *
429 node_alloc(struct mdoc *m, int line, int pos,
430 enum mdoct tok, enum mdoc_type type)
431 {
432 struct mdoc_node *p;
433
434 p = mandoc_calloc(1, sizeof(struct mdoc_node));
435 p->sec = m->lastsec;
436 p->line = line;
437 p->pos = pos;
438 p->tok = tok;
439 p->type = type;
440
441 return(p);
442 }
443
444
445 int
446 mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
447 {
448 struct mdoc_node *p;
449
450 p = node_alloc(m, line, pos, tok, MDOC_TAIL);
451 if ( ! node_append(m, p))
452 return(0);
453 m->next = MDOC_NEXT_CHILD;
454 return(1);
455 }
456
457
458 int
459 mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
460 {
461 struct mdoc_node *p;
462
463 assert(m->first);
464 assert(m->last);
465
466 p = node_alloc(m, line, pos, tok, MDOC_HEAD);
467 if ( ! node_append(m, p))
468 return(0);
469 m->next = MDOC_NEXT_CHILD;
470 return(1);
471 }
472
473
474 int
475 mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
476 {
477 struct mdoc_node *p;
478
479 p = node_alloc(m, line, pos, tok, MDOC_BODY);
480 if ( ! node_append(m, p))
481 return(0);
482 m->next = MDOC_NEXT_CHILD;
483 return(1);
484 }
485
486
487 int
488 mdoc_block_alloc(struct mdoc *m, int line, int pos,
489 enum mdoct tok, struct mdoc_arg *args)
490 {
491 struct mdoc_node *p;
492
493 p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
494 p->args = args;
495 if (p->args)
496 (args->refcnt)++;
497 if ( ! node_append(m, p))
498 return(0);
499 m->next = MDOC_NEXT_CHILD;
500 return(1);
501 }
502
503
504 int
505 mdoc_elem_alloc(struct mdoc *m, int line, int pos,
506 enum mdoct tok, struct mdoc_arg *args)
507 {
508 struct mdoc_node *p;
509
510 p = node_alloc(m, line, pos, tok, MDOC_ELEM);
511 p->args = args;
512 if (p->args)
513 (args->refcnt)++;
514 if ( ! node_append(m, p))
515 return(0);
516 m->next = MDOC_NEXT_CHILD;
517 return(1);
518 }
519
520
521 static int
522 pstring(struct mdoc *m, int line, int pos, const char *p, size_t len)
523 {
524 struct mdoc_node *n;
525 size_t sv;
526
527 n = node_alloc(m, line, pos, -1, MDOC_TEXT);
528 n->string = mandoc_malloc(len + 1);
529 sv = strlcpy(n->string, p, len + 1);
530
531 /* Prohibit truncation. */
532 assert(sv < len + 1);
533
534 if ( ! node_append(m, n))
535 return(0);
536 m->next = MDOC_NEXT_SIBLING;
537 return(1);
538 }
539
540
/*
 * Append the whole NUL-terminated string p to the tree as a text node.
 * Returns the result of pstring().
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	size_t		 len;

	len = strlen(p);
	return(pstring(m, line, pos, p, len));
}
547
548
549 void
550 mdoc_node_free(struct mdoc_node *p)
551 {
552
553 if (p->parent)
554 p->parent->nchild--;
555 if (p->string)
556 free(p->string);
557 if (p->args)
558 mdoc_argv_free(p->args);
559 free(p);
560 }
561
562
563 void
564 mdoc_node_freelist(struct mdoc_node *p)
565 {
566
567 if (p->child)
568 mdoc_node_freelist(p->child);
569 if (p->next)
570 mdoc_node_freelist(p->next);
571
572 assert(0 == p->nchild);
573 mdoc_node_free(p);
574 }
575
576
577 /*
578 * Parse free-form text, that is, a line that does not begin with the
579 * control character.
580 */
581 static int
582 parsetext(struct mdoc *m, int line, char *buf)
583 {
584 int i, j;
585 char sv;
586
587 if (SEC_NONE == m->lastnamed)
588 return(mdoc_perr(m, line, 0, ETEXTPROL));
589
590 /*
591 * If in literal mode, then pass the buffer directly to the
592 * back-end, as it should be preserved as a single term.
593 */
594
595 if (MDOC_LITERAL & m->flags)
596 return(mdoc_word_alloc(m, line, 0, buf));
597
598 /* Disallow blank/white-space lines in non-literal mode. */
599
600 for (i = 0; ' ' == buf[i]; i++)
601 /* Skip leading whitespace. */ ;
602
603 if ('\0' == buf[i]) {
604 if ( ! mdoc_pwarn(m, line, 0, ENOBLANK))
605 return(0);
606 /*
607 * Assume that a `Pp' should be inserted in the case of
608 * a blank line. Technically, blank lines aren't
609 * allowed, but enough manuals assume this behaviour
610 * that we want to work around it.
611 */
612 if ( ! mdoc_elem_alloc(m, line, 0, MDOC_Pp, NULL))
613 return(0);
614 }
615
616 /*
617 * Break apart a free-form line into tokens. Spaces are
618 * stripped out of the input.
619 */
620
621 for (j = i; buf[i]; i++) {
622 if (' ' != buf[i])
623 continue;
624
625 /* Escaped whitespace. */
626 if (i && ' ' == buf[i] && '\\' == buf[i - 1])
627 continue;
628
629 sv = buf[i];
630 buf[i++] = '\0';
631
632 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
633 return(0);
634
635 /* Trailing whitespace? Check at overwritten byte. */
636
637 if (' ' == sv && '\0' == buf[i])
638 if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS))
639 return(0);
640
641 for ( ; ' ' == buf[i]; i++)
642 /* Skip trailing whitespace. */ ;
643
644 j = i;
645
646 /* Trailing whitespace? */
647
648 if (' ' == buf[i - 1] && '\0' == buf[i])
649 if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS))
650 return(0);
651
652 if ('\0' == buf[i])
653 break;
654 }
655
656 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
657 return(0);
658
659 m->next = MDOC_NEXT_SIBLING;
660 return(1);
661 }
662
663
664
665 static int
666 macrowarn(struct mdoc *m, int ln, const char *buf)
667 {
668 if ( ! (MDOC_IGN_MACRO & m->pflags))
669 return(mdoc_verr(m, ln, 0,
670 "unknown macro: %s%s",
671 buf, strlen(buf) > 3 ? "..." : ""));
672 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s",
673 buf, strlen(buf) > 3 ? "..." : ""));
674 }
675
676
677 /*
678 * Parse a macro line, that is, a line beginning with the control
679 * character.
680 */
681 int
682 parsemacro(struct mdoc *m, int ln, char *buf)
683 {
684 int i, j, c;
685 char mac[5];
686
687 /* Empty lines are ignored. */
688
689 if ('\0' == buf[1])
690 return(1);
691
692 i = 1;
693
694 /* Accept whitespace after the initial control char. */
695
696 if (' ' == buf[i]) {
697 i++;
698 while (buf[i] && ' ' == buf[i])
699 i++;
700 if ('\0' == buf[i])
701 return(1);
702 }
703
704 /* Copy the first word into a nil-terminated buffer. */
705
706 for (j = 0; j < 4; j++, i++) {
707 if ('\0' == (mac[j] = buf[i]))
708 break;
709 else if (' ' == buf[i])
710 break;
711
712 /* Check for invalid characters. */
713
714 if (isgraph((u_char)buf[i]))
715 continue;
716 return(mdoc_perr(m, ln, i, EPRINT));
717 }
718
719 mac[j] = 0;
720
721 if (j == 4 || j < 2) {
722 if ( ! macrowarn(m, ln, mac))
723 goto err;
724 return(1);
725 }
726
727 if (MDOC_MAX == (c = mdoc_hash_find(mac))) {
728 if ( ! macrowarn(m, ln, mac))
729 goto err;
730 return(1);
731 }
732
733 /* The macro is sane. Jump to the next word. */
734
735 while (buf[i] && ' ' == buf[i])
736 i++;
737
738 /* Trailing whitespace? */
739
740 if ('\0' == buf[i] && ' ' == buf[i - 1])
741 if ( ! mdoc_pwarn(m, ln, i - 1, ETAILWS))
742 goto err;
743
744 /*
745 * Begin recursive parse sequence. Since we're at the start of
746 * the line, we don't need to do callable/parseable checks.
747 */
748 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
749 goto err;
750
751 return(1);
752
753 err: /* Error out. */
754
755 m->flags |= MDOC_HALT;
756 return(0);
757 }
758
759