]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
Macro types enum-ated (enum mdoct) (for easier debugging in gdb of "tok" values).
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.117 2010/03/31 07:13:53 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "libmdoc.h"
31 #include "libmandoc.h"
32
33 const char *const __mdoc_merrnames[MERRMAX] = {
34 "trailing whitespace", /* ETAILWS */
35 "unexpected quoted parameter", /* EQUOTPARM */
36 "unterminated quoted parameter", /* EQUOTTERM */
37 "argument parameter suggested", /* EARGVAL */
38 "macro disallowed in prologue", /* EBODYPROL */
39 "macro disallowed in body", /* EPROLBODY */
40 "text disallowed in prologue", /* ETEXTPROL */
41 "blank line disallowed", /* ENOBLANK */
42 "text parameter too long", /* ETOOLONG */
43 "invalid escape sequence", /* EESCAPE */
44 "invalid character", /* EPRINT */
45 "document has no body", /* ENODAT */
46 "document has no prologue", /* ENOPROLOGUE */
47 "expected line arguments", /* ELINE */
48 "invalid AT&T argument", /* EATT */
49 "default name not yet set", /* ENAME */
50 "missing list type", /* ELISTTYPE */
51 "missing display type", /* EDISPTYPE */
52 "too many display types", /* EMULTIDISP */
53 "too many list types", /* EMULTILIST */
54 "NAME section must be first", /* ESECNAME */
55 "badly-formed NAME section", /* ENAMESECINC */
56 "argument repeated", /* EARGREP */
57 "expected boolean parameter", /* EBOOL */
58 "inconsistent column syntax", /* ECOLMIS */
59 "nested display invalid", /* ENESTDISP */
60 "width argument missing", /* EMISSWIDTH */
61 "invalid section for this manual section", /* EWRONGMSEC */
62 "section out of conventional order", /* ESECOOO */
63 "section repeated", /* ESECREP */
64 "invalid standard argument", /* EBADSTAND */
65 "multi-line arguments discouraged", /* ENOMULTILINE */
66 "multi-line arguments suggested", /* EMULTILINE */
67 "line arguments discouraged", /* ENOLINE */
68 "prologue macro out of conventional order", /* EPROLOOO */
69 "prologue macro repeated", /* EPROLREP */
70 "invalid manual section", /* EBADMSEC */
71 "invalid section", /* EBADSEC */
72 "invalid font mode", /* EFONT */
73 "invalid date syntax", /* EBADDATE */
74 "invalid number format", /* ENUMFMT */
75 "superfluous width argument", /* ENOWIDTH */
76 "system: utsname error", /* EUTSNAME */
77 "obsolete macro", /* EOBS */
78 "end-of-line scope violation", /* EIMPBRK */
79 "empty macro ignored", /* EIGNE */
80 "unclosed explicit scope", /* EOPEN */
81 "unterminated quoted phrase", /* EQUOTPHR */
82 "closure macro without prior context", /* ENOCTX */
83 "no description found for library", /* ELIB */
84 "bad child for parent context", /* EBADCHILD */
85 "list arguments preceding type", /* ENOTYPE */
86 };
87
88 const char *const __mdoc_macronames[MDOC_MAX] = {
89 "Ap", "Dd", "Dt", "Os",
90 "Sh", "Ss", "Pp", "D1",
91 "Dl", "Bd", "Ed", "Bl",
92 "El", "It", "Ad", "An",
93 "Ar", "Cd", "Cm", "Dv",
94 "Er", "Ev", "Ex", "Fa",
95 "Fd", "Fl", "Fn", "Ft",
96 "Ic", "In", "Li", "Nd",
97 "Nm", "Op", "Ot", "Pa",
98 "Rv", "St", "Va", "Vt",
99 /* LINTED */
100 "Xr", "%A", "%B", "%D",
101 /* LINTED */
102 "%I", "%J", "%N", "%O",
103 /* LINTED */
104 "%P", "%R", "%T", "%V",
105 "Ac", "Ao", "Aq", "At",
106 "Bc", "Bf", "Bo", "Bq",
107 "Bsx", "Bx", "Db", "Dc",
108 "Do", "Dq", "Ec", "Ef",
109 "Em", "Eo", "Fx", "Ms",
110 "No", "Ns", "Nx", "Ox",
111 "Pc", "Pf", "Po", "Pq",
112 "Qc", "Ql", "Qo", "Qq",
113 "Re", "Rs", "Sc", "So",
114 "Sq", "Sm", "Sx", "Sy",
115 "Tn", "Ux", "Xc", "Xo",
116 "Fo", "Fc", "Oo", "Oc",
117 "Bk", "Ek", "Bt", "Hf",
118 "Fr", "Ud", "Lb", "Lp",
119 "Lk", "Mt", "Brq", "Bro",
120 /* LINTED */
121 "Brc", "%C", "Es", "En",
122 /* LINTED */
123 "Dx", "%Q", "br", "sp",
124 /* LINTED */
125 "%U"
126 };
127
128 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
129 "split", "nosplit", "ragged",
130 "unfilled", "literal", "file",
131 "offset", "bullet", "dash",
132 "hyphen", "item", "enum",
133 "tag", "diag", "hang",
134 "ohang", "inset", "column",
135 "width", "compact", "std",
136 "filled", "words", "emphasis",
137 "symbolic", "nested", "centered"
138 };
139
140 const char * const *mdoc_macronames = __mdoc_macronames;
141 const char * const *mdoc_argnames = __mdoc_argnames;
142
/* Forward declarations of the helpers private to this file. */
static	void		  mdoc_alloc1(struct mdoc *mdoc);
static	void		  mdoc_free1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *m, int line, int pos,
				enum mdoct tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  pstring(struct mdoc *m, int line, int pos,
				const char *p, size_t len);
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln, const char *buf);
154
155 const struct mdoc_node *
156 mdoc_node(const struct mdoc *m)
157 {
158
159 return(MDOC_HALT & m->flags ? NULL : m->first);
160 }
161
162
163 const struct mdoc_meta *
164 mdoc_meta(const struct mdoc *m)
165 {
166
167 return(MDOC_HALT & m->flags ? NULL : &m->meta);
168 }
169
170
171 /*
172 * Frees volatile resources (parse tree, meta-data, fields).
173 */
174 static void
175 mdoc_free1(struct mdoc *mdoc)
176 {
177
178 if (mdoc->first)
179 mdoc_node_freelist(mdoc->first);
180 if (mdoc->meta.title)
181 free(mdoc->meta.title);
182 if (mdoc->meta.os)
183 free(mdoc->meta.os);
184 if (mdoc->meta.name)
185 free(mdoc->meta.name);
186 if (mdoc->meta.arch)
187 free(mdoc->meta.arch);
188 if (mdoc->meta.vol)
189 free(mdoc->meta.vol);
190 }
191
192
193 /*
194 * Allocate all volatile resources (parse tree, meta-data, fields).
195 */
196 static void
197 mdoc_alloc1(struct mdoc *mdoc)
198 {
199
200 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
201 mdoc->flags = 0;
202 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
203 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
204 mdoc->first = mdoc->last;
205 mdoc->last->type = MDOC_ROOT;
206 mdoc->next = MDOC_NEXT_CHILD;
207 }
208
209
/*
 * Prepare the parser for another document: drop the current parse
 * tree and meta-data via mdoc_free1(), then rebuild an empty state via
 * mdoc_alloc1().  Fields those two helpers do not touch (callbacks,
 * user data, parse flags) survive the reset.
 */
void
mdoc_reset(struct mdoc *mdoc)
{
	mdoc_free1(mdoc);	/* tear down the old tree */
	mdoc_alloc1(mdoc);	/* start over with a bare root */
}
223
224
/*
 * Destroy a parser: release its per-parse state, then the parser
 * structure itself.  The pointer must not be used afterwards.
 */
void
mdoc_free(struct mdoc *mdoc)
{
	mdoc_free1(mdoc);

	free(mdoc);
}
236
237
238 /*
239 * Allocate volatile and non-volatile parse resources.
240 */
241 struct mdoc *
242 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
243 {
244 struct mdoc *p;
245
246 p = mandoc_calloc(1, sizeof(struct mdoc));
247
248 if (cb)
249 memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
250
251 p->data = data;
252 p->pflags = pflags;
253
254 mdoc_hash_init();
255 mdoc_alloc1(p);
256 return(p);
257 }
258
259
260 /*
261 * Climb back up the parse tree, validating open scopes. Mostly calls
262 * through to macro_end() in macro.c.
263 */
264 int
265 mdoc_endparse(struct mdoc *m)
266 {
267
268 if (MDOC_HALT & m->flags)
269 return(0);
270 else if (mdoc_macroend(m))
271 return(1);
272 m->flags |= MDOC_HALT;
273 return(0);
274 }
275
276
277 /*
278 * Main parse routine. Parses a single line -- really just hands off to
279 * the macro (parsemacro()) or text parser (parsetext()).
280 */
281 int
282 mdoc_parseln(struct mdoc *m, int ln, char *buf)
283 {
284
285 if (MDOC_HALT & m->flags)
286 return(0);
287
288 return('.' == *buf ? parsemacro(m, ln, buf) :
289 parsetext(m, ln, buf));
290 }
291
292
293 int
294 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
295 const char *fmt, ...)
296 {
297 char buf[256];
298 va_list ap;
299
300 if (NULL == mdoc->cb.mdoc_err)
301 return(0);
302
303 va_start(ap, fmt);
304 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
305 va_end(ap);
306
307 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
308 }
309
310
311 int
312 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
313 {
314 char buf[256];
315 va_list ap;
316
317 if (NULL == mdoc->cb.mdoc_warn)
318 return(0);
319
320 va_start(ap, fmt);
321 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
322 va_end(ap);
323
324 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
325 }
326
327
328 int
329 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
330 {
331 const char *p;
332
333 p = __mdoc_merrnames[(int)type];
334 assert(p);
335
336 if (iserr)
337 return(mdoc_verr(m, line, pos, p));
338
339 return(mdoc_vwarn(m, line, pos, p));
340 }
341
342
343 int
344 mdoc_macro(struct mdoc *m, enum mdoct tok,
345 int ln, int pp, int *pos, char *buf)
346 {
347
348 assert(tok < MDOC_MAX);
349 /*
350 * If we're in the prologue, deny "body" macros. Similarly, if
351 * we're in the body, deny prologue calls.
352 */
353 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
354 MDOC_PBODY & m->flags)
355 return(mdoc_perr(m, ln, pp, EPROLBODY));
356 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
357 ! (MDOC_PBODY & m->flags))
358 return(mdoc_perr(m, ln, pp, EBODYPROL));
359
360 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
361 }
362
363
364 static int
365 node_append(struct mdoc *mdoc, struct mdoc_node *p)
366 {
367
368 assert(mdoc->last);
369 assert(mdoc->first);
370 assert(MDOC_ROOT != p->type);
371
372 switch (mdoc->next) {
373 case (MDOC_NEXT_SIBLING):
374 mdoc->last->next = p;
375 p->prev = mdoc->last;
376 p->parent = mdoc->last->parent;
377 break;
378 case (MDOC_NEXT_CHILD):
379 mdoc->last->child = p;
380 p->parent = mdoc->last;
381 break;
382 default:
383 abort();
384 /* NOTREACHED */
385 }
386
387 p->parent->nchild++;
388
389 if ( ! mdoc_valid_pre(mdoc, p))
390 return(0);
391 if ( ! mdoc_action_pre(mdoc, p))
392 return(0);
393
394 switch (p->type) {
395 case (MDOC_HEAD):
396 assert(MDOC_BLOCK == p->parent->type);
397 p->parent->head = p;
398 break;
399 case (MDOC_TAIL):
400 assert(MDOC_BLOCK == p->parent->type);
401 p->parent->tail = p;
402 break;
403 case (MDOC_BODY):
404 assert(MDOC_BLOCK == p->parent->type);
405 p->parent->body = p;
406 break;
407 default:
408 break;
409 }
410
411 mdoc->last = p;
412
413 switch (p->type) {
414 case (MDOC_TEXT):
415 if ( ! mdoc_valid_post(mdoc))
416 return(0);
417 if ( ! mdoc_action_post(mdoc))
418 return(0);
419 break;
420 default:
421 break;
422 }
423
424 return(1);
425 }
426
427
428 static struct mdoc_node *
429 node_alloc(struct mdoc *m, int line, int pos,
430 enum mdoct tok, enum mdoc_type type)
431 {
432 struct mdoc_node *p;
433
434 p = mandoc_calloc(1, sizeof(struct mdoc_node));
435 p->sec = m->lastsec;
436 p->line = line;
437 p->pos = pos;
438 p->tok = tok;
439
440 return(p);
441 }
442
443
444 int
445 mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
446 {
447 struct mdoc_node *p;
448
449 p = node_alloc(m, line, pos, tok, MDOC_TAIL);
450 if ( ! node_append(m, p))
451 return(0);
452 m->next = MDOC_NEXT_CHILD;
453 return(1);
454 }
455
456
457 int
458 mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
459 {
460 struct mdoc_node *p;
461
462 assert(m->first);
463 assert(m->last);
464
465 p = node_alloc(m, line, pos, tok, MDOC_HEAD);
466 if ( ! node_append(m, p))
467 return(0);
468 m->next = MDOC_NEXT_CHILD;
469 return(1);
470 }
471
472
473 int
474 mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
475 {
476 struct mdoc_node *p;
477
478 p = node_alloc(m, line, pos, tok, MDOC_BODY);
479 if ( ! node_append(m, p))
480 return(0);
481 m->next = MDOC_NEXT_CHILD;
482 return(1);
483 }
484
485
486 int
487 mdoc_block_alloc(struct mdoc *m, int line, int pos,
488 enum mdoct tok, struct mdoc_arg *args)
489 {
490 struct mdoc_node *p;
491
492 p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
493 p->args = args;
494 if (p->args)
495 (args->refcnt)++;
496 if ( ! node_append(m, p))
497 return(0);
498 m->next = MDOC_NEXT_CHILD;
499 return(1);
500 }
501
502
503 int
504 mdoc_elem_alloc(struct mdoc *m, int line, int pos,
505 enum mdoct tok, struct mdoc_arg *args)
506 {
507 struct mdoc_node *p;
508
509 p = node_alloc(m, line, pos, tok, MDOC_ELEM);
510 p->args = args;
511 if (p->args)
512 (args->refcnt)++;
513 if ( ! node_append(m, p))
514 return(0);
515 m->next = MDOC_NEXT_CHILD;
516 return(1);
517 }
518
519
520 static int
521 pstring(struct mdoc *m, int line, int pos, const char *p, size_t len)
522 {
523 struct mdoc_node *n;
524 size_t sv;
525
526 n = node_alloc(m, line, pos, -1, MDOC_TEXT);
527 n->string = mandoc_malloc(len + 1);
528 sv = strlcpy(n->string, p, len + 1);
529
530 /* Prohibit truncation. */
531 assert(sv < len + 1);
532
533 if ( ! node_append(m, n))
534 return(0);
535 m->next = MDOC_NEXT_SIBLING;
536 return(1);
537 }
538
539
/*
 * Append the whole NUL-terminated string p to the tree as one text
 * node.  Returns the result of pstring().
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	size_t		 len;

	len = strlen(p);

	return pstring(m, line, pos, p, len);
}
546
547
548 void
549 mdoc_node_free(struct mdoc_node *p)
550 {
551
552 if (p->parent)
553 p->parent->nchild--;
554 if (p->string)
555 free(p->string);
556 if (p->args)
557 mdoc_argv_free(p->args);
558 free(p);
559 }
560
561
562 void
563 mdoc_node_freelist(struct mdoc_node *p)
564 {
565
566 if (p->child)
567 mdoc_node_freelist(p->child);
568 if (p->next)
569 mdoc_node_freelist(p->next);
570
571 assert(0 == p->nchild);
572 mdoc_node_free(p);
573 }
574
575
576 /*
577 * Parse free-form text, that is, a line that does not begin with the
578 * control character.
579 */
580 static int
581 parsetext(struct mdoc *m, int line, char *buf)
582 {
583 int i, j;
584 char sv;
585
586 if (SEC_NONE == m->lastnamed)
587 return(mdoc_perr(m, line, 0, ETEXTPROL));
588
589 /*
590 * If in literal mode, then pass the buffer directly to the
591 * back-end, as it should be preserved as a single term.
592 */
593
594 if (MDOC_LITERAL & m->flags)
595 return(mdoc_word_alloc(m, line, 0, buf));
596
597 /* Disallow blank/white-space lines in non-literal mode. */
598
599 for (i = 0; ' ' == buf[i]; i++)
600 /* Skip leading whitespace. */ ;
601
602 if ('\0' == buf[i])
603 return(mdoc_perr(m, line, 0, ENOBLANK));
604
605 /*
606 * Break apart a free-form line into tokens. Spaces are
607 * stripped out of the input.
608 */
609
610 for (j = i; buf[i]; i++) {
611 if (' ' != buf[i])
612 continue;
613
614 /* Escaped whitespace. */
615 if (i && ' ' == buf[i] && '\\' == buf[i - 1])
616 continue;
617
618 sv = buf[i];
619 buf[i++] = '\0';
620
621 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
622 return(0);
623
624 /* Trailing whitespace? Check at overwritten byte. */
625
626 if (' ' == sv && '\0' == buf[i])
627 if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS))
628 return(0);
629
630 for ( ; ' ' == buf[i]; i++)
631 /* Skip trailing whitespace. */ ;
632
633 j = i;
634
635 /* Trailing whitespace? */
636
637 if (' ' == buf[i - 1] && '\0' == buf[i])
638 if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS))
639 return(0);
640
641 if ('\0' == buf[i])
642 break;
643 }
644
645 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
646 return(0);
647
648 m->next = MDOC_NEXT_SIBLING;
649 return(1);
650 }
651
652
653
654 static int
655 macrowarn(struct mdoc *m, int ln, const char *buf)
656 {
657 if ( ! (MDOC_IGN_MACRO & m->pflags))
658 return(mdoc_verr(m, ln, 0,
659 "unknown macro: %s%s",
660 buf, strlen(buf) > 3 ? "..." : ""));
661 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s",
662 buf, strlen(buf) > 3 ? "..." : ""));
663 }
664
665
666 /*
667 * Parse a macro line, that is, a line beginning with the control
668 * character.
669 */
670 int
671 parsemacro(struct mdoc *m, int ln, char *buf)
672 {
673 int i, j, c;
674 char mac[5];
675
676 /* Empty lines are ignored. */
677
678 if ('\0' == buf[1])
679 return(1);
680
681 i = 1;
682
683 /* Accept whitespace after the initial control char. */
684
685 if (' ' == buf[i]) {
686 i++;
687 while (buf[i] && ' ' == buf[i])
688 i++;
689 if ('\0' == buf[i])
690 return(1);
691 }
692
693 /* Copy the first word into a nil-terminated buffer. */
694
695 for (j = 0; j < 4; j++, i++) {
696 if ('\0' == (mac[j] = buf[i]))
697 break;
698 else if (' ' == buf[i])
699 break;
700
701 /* Check for invalid characters. */
702
703 if (isgraph((u_char)buf[i]))
704 continue;
705 return(mdoc_perr(m, ln, i, EPRINT));
706 }
707
708 mac[j] = 0;
709
710 if (j == 4 || j < 2) {
711 if ( ! macrowarn(m, ln, mac))
712 goto err;
713 return(1);
714 }
715
716 if (MDOC_MAX == (c = mdoc_hash_find(mac))) {
717 if ( ! macrowarn(m, ln, mac))
718 goto err;
719 return(1);
720 }
721
722 /* The macro is sane. Jump to the next word. */
723
724 while (buf[i] && ' ' == buf[i])
725 i++;
726
727 /* Trailing whitespace? */
728
729 if ('\0' == buf[i] && ' ' == buf[i - 1])
730 if ( ! mdoc_pwarn(m, ln, i - 1, ETAILWS))
731 goto err;
732
733 /*
734 * Begin recursive parse sequence. Since we're at the start of
735 * the line, we don't need to do callable/parseable checks.
736 */
737 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
738 goto err;
739
740 return(1);
741
742 err: /* Error out. */
743
744 m->flags |= MDOC_HALT;
745 return(0);
746 }
747
748