]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
bzero() -> memset() (noted by Joerg Sonnenberger).
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.112 2009/10/27 08:26:12 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <sys/types.h>
18
19 #include <assert.h>
20 #include <ctype.h>
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25
26 #include "libmdoc.h"
27
28 const char *const __mdoc_merrnames[MERRMAX] = {
29 "trailing whitespace", /* ETAILWS */
30 "unexpected quoted parameter", /* EQUOTPARM */
31 "unterminated quoted parameter", /* EQUOTTERM */
32 "system: malloc error", /* EMALLOC */
33 "argument parameter suggested", /* EARGVAL */
34 "macro disallowed in prologue", /* EBODYPROL */
35 "macro disallowed in body", /* EPROLBODY */
36 "text disallowed in prologue", /* ETEXTPROL */
37 "blank line disallowed", /* ENOBLANK */
38 "text parameter too long", /* ETOOLONG */
39 "invalid escape sequence", /* EESCAPE */
40 "invalid character", /* EPRINT */
41 "document has no body", /* ENODAT */
42 "document has no prologue", /* ENOPROLOGUE */
43 "expected line arguments", /* ELINE */
44 "invalid AT&T argument", /* EATT */
45 "default name not yet set", /* ENAME */
46 "missing list type", /* ELISTTYPE */
47 "missing display type", /* EDISPTYPE */
48 "too many display types", /* EMULTIDISP */
49 "too many list types", /* EMULTILIST */
50 "NAME section must be first", /* ESECNAME */
51 "badly-formed NAME section", /* ENAMESECINC */
52 "argument repeated", /* EARGREP */
53 "expected boolean parameter", /* EBOOL */
54 "inconsistent column syntax", /* ECOLMIS */
55 "nested display invalid", /* ENESTDISP */
56 "width argument missing", /* EMISSWIDTH */
57 "invalid section for this manual section", /* EWRONGMSEC */
58 "section out of conventional order", /* ESECOOO */
59 "section repeated", /* ESECREP */
60 "invalid standard argument", /* EBADSTAND */
61 "multi-line arguments discouraged", /* ENOMULTILINE */
62 "multi-line arguments suggested", /* EMULTILINE */
63 "line arguments discouraged", /* ENOLINE */
64 "prologue macro out of conventional order", /* EPROLOOO */
65 "prologue macro repeated", /* EPROLREP */
66 "invalid manual section", /* EBADMSEC */
67 "invalid section", /* EBADSEC */
68 "invalid font mode", /* EFONT */
69 "invalid date syntax", /* EBADDATE */
70 "invalid number format", /* ENUMFMT */
71 "superfluous width argument", /* ENOWIDTH */
72 "system: utsname error", /* EUTSNAME */
73 "obsolete macro", /* EOBS */
74 "end-of-line scope violation", /* EIMPBRK */
75 "empty macro ignored", /* EIGNE */
76 "unclosed explicit scope", /* EOPEN */
77 "unterminated quoted phrase", /* EQUOTPHR */
78 "closure macro without prior context", /* ENOCTX */
79 "no description found for library", /* ELIB */
80 "bad child for parent context", /* EBADCHILD */
81 "list arguments preceding type", /* ENOTYPE */
82 };
83
84 const char *const __mdoc_macronames[MDOC_MAX] = {
85 "Ap", "Dd", "Dt", "Os",
86 "Sh", "Ss", "Pp", "D1",
87 "Dl", "Bd", "Ed", "Bl",
88 "El", "It", "Ad", "An",
89 "Ar", "Cd", "Cm", "Dv",
90 "Er", "Ev", "Ex", "Fa",
91 "Fd", "Fl", "Fn", "Ft",
92 "Ic", "In", "Li", "Nd",
93 "Nm", "Op", "Ot", "Pa",
94 "Rv", "St", "Va", "Vt",
95 /* LINTED */
96 "Xr", "\%A", "\%B", "\%D",
97 /* LINTED */
98 "\%I", "\%J", "\%N", "\%O",
99 /* LINTED */
100 "\%P", "\%R", "\%T", "\%V",
101 "Ac", "Ao", "Aq", "At",
102 "Bc", "Bf", "Bo", "Bq",
103 "Bsx", "Bx", "Db", "Dc",
104 "Do", "Dq", "Ec", "Ef",
105 "Em", "Eo", "Fx", "Ms",
106 "No", "Ns", "Nx", "Ox",
107 "Pc", "Pf", "Po", "Pq",
108 "Qc", "Ql", "Qo", "Qq",
109 "Re", "Rs", "Sc", "So",
110 "Sq", "Sm", "Sx", "Sy",
111 "Tn", "Ux", "Xc", "Xo",
112 "Fo", "Fc", "Oo", "Oc",
113 "Bk", "Ek", "Bt", "Hf",
114 "Fr", "Ud", "Lb", "Lp",
115 "Lk", "Mt", "Brq", "Bro",
116 /* LINTED */
117 "Brc", "\%C", "Es", "En",
118 /* LINTED */
119 "Dx", "\%Q", "br", "sp",
120 /* LINTED */
121 "\%U"
122 };
123
124 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
125 "split", "nosplit", "ragged",
126 "unfilled", "literal", "file",
127 "offset", "bullet", "dash",
128 "hyphen", "item", "enum",
129 "tag", "diag", "hang",
130 "ohang", "inset", "column",
131 "width", "compact", "std",
132 "filled", "words", "emphasis",
133 "symbolic", "nested", "centered"
134 };
135
136 const char * const *mdoc_macronames = __mdoc_macronames;
137 const char * const *mdoc_argnames = __mdoc_argnames;
138
139 static void mdoc_free1(struct mdoc *);
140 static int mdoc_alloc1(struct mdoc *);
141 static struct mdoc_node *node_alloc(struct mdoc *, int, int,
142 int, enum mdoc_type);
143 static int node_append(struct mdoc *,
144 struct mdoc_node *);
145 static int parsetext(struct mdoc *, int, char *);
146 static int parsemacro(struct mdoc *, int, char *);
147 static int macrowarn(struct mdoc *, int, const char *);
148 static int pstring(struct mdoc *, int, int,
149 const char *, size_t);
150
151 #ifdef __linux__
152 extern size_t strlcpy(char *, const char *, size_t);
153 #endif
154
155
156 const struct mdoc_node *
157 mdoc_node(const struct mdoc *m)
158 {
159
160 return(MDOC_HALT & m->flags ? NULL : m->first);
161 }
162
163
164 const struct mdoc_meta *
165 mdoc_meta(const struct mdoc *m)
166 {
167
168 return(MDOC_HALT & m->flags ? NULL : &m->meta);
169 }
170
171
172 /*
173 * Frees volatile resources (parse tree, meta-data, fields).
174 */
175 static void
176 mdoc_free1(struct mdoc *mdoc)
177 {
178
179 if (mdoc->first)
180 mdoc_node_freelist(mdoc->first);
181 if (mdoc->meta.title)
182 free(mdoc->meta.title);
183 if (mdoc->meta.os)
184 free(mdoc->meta.os);
185 if (mdoc->meta.name)
186 free(mdoc->meta.name);
187 if (mdoc->meta.arch)
188 free(mdoc->meta.arch);
189 if (mdoc->meta.vol)
190 free(mdoc->meta.vol);
191 }
192
193
194 /*
195 * Allocate all volatile resources (parse tree, meta-data, fields).
196 */
197 static int
198 mdoc_alloc1(struct mdoc *mdoc)
199 {
200
201 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
202 mdoc->flags = 0;
203 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
204 mdoc->last = calloc(1, sizeof(struct mdoc_node));
205 if (NULL == mdoc->last)
206 return(0);
207
208 mdoc->first = mdoc->last;
209 mdoc->last->type = MDOC_ROOT;
210 mdoc->next = MDOC_NEXT_CHILD;
211 return(1);
212 }
213
214
/*
 * Throw away the per-parse state with mdoc_free1() and build a fresh
 * one with mdoc_alloc1(), so the same parser can be run over a new
 * document.  Fields that neither helper touches (callbacks, user data,
 * parse flags) survive the reset.
 *
 * Returns the result of mdoc_alloc1(): 1 on success, 0 on failure.
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 ok;

	mdoc_free1(mdoc);
	ok = mdoc_alloc1(mdoc);
	return(ok);
}
228
229
/*
 * Release everything the parser owns -- first the per-parse state via
 * mdoc_free1(), then the parser structure itself.  The pointer must
 * not be used after this call.
 */
void
mdoc_free(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);	/* parse tree and meta-data strings */
	free(mdoc);		/* the parser structure itself */
}
241
242
243 /*
244 * Allocate volatile and non-volatile parse resources.
245 */
246 struct mdoc *
247 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
248 {
249 struct mdoc *p;
250
251 if (NULL == (p = calloc(1, sizeof(struct mdoc))))
252 return(NULL);
253 if (cb)
254 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
255
256 mdoc_hash_init();
257
258 p->data = data;
259 p->pflags = pflags;
260
261 if (mdoc_alloc1(p))
262 return(p);
263
264 free(p);
265 return(NULL);
266 }
267
268
269 /*
270 * Climb back up the parse tree, validating open scopes. Mostly calls
271 * through to macro_end() in macro.c.
272 */
273 int
274 mdoc_endparse(struct mdoc *m)
275 {
276
277 if (MDOC_HALT & m->flags)
278 return(0);
279 else if (mdoc_macroend(m))
280 return(1);
281 m->flags |= MDOC_HALT;
282 return(0);
283 }
284
285
286 /*
287 * Main parse routine. Parses a single line -- really just hands off to
288 * the macro (parsemacro()) or text parser (parsetext()).
289 */
290 int
291 mdoc_parseln(struct mdoc *m, int ln, char *buf)
292 {
293
294 if (MDOC_HALT & m->flags)
295 return(0);
296
297 return('.' == *buf ? parsemacro(m, ln, buf) :
298 parsetext(m, ln, buf));
299 }
300
301
302 int
303 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
304 const char *fmt, ...)
305 {
306 char buf[256];
307 va_list ap;
308
309 if (NULL == mdoc->cb.mdoc_err)
310 return(0);
311
312 va_start(ap, fmt);
313 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
314 va_end(ap);
315
316 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
317 }
318
319
320 int
321 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
322 {
323 char buf[256];
324 va_list ap;
325
326 if (NULL == mdoc->cb.mdoc_warn)
327 return(0);
328
329 va_start(ap, fmt);
330 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
331 va_end(ap);
332
333 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
334 }
335
336
337 int
338 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
339 {
340 const char *p;
341
342 p = __mdoc_merrnames[(int)type];
343 assert(p);
344
345 if (iserr)
346 return(mdoc_verr(m, line, pos, p));
347
348 return(mdoc_vwarn(m, line, pos, p));
349 }
350
351
352 int
353 mdoc_macro(struct mdoc *m, int tok,
354 int ln, int pp, int *pos, char *buf)
355 {
356 /*
357 * If we're in the prologue, deny "body" macros. Similarly, if
358 * we're in the body, deny prologue calls.
359 */
360 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
361 MDOC_PBODY & m->flags)
362 return(mdoc_perr(m, ln, pp, EPROLBODY));
363 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
364 ! (MDOC_PBODY & m->flags))
365 return(mdoc_perr(m, ln, pp, EBODYPROL));
366
367 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
368 }
369
370
371 static int
372 node_append(struct mdoc *mdoc, struct mdoc_node *p)
373 {
374
375 assert(mdoc->last);
376 assert(mdoc->first);
377 assert(MDOC_ROOT != p->type);
378
379 switch (mdoc->next) {
380 case (MDOC_NEXT_SIBLING):
381 mdoc->last->next = p;
382 p->prev = mdoc->last;
383 p->parent = mdoc->last->parent;
384 break;
385 case (MDOC_NEXT_CHILD):
386 mdoc->last->child = p;
387 p->parent = mdoc->last;
388 break;
389 default:
390 abort();
391 /* NOTREACHED */
392 }
393
394 p->parent->nchild++;
395
396 if ( ! mdoc_valid_pre(mdoc, p))
397 return(0);
398 if ( ! mdoc_action_pre(mdoc, p))
399 return(0);
400
401 switch (p->type) {
402 case (MDOC_HEAD):
403 assert(MDOC_BLOCK == p->parent->type);
404 p->parent->head = p;
405 break;
406 case (MDOC_TAIL):
407 assert(MDOC_BLOCK == p->parent->type);
408 p->parent->tail = p;
409 break;
410 case (MDOC_BODY):
411 assert(MDOC_BLOCK == p->parent->type);
412 p->parent->body = p;
413 break;
414 default:
415 break;
416 }
417
418 mdoc->last = p;
419
420 switch (p->type) {
421 case (MDOC_TEXT):
422 if ( ! mdoc_valid_post(mdoc))
423 return(0);
424 if ( ! mdoc_action_post(mdoc))
425 return(0);
426 break;
427 default:
428 break;
429 }
430
431 return(1);
432 }
433
434
435 static struct mdoc_node *
436 node_alloc(struct mdoc *m, int line,
437 int pos, int tok, enum mdoc_type type)
438 {
439 struct mdoc_node *p;
440
441 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
442 (void)mdoc_nerr(m, m->last, EMALLOC);
443 return(NULL);
444 }
445
446 p->sec = m->lastsec;
447 p->line = line;
448 p->pos = pos;
449 p->tok = tok;
450 if (MDOC_TEXT != (p->type = type))
451 assert(p->tok >= 0);
452
453 return(p);
454 }
455
456
457 int
458 mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok)
459 {
460 struct mdoc_node *p;
461
462 p = node_alloc(m, line, pos, tok, MDOC_TAIL);
463 if (NULL == p)
464 return(0);
465 if ( ! node_append(m, p))
466 return(0);
467 m->next = MDOC_NEXT_CHILD;
468 return(1);
469 }
470
471
472 int
473 mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok)
474 {
475 struct mdoc_node *p;
476
477 assert(m->first);
478 assert(m->last);
479
480 p = node_alloc(m, line, pos, tok, MDOC_HEAD);
481 if (NULL == p)
482 return(0);
483 if ( ! node_append(m, p))
484 return(0);
485 m->next = MDOC_NEXT_CHILD;
486 return(1);
487 }
488
489
490 int
491 mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok)
492 {
493 struct mdoc_node *p;
494
495 p = node_alloc(m, line, pos, tok, MDOC_BODY);
496 if (NULL == p)
497 return(0);
498 if ( ! node_append(m, p))
499 return(0);
500 m->next = MDOC_NEXT_CHILD;
501 return(1);
502 }
503
504
505 int
506 mdoc_block_alloc(struct mdoc *m, int line, int pos,
507 int tok, struct mdoc_arg *args)
508 {
509 struct mdoc_node *p;
510
511 p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
512 if (NULL == p)
513 return(0);
514 p->args = args;
515 if (p->args)
516 (args->refcnt)++;
517 if ( ! node_append(m, p))
518 return(0);
519 m->next = MDOC_NEXT_CHILD;
520 return(1);
521 }
522
523
524 int
525 mdoc_elem_alloc(struct mdoc *m, int line, int pos,
526 int tok, struct mdoc_arg *args)
527 {
528 struct mdoc_node *p;
529
530 p = node_alloc(m, line, pos, tok, MDOC_ELEM);
531 if (NULL == p)
532 return(0);
533 p->args = args;
534 if (p->args)
535 (args->refcnt)++;
536 if ( ! node_append(m, p))
537 return(0);
538 m->next = MDOC_NEXT_CHILD;
539 return(1);
540 }
541
542
543 static int
544 pstring(struct mdoc *m, int line, int pos, const char *p, size_t len)
545 {
546 struct mdoc_node *n;
547 size_t sv;
548
549 n = node_alloc(m, line, pos, -1, MDOC_TEXT);
550 if (NULL == n)
551 return(mdoc_nerr(m, m->last, EMALLOC));
552
553 n->string = malloc(len + 1);
554 if (NULL == n->string) {
555 free(n);
556 return(mdoc_nerr(m, m->last, EMALLOC));
557 }
558
559 sv = strlcpy(n->string, p, len + 1);
560
561 /* Prohibit truncation. */
562 assert(sv < len + 1);
563
564 if ( ! node_append(m, n))
565 return(0);
566 m->next = MDOC_NEXT_SIBLING;
567 return(1);
568 }
569
570
/*
 * Append the NUL-terminated string `p' to the parse tree as a text
 * node.  Thin wrapper around pstring() that supplies the string's own
 * length; returns pstring()'s result.
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	size_t		 sz;

	sz = strlen(p);
	return(pstring(m, line, pos, p, sz));
}
577
578
579 void
580 mdoc_node_free(struct mdoc_node *p)
581 {
582
583 if (p->parent)
584 p->parent->nchild--;
585 if (p->string)
586 free(p->string);
587 if (p->args)
588 mdoc_argv_free(p->args);
589 free(p);
590 }
591
592
593 void
594 mdoc_node_freelist(struct mdoc_node *p)
595 {
596
597 if (p->child)
598 mdoc_node_freelist(p->child);
599 if (p->next)
600 mdoc_node_freelist(p->next);
601
602 assert(0 == p->nchild);
603 mdoc_node_free(p);
604 }
605
606
607 /*
608 * Parse free-form text, that is, a line that does not begin with the
609 * control character.
610 */
611 static int
612 parsetext(struct mdoc *m, int line, char *buf)
613 {
614 int i, j;
615
616 if (SEC_NONE == m->lastnamed)
617 return(mdoc_perr(m, line, 0, ETEXTPROL));
618
619 /*
620 * If in literal mode, then pass the buffer directly to the
621 * back-end, as it should be preserved as a single term.
622 */
623
624 if (MDOC_LITERAL & m->flags)
625 return(mdoc_word_alloc(m, line, 0, buf));
626
627 /* Disallow blank/white-space lines in non-literal mode. */
628
629 for (i = 0; ' ' == buf[i]; i++)
630 /* Skip leading whitespace. */ ;
631 if (0 == buf[i])
632 return(mdoc_perr(m, line, 0, ENOBLANK));
633
634 /*
635 * Break apart a free-form line into tokens. Spaces are
636 * stripped out of the input.
637 */
638
639 for (j = i; buf[i]; i++) {
640 if (' ' != buf[i])
641 continue;
642
643 /* Escaped whitespace. */
644 if (i && ' ' == buf[i] && '\\' == buf[i - 1])
645 continue;
646
647 buf[i++] = 0;
648 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
649 return(0);
650
651 for ( ; ' ' == buf[i]; i++)
652 /* Skip trailing whitespace. */ ;
653
654 j = i;
655 if (0 == buf[i])
656 break;
657 }
658
659 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
660 return(0);
661
662 m->next = MDOC_NEXT_SIBLING;
663 return(1);
664 }
665
666
667
668 static int
669 macrowarn(struct mdoc *m, int ln, const char *buf)
670 {
671 if ( ! (MDOC_IGN_MACRO & m->pflags))
672 return(mdoc_verr(m, ln, 0,
673 "unknown macro: %s%s",
674 buf, strlen(buf) > 3 ? "..." : ""));
675 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s",
676 buf, strlen(buf) > 3 ? "..." : ""));
677 }
678
679
680 /*
681 * Parse a macro line, that is, a line beginning with the control
682 * character.
683 */
684 int
685 parsemacro(struct mdoc *m, int ln, char *buf)
686 {
687 int i, j, c;
688 char mac[5];
689
690 /* Empty lines are ignored. */
691
692 if (0 == buf[1])
693 return(1);
694
695 i = 1;
696
697 /* Accept whitespace after the initial control char. */
698
699 if (' ' == buf[i]) {
700 i++;
701 while (buf[i] && ' ' == buf[i])
702 i++;
703 if (0 == buf[i])
704 return(1);
705 }
706
707 /* Copy the first word into a nil-terminated buffer. */
708
709 for (j = 0; j < 4; j++, i++) {
710 if (0 == (mac[j] = buf[i]))
711 break;
712 else if (' ' == buf[i])
713 break;
714
715 /* Check for invalid characters. */
716
717 if (isgraph((u_char)buf[i]))
718 continue;
719 return(mdoc_perr(m, ln, i, EPRINT));
720 }
721
722 mac[j] = 0;
723
724 if (j == 4 || j < 2) {
725 if ( ! macrowarn(m, ln, mac))
726 goto err;
727 return(1);
728 }
729
730 if (MDOC_MAX == (c = mdoc_hash_find(mac))) {
731 if ( ! macrowarn(m, ln, mac))
732 goto err;
733 return(1);
734 }
735
736 /* The macro is sane. Jump to the next word. */
737
738 while (buf[i] && ' ' == buf[i])
739 i++;
740
741 /*
742 * Begin recursive parse sequence. Since we're at the start of
743 * the line, we don't need to do callable/parseable checks.
744 */
745 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
746 goto err;
747
748 return(1);
749
750 err: /* Error out. */
751
752 m->flags |= MDOC_HALT;
753 return(0);
754 }
755
756