]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
Columns with `Bl -column' correctly check for CALLABLE status.
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.98 2009/07/29 08:52:24 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <assert.h>
18 #include <ctype.h>
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "libmdoc.h"
25
26 const char *const __mdoc_merrnames[MERRMAX] = {
27 "trailing whitespace", /* ETAILWS */
28 "unexpected quoted parameter", /* EQUOTPARM */
29 "unterminated quoted parameter", /* EQUOTTERM */
30 "system: malloc error", /* EMALLOC */
31 "argument parameter suggested", /* EARGVAL */
32 "macro disallowed in prologue", /* EBODYPROL */
33 "macro disallowed in body", /* EPROLBODY */
34 "text disallowed in prologue", /* ETEXTPROL */
35 "blank line disallowed", /* ENOBLANK */
36 "text parameter too long", /* ETOOLONG */
37 "invalid escape sequence", /* EESCAPE */
38 "invalid character", /* EPRINT */
39 "document has no body", /* ENODAT */
40 "document has no prologue", /* ENOPROLOGUE */
41 "expected line arguments", /* ELINE */
42 "invalid AT&T argument", /* EATT */
43 "default name not yet set", /* ENAME */
44 "missing list type", /* ELISTTYPE */
45 "missing display type", /* EDISPTYPE */
46 "too many display types", /* EMULTIDISP */
47 "too many list types", /* EMULTILIST */
48 "NAME section must be first", /* ESECNAME */
49 "badly-formed NAME section", /* ENAMESECINC */
50 "argument repeated", /* EARGREP */
51 "expected boolean parameter", /* EBOOL */
52 "inconsistent column syntax", /* ECOLMIS */
53 "nested display invalid", /* ENESTDISP */
54 "width argument missing", /* EMISSWIDTH */
55 "invalid section for this manual section", /* EWRONGMSEC */
56 "section out of conventional order", /* ESECOOO */
57 "section repeated", /* ESECREP */
58 "invalid standard argument", /* EBADSTAND */
59 "multi-line arguments discouraged", /* ENOMULTILINE */
60 "multi-line arguments suggested", /* EMULTILINE */
61 "line arguments discouraged", /* ENOLINE */
62 "prologue macro out of conventional order", /* EPROLOOO */
63 "prologue macro repeated", /* EPROLREP */
64 "invalid manual section", /* EBADMSEC */
65 "invalid section", /* EBADSEC */
66 "invalid font mode", /* EFONT */
67 "invalid date syntax", /* EBADDATE */
68 "invalid number format", /* ENUMFMT */
69 "superfluous width argument", /* ENOWIDTH */
70 "system: utsname error", /* EUTSNAME */
71 "obsolete macro", /* EOBS */
72 "end-of-line scope violation", /* EIMPBRK */
73 "empty macro ignored", /* EIGNE */
74 "unclosed explicit scope", /* EOPEN */
75 "unterminated quoted phrase", /* EQUOTPHR */
76 "closure macro without prior context", /* ENOCTX */
77 "invalid whitespace after control character", /* ESPACE */
78 "no description found for library" /* ELIB */
79 };
80
81 const char *const __mdoc_macronames[MDOC_MAX] = {
82 "Ap", "Dd", "Dt", "Os",
83 "Sh", "Ss", "Pp", "D1",
84 "Dl", "Bd", "Ed", "Bl",
85 "El", "It", "Ad", "An",
86 "Ar", "Cd", "Cm", "Dv",
87 "Er", "Ev", "Ex", "Fa",
88 "Fd", "Fl", "Fn", "Ft",
89 "Ic", "In", "Li", "Nd",
90 "Nm", "Op", "Ot", "Pa",
91 "Rv", "St", "Va", "Vt",
92 /* LINTED */
93 "Xr", "\%A", "\%B", "\%D",
94 /* LINTED */
95 "\%I", "\%J", "\%N", "\%O",
96 /* LINTED */
97 "\%P", "\%R", "\%T", "\%V",
98 "Ac", "Ao", "Aq", "At",
99 "Bc", "Bf", "Bo", "Bq",
100 "Bsx", "Bx", "Db", "Dc",
101 "Do", "Dq", "Ec", "Ef",
102 "Em", "Eo", "Fx", "Ms",
103 "No", "Ns", "Nx", "Ox",
104 "Pc", "Pf", "Po", "Pq",
105 "Qc", "Ql", "Qo", "Qq",
106 "Re", "Rs", "Sc", "So",
107 "Sq", "Sm", "Sx", "Sy",
108 "Tn", "Ux", "Xc", "Xo",
109 "Fo", "Fc", "Oo", "Oc",
110 "Bk", "Ek", "Bt", "Hf",
111 "Fr", "Ud", "Lb", "Lp",
112 "Lk", "Mt", "Brq", "Bro",
113 /* LINTED */
114 "Brc", "\%C", "Es", "En",
115 /* LINTED */
116 "Dx", "\%Q", "br", "sp"
117 };
118
119 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
120 "split", "nosplit", "ragged",
121 "unfilled", "literal", "file",
122 "offset", "bullet", "dash",
123 "hyphen", "item", "enum",
124 "tag", "diag", "hang",
125 "ohang", "inset", "column",
126 "width", "compact", "std",
127 "filled", "words", "emphasis",
128 "symbolic", "nested"
129 };
130
131 const char * const *mdoc_macronames = __mdoc_macronames;
132 const char * const *mdoc_argnames = __mdoc_argnames;
133
/* Forward declarations of the file-local helpers defined below. */
static	int		  mdoc_alloc1(struct mdoc *);
static	void		  mdoc_free1(struct mdoc *);
static	struct mdoc_node *node_alloc(struct mdoc *, int, int, int,
				enum mdoc_type);
static	int		  node_append(struct mdoc *, struct mdoc_node *);
static	int		  pstring(struct mdoc *, int, int,
				const char *, size_t);
static	int		  parsetext(struct mdoc *, int, char *);
static	int		  parsemacro(struct mdoc *, int, char *);
static	int		  macrowarn(struct mdoc *, int, const char *);
145
/*
 * strlcpy() is declared by hand on Linux builds.
 * NOTE(review): presumably the C library headers there do not
 * prototype it and the build supplies an implementation -- confirm.
 */
#if defined(__linux__)
extern	size_t	  strlcpy(char *, const char *, size_t);
#endif
149
150
151 const struct mdoc_node *
152 mdoc_node(const struct mdoc *m)
153 {
154
155 return(MDOC_HALT & m->flags ? NULL : m->first);
156 }
157
158
159 const struct mdoc_meta *
160 mdoc_meta(const struct mdoc *m)
161 {
162
163 return(MDOC_HALT & m->flags ? NULL : &m->meta);
164 }
165
166
167 /*
168 * Frees volatile resources (parse tree, meta-data, fields).
169 */
170 static void
171 mdoc_free1(struct mdoc *mdoc)
172 {
173
174 if (mdoc->first)
175 mdoc_node_freelist(mdoc->first);
176 if (mdoc->meta.title)
177 free(mdoc->meta.title);
178 if (mdoc->meta.os)
179 free(mdoc->meta.os);
180 if (mdoc->meta.name)
181 free(mdoc->meta.name);
182 if (mdoc->meta.arch)
183 free(mdoc->meta.arch);
184 if (mdoc->meta.vol)
185 free(mdoc->meta.vol);
186 }
187
188
189 /*
190 * Allocate all volatile resources (parse tree, meta-data, fields).
191 */
192 static int
193 mdoc_alloc1(struct mdoc *mdoc)
194 {
195
196 bzero(&mdoc->meta, sizeof(struct mdoc_meta));
197 mdoc->flags = 0;
198 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
199 mdoc->last = calloc(1, sizeof(struct mdoc_node));
200 if (NULL == mdoc->last)
201 return(0);
202
203 mdoc->first = mdoc->last;
204 mdoc->last->type = MDOC_ROOT;
205 mdoc->next = MDOC_NEXT_CHILD;
206 return(1);
207 }
208
209
/*
 * Discard the current parse (mdoc_free1()) and set up a fresh one
 * (mdoc_alloc1()), so the same parser can be run over a new document.
 * Data that outlives a single parse is left alone.  Returns whatever
 * mdoc_alloc1() returns.
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 rc;

	mdoc_free1(mdoc);
	rc = mdoc_alloc1(mdoc);
	return(rc);
}
223
224
225 /*
226 * Completely free up all volatile and non-volatile parse resources.
227 * After invocation, the pointer is no longer usable.
228 */
229 void
230 mdoc_free(struct mdoc *mdoc)
231 {
232
233 mdoc_free1(mdoc);
234 if (mdoc->htab)
235 mdoc_hash_free(mdoc->htab);
236 free(mdoc);
237 }
238
239
240 /*
241 * Allocate volatile and non-volatile parse resources.
242 */
243 struct mdoc *
244 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
245 {
246 struct mdoc *p;
247
248 if (NULL == (p = calloc(1, sizeof(struct mdoc))))
249 return(NULL);
250 if (cb)
251 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
252
253 p->data = data;
254 p->pflags = pflags;
255
256 if (NULL == (p->htab = mdoc_hash_alloc())) {
257 free(p);
258 return(NULL);
259 } else if (mdoc_alloc1(p))
260 return(p);
261
262 free(p);
263 return(NULL);
264 }
265
266
267 /*
268 * Climb back up the parse tree, validating open scopes. Mostly calls
269 * through to macro_end() in macro.c.
270 */
271 int
272 mdoc_endparse(struct mdoc *m)
273 {
274
275 if (MDOC_HALT & m->flags)
276 return(0);
277 else if (mdoc_macroend(m))
278 return(1);
279 m->flags |= MDOC_HALT;
280 return(0);
281 }
282
283
284 /*
285 * Main parse routine. Parses a single line -- really just hands off to
286 * the macro (parsemacro()) or text parser (parsetext()).
287 */
288 int
289 mdoc_parseln(struct mdoc *m, int ln, char *buf)
290 {
291
292 if (MDOC_HALT & m->flags)
293 return(0);
294
295 return('.' == *buf ? parsemacro(m, ln, buf) :
296 parsetext(m, ln, buf));
297 }
298
299
300 int
301 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
302 const char *fmt, ...)
303 {
304 char buf[256];
305 va_list ap;
306
307 if (NULL == mdoc->cb.mdoc_err)
308 return(0);
309
310 va_start(ap, fmt);
311 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
312 va_end(ap);
313
314 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
315 }
316
317
318 int
319 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
320 {
321 char buf[256];
322 va_list ap;
323
324 if (NULL == mdoc->cb.mdoc_warn)
325 return(0);
326
327 va_start(ap, fmt);
328 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
329 va_end(ap);
330
331 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
332 }
333
334
335 int
336 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
337 {
338 const char *p;
339
340 p = __mdoc_merrnames[(int)type];
341 assert(p);
342
343 if (iserr)
344 return(mdoc_verr(m, line, pos, p));
345
346 return(mdoc_vwarn(m, line, pos, p));
347 }
348
349
350 int
351 mdoc_macro(struct mdoc *m, int tok,
352 int ln, int pp, int *pos, char *buf)
353 {
354 /*
355 * If we're in the prologue, deny "body" macros. Similarly, if
356 * we're in the body, deny prologue calls.
357 */
358 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
359 MDOC_PBODY & m->flags)
360 return(mdoc_perr(m, ln, pp, EPROLBODY));
361 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
362 ! (MDOC_PBODY & m->flags))
363 return(mdoc_perr(m, ln, pp, EBODYPROL));
364
365 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
366 }
367
368
369 static int
370 node_append(struct mdoc *mdoc, struct mdoc_node *p)
371 {
372
373 assert(mdoc->last);
374 assert(mdoc->first);
375 assert(MDOC_ROOT != p->type);
376
377 switch (mdoc->next) {
378 case (MDOC_NEXT_SIBLING):
379 mdoc->last->next = p;
380 p->prev = mdoc->last;
381 p->parent = mdoc->last->parent;
382 break;
383 case (MDOC_NEXT_CHILD):
384 mdoc->last->child = p;
385 p->parent = mdoc->last;
386 break;
387 default:
388 abort();
389 /* NOTREACHED */
390 }
391
392 p->parent->nchild++;
393
394 if ( ! mdoc_valid_pre(mdoc, p))
395 return(0);
396 if ( ! mdoc_action_pre(mdoc, p))
397 return(0);
398
399 switch (p->type) {
400 case (MDOC_HEAD):
401 assert(MDOC_BLOCK == p->parent->type);
402 p->parent->head = p;
403 break;
404 case (MDOC_TAIL):
405 assert(MDOC_BLOCK == p->parent->type);
406 p->parent->tail = p;
407 break;
408 case (MDOC_BODY):
409 assert(MDOC_BLOCK == p->parent->type);
410 p->parent->body = p;
411 break;
412 default:
413 break;
414 }
415
416 mdoc->last = p;
417
418 switch (p->type) {
419 case (MDOC_TEXT):
420 if ( ! mdoc_valid_post(mdoc))
421 return(0);
422 if ( ! mdoc_action_post(mdoc))
423 return(0);
424 break;
425 default:
426 break;
427 }
428
429 return(1);
430 }
431
432
433 static struct mdoc_node *
434 node_alloc(struct mdoc *m, int line,
435 int pos, int tok, enum mdoc_type type)
436 {
437 struct mdoc_node *p;
438
439 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
440 (void)mdoc_nerr(m, m->last, EMALLOC);
441 return(NULL);
442 }
443
444 p->sec = m->lastsec;
445 p->line = line;
446 p->pos = pos;
447 p->tok = tok;
448 if (MDOC_TEXT != (p->type = type))
449 assert(p->tok >= 0);
450
451 return(p);
452 }
453
454
455 int
456 mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok)
457 {
458 struct mdoc_node *p;
459
460 p = node_alloc(m, line, pos, tok, MDOC_TAIL);
461 if (NULL == p)
462 return(0);
463 return(node_append(m, p));
464 }
465
466
467 int
468 mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok)
469 {
470 struct mdoc_node *p;
471
472 assert(m->first);
473 assert(m->last);
474
475 p = node_alloc(m, line, pos, tok, MDOC_HEAD);
476 if (NULL == p)
477 return(0);
478 return(node_append(m, p));
479 }
480
481
482 int
483 mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok)
484 {
485 struct mdoc_node *p;
486
487 p = node_alloc(m, line, pos, tok, MDOC_BODY);
488 if (NULL == p)
489 return(0);
490 return(node_append(m, p));
491 }
492
493
494 int
495 mdoc_block_alloc(struct mdoc *m, int line, int pos,
496 int tok, struct mdoc_arg *args)
497 {
498 struct mdoc_node *p;
499
500 p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
501 if (NULL == p)
502 return(0);
503 p->args = args;
504 if (p->args)
505 (args->refcnt)++;
506 return(node_append(m, p));
507 }
508
509
510 int
511 mdoc_elem_alloc(struct mdoc *m, int line, int pos,
512 int tok, struct mdoc_arg *args)
513 {
514 struct mdoc_node *p;
515
516 p = node_alloc(m, line, pos, tok, MDOC_ELEM);
517 if (NULL == p)
518 return(0);
519 p->args = args;
520 if (p->args)
521 (args->refcnt)++;
522 return(node_append(m, p));
523 }
524
525
526 static int
527 pstring(struct mdoc *m, int line, int pos, const char *p, size_t len)
528 {
529 struct mdoc_node *n;
530 size_t sv;
531
532 n = node_alloc(m, line, pos, -1, MDOC_TEXT);
533 if (NULL == n)
534 return(mdoc_nerr(m, m->last, EMALLOC));
535
536 n->string = malloc(len + 1);
537 if (NULL == n->string) {
538 free(n);
539 return(mdoc_nerr(m, m->last, EMALLOC));
540 }
541
542 sv = strlcpy(n->string, p, len + 1);
543
544 /* Prohibit truncation. */
545 assert(sv < len + 1);
546
547 return(node_append(m, n));
548 }
549
550
/*
 * Append the whole NUL-terminated string "p" to the tree as a single
 * text node.
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	size_t		 len;

	len = strlen(p);
	return(pstring(m, line, pos, p, len));
}
557
558
559 void
560 mdoc_node_free(struct mdoc_node *p)
561 {
562
563 if (p->parent)
564 p->parent->nchild--;
565 if (p->string)
566 free(p->string);
567 if (p->args)
568 mdoc_argv_free(p->args);
569 free(p);
570 }
571
572
573 void
574 mdoc_node_freelist(struct mdoc_node *p)
575 {
576
577 if (p->child)
578 mdoc_node_freelist(p->child);
579 if (p->next)
580 mdoc_node_freelist(p->next);
581
582 assert(0 == p->nchild);
583 mdoc_node_free(p);
584 }
585
586
587 /*
588 * Parse free-form text, that is, a line that does not begin with the
589 * control character.
590 */
591 static int
592 parsetext(struct mdoc *m, int line, char *buf)
593 {
594 int i, j;
595
596 if (SEC_NONE == m->lastnamed)
597 return(mdoc_perr(m, line, 0, ETEXTPROL));
598
599 /*
600 * If in literal mode, then pass the buffer directly to the
601 * back-end, as it should be preserved as a single term.
602 */
603
604 if (MDOC_LITERAL & m->flags) {
605 if ( ! mdoc_word_alloc(m, line, 0, buf))
606 return(0);
607 m->next = MDOC_NEXT_SIBLING;
608 return(1);
609 }
610
611 /* Disallow blank/white-space lines in non-literal mode. */
612
613 for (i = 0; ' ' == buf[i]; i++)
614 /* Skip leading whitespace. */ ;
615 if (0 == buf[i])
616 return(mdoc_perr(m, line, 0, ENOBLANK));
617
618 /*
619 * Break apart a free-form line into tokens. Spaces are
620 * stripped out of the input.
621 */
622
623 for (j = i; buf[i]; i++) {
624 if (' ' != buf[i])
625 continue;
626
627 /* Escaped whitespace. */
628 if (i && ' ' == buf[i] && '\\' == buf[i - 1])
629 continue;
630
631 buf[i++] = 0;
632 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
633 return(0);
634 m->next = MDOC_NEXT_SIBLING;
635
636 for ( ; ' ' == buf[i]; i++)
637 /* Skip trailing whitespace. */ ;
638
639 j = i;
640 if (0 == buf[i])
641 break;
642 }
643
644 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
645 return(0);
646
647 m->next = MDOC_NEXT_SIBLING;
648 return(1);
649 }
650
651
652
653
654 static int
655 macrowarn(struct mdoc *m, int ln, const char *buf)
656 {
657 if ( ! (MDOC_IGN_MACRO & m->pflags))
658 return(mdoc_verr(m, ln, 0,
659 "unknown macro: %s%s",
660 buf, strlen(buf) > 3 ? "..." : ""));
661 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s",
662 buf, strlen(buf) > 3 ? "..." : ""));
663 }
664
665
666 /*
667 * Parse a macro line, that is, a line beginning with the control
668 * character.
669 */
670 int
671 parsemacro(struct mdoc *m, int ln, char *buf)
672 {
673 int i, c;
674 char mac[5];
675
676 /* Empty lines are ignored. */
677
678 if (0 == buf[1])
679 return(1);
680
681 if (' ' == buf[1]) {
682 i = 2;
683 while (buf[i] && ' ' == buf[i])
684 i++;
685 if (0 == buf[i])
686 return(1);
687 return(mdoc_perr(m, ln, 1, ESPACE));
688 }
689
690 /* Copy the first word into a nil-terminated buffer. */
691
692 for (i = 1; i < 5; i++) {
693 if (0 == (mac[i - 1] = buf[i]))
694 break;
695 else if (' ' == buf[i])
696 break;
697 }
698
699 mac[i - 1] = 0;
700
701 if (i == 5 || i <= 2) {
702 if ( ! macrowarn(m, ln, mac))
703 goto err;
704 return(1);
705 }
706
707 if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
708 if ( ! macrowarn(m, ln, mac))
709 goto err;
710 return(1);
711 }
712
713 /* The macro is sane. Jump to the next word. */
714
715 while (buf[i] && ' ' == buf[i])
716 i++;
717
718 /*
719 * Begin recursive parse sequence. Since we're at the start of
720 * the line, we don't need to do callable/parseable checks.
721 */
722 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
723 goto err;
724
725 return(1);
726
727 err: /* Error out. */
728
729 m->flags |= MDOC_HALT;
730 return(0);
731 }