]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
POSIX -> POSIX.1 change (submitted by uqs@sporlein.net, ok jmc@openbsd.org, ingo...
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.95 2009/07/20 14:09:38 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <assert.h>
18 #include <ctype.h>
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "libmdoc.h"
25
26 const char *const __mdoc_merrnames[MERRMAX] = {
27 "trailing whitespace", /* ETAILWS */
28 "unexpected quoted parameter", /* EQUOTPARM */
29 "unterminated quoted parameter", /* EQUOTTERM */
30 "system: malloc error", /* EMALLOC */
31 "argument parameter suggested", /* EARGVAL */
32 "macro not callable", /* ENOCALL */
33 "macro disallowed in prologue", /* EBODYPROL */
34 "macro disallowed in body", /* EPROLBODY */
35 "text disallowed in prologue", /* ETEXTPROL */
36 "blank line disallowed", /* ENOBLANK */
37 "text parameter too long", /* ETOOLONG */
38 "invalid escape sequence", /* EESCAPE */
39 "invalid character", /* EPRINT */
40 "document has no body", /* ENODAT */
41 "document has no prologue", /* ENOPROLOGUE */
42 "expected line arguments", /* ELINE */
43 "invalid AT&T argument", /* EATT */
44 "default name not yet set", /* ENAME */
45 "missing list type", /* ELISTTYPE */
46 "missing display type", /* EDISPTYPE */
47 "too many display types", /* EMULTIDISP */
48 "too many list types", /* EMULTILIST */
49 "NAME section must be first", /* ESECNAME */
50 "badly-formed NAME section", /* ENAMESECINC */
51 "argument repeated", /* EARGREP */
52 "expected boolean parameter", /* EBOOL */
53 "inconsistent column syntax", /* ECOLMIS */
54 "nested display invalid", /* ENESTDISP */
55 "width argument missing", /* EMISSWIDTH */
56 "invalid section for this manual section", /* EWRONGMSEC */
57 "section out of conventional order", /* ESECOOO */
58 "section repeated", /* ESECREP */
59 "invalid standard argument", /* EBADSTAND */
60 "multi-line arguments discouraged", /* ENOMULTILINE */
61 "multi-line arguments suggested", /* EMULTILINE */
62 "line arguments discouraged", /* ENOLINE */
63 "prologue macro out of conventional order", /* EPROLOOO */
64 "prologue macro repeated", /* EPROLREP */
65 "invalid manual section", /* EBADMSEC */
66 "invalid section", /* EBADSEC */
67 "invalid font mode", /* EFONT */
68 "invalid date syntax", /* EBADDATE */
69 "invalid number format", /* ENUMFMT */
70 "superfluous width argument", /* ENOWIDTH */
71 "system: utsname error", /* EUTSNAME */
72 "obsolete macro", /* EOBS */
73 "macro-like parameter", /* EMACPARM */
74 "end-of-line scope violation", /* EIMPBRK */
75 "empty macro ignored", /* EIGNE */
76 "unclosed explicit scope", /* EOPEN */
77 "unterminated quoted phrase", /* EQUOTPHR */
78 "closure macro without prior context", /* ENOCTX */
79 "invalid whitespace after control character", /* ESPACE */
80 "no description found for library" /* ELIB */
81 };
82
83 const char *const __mdoc_macronames[MDOC_MAX] = {
84 "Ap", "Dd", "Dt", "Os",
85 "Sh", "Ss", "Pp", "D1",
86 "Dl", "Bd", "Ed", "Bl",
87 "El", "It", "Ad", "An",
88 "Ar", "Cd", "Cm", "Dv",
89 "Er", "Ev", "Ex", "Fa",
90 "Fd", "Fl", "Fn", "Ft",
91 "Ic", "In", "Li", "Nd",
92 "Nm", "Op", "Ot", "Pa",
93 "Rv", "St", "Va", "Vt",
94 /* LINTED */
95 "Xr", "\%A", "\%B", "\%D",
96 /* LINTED */
97 "\%I", "\%J", "\%N", "\%O",
98 /* LINTED */
99 "\%P", "\%R", "\%T", "\%V",
100 "Ac", "Ao", "Aq", "At",
101 "Bc", "Bf", "Bo", "Bq",
102 "Bsx", "Bx", "Db", "Dc",
103 "Do", "Dq", "Ec", "Ef",
104 "Em", "Eo", "Fx", "Ms",
105 "No", "Ns", "Nx", "Ox",
106 "Pc", "Pf", "Po", "Pq",
107 "Qc", "Ql", "Qo", "Qq",
108 "Re", "Rs", "Sc", "So",
109 "Sq", "Sm", "Sx", "Sy",
110 "Tn", "Ux", "Xc", "Xo",
111 "Fo", "Fc", "Oo", "Oc",
112 "Bk", "Ek", "Bt", "Hf",
113 "Fr", "Ud", "Lb", "Lp",
114 "Lk", "Mt", "Brq", "Bro",
115 /* LINTED */
116 "Brc", "\%C", "Es", "En",
117 /* LINTED */
118 "Dx", "\%Q", "br", "sp"
119 };
120
121 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
122 "split", "nosplit", "ragged",
123 "unfilled", "literal", "file",
124 "offset", "bullet", "dash",
125 "hyphen", "item", "enum",
126 "tag", "diag", "hang",
127 "ohang", "inset", "column",
128 "width", "compact", "std",
129 "filled", "words", "emphasis",
130 "symbolic", "nested"
131 };
132
133 const char * const *mdoc_macronames = __mdoc_macronames;
134 const char * const *mdoc_argnames = __mdoc_argnames;
135
136 static void mdoc_free1(struct mdoc *);
137 static int mdoc_alloc1(struct mdoc *);
138 static struct mdoc_node *node_alloc(struct mdoc *, int, int,
139 int, enum mdoc_type);
140 static int node_append(struct mdoc *,
141 struct mdoc_node *);
142 static int parsetext(struct mdoc *, int, char *);
143 static int parsemacro(struct mdoc *, int, char *);
144 static int macrowarn(struct mdoc *, int, const char *);
145 static int pstring(struct mdoc *, int, int,
146 const char *, size_t);
147
148 #ifdef __linux__
149 extern size_t strlcpy(char *, const char *, size_t);
150 #endif
151
152
153 const struct mdoc_node *
154 mdoc_node(const struct mdoc *m)
155 {
156
157 return(MDOC_HALT & m->flags ? NULL : m->first);
158 }
159
160
161 const struct mdoc_meta *
162 mdoc_meta(const struct mdoc *m)
163 {
164
165 return(MDOC_HALT & m->flags ? NULL : &m->meta);
166 }
167
168
169 /*
170 * Frees volatile resources (parse tree, meta-data, fields).
171 */
172 static void
173 mdoc_free1(struct mdoc *mdoc)
174 {
175
176 if (mdoc->first)
177 mdoc_node_freelist(mdoc->first);
178 if (mdoc->meta.title)
179 free(mdoc->meta.title);
180 if (mdoc->meta.os)
181 free(mdoc->meta.os);
182 if (mdoc->meta.name)
183 free(mdoc->meta.name);
184 if (mdoc->meta.arch)
185 free(mdoc->meta.arch);
186 if (mdoc->meta.vol)
187 free(mdoc->meta.vol);
188 }
189
190
191 /*
192 * Allocate all volatile resources (parse tree, meta-data, fields).
193 */
194 static int
195 mdoc_alloc1(struct mdoc *mdoc)
196 {
197
198 bzero(&mdoc->meta, sizeof(struct mdoc_meta));
199 mdoc->flags = 0;
200 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
201 mdoc->last = calloc(1, sizeof(struct mdoc_node));
202 if (NULL == mdoc->last)
203 return(0);
204
205 mdoc->first = mdoc->last;
206 mdoc->last->type = MDOC_ROOT;
207 mdoc->next = MDOC_NEXT_CHILD;
208 return(1);
209 }
210
211
/*
 * Prepare the parser for another document: release the volatile data
 * through mdoc_free1(), then set it up again through mdoc_alloc1().
 * Data that is not touched by those two calls survives.  Returns
 * whatever mdoc_alloc1() returns.
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 rc;

	mdoc_free1(mdoc);
	rc = mdoc_alloc1(mdoc);
	return(rc);
}
225
226
227 /*
228 * Completely free up all volatile and non-volatile parse resources.
229 * After invocation, the pointer is no longer usable.
230 */
231 void
232 mdoc_free(struct mdoc *mdoc)
233 {
234
235 mdoc_free1(mdoc);
236 if (mdoc->htab)
237 mdoc_hash_free(mdoc->htab);
238 free(mdoc);
239 }
240
241
242 /*
243 * Allocate volatile and non-volatile parse resources.
244 */
245 struct mdoc *
246 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
247 {
248 struct mdoc *p;
249
250 if (NULL == (p = calloc(1, sizeof(struct mdoc))))
251 return(NULL);
252 if (cb)
253 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
254
255 p->data = data;
256 p->pflags = pflags;
257
258 if (NULL == (p->htab = mdoc_hash_alloc())) {
259 free(p);
260 return(NULL);
261 } else if (mdoc_alloc1(p))
262 return(p);
263
264 free(p);
265 return(NULL);
266 }
267
268
269 /*
270 * Climb back up the parse tree, validating open scopes. Mostly calls
271 * through to macro_end() in macro.c.
272 */
273 int
274 mdoc_endparse(struct mdoc *m)
275 {
276
277 if (MDOC_HALT & m->flags)
278 return(0);
279 else if (mdoc_macroend(m))
280 return(1);
281 m->flags |= MDOC_HALT;
282 return(0);
283 }
284
285
286 /*
287 * Main parse routine. Parses a single line -- really just hands off to
288 * the macro (parsemacro()) or text parser (parsetext()).
289 */
290 int
291 mdoc_parseln(struct mdoc *m, int ln, char *buf)
292 {
293
294 if (MDOC_HALT & m->flags)
295 return(0);
296
297 return('.' == *buf ? parsemacro(m, ln, buf) :
298 parsetext(m, ln, buf));
299 }
300
301
302 int
303 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
304 const char *fmt, ...)
305 {
306 char buf[256];
307 va_list ap;
308
309 if (NULL == mdoc->cb.mdoc_err)
310 return(0);
311
312 va_start(ap, fmt);
313 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
314 va_end(ap);
315
316 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
317 }
318
319
320 int
321 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
322 {
323 char buf[256];
324 va_list ap;
325
326 if (NULL == mdoc->cb.mdoc_warn)
327 return(0);
328
329 va_start(ap, fmt);
330 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
331 va_end(ap);
332
333 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
334 }
335
336
337 int
338 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
339 {
340 const char *p;
341
342 p = __mdoc_merrnames[(int)type];
343 assert(p);
344
345 if (iserr)
346 return(mdoc_verr(m, line, pos, p));
347
348 return(mdoc_vwarn(m, line, pos, p));
349 }
350
351
352 int
353 mdoc_macro(struct mdoc *m, int tok,
354 int ln, int pp, int *pos, char *buf)
355 {
356
357 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
358 MDOC_PBODY & m->flags)
359 return(mdoc_perr(m, ln, pp, EPROLBODY));
360 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
361 ! (MDOC_PBODY & m->flags))
362 return(mdoc_perr(m, ln, pp, EBODYPROL));
363
364 if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
365 return(mdoc_perr(m, ln, pp, ENOCALL));
366
367 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
368 }
369
370
371 static int
372 node_append(struct mdoc *mdoc, struct mdoc_node *p)
373 {
374
375 assert(mdoc->last);
376 assert(mdoc->first);
377 assert(MDOC_ROOT != p->type);
378
379 switch (mdoc->next) {
380 case (MDOC_NEXT_SIBLING):
381 mdoc->last->next = p;
382 p->prev = mdoc->last;
383 p->parent = mdoc->last->parent;
384 break;
385 case (MDOC_NEXT_CHILD):
386 mdoc->last->child = p;
387 p->parent = mdoc->last;
388 break;
389 default:
390 abort();
391 /* NOTREACHED */
392 }
393
394 p->parent->nchild++;
395
396 if ( ! mdoc_valid_pre(mdoc, p))
397 return(0);
398 if ( ! mdoc_action_pre(mdoc, p))
399 return(0);
400
401 switch (p->type) {
402 case (MDOC_HEAD):
403 assert(MDOC_BLOCK == p->parent->type);
404 p->parent->head = p;
405 break;
406 case (MDOC_TAIL):
407 assert(MDOC_BLOCK == p->parent->type);
408 p->parent->tail = p;
409 break;
410 case (MDOC_BODY):
411 assert(MDOC_BLOCK == p->parent->type);
412 p->parent->body = p;
413 break;
414 default:
415 break;
416 }
417
418 mdoc->last = p;
419
420 switch (p->type) {
421 case (MDOC_TEXT):
422 if ( ! mdoc_valid_post(mdoc))
423 return(0);
424 if ( ! mdoc_action_post(mdoc))
425 return(0);
426 break;
427 default:
428 break;
429 }
430
431 return(1);
432 }
433
434
435 static struct mdoc_node *
436 node_alloc(struct mdoc *m, int line,
437 int pos, int tok, enum mdoc_type type)
438 {
439 struct mdoc_node *p;
440
441 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
442 (void)mdoc_nerr(m, m->last, EMALLOC);
443 return(NULL);
444 }
445
446 p->sec = m->lastsec;
447 p->line = line;
448 p->pos = pos;
449 p->tok = tok;
450 if (MDOC_TEXT != (p->type = type))
451 assert(p->tok >= 0);
452
453 return(p);
454 }
455
456
457 int
458 mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok)
459 {
460 struct mdoc_node *p;
461
462 p = node_alloc(m, line, pos, tok, MDOC_TAIL);
463 if (NULL == p)
464 return(0);
465 return(node_append(m, p));
466 }
467
468
469 int
470 mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok)
471 {
472 struct mdoc_node *p;
473
474 assert(m->first);
475 assert(m->last);
476
477 p = node_alloc(m, line, pos, tok, MDOC_HEAD);
478 if (NULL == p)
479 return(0);
480 return(node_append(m, p));
481 }
482
483
484 int
485 mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok)
486 {
487 struct mdoc_node *p;
488
489 p = node_alloc(m, line, pos, tok, MDOC_BODY);
490 if (NULL == p)
491 return(0);
492 return(node_append(m, p));
493 }
494
495
496 int
497 mdoc_block_alloc(struct mdoc *m, int line, int pos,
498 int tok, struct mdoc_arg *args)
499 {
500 struct mdoc_node *p;
501
502 p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
503 if (NULL == p)
504 return(0);
505 p->args = args;
506 if (p->args)
507 (args->refcnt)++;
508 return(node_append(m, p));
509 }
510
511
512 int
513 mdoc_elem_alloc(struct mdoc *m, int line, int pos,
514 int tok, struct mdoc_arg *args)
515 {
516 struct mdoc_node *p;
517
518 p = node_alloc(m, line, pos, tok, MDOC_ELEM);
519 if (NULL == p)
520 return(0);
521 p->args = args;
522 if (p->args)
523 (args->refcnt)++;
524 return(node_append(m, p));
525 }
526
527
528 static int
529 pstring(struct mdoc *m, int line, int pos, const char *p, size_t len)
530 {
531 struct mdoc_node *n;
532 size_t sv;
533
534 n = node_alloc(m, line, pos, -1, MDOC_TEXT);
535 if (NULL == n)
536 return(mdoc_nerr(m, m->last, EMALLOC));
537
538 n->string = malloc(len + 1);
539 if (NULL == n->string) {
540 free(n);
541 return(mdoc_nerr(m, m->last, EMALLOC));
542 }
543
544 sv = strlcpy(n->string, p, len + 1);
545
546 /* Prohibit truncation. */
547 assert(sv < len + 1);
548
549 return(node_append(m, n));
550 }
551
552
/*
 * Append the whole NUL-terminated string `p' to the tree as one text
 * node.  Returns the result of pstring().
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	size_t		 len;

	len = strlen(p);
	return(pstring(m, line, pos, p, len));
}
559
560
561 void
562 mdoc_node_free(struct mdoc_node *p)
563 {
564
565 if (p->parent)
566 p->parent->nchild--;
567 if (p->string)
568 free(p->string);
569 if (p->args)
570 mdoc_argv_free(p->args);
571 free(p);
572 }
573
574
575 void
576 mdoc_node_freelist(struct mdoc_node *p)
577 {
578
579 if (p->child)
580 mdoc_node_freelist(p->child);
581 if (p->next)
582 mdoc_node_freelist(p->next);
583
584 assert(0 == p->nchild);
585 mdoc_node_free(p);
586 }
587
588
589 /*
590 * Parse free-form text, that is, a line that does not begin with the
591 * control character.
592 */
593 static int
594 parsetext(struct mdoc *m, int line, char *buf)
595 {
596 int i, j;
597
598 if (SEC_NONE == m->lastnamed)
599 return(mdoc_perr(m, line, 0, ETEXTPROL));
600
601 /*
602 * If in literal mode, then pass the buffer directly to the
603 * back-end, as it should be preserved as a single term.
604 */
605
606 if (MDOC_LITERAL & m->flags) {
607 if ( ! mdoc_word_alloc(m, line, 0, buf))
608 return(0);
609 m->next = MDOC_NEXT_SIBLING;
610 return(1);
611 }
612
613 /* Disallow blank/white-space lines in non-literal mode. */
614
615 for (i = 0; ' ' == buf[i]; i++)
616 /* Skip leading whitespace. */ ;
617 if (0 == buf[i])
618 return(mdoc_perr(m, line, 0, ENOBLANK));
619
620 /*
621 * Break apart a free-form line into tokens. Spaces are
622 * stripped out of the input.
623 */
624
625 for (j = i; buf[i]; i++) {
626 if (' ' != buf[i])
627 continue;
628
629 /* Escaped whitespace. */
630 if (i && ' ' == buf[i] && '\\' == buf[i - 1])
631 continue;
632
633 buf[i++] = 0;
634 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
635 return(0);
636 m->next = MDOC_NEXT_SIBLING;
637
638 for ( ; ' ' == buf[i]; i++)
639 /* Skip trailing whitespace. */ ;
640
641 j = i;
642 if (0 == buf[i])
643 break;
644 }
645
646 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
647 return(0);
648
649 m->next = MDOC_NEXT_SIBLING;
650 return(1);
651 }
652
653
654
655
656 static int
657 macrowarn(struct mdoc *m, int ln, const char *buf)
658 {
659 if ( ! (MDOC_IGN_MACRO & m->pflags))
660 return(mdoc_verr(m, ln, 1,
661 "unknown macro: %s%s",
662 buf, strlen(buf) > 3 ? "..." : ""));
663 return(mdoc_vwarn(m, ln, 1, "unknown macro: %s%s",
664 buf, strlen(buf) > 3 ? "..." : ""));
665 }
666
667
668 /*
669 * Parse a macro line, that is, a line beginning with the control
670 * character.
671 */
672 int
673 parsemacro(struct mdoc *m, int ln, char *buf)
674 {
675 int i, c;
676 char mac[5];
677
678 /* Empty lines are ignored. */
679
680 if (0 == buf[1])
681 return(1);
682
683 if (' ' == buf[1]) {
684 i = 2;
685 while (buf[i] && ' ' == buf[i])
686 i++;
687 if (0 == buf[i])
688 return(1);
689 return(mdoc_perr(m, ln, 1, ESPACE));
690 }
691
692 /* Copy the first word into a nil-terminated buffer. */
693
694 for (i = 1; i < 5; i++) {
695 if (0 == (mac[i - 1] = buf[i]))
696 break;
697 else if (' ' == buf[i])
698 break;
699 }
700
701 mac[i - 1] = 0;
702
703 if (i == 5 || i <= 2) {
704 if ( ! macrowarn(m, ln, mac))
705 goto err;
706 return(1);
707 }
708
709 if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
710 if ( ! macrowarn(m, ln, mac))
711 goto err;
712 return(1);
713 }
714
715 /* The macro is sane. Jump to the next word. */
716
717 while (buf[i] && ' ' == buf[i])
718 i++;
719
720 /* Begin recursive parse sequence. */
721
722 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
723 goto err;
724
725 return(1);
726
727 err: /* Error out. */
728
729 m->flags |= MDOC_HALT;
730 return(0);
731 }