]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
Fix to libmdoc passing over delimiters.
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.103 2009/08/20 11:44:47 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <assert.h>
18 #include <ctype.h>
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "libmdoc.h"
25
26 const char *const __mdoc_merrnames[MERRMAX] = {
27 "trailing whitespace", /* ETAILWS */
28 "unexpected quoted parameter", /* EQUOTPARM */
29 "unterminated quoted parameter", /* EQUOTTERM */
30 "system: malloc error", /* EMALLOC */
31 "argument parameter suggested", /* EARGVAL */
32 "macro disallowed in prologue", /* EBODYPROL */
33 "macro disallowed in body", /* EPROLBODY */
34 "text disallowed in prologue", /* ETEXTPROL */
35 "blank line disallowed", /* ENOBLANK */
36 "text parameter too long", /* ETOOLONG */
37 "invalid escape sequence", /* EESCAPE */
38 "invalid character", /* EPRINT */
39 "document has no body", /* ENODAT */
40 "document has no prologue", /* ENOPROLOGUE */
41 "expected line arguments", /* ELINE */
42 "invalid AT&T argument", /* EATT */
43 "default name not yet set", /* ENAME */
44 "missing list type", /* ELISTTYPE */
45 "missing display type", /* EDISPTYPE */
46 "too many display types", /* EMULTIDISP */
47 "too many list types", /* EMULTILIST */
48 "NAME section must be first", /* ESECNAME */
49 "badly-formed NAME section", /* ENAMESECINC */
50 "argument repeated", /* EARGREP */
51 "expected boolean parameter", /* EBOOL */
52 "inconsistent column syntax", /* ECOLMIS */
53 "nested display invalid", /* ENESTDISP */
54 "width argument missing", /* EMISSWIDTH */
55 "invalid section for this manual section", /* EWRONGMSEC */
56 "section out of conventional order", /* ESECOOO */
57 "section repeated", /* ESECREP */
58 "invalid standard argument", /* EBADSTAND */
59 "multi-line arguments discouraged", /* ENOMULTILINE */
60 "multi-line arguments suggested", /* EMULTILINE */
61 "line arguments discouraged", /* ENOLINE */
62 "prologue macro out of conventional order", /* EPROLOOO */
63 "prologue macro repeated", /* EPROLREP */
64 "invalid manual section", /* EBADMSEC */
65 "invalid section", /* EBADSEC */
66 "invalid font mode", /* EFONT */
67 "invalid date syntax", /* EBADDATE */
68 "invalid number format", /* ENUMFMT */
69 "superfluous width argument", /* ENOWIDTH */
70 "system: utsname error", /* EUTSNAME */
71 "obsolete macro", /* EOBS */
72 "end-of-line scope violation", /* EIMPBRK */
73 "empty macro ignored", /* EIGNE */
74 "unclosed explicit scope", /* EOPEN */
75 "unterminated quoted phrase", /* EQUOTPHR */
76 "closure macro without prior context", /* ENOCTX */
77 "no description found for library" /* ELIB */
78 };
79
80 const char *const __mdoc_macronames[MDOC_MAX] = {
81 "Ap", "Dd", "Dt", "Os",
82 "Sh", "Ss", "Pp", "D1",
83 "Dl", "Bd", "Ed", "Bl",
84 "El", "It", "Ad", "An",
85 "Ar", "Cd", "Cm", "Dv",
86 "Er", "Ev", "Ex", "Fa",
87 "Fd", "Fl", "Fn", "Ft",
88 "Ic", "In", "Li", "Nd",
89 "Nm", "Op", "Ot", "Pa",
90 "Rv", "St", "Va", "Vt",
91 /* LINTED */
92 "Xr", "\%A", "\%B", "\%D",
93 /* LINTED */
94 "\%I", "\%J", "\%N", "\%O",
95 /* LINTED */
96 "\%P", "\%R", "\%T", "\%V",
97 "Ac", "Ao", "Aq", "At",
98 "Bc", "Bf", "Bo", "Bq",
99 "Bsx", "Bx", "Db", "Dc",
100 "Do", "Dq", "Ec", "Ef",
101 "Em", "Eo", "Fx", "Ms",
102 "No", "Ns", "Nx", "Ox",
103 "Pc", "Pf", "Po", "Pq",
104 "Qc", "Ql", "Qo", "Qq",
105 "Re", "Rs", "Sc", "So",
106 "Sq", "Sm", "Sx", "Sy",
107 "Tn", "Ux", "Xc", "Xo",
108 "Fo", "Fc", "Oo", "Oc",
109 "Bk", "Ek", "Bt", "Hf",
110 "Fr", "Ud", "Lb", "Lp",
111 "Lk", "Mt", "Brq", "Bro",
112 /* LINTED */
113 "Brc", "\%C", "Es", "En",
114 /* LINTED */
115 "Dx", "\%Q", "br", "sp"
116 };
117
118 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
119 "split", "nosplit", "ragged",
120 "unfilled", "literal", "file",
121 "offset", "bullet", "dash",
122 "hyphen", "item", "enum",
123 "tag", "diag", "hang",
124 "ohang", "inset", "column",
125 "width", "compact", "std",
126 "filled", "words", "emphasis",
127 "symbolic", "nested"
128 };
129
130 const char * const *mdoc_macronames = __mdoc_macronames;
131 const char * const *mdoc_argnames = __mdoc_argnames;
132
/* Helpers private to this file. */
static void		 mdoc_free1(struct mdoc *mdoc);
static int		 mdoc_alloc1(struct mdoc *mdoc);
static struct mdoc_node	*node_alloc(struct mdoc *m, int line, int pos,
				int tok, enum mdoc_type type);
static int		 node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static int		 parsetext(struct mdoc *m, int line, char *buf);
static int		 parsemacro(struct mdoc *m, int ln, char *buf);
static int		 macrowarn(struct mdoc *m, int ln, const char *buf);
static int		 pstring(struct mdoc *m, int line, int pos,
				const char *p, size_t len);

/* strlcpy() is declared by hand when building on Linux. */
#ifdef __linux__
extern size_t		 strlcpy(char *, const char *, size_t);
#endif
148
149
150 const struct mdoc_node *
151 mdoc_node(const struct mdoc *m)
152 {
153
154 return(MDOC_HALT & m->flags ? NULL : m->first);
155 }
156
157
158 const struct mdoc_meta *
159 mdoc_meta(const struct mdoc *m)
160 {
161
162 return(MDOC_HALT & m->flags ? NULL : &m->meta);
163 }
164
165
166 /*
167 * Frees volatile resources (parse tree, meta-data, fields).
168 */
169 static void
170 mdoc_free1(struct mdoc *mdoc)
171 {
172
173 if (mdoc->first)
174 mdoc_node_freelist(mdoc->first);
175 if (mdoc->meta.title)
176 free(mdoc->meta.title);
177 if (mdoc->meta.os)
178 free(mdoc->meta.os);
179 if (mdoc->meta.name)
180 free(mdoc->meta.name);
181 if (mdoc->meta.arch)
182 free(mdoc->meta.arch);
183 if (mdoc->meta.vol)
184 free(mdoc->meta.vol);
185 }
186
187
188 /*
189 * Allocate all volatile resources (parse tree, meta-data, fields).
190 */
191 static int
192 mdoc_alloc1(struct mdoc *mdoc)
193 {
194
195 bzero(&mdoc->meta, sizeof(struct mdoc_meta));
196 mdoc->flags = 0;
197 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
198 mdoc->last = calloc(1, sizeof(struct mdoc_node));
199 if (NULL == mdoc->last)
200 return(0);
201
202 mdoc->first = mdoc->last;
203 mdoc->last->type = MDOC_ROOT;
204 mdoc->next = MDOC_NEXT_CHILD;
205 return(1);
206 }
207
208
/*
 * Discard the volatile parse state with mdoc_free1() and build a fresh
 * one with mdoc_alloc1(), so that the parser can be run again on new
 * input.  Data that outlives a single parse is left untouched.
 * Returns whatever mdoc_alloc1() returns.
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 ok;

	mdoc_free1(mdoc);
	ok = mdoc_alloc1(mdoc);
	return ok;
}
222
223
224 /*
225 * Completely free up all volatile and non-volatile parse resources.
226 * After invocation, the pointer is no longer usable.
227 */
228 void
229 mdoc_free(struct mdoc *mdoc)
230 {
231
232 mdoc_free1(mdoc);
233 if (mdoc->htab)
234 mdoc_hash_free(mdoc->htab);
235 free(mdoc);
236 }
237
238
239 /*
240 * Allocate volatile and non-volatile parse resources.
241 */
242 struct mdoc *
243 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
244 {
245 struct mdoc *p;
246
247 if (NULL == (p = calloc(1, sizeof(struct mdoc))))
248 return(NULL);
249 if (cb)
250 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
251
252 p->data = data;
253 p->pflags = pflags;
254
255 if (NULL == (p->htab = mdoc_hash_alloc())) {
256 free(p);
257 return(NULL);
258 } else if (mdoc_alloc1(p))
259 return(p);
260
261 free(p);
262 return(NULL);
263 }
264
265
266 /*
267 * Climb back up the parse tree, validating open scopes. Mostly calls
268 * through to macro_end() in macro.c.
269 */
270 int
271 mdoc_endparse(struct mdoc *m)
272 {
273
274 if (MDOC_HALT & m->flags)
275 return(0);
276 else if (mdoc_macroend(m))
277 return(1);
278 m->flags |= MDOC_HALT;
279 return(0);
280 }
281
282
283 /*
284 * Main parse routine. Parses a single line -- really just hands off to
285 * the macro (parsemacro()) or text parser (parsetext()).
286 */
287 int
288 mdoc_parseln(struct mdoc *m, int ln, char *buf)
289 {
290
291 if (MDOC_HALT & m->flags)
292 return(0);
293
294 return('.' == *buf ? parsemacro(m, ln, buf) :
295 parsetext(m, ln, buf));
296 }
297
298
299 int
300 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
301 const char *fmt, ...)
302 {
303 char buf[256];
304 va_list ap;
305
306 if (NULL == mdoc->cb.mdoc_err)
307 return(0);
308
309 va_start(ap, fmt);
310 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
311 va_end(ap);
312
313 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
314 }
315
316
317 int
318 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
319 {
320 char buf[256];
321 va_list ap;
322
323 if (NULL == mdoc->cb.mdoc_warn)
324 return(0);
325
326 va_start(ap, fmt);
327 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
328 va_end(ap);
329
330 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
331 }
332
333
334 int
335 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
336 {
337 const char *p;
338
339 p = __mdoc_merrnames[(int)type];
340 assert(p);
341
342 if (iserr)
343 return(mdoc_verr(m, line, pos, p));
344
345 return(mdoc_vwarn(m, line, pos, p));
346 }
347
348
349 int
350 mdoc_macro(struct mdoc *m, int tok,
351 int ln, int pp, int *pos, char *buf)
352 {
353 /*
354 * If we're in the prologue, deny "body" macros. Similarly, if
355 * we're in the body, deny prologue calls.
356 */
357 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
358 MDOC_PBODY & m->flags)
359 return(mdoc_perr(m, ln, pp, EPROLBODY));
360 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
361 ! (MDOC_PBODY & m->flags))
362 return(mdoc_perr(m, ln, pp, EBODYPROL));
363
364 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
365 }
366
367
368 static int
369 node_append(struct mdoc *mdoc, struct mdoc_node *p)
370 {
371
372 assert(mdoc->last);
373 assert(mdoc->first);
374 assert(MDOC_ROOT != p->type);
375
376 switch (mdoc->next) {
377 case (MDOC_NEXT_SIBLING):
378 mdoc->last->next = p;
379 p->prev = mdoc->last;
380 p->parent = mdoc->last->parent;
381 break;
382 case (MDOC_NEXT_CHILD):
383 mdoc->last->child = p;
384 p->parent = mdoc->last;
385 break;
386 default:
387 abort();
388 /* NOTREACHED */
389 }
390
391 p->parent->nchild++;
392
393 if ( ! mdoc_valid_pre(mdoc, p))
394 return(0);
395 if ( ! mdoc_action_pre(mdoc, p))
396 return(0);
397
398 switch (p->type) {
399 case (MDOC_HEAD):
400 assert(MDOC_BLOCK == p->parent->type);
401 p->parent->head = p;
402 break;
403 case (MDOC_TAIL):
404 assert(MDOC_BLOCK == p->parent->type);
405 p->parent->tail = p;
406 break;
407 case (MDOC_BODY):
408 assert(MDOC_BLOCK == p->parent->type);
409 p->parent->body = p;
410 break;
411 default:
412 break;
413 }
414
415 mdoc->last = p;
416
417 switch (p->type) {
418 case (MDOC_TEXT):
419 if ( ! mdoc_valid_post(mdoc))
420 return(0);
421 if ( ! mdoc_action_post(mdoc))
422 return(0);
423 break;
424 default:
425 break;
426 }
427
428 return(1);
429 }
430
431
432 static struct mdoc_node *
433 node_alloc(struct mdoc *m, int line,
434 int pos, int tok, enum mdoc_type type)
435 {
436 struct mdoc_node *p;
437
438 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
439 (void)mdoc_nerr(m, m->last, EMALLOC);
440 return(NULL);
441 }
442
443 p->sec = m->lastsec;
444 p->line = line;
445 p->pos = pos;
446 p->tok = tok;
447 if (MDOC_TEXT != (p->type = type))
448 assert(p->tok >= 0);
449
450 return(p);
451 }
452
453
454 int
455 mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok)
456 {
457 struct mdoc_node *p;
458
459 p = node_alloc(m, line, pos, tok, MDOC_TAIL);
460 if (NULL == p)
461 return(0);
462 if ( ! node_append(m, p))
463 return(0);
464 m->next = MDOC_NEXT_CHILD;
465 return(1);
466 }
467
468
469 int
470 mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok)
471 {
472 struct mdoc_node *p;
473
474 assert(m->first);
475 assert(m->last);
476
477 p = node_alloc(m, line, pos, tok, MDOC_HEAD);
478 if (NULL == p)
479 return(0);
480 if ( ! node_append(m, p))
481 return(0);
482 m->next = MDOC_NEXT_CHILD;
483 return(1);
484 }
485
486
487 int
488 mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok)
489 {
490 struct mdoc_node *p;
491
492 p = node_alloc(m, line, pos, tok, MDOC_BODY);
493 if (NULL == p)
494 return(0);
495 if ( ! node_append(m, p))
496 return(0);
497 m->next = MDOC_NEXT_CHILD;
498 return(1);
499 }
500
501
502 int
503 mdoc_block_alloc(struct mdoc *m, int line, int pos,
504 int tok, struct mdoc_arg *args)
505 {
506 struct mdoc_node *p;
507
508 p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
509 if (NULL == p)
510 return(0);
511 p->args = args;
512 if (p->args)
513 (args->refcnt)++;
514 if ( ! node_append(m, p))
515 return(0);
516 m->next = MDOC_NEXT_CHILD;
517 return(1);
518 }
519
520
521 int
522 mdoc_elem_alloc(struct mdoc *m, int line, int pos,
523 int tok, struct mdoc_arg *args)
524 {
525 struct mdoc_node *p;
526
527 p = node_alloc(m, line, pos, tok, MDOC_ELEM);
528 if (NULL == p)
529 return(0);
530 p->args = args;
531 if (p->args)
532 (args->refcnt)++;
533 if ( ! node_append(m, p))
534 return(0);
535 m->next = MDOC_NEXT_CHILD;
536 return(1);
537 }
538
539
540 static int
541 pstring(struct mdoc *m, int line, int pos, const char *p, size_t len)
542 {
543 struct mdoc_node *n;
544 size_t sv;
545
546 n = node_alloc(m, line, pos, -1, MDOC_TEXT);
547 if (NULL == n)
548 return(mdoc_nerr(m, m->last, EMALLOC));
549
550 n->string = malloc(len + 1);
551 if (NULL == n->string) {
552 free(n);
553 return(mdoc_nerr(m, m->last, EMALLOC));
554 }
555
556 sv = strlcpy(n->string, p, len + 1);
557
558 /* Prohibit truncation. */
559 assert(sv < len + 1);
560
561 if ( ! node_append(m, n))
562 return(0);
563 m->next = MDOC_NEXT_SIBLING;
564 return(1);
565 }
566
567
/*
 * Append the NUL-terminated string p to the tree as a text node; a
 * thin wrapper around pstring() that supplies the string's length.
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	size_t		 len;

	len = strlen(p);
	return pstring(m, line, pos, p, len);
}
574
575
576 void
577 mdoc_node_free(struct mdoc_node *p)
578 {
579
580 if (p->parent)
581 p->parent->nchild--;
582 if (p->string)
583 free(p->string);
584 if (p->args)
585 mdoc_argv_free(p->args);
586 free(p);
587 }
588
589
590 void
591 mdoc_node_freelist(struct mdoc_node *p)
592 {
593
594 if (p->child)
595 mdoc_node_freelist(p->child);
596 if (p->next)
597 mdoc_node_freelist(p->next);
598
599 assert(0 == p->nchild);
600 mdoc_node_free(p);
601 }
602
603
604 /*
605 * Parse free-form text, that is, a line that does not begin with the
606 * control character.
607 */
608 static int
609 parsetext(struct mdoc *m, int line, char *buf)
610 {
611 int i, j;
612
613 if (SEC_NONE == m->lastnamed)
614 return(mdoc_perr(m, line, 0, ETEXTPROL));
615
616 /*
617 * If in literal mode, then pass the buffer directly to the
618 * back-end, as it should be preserved as a single term.
619 */
620
621 if (MDOC_LITERAL & m->flags)
622 return(mdoc_word_alloc(m, line, 0, buf));
623
624 /* Disallow blank/white-space lines in non-literal mode. */
625
626 for (i = 0; ' ' == buf[i]; i++)
627 /* Skip leading whitespace. */ ;
628 if (0 == buf[i])
629 return(mdoc_perr(m, line, 0, ENOBLANK));
630
631 /*
632 * Break apart a free-form line into tokens. Spaces are
633 * stripped out of the input.
634 */
635
636 for (j = i; buf[i]; i++) {
637 if (' ' != buf[i])
638 continue;
639
640 /* Escaped whitespace. */
641 if (i && ' ' == buf[i] && '\\' == buf[i - 1])
642 continue;
643
644 buf[i++] = 0;
645 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
646 return(0);
647
648 for ( ; ' ' == buf[i]; i++)
649 /* Skip trailing whitespace. */ ;
650
651 j = i;
652 if (0 == buf[i])
653 break;
654 }
655
656 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
657 return(0);
658
659 m->next = MDOC_NEXT_SIBLING;
660 return(1);
661 }
662
663
664
665
666 static int
667 macrowarn(struct mdoc *m, int ln, const char *buf)
668 {
669 if ( ! (MDOC_IGN_MACRO & m->pflags))
670 return(mdoc_verr(m, ln, 0,
671 "unknown macro: %s%s",
672 buf, strlen(buf) > 3 ? "..." : ""));
673 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s",
674 buf, strlen(buf) > 3 ? "..." : ""));
675 }
676
677
678 /*
679 * Parse a macro line, that is, a line beginning with the control
680 * character.
681 */
682 int
683 parsemacro(struct mdoc *m, int ln, char *buf)
684 {
685 int i, j, c;
686 char mac[5];
687
688 /* Empty lines are ignored. */
689
690 if (0 == buf[1])
691 return(1);
692
693 i = 1;
694
695 /* Accept whitespace after the initial control char. */
696
697 if (' ' == buf[i]) {
698 i++;
699 while (buf[i] && ' ' == buf[i])
700 i++;
701 if (0 == buf[i])
702 return(1);
703 }
704
705 /* Copy the first word into a nil-terminated buffer. */
706
707 for (j = 0; j < 4; j++, i++) {
708 if (0 == (mac[j] = buf[i]))
709 break;
710 else if (' ' == buf[i])
711 break;
712 }
713
714 mac[j] = 0;
715
716 if (j == 4 || j < 2) {
717 if ( ! macrowarn(m, ln, mac))
718 goto err;
719 return(1);
720 }
721
722 if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
723 if ( ! macrowarn(m, ln, mac))
724 goto err;
725 return(1);
726 }
727
728 /* The macro is sane. Jump to the next word. */
729
730 while (buf[i] && ' ' == buf[i])
731 i++;
732
733 /*
734 * Begin recursive parse sequence. Since we're at the start of
735 * the line, we don't need to do callable/parseable checks.
736 */
737 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
738 goto err;
739
740 return(1);
741
742 err: /* Error out. */
743
744 m->flags |= MDOC_HALT;
745 return(0);
746 }
747
748