]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
Consolidated all err/warnings into mdoc.c via libmdoc.h.
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.88 2009/07/06 13:04:52 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <assert.h>
18 #include <ctype.h>
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "libmdoc.h"
25
26 const char *const __mdoc_macronames[MDOC_MAX] = {
27 "Ap", "Dd", "Dt", "Os",
28 "Sh", "Ss", "Pp", "D1",
29 "Dl", "Bd", "Ed", "Bl",
30 "El", "It", "Ad", "An",
31 "Ar", "Cd", "Cm", "Dv",
32 "Er", "Ev", "Ex", "Fa",
33 "Fd", "Fl", "Fn", "Ft",
34 "Ic", "In", "Li", "Nd",
35 "Nm", "Op", "Ot", "Pa",
36 "Rv", "St", "Va", "Vt",
37 /* LINTED */
38 "Xr", "\%A", "\%B", "\%D",
39 /* LINTED */
40 "\%I", "\%J", "\%N", "\%O",
41 /* LINTED */
42 "\%P", "\%R", "\%T", "\%V",
43 "Ac", "Ao", "Aq", "At",
44 "Bc", "Bf", "Bo", "Bq",
45 "Bsx", "Bx", "Db", "Dc",
46 "Do", "Dq", "Ec", "Ef",
47 "Em", "Eo", "Fx", "Ms",
48 "No", "Ns", "Nx", "Ox",
49 "Pc", "Pf", "Po", "Pq",
50 "Qc", "Ql", "Qo", "Qq",
51 "Re", "Rs", "Sc", "So",
52 "Sq", "Sm", "Sx", "Sy",
53 "Tn", "Ux", "Xc", "Xo",
54 "Fo", "Fc", "Oo", "Oc",
55 "Bk", "Ek", "Bt", "Hf",
56 "Fr", "Ud", "Lb", "Lp",
57 "Lk", "Mt", "Brq", "Bro",
58 /* LINTED */
59 "Brc", "\%C", "Es", "En",
60 /* LINTED */
61 "Dx", "\%Q"
62 };
63
64 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
65 "split", "nosplit", "ragged",
66 "unfilled", "literal", "file",
67 "offset", "bullet", "dash",
68 "hyphen", "item", "enum",
69 "tag", "diag", "hang",
70 "ohang", "inset", "column",
71 "width", "compact", "std",
72 "filled", "words", "emphasis",
73 "symbolic", "nested"
74 };
75
76 const char * const *mdoc_macronames = __mdoc_macronames;
77 const char * const *mdoc_argnames = __mdoc_argnames;
78
/* Forward declarations of the file-local helpers defined below. */
static	int		  mdoc_alloc1(struct mdoc *mdoc);
static	void		  mdoc_free1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *mdoc, int line,
				int pos, int tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln,
				const char *buf);
88
89
90 const struct mdoc_node *
91 mdoc_node(const struct mdoc *m)
92 {
93
94 return(MDOC_HALT & m->flags ? NULL : m->first);
95 }
96
97
98 const struct mdoc_meta *
99 mdoc_meta(const struct mdoc *m)
100 {
101
102 return(MDOC_HALT & m->flags ? NULL : &m->meta);
103 }
104
105
106 /*
107 * Frees volatile resources (parse tree, meta-data, fields).
108 */
109 static void
110 mdoc_free1(struct mdoc *mdoc)
111 {
112
113 if (mdoc->first)
114 mdoc_node_freelist(mdoc->first);
115 if (mdoc->meta.title)
116 free(mdoc->meta.title);
117 if (mdoc->meta.os)
118 free(mdoc->meta.os);
119 if (mdoc->meta.name)
120 free(mdoc->meta.name);
121 if (mdoc->meta.arch)
122 free(mdoc->meta.arch);
123 if (mdoc->meta.vol)
124 free(mdoc->meta.vol);
125 }
126
127
128 /*
129 * Allocate all volatile resources (parse tree, meta-data, fields).
130 */
131 static int
132 mdoc_alloc1(struct mdoc *mdoc)
133 {
134
135 bzero(&mdoc->meta, sizeof(struct mdoc_meta));
136 mdoc->flags = 0;
137 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
138 mdoc->last = calloc(1, sizeof(struct mdoc_node));
139 if (NULL == mdoc->last)
140 return(0);
141
142 mdoc->first = mdoc->last;
143 mdoc->last->type = MDOC_ROOT;
144 mdoc->next = MDOC_NEXT_CHILD;
145 return(1);
146 }
147
148
/*
 * Release the volatile resources with mdoc_free1(), then re-initialise
 * them with mdoc_alloc1().  Afterwards the parse data has been reset
 * and the parser can be run again on a new tree; data that is kept
 * across parses is left intact.  Returns the result of mdoc_alloc1().
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 ok;

	mdoc_free1(mdoc);
	ok = mdoc_alloc1(mdoc);
	return ok;
}
162
163
164 /*
165 * Completely free up all volatile and non-volatile parse resources.
166 * After invocation, the pointer is no longer usable.
167 */
168 void
169 mdoc_free(struct mdoc *mdoc)
170 {
171
172 mdoc_free1(mdoc);
173 if (mdoc->htab)
174 mdoc_hash_free(mdoc->htab);
175 free(mdoc);
176 }
177
178
179 /*
180 * Allocate volatile and non-volatile parse resources.
181 */
182 struct mdoc *
183 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
184 {
185 struct mdoc *p;
186
187 if (NULL == (p = calloc(1, sizeof(struct mdoc))))
188 return(NULL);
189 if (cb)
190 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
191
192 p->data = data;
193 p->pflags = pflags;
194
195 if (NULL == (p->htab = mdoc_hash_alloc())) {
196 free(p);
197 return(NULL);
198 } else if (mdoc_alloc1(p))
199 return(p);
200
201 free(p);
202 return(NULL);
203 }
204
205
206 /*
207 * Climb back up the parse tree, validating open scopes. Mostly calls
208 * through to macro_end() in macro.c.
209 */
210 int
211 mdoc_endparse(struct mdoc *m)
212 {
213
214 if (MDOC_HALT & m->flags)
215 return(0);
216 else if (mdoc_macroend(m))
217 return(1);
218 m->flags |= MDOC_HALT;
219 return(0);
220 }
221
222
223 /*
224 * Main parse routine. Parses a single line -- really just hands off to
225 * the macro (parsemacro()) or text parser (parsetext()).
226 */
227 int
228 mdoc_parseln(struct mdoc *m, int ln, char *buf)
229 {
230
231 if (MDOC_HALT & m->flags)
232 return(0);
233
234 return('.' == *buf ? parsemacro(m, ln, buf) :
235 parsetext(m, ln, buf));
236 }
237
238
239 int
240 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
241 const char *fmt, ...)
242 {
243 char buf[256];
244 va_list ap;
245
246 if (NULL == mdoc->cb.mdoc_err)
247 return(0);
248
249 va_start(ap, fmt);
250 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
251 va_end(ap);
252
253 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
254 }
255
256
257 int
258 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
259 {
260 char buf[256];
261 va_list ap;
262
263 if (NULL == mdoc->cb.mdoc_warn)
264 return(0);
265
266 va_start(ap, fmt);
267 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
268 va_end(ap);
269
270 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
271 }
272
273
274 int
275 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
276 {
277 char *p;
278
279 p = NULL;
280
281 switch (type) {
282 case (ENOCALL):
283 p = "not callable";
284 break;
285 case (EPROLBODY):
286 p = "macro disallowed in document body";
287 break;
288 case (EBODYPROL):
289 p = "macro disallowed in document prologue";
290 break;
291 case (EMALLOC):
292 p = "memory exhausted";
293 break;
294 case (ETEXTPROL):
295 p = "text disallowed in document prologue";
296 break;
297 case (ENOBLANK):
298 p = "blank lines disallowed in non-literal contexts";
299 break;
300 case (ESPACE):
301 p = "whitespace disallowed after delimiter";
302 break;
303 case (ETOOLONG):
304 p = "text argument too long";
305 break;
306 case (EESCAPE):
307 p = "invalid escape sequence";
308 break;
309 case (EPRINT):
310 p = "invalid character";
311 break;
312 case (ENESTDISP):
313 p = "displays may not be nested";
314 break;
315 case (EBOOL):
316 p = "expected boolean value";
317 break;
318 case (EARGREP):
319 p = "argument repeated";
320 break;
321 case (EMULTIDISP):
322 p = "multiple display types specified";
323 break;
324 case (EMULTILIST):
325 p = "multiple list types specified";
326 break;
327 case (ELISTTYPE):
328 p = "missing list type";
329 break;
330 case (EDISPTYPE):
331 p = "missing display type";
332 break;
333 case (ESECNAME):
334 p = "the NAME section must come first";
335 break;
336 case (ELINE):
337 p = "expected line arguments";
338 break;
339 case (ENOPROLOGUE):
340 p = "document has no prologue";
341 break;
342 case (ENODAT):
343 p = "document has no data";
344 break;
345 case (ECOLMIS):
346 p = "column syntax style mismatch";
347 break;
348 case (EATT):
349 p = "expected valid AT&T symbol";
350 break;
351 case (ENAME):
352 p = "default name not yet set";
353 break;
354 case (ENOWIDTH):
355 p = "superfluous width argument";
356 break;
357 case (EMISSWIDTH):
358 p = "missing width argument";
359 break;
360 case (EWRONGMSEC):
361 p = "document section in wrong manual section";
362 break;
363 case (ESECOOO):
364 p = "document section out of conventional order";
365 break;
366 case (ESECREP):
367 p = "document section repeated";
368 break;
369 case (EBADSTAND):
370 p = "unknown standard";
371 break;
372 case (ENAMESECINC):
373 p = "NAME section contents incomplete/badly-ordered";
374 break;
375 case (ENOMULTILINE):
376 p = "suggested no multi-line arguments";
377 break;
378 case (EMULTILINE):
379 p = "suggested multi-line arguments";
380 break;
381 case (ENOLINE):
382 p = "suggested no line arguments";
383 break;
384 case (EPROLOOO):
385 p = "prologue macros out-of-order";
386 break;
387 case (EPROLREP):
388 p = "prologue macros repeated";
389 break;
390 case (EARGVAL):
391 p = "argument value suggested";
392 break;
393 case (EFONT):
394 p = "invalid font mode";
395 break;
396 case (EBADMSEC):
397 p = "inappropriate manual section";
398 break;
399 case (EBADSEC):
400 p = "inappropriate document section";
401 break;
402 case (EQUOTTERM):
403 p = "unterminated quoted parameter";
404 break;
405 case (EQUOTPARM):
406 p = "unexpected quoted parameter";
407 break;
408 case (EARGVPARM):
409 p = "argument-like parameter";
410 break;
411 case (ECOLEMPTY):
412 p = "last list column is empty";
413 break;
414 case (ETAILWS):
415 p = "trailing whitespace";
416 break;
417 case (ENUMFMT):
418 p = "bad number format";
419 break;
420 case (EUTSNAME):
421 p = "utsname";
422 break;
423 case (EBADDATE):
424 p = "malformed date syntax";
425 break;
426 case (EOPEN):
427 p = "explicit scope still open on exit";
428 break;
429 case (EQUOT):
430 p = "unterminated quotation";
431 break;
432 case (ENOCTX):
433 p = "closure has no prior context";
434 break;
435 case (ENOPARMS):
436 p = "unexpect line arguments";
437 break;
438 case (EIGNE):
439 p = "ignoring empty element";
440 break;
441 case (EIMPBRK):
442 p = "crufty end-of-line scope violation";
443 break;
444 case (EMACPARM):
445 p = "macro-like parameter";
446 break;
447 case (EOBS):
448 p = "macro marked obsolete";
449 break;
450 }
451
452 assert(p);
453
454 if (iserr)
455 return(mdoc_verr(m, line, pos, p));
456
457 return(mdoc_vwarn(m, line, pos, p));
458 }
459
460
461 int
462 mdoc_macro(struct mdoc *m, int tok,
463 int ln, int pp, int *pos, char *buf)
464 {
465
466 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
467 MDOC_PBODY & m->flags)
468 return(mdoc_perr(m, ln, pp, EPROLBODY));
469 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
470 ! (MDOC_PBODY & m->flags))
471 return(mdoc_perr(m, ln, pp, EBODYPROL));
472
473 if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
474 return(mdoc_perr(m, ln, pp, ENOCALL));
475
476 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
477 }
478
479
480 static int
481 node_append(struct mdoc *mdoc, struct mdoc_node *p)
482 {
483
484 assert(mdoc->last);
485 assert(mdoc->first);
486 assert(MDOC_ROOT != p->type);
487
488 switch (mdoc->next) {
489 case (MDOC_NEXT_SIBLING):
490 mdoc->last->next = p;
491 p->prev = mdoc->last;
492 p->parent = mdoc->last->parent;
493 break;
494 case (MDOC_NEXT_CHILD):
495 mdoc->last->child = p;
496 p->parent = mdoc->last;
497 break;
498 default:
499 abort();
500 /* NOTREACHED */
501 }
502
503 p->parent->nchild++;
504
505 if ( ! mdoc_valid_pre(mdoc, p))
506 return(0);
507 if ( ! mdoc_action_pre(mdoc, p))
508 return(0);
509
510 switch (p->type) {
511 case (MDOC_HEAD):
512 assert(MDOC_BLOCK == p->parent->type);
513 p->parent->head = p;
514 break;
515 case (MDOC_TAIL):
516 assert(MDOC_BLOCK == p->parent->type);
517 p->parent->tail = p;
518 break;
519 case (MDOC_BODY):
520 assert(MDOC_BLOCK == p->parent->type);
521 p->parent->body = p;
522 break;
523 default:
524 break;
525 }
526
527 mdoc->last = p;
528
529 switch (p->type) {
530 case (MDOC_TEXT):
531 if ( ! mdoc_valid_post(mdoc))
532 return(0);
533 if ( ! mdoc_action_post(mdoc))
534 return(0);
535 break;
536 default:
537 break;
538 }
539
540 return(1);
541 }
542
543
544 static struct mdoc_node *
545 node_alloc(struct mdoc *mdoc, int line,
546 int pos, int tok, enum mdoc_type type)
547 {
548 struct mdoc_node *p;
549
550 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
551 (void)mdoc_nerr(mdoc, mdoc->last, EMALLOC);
552 return(NULL);
553 }
554
555 p->sec = mdoc->lastsec;
556 p->line = line;
557 p->pos = pos;
558 p->tok = tok;
559 if (MDOC_TEXT != (p->type = type))
560 assert(p->tok >= 0);
561
562 return(p);
563 }
564
565
566 int
567 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
568 {
569 struct mdoc_node *p;
570
571 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
572 if (NULL == p)
573 return(0);
574 return(node_append(mdoc, p));
575 }
576
577
578 int
579 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
580 {
581 struct mdoc_node *p;
582
583 assert(mdoc->first);
584 assert(mdoc->last);
585
586 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
587 if (NULL == p)
588 return(0);
589 return(node_append(mdoc, p));
590 }
591
592
593 int
594 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
595 {
596 struct mdoc_node *p;
597
598 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
599 if (NULL == p)
600 return(0);
601 return(node_append(mdoc, p));
602 }
603
604
605 int
606 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
607 int tok, struct mdoc_arg *args)
608 {
609 struct mdoc_node *p;
610
611 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
612 if (NULL == p)
613 return(0);
614 p->args = args;
615 if (p->args)
616 (args->refcnt)++;
617 return(node_append(mdoc, p));
618 }
619
620
621 int
622 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
623 int tok, struct mdoc_arg *args)
624 {
625 struct mdoc_node *p;
626
627 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
628 if (NULL == p)
629 return(0);
630 p->args = args;
631 if (p->args)
632 (args->refcnt)++;
633 return(node_append(mdoc, p));
634 }
635
636
637 int
638 mdoc_word_alloc(struct mdoc *mdoc,
639 int line, int pos, const char *word)
640 {
641 struct mdoc_node *p;
642
643 p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT);
644 if (NULL == p)
645 return(0);
646 if (NULL == (p->string = strdup(word))) {
647 (void)mdoc_nerr(mdoc, mdoc->last, EMALLOC);
648 return(0);
649 }
650
651 return(node_append(mdoc, p));
652 }
653
654
655 void
656 mdoc_node_free(struct mdoc_node *p)
657 {
658
659 if (p->parent)
660 p->parent->nchild--;
661 if (p->string)
662 free(p->string);
663 if (p->args)
664 mdoc_argv_free(p->args);
665 free(p);
666 }
667
668
669 void
670 mdoc_node_freelist(struct mdoc_node *p)
671 {
672
673 if (p->child)
674 mdoc_node_freelist(p->child);
675 if (p->next)
676 mdoc_node_freelist(p->next);
677
678 assert(0 == p->nchild);
679 mdoc_node_free(p);
680 }
681
682
683 /*
684 * Parse free-form text, that is, a line that does not begin with the
685 * control character.
686 */
687 static int
688 parsetext(struct mdoc *m, int line, char *buf)
689 {
690
691 if (SEC_NONE == m->lastnamed)
692 return(mdoc_perr(m, line, 0, ETEXTPROL));
693
694 if (0 == buf[0] && ! (MDOC_LITERAL & m->flags))
695 return(mdoc_perr(m, line, 0, ENOBLANK));
696
697 if ( ! mdoc_word_alloc(m, line, 0, buf))
698 return(0);
699
700 m->next = MDOC_NEXT_SIBLING;
701 return(1);
702 }
703
704
705 static int
706 macrowarn(struct mdoc *m, int ln, const char *buf)
707 {
708 if ( ! (MDOC_IGN_MACRO & m->pflags))
709 return(mdoc_verr(m, ln, 1,
710 "unknown macro: %s%s",
711 buf, strlen(buf) > 3 ? "..." : ""));
712 return(mdoc_vwarn(m, ln, 1, "unknown macro: %s%s",
713 buf, strlen(buf) > 3 ? "..." : ""));
714 }
715
716
717 /*
718 * Parse a macro line, that is, a line beginning with the control
719 * character.
720 */
721 int
722 parsemacro(struct mdoc *m, int ln, char *buf)
723 {
724 int i, c;
725 char mac[5];
726
727 /* Empty lines are ignored. */
728
729 if (0 == buf[1])
730 return(1);
731
732 if (' ' == buf[1]) {
733 i = 2;
734 while (buf[i] && ' ' == buf[i])
735 i++;
736 if (0 == buf[i])
737 return(1);
738 return(mdoc_perr(m, ln, 1, ESPACE));
739 }
740
741 /* Copy the first word into a nil-terminated buffer. */
742
743 for (i = 1; i < 5; i++) {
744 if (0 == (mac[i - 1] = buf[i]))
745 break;
746 else if (' ' == buf[i])
747 break;
748 }
749
750 mac[i - 1] = 0;
751
752 if (i == 5 || i <= 2) {
753 if ( ! macrowarn(m, ln, mac))
754 goto err;
755 return(1);
756 }
757
758 if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
759 if ( ! macrowarn(m, ln, mac))
760 goto err;
761 return(1);
762 }
763
764 /* The macro is sane. Jump to the next word. */
765
766 while (buf[i] && ' ' == buf[i])
767 i++;
768
769 /* Begin recursive parse sequence. */
770
771 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
772 goto err;
773
774 return(1);
775
776 err: /* Error out. */
777
778 m->flags |= MDOC_HALT;
779 return(0);
780 }