]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
Fix man.7 to include AT and UC in its syntax table.
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.138 2010/05/25 12:37:20 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/types.h>
22
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <time.h>
30
31 #include "mandoc.h"
32 #include "libmdoc.h"
33 #include "libmandoc.h"
34
35 const char *const __mdoc_macronames[MDOC_MAX] = {
36 "Ap", "Dd", "Dt", "Os",
37 "Sh", "Ss", "Pp", "D1",
38 "Dl", "Bd", "Ed", "Bl",
39 "El", "It", "Ad", "An",
40 "Ar", "Cd", "Cm", "Dv",
41 "Er", "Ev", "Ex", "Fa",
42 "Fd", "Fl", "Fn", "Ft",
43 "Ic", "In", "Li", "Nd",
44 "Nm", "Op", "Ot", "Pa",
45 "Rv", "St", "Va", "Vt",
46 /* LINTED */
47 "Xr", "%A", "%B", "%D",
48 /* LINTED */
49 "%I", "%J", "%N", "%O",
50 /* LINTED */
51 "%P", "%R", "%T", "%V",
52 "Ac", "Ao", "Aq", "At",
53 "Bc", "Bf", "Bo", "Bq",
54 "Bsx", "Bx", "Db", "Dc",
55 "Do", "Dq", "Ec", "Ef",
56 "Em", "Eo", "Fx", "Ms",
57 "No", "Ns", "Nx", "Ox",
58 "Pc", "Pf", "Po", "Pq",
59 "Qc", "Ql", "Qo", "Qq",
60 "Re", "Rs", "Sc", "So",
61 "Sq", "Sm", "Sx", "Sy",
62 "Tn", "Ux", "Xc", "Xo",
63 "Fo", "Fc", "Oo", "Oc",
64 "Bk", "Ek", "Bt", "Hf",
65 "Fr", "Ud", "Lb", "Lp",
66 "Lk", "Mt", "Brq", "Bro",
67 /* LINTED */
68 "Brc", "%C", "Es", "En",
69 /* LINTED */
70 "Dx", "%Q", "br", "sp",
71 /* LINTED */
72 "%U"
73 };
74
75 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
76 "split", "nosplit", "ragged",
77 "unfilled", "literal", "file",
78 "offset", "bullet", "dash",
79 "hyphen", "item", "enum",
80 "tag", "diag", "hang",
81 "ohang", "inset", "column",
82 "width", "compact", "std",
83 "filled", "words", "emphasis",
84 "symbolic", "nested", "centered"
85 };
86
87 const char * const *mdoc_macronames = __mdoc_macronames;
88 const char * const *mdoc_argnames = __mdoc_argnames;
89
/* Allocation and release of the per-parse state. */
static	void	  mdoc_alloc1(struct mdoc *);
static	void	  mdoc_free1(struct mdoc *);

/* Node construction, linkage and destruction. */
static	struct mdoc_node *node_alloc(struct mdoc *, int, int,
			enum mdoct, enum mdoc_type);
static	int	  node_append(struct mdoc *, struct mdoc_node *);
static	void	  mdoc_node_unlink(struct mdoc *, struct mdoc_node *);
static	void	  mdoc_node_free(struct mdoc_node *);

/* Line parsers and their diagnostic helper. */
static	int	  mdoc_ptext(struct mdoc *, int, char *, int);
static	int	  mdoc_pmacro(struct mdoc *, int, char *, int);
static	int	  macrowarn(struct mdoc *, int, const char *, int);
103
104
105 const struct mdoc_node *
106 mdoc_node(const struct mdoc *m)
107 {
108
109 return(MDOC_HALT & m->flags ? NULL : m->first);
110 }
111
112
113 const struct mdoc_meta *
114 mdoc_meta(const struct mdoc *m)
115 {
116
117 return(MDOC_HALT & m->flags ? NULL : &m->meta);
118 }
119
120
121 /*
122 * Frees volatile resources (parse tree, meta-data, fields).
123 */
124 static void
125 mdoc_free1(struct mdoc *mdoc)
126 {
127
128 if (mdoc->first)
129 mdoc_node_delete(mdoc, mdoc->first);
130 if (mdoc->meta.title)
131 free(mdoc->meta.title);
132 if (mdoc->meta.os)
133 free(mdoc->meta.os);
134 if (mdoc->meta.name)
135 free(mdoc->meta.name);
136 if (mdoc->meta.arch)
137 free(mdoc->meta.arch);
138 if (mdoc->meta.vol)
139 free(mdoc->meta.vol);
140 if (mdoc->meta.msec)
141 free(mdoc->meta.msec);
142 }
143
144
145 /*
146 * Allocate all volatile resources (parse tree, meta-data, fields).
147 */
148 static void
149 mdoc_alloc1(struct mdoc *mdoc)
150 {
151
152 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
153 mdoc->flags = 0;
154 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
155 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
156 mdoc->first = mdoc->last;
157 mdoc->last->type = MDOC_ROOT;
158 mdoc->next = MDOC_NEXT_CHILD;
159 }
160
161
/*
 * Prepare the parser for another document: drop the volatile state via
 * mdoc_free1(), then rebuild it via mdoc_alloc1().  Fields those two
 * helpers do not touch (message callback, user data, parse flags) are
 * carried over unchanged.
 */
void
mdoc_reset(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);	/* tear down the old tree and meta-data */
	mdoc_alloc1(mdoc);	/* start over with an empty root */
}
175
176
/*
 * Destroy a parser: release the volatile state and then the parser
 * structure itself.  The pointer must not be used afterwards.
 */
void
mdoc_free(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);	/* tree and meta-data */
	free(mdoc);		/* the handle itself */
}
188
189
190 /*
191 * Allocate volatile and non-volatile parse resources.
192 */
193 struct mdoc *
194 mdoc_alloc(void *data, int pflags, mandocmsg msg)
195 {
196 struct mdoc *p;
197
198 p = mandoc_calloc(1, sizeof(struct mdoc));
199
200 p->msg = msg;
201 p->data = data;
202 p->pflags = pflags;
203
204 mdoc_hash_init();
205 mdoc_alloc1(p);
206 return(p);
207 }
208
209
210 /*
211 * Climb back up the parse tree, validating open scopes. Mostly calls
212 * through to macro_end() in macro.c.
213 */
214 int
215 mdoc_endparse(struct mdoc *m)
216 {
217
218 if (MDOC_HALT & m->flags)
219 return(0);
220 else if (mdoc_macroend(m))
221 return(1);
222 m->flags |= MDOC_HALT;
223 return(0);
224 }
225
226
227 /*
228 * Main parse routine. Parses a single line -- really just hands off to
229 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
230 */
231 int
232 mdoc_parseln(struct mdoc *m, int ln, char *buf, int offs)
233 {
234
235 if (MDOC_HALT & m->flags)
236 return(0);
237
238 m->flags |= MDOC_NEWLINE;
239 return(('.' == buf[offs] || '\'' == buf[offs]) ?
240 mdoc_pmacro(m, ln, buf, offs) :
241 mdoc_ptext(m, ln, buf, offs));
242 }
243
244
245 int
246 mdoc_vmsg(struct mdoc *mdoc, enum mandocerr t,
247 int ln, int pos, const char *fmt, ...)
248 {
249 char buf[256];
250 va_list ap;
251
252 va_start(ap, fmt);
253 vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
254 va_end(ap);
255
256 return((*mdoc->msg)(t, mdoc->data, ln, pos, buf));
257 }
258
259
260 int
261 mdoc_macro(struct mdoc *m, enum mdoct tok,
262 int ln, int pp, int *pos, char *buf)
263 {
264 assert(tok < MDOC_MAX);
265
266 /* If we're in the body, deny prologue calls. */
267
268 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
269 MDOC_PBODY & m->flags)
270 return(mdoc_pmsg(m, ln, pp, MANDOCERR_BADBODY));
271
272 /* If we're in the prologue, deny "body" macros. */
273
274 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
275 ! (MDOC_PBODY & m->flags)) {
276 if ( ! mdoc_pmsg(m, ln, pp, MANDOCERR_BADPROLOG))
277 return(0);
278 if (NULL == m->meta.title)
279 m->meta.title = mandoc_strdup("unknown");
280 if (NULL == m->meta.vol)
281 m->meta.vol = mandoc_strdup("local");
282 if (NULL == m->meta.os)
283 m->meta.os = mandoc_strdup("local");
284 if (0 == m->meta.date)
285 m->meta.date = time(NULL);
286 m->flags |= MDOC_PBODY;
287 }
288
289 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
290 }
291
292
293 static int
294 node_append(struct mdoc *mdoc, struct mdoc_node *p)
295 {
296
297 assert(mdoc->last);
298 assert(mdoc->first);
299 assert(MDOC_ROOT != p->type);
300
301 switch (mdoc->next) {
302 case (MDOC_NEXT_SIBLING):
303 mdoc->last->next = p;
304 p->prev = mdoc->last;
305 p->parent = mdoc->last->parent;
306 break;
307 case (MDOC_NEXT_CHILD):
308 mdoc->last->child = p;
309 p->parent = mdoc->last;
310 break;
311 default:
312 abort();
313 /* NOTREACHED */
314 }
315
316 p->parent->nchild++;
317
318 if ( ! mdoc_valid_pre(mdoc, p))
319 return(0);
320 if ( ! mdoc_action_pre(mdoc, p))
321 return(0);
322
323 switch (p->type) {
324 case (MDOC_HEAD):
325 assert(MDOC_BLOCK == p->parent->type);
326 p->parent->head = p;
327 break;
328 case (MDOC_TAIL):
329 assert(MDOC_BLOCK == p->parent->type);
330 p->parent->tail = p;
331 break;
332 case (MDOC_BODY):
333 assert(MDOC_BLOCK == p->parent->type);
334 p->parent->body = p;
335 break;
336 default:
337 break;
338 }
339
340 mdoc->last = p;
341
342 switch (p->type) {
343 case (MDOC_TEXT):
344 if ( ! mdoc_valid_post(mdoc))
345 return(0);
346 if ( ! mdoc_action_post(mdoc))
347 return(0);
348 break;
349 default:
350 break;
351 }
352
353 return(1);
354 }
355
356
357 static struct mdoc_node *
358 node_alloc(struct mdoc *m, int line, int pos,
359 enum mdoct tok, enum mdoc_type type)
360 {
361 struct mdoc_node *p;
362
363 p = mandoc_calloc(1, sizeof(struct mdoc_node));
364 p->sec = m->lastsec;
365 p->line = line;
366 p->pos = pos;
367 p->tok = tok;
368 p->type = type;
369 if (MDOC_NEWLINE & m->flags)
370 p->flags |= MDOC_LINE;
371 m->flags &= ~MDOC_NEWLINE;
372 return(p);
373 }
374
375
376 int
377 mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
378 {
379 struct mdoc_node *p;
380
381 p = node_alloc(m, line, pos, tok, MDOC_TAIL);
382 if ( ! node_append(m, p))
383 return(0);
384 m->next = MDOC_NEXT_CHILD;
385 return(1);
386 }
387
388
389 int
390 mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
391 {
392 struct mdoc_node *p;
393
394 assert(m->first);
395 assert(m->last);
396
397 p = node_alloc(m, line, pos, tok, MDOC_HEAD);
398 if ( ! node_append(m, p))
399 return(0);
400 m->next = MDOC_NEXT_CHILD;
401 return(1);
402 }
403
404
405 int
406 mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
407 {
408 struct mdoc_node *p;
409
410 p = node_alloc(m, line, pos, tok, MDOC_BODY);
411 if ( ! node_append(m, p))
412 return(0);
413 m->next = MDOC_NEXT_CHILD;
414 return(1);
415 }
416
417
418 int
419 mdoc_block_alloc(struct mdoc *m, int line, int pos,
420 enum mdoct tok, struct mdoc_arg *args)
421 {
422 struct mdoc_node *p;
423
424 p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
425 p->args = args;
426 if (p->args)
427 (args->refcnt)++;
428 if ( ! node_append(m, p))
429 return(0);
430 m->next = MDOC_NEXT_CHILD;
431 return(1);
432 }
433
434
435 int
436 mdoc_elem_alloc(struct mdoc *m, int line, int pos,
437 enum mdoct tok, struct mdoc_arg *args)
438 {
439 struct mdoc_node *p;
440
441 p = node_alloc(m, line, pos, tok, MDOC_ELEM);
442 p->args = args;
443 if (p->args)
444 (args->refcnt)++;
445 if ( ! node_append(m, p))
446 return(0);
447 m->next = MDOC_NEXT_CHILD;
448 return(1);
449 }
450
451
452 int
453 mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
454 {
455 struct mdoc_node *n;
456 size_t sv, len;
457
458 len = strlen(p);
459
460 n = node_alloc(m, line, pos, MDOC_MAX, MDOC_TEXT);
461 n->string = mandoc_malloc(len + 1);
462 sv = strlcpy(n->string, p, len + 1);
463
464 /* Prohibit truncation. */
465 assert(sv < len + 1);
466
467 if ( ! node_append(m, n))
468 return(0);
469
470 m->next = MDOC_NEXT_SIBLING;
471 return(1);
472 }
473
474
475 void
476 mdoc_node_free(struct mdoc_node *p)
477 {
478
479 if (p->string)
480 free(p->string);
481 if (p->args)
482 mdoc_argv_free(p->args);
483 free(p);
484 }
485
486
487 static void
488 mdoc_node_unlink(struct mdoc *m, struct mdoc_node *n)
489 {
490
491 /* Adjust siblings. */
492
493 if (n->prev)
494 n->prev->next = n->next;
495 if (n->next)
496 n->next->prev = n->prev;
497
498 /* Adjust parent. */
499
500 if (n->parent) {
501 n->parent->nchild--;
502 if (n->parent->child == n)
503 n->parent->child = n->prev ? n->prev : n->next;
504 }
505
506 /* Adjust parse point, if applicable. */
507
508 if (m && m->last == n) {
509 if (n->prev) {
510 m->last = n->prev;
511 m->next = MDOC_NEXT_SIBLING;
512 } else {
513 m->last = n->parent;
514 m->next = MDOC_NEXT_CHILD;
515 }
516 }
517
518 if (m && m->first == n)
519 m->first = NULL;
520 }
521
522
523 void
524 mdoc_node_delete(struct mdoc *m, struct mdoc_node *p)
525 {
526
527 while (p->child) {
528 assert(p->nchild);
529 mdoc_node_delete(m, p->child);
530 }
531 assert(0 == p->nchild);
532
533 mdoc_node_unlink(m, p);
534 mdoc_node_free(p);
535 }
536
537
538 /*
539 * Parse free-form text, that is, a line that does not begin with the
540 * control character.
541 */
542 static int
543 mdoc_ptext(struct mdoc *m, int line, char *buf, int offs)
544 {
545 char *c, *ws, *end;
546
547 /* Ignore bogus comments. */
548
549 if ('\\' == buf[offs] &&
550 '.' == buf[offs + 1] &&
551 '"' == buf[offs + 2])
552 return(mdoc_pmsg(m, line, offs, MANDOCERR_BADCOMMENT));
553
554 /* No text before an initial macro. */
555
556 if (SEC_NONE == m->lastnamed)
557 return(mdoc_pmsg(m, line, offs, MANDOCERR_NOTEXT));
558
559 /*
560 * Search for the beginning of unescaped trailing whitespace (ws)
561 * and for the first character not to be output (end).
562 */
563 ws = NULL;
564 for (c = end = buf + offs; *c; c++) {
565 switch (*c) {
566 case '-':
567 if (mandoc_hyph(buf + offs, c))
568 *c = ASCII_HYPH;
569 break;
570 case ' ':
571 if (NULL == ws)
572 ws = c;
573 continue;
574 case '\t':
575 /*
576 * Always warn about trailing tabs,
577 * even outside literal context,
578 * where they should be put on the next line.
579 */
580 if (NULL == ws)
581 ws = c;
582 /*
583 * Strip trailing tabs in literal context only;
584 * outside, they affect the next line.
585 */
586 if (MDOC_LITERAL & m->flags)
587 continue;
588 break;
589 case '\\':
590 /* Skip the escaped character, too, if any. */
591 if (c[1])
592 c++;
593 /* FALLTHROUGH */
594 default:
595 ws = NULL;
596 break;
597 }
598 end = c + 1;
599 }
600 *end = '\0';
601
602 if (ws)
603 if ( ! mdoc_pmsg(m, line, (int)(ws-buf), MANDOCERR_EOLNSPACE))
604 return(0);
605
606 if ('\0' == buf[offs] && ! (MDOC_LITERAL & m->flags)) {
607 if ( ! mdoc_pmsg(m, line, (int)(c-buf), MANDOCERR_NOBLANKLN))
608 return(0);
609
610 /*
611 * Insert a `Pp' in the case of a blank line. Technically,
612 * blank lines aren't allowed, but enough manuals assume this
613 * behaviour that we want to work around it.
614 */
615 if ( ! mdoc_elem_alloc(m, line, offs, MDOC_Pp, NULL))
616 return(0);
617
618 m->next = MDOC_NEXT_SIBLING;
619 return(1);
620 }
621
622 if ( ! mdoc_word_alloc(m, line, offs, buf+offs))
623 return(0);
624
625 if (MDOC_LITERAL & m->flags)
626 return(1);
627
628 /*
629 * End-of-sentence check. If the last character is an unescaped
630 * EOS character, then flag the node as being the end of a
631 * sentence. The front-end will know how to interpret this.
632 */
633
634 assert(buf < end);
635
636 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
637 m->last->flags |= MDOC_EOS;
638
639 return(1);
640 }
641
642
643 static int
644 macrowarn(struct mdoc *m, int ln, const char *buf, int offs)
645 {
646 int rc;
647
648 rc = mdoc_vmsg(m, MANDOCERR_MACRO, ln, offs,
649 "unknown macro: %s%s",
650 buf, strlen(buf) > 3 ? "..." : "");
651
652 /* FIXME: logic should be in driver. */
653 return(MDOC_IGN_MACRO & m->pflags ? rc : 0);
654 }
655
656
657 /*
658 * Parse a macro line, that is, a line beginning with the control
659 * character.
660 */
661 int
662 mdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs)
663 {
664 enum mdoct tok;
665 int i, j, sv;
666 char mac[5];
667
668 /* Empty lines are ignored. */
669
670 offs++;
671
672 if ('\0' == buf[offs])
673 return(1);
674
675 i = offs;
676
677 /* Accept whitespace after the initial control char. */
678
679 if (' ' == buf[i]) {
680 i++;
681 while (buf[i] && ' ' == buf[i])
682 i++;
683 if ('\0' == buf[i])
684 return(1);
685 }
686
687 sv = i;
688
689 /* Copy the first word into a nil-terminated buffer. */
690
691 for (j = 0; j < 4; j++, i++) {
692 if ('\0' == (mac[j] = buf[i]))
693 break;
694 else if (' ' == buf[i])
695 break;
696
697 /* Check for invalid characters. */
698
699 if (isgraph((u_char)buf[i]))
700 continue;
701 if ( ! mdoc_pmsg(m, ln, i, MANDOCERR_BADCHAR))
702 return(0);
703 i--;
704 }
705
706 mac[j] = '\0';
707
708 if (j == 4 || j < 2) {
709 if ( ! macrowarn(m, ln, mac, sv))
710 goto err;
711 return(1);
712 }
713
714 if (MDOC_MAX == (tok = mdoc_hash_find(mac))) {
715 if ( ! macrowarn(m, ln, mac, sv))
716 goto err;
717 return(1);
718 }
719
720 /* The macro is sane. Jump to the next word. */
721
722 while (buf[i] && ' ' == buf[i])
723 i++;
724
725 /*
726 * Trailing whitespace. Note that tabs are allowed to be passed
727 * into the parser as "text", so we only warn about spaces here.
728 */
729
730 if ('\0' == buf[i] && ' ' == buf[i - 1])
731 if ( ! mdoc_pmsg(m, ln, i - 1, MANDOCERR_EOLNSPACE))
732 goto err;
733
734 /*
735 * Begin recursive parse sequence. Since we're at the start of
736 * the line, we don't need to do callable/parseable checks.
737 */
738 if ( ! mdoc_macro(m, tok, ln, sv, &i, buf))
739 goto err;
740
741 return(1);
742
743 err: /* Error out. */
744
745 m->flags |= MDOC_HALT;
746 return(0);
747 }
748
749