git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
args() should return ARGS_ERROR, not 0.
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.90 2009/07/12 20:50:08 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <assert.h>
18 #include <ctype.h>
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "libmdoc.h"
25
26 const char *const __mdoc_merrnames[MERRMAX] = {
27 "trailing whitespace", /* ETAILWS */
28 "empty last list column", /* ECOLEMPTY */
29 "argument-like parameter", /* EARGVPARM */
30 "unexpected quoted parameter", /* EQUOTPARM */
31 "unterminated quoted parameter", /* EQUOTTERM */
32 "system: malloc error", /* EMALLOC */
33 "argument parameter suggested", /* EARGVAL */
34 "macro not callable", /* ENOCALL */
35 "macro disallowed in prologue", /* EBODYPROL */
36 "macro disallowed in body", /* EPROLBODY */
37 "text disallowed in prologue", /* ETEXTPROL */
38 "blank line disallowed", /* ENOBLANK */
39 "text parameter too long", /* ETOOLONG */
40 "invalid escape sequence", /* EESCAPE */
41 "invalid character", /* EPRINT */
42 "document has no body", /* ENODAT */
43 "document has no prologue", /* ENOPROLOGUE */
44 "expected line arguments", /* ELINE */
45 "invalid AT&T argument", /* EATT */
46 "default name not yet set", /* ENAME */
47 "missing list type", /* ELISTTYPE */
48 "missing display type", /* EDISPTYPE */
49 "too many display types", /* EMULTIDISP */
50 "too many list types", /* EMULTILIST */
51 "NAME section must be first", /* ESECNAME */
52 "badly-formed NAME section", /* ENAMESECINC */
53 "argument repeated", /* EARGREP */
54 "expected boolean parameter", /* EBOOL */
55 "inconsistent column syntax", /* ECOLMIS */
56 "nested display invalid", /* ENESTDISP */
57 "width argument missing", /* EMISSWIDTH */
58 "invalid section for this manual section", /* EWRONGMSEC */
59 "section out of conventional order", /* ESECOOO */
60 "section repeated", /* ESECREP */
61 "invalid standard argument", /* EBADSTAND */
62 "multi-line arguments discouraged", /* ENOMULTILINE */
63 "multi-line arguments suggested", /* EMULTILINE */
64 "line arguments discouraged", /* ENOLINE */
65 "prologue macro out of conventional order", /* EPROLOOO */
66 "prologue macro repeated", /* EPROLREP */
67 "invalid manual section", /* EBADMSEC */
68 "invalid section", /* EBADSEC */
69 "invalid font mode", /* EFONT */
70 "invalid date syntax", /* EBADDATE */
71 "invalid number format", /* ENUMFMT */
72 "superfluous width argument", /* ENOWIDTH */
73 "system: utsname error", /* EUTSNAME */
74 "obsolete macro", /* EOBS */
75 "macro-like parameter", /* EMACPARM */
76 "end-of-line scope violation", /* EIMPBRK */
77 "empty macro ignored", /* EIGNE */
78 "unclosed explicit scope", /* EOPEN */
79 "unterminated quoted phrase", /* EQUOTPHR */
80 "closure macro without prior context", /* ENOCTX */
81 "invalid whitespace after control character", /* ESPACE */
82 "no description found for library" /* ELIB */
83 };
84
85 const char *const __mdoc_macronames[MDOC_MAX] = {
86 "Ap", "Dd", "Dt", "Os",
87 "Sh", "Ss", "Pp", "D1",
88 "Dl", "Bd", "Ed", "Bl",
89 "El", "It", "Ad", "An",
90 "Ar", "Cd", "Cm", "Dv",
91 "Er", "Ev", "Ex", "Fa",
92 "Fd", "Fl", "Fn", "Ft",
93 "Ic", "In", "Li", "Nd",
94 "Nm", "Op", "Ot", "Pa",
95 "Rv", "St", "Va", "Vt",
96 /* LINTED */
97 "Xr", "\%A", "\%B", "\%D",
98 /* LINTED */
99 "\%I", "\%J", "\%N", "\%O",
100 /* LINTED */
101 "\%P", "\%R", "\%T", "\%V",
102 "Ac", "Ao", "Aq", "At",
103 "Bc", "Bf", "Bo", "Bq",
104 "Bsx", "Bx", "Db", "Dc",
105 "Do", "Dq", "Ec", "Ef",
106 "Em", "Eo", "Fx", "Ms",
107 "No", "Ns", "Nx", "Ox",
108 "Pc", "Pf", "Po", "Pq",
109 "Qc", "Ql", "Qo", "Qq",
110 "Re", "Rs", "Sc", "So",
111 "Sq", "Sm", "Sx", "Sy",
112 "Tn", "Ux", "Xc", "Xo",
113 "Fo", "Fc", "Oo", "Oc",
114 "Bk", "Ek", "Bt", "Hf",
115 "Fr", "Ud", "Lb", "Lp",
116 "Lk", "Mt", "Brq", "Bro",
117 /* LINTED */
118 "Brc", "\%C", "Es", "En",
119 /* LINTED */
120 "Dx", "\%Q"
121 };
122
123 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
124 "split", "nosplit", "ragged",
125 "unfilled", "literal", "file",
126 "offset", "bullet", "dash",
127 "hyphen", "item", "enum",
128 "tag", "diag", "hang",
129 "ohang", "inset", "column",
130 "width", "compact", "std",
131 "filled", "words", "emphasis",
132 "symbolic", "nested"
133 };
134
135 const char * const *mdoc_macronames = __mdoc_macronames;
136 const char * const *mdoc_argnames = __mdoc_argnames;
137
138 static void mdoc_free1(struct mdoc *);
139 static int mdoc_alloc1(struct mdoc *);
140 static struct mdoc_node *node_alloc(struct mdoc *, int, int,
141 int, enum mdoc_type);
142 static int node_append(struct mdoc *,
143 struct mdoc_node *);
144 static int parsetext(struct mdoc *, int, char *);
145 static int parsemacro(struct mdoc *, int, char *);
146 static int macrowarn(struct mdoc *, int, const char *);
147
148
149 const struct mdoc_node *
150 mdoc_node(const struct mdoc *m)
151 {
152
153 return(MDOC_HALT & m->flags ? NULL : m->first);
154 }
155
156
157 const struct mdoc_meta *
158 mdoc_meta(const struct mdoc *m)
159 {
160
161 return(MDOC_HALT & m->flags ? NULL : &m->meta);
162 }
163
164
165 /*
166 * Frees volatile resources (parse tree, meta-data, fields).
167 */
168 static void
169 mdoc_free1(struct mdoc *mdoc)
170 {
171
172 if (mdoc->first)
173 mdoc_node_freelist(mdoc->first);
174 if (mdoc->meta.title)
175 free(mdoc->meta.title);
176 if (mdoc->meta.os)
177 free(mdoc->meta.os);
178 if (mdoc->meta.name)
179 free(mdoc->meta.name);
180 if (mdoc->meta.arch)
181 free(mdoc->meta.arch);
182 if (mdoc->meta.vol)
183 free(mdoc->meta.vol);
184 }
185
186
187 /*
188 * Allocate all volatile resources (parse tree, meta-data, fields).
189 */
190 static int
191 mdoc_alloc1(struct mdoc *mdoc)
192 {
193
194 bzero(&mdoc->meta, sizeof(struct mdoc_meta));
195 mdoc->flags = 0;
196 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
197 mdoc->last = calloc(1, sizeof(struct mdoc_node));
198 if (NULL == mdoc->last)
199 return(0);
200
201 mdoc->first = mdoc->last;
202 mdoc->last->type = MDOC_ROOT;
203 mdoc->next = MDOC_NEXT_CHILD;
204 return(1);
205 }
206
207
/*
 * Prepare the parser for a new document: release the volatile state
 * with mdoc_free1() and build it afresh with mdoc_alloc1().  State that
 * is not touched by those two routines is carried over unchanged.
 * Returns whatever mdoc_alloc1() returns (non-zero on success).
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 ok;

	mdoc_free1(mdoc);
	ok = mdoc_alloc1(mdoc);

	return ok;
}
221
222
223 /*
224 * Completely free up all volatile and non-volatile parse resources.
225 * After invocation, the pointer is no longer usable.
226 */
227 void
228 mdoc_free(struct mdoc *mdoc)
229 {
230
231 mdoc_free1(mdoc);
232 if (mdoc->htab)
233 mdoc_hash_free(mdoc->htab);
234 free(mdoc);
235 }
236
237
238 /*
239 * Allocate volatile and non-volatile parse resources.
240 */
241 struct mdoc *
242 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
243 {
244 struct mdoc *p;
245
246 if (NULL == (p = calloc(1, sizeof(struct mdoc))))
247 return(NULL);
248 if (cb)
249 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
250
251 p->data = data;
252 p->pflags = pflags;
253
254 if (NULL == (p->htab = mdoc_hash_alloc())) {
255 free(p);
256 return(NULL);
257 } else if (mdoc_alloc1(p))
258 return(p);
259
260 free(p);
261 return(NULL);
262 }
263
264
265 /*
266 * Climb back up the parse tree, validating open scopes. Mostly calls
267 * through to macro_end() in macro.c.
268 */
269 int
270 mdoc_endparse(struct mdoc *m)
271 {
272
273 if (MDOC_HALT & m->flags)
274 return(0);
275 else if (mdoc_macroend(m))
276 return(1);
277 m->flags |= MDOC_HALT;
278 return(0);
279 }
280
281
282 /*
283 * Main parse routine. Parses a single line -- really just hands off to
284 * the macro (parsemacro()) or text parser (parsetext()).
285 */
286 int
287 mdoc_parseln(struct mdoc *m, int ln, char *buf)
288 {
289
290 if (MDOC_HALT & m->flags)
291 return(0);
292
293 return('.' == *buf ? parsemacro(m, ln, buf) :
294 parsetext(m, ln, buf));
295 }
296
297
298 int
299 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
300 const char *fmt, ...)
301 {
302 char buf[256];
303 va_list ap;
304
305 if (NULL == mdoc->cb.mdoc_err)
306 return(0);
307
308 va_start(ap, fmt);
309 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
310 va_end(ap);
311
312 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
313 }
314
315
316 int
317 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
318 {
319 char buf[256];
320 va_list ap;
321
322 if (NULL == mdoc->cb.mdoc_warn)
323 return(0);
324
325 va_start(ap, fmt);
326 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
327 va_end(ap);
328
329 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
330 }
331
332
333 int
334 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
335 {
336 const char *p;
337
338 p = __mdoc_merrnames[(int)type];
339 assert(p);
340
341 if (iserr)
342 return(mdoc_verr(m, line, pos, p));
343
344 return(mdoc_vwarn(m, line, pos, p));
345 }
346
347
348 int
349 mdoc_macro(struct mdoc *m, int tok,
350 int ln, int pp, int *pos, char *buf)
351 {
352
353 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
354 MDOC_PBODY & m->flags)
355 return(mdoc_perr(m, ln, pp, EPROLBODY));
356 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
357 ! (MDOC_PBODY & m->flags))
358 return(mdoc_perr(m, ln, pp, EBODYPROL));
359
360 if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
361 return(mdoc_perr(m, ln, pp, ENOCALL));
362
363 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
364 }
365
366
367 static int
368 node_append(struct mdoc *mdoc, struct mdoc_node *p)
369 {
370
371 assert(mdoc->last);
372 assert(mdoc->first);
373 assert(MDOC_ROOT != p->type);
374
375 switch (mdoc->next) {
376 case (MDOC_NEXT_SIBLING):
377 mdoc->last->next = p;
378 p->prev = mdoc->last;
379 p->parent = mdoc->last->parent;
380 break;
381 case (MDOC_NEXT_CHILD):
382 mdoc->last->child = p;
383 p->parent = mdoc->last;
384 break;
385 default:
386 abort();
387 /* NOTREACHED */
388 }
389
390 p->parent->nchild++;
391
392 if ( ! mdoc_valid_pre(mdoc, p))
393 return(0);
394 if ( ! mdoc_action_pre(mdoc, p))
395 return(0);
396
397 switch (p->type) {
398 case (MDOC_HEAD):
399 assert(MDOC_BLOCK == p->parent->type);
400 p->parent->head = p;
401 break;
402 case (MDOC_TAIL):
403 assert(MDOC_BLOCK == p->parent->type);
404 p->parent->tail = p;
405 break;
406 case (MDOC_BODY):
407 assert(MDOC_BLOCK == p->parent->type);
408 p->parent->body = p;
409 break;
410 default:
411 break;
412 }
413
414 mdoc->last = p;
415
416 switch (p->type) {
417 case (MDOC_TEXT):
418 if ( ! mdoc_valid_post(mdoc))
419 return(0);
420 if ( ! mdoc_action_post(mdoc))
421 return(0);
422 break;
423 default:
424 break;
425 }
426
427 return(1);
428 }
429
430
431 static struct mdoc_node *
432 node_alloc(struct mdoc *mdoc, int line,
433 int pos, int tok, enum mdoc_type type)
434 {
435 struct mdoc_node *p;
436
437 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
438 (void)mdoc_nerr(mdoc, mdoc->last, EMALLOC);
439 return(NULL);
440 }
441
442 p->sec = mdoc->lastsec;
443 p->line = line;
444 p->pos = pos;
445 p->tok = tok;
446 if (MDOC_TEXT != (p->type = type))
447 assert(p->tok >= 0);
448
449 return(p);
450 }
451
452
453 int
454 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
455 {
456 struct mdoc_node *p;
457
458 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
459 if (NULL == p)
460 return(0);
461 return(node_append(mdoc, p));
462 }
463
464
465 int
466 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
467 {
468 struct mdoc_node *p;
469
470 assert(mdoc->first);
471 assert(mdoc->last);
472
473 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
474 if (NULL == p)
475 return(0);
476 return(node_append(mdoc, p));
477 }
478
479
480 int
481 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
482 {
483 struct mdoc_node *p;
484
485 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
486 if (NULL == p)
487 return(0);
488 return(node_append(mdoc, p));
489 }
490
491
492 int
493 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
494 int tok, struct mdoc_arg *args)
495 {
496 struct mdoc_node *p;
497
498 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
499 if (NULL == p)
500 return(0);
501 p->args = args;
502 if (p->args)
503 (args->refcnt)++;
504 return(node_append(mdoc, p));
505 }
506
507
508 int
509 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
510 int tok, struct mdoc_arg *args)
511 {
512 struct mdoc_node *p;
513
514 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
515 if (NULL == p)
516 return(0);
517 p->args = args;
518 if (p->args)
519 (args->refcnt)++;
520 return(node_append(mdoc, p));
521 }
522
523
524 int
525 mdoc_word_alloc(struct mdoc *mdoc,
526 int line, int pos, const char *word)
527 {
528 struct mdoc_node *p;
529
530 p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT);
531 if (NULL == p)
532 return(0);
533 if (NULL == (p->string = strdup(word))) {
534 (void)mdoc_nerr(mdoc, mdoc->last, EMALLOC);
535 return(0);
536 }
537
538 return(node_append(mdoc, p));
539 }
540
541
542 void
543 mdoc_node_free(struct mdoc_node *p)
544 {
545
546 if (p->parent)
547 p->parent->nchild--;
548 if (p->string)
549 free(p->string);
550 if (p->args)
551 mdoc_argv_free(p->args);
552 free(p);
553 }
554
555
556 void
557 mdoc_node_freelist(struct mdoc_node *p)
558 {
559
560 if (p->child)
561 mdoc_node_freelist(p->child);
562 if (p->next)
563 mdoc_node_freelist(p->next);
564
565 assert(0 == p->nchild);
566 mdoc_node_free(p);
567 }
568
569
570 /*
571 * Parse free-form text, that is, a line that does not begin with the
572 * control character.
573 */
574 static int
575 parsetext(struct mdoc *m, int line, char *buf)
576 {
577
578 if (SEC_NONE == m->lastnamed)
579 return(mdoc_perr(m, line, 0, ETEXTPROL));
580
581 if (0 == buf[0] && ! (MDOC_LITERAL & m->flags))
582 return(mdoc_perr(m, line, 0, ENOBLANK));
583
584 if ( ! mdoc_word_alloc(m, line, 0, buf))
585 return(0);
586
587 m->next = MDOC_NEXT_SIBLING;
588 return(1);
589 }
590
591
592 static int
593 macrowarn(struct mdoc *m, int ln, const char *buf)
594 {
595 if ( ! (MDOC_IGN_MACRO & m->pflags))
596 return(mdoc_verr(m, ln, 1,
597 "unknown macro: %s%s",
598 buf, strlen(buf) > 3 ? "..." : ""));
599 return(mdoc_vwarn(m, ln, 1, "unknown macro: %s%s",
600 buf, strlen(buf) > 3 ? "..." : ""));
601 }
602
603
604 /*
605 * Parse a macro line, that is, a line beginning with the control
606 * character.
607 */
608 int
609 parsemacro(struct mdoc *m, int ln, char *buf)
610 {
611 int i, c;
612 char mac[5];
613
614 /* Empty lines are ignored. */
615
616 if (0 == buf[1])
617 return(1);
618
619 if (' ' == buf[1]) {
620 i = 2;
621 while (buf[i] && ' ' == buf[i])
622 i++;
623 if (0 == buf[i])
624 return(1);
625 return(mdoc_perr(m, ln, 1, ESPACE));
626 }
627
628 /* Copy the first word into a nil-terminated buffer. */
629
630 for (i = 1; i < 5; i++) {
631 if (0 == (mac[i - 1] = buf[i]))
632 break;
633 else if (' ' == buf[i])
634 break;
635 }
636
637 mac[i - 1] = 0;
638
639 if (i == 5 || i <= 2) {
640 if ( ! macrowarn(m, ln, mac))
641 goto err;
642 return(1);
643 }
644
645 if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
646 if ( ! macrowarn(m, ln, mac))
647 goto err;
648 return(1);
649 }
650
651 /* The macro is sane. Jump to the next word. */
652
653 while (buf[i] && ' ' == buf[i])
654 i++;
655
656 /* Begin recursive parse sequence. */
657
658 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
659 goto err;
660
661 return(1);
662
663 err: /* Error out. */
664
665 m->flags |= MDOC_HALT;
666 return(0);
667 }