]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
GNU/Linux also uses \- for Nd (ew).
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.89 2009/07/07 09:29:15 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <assert.h>
18 #include <ctype.h>
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "libmdoc.h"
25
26 const char *const __mdoc_merrnames[MERRMAX] = {
27 "trailing whitespace", /* ETAILWS */
28 "empty last list column", /* ECOLEMPTY */
29 "argument-like parameter", /* EARGVPARM */
30 "unexpected quoted parameter", /* EQUOTPARM */
31 "unterminated quoted parameter", /* EQUOTTERM */
32 "system: malloc error", /* EMALLOC */
33 "argument parameter suggested", /* EARGVAL */
34 "macro not callable", /* ENOCALL */
35 "macro disallowed in prologue", /* EBODYPROL */
36 "macro disallowed in body", /* EPROLBODY */
37 "text disallowed in prologue", /* ETEXTPROL */
38 "blank line disallowed", /* ENOBLANK */
39 "text parameter too long", /* ETOOLONG */
40 "invalid escape sequence", /* EESCAPE */
41 "invalid character", /* EPRINT */
42 "document has no body", /* ENODAT */
43 "document has no prologue", /* ENOPROLOGUE */
44 "expected line arguments", /* ELINE */
45 "invalid AT&T argument", /* EATT */
46 "default name not yet set", /* ENAME */
47 "missing list type", /* ELISTTYPE */
48 "missing display type", /* EDISPTYPE */
49 "too many display types", /* EMULTIDISP */
50 "too many list types", /* EMULTILIST */
51 "NAME section must be first", /* ESECNAME */
52 "badly-formed NAME section", /* ENAMESECINC */
53 "argument repeated", /* EARGREP */
54 "expected boolean parameter", /* EBOOL */
55 "inconsistent column syntax", /* ECOLMIS */
56 "nested display invalid", /* ENESTDISP */
57 "width argument missing", /* EMISSWIDTH */
58 "invalid section for this manual section", /* EWRONGMSEC */
59 "section out of conventional order", /* ESECOOO */
60 "section repeated", /* ESECREP */
61 "invalid standard argument", /* EBADSTAND */
62 "multi-line arguments discouraged", /* ENOMULTILINE */
63 "multi-line arguments suggested", /* EMULTILINE */
64 "line arguments discouraged", /* ENOLINE */
65 "prologue macro out of conventional order", /* EPROLOOO */
66 "prologue macro repeated", /* EPROLREP */
67 "invalid manual section", /* EBADMSEC */
68 "invalid section", /* EBADSEC */
69 "invalid font mode", /* EFONT */
70 "invalid date syntax", /* EBADDATE */
71 "invalid number format", /* ENUMFMT */
72 "superfluous width argument", /* ENOWIDTH */
73 "system: utsname error", /* EUTSNAME */
74 "obsolete macro", /* EOBS */
75 "macro-like parameter", /* EMACPARM */
76 "end-of-line scope violation", /* EIMPBRK */
77 "empty macro ignored", /* EIGNE */
78 "unclosed explicit scope", /* EOPEN */
79 "unterminated quoted phrase", /* EQUOTPHR */
80 "closure macro without prior context", /* ENOCTX */
81 "invalid whitespace after control character", /* ESPACE */
82 };
83
84 const char *const __mdoc_macronames[MDOC_MAX] = {
85 "Ap", "Dd", "Dt", "Os",
86 "Sh", "Ss", "Pp", "D1",
87 "Dl", "Bd", "Ed", "Bl",
88 "El", "It", "Ad", "An",
89 "Ar", "Cd", "Cm", "Dv",
90 "Er", "Ev", "Ex", "Fa",
91 "Fd", "Fl", "Fn", "Ft",
92 "Ic", "In", "Li", "Nd",
93 "Nm", "Op", "Ot", "Pa",
94 "Rv", "St", "Va", "Vt",
95 /* LINTED */
96 "Xr", "\%A", "\%B", "\%D",
97 /* LINTED */
98 "\%I", "\%J", "\%N", "\%O",
99 /* LINTED */
100 "\%P", "\%R", "\%T", "\%V",
101 "Ac", "Ao", "Aq", "At",
102 "Bc", "Bf", "Bo", "Bq",
103 "Bsx", "Bx", "Db", "Dc",
104 "Do", "Dq", "Ec", "Ef",
105 "Em", "Eo", "Fx", "Ms",
106 "No", "Ns", "Nx", "Ox",
107 "Pc", "Pf", "Po", "Pq",
108 "Qc", "Ql", "Qo", "Qq",
109 "Re", "Rs", "Sc", "So",
110 "Sq", "Sm", "Sx", "Sy",
111 "Tn", "Ux", "Xc", "Xo",
112 "Fo", "Fc", "Oo", "Oc",
113 "Bk", "Ek", "Bt", "Hf",
114 "Fr", "Ud", "Lb", "Lp",
115 "Lk", "Mt", "Brq", "Bro",
116 /* LINTED */
117 "Brc", "\%C", "Es", "En",
118 /* LINTED */
119 "Dx", "\%Q"
120 };
121
122 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
123 "split", "nosplit", "ragged",
124 "unfilled", "literal", "file",
125 "offset", "bullet", "dash",
126 "hyphen", "item", "enum",
127 "tag", "diag", "hang",
128 "ohang", "inset", "column",
129 "width", "compact", "std",
130 "filled", "words", "emphasis",
131 "symbolic", "nested"
132 };
133
134 const char * const *mdoc_macronames = __mdoc_macronames;
135 const char * const *mdoc_argnames = __mdoc_argnames;
136
/* Forward declarations of the file-local helpers. */
static	void		  mdoc_free1(struct mdoc *mdoc);
static	int		  mdoc_alloc1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *mdoc, int line,
				int pos, int tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln,
				const char *buf);
146
147
148 const struct mdoc_node *
149 mdoc_node(const struct mdoc *m)
150 {
151
152 return(MDOC_HALT & m->flags ? NULL : m->first);
153 }
154
155
156 const struct mdoc_meta *
157 mdoc_meta(const struct mdoc *m)
158 {
159
160 return(MDOC_HALT & m->flags ? NULL : &m->meta);
161 }
162
163
164 /*
165 * Frees volatile resources (parse tree, meta-data, fields).
166 */
167 static void
168 mdoc_free1(struct mdoc *mdoc)
169 {
170
171 if (mdoc->first)
172 mdoc_node_freelist(mdoc->first);
173 if (mdoc->meta.title)
174 free(mdoc->meta.title);
175 if (mdoc->meta.os)
176 free(mdoc->meta.os);
177 if (mdoc->meta.name)
178 free(mdoc->meta.name);
179 if (mdoc->meta.arch)
180 free(mdoc->meta.arch);
181 if (mdoc->meta.vol)
182 free(mdoc->meta.vol);
183 }
184
185
186 /*
187 * Allocate all volatile resources (parse tree, meta-data, fields).
188 */
189 static int
190 mdoc_alloc1(struct mdoc *mdoc)
191 {
192
193 bzero(&mdoc->meta, sizeof(struct mdoc_meta));
194 mdoc->flags = 0;
195 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
196 mdoc->last = calloc(1, sizeof(struct mdoc_node));
197 if (NULL == mdoc->last)
198 return(0);
199
200 mdoc->first = mdoc->last;
201 mdoc->last->type = MDOC_ROOT;
202 mdoc->next = MDOC_NEXT_CHILD;
203 return(1);
204 }
205
206
/*
 * Prepare the parser for a new document: release the volatile parse
 * data with mdoc_free1(), then set it up afresh with mdoc_alloc1().
 * Data that outlives a single parse (callbacks, hash table, parse
 * flags) is not touched.  Returns the result of mdoc_alloc1(): 1 on
 * success, 0 on allocation failure.
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 rc;

	mdoc_free1(mdoc);
	rc = mdoc_alloc1(mdoc);
	return(rc);
}
220
221
222 /*
223 * Completely free up all volatile and non-volatile parse resources.
224 * After invocation, the pointer is no longer usable.
225 */
226 void
227 mdoc_free(struct mdoc *mdoc)
228 {
229
230 mdoc_free1(mdoc);
231 if (mdoc->htab)
232 mdoc_hash_free(mdoc->htab);
233 free(mdoc);
234 }
235
236
237 /*
238 * Allocate volatile and non-volatile parse resources.
239 */
240 struct mdoc *
241 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
242 {
243 struct mdoc *p;
244
245 if (NULL == (p = calloc(1, sizeof(struct mdoc))))
246 return(NULL);
247 if (cb)
248 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
249
250 p->data = data;
251 p->pflags = pflags;
252
253 if (NULL == (p->htab = mdoc_hash_alloc())) {
254 free(p);
255 return(NULL);
256 } else if (mdoc_alloc1(p))
257 return(p);
258
259 free(p);
260 return(NULL);
261 }
262
263
264 /*
265 * Climb back up the parse tree, validating open scopes. Mostly calls
266 * through to macro_end() in macro.c.
267 */
268 int
269 mdoc_endparse(struct mdoc *m)
270 {
271
272 if (MDOC_HALT & m->flags)
273 return(0);
274 else if (mdoc_macroend(m))
275 return(1);
276 m->flags |= MDOC_HALT;
277 return(0);
278 }
279
280
281 /*
282 * Main parse routine. Parses a single line -- really just hands off to
283 * the macro (parsemacro()) or text parser (parsetext()).
284 */
285 int
286 mdoc_parseln(struct mdoc *m, int ln, char *buf)
287 {
288
289 if (MDOC_HALT & m->flags)
290 return(0);
291
292 return('.' == *buf ? parsemacro(m, ln, buf) :
293 parsetext(m, ln, buf));
294 }
295
296
297 int
298 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
299 const char *fmt, ...)
300 {
301 char buf[256];
302 va_list ap;
303
304 if (NULL == mdoc->cb.mdoc_err)
305 return(0);
306
307 va_start(ap, fmt);
308 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
309 va_end(ap);
310
311 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
312 }
313
314
315 int
316 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
317 {
318 char buf[256];
319 va_list ap;
320
321 if (NULL == mdoc->cb.mdoc_warn)
322 return(0);
323
324 va_start(ap, fmt);
325 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
326 va_end(ap);
327
328 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
329 }
330
331
332 int
333 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
334 {
335 const char *p;
336
337 p = __mdoc_merrnames[(int)type];
338 assert(p);
339
340 if (iserr)
341 return(mdoc_verr(m, line, pos, p));
342
343 return(mdoc_vwarn(m, line, pos, p));
344 }
345
346
347 int
348 mdoc_macro(struct mdoc *m, int tok,
349 int ln, int pp, int *pos, char *buf)
350 {
351
352 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
353 MDOC_PBODY & m->flags)
354 return(mdoc_perr(m, ln, pp, EPROLBODY));
355 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
356 ! (MDOC_PBODY & m->flags))
357 return(mdoc_perr(m, ln, pp, EBODYPROL));
358
359 if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
360 return(mdoc_perr(m, ln, pp, ENOCALL));
361
362 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
363 }
364
365
366 static int
367 node_append(struct mdoc *mdoc, struct mdoc_node *p)
368 {
369
370 assert(mdoc->last);
371 assert(mdoc->first);
372 assert(MDOC_ROOT != p->type);
373
374 switch (mdoc->next) {
375 case (MDOC_NEXT_SIBLING):
376 mdoc->last->next = p;
377 p->prev = mdoc->last;
378 p->parent = mdoc->last->parent;
379 break;
380 case (MDOC_NEXT_CHILD):
381 mdoc->last->child = p;
382 p->parent = mdoc->last;
383 break;
384 default:
385 abort();
386 /* NOTREACHED */
387 }
388
389 p->parent->nchild++;
390
391 if ( ! mdoc_valid_pre(mdoc, p))
392 return(0);
393 if ( ! mdoc_action_pre(mdoc, p))
394 return(0);
395
396 switch (p->type) {
397 case (MDOC_HEAD):
398 assert(MDOC_BLOCK == p->parent->type);
399 p->parent->head = p;
400 break;
401 case (MDOC_TAIL):
402 assert(MDOC_BLOCK == p->parent->type);
403 p->parent->tail = p;
404 break;
405 case (MDOC_BODY):
406 assert(MDOC_BLOCK == p->parent->type);
407 p->parent->body = p;
408 break;
409 default:
410 break;
411 }
412
413 mdoc->last = p;
414
415 switch (p->type) {
416 case (MDOC_TEXT):
417 if ( ! mdoc_valid_post(mdoc))
418 return(0);
419 if ( ! mdoc_action_post(mdoc))
420 return(0);
421 break;
422 default:
423 break;
424 }
425
426 return(1);
427 }
428
429
430 static struct mdoc_node *
431 node_alloc(struct mdoc *mdoc, int line,
432 int pos, int tok, enum mdoc_type type)
433 {
434 struct mdoc_node *p;
435
436 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
437 (void)mdoc_nerr(mdoc, mdoc->last, EMALLOC);
438 return(NULL);
439 }
440
441 p->sec = mdoc->lastsec;
442 p->line = line;
443 p->pos = pos;
444 p->tok = tok;
445 if (MDOC_TEXT != (p->type = type))
446 assert(p->tok >= 0);
447
448 return(p);
449 }
450
451
452 int
453 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
454 {
455 struct mdoc_node *p;
456
457 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
458 if (NULL == p)
459 return(0);
460 return(node_append(mdoc, p));
461 }
462
463
464 int
465 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
466 {
467 struct mdoc_node *p;
468
469 assert(mdoc->first);
470 assert(mdoc->last);
471
472 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
473 if (NULL == p)
474 return(0);
475 return(node_append(mdoc, p));
476 }
477
478
479 int
480 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
481 {
482 struct mdoc_node *p;
483
484 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
485 if (NULL == p)
486 return(0);
487 return(node_append(mdoc, p));
488 }
489
490
491 int
492 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
493 int tok, struct mdoc_arg *args)
494 {
495 struct mdoc_node *p;
496
497 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
498 if (NULL == p)
499 return(0);
500 p->args = args;
501 if (p->args)
502 (args->refcnt)++;
503 return(node_append(mdoc, p));
504 }
505
506
507 int
508 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
509 int tok, struct mdoc_arg *args)
510 {
511 struct mdoc_node *p;
512
513 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
514 if (NULL == p)
515 return(0);
516 p->args = args;
517 if (p->args)
518 (args->refcnt)++;
519 return(node_append(mdoc, p));
520 }
521
522
523 int
524 mdoc_word_alloc(struct mdoc *mdoc,
525 int line, int pos, const char *word)
526 {
527 struct mdoc_node *p;
528
529 p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT);
530 if (NULL == p)
531 return(0);
532 if (NULL == (p->string = strdup(word))) {
533 (void)mdoc_nerr(mdoc, mdoc->last, EMALLOC);
534 return(0);
535 }
536
537 return(node_append(mdoc, p));
538 }
539
540
541 void
542 mdoc_node_free(struct mdoc_node *p)
543 {
544
545 if (p->parent)
546 p->parent->nchild--;
547 if (p->string)
548 free(p->string);
549 if (p->args)
550 mdoc_argv_free(p->args);
551 free(p);
552 }
553
554
555 void
556 mdoc_node_freelist(struct mdoc_node *p)
557 {
558
559 if (p->child)
560 mdoc_node_freelist(p->child);
561 if (p->next)
562 mdoc_node_freelist(p->next);
563
564 assert(0 == p->nchild);
565 mdoc_node_free(p);
566 }
567
568
569 /*
570 * Parse free-form text, that is, a line that does not begin with the
571 * control character.
572 */
573 static int
574 parsetext(struct mdoc *m, int line, char *buf)
575 {
576
577 if (SEC_NONE == m->lastnamed)
578 return(mdoc_perr(m, line, 0, ETEXTPROL));
579
580 if (0 == buf[0] && ! (MDOC_LITERAL & m->flags))
581 return(mdoc_perr(m, line, 0, ENOBLANK));
582
583 if ( ! mdoc_word_alloc(m, line, 0, buf))
584 return(0);
585
586 m->next = MDOC_NEXT_SIBLING;
587 return(1);
588 }
589
590
591 static int
592 macrowarn(struct mdoc *m, int ln, const char *buf)
593 {
594 if ( ! (MDOC_IGN_MACRO & m->pflags))
595 return(mdoc_verr(m, ln, 1,
596 "unknown macro: %s%s",
597 buf, strlen(buf) > 3 ? "..." : ""));
598 return(mdoc_vwarn(m, ln, 1, "unknown macro: %s%s",
599 buf, strlen(buf) > 3 ? "..." : ""));
600 }
601
602
603 /*
604 * Parse a macro line, that is, a line beginning with the control
605 * character.
606 */
607 int
608 parsemacro(struct mdoc *m, int ln, char *buf)
609 {
610 int i, c;
611 char mac[5];
612
613 /* Empty lines are ignored. */
614
615 if (0 == buf[1])
616 return(1);
617
618 if (' ' == buf[1]) {
619 i = 2;
620 while (buf[i] && ' ' == buf[i])
621 i++;
622 if (0 == buf[i])
623 return(1);
624 return(mdoc_perr(m, ln, 1, ESPACE));
625 }
626
627 /* Copy the first word into a nil-terminated buffer. */
628
629 for (i = 1; i < 5; i++) {
630 if (0 == (mac[i - 1] = buf[i]))
631 break;
632 else if (' ' == buf[i])
633 break;
634 }
635
636 mac[i - 1] = 0;
637
638 if (i == 5 || i <= 2) {
639 if ( ! macrowarn(m, ln, mac))
640 goto err;
641 return(1);
642 }
643
644 if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
645 if ( ! macrowarn(m, ln, mac))
646 goto err;
647 return(1);
648 }
649
650 /* The macro is sane. Jump to the next word. */
651
652 while (buf[i] && ' ' == buf[i])
653 i++;
654
655 /* Begin recursive parse sequence. */
656
657 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
658 goto err;
659
660 return(1);
661
662 err: /* Error out. */
663
664 m->flags |= MDOC_HALT;
665 return(0);
666 }