]> git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
Added horizontal scaling units to -Tman -Tascii.
[mandoc.git] / mdoc.c
1 /* $Id: mdoc.c,v 1.109 2009/10/15 02:56:51 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <sys/types.h>
18
19 #include <assert.h>
20 #include <ctype.h>
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25
26 #include "libmdoc.h"
27
28 const char *const __mdoc_merrnames[MERRMAX] = {
29 "trailing whitespace", /* ETAILWS */
30 "unexpected quoted parameter", /* EQUOTPARM */
31 "unterminated quoted parameter", /* EQUOTTERM */
32 "system: malloc error", /* EMALLOC */
33 "argument parameter suggested", /* EARGVAL */
34 "macro disallowed in prologue", /* EBODYPROL */
35 "macro disallowed in body", /* EPROLBODY */
36 "text disallowed in prologue", /* ETEXTPROL */
37 "blank line disallowed", /* ENOBLANK */
38 "text parameter too long", /* ETOOLONG */
39 "invalid escape sequence", /* EESCAPE */
40 "invalid character", /* EPRINT */
41 "document has no body", /* ENODAT */
42 "document has no prologue", /* ENOPROLOGUE */
43 "expected line arguments", /* ELINE */
44 "invalid AT&T argument", /* EATT */
45 "default name not yet set", /* ENAME */
46 "missing list type", /* ELISTTYPE */
47 "missing display type", /* EDISPTYPE */
48 "too many display types", /* EMULTIDISP */
49 "too many list types", /* EMULTILIST */
50 "NAME section must be first", /* ESECNAME */
51 "badly-formed NAME section", /* ENAMESECINC */
52 "argument repeated", /* EARGREP */
53 "expected boolean parameter", /* EBOOL */
54 "inconsistent column syntax", /* ECOLMIS */
55 "nested display invalid", /* ENESTDISP */
56 "width argument missing", /* EMISSWIDTH */
57 "invalid section for this manual section", /* EWRONGMSEC */
58 "section out of conventional order", /* ESECOOO */
59 "section repeated", /* ESECREP */
60 "invalid standard argument", /* EBADSTAND */
61 "multi-line arguments discouraged", /* ENOMULTILINE */
62 "multi-line arguments suggested", /* EMULTILINE */
63 "line arguments discouraged", /* ENOLINE */
64 "prologue macro out of conventional order", /* EPROLOOO */
65 "prologue macro repeated", /* EPROLREP */
66 "invalid manual section", /* EBADMSEC */
67 "invalid section", /* EBADSEC */
68 "invalid font mode", /* EFONT */
69 "invalid date syntax", /* EBADDATE */
70 "invalid number format", /* ENUMFMT */
71 "superfluous width argument", /* ENOWIDTH */
72 "system: utsname error", /* EUTSNAME */
73 "obsolete macro", /* EOBS */
74 "end-of-line scope violation", /* EIMPBRK */
75 "empty macro ignored", /* EIGNE */
76 "unclosed explicit scope", /* EOPEN */
77 "unterminated quoted phrase", /* EQUOTPHR */
78 "closure macro without prior context", /* ENOCTX */
79 "no description found for library", /* ELIB */
80 "bad child for parent context", /* EBADCHILD */
81 "list arguments preceding type", /* ENOTYPE */
82 };
83
84 const char *const __mdoc_macronames[MDOC_MAX] = {
85 "Ap", "Dd", "Dt", "Os",
86 "Sh", "Ss", "Pp", "D1",
87 "Dl", "Bd", "Ed", "Bl",
88 "El", "It", "Ad", "An",
89 "Ar", "Cd", "Cm", "Dv",
90 "Er", "Ev", "Ex", "Fa",
91 "Fd", "Fl", "Fn", "Ft",
92 "Ic", "In", "Li", "Nd",
93 "Nm", "Op", "Ot", "Pa",
94 "Rv", "St", "Va", "Vt",
95 /* LINTED */
96 "Xr", "\%A", "\%B", "\%D",
97 /* LINTED */
98 "\%I", "\%J", "\%N", "\%O",
99 /* LINTED */
100 "\%P", "\%R", "\%T", "\%V",
101 "Ac", "Ao", "Aq", "At",
102 "Bc", "Bf", "Bo", "Bq",
103 "Bsx", "Bx", "Db", "Dc",
104 "Do", "Dq", "Ec", "Ef",
105 "Em", "Eo", "Fx", "Ms",
106 "No", "Ns", "Nx", "Ox",
107 "Pc", "Pf", "Po", "Pq",
108 "Qc", "Ql", "Qo", "Qq",
109 "Re", "Rs", "Sc", "So",
110 "Sq", "Sm", "Sx", "Sy",
111 "Tn", "Ux", "Xc", "Xo",
112 "Fo", "Fc", "Oo", "Oc",
113 "Bk", "Ek", "Bt", "Hf",
114 "Fr", "Ud", "Lb", "Lp",
115 "Lk", "Mt", "Brq", "Bro",
116 /* LINTED */
117 "Brc", "\%C", "Es", "En",
118 /* LINTED */
119 "Dx", "\%Q", "br", "sp"
120 };
121
122 const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
123 "split", "nosplit", "ragged",
124 "unfilled", "literal", "file",
125 "offset", "bullet", "dash",
126 "hyphen", "item", "enum",
127 "tag", "diag", "hang",
128 "ohang", "inset", "column",
129 "width", "compact", "std",
130 "filled", "words", "emphasis",
131 "symbolic", "nested", "centered"
132 };
133
134 const char * const *mdoc_macronames = __mdoc_macronames;
135 const char * const *mdoc_argnames = __mdoc_argnames;
136
137 static void mdoc_free1(struct mdoc *);
138 static int mdoc_alloc1(struct mdoc *);
139 static struct mdoc_node *node_alloc(struct mdoc *, int, int,
140 int, enum mdoc_type);
141 static int node_append(struct mdoc *,
142 struct mdoc_node *);
143 static int parsetext(struct mdoc *, int, char *);
144 static int parsemacro(struct mdoc *, int, char *);
145 static int macrowarn(struct mdoc *, int, const char *);
146 static int pstring(struct mdoc *, int, int,
147 const char *, size_t);
148
149 #ifdef __linux__
150 extern size_t strlcpy(char *, const char *, size_t);
151 #endif
152
153
154 const struct mdoc_node *
155 mdoc_node(const struct mdoc *m)
156 {
157
158 return(MDOC_HALT & m->flags ? NULL : m->first);
159 }
160
161
162 const struct mdoc_meta *
163 mdoc_meta(const struct mdoc *m)
164 {
165
166 return(MDOC_HALT & m->flags ? NULL : &m->meta);
167 }
168
169
170 /*
171 * Frees volatile resources (parse tree, meta-data, fields).
172 */
173 static void
174 mdoc_free1(struct mdoc *mdoc)
175 {
176
177 if (mdoc->first)
178 mdoc_node_freelist(mdoc->first);
179 if (mdoc->meta.title)
180 free(mdoc->meta.title);
181 if (mdoc->meta.os)
182 free(mdoc->meta.os);
183 if (mdoc->meta.name)
184 free(mdoc->meta.name);
185 if (mdoc->meta.arch)
186 free(mdoc->meta.arch);
187 if (mdoc->meta.vol)
188 free(mdoc->meta.vol);
189 }
190
191
192 /*
193 * Allocate all volatile resources (parse tree, meta-data, fields).
194 */
195 static int
196 mdoc_alloc1(struct mdoc *mdoc)
197 {
198
199 bzero(&mdoc->meta, sizeof(struct mdoc_meta));
200 mdoc->flags = 0;
201 mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
202 mdoc->last = calloc(1, sizeof(struct mdoc_node));
203 if (NULL == mdoc->last)
204 return(0);
205
206 mdoc->first = mdoc->last;
207 mdoc->last->type = MDOC_ROOT;
208 mdoc->next = MDOC_NEXT_CHILD;
209 return(1);
210 }
211
212
/*
 * Prepare the parser for a new document: release the volatile data via
 * mdoc_free1() and rebuild it via mdoc_alloc1().  Fields that neither
 * helper touches (callbacks, user data, parse flags) survive the reset.
 * Returns the result of mdoc_alloc1(): 1 on success, 0 on allocation
 * failure.
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int	 ok;

	mdoc_free1(mdoc);
	ok = mdoc_alloc1(mdoc);
	return ok;
}
226
227
/*
 * Destroy a parser: release its volatile data through mdoc_free1() and
 * then the parser structure itself.  The pointer must not be used
 * after this call.
 */
void
mdoc_free(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);	/* parse tree and meta-data */
	free(mdoc);		/* the parser object */
}
239
240
241 /*
242 * Allocate volatile and non-volatile parse resources.
243 */
244 struct mdoc *
245 mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
246 {
247 struct mdoc *p;
248
249 if (NULL == (p = calloc(1, sizeof(struct mdoc))))
250 return(NULL);
251 if (cb)
252 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
253
254 mdoc_hash_init();
255
256 p->data = data;
257 p->pflags = pflags;
258
259 if (mdoc_alloc1(p))
260 return(p);
261
262 free(p);
263 return(NULL);
264 }
265
266
267 /*
268 * Climb back up the parse tree, validating open scopes. Mostly calls
269 * through to macro_end() in macro.c.
270 */
271 int
272 mdoc_endparse(struct mdoc *m)
273 {
274
275 if (MDOC_HALT & m->flags)
276 return(0);
277 else if (mdoc_macroend(m))
278 return(1);
279 m->flags |= MDOC_HALT;
280 return(0);
281 }
282
283
284 /*
285 * Main parse routine. Parses a single line -- really just hands off to
286 * the macro (parsemacro()) or text parser (parsetext()).
287 */
288 int
289 mdoc_parseln(struct mdoc *m, int ln, char *buf)
290 {
291
292 if (MDOC_HALT & m->flags)
293 return(0);
294
295 return('.' == *buf ? parsemacro(m, ln, buf) :
296 parsetext(m, ln, buf));
297 }
298
299
300 int
301 mdoc_verr(struct mdoc *mdoc, int ln, int pos,
302 const char *fmt, ...)
303 {
304 char buf[256];
305 va_list ap;
306
307 if (NULL == mdoc->cb.mdoc_err)
308 return(0);
309
310 va_start(ap, fmt);
311 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
312 va_end(ap);
313
314 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
315 }
316
317
318 int
319 mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
320 {
321 char buf[256];
322 va_list ap;
323
324 if (NULL == mdoc->cb.mdoc_warn)
325 return(0);
326
327 va_start(ap, fmt);
328 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
329 va_end(ap);
330
331 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
332 }
333
334
335 int
336 mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
337 {
338 const char *p;
339
340 p = __mdoc_merrnames[(int)type];
341 assert(p);
342
343 if (iserr)
344 return(mdoc_verr(m, line, pos, p));
345
346 return(mdoc_vwarn(m, line, pos, p));
347 }
348
349
350 int
351 mdoc_macro(struct mdoc *m, int tok,
352 int ln, int pp, int *pos, char *buf)
353 {
354 /*
355 * If we're in the prologue, deny "body" macros. Similarly, if
356 * we're in the body, deny prologue calls.
357 */
358 if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
359 MDOC_PBODY & m->flags)
360 return(mdoc_perr(m, ln, pp, EPROLBODY));
361 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
362 ! (MDOC_PBODY & m->flags))
363 return(mdoc_perr(m, ln, pp, EBODYPROL));
364
365 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
366 }
367
368
369 static int
370 node_append(struct mdoc *mdoc, struct mdoc_node *p)
371 {
372
373 assert(mdoc->last);
374 assert(mdoc->first);
375 assert(MDOC_ROOT != p->type);
376
377 switch (mdoc->next) {
378 case (MDOC_NEXT_SIBLING):
379 mdoc->last->next = p;
380 p->prev = mdoc->last;
381 p->parent = mdoc->last->parent;
382 break;
383 case (MDOC_NEXT_CHILD):
384 mdoc->last->child = p;
385 p->parent = mdoc->last;
386 break;
387 default:
388 abort();
389 /* NOTREACHED */
390 }
391
392 p->parent->nchild++;
393
394 if ( ! mdoc_valid_pre(mdoc, p))
395 return(0);
396 if ( ! mdoc_action_pre(mdoc, p))
397 return(0);
398
399 switch (p->type) {
400 case (MDOC_HEAD):
401 assert(MDOC_BLOCK == p->parent->type);
402 p->parent->head = p;
403 break;
404 case (MDOC_TAIL):
405 assert(MDOC_BLOCK == p->parent->type);
406 p->parent->tail = p;
407 break;
408 case (MDOC_BODY):
409 assert(MDOC_BLOCK == p->parent->type);
410 p->parent->body = p;
411 break;
412 default:
413 break;
414 }
415
416 mdoc->last = p;
417
418 switch (p->type) {
419 case (MDOC_TEXT):
420 if ( ! mdoc_valid_post(mdoc))
421 return(0);
422 if ( ! mdoc_action_post(mdoc))
423 return(0);
424 break;
425 default:
426 break;
427 }
428
429 return(1);
430 }
431
432
433 static struct mdoc_node *
434 node_alloc(struct mdoc *m, int line,
435 int pos, int tok, enum mdoc_type type)
436 {
437 struct mdoc_node *p;
438
439 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
440 (void)mdoc_nerr(m, m->last, EMALLOC);
441 return(NULL);
442 }
443
444 p->sec = m->lastsec;
445 p->line = line;
446 p->pos = pos;
447 p->tok = tok;
448 if (MDOC_TEXT != (p->type = type))
449 assert(p->tok >= 0);
450
451 return(p);
452 }
453
454
455 int
456 mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok)
457 {
458 struct mdoc_node *p;
459
460 p = node_alloc(m, line, pos, tok, MDOC_TAIL);
461 if (NULL == p)
462 return(0);
463 if ( ! node_append(m, p))
464 return(0);
465 m->next = MDOC_NEXT_CHILD;
466 return(1);
467 }
468
469
470 int
471 mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok)
472 {
473 struct mdoc_node *p;
474
475 assert(m->first);
476 assert(m->last);
477
478 p = node_alloc(m, line, pos, tok, MDOC_HEAD);
479 if (NULL == p)
480 return(0);
481 if ( ! node_append(m, p))
482 return(0);
483 m->next = MDOC_NEXT_CHILD;
484 return(1);
485 }
486
487
488 int
489 mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok)
490 {
491 struct mdoc_node *p;
492
493 p = node_alloc(m, line, pos, tok, MDOC_BODY);
494 if (NULL == p)
495 return(0);
496 if ( ! node_append(m, p))
497 return(0);
498 m->next = MDOC_NEXT_CHILD;
499 return(1);
500 }
501
502
503 int
504 mdoc_block_alloc(struct mdoc *m, int line, int pos,
505 int tok, struct mdoc_arg *args)
506 {
507 struct mdoc_node *p;
508
509 p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
510 if (NULL == p)
511 return(0);
512 p->args = args;
513 if (p->args)
514 (args->refcnt)++;
515 if ( ! node_append(m, p))
516 return(0);
517 m->next = MDOC_NEXT_CHILD;
518 return(1);
519 }
520
521
522 int
523 mdoc_elem_alloc(struct mdoc *m, int line, int pos,
524 int tok, struct mdoc_arg *args)
525 {
526 struct mdoc_node *p;
527
528 p = node_alloc(m, line, pos, tok, MDOC_ELEM);
529 if (NULL == p)
530 return(0);
531 p->args = args;
532 if (p->args)
533 (args->refcnt)++;
534 if ( ! node_append(m, p))
535 return(0);
536 m->next = MDOC_NEXT_CHILD;
537 return(1);
538 }
539
540
541 static int
542 pstring(struct mdoc *m, int line, int pos, const char *p, size_t len)
543 {
544 struct mdoc_node *n;
545 size_t sv;
546
547 n = node_alloc(m, line, pos, -1, MDOC_TEXT);
548 if (NULL == n)
549 return(mdoc_nerr(m, m->last, EMALLOC));
550
551 n->string = malloc(len + 1);
552 if (NULL == n->string) {
553 free(n);
554 return(mdoc_nerr(m, m->last, EMALLOC));
555 }
556
557 sv = strlcpy(n->string, p, len + 1);
558
559 /* Prohibit truncation. */
560 assert(sv < len + 1);
561
562 if ( ! node_append(m, n))
563 return(0);
564 m->next = MDOC_NEXT_SIBLING;
565 return(1);
566 }
567
568
/*
 * Append the whole NUL-terminated string `p' to the tree as one text
 * node, via pstring().  Returns pstring()'s result.
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	size_t		 len;

	len = strlen(p);
	return pstring(m, line, pos, p, len);
}
575
576
577 void
578 mdoc_node_free(struct mdoc_node *p)
579 {
580
581 if (p->parent)
582 p->parent->nchild--;
583 if (p->string)
584 free(p->string);
585 if (p->args)
586 mdoc_argv_free(p->args);
587 free(p);
588 }
589
590
591 void
592 mdoc_node_freelist(struct mdoc_node *p)
593 {
594
595 if (p->child)
596 mdoc_node_freelist(p->child);
597 if (p->next)
598 mdoc_node_freelist(p->next);
599
600 assert(0 == p->nchild);
601 mdoc_node_free(p);
602 }
603
604
605 /*
606 * Parse free-form text, that is, a line that does not begin with the
607 * control character.
608 */
609 static int
610 parsetext(struct mdoc *m, int line, char *buf)
611 {
612 int i, j;
613
614 if (SEC_NONE == m->lastnamed)
615 return(mdoc_perr(m, line, 0, ETEXTPROL));
616
617 /*
618 * If in literal mode, then pass the buffer directly to the
619 * back-end, as it should be preserved as a single term.
620 */
621
622 if (MDOC_LITERAL & m->flags)
623 return(mdoc_word_alloc(m, line, 0, buf));
624
625 /* Disallow blank/white-space lines in non-literal mode. */
626
627 for (i = 0; ' ' == buf[i]; i++)
628 /* Skip leading whitespace. */ ;
629 if (0 == buf[i])
630 return(mdoc_perr(m, line, 0, ENOBLANK));
631
632 /*
633 * Break apart a free-form line into tokens. Spaces are
634 * stripped out of the input.
635 */
636
637 for (j = i; buf[i]; i++) {
638 if (' ' != buf[i])
639 continue;
640
641 /* Escaped whitespace. */
642 if (i && ' ' == buf[i] && '\\' == buf[i - 1])
643 continue;
644
645 buf[i++] = 0;
646 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
647 return(0);
648
649 for ( ; ' ' == buf[i]; i++)
650 /* Skip trailing whitespace. */ ;
651
652 j = i;
653 if (0 == buf[i])
654 break;
655 }
656
657 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
658 return(0);
659
660 m->next = MDOC_NEXT_SIBLING;
661 return(1);
662 }
663
664
665
666
667 static int
668 macrowarn(struct mdoc *m, int ln, const char *buf)
669 {
670 if ( ! (MDOC_IGN_MACRO & m->pflags))
671 return(mdoc_verr(m, ln, 0,
672 "unknown macro: %s%s",
673 buf, strlen(buf) > 3 ? "..." : ""));
674 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s",
675 buf, strlen(buf) > 3 ? "..." : ""));
676 }
677
678
679 /*
680 * Parse a macro line, that is, a line beginning with the control
681 * character.
682 */
683 int
684 parsemacro(struct mdoc *m, int ln, char *buf)
685 {
686 int i, j, c;
687 char mac[5];
688
689 /* Empty lines are ignored. */
690
691 if (0 == buf[1])
692 return(1);
693
694 i = 1;
695
696 /* Accept whitespace after the initial control char. */
697
698 if (' ' == buf[i]) {
699 i++;
700 while (buf[i] && ' ' == buf[i])
701 i++;
702 if (0 == buf[i])
703 return(1);
704 }
705
706 /* Copy the first word into a nil-terminated buffer. */
707
708 for (j = 0; j < 4; j++, i++) {
709 if (0 == (mac[j] = buf[i]))
710 break;
711 else if (' ' == buf[i])
712 break;
713
714 /* Check for invalid characters. */
715
716 if (isgraph((u_char)buf[i]))
717 continue;
718 return(mdoc_perr(m, ln, i, EPRINT));
719 }
720
721 mac[j] = 0;
722
723 if (j == 4 || j < 2) {
724 if ( ! macrowarn(m, ln, mac))
725 goto err;
726 return(1);
727 }
728
729 if (MDOC_MAX == (c = mdoc_hash_find(mac))) {
730 if ( ! macrowarn(m, ln, mac))
731 goto err;
732 return(1);
733 }
734
735 /* The macro is sane. Jump to the next word. */
736
737 while (buf[i] && ' ' == buf[i])
738 i++;
739
740 /*
741 * Begin recursive parse sequence. Since we're at the start of
742 * the line, we don't need to do callable/parseable checks.
743 */
744 if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
745 goto err;
746
747 return(1);
748
749 err: /* Error out. */
750
751 m->flags |= MDOC_HALT;
752 return(0);
753 }
754
755