]>
git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
1 /* $Id: mdoc.c,v 1.120 2010/04/05 08:59:46 kristaps Exp $ */
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/types.h>
32 #include "libmandoc.h"
34 const char *const __mdoc_merrnames
[MERRMAX
] = {
35 "trailing whitespace", /* ETAILWS */
36 "unexpected quoted parameter", /* EQUOTPARM */
37 "unterminated quoted parameter", /* EQUOTTERM */
38 "argument parameter suggested", /* EARGVAL */
39 "macro disallowed in prologue", /* EBODYPROL */
40 "macro disallowed in body", /* EPROLBODY */
41 "text disallowed in prologue", /* ETEXTPROL */
42 "blank line disallowed", /* ENOBLANK */
43 "text parameter too long", /* ETOOLONG */
44 "invalid escape sequence", /* EESCAPE */
45 "invalid character", /* EPRINT */
46 "document has no body", /* ENODAT */
47 "document has no prologue", /* ENOPROLOGUE */
48 "expected line arguments", /* ELINE */
49 "invalid AT&T argument", /* EATT */
50 "default name not yet set", /* ENAME */
51 "missing list type", /* ELISTTYPE */
52 "missing display type", /* EDISPTYPE */
53 "too many display types", /* EMULTIDISP */
54 "too many list types", /* EMULTILIST */
55 "NAME section must be first", /* ESECNAME */
56 "badly-formed NAME section", /* ENAMESECINC */
57 "argument repeated", /* EARGREP */
58 "expected boolean parameter", /* EBOOL */
59 "inconsistent column syntax", /* ECOLMIS */
60 "nested display invalid", /* ENESTDISP */
61 "width argument missing", /* EMISSWIDTH */
62 "invalid section for this manual section", /* EWRONGMSEC */
63 "section out of conventional order", /* ESECOOO */
64 "section repeated", /* ESECREP */
65 "invalid standard argument", /* EBADSTAND */
66 "multi-line arguments discouraged", /* ENOMULTILINE */
67 "multi-line arguments suggested", /* EMULTILINE */
68 "line arguments discouraged", /* ENOLINE */
69 "prologue macro out of conventional order", /* EPROLOOO */
70 "prologue macro repeated", /* EPROLREP */
71 "invalid manual section", /* EBADMSEC */
72 "invalid section", /* EBADSEC */
73 "invalid font mode", /* EFONT */
74 "invalid date syntax", /* EBADDATE */
75 "invalid number format", /* ENUMFMT */
76 "superfluous width argument", /* ENOWIDTH */
77 "system: utsname error", /* EUTSNAME */
78 "obsolete macro", /* EOBS */
79 "end-of-line scope violation", /* EIMPBRK */
80 "empty macro ignored", /* EIGNE */
81 "unclosed explicit scope", /* EOPEN */
82 "unterminated quoted phrase", /* EQUOTPHR */
83 "closure macro without prior context", /* ENOCTX */
84 "no description found for library", /* ELIB */
85 "bad child for parent context", /* EBADCHILD */
86 "list arguments preceding type", /* ENOTYPE */
89 const char *const __mdoc_macronames
[MDOC_MAX
] = {
90 "Ap", "Dd", "Dt", "Os",
91 "Sh", "Ss", "Pp", "D1",
92 "Dl", "Bd", "Ed", "Bl",
93 "El", "It", "Ad", "An",
94 "Ar", "Cd", "Cm", "Dv",
95 "Er", "Ev", "Ex", "Fa",
96 "Fd", "Fl", "Fn", "Ft",
97 "Ic", "In", "Li", "Nd",
98 "Nm", "Op", "Ot", "Pa",
99 "Rv", "St", "Va", "Vt",
101 "Xr", "%A", "%B", "%D",
103 "%I", "%J", "%N", "%O",
105 "%P", "%R", "%T", "%V",
106 "Ac", "Ao", "Aq", "At",
107 "Bc", "Bf", "Bo", "Bq",
108 "Bsx", "Bx", "Db", "Dc",
109 "Do", "Dq", "Ec", "Ef",
110 "Em", "Eo", "Fx", "Ms",
111 "No", "Ns", "Nx", "Ox",
112 "Pc", "Pf", "Po", "Pq",
113 "Qc", "Ql", "Qo", "Qq",
114 "Re", "Rs", "Sc", "So",
115 "Sq", "Sm", "Sx", "Sy",
116 "Tn", "Ux", "Xc", "Xo",
117 "Fo", "Fc", "Oo", "Oc",
118 "Bk", "Ek", "Bt", "Hf",
119 "Fr", "Ud", "Lb", "Lp",
120 "Lk", "Mt", "Brq", "Bro",
122 "Brc", "%C", "Es", "En",
124 "Dx", "%Q", "br", "sp",
129 const char *const __mdoc_argnames
[MDOC_ARG_MAX
] = {
130 "split", "nosplit", "ragged",
131 "unfilled", "literal", "file",
132 "offset", "bullet", "dash",
133 "hyphen", "item", "enum",
134 "tag", "diag", "hang",
135 "ohang", "inset", "column",
136 "width", "compact", "std",
137 "filled", "words", "emphasis",
138 "symbolic", "nested", "centered"
141 const char * const *mdoc_macronames
= __mdoc_macronames
;
142 const char * const *mdoc_argnames
= __mdoc_argnames
;
144 static void mdoc_free1(struct mdoc
*);
145 static void mdoc_alloc1(struct mdoc
*);
146 static struct mdoc_node
*node_alloc(struct mdoc
*, int, int,
147 enum mdoct
, enum mdoc_type
);
148 static int node_append(struct mdoc
*,
150 static int parsetext(struct mdoc
*, int, char *);
151 static int parsemacro(struct mdoc
*, int, char *);
152 static int macrowarn(struct mdoc
*, int, const char *);
153 static int pstring(struct mdoc
*, int, int,
154 const char *, size_t);
/*
 * Public accessor for the parse tree.  Hands back m->first, which
 * mdoc_alloc1() initialises to the MDOC_ROOT node, unless MDOC_HALT is
 * set in m->flags, in which case the caller gets NULL.
 */
156 const struct mdoc_node
*
157 mdoc_node(const struct mdoc
*m
)
160 return(MDOC_HALT
& m
->flags
? NULL
: m
->first
);
/*
 * Public accessor for the document meta-data.  Returns the address of
 * the meta structure embedded in `m' (not a copy), or NULL when
 * MDOC_HALT is set in m->flags.
 */
164 const struct mdoc_meta
*
165 mdoc_meta(const struct mdoc
*m
)
168 return(MDOC_HALT
& m
->flags
? NULL
: &m
->meta
);
173 * Frees volatile resources (parse tree, meta-data, fields).
/*
 * Releases the per-parse state held by `mdoc': the node tree reachable
 * from mdoc->first is handed to mdoc_node_freelist(), and the meta-data
 * fields visible below (title, name, arch, vol) are passed to free().
 * NOTE(review): free(NULL) is a no-op in standard C, so a NULL test
 * such as the one in front of the title is not required for safety.
 */
176 mdoc_free1(struct mdoc
*mdoc
)
180 mdoc_node_freelist(mdoc
->first
);
181 if (mdoc
->meta
.title
)
182 free(mdoc
->meta
.title
);
186 free(mdoc
->meta
.name
);
188 free(mdoc
->meta
.arch
);
190 free(mdoc
->meta
.vol
);
195 * Allocate all volatile resources (parse tree, meta-data, fields).
/*
 * Puts `mdoc' into its start-of-parse state: meta-data zeroed, no
 * section seen yet, and a tree consisting of a single MDOC_ROOT node.
 */
198 mdoc_alloc1(struct mdoc
*mdoc
)
/* Wipe every meta-data field in one go. */
201 memset(&mdoc
->meta
, 0, sizeof(struct mdoc_meta
));
/* Both section trackers start out as "none". */
203 mdoc
->lastnamed
= mdoc
->lastsec
= SEC_NONE
;
/* The zero-filled root node is both the first and the last node. */
204 mdoc
->last
= mandoc_calloc(1, sizeof(struct mdoc_node
));
205 mdoc
->first
= mdoc
->last
;
206 mdoc
->last
->type
= MDOC_ROOT
;
/* node_append() will link the first real node as a child of the root. */
207 mdoc
->next
= MDOC_NEXT_CHILD
;
212 * Free up volatile resources (see mdoc_free1()) then re-initialises the
213 * data with mdoc_alloc1(). After invocation, parse data has been reset
214 * and the parser is ready for re-invocation on a new tree; however,
215 * cross-parse non-volatile data is kept intact.
218 mdoc_reset(struct mdoc
*mdoc
)
227 * Completely free up all volatile and non-volatile parse resources.
228 * After invocation, the pointer is no longer usable.
231 mdoc_free(struct mdoc
*mdoc
)
240 * Allocate volatile and non-volatile parse resources.
/*
 * Creates a parser instance: a zero-filled struct mdoc obtained from
 * mandoc_calloc(), into which the callback table *cb is copied by value
 * (memcpy into p->cb), so the parser does not keep a pointer to the
 * caller's table.  How `data' and `pflags' are stored and what is
 * returned are on lines not visible in this view.
 */
243 mdoc_alloc(void *data
, int pflags
, const struct mdoc_cb
*cb
)
247 p
= mandoc_calloc(1, sizeof(struct mdoc
));
250 memcpy(&p
->cb
, cb
, sizeof(struct mdoc_cb
));
262 * Climb back up the parse tree, validating open scopes. Mostly calls
263 * through to mdoc_macroend() in macro.c.
266 mdoc_endparse(struct mdoc
*m
)
269 if (MDOC_HALT
& m
->flags
)
271 else if (mdoc_macroend(m
))
273 m
->flags
|= MDOC_HALT
;
279 * Main parse routine. Parses a single line -- really just hands off to
280 * the macro (parsemacro()) or text parser (parsetext()).
/*
 * Entry point for one input line.  `ln' is the line number handed on to
 * the sub-parsers and `buf' the line itself.  MDOC_HALT in m->flags is
 * tested first (the statement executed in that case is not visible
 * here).  Otherwise the first byte decides: '.' selects parsemacro(),
 * anything else parsetext(), and that parser's result is returned.
 */
283 mdoc_parseln(struct mdoc
*m
, int ln
, char *buf
)
286 if (MDOC_HALT
& m
->flags
)
289 return('.' == *buf
? parsemacro(m
, ln
, buf
) :
290 parsetext(m
, ln
, buf
));
/*
 * printf-style error reporter.  The message is formatted into the local
 * buffer `buf' and passed, together with the client's opaque
 * mdoc->data pointer, the line and the column, to the cb.mdoc_err
 * callback; the callback's return value is returned unchanged.
 * A NULL callback is tested for first; what is returned in that case
 * is on a line not visible in this view.
 */
295 mdoc_verr(struct mdoc
*mdoc
, int ln
, int pos
,
296 const char *fmt
, ...)
301 if (NULL
== mdoc
->cb
.mdoc_err
)
/*
 * The result is discarded, so an over-long message is silently
 * truncated.  vsnprintf() already reserves room for the terminating
 * NUL within the size it is given, so (assuming buf is an array) the
 * "- 1" only leaves the last byte of buf unused.
 */
305 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
308 return((*mdoc
->cb
.mdoc_err
)(mdoc
->data
, ln
, pos
, buf
));
/*
 * printf-style warning reporter.  The message is formatted into the
 * local buffer `buf' and passed, together with the client's opaque
 * mdoc->data pointer, the line and the column, to the cb.mdoc_warn
 * callback; the callback's return value is returned unchanged.
 * A NULL callback is tested for first; what is returned in that case
 * is on a line not visible in this view.
 */
313 mdoc_vwarn(struct mdoc
*mdoc
, int ln
, int pos
, const char *fmt
, ...)
318 if (NULL
== mdoc
->cb
.mdoc_warn
)
/*
 * The result is discarded, so an over-long message is silently
 * truncated.  vsnprintf() already reserves room for the terminating
 * NUL within the size it is given, so (assuming buf is an array) the
 * "- 1" only leaves the last byte of buf unused.
 */
322 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
325 return((*mdoc
->cb
.mdoc_warn
)(mdoc
->data
, ln
, pos
, buf
));
/*
 * Reports one of the canned diagnostics.  `type' indexes
 * __mdoc_merrnames to obtain the message text, which is then sent to
 * either mdoc_verr() or mdoc_vwarn() for the given line and column.
 * The test choosing between the two is not visible in this view;
 * presumably it is on `iserr' -- confirm.
 * The table entry is used as the format string itself; none of the
 * table entries visible in this view contain a '%'.
 */
330 mdoc_err(struct mdoc
*m
, int line
, int pos
, int iserr
, enum merr type
)
334 p
= __mdoc_merrnames
[(int)type
];
338 return(mdoc_verr(m
, line
, pos
, p
));
340 return(mdoc_vwarn(m
, line
, pos
, p
));
/*
 * Runs the handler registered for macro `tok'.  `ln' is the input line,
 * `pp' the column used in diagnostics, and `pos'/`buf' are forwarded
 * untouched to the handler.  Before dispatching, the macro's
 * MDOC_PROLOGUE flag (from mdoc_macros[tok].flags) is compared against
 * the parser's MDOC_PBODY state.  The return value is that of the
 * handler mdoc_macros[tok].fp, unless one of the checks returns first.
 */
345 mdoc_macro(struct mdoc
*m
, enum mdoct tok
,
346 int ln
, int pp
, int *pos
, char *buf
)
349 assert(tok
< MDOC_MAX
);
351 * If we're in the prologue, deny "body" macros. Similarly, if
352 * we're in the body, deny prologue calls.
/*
 * NOTE(review): this branch is taken for a prologue macro seen after
 * MDOC_PBODY has been set, yet it warns with EBODYPROL, whose text is
 * "macro disallowed in prologue"; EPROLBODY ("macro disallowed in
 * body") looks like the matching code -- confirm.  MDOC_PBODY is
 * already set on this path, so the "|=" at the end of the branch does
 * not change m->flags here.
 */
354 if (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
&&
355 MDOC_PBODY
& m
->flags
) {
356 if ( ! mdoc_pwarn(m
, ln
, pp
, EBODYPROL
))
359 * FIXME: do this in mdoc_action.c.
/* Fill in any meta-data that is still unset with placeholder values. */
361 if (NULL
== m
->meta
.title
)
362 m
->meta
.title
= mandoc_strdup("unknown");
363 if (NULL
== m
->meta
.vol
)
364 m
->meta
.vol
= mandoc_strdup("local");
365 if (NULL
== m
->meta
.os
)
366 m
->meta
.os
= mandoc_strdup("local");
367 if (0 == m
->meta
.date
)
368 m
->meta
.date
= time(NULL
);
369 m
->flags
|= MDOC_PBODY
;
/* A non-prologue macro before the body has begun is reported via mdoc_perr(). */
371 if ( ! (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
) &&
372 ! (MDOC_PBODY
& m
->flags
))
373 return(mdoc_perr(m
, ln
, pp
, EBODYPROL
));
375 return((*mdoc_macros
[tok
].fp
)(m
, tok
, ln
, pp
, pos
, buf
));
/*
 * Links the freshly allocated node `p' into the tree relative to
 * mdoc->last, as directed by mdoc->next:
 *   MDOC_NEXT_SIBLING - p becomes last->next, with last as its prev
 *                       and last's parent as its parent;
 *   MDOC_NEXT_CHILD   - p becomes last->child, with last as parent.
 * `p' must not be an MDOC_ROOT node (asserted).  After linking,
 * mdoc_valid_pre() and mdoc_action_pre() are called on p; three
 * further cases assert that p's parent is an MDOC_BLOCK; and
 * mdoc_valid_post() / mdoc_action_post() are called further down.
 * The case labels, the conditions guarding the post calls and all
 * failure returns are on lines not visible in this view.
 */
380 node_append(struct mdoc
*mdoc
, struct mdoc_node
*p
)
385 assert(MDOC_ROOT
!= p
->type
);
387 switch (mdoc
->next
) {
388 case (MDOC_NEXT_SIBLING
):
389 mdoc
->last
->next
= p
;
390 p
->prev
= mdoc
->last
;
391 p
->parent
= mdoc
->last
->parent
;
393 case (MDOC_NEXT_CHILD
):
394 mdoc
->last
->child
= p
;
395 p
->parent
= mdoc
->last
;
404 if ( ! mdoc_valid_pre(mdoc
, p
))
406 if ( ! mdoc_action_pre(mdoc
, p
))
411 assert(MDOC_BLOCK
== p
->parent
->type
);
415 assert(MDOC_BLOCK
== p
->parent
->type
);
419 assert(MDOC_BLOCK
== p
->parent
->type
);
430 if ( ! mdoc_valid_post(mdoc
))
432 if ( ! mdoc_action_post(mdoc
))
443 static struct mdoc_node
*
444 node_alloc(struct mdoc
*m
, int line
, int pos
,
445 enum mdoct tok
, enum mdoc_type type
)
449 p
= mandoc_calloc(1, sizeof(struct mdoc_node
));
/*
 * Creates an MDOC_TAIL node for macro `tok' at line/pos with
 * node_alloc() and links it into the tree with node_append().
 * Afterwards m->next is MDOC_NEXT_CHILD, so the next node appended is
 * linked as a child of m->last.  The failure path after node_append()
 * is not visible in this view.
 */
461 mdoc_tail_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
465 p
= node_alloc(m
, line
, pos
, tok
, MDOC_TAIL
);
466 if ( ! node_append(m
, p
))
468 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Creates an MDOC_HEAD node for macro `tok' at line/pos with
 * node_alloc() and links it into the tree with node_append().
 * Afterwards m->next is MDOC_NEXT_CHILD, so the next node appended is
 * linked as a child of m->last.  The failure path after node_append()
 * is not visible in this view.
 */
474 mdoc_head_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
481 p
= node_alloc(m
, line
, pos
, tok
, MDOC_HEAD
);
482 if ( ! node_append(m
, p
))
484 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Creates an MDOC_BODY node for macro `tok' at line/pos with
 * node_alloc() and links it into the tree with node_append().
 * Afterwards m->next is MDOC_NEXT_CHILD, so the next node appended is
 * linked as a child of m->last.  The failure path after node_append()
 * is not visible in this view.
 */
490 mdoc_body_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
494 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BODY
);
495 if ( ! node_append(m
, p
))
497 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Creates an MDOC_BLOCK node for macro `tok' at line/pos with
 * node_alloc() and links it into the tree with node_append().
 * Afterwards m->next is MDOC_NEXT_CHILD, so the next node appended is
 * linked as a child of m->last.  What is done with `args' happens on
 * lines not visible in this view (presumably it is attached to the new
 * node -- confirm), as does the failure path after node_append().
 */
503 mdoc_block_alloc(struct mdoc
*m
, int line
, int pos
,
504 enum mdoct tok
, struct mdoc_arg
*args
)
508 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BLOCK
);
512 if ( ! node_append(m
, p
))
514 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Creates an MDOC_ELEM node for macro `tok' at line/pos with
 * node_alloc() and links it into the tree with node_append().
 * Afterwards m->next is MDOC_NEXT_CHILD, so the next node appended is
 * linked as a child of m->last.  What is done with `args' happens on
 * lines not visible in this view (presumably it is attached to the new
 * node -- confirm), as does the failure path after node_append().
 */
520 mdoc_elem_alloc(struct mdoc
*m
, int line
, int pos
,
521 enum mdoct tok
, struct mdoc_arg
*args
)
525 p
= node_alloc(m
, line
, pos
, tok
, MDOC_ELEM
);
529 if ( ! node_append(m
, p
))
531 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Adds a text node holding a private copy of the string at `p'.
 * `len' is the number of bytes to keep; len + 1 bytes are allocated so
 * the copy is always NUL-terminated.  The node is created with type
 * MDOC_TEXT and -1 in place of a macro token, linked in with
 * node_append(), and m->next is then set to MDOC_NEXT_SIBLING so that
 * the following node is linked after this one rather than beneath it.
 * The failure path after node_append() is not visible in this view.
 */
537 pstring(struct mdoc
*m
, int line
, int pos
, const char *p
, size_t len
)
542 n
= node_alloc(m
, line
, pos
, -1, MDOC_TEXT
);
543 n
->string
= mandoc_malloc(len
+ 1);
/*
 * strlcpy() returns the length of the source string, so the assert
 * below fires if `p' holds more than `len' bytes before its NUL.
 */
544 sv
= strlcpy(n
->string
, p
, len
+ 1);
546 /* Prohibit truncation. */
547 assert(sv
< len
+ 1);
549 if ( ! node_append(m
, n
))
551 m
->next
= MDOC_NEXT_SIBLING
;
/*
 * Adds the whole NUL-terminated string `p' to the tree as a text node:
 * a thin wrapper that calls pstring() with strlen(p) as the length and
 * returns its result.
 */
557 mdoc_word_alloc(struct mdoc
*m
, int line
, int pos
, const char *p
)
560 return(pstring(m
, line
, pos
, p
, strlen(p
)));
565 mdoc_node_free(struct mdoc_node
*p
)
573 mdoc_argv_free(p
->args
);
/*
 * Tears down the subtree at `p' together with the siblings that follow
 * it: the function calls itself on p->child and then on p->next, and
 * afterwards asserts that p->nchild is zero.  Any NULL tests around
 * the recursive calls and the release of `p' itself are on lines not
 * visible in this view.
 * NOTE(review): because siblings are reached by recursion on p->next,
 * call depth grows with the length of a sibling chain.
 */
579 mdoc_node_freelist(struct mdoc_node
*p
)
583 mdoc_node_freelist(p
->child
);
585 mdoc_node_freelist(p
->next
);
587 assert(0 == p
->nchild
);
593 * Parse free-form text, that is, a line that does not begin with the control character ('.').
597 parsetext(struct mdoc
*m
, int line
, char *buf
)
602 if (SEC_NONE
== m
->lastnamed
)
603 return(mdoc_perr(m
, line
, 0, ETEXTPROL
));
606 * If in literal mode, then pass the buffer directly to the
607 * back-end, as it should be preserved as a single term.
610 if (MDOC_LITERAL
& m
->flags
)
611 return(mdoc_word_alloc(m
, line
, 0, buf
));
613 /* Disallow blank/white-space lines in non-literal mode. */
615 for (i
= 0; ' ' == buf
[i
]; i
++)
616 /* Skip leading whitespace. */ ;
618 if ('\0' == buf
[i
]) {
619 if ( ! mdoc_pwarn(m
, line
, 0, ENOBLANK
))
622 * Assume that a `Pp' should be inserted in the case of
623 * a blank line. Technically, blank lines aren't
624 * allowed, but enough manuals assume this behaviour
625 * that we want to work around it.
627 if ( ! mdoc_elem_alloc(m
, line
, 0, MDOC_Pp
, NULL
))
632 * Break apart a free-form line into tokens. Spaces are
633 * stripped out of the input.
636 for (j
= i
; buf
[i
]; i
++) {
640 /* Escaped whitespace. */
641 if (i
&& ' ' == buf
[i
] && '\\' == buf
[i
- 1])
647 if ( ! pstring(m
, line
, j
, &buf
[j
], (size_t)(i
- j
)))
650 /* Trailing whitespace? Check at overwritten byte. */
652 if (' ' == sv
&& '\0' == buf
[i
])
653 if ( ! mdoc_pwarn(m
, line
, i
- 1, ETAILWS
))
656 for ( ; ' ' == buf
[i
]; i
++)
657 /* Skip trailing whitespace. */ ;
661 /* Trailing whitespace? */
663 if (' ' == buf
[i
- 1] && '\0' == buf
[i
])
664 if ( ! mdoc_pwarn(m
, line
, i
- 1, ETAILWS
))
671 if (j
!= i
&& ! pstring(m
, line
, j
, &buf
[j
], (size_t)(i
- j
)))
674 m
->next
= MDOC_NEXT_SIBLING
;
/*
 * Reports the unknown macro name `buf' found on line `ln', always at
 * column 0.  Unless MDOC_IGN_MACRO is set in m->pflags the report is
 * an error (mdoc_verr()); with the flag set it is a warning
 * (mdoc_vwarn()).  The result of whichever reporter is called is
 * returned.  When `buf' is longer than three characters "..." is
 * appended to the message; presumably this marks a name that was cut
 * short when it was copied by the caller -- confirm.
 */
681 macrowarn(struct mdoc
*m
, int ln
, const char *buf
)
683 if ( ! (MDOC_IGN_MACRO
& m
->pflags
))
684 return(mdoc_verr(m
, ln
, 0,
685 "unknown macro: %s%s",
686 buf
, strlen(buf
) > 3 ? "..." : ""));
687 return(mdoc_vwarn(m
, ln
, 0, "unknown macro: %s%s",
688 buf
, strlen(buf
) > 3 ? "..." : ""));
693 * Parse a macro line, that is, a line beginning with the control character ('.').
697 parsemacro(struct mdoc
*m
, int ln
, char *buf
)
702 /* Empty lines are ignored. */
709 /* Accept whitespace after the initial control char. */
713 while (buf
[i
] && ' ' == buf
[i
])
719 /* Copy the first word into a nil-terminated buffer. */
721 for (j
= 0; j
< 4; j
++, i
++) {
722 if ('\0' == (mac
[j
] = buf
[i
]))
724 else if (' ' == buf
[i
])
727 /* Check for invalid characters. */
729 if (isgraph((u_char
)buf
[i
]))
731 return(mdoc_perr(m
, ln
, i
, EPRINT
));
736 if (j
== 4 || j
< 2) {
737 if ( ! macrowarn(m
, ln
, mac
))
742 if (MDOC_MAX
== (c
= mdoc_hash_find(mac
))) {
743 if ( ! macrowarn(m
, ln
, mac
))
748 /* The macro is sane. Jump to the next word. */
750 while (buf
[i
] && ' ' == buf
[i
])
753 /* Trailing whitespace? */
755 if ('\0' == buf
[i
] && ' ' == buf
[i
- 1])
756 if ( ! mdoc_pwarn(m
, ln
, i
- 1, ETAILWS
))
760 * Begin recursive parse sequence. Since we're at the start of
761 * the line, we don't need to do callable/parseable checks.
763 if ( ! mdoc_macro(m
, c
, ln
, 1, &i
, buf
))
768 err
: /* Error out. */
770 m
->flags
|= MDOC_HALT
;