]>
git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
1 /* $Id: mdoc.c,v 1.119 2010/04/03 13:02:35 kristaps Exp $ */
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/types.h>
31 #include "libmandoc.h"
33 const char *const __mdoc_merrnames
[MERRMAX
] = {
34 "trailing whitespace", /* ETAILWS */
35 "unexpected quoted parameter", /* EQUOTPARM */
36 "unterminated quoted parameter", /* EQUOTTERM */
37 "argument parameter suggested", /* EARGVAL */
38 "macro disallowed in prologue", /* EBODYPROL */
39 "macro disallowed in body", /* EPROLBODY */
40 "text disallowed in prologue", /* ETEXTPROL */
41 "blank line disallowed", /* ENOBLANK */
42 "text parameter too long", /* ETOOLONG */
43 "invalid escape sequence", /* EESCAPE */
44 "invalid character", /* EPRINT */
45 "document has no body", /* ENODAT */
46 "document has no prologue", /* ENOPROLOGUE */
47 "expected line arguments", /* ELINE */
48 "invalid AT&T argument", /* EATT */
49 "default name not yet set", /* ENAME */
50 "missing list type", /* ELISTTYPE */
51 "missing display type", /* EDISPTYPE */
52 "too many display types", /* EMULTIDISP */
53 "too many list types", /* EMULTILIST */
54 "NAME section must be first", /* ESECNAME */
55 "badly-formed NAME section", /* ENAMESECINC */
56 "argument repeated", /* EARGREP */
57 "expected boolean parameter", /* EBOOL */
58 "inconsistent column syntax", /* ECOLMIS */
59 "nested display invalid", /* ENESTDISP */
60 "width argument missing", /* EMISSWIDTH */
61 "invalid section for this manual section", /* EWRONGMSEC */
62 "section out of conventional order", /* ESECOOO */
63 "section repeated", /* ESECREP */
64 "invalid standard argument", /* EBADSTAND */
65 "multi-line arguments discouraged", /* ENOMULTILINE */
66 "multi-line arguments suggested", /* EMULTILINE */
67 "line arguments discouraged", /* ENOLINE */
68 "prologue macro out of conventional order", /* EPROLOOO */
69 "prologue macro repeated", /* EPROLREP */
70 "invalid manual section", /* EBADMSEC */
71 "invalid section", /* EBADSEC */
72 "invalid font mode", /* EFONT */
73 "invalid date syntax", /* EBADDATE */
74 "invalid number format", /* ENUMFMT */
75 "superfluous width argument", /* ENOWIDTH */
76 "system: utsname error", /* EUTSNAME */
77 "obsolete macro", /* EOBS */
78 "end-of-line scope violation", /* EIMPBRK */
79 "empty macro ignored", /* EIGNE */
80 "unclosed explicit scope", /* EOPEN */
81 "unterminated quoted phrase", /* EQUOTPHR */
82 "closure macro without prior context", /* ENOCTX */
83 "no description found for library", /* ELIB */
84 "bad child for parent context", /* EBADCHILD */
85 "list arguments preceding type", /* ENOTYPE */
88 const char *const __mdoc_macronames
[MDOC_MAX
] = {
89 "Ap", "Dd", "Dt", "Os",
90 "Sh", "Ss", "Pp", "D1",
91 "Dl", "Bd", "Ed", "Bl",
92 "El", "It", "Ad", "An",
93 "Ar", "Cd", "Cm", "Dv",
94 "Er", "Ev", "Ex", "Fa",
95 "Fd", "Fl", "Fn", "Ft",
96 "Ic", "In", "Li", "Nd",
97 "Nm", "Op", "Ot", "Pa",
98 "Rv", "St", "Va", "Vt",
100 "Xr", "%A", "%B", "%D",
102 "%I", "%J", "%N", "%O",
104 "%P", "%R", "%T", "%V",
105 "Ac", "Ao", "Aq", "At",
106 "Bc", "Bf", "Bo", "Bq",
107 "Bsx", "Bx", "Db", "Dc",
108 "Do", "Dq", "Ec", "Ef",
109 "Em", "Eo", "Fx", "Ms",
110 "No", "Ns", "Nx", "Ox",
111 "Pc", "Pf", "Po", "Pq",
112 "Qc", "Ql", "Qo", "Qq",
113 "Re", "Rs", "Sc", "So",
114 "Sq", "Sm", "Sx", "Sy",
115 "Tn", "Ux", "Xc", "Xo",
116 "Fo", "Fc", "Oo", "Oc",
117 "Bk", "Ek", "Bt", "Hf",
118 "Fr", "Ud", "Lb", "Lp",
119 "Lk", "Mt", "Brq", "Bro",
121 "Brc", "%C", "Es", "En",
123 "Dx", "%Q", "br", "sp",
128 const char *const __mdoc_argnames
[MDOC_ARG_MAX
] = {
129 "split", "nosplit", "ragged",
130 "unfilled", "literal", "file",
131 "offset", "bullet", "dash",
132 "hyphen", "item", "enum",
133 "tag", "diag", "hang",
134 "ohang", "inset", "column",
135 "width", "compact", "std",
136 "filled", "words", "emphasis",
137 "symbolic", "nested", "centered"
140 const char * const *mdoc_macronames
= __mdoc_macronames
;
141 const char * const *mdoc_argnames
= __mdoc_argnames
;
143 static void mdoc_free1(struct mdoc
*);
144 static void mdoc_alloc1(struct mdoc
*);
145 static struct mdoc_node
*node_alloc(struct mdoc
*, int, int,
146 enum mdoct
, enum mdoc_type
);
147 static int node_append(struct mdoc
*,
149 static int parsetext(struct mdoc
*, int, char *);
150 static int parsemacro(struct mdoc
*, int, char *);
151 static int macrowarn(struct mdoc
*, int, const char *);
152 static int pstring(struct mdoc
*, int, int,
153 const char *, size_t);
155 const struct mdoc_node
*
156 mdoc_node(const struct mdoc
*m
)
159 return(MDOC_HALT
& m
->flags
? NULL
: m
->first
);
163 const struct mdoc_meta
*
164 mdoc_meta(const struct mdoc
*m
)
167 return(MDOC_HALT
& m
->flags
? NULL
: &m
->meta
);
172 * Frees volatile resources (parse tree, meta-data, fields).
175 mdoc_free1(struct mdoc
*mdoc
)
179 mdoc_node_freelist(mdoc
->first
);
180 if (mdoc
->meta
.title
)
181 free(mdoc
->meta
.title
);
185 free(mdoc
->meta
.name
);
187 free(mdoc
->meta
.arch
);
189 free(mdoc
->meta
.vol
);
194 * Allocate all volatile resources (parse tree, meta-data, fields).
197 mdoc_alloc1(struct mdoc
*mdoc
)
200 memset(&mdoc
->meta
, 0, sizeof(struct mdoc_meta
));
202 mdoc
->lastnamed
= mdoc
->lastsec
= SEC_NONE
;
203 mdoc
->last
= mandoc_calloc(1, sizeof(struct mdoc_node
));
204 mdoc
->first
= mdoc
->last
;
205 mdoc
->last
->type
= MDOC_ROOT
;
206 mdoc
->next
= MDOC_NEXT_CHILD
;
/*
 * Frees volatile resources (see mdoc_free1()), then re-initialises the
 * data with mdoc_alloc1().  After invocation, parse data has been reset
 * and the parser is ready for re-invocation on a new tree; however,
 * cross-parse non-volatile data is kept intact.
 */
217 mdoc_reset(struct mdoc
*mdoc
)
226 * Completely free up all volatile and non-volatile parse resources.
227 * After invocation, the pointer is no longer usable.
230 mdoc_free(struct mdoc
*mdoc
)
239 * Allocate volatile and non-volatile parse resources.
242 mdoc_alloc(void *data
, int pflags
, const struct mdoc_cb
*cb
)
246 p
= mandoc_calloc(1, sizeof(struct mdoc
));
249 memcpy(&p
->cb
, cb
, sizeof(struct mdoc_cb
));
/*
 * Climb back up the parse tree, validating open scopes.  Mostly calls
 * through to mdoc_macroend() in macro.c.
 */
265 mdoc_endparse(struct mdoc
*m
)
268 if (MDOC_HALT
& m
->flags
)
270 else if (mdoc_macroend(m
))
272 m
->flags
|= MDOC_HALT
;
278 * Main parse routine. Parses a single line -- really just hands off to
279 * the macro (parsemacro()) or text parser (parsetext()).
282 mdoc_parseln(struct mdoc
*m
, int ln
, char *buf
)
285 if (MDOC_HALT
& m
->flags
)
288 return('.' == *buf
? parsemacro(m
, ln
, buf
) :
289 parsetext(m
, ln
, buf
));
294 mdoc_verr(struct mdoc
*mdoc
, int ln
, int pos
,
295 const char *fmt
, ...)
300 if (NULL
== mdoc
->cb
.mdoc_err
)
304 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
307 return((*mdoc
->cb
.mdoc_err
)(mdoc
->data
, ln
, pos
, buf
));
312 mdoc_vwarn(struct mdoc
*mdoc
, int ln
, int pos
, const char *fmt
, ...)
317 if (NULL
== mdoc
->cb
.mdoc_warn
)
321 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
324 return((*mdoc
->cb
.mdoc_warn
)(mdoc
->data
, ln
, pos
, buf
));
329 mdoc_err(struct mdoc
*m
, int line
, int pos
, int iserr
, enum merr type
)
333 p
= __mdoc_merrnames
[(int)type
];
337 return(mdoc_verr(m
, line
, pos
, p
));
339 return(mdoc_vwarn(m
, line
, pos
, p
));
344 mdoc_macro(struct mdoc
*m
, enum mdoct tok
,
345 int ln
, int pp
, int *pos
, char *buf
)
348 assert(tok
< MDOC_MAX
);
350 * If we're in the prologue, deny "body" macros. Similarly, if
351 * we're in the body, deny prologue calls.
353 if (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
&&
354 MDOC_PBODY
& m
->flags
)
355 return(mdoc_perr(m
, ln
, pp
, EPROLBODY
));
356 if ( ! (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
) &&
357 ! (MDOC_PBODY
& m
->flags
))
358 return(mdoc_perr(m
, ln
, pp
, EBODYPROL
));
360 return((*mdoc_macros
[tok
].fp
)(m
, tok
, ln
, pp
, pos
, buf
));
365 node_append(struct mdoc
*mdoc
, struct mdoc_node
*p
)
370 assert(MDOC_ROOT
!= p
->type
);
372 switch (mdoc
->next
) {
373 case (MDOC_NEXT_SIBLING
):
374 mdoc
->last
->next
= p
;
375 p
->prev
= mdoc
->last
;
376 p
->parent
= mdoc
->last
->parent
;
378 case (MDOC_NEXT_CHILD
):
379 mdoc
->last
->child
= p
;
380 p
->parent
= mdoc
->last
;
389 if ( ! mdoc_valid_pre(mdoc
, p
))
391 if ( ! mdoc_action_pre(mdoc
, p
))
396 assert(MDOC_BLOCK
== p
->parent
->type
);
400 assert(MDOC_BLOCK
== p
->parent
->type
);
404 assert(MDOC_BLOCK
== p
->parent
->type
);
415 if ( ! mdoc_valid_post(mdoc
))
417 if ( ! mdoc_action_post(mdoc
))
428 static struct mdoc_node
*
429 node_alloc(struct mdoc
*m
, int line
, int pos
,
430 enum mdoct tok
, enum mdoc_type type
)
434 p
= mandoc_calloc(1, sizeof(struct mdoc_node
));
446 mdoc_tail_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
450 p
= node_alloc(m
, line
, pos
, tok
, MDOC_TAIL
);
451 if ( ! node_append(m
, p
))
453 m
->next
= MDOC_NEXT_CHILD
;
459 mdoc_head_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
466 p
= node_alloc(m
, line
, pos
, tok
, MDOC_HEAD
);
467 if ( ! node_append(m
, p
))
469 m
->next
= MDOC_NEXT_CHILD
;
475 mdoc_body_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
479 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BODY
);
480 if ( ! node_append(m
, p
))
482 m
->next
= MDOC_NEXT_CHILD
;
488 mdoc_block_alloc(struct mdoc
*m
, int line
, int pos
,
489 enum mdoct tok
, struct mdoc_arg
*args
)
493 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BLOCK
);
497 if ( ! node_append(m
, p
))
499 m
->next
= MDOC_NEXT_CHILD
;
505 mdoc_elem_alloc(struct mdoc
*m
, int line
, int pos
,
506 enum mdoct tok
, struct mdoc_arg
*args
)
510 p
= node_alloc(m
, line
, pos
, tok
, MDOC_ELEM
);
514 if ( ! node_append(m
, p
))
516 m
->next
= MDOC_NEXT_CHILD
;
522 pstring(struct mdoc
*m
, int line
, int pos
, const char *p
, size_t len
)
527 n
= node_alloc(m
, line
, pos
, -1, MDOC_TEXT
);
528 n
->string
= mandoc_malloc(len
+ 1);
529 sv
= strlcpy(n
->string
, p
, len
+ 1);
531 /* Prohibit truncation. */
532 assert(sv
< len
+ 1);
534 if ( ! node_append(m
, n
))
536 m
->next
= MDOC_NEXT_SIBLING
;
/*
 * Hand the whole NUL-terminated string `p' to pstring(), together with
 * its length as measured by strlen(), and return pstring()'s result.
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	size_t		 len;

	len = strlen(p);

	return(pstring(m, line, pos, p, len));
}
550 mdoc_node_free(struct mdoc_node
*p
)
558 mdoc_argv_free(p
->args
);
564 mdoc_node_freelist(struct mdoc_node
*p
)
568 mdoc_node_freelist(p
->child
);
570 mdoc_node_freelist(p
->next
);
572 assert(0 == p
->nchild
);
/*
 * Parse free-form text, that is, a line that does not begin with the
 * control character ('.').
 */
582 parsetext(struct mdoc
*m
, int line
, char *buf
)
587 if (SEC_NONE
== m
->lastnamed
)
588 return(mdoc_perr(m
, line
, 0, ETEXTPROL
));
591 * If in literal mode, then pass the buffer directly to the
592 * back-end, as it should be preserved as a single term.
595 if (MDOC_LITERAL
& m
->flags
)
596 return(mdoc_word_alloc(m
, line
, 0, buf
));
598 /* Disallow blank/white-space lines in non-literal mode. */
600 for (i
= 0; ' ' == buf
[i
]; i
++)
601 /* Skip leading whitespace. */ ;
603 if ('\0' == buf
[i
]) {
604 if ( ! mdoc_pwarn(m
, line
, 0, ENOBLANK
))
607 * Assume that a `Pp' should be inserted in the case of
608 * a blank line. Technically, blank lines aren't
609 * allowed, but enough manuals assume this behaviour
610 * that we want to work around it.
612 if ( ! mdoc_elem_alloc(m
, line
, 0, MDOC_Pp
, NULL
))
617 * Break apart a free-form line into tokens. Spaces are
618 * stripped out of the input.
621 for (j
= i
; buf
[i
]; i
++) {
625 /* Escaped whitespace. */
626 if (i
&& ' ' == buf
[i
] && '\\' == buf
[i
- 1])
632 if ( ! pstring(m
, line
, j
, &buf
[j
], (size_t)(i
- j
)))
635 /* Trailing whitespace? Check at overwritten byte. */
637 if (' ' == sv
&& '\0' == buf
[i
])
638 if ( ! mdoc_pwarn(m
, line
, i
- 1, ETAILWS
))
641 for ( ; ' ' == buf
[i
]; i
++)
642 /* Skip trailing whitespace. */ ;
646 /* Trailing whitespace? */
648 if (' ' == buf
[i
- 1] && '\0' == buf
[i
])
649 if ( ! mdoc_pwarn(m
, line
, i
- 1, ETAILWS
))
656 if (j
!= i
&& ! pstring(m
, line
, j
, &buf
[j
], (size_t)(i
- j
)))
659 m
->next
= MDOC_NEXT_SIBLING
;
666 macrowarn(struct mdoc
*m
, int ln
, const char *buf
)
668 if ( ! (MDOC_IGN_MACRO
& m
->pflags
))
669 return(mdoc_verr(m
, ln
, 0,
670 "unknown macro: %s%s",
671 buf
, strlen(buf
) > 3 ? "..." : ""));
672 return(mdoc_vwarn(m
, ln
, 0, "unknown macro: %s%s",
673 buf
, strlen(buf
) > 3 ? "..." : ""));
/*
 * Parse a macro line, that is, a line beginning with the control
 * character ('.').
 */
682 parsemacro(struct mdoc
*m
, int ln
, char *buf
)
687 /* Empty lines are ignored. */
694 /* Accept whitespace after the initial control char. */
698 while (buf
[i
] && ' ' == buf
[i
])
704 /* Copy the first word into a nil-terminated buffer. */
706 for (j
= 0; j
< 4; j
++, i
++) {
707 if ('\0' == (mac
[j
] = buf
[i
]))
709 else if (' ' == buf
[i
])
712 /* Check for invalid characters. */
714 if (isgraph((u_char
)buf
[i
]))
716 return(mdoc_perr(m
, ln
, i
, EPRINT
));
721 if (j
== 4 || j
< 2) {
722 if ( ! macrowarn(m
, ln
, mac
))
727 if (MDOC_MAX
== (c
= mdoc_hash_find(mac
))) {
728 if ( ! macrowarn(m
, ln
, mac
))
733 /* The macro is sane. Jump to the next word. */
735 while (buf
[i
] && ' ' == buf
[i
])
738 /* Trailing whitespace? */
740 if ('\0' == buf
[i
] && ' ' == buf
[i
- 1])
741 if ( ! mdoc_pwarn(m
, ln
, i
- 1, ETAILWS
))
745 * Begin recursive parse sequence. Since we're at the start of
746 * the line, we don't need to do callable/parseable checks.
748 if ( ! mdoc_macro(m
, c
, ln
, 1, &i
, buf
))
753 err
: /* Error out. */
755 m
->flags
|= MDOC_HALT
;