]>
git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
1 /* $Id: mdoc.c,v 1.103 2009/08/20 11:44:47 kristaps Exp $ */
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
26 const char *const __mdoc_merrnames
[MERRMAX
] = {
27 "trailing whitespace", /* ETAILWS */
28 "unexpected quoted parameter", /* EQUOTPARM */
29 "unterminated quoted parameter", /* EQUOTTERM */
30 "system: malloc error", /* EMALLOC */
31 "argument parameter suggested", /* EARGVAL */
32 "macro disallowed in prologue", /* EBODYPROL */
33 "macro disallowed in body", /* EPROLBODY */
34 "text disallowed in prologue", /* ETEXTPROL */
35 "blank line disallowed", /* ENOBLANK */
36 "text parameter too long", /* ETOOLONG */
37 "invalid escape sequence", /* EESCAPE */
38 "invalid character", /* EPRINT */
39 "document has no body", /* ENODAT */
40 "document has no prologue", /* ENOPROLOGUE */
41 "expected line arguments", /* ELINE */
42 "invalid AT&T argument", /* EATT */
43 "default name not yet set", /* ENAME */
44 "missing list type", /* ELISTTYPE */
45 "missing display type", /* EDISPTYPE */
46 "too many display types", /* EMULTIDISP */
47 "too many list types", /* EMULTILIST */
48 "NAME section must be first", /* ESECNAME */
49 "badly-formed NAME section", /* ENAMESECINC */
50 "argument repeated", /* EARGREP */
51 "expected boolean parameter", /* EBOOL */
52 "inconsistent column syntax", /* ECOLMIS */
53 "nested display invalid", /* ENESTDISP */
54 "width argument missing", /* EMISSWIDTH */
55 "invalid section for this manual section", /* EWRONGMSEC */
56 "section out of conventional order", /* ESECOOO */
57 "section repeated", /* ESECREP */
58 "invalid standard argument", /* EBADSTAND */
59 "multi-line arguments discouraged", /* ENOMULTILINE */
60 "multi-line arguments suggested", /* EMULTILINE */
61 "line arguments discouraged", /* ENOLINE */
62 "prologue macro out of conventional order", /* EPROLOOO */
63 "prologue macro repeated", /* EPROLREP */
64 "invalid manual section", /* EBADMSEC */
65 "invalid section", /* EBADSEC */
66 "invalid font mode", /* EFONT */
67 "invalid date syntax", /* EBADDATE */
68 "invalid number format", /* ENUMFMT */
69 "superfluous width argument", /* ENOWIDTH */
70 "system: utsname error", /* EUTSNAME */
71 "obsolete macro", /* EOBS */
72 "end-of-line scope violation", /* EIMPBRK */
73 "empty macro ignored", /* EIGNE */
74 "unclosed explicit scope", /* EOPEN */
75 "unterminated quoted phrase", /* EQUOTPHR */
76 "closure macro without prior context", /* ENOCTX */
77 "no description found for library" /* ELIB */
80 const char *const __mdoc_macronames
[MDOC_MAX
] = {
81 "Ap", "Dd", "Dt", "Os",
82 "Sh", "Ss", "Pp", "D1",
83 "Dl", "Bd", "Ed", "Bl",
84 "El", "It", "Ad", "An",
85 "Ar", "Cd", "Cm", "Dv",
86 "Er", "Ev", "Ex", "Fa",
87 "Fd", "Fl", "Fn", "Ft",
88 "Ic", "In", "Li", "Nd",
89 "Nm", "Op", "Ot", "Pa",
90 "Rv", "St", "Va", "Vt",
92 "Xr", "\%A", "\%B", "\%D",
94 "\%I", "\%J", "\%N", "\%O",
96 "\%P", "\%R", "\%T", "\%V",
97 "Ac", "Ao", "Aq", "At",
98 "Bc", "Bf", "Bo", "Bq",
99 "Bsx", "Bx", "Db", "Dc",
100 "Do", "Dq", "Ec", "Ef",
101 "Em", "Eo", "Fx", "Ms",
102 "No", "Ns", "Nx", "Ox",
103 "Pc", "Pf", "Po", "Pq",
104 "Qc", "Ql", "Qo", "Qq",
105 "Re", "Rs", "Sc", "So",
106 "Sq", "Sm", "Sx", "Sy",
107 "Tn", "Ux", "Xc", "Xo",
108 "Fo", "Fc", "Oo", "Oc",
109 "Bk", "Ek", "Bt", "Hf",
110 "Fr", "Ud", "Lb", "Lp",
111 "Lk", "Mt", "Brq", "Bro",
113 "Brc", "\%C", "Es", "En",
115 "Dx", "\%Q", "br", "sp"
/*
 * Macro argument name strings, dimensioned by MDOC_ARG_MAX.
 *
 * NOTE(review): as seen here the initializer list stops on a trailing
 * comma and has no closing brace; further entries are probably missing
 * from this view.  Confirm against the complete file before editing.
 */
118 const char *const __mdoc_argnames
[MDOC_ARG_MAX
] = {
119 "split", "nosplit", "ragged",
120 "unfilled", "literal", "file",
121 "offset", "bullet", "dash",
122 "hyphen", "item", "enum",
123 "tag", "diag", "hang",
124 "ohang", "inset", "column",
125 "width", "compact", "std",
126 "filled", "words", "emphasis",
130 const char * const *mdoc_macronames
= __mdoc_macronames
;
131 const char * const *mdoc_argnames
= __mdoc_argnames
;
/*
 * Prototypes for the file-local helpers defined further down.  Parameter
 * names mirror the definitions.
 */
static void		 mdoc_free1(struct mdoc *mdoc);
static int		 mdoc_alloc1(struct mdoc *mdoc);
static struct mdoc_node	*node_alloc(struct mdoc *m, int line, int pos,
				int tok, enum mdoc_type type);
static int		 node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static int		 parsetext(struct mdoc *m, int line, char *buf);
static int		 parsemacro(struct mdoc *m, int ln, char *buf);
static int		 macrowarn(struct mdoc *m, int ln, const char *buf);
static int		 pstring(struct mdoc *m, int line, int pos,
				const char *p, size_t len);

/* Used by pstring() for the bounded copy of word text. */
extern size_t		 strlcpy(char *, const char *, size_t);
150 const struct mdoc_node
*
151 mdoc_node(const struct mdoc
*m
)
154 return(MDOC_HALT
& m
->flags
? NULL
: m
->first
);
158 const struct mdoc_meta
*
159 mdoc_meta(const struct mdoc
*m
)
162 return(MDOC_HALT
& m
->flags
? NULL
: &m
->meta
);
/* Frees volatile resources (parse tree, meta-data, fields). */
/*
 * What is visible here: the node list starting at mdoc->first is handed
 * to mdoc_node_freelist(), and the title, name, arch and vol strings of
 * mdoc->meta are passed to free() (title behind a non-NULL test).
 *
 * NOTE(review): braces, the return type and some statements are absent
 * from this view; confirm the full body before changing it.
 */
170 mdoc_free1(struct mdoc
*mdoc
)
174 mdoc_node_freelist(mdoc
->first
);
175 if (mdoc
->meta
.title
)
176 free(mdoc
->meta
.title
);
180 free(mdoc
->meta
.name
);
182 free(mdoc
->meta
.arch
);
184 free(mdoc
->meta
.vol
);
/* Allocate all volatile resources (parse tree, meta-data, fields). */
/*
 * What is visible here: mdoc->meta is zeroed with bzero(), lastnamed and
 * lastsec are both set to SEC_NONE, and one zeroed struct mdoc_node is
 * obtained from calloc().  That node becomes both mdoc->first and
 * mdoc->last, is typed MDOC_ROOT, and the insertion mode mdoc->next is
 * set to MDOC_NEXT_CHILD.
 *
 * NOTE(review): the statement guarded by the NULL test on calloc()'s
 * result and the function's return statement are not in this view.
 */
192 mdoc_alloc1(struct mdoc
*mdoc
)
195 bzero(&mdoc
->meta
, sizeof(struct mdoc_meta
));
197 mdoc
->lastnamed
= mdoc
->lastsec
= SEC_NONE
;
198 mdoc
->last
= calloc(1, sizeof(struct mdoc_node
));
199 if (NULL
== mdoc
->last
)
202 mdoc
->first
= mdoc
->last
;
203 mdoc
->last
->type
= MDOC_ROOT
;
204 mdoc
->next
= MDOC_NEXT_CHILD
;
/*
 * Frees up volatile resources (see mdoc_free1()), then re-initialises the
 * data with mdoc_alloc1().  After invocation, parse data has been reset
 * and the parser is ready for re-invocation on a new tree; however,
 * cross-parse non-volatile data is kept intact.
 */
/*
 * Returns whatever mdoc_alloc1() returns for this parser.
 *
 * NOTE(review): lines are missing between the signature and the return
 * in this view, so any step performed before mdoc_alloc1() cannot be
 * seen here; confirm against the full file.
 */
216 mdoc_reset(struct mdoc
*mdoc
)
220 return(mdoc_alloc1(mdoc
));
/*
 * Completely free up all volatile and non-volatile parse resources.
 * After invocation, the pointer is no longer usable.
 */
/*
 * What is visible here: the hash table held in mdoc->htab (the one
 * parsemacro() looks macro names up in) is released through
 * mdoc_hash_free().
 *
 * NOTE(review): other statements of this function are not in this view.
 */
229 mdoc_free(struct mdoc
*mdoc
)
234 mdoc_hash_free(mdoc
->htab
);
/* Allocate volatile and non-volatile parse resources. */
/*
 * What is visible here: a zeroed struct mdoc is obtained from calloc(),
 * the caller's callback table *cb is copied into p->cb with memcpy(),
 * a hash table is created with mdoc_hash_alloc() and stored in p->htab,
 * and mdoc_alloc1() is run on the new parser.
 *
 * NOTE(review): the bodies of the failure/success branches, any use of
 * `data' and `pflags', and the return statements are not in this view.
 */
243 mdoc_alloc(void *data
, int pflags
, const struct mdoc_cb
*cb
)
247 if (NULL
== (p
= calloc(1, sizeof(struct mdoc
))))
250 (void)memcpy(&p
->cb
, cb
, sizeof(struct mdoc_cb
));
255 if (NULL
== (p
->htab
= mdoc_hash_alloc())) {
258 } else if (mdoc_alloc1(p
))
/*
 * Climb back up the parse tree, validating open scopes.  Mostly calls
 * through to macro_end() in macro.c.
 */
/*
 * What is visible here: the MDOC_HALT flag is tested first; otherwise
 * mdoc_macroend() is called.  The last visible statement sets MDOC_HALT
 * in m->flags.
 *
 * NOTE(review): the statements guarded by the two tests and the return
 * values are not in this view.
 */
271 mdoc_endparse(struct mdoc
*m
)
274 if (MDOC_HALT
& m
->flags
)
276 else if (mdoc_macroend(m
))
278 m
->flags
|= MDOC_HALT
;
/*
 * Main parse routine.  Parses a single line -- really just hands off to
 * the macro (parsemacro()) or text parser (parsetext()).
 */
/*
 * Dispatch one input line: a buffer whose first byte is '.' goes to
 * parsemacro(), anything else to parsetext(), and that call's result is
 * returned.  The MDOC_HALT flag is tested beforehand.
 *
 * NOTE(review): the statement guarded by the MDOC_HALT test is not in
 * this view.
 */
288 mdoc_parseln(struct mdoc
*m
, int ln
, char *buf
)
291 if (MDOC_HALT
& m
->flags
)
294 return('.' == *buf
? parsemacro(m
, ln
, buf
) :
295 parsetext(m
, ln
, buf
));
/*
 * printf-style error reporter.  Tests whether the cb.mdoc_err callback
 * is NULL, formats `fmt' and its variadic arguments into `buf' with
 * vsnprintf() (bounded to sizeof(buf) - 1), then returns the callback's
 * result for (mdoc->data, ln, pos, buf).
 *
 * NOTE(review): the declarations of `buf' and `ap', the va_list setup
 * and teardown, and the statement taken when no callback is installed
 * are not in this view.
 */
300 mdoc_verr(struct mdoc
*mdoc
, int ln
, int pos
,
301 const char *fmt
, ...)
306 if (NULL
== mdoc
->cb
.mdoc_err
)
310 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
313 return((*mdoc
->cb
.mdoc_err
)(mdoc
->data
, ln
, pos
, buf
));
/*
 * printf-style warning reporter.  Tests whether the cb.mdoc_warn
 * callback is NULL, formats `fmt' and its variadic arguments into `buf'
 * with vsnprintf() (bounded to sizeof(buf) - 1), then returns the
 * callback's result for (mdoc->data, ln, pos, buf).
 *
 * NOTE(review): the declarations of `buf' and `ap', the va_list setup
 * and teardown, and the statement taken when no callback is installed
 * are not in this view.
 */
318 mdoc_vwarn(struct mdoc
*mdoc
, int ln
, int pos
, const char *fmt
, ...)
323 if (NULL
== mdoc
->cb
.mdoc_warn
)
327 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
330 return((*mdoc
->cb
.mdoc_warn
)(mdoc
->data
, ln
, pos
, buf
));
335 mdoc_err(struct mdoc
*m
, int line
, int pos
, int iserr
, enum merr type
)
339 p
= __mdoc_merrnames
[(int)type
];
343 return(mdoc_verr(m
, line
, pos
, p
));
345 return(mdoc_vwarn(m
, line
, pos
, p
));
350 mdoc_macro(struct mdoc
*m
, int tok
,
351 int ln
, int pp
, int *pos
, char *buf
)
354 * If we're in the prologue, deny "body" macros. Similarly, if
355 * we're in the body, deny prologue calls.
357 if (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
&&
358 MDOC_PBODY
& m
->flags
)
359 return(mdoc_perr(m
, ln
, pp
, EPROLBODY
));
360 if ( ! (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
) &&
361 ! (MDOC_PBODY
& m
->flags
))
362 return(mdoc_perr(m
, ln
, pp
, EBODYPROL
));
364 return((*mdoc_macros
[tok
].fp
)(m
, tok
, ln
, pp
, pos
, buf
));
/*
 * Link node `p' into the tree relative to mdoc->last.
 *
 * What is visible here: `p' must not be of type MDOC_ROOT (asserted).
 * In MDOC_NEXT_SIBLING mode `p' is chained after mdoc->last (next/prev)
 * and shares its parent; in MDOC_NEXT_CHILD mode `p' becomes the child
 * of mdoc->last.  mdoc_valid_pre() and mdoc_action_pre() are then run
 * on `p', three assertions require p->parent to be of type MDOC_BLOCK,
 * and mdoc_valid_post() and mdoc_action_post() are run.
 *
 * NOTE(review): the ends of the switch cases, the statements guarded by
 * the pre/post checks, the context of the three assertions and the
 * return values are not in this view.
 */
369 node_append(struct mdoc
*mdoc
, struct mdoc_node
*p
)
374 assert(MDOC_ROOT
!= p
->type
);
376 switch (mdoc
->next
) {
377 case (MDOC_NEXT_SIBLING
):
378 mdoc
->last
->next
= p
;
379 p
->prev
= mdoc
->last
;
380 p
->parent
= mdoc
->last
->parent
;
382 case (MDOC_NEXT_CHILD
):
383 mdoc
->last
->child
= p
;
384 p
->parent
= mdoc
->last
;
393 if ( ! mdoc_valid_pre(mdoc
, p
))
395 if ( ! mdoc_action_pre(mdoc
, p
))
400 assert(MDOC_BLOCK
== p
->parent
->type
);
404 assert(MDOC_BLOCK
== p
->parent
->type
);
408 assert(MDOC_BLOCK
== p
->parent
->type
);
419 if ( ! mdoc_valid_post(mdoc
))
421 if ( ! mdoc_action_post(mdoc
))
/*
 * Allocate one zeroed struct mdoc_node with calloc().  On allocation
 * failure EMALLOC is reported through mdoc_nerr() against m->last.
 * The node's type is set to `type'; something further is done only
 * when that type is not MDOC_TEXT.
 *
 * NOTE(review): the remaining field assignments, the statement guarded
 * by the MDOC_TEXT test and the return statements are not in this view.
 */
432 static struct mdoc_node
*
433 node_alloc(struct mdoc
*m
, int line
,
434 int pos
, int tok
, enum mdoc_type type
)
438 if (NULL
== (p
= calloc(1, sizeof(struct mdoc_node
)))) {
439 (void)mdoc_nerr(m
, m
->last
, EMALLOC
);
447 if (MDOC_TEXT
!= (p
->type
= type
))
/*
 * Create a node of type MDOC_TAIL for macro `tok' with node_alloc(),
 * link it into the tree with node_append(), and set the insertion mode
 * to MDOC_NEXT_CHILD.
 *
 * NOTE(review): failure handling and return statements are not in this
 * view.
 */
455 mdoc_tail_alloc(struct mdoc
*m
, int line
, int pos
, int tok
)
459 p
= node_alloc(m
, line
, pos
, tok
, MDOC_TAIL
);
462 if ( ! node_append(m
, p
))
464 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Create a node of type MDOC_HEAD for macro `tok' with node_alloc(),
 * link it into the tree with node_append(), and set the insertion mode
 * to MDOC_NEXT_CHILD.
 *
 * NOTE(review): several lines between the signature and the allocation,
 * the failure handling and the return statements are not in this view.
 */
470 mdoc_head_alloc(struct mdoc
*m
, int line
, int pos
, int tok
)
477 p
= node_alloc(m
, line
, pos
, tok
, MDOC_HEAD
);
480 if ( ! node_append(m
, p
))
482 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Create a node of type MDOC_BODY for macro `tok' with node_alloc(),
 * link it into the tree with node_append(), and set the insertion mode
 * to MDOC_NEXT_CHILD.
 *
 * NOTE(review): failure handling and return statements are not in this
 * view.
 */
488 mdoc_body_alloc(struct mdoc
*m
, int line
, int pos
, int tok
)
492 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BODY
);
495 if ( ! node_append(m
, p
))
497 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Create a node of type MDOC_BLOCK for macro `tok' with node_alloc(),
 * link it into the tree with node_append(), and set the insertion mode
 * to MDOC_NEXT_CHILD.
 *
 * NOTE(review): how `args' is attached to the node, the failure
 * handling and the return statements are not in this view.
 */
503 mdoc_block_alloc(struct mdoc
*m
, int line
, int pos
,
504 int tok
, struct mdoc_arg
*args
)
508 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BLOCK
);
514 if ( ! node_append(m
, p
))
516 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Create a node of type MDOC_ELEM for macro `tok' with node_alloc(),
 * link it into the tree with node_append(), and set the insertion mode
 * to MDOC_NEXT_CHILD.
 *
 * NOTE(review): how `args' is attached to the node, the failure
 * handling and the return statements are not in this view.
 */
522 mdoc_elem_alloc(struct mdoc
*m
, int line
, int pos
,
523 int tok
, struct mdoc_arg
*args
)
527 p
= node_alloc(m
, line
, pos
, tok
, MDOC_ELEM
);
533 if ( ! node_append(m
, p
))
535 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Add a text node holding a copy of `p'.
 *
 * What is visible here: a node of type MDOC_TEXT (token -1) is created
 * with node_alloc(); len + 1 bytes are obtained from malloc() for
 * n->string; `p' is copied in with strlcpy() bounded to len + 1; the
 * copy is asserted not to have been truncated; the node is linked with
 * node_append(); and the insertion mode becomes MDOC_NEXT_SIBLING.
 * Allocation failures are reported as EMALLOC through mdoc_nerr().
 *
 * Since strlcpy() returns the full length of its source string, the
 * assertion holds only if `p' is NUL-terminated within `len' bytes.
 *
 * NOTE(review): declarations, the test guarding the first EMALLOC
 * return, cleanup on failure and the final return are not in this view.
 */
541 pstring(struct mdoc
*m
, int line
, int pos
, const char *p
, size_t len
)
546 n
= node_alloc(m
, line
, pos
, -1, MDOC_TEXT
);
548 return(mdoc_nerr(m
, m
->last
, EMALLOC
));
550 n
->string
= malloc(len
+ 1);
551 if (NULL
== n
->string
) {
553 return(mdoc_nerr(m
, m
->last
, EMALLOC
));
556 sv
= strlcpy(n
->string
, p
, len
+ 1);
558 /* Prohibit truncation. */
559 assert(sv
< len
+ 1);
561 if ( ! node_append(m
, n
))
563 m
->next
= MDOC_NEXT_SIBLING
;
/*
 * Add the whole NUL-terminated string `p' as one text node by passing
 * it, together with its strlen(), to pstring().  Returns pstring()'s
 * result.
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	const size_t	 len = strlen(p);

	return(pstring(m, line, pos, p, len));
}
/*
 * What is visible here: the node's argument data, p->args, is released
 * through mdoc_argv_free().
 *
 * NOTE(review): the other statements of this function are not in this
 * view.
 */
577 mdoc_node_free(struct mdoc_node
*p
)
585 mdoc_argv_free(p
->args
);
/*
 * Recursive release of a node list: calls itself on p->child and on
 * p->next, then asserts that p->nchild is zero.
 *
 * NOTE(review): the tests guarding the recursive calls and the final
 * release of `p' itself are not in this view.
 */
591 mdoc_node_freelist(struct mdoc_node
*p
)
595 mdoc_node_freelist(p
->child
);
597 mdoc_node_freelist(p
->next
);
599 assert(0 == p
->nchild
);
/*
 * Parse free-form text, that is, a line that does not begin with the
 * control character ('.').
 */
/*
 * Turn one free-form text line into text nodes.
 *
 * What is visible here:
 *  - while m->lastnamed is still SEC_NONE the line is rejected with
 *    ETEXTPROL;
 *  - with MDOC_LITERAL set the whole buffer becomes a single word via
 *    mdoc_word_alloc();
 *  - otherwise leading spaces are skipped, ENOBLANK can be reported,
 *    and the rest is scanned from the first non-space byte: a space
 *    preceded by a backslash is tested for specially, words buf[j..i)
 *    are added with pstring(), and runs of spaces are skipped;
 *  - a word still pending after the scan is added, and the insertion
 *    mode is set to MDOC_NEXT_SIBLING.
 *
 * NOTE(review): declarations, the test guarding the ENOBLANK return,
 * several statements inside the scan loop and the return statements
 * are not in this view.
 */
609 parsetext(struct mdoc
*m
, int line
, char *buf
)
613 if (SEC_NONE
== m
->lastnamed
)
614 return(mdoc_perr(m
, line
, 0, ETEXTPROL
));
617 * If in literal mode, then pass the buffer directly to the
618 * back-end, as it should be preserved as a single term.
621 if (MDOC_LITERAL
& m
->flags
)
622 return(mdoc_word_alloc(m
, line
, 0, buf
));
624 /* Disallow blank/white-space lines in non-literal mode. */
626 for (i
= 0; ' ' == buf
[i
]; i
++)
627 /* Skip leading whitespace. */ ;
629 return(mdoc_perr(m
, line
, 0, ENOBLANK
));
632 * Break apart a free-form line into tokens. Spaces are
633 * stripped out of the input.
636 for (j
= i
; buf
[i
]; i
++) {
640 /* Escaped whitespace. */
641 if (i
&& ' ' == buf
[i
] && '\\' == buf
[i
- 1])
645 if ( ! pstring(m
, line
, j
, &buf
[j
], (size_t)(i
- j
)))
648 for ( ; ' ' == buf
[i
]; i
++)
649 /* Skip trailing whitespace. */ ;
656 if (j
!= i
&& ! pstring(m
, line
, j
, &buf
[j
], (size_t)(i
- j
)))
659 m
->next
= MDOC_NEXT_SIBLING
;
667 macrowarn(struct mdoc
*m
, int ln
, const char *buf
)
669 if ( ! (MDOC_IGN_MACRO
& m
->pflags
))
670 return(mdoc_verr(m
, ln
, 0,
671 "unknown macro: %s%s",
672 buf
, strlen(buf
) > 3 ? "..." : ""));
673 return(mdoc_vwarn(m
, ln
, 0, "unknown macro: %s%s",
674 buf
, strlen(buf
) > 3 ? "..." : ""));
/*
 * Parse a macro line, that is, a line beginning with the control
 * character ('.').
 */
/*
 * Parse one macro line.
 *
 * What is visible here: spaces following the control character are
 * skipped; up to four bytes of the first word are copied into `mac',
 * stopping at a NUL or a space; a word of four or more bytes, or of
 * fewer than two, is reported through macrowarn(); the word is looked
 * up with mdoc_hash_find(), where a result of MDOC_MAX is likewise
 * reported through macrowarn(); spaces before the next word are
 * skipped; and mdoc_macro() is invoked with position 1 and the current
 * offset.  The `err' label sets MDOC_HALT in m->flags.
 *
 * NOTE(review): declarations, the empty-line test, the loop bodies,
 * the statements guarded by the macrowarn()/mdoc_macro() tests and all
 * return statements are not in this view.
 */
683 parsemacro(struct mdoc
*m
, int ln
, char *buf
)
688 /* Empty lines are ignored. */
695 /* Accept whitespace after the initial control char. */
699 while (buf
[i
] && ' ' == buf
[i
])
705 /* Copy the first word into a nil-terminated buffer. */
707 for (j
= 0; j
< 4; j
++, i
++) {
708 if (0 == (mac
[j
] = buf
[i
]))
710 else if (' ' == buf
[i
])
716 if (j
== 4 || j
< 2) {
717 if ( ! macrowarn(m
, ln
, mac
))
722 if (MDOC_MAX
== (c
= mdoc_hash_find(m
->htab
, mac
))) {
723 if ( ! macrowarn(m
, ln
, mac
))
728 /* The macro is sane. Jump to the next word. */
730 while (buf
[i
] && ' ' == buf
[i
])
734 * Begin recursive parse sequence. Since we're at the start of
735 * the line, we don't need to do callable/parseable checks.
737 if ( ! mdoc_macro(m
, c
, ln
, 1, &i
, buf
))
742 err
: /* Error out. */
744 m
->flags
|= MDOC_HALT
;