]>
git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
1 /* $Id: mdoc.c,v 1.95 2009/07/20 14:09:38 kristaps Exp $ */
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
26 const char *const __mdoc_merrnames
[MERRMAX
] = {
27 "trailing whitespace", /* ETAILWS */
28 "unexpected quoted parameter", /* EQUOTPARM */
29 "unterminated quoted parameter", /* EQUOTTERM */
30 "system: malloc error", /* EMALLOC */
31 "argument parameter suggested", /* EARGVAL */
32 "macro not callable", /* ENOCALL */
33 "macro disallowed in prologue", /* EBODYPROL */
34 "macro disallowed in body", /* EPROLBODY */
35 "text disallowed in prologue", /* ETEXTPROL */
36 "blank line disallowed", /* ENOBLANK */
37 "text parameter too long", /* ETOOLONG */
38 "invalid escape sequence", /* EESCAPE */
39 "invalid character", /* EPRINT */
40 "document has no body", /* ENODAT */
41 "document has no prologue", /* ENOPROLOGUE */
42 "expected line arguments", /* ELINE */
43 "invalid AT&T argument", /* EATT */
44 "default name not yet set", /* ENAME */
45 "missing list type", /* ELISTTYPE */
46 "missing display type", /* EDISPTYPE */
47 "too many display types", /* EMULTIDISP */
48 "too many list types", /* EMULTILIST */
49 "NAME section must be first", /* ESECNAME */
50 "badly-formed NAME section", /* ENAMESECINC */
51 "argument repeated", /* EARGREP */
52 "expected boolean parameter", /* EBOOL */
53 "inconsistent column syntax", /* ECOLMIS */
54 "nested display invalid", /* ENESTDISP */
55 "width argument missing", /* EMISSWIDTH */
56 "invalid section for this manual section", /* EWRONGMSEC */
57 "section out of conventional order", /* ESECOOO */
58 "section repeated", /* ESECREP */
59 "invalid standard argument", /* EBADSTAND */
60 "multi-line arguments discouraged", /* ENOMULTILINE */
61 "multi-line arguments suggested", /* EMULTILINE */
62 "line arguments discouraged", /* ENOLINE */
63 "prologue macro out of conventional order", /* EPROLOOO */
64 "prologue macro repeated", /* EPROLREP */
65 "invalid manual section", /* EBADMSEC */
66 "invalid section", /* EBADSEC */
67 "invalid font mode", /* EFONT */
68 "invalid date syntax", /* EBADDATE */
69 "invalid number format", /* ENUMFMT */
70 "superfluous width argument", /* ENOWIDTH */
71 "system: utsname error", /* EUTSNAME */
72 "obsolete macro", /* EOBS */
73 "macro-like parameter", /* EMACPARM */
74 "end-of-line scope violation", /* EIMPBRK */
75 "empty macro ignored", /* EIGNE */
76 "unclosed explicit scope", /* EOPEN */
77 "unterminated quoted phrase", /* EQUOTPHR */
78 "closure macro without prior context", /* ENOCTX */
79 "invalid whitespace after control character", /* ESPACE */
80 "no description found for library" /* ELIB */
83 const char *const __mdoc_macronames
[MDOC_MAX
] = {
84 "Ap", "Dd", "Dt", "Os",
85 "Sh", "Ss", "Pp", "D1",
86 "Dl", "Bd", "Ed", "Bl",
87 "El", "It", "Ad", "An",
88 "Ar", "Cd", "Cm", "Dv",
89 "Er", "Ev", "Ex", "Fa",
90 "Fd", "Fl", "Fn", "Ft",
91 "Ic", "In", "Li", "Nd",
92 "Nm", "Op", "Ot", "Pa",
93 "Rv", "St", "Va", "Vt",
95 "Xr", "\%A", "\%B", "\%D",
97 "\%I", "\%J", "\%N", "\%O",
99 "\%P", "\%R", "\%T", "\%V",
100 "Ac", "Ao", "Aq", "At",
101 "Bc", "Bf", "Bo", "Bq",
102 "Bsx", "Bx", "Db", "Dc",
103 "Do", "Dq", "Ec", "Ef",
104 "Em", "Eo", "Fx", "Ms",
105 "No", "Ns", "Nx", "Ox",
106 "Pc", "Pf", "Po", "Pq",
107 "Qc", "Ql", "Qo", "Qq",
108 "Re", "Rs", "Sc", "So",
109 "Sq", "Sm", "Sx", "Sy",
110 "Tn", "Ux", "Xc", "Xo",
111 "Fo", "Fc", "Oo", "Oc",
112 "Bk", "Ek", "Bt", "Hf",
113 "Fr", "Ud", "Lb", "Lp",
114 "Lk", "Mt", "Brq", "Bro",
116 "Brc", "\%C", "Es", "En",
118 "Dx", "\%Q", "br", "sp"
121 const char *const __mdoc_argnames
[MDOC_ARG_MAX
] = {
122 "split", "nosplit", "ragged",
123 "unfilled", "literal", "file",
124 "offset", "bullet", "dash",
125 "hyphen", "item", "enum",
126 "tag", "diag", "hang",
127 "ohang", "inset", "column",
128 "width", "compact", "std",
129 "filled", "words", "emphasis",
133 const char * const *mdoc_macronames
= __mdoc_macronames
;
134 const char * const *mdoc_argnames
= __mdoc_argnames
;
136 static void mdoc_free1(struct mdoc
*);
137 static int mdoc_alloc1(struct mdoc
*);
138 static struct mdoc_node
*node_alloc(struct mdoc
*, int, int,
139 int, enum mdoc_type
);
140 static int node_append(struct mdoc
*,
142 static int parsetext(struct mdoc
*, int, char *);
143 static int parsemacro(struct mdoc
*, int, char *);
144 static int macrowarn(struct mdoc
*, int, const char *);
145 static int pstring(struct mdoc
*, int, int,
146 const char *, size_t);
149 extern size_t strlcpy(char *, const char *, size_t);
153 const struct mdoc_node
*
154 mdoc_node(const struct mdoc
*m
)
157 return(MDOC_HALT
& m
->flags
? NULL
: m
->first
);
161 const struct mdoc_meta
*
162 mdoc_meta(const struct mdoc
*m
)
165 return(MDOC_HALT
& m
->flags
? NULL
: &m
->meta
);
170 * Frees volatile resources (parse tree, meta-data, fields).
173 mdoc_free1(struct mdoc
*mdoc
)
177 mdoc_node_freelist(mdoc
->first
);
178 if (mdoc
->meta
.title
)
179 free(mdoc
->meta
.title
);
183 free(mdoc
->meta
.name
);
185 free(mdoc
->meta
.arch
);
187 free(mdoc
->meta
.vol
);
192 * Allocate all volatile resources (parse tree, meta-data, fields).
195 mdoc_alloc1(struct mdoc
*mdoc
)
198 bzero(&mdoc
->meta
, sizeof(struct mdoc_meta
));
200 mdoc
->lastnamed
= mdoc
->lastsec
= SEC_NONE
;
201 mdoc
->last
= calloc(1, sizeof(struct mdoc_node
));
202 if (NULL
== mdoc
->last
)
205 mdoc
->first
= mdoc
->last
;
206 mdoc
->last
->type
= MDOC_ROOT
;
207 mdoc
->next
= MDOC_NEXT_CHILD
;
213 * Free up volatile resources (see mdoc_free1()) then re-initialises the
214 * data with mdoc_alloc1(). After invocation, parse data has been reset
215 * and the parser is ready for re-invocation on a new tree; however,
216 * cross-parse non-volatile data is kept intact.
219 mdoc_reset(struct mdoc
*mdoc
)
223 return(mdoc_alloc1(mdoc
));
228 * Completely free up all volatile and non-volatile parse resources.
229 * After invocation, the pointer is no longer usable.
232 mdoc_free(struct mdoc
*mdoc
)
237 mdoc_hash_free(mdoc
->htab
);
243 * Allocate volatile and non-volatile parse resources.
246 mdoc_alloc(void *data
, int pflags
, const struct mdoc_cb
*cb
)
250 if (NULL
== (p
= calloc(1, sizeof(struct mdoc
))))
253 (void)memcpy(&p
->cb
, cb
, sizeof(struct mdoc_cb
));
258 if (NULL
== (p
->htab
= mdoc_hash_alloc())) {
261 } else if (mdoc_alloc1(p
))
270 * Climb back up the parse tree, validating open scopes. Mostly calls
271 * through to macro_end() in macro.c.
274 mdoc_endparse(struct mdoc
*m
)
277 if (MDOC_HALT
& m
->flags
)
279 else if (mdoc_macroend(m
))
281 m
->flags
|= MDOC_HALT
;
287 * Main parse routine. Parses a single line -- really just hands off to
288 * the macro (parsemacro()) or text parser (parsetext()).
291 mdoc_parseln(struct mdoc
*m
, int ln
, char *buf
)
294 if (MDOC_HALT
& m
->flags
)
297 return('.' == *buf
? parsemacro(m
, ln
, buf
) :
298 parsetext(m
, ln
, buf
));
303 mdoc_verr(struct mdoc
*mdoc
, int ln
, int pos
,
304 const char *fmt
, ...)
309 if (NULL
== mdoc
->cb
.mdoc_err
)
313 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
316 return((*mdoc
->cb
.mdoc_err
)(mdoc
->data
, ln
, pos
, buf
));
321 mdoc_vwarn(struct mdoc
*mdoc
, int ln
, int pos
, const char *fmt
, ...)
326 if (NULL
== mdoc
->cb
.mdoc_warn
)
330 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
333 return((*mdoc
->cb
.mdoc_warn
)(mdoc
->data
, ln
, pos
, buf
));
338 mdoc_err(struct mdoc
*m
, int line
, int pos
, int iserr
, enum merr type
)
342 p
= __mdoc_merrnames
[(int)type
];
346 return(mdoc_verr(m
, line
, pos
, p
));
348 return(mdoc_vwarn(m
, line
, pos
, p
));
353 mdoc_macro(struct mdoc
*m
, int tok
,
354 int ln
, int pp
, int *pos
, char *buf
)
357 if (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
&&
358 MDOC_PBODY
& m
->flags
)
359 return(mdoc_perr(m
, ln
, pp
, EPROLBODY
));
360 if ( ! (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
) &&
361 ! (MDOC_PBODY
& m
->flags
))
362 return(mdoc_perr(m
, ln
, pp
, EBODYPROL
));
364 if (1 != pp
&& ! (MDOC_CALLABLE
& mdoc_macros
[tok
].flags
))
365 return(mdoc_perr(m
, ln
, pp
, ENOCALL
));
367 return((*mdoc_macros
[tok
].fp
)(m
, tok
, ln
, pp
, pos
, buf
));
372 node_append(struct mdoc
*mdoc
, struct mdoc_node
*p
)
377 assert(MDOC_ROOT
!= p
->type
);
379 switch (mdoc
->next
) {
380 case (MDOC_NEXT_SIBLING
):
381 mdoc
->last
->next
= p
;
382 p
->prev
= mdoc
->last
;
383 p
->parent
= mdoc
->last
->parent
;
385 case (MDOC_NEXT_CHILD
):
386 mdoc
->last
->child
= p
;
387 p
->parent
= mdoc
->last
;
396 if ( ! mdoc_valid_pre(mdoc
, p
))
398 if ( ! mdoc_action_pre(mdoc
, p
))
403 assert(MDOC_BLOCK
== p
->parent
->type
);
407 assert(MDOC_BLOCK
== p
->parent
->type
);
411 assert(MDOC_BLOCK
== p
->parent
->type
);
422 if ( ! mdoc_valid_post(mdoc
))
424 if ( ! mdoc_action_post(mdoc
))
435 static struct mdoc_node
*
436 node_alloc(struct mdoc
*m
, int line
,
437 int pos
, int tok
, enum mdoc_type type
)
441 if (NULL
== (p
= calloc(1, sizeof(struct mdoc_node
)))) {
442 (void)mdoc_nerr(m
, m
->last
, EMALLOC
);
450 if (MDOC_TEXT
!= (p
->type
= type
))
458 mdoc_tail_alloc(struct mdoc
*m
, int line
, int pos
, int tok
)
462 p
= node_alloc(m
, line
, pos
, tok
, MDOC_TAIL
);
465 return(node_append(m
, p
));
470 mdoc_head_alloc(struct mdoc
*m
, int line
, int pos
, int tok
)
477 p
= node_alloc(m
, line
, pos
, tok
, MDOC_HEAD
);
480 return(node_append(m
, p
));
485 mdoc_body_alloc(struct mdoc
*m
, int line
, int pos
, int tok
)
489 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BODY
);
492 return(node_append(m
, p
));
497 mdoc_block_alloc(struct mdoc
*m
, int line
, int pos
,
498 int tok
, struct mdoc_arg
*args
)
502 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BLOCK
);
508 return(node_append(m
, p
));
513 mdoc_elem_alloc(struct mdoc
*m
, int line
, int pos
,
514 int tok
, struct mdoc_arg
*args
)
518 p
= node_alloc(m
, line
, pos
, tok
, MDOC_ELEM
);
524 return(node_append(m
, p
));
529 pstring(struct mdoc
*m
, int line
, int pos
, const char *p
, size_t len
)
534 n
= node_alloc(m
, line
, pos
, -1, MDOC_TEXT
);
536 return(mdoc_nerr(m
, m
->last
, EMALLOC
));
538 n
->string
= malloc(len
+ 1);
539 if (NULL
== n
->string
) {
541 return(mdoc_nerr(m
, m
->last
, EMALLOC
));
544 sv
= strlcpy(n
->string
, p
, len
+ 1);
546 /* Prohibit truncation. */
547 assert(sv
< len
+ 1);
549 return(node_append(m
, n
));
/*
 * Append the whole nil-terminated string `p' to the parse tree as a
 * text node by passing it, with its full length, to pstring().
 * Returns pstring()'s result.
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	size_t		 len;

	len = strlen(p);

	return(pstring(m, line, pos, p, len));
}
562 mdoc_node_free(struct mdoc_node
*p
)
570 mdoc_argv_free(p
->args
);
576 mdoc_node_freelist(struct mdoc_node
*p
)
580 mdoc_node_freelist(p
->child
);
582 mdoc_node_freelist(p
->next
);
584 assert(0 == p
->nchild
);
590 * Parse free-form text, that is, a line that does not begin with the
594 parsetext(struct mdoc
*m
, int line
, char *buf
)
598 if (SEC_NONE
== m
->lastnamed
)
599 return(mdoc_perr(m
, line
, 0, ETEXTPROL
));
602 * If in literal mode, then pass the buffer directly to the
603 * back-end, as it should be preserved as a single term.
606 if (MDOC_LITERAL
& m
->flags
) {
607 if ( ! mdoc_word_alloc(m
, line
, 0, buf
))
609 m
->next
= MDOC_NEXT_SIBLING
;
613 /* Disallow blank/white-space lines in non-literal mode. */
615 for (i
= 0; ' ' == buf
[i
]; i
++)
616 /* Skip leading whitespace. */ ;
618 return(mdoc_perr(m
, line
, 0, ENOBLANK
));
621 * Break apart a free-form line into tokens. Spaces are
622 * stripped out of the input.
625 for (j
= i
; buf
[i
]; i
++) {
629 /* Escaped whitespace. */
630 if (i
&& ' ' == buf
[i
] && '\\' == buf
[i
- 1])
634 if ( ! pstring(m
, line
, j
, &buf
[j
], (size_t)(i
- j
)))
636 m
->next
= MDOC_NEXT_SIBLING
;
638 for ( ; ' ' == buf
[i
]; i
++)
639 /* Skip trailing whitespace. */ ;
646 if (j
!= i
&& ! pstring(m
, line
, j
, &buf
[j
], (size_t)(i
- j
)))
649 m
->next
= MDOC_NEXT_SIBLING
;
657 macrowarn(struct mdoc
*m
, int ln
, const char *buf
)
659 if ( ! (MDOC_IGN_MACRO
& m
->pflags
))
660 return(mdoc_verr(m
, ln
, 1,
661 "unknown macro: %s%s",
662 buf
, strlen(buf
) > 3 ? "..." : ""));
663 return(mdoc_vwarn(m
, ln
, 1, "unknown macro: %s%s",
664 buf
, strlen(buf
) > 3 ? "..." : ""));
669 * Parse a macro line, that is, a line beginning with the control
673 parsemacro(struct mdoc
*m
, int ln
, char *buf
)
678 /* Empty lines are ignored. */
685 while (buf
[i
] && ' ' == buf
[i
])
689 return(mdoc_perr(m
, ln
, 1, ESPACE
));
692 /* Copy the first word into a nil-terminated buffer. */
694 for (i
= 1; i
< 5; i
++) {
695 if (0 == (mac
[i
- 1] = buf
[i
]))
697 else if (' ' == buf
[i
])
703 if (i
== 5 || i
<= 2) {
704 if ( ! macrowarn(m
, ln
, mac
))
709 if (MDOC_MAX
== (c
= mdoc_hash_find(m
->htab
, mac
))) {
710 if ( ! macrowarn(m
, ln
, mac
))
715 /* The macro is sane. Jump to the next word. */
717 while (buf
[i
] && ' ' == buf
[i
])
720 /* Begin recursive parse sequence. */
722 if ( ! mdoc_macro(m
, c
, ln
, 1, &i
, buf
))
727 err
: /* Error out. */
729 m
->flags
|= MDOC_HALT
;