]>
git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
1 /* $Id: mdoc.c,v 1.109 2009/10/15 02:56:51 kristaps Exp $ */
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <sys/types.h>
/*
 * Message text for every parser diagnostic.  mdoc_err() selects an entry
 * with an enum merr value cast to int, so the rows must stay in the same
 * order as that enum; the trailing comment on each row names the
 * enumerator the row is meant to match (the enum is declared outside
 * this file -- confirm the order against it when adding a row).
 * mdoc_err() also passes the selected entry on as a printf-style format
 * string, so entries must not contain '%'.
 */
28 const char *const __mdoc_merrnames
[MERRMAX
] = {
29 "trailing whitespace", /* ETAILWS */
30 "unexpected quoted parameter", /* EQUOTPARM */
31 "unterminated quoted parameter", /* EQUOTTERM */
32 "system: malloc error", /* EMALLOC */
33 "argument parameter suggested", /* EARGVAL */
34 "macro disallowed in prologue", /* EBODYPROL */
35 "macro disallowed in body", /* EPROLBODY */
36 "text disallowed in prologue", /* ETEXTPROL */
37 "blank line disallowed", /* ENOBLANK */
38 "text parameter too long", /* ETOOLONG */
39 "invalid escape sequence", /* EESCAPE */
40 "invalid character", /* EPRINT */
41 "document has no body", /* ENODAT */
42 "document has no prologue", /* ENOPROLOGUE */
43 "expected line arguments", /* ELINE */
44 "invalid AT&T argument", /* EATT */
45 "default name not yet set", /* ENAME */
46 "missing list type", /* ELISTTYPE */
47 "missing display type", /* EDISPTYPE */
48 "too many display types", /* EMULTIDISP */
49 "too many list types", /* EMULTILIST */
50 "NAME section must be first", /* ESECNAME */
51 "badly-formed NAME section", /* ENAMESECINC */
52 "argument repeated", /* EARGREP */
53 "expected boolean parameter", /* EBOOL */
54 "inconsistent column syntax", /* ECOLMIS */
55 "nested display invalid", /* ENESTDISP */
56 "width argument missing", /* EMISSWIDTH */
57 "invalid section for this manual section", /* EWRONGMSEC */
58 "section out of conventional order", /* ESECOOO */
59 "section repeated", /* ESECREP */
60 "invalid standard argument", /* EBADSTAND */
61 "multi-line arguments discouraged", /* ENOMULTILINE */
62 "multi-line arguments suggested", /* EMULTILINE */
63 "line arguments discouraged", /* ENOLINE */
64 "prologue macro out of conventional order", /* EPROLOOO */
65 "prologue macro repeated", /* EPROLREP */
66 "invalid manual section", /* EBADMSEC */
67 "invalid section", /* EBADSEC */
68 "invalid font mode", /* EFONT */
69 "invalid date syntax", /* EBADDATE */
70 "invalid number format", /* ENUMFMT */
71 "superfluous width argument", /* ENOWIDTH */
72 "system: utsname error", /* EUTSNAME */
73 "obsolete macro", /* EOBS */
74 "end-of-line scope violation", /* EIMPBRK */
75 "empty macro ignored", /* EIGNE */
76 "unclosed explicit scope", /* EOPEN */
77 "unterminated quoted phrase", /* EQUOTPHR */
78 "closure macro without prior context", /* ENOCTX */
79 "no description found for library", /* ELIB */
80 "bad child for parent context", /* EBADCHILD */
81 "list arguments preceding type", /* ENOTYPE */
/*
 * Printable name of each macro; the array is sized MDOC_MAX.
 * NOTE(review): presumably indexed by macro token, in the same order
 * as the token constants declared outside this file -- confirm before
 * inserting or reordering entries.
 */
84 const char *const __mdoc_macronames
[MDOC_MAX
] = {
85 "Ap", "Dd", "Dt", "Os",
86 "Sh", "Ss", "Pp", "D1",
87 "Dl", "Bd", "Ed", "Bl",
88 "El", "It", "Ad", "An",
89 "Ar", "Cd", "Cm", "Dv",
90 "Er", "Ev", "Ex", "Fa",
91 "Fd", "Fl", "Fn", "Ft",
92 "Ic", "In", "Li", "Nd",
93 "Nm", "Op", "Ot", "Pa",
94 "Rv", "St", "Va", "Vt",
96 "Xr", "\%A", "\%B", "\%D",
98 "\%I", "\%J", "\%N", "\%O",
100 "\%P", "\%R", "\%T", "\%V",
101 "Ac", "Ao", "Aq", "At",
102 "Bc", "Bf", "Bo", "Bq",
103 "Bsx", "Bx", "Db", "Dc",
104 "Do", "Dq", "Ec", "Ef",
105 "Em", "Eo", "Fx", "Ms",
106 "No", "Ns", "Nx", "Ox",
107 "Pc", "Pf", "Po", "Pq",
108 "Qc", "Ql", "Qo", "Qq",
109 "Re", "Rs", "Sc", "So",
110 "Sq", "Sm", "Sx", "Sy",
111 "Tn", "Ux", "Xc", "Xo",
112 "Fo", "Fc", "Oo", "Oc",
113 "Bk", "Ek", "Bt", "Hf",
114 "Fr", "Ud", "Lb", "Lp",
115 "Lk", "Mt", "Brq", "Bro",
117 "Brc", "\%C", "Es", "En",
119 "Dx", "\%Q", "br", "sp"
/*
 * Printable name of each macro argument; the array is sized
 * MDOC_ARG_MAX.
 * NOTE(review): presumably indexed by the argument constants declared
 * outside this file, in this order -- confirm before reordering.
 */
122 const char *const __mdoc_argnames
[MDOC_ARG_MAX
] = {
123 "split", "nosplit", "ragged",
124 "unfilled", "literal", "file",
125 "offset", "bullet", "dash",
126 "hyphen", "item", "enum",
127 "tag", "diag", "hang",
128 "ohang", "inset", "column",
129 "width", "compact", "std",
130 "filled", "words", "emphasis",
131 "symbolic", "nested", "centered"
/*
 * Pointers to the first element of the macro-name and argument-name
 * tables, giving each table a second, unprefixed name.
 */
134 const char * const *mdoc_macronames
= __mdoc_macronames
;
135 const char * const *mdoc_argnames
= __mdoc_argnames
;
137 static void mdoc_free1(struct mdoc
*);
138 static int mdoc_alloc1(struct mdoc
*);
139 static struct mdoc_node
*node_alloc(struct mdoc
*, int, int,
140 int, enum mdoc_type
);
141 static int node_append(struct mdoc
*,
143 static int parsetext(struct mdoc
*, int, char *);
144 static int parsemacro(struct mdoc
*, int, char *);
145 static int macrowarn(struct mdoc
*, int, const char *);
146 static int pstring(struct mdoc
*, int, int,
147 const char *, size_t);
150 extern size_t strlcpy(char *, const char *, size_t);
/*
 * Accessor for the parse tree.  Yields m->first (the MDOC_ROOT node
 * installed by mdoc_alloc1()), or NULL once MDOC_HALT has been set in
 * m->flags, so the tree of a halted parse is never handed out.
 */
154 const struct mdoc_node
*
155 mdoc_node(const struct mdoc
*m
)
158 return(MDOC_HALT
& m
->flags
? NULL
: m
->first
);
/*
 * Accessor for the document meta-data.  Yields a pointer to m->meta,
 * or NULL once MDOC_HALT has been set in m->flags.
 */
162 const struct mdoc_meta
*
163 mdoc_meta(const struct mdoc
*m
)
166 return(MDOC_HALT
& m
->flags
? NULL
: &m
->meta
);
171 * Frees volatile resources (parse tree, meta-data, fields).
174 mdoc_free1(struct mdoc
*mdoc
)
178 mdoc_node_freelist(mdoc
->first
);
179 if (mdoc
->meta
.title
)
180 free(mdoc
->meta
.title
);
184 free(mdoc
->meta
.name
);
186 free(mdoc
->meta
.arch
);
188 free(mdoc
->meta
.vol
);
193 * Allocate all volatile resources (parse tree, meta-data, fields).
196 mdoc_alloc1(struct mdoc
*mdoc
)
199 bzero(&mdoc
->meta
, sizeof(struct mdoc_meta
));
201 mdoc
->lastnamed
= mdoc
->lastsec
= SEC_NONE
;
202 mdoc
->last
= calloc(1, sizeof(struct mdoc_node
));
203 if (NULL
== mdoc
->last
)
206 mdoc
->first
= mdoc
->last
;
207 mdoc
->last
->type
= MDOC_ROOT
;
208 mdoc
->next
= MDOC_NEXT_CHILD
;
214 * Frees up volatile resources (see mdoc_free1()), then re-initialises the
215 * data with mdoc_alloc1(). After invocation, parse data has been reset
216 * and the parser is ready for re-invocation on a new tree; however,
217 * cross-parse non-volatile data is kept intact.
220 mdoc_reset(struct mdoc
*mdoc
)
224 return(mdoc_alloc1(mdoc
));
229 * Completely free up all volatile and non-volatile parse resources.
230 * After invocation, the pointer is no longer usable.
233 mdoc_free(struct mdoc
*mdoc
)
242 * Allocate volatile and non-volatile parse resources.
245 mdoc_alloc(void *data
, int pflags
, const struct mdoc_cb
*cb
)
249 if (NULL
== (p
= calloc(1, sizeof(struct mdoc
))))
252 (void)memcpy(&p
->cb
, cb
, sizeof(struct mdoc_cb
));
268 * Climb back up the parse tree, validating open scopes. Mostly calls
269 * through to mdoc_macroend() in macro.c.
272 mdoc_endparse(struct mdoc
*m
)
275 if (MDOC_HALT
& m
->flags
)
277 else if (mdoc_macroend(m
))
279 m
->flags
|= MDOC_HALT
;
285 * Main parse routine. Parses a single line -- really just hands off to
286 * the macro (parsemacro()) or text parser (parsetext()).
289 mdoc_parseln(struct mdoc
*m
, int ln
, char *buf
)
292 if (MDOC_HALT
& m
->flags
)
295 return('.' == *buf
? parsemacro(m
, ln
, buf
) :
296 parsetext(m
, ln
, buf
));
/*
 * Formats a printf-style message into buf and passes it to the
 * front-end's error callback (cb.mdoc_err) together with the opaque
 * mdoc->data pointer, the line number and the position; the callback's
 * return value is returned unchanged.  A NULL callback is tested for
 * first; what is returned in that case is not visible here.
 * fmt is interpreted by vsnprintf(), so it must never be text taken
 * from the document being parsed.
 * NOTE(review): vsnprintf() already counts the terminating NUL within
 * its size argument, so "sizeof(buf) - 1" only leaves the last byte of
 * buf unused -- confirm whether that is intentional.
 */
301 mdoc_verr(struct mdoc
*mdoc
, int ln
, int pos
,
302 const char *fmt
, ...)
307 if (NULL
== mdoc
->cb
.mdoc_err
)
311 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
314 return((*mdoc
->cb
.mdoc_err
)(mdoc
->data
, ln
, pos
, buf
));
/*
 * Warning counterpart of the error reporter: formats a printf-style
 * message into buf and passes it to the front-end's warning callback
 * (cb.mdoc_warn) together with the opaque mdoc->data pointer, the line
 * number and the position; the callback's return value is returned
 * unchanged.  A NULL callback is tested for first; what is returned in
 * that case is not visible here.
 * fmt is interpreted by vsnprintf(), so it must never be text taken
 * from the document being parsed.
 * NOTE(review): vsnprintf() already counts the terminating NUL within
 * its size argument, so "sizeof(buf) - 1" only leaves the last byte of
 * buf unused -- confirm whether that is intentional.
 */
319 mdoc_vwarn(struct mdoc
*mdoc
, int ln
, int pos
, const char *fmt
, ...)
324 if (NULL
== mdoc
->cb
.mdoc_warn
)
328 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
331 return((*mdoc
->cb
.mdoc_warn
)(mdoc
->data
, ln
, pos
, buf
));
/*
 * Reports a predefined diagnostic.  The message is looked up in
 * __mdoc_merrnames by type and then issued through either mdoc_verr()
 * or mdoc_vwarn(); the test choosing between the two is not visible
 * here (presumably iserr -- confirm).
 * The table entry is handed over as the format argument, which is safe
 * only while no entry of __mdoc_merrnames contains a '%'.
 */
336 mdoc_err(struct mdoc
*m
, int line
, int pos
, int iserr
, enum merr type
)
340 p
= __mdoc_merrnames
[(int)type
];
344 return(mdoc_verr(m
, line
, pos
, p
));
346 return(mdoc_vwarn(m
, line
, pos
, p
));
/*
 * Runs the handler registered for macro tok (mdoc_macros[tok].fp),
 * forwarding m, tok, ln, pp, pos and buf, and returns the handler's
 * result.  Before dispatching, two placement checks are made against
 * the MDOC_PROLOGUE flag of the macro and the MDOC_PBODY flag of the
 * parser:
 *   - a MDOC_PROLOGUE macro while MDOC_PBODY is set is rejected with
 *     EPROLBODY;
 *   - a macro without MDOC_PROLOGUE while MDOC_PBODY is not set is
 *     rejected with EBODYPROL.
 * Both rejections return whatever mdoc_perr() returns.
 */
351 mdoc_macro(struct mdoc
*m
, int tok
,
352 int ln
, int pp
, int *pos
, char *buf
)
355 * If we're in the prologue, deny "body" macros. Similarly, if
356 * we're in the body, deny prologue calls.
358 if (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
&&
359 MDOC_PBODY
& m
->flags
)
360 return(mdoc_perr(m
, ln
, pp
, EPROLBODY
));
361 if ( ! (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
) &&
362 ! (MDOC_PBODY
& m
->flags
))
363 return(mdoc_perr(m
, ln
, pp
, EBODYPROL
));
365 return((*mdoc_macros
[tok
].fp
)(m
, tok
, ln
, pp
, pos
, buf
));
/*
 * Links node p into the parse tree next to mdoc->last.  p must not be
 * a MDOC_ROOT node (asserted).  The insertion point mdoc->next selects
 * the link:
 *   - MDOC_NEXT_SIBLING: p follows mdoc->last (last->next / p->prev are
 *     set) and shares its parent;
 *   - MDOC_NEXT_CHILD: p becomes mdoc->last->child with mdoc->last as
 *     its parent.
 * After linking, mdoc_valid_pre() and mdoc_action_pre() are run on p.
 * The three asserts require p->parent to be a MDOC_BLOCK node; the node
 * types they apply to are not visible here.  mdoc_valid_post() and
 * mdoc_action_post() are invoked at the end; the condition guarding
 * them and the failure returns are likewise not visible.
 */
370 node_append(struct mdoc
*mdoc
, struct mdoc_node
*p
)
375 assert(MDOC_ROOT
!= p
->type
);
377 switch (mdoc
->next
) {
378 case (MDOC_NEXT_SIBLING
):
379 mdoc
->last
->next
= p
;
380 p
->prev
= mdoc
->last
;
381 p
->parent
= mdoc
->last
->parent
;
383 case (MDOC_NEXT_CHILD
):
384 mdoc
->last
->child
= p
;
385 p
->parent
= mdoc
->last
;
394 if ( ! mdoc_valid_pre(mdoc
, p
))
396 if ( ! mdoc_action_pre(mdoc
, p
))
401 assert(MDOC_BLOCK
== p
->parent
->type
);
405 assert(MDOC_BLOCK
== p
->parent
->type
);
409 assert(MDOC_BLOCK
== p
->parent
->type
);
420 if ( ! mdoc_valid_post(mdoc
))
422 if ( ! mdoc_action_post(mdoc
))
/*
 * Allocates one zero-filled node with calloc() and records type in
 * p->type.  If calloc() fails, EMALLOC is reported through mdoc_nerr()
 * against m->last.  Some further step is taken only for nodes other
 * than MDOC_TEXT; that step, the use of line, pos and tok, and the
 * return statements are not visible here.
 */
433 static struct mdoc_node
*
434 node_alloc(struct mdoc
*m
, int line
,
435 int pos
, int tok
, enum mdoc_type type
)
439 if (NULL
== (p
= calloc(1, sizeof(struct mdoc_node
)))) {
440 (void)mdoc_nerr(m
, m
->last
, EMALLOC
);
448 if (MDOC_TEXT
!= (p
->type
= type
))
/*
 * Creates a MDOC_TAIL node for macro tok at (line, pos) with
 * node_alloc() and links it into the tree with node_append().  The
 * insertion point is then set to MDOC_NEXT_CHILD, so the next node
 * appended is attached as a child of m->last.  The handling of a
 * failed node_alloc() or node_append() is not visible here.
 */
456 mdoc_tail_alloc(struct mdoc
*m
, int line
, int pos
, int tok
)
460 p
= node_alloc(m
, line
, pos
, tok
, MDOC_TAIL
);
463 if ( ! node_append(m
, p
))
465 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Creates a MDOC_HEAD node for macro tok at (line, pos) with
 * node_alloc() and links it into the tree with node_append().  The
 * insertion point is then set to MDOC_NEXT_CHILD, so the next node
 * appended is attached as a child of m->last.  The handling of a
 * failed node_alloc() or node_append() is not visible here.
 */
471 mdoc_head_alloc(struct mdoc
*m
, int line
, int pos
, int tok
)
478 p
= node_alloc(m
, line
, pos
, tok
, MDOC_HEAD
);
481 if ( ! node_append(m
, p
))
483 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Creates a MDOC_BODY node for macro tok at (line, pos) with
 * node_alloc() and links it into the tree with node_append().  The
 * insertion point is then set to MDOC_NEXT_CHILD, so the next node
 * appended is attached as a child of m->last.  The handling of a
 * failed node_alloc() or node_append() is not visible here.
 */
489 mdoc_body_alloc(struct mdoc
*m
, int line
, int pos
, int tok
)
493 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BODY
);
496 if ( ! node_append(m
, p
))
498 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Creates a MDOC_BLOCK node for macro tok at (line, pos) with
 * node_alloc() and links it into the tree with node_append().  The
 * insertion point is then set to MDOC_NEXT_CHILD, so the next node
 * appended is attached as a child of m->last.
 * What is done with args, and the handling of a failed node_alloc()
 * or node_append(), are not visible here.
 */
504 mdoc_block_alloc(struct mdoc
*m
, int line
, int pos
,
505 int tok
, struct mdoc_arg
*args
)
509 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BLOCK
);
515 if ( ! node_append(m
, p
))
517 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Creates a MDOC_ELEM node for macro tok at (line, pos) with
 * node_alloc() and links it into the tree with node_append().  The
 * insertion point is then set to MDOC_NEXT_CHILD, so the next node
 * appended is attached as a child of m->last.
 * What is done with args, and the handling of a failed node_alloc()
 * or node_append(), are not visible here.
 */
523 mdoc_elem_alloc(struct mdoc
*m
, int line
, int pos
,
524 int tok
, struct mdoc_arg
*args
)
528 p
= node_alloc(m
, line
, pos
, tok
, MDOC_ELEM
);
534 if ( ! node_append(m
, p
))
536 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Creates a MDOC_TEXT node (token -1) at (line, pos) whose string is a
 * private copy of p.  len + 1 bytes are allocated and filled with
 * strlcpy(), which copies at most len bytes and NUL-terminates the
 * result.  The assert rejects truncation, so p has to be a
 * NUL-terminated string of at most len bytes.  A failed malloc() is
 * reported as EMALLOC through mdoc_nerr().  After node_append() the
 * insertion point is set to MDOC_NEXT_SIBLING.
 * NOTE(review): EMALLOC is also reported here after node_alloc(), which
 * already reports EMALLOC itself when its calloc() fails -- this looks
 * like a duplicate report; the guarding test is not visible, confirm.
 */
542 pstring(struct mdoc
*m
, int line
, int pos
, const char *p
, size_t len
)
547 n
= node_alloc(m
, line
, pos
, -1, MDOC_TEXT
);
549 return(mdoc_nerr(m
, m
->last
, EMALLOC
));
551 n
->string
= malloc(len
+ 1);
552 if (NULL
== n
->string
) {
554 return(mdoc_nerr(m
, m
->last
, EMALLOC
));
557 sv
= strlcpy(n
->string
, p
, len
+ 1);
559 /* Prohibit truncation. */
560 assert(sv
< len
+ 1);
562 if ( ! node_append(m
, n
))
564 m
->next
= MDOC_NEXT_SIBLING
;
/*
 * Appends the whole NUL-terminated string p to the tree as a text
 * node: a thin wrapper that calls pstring() with strlen(p) as the
 * length and returns its result.
 */
570 mdoc_word_alloc(struct mdoc
*m
, int line
, int pos
, const char *p
)
573 return(pstring(m
, line
, pos
, p
, strlen(p
)));
/*
 * Releases a single node.  Only the release of p->args through
 * mdoc_argv_free() is visible here; any guard around that call and the
 * freeing of the node's other members are not.
 */
578 mdoc_node_free(struct mdoc_node
*p
)
586 mdoc_argv_free(p
->args
);
/*
 * Releases a subtree: recurses into p->child and into p->next, then
 * asserts that p->nchild has dropped to zero.  The guards around the
 * recursive calls and the release of p itself are not visible here.
 * NOTE(review): siblings are walked by recursion, so call depth grows
 * with the length of a sibling chain as well as with tree depth.
 */
592 mdoc_node_freelist(struct mdoc_node
*p
)
596 mdoc_node_freelist(p
->child
);
598 mdoc_node_freelist(p
->next
);
600 assert(0 == p
->nchild
);
606 * Parse free-form text, that is, a line that does not begin with the control character ('.').
610 parsetext(struct mdoc
*m
, int line
, char *buf
)
614 if (SEC_NONE
== m
->lastnamed
)
615 return(mdoc_perr(m
, line
, 0, ETEXTPROL
));
618 * If in literal mode, then pass the buffer directly to the
619 * back-end, as it should be preserved as a single term.
622 if (MDOC_LITERAL
& m
->flags
)
623 return(mdoc_word_alloc(m
, line
, 0, buf
));
625 /* Disallow blank/white-space lines in non-literal mode. */
627 for (i
= 0; ' ' == buf
[i
]; i
++)
628 /* Skip leading whitespace. */ ;
630 return(mdoc_perr(m
, line
, 0, ENOBLANK
));
633 * Break apart a free-form line into tokens. Spaces are
634 * stripped out of the input.
637 for (j
= i
; buf
[i
]; i
++) {
641 /* Escaped whitespace. */
642 if (i
&& ' ' == buf
[i
] && '\\' == buf
[i
- 1])
646 if ( ! pstring(m
, line
, j
, &buf
[j
], (size_t)(i
- j
)))
649 for ( ; ' ' == buf
[i
]; i
++)
650 /* Skip trailing whitespace. */ ;
657 if (j
!= i
&& ! pstring(m
, line
, j
, &buf
[j
], (size_t)(i
- j
)))
660 m
->next
= MDOC_NEXT_SIBLING
;
/*
 * Reports an unknown macro name found on line ln.  Unless
 * MDOC_IGN_MACRO is set in m->pflags the report is an error
 * (mdoc_verr()); with the flag set it is downgraded to a warning
 * (mdoc_vwarn()).  "..." is appended to the name when buf is longer
 * than three characters.  Returns the result of the reporting call.
 */
668 macrowarn(struct mdoc
*m
, int ln
, const char *buf
)
670 if ( ! (MDOC_IGN_MACRO
& m
->pflags
))
671 return(mdoc_verr(m
, ln
, 0,
672 "unknown macro: %s%s",
673 buf
, strlen(buf
) > 3 ? "..." : ""));
674 return(mdoc_vwarn(m
, ln
, 0, "unknown macro: %s%s",
675 buf
, strlen(buf
) > 3 ? "..." : ""));
680 * Parse a macro line, that is, a line beginning with the control character ('.').
684 parsemacro(struct mdoc
*m
, int ln
, char *buf
)
689 /* Empty lines are ignored. */
696 /* Accept whitespace after the initial control char. */
700 while (buf
[i
] && ' ' == buf
[i
])
706 /* Copy the first word into a nil-terminated buffer. */
708 for (j
= 0; j
< 4; j
++, i
++) {
709 if (0 == (mac
[j
] = buf
[i
]))
711 else if (' ' == buf
[i
])
714 /* Check for invalid characters. */
716 if (isgraph((u_char
)buf
[i
]))
718 return(mdoc_perr(m
, ln
, i
, EPRINT
));
723 if (j
== 4 || j
< 2) {
724 if ( ! macrowarn(m
, ln
, mac
))
729 if (MDOC_MAX
== (c
= mdoc_hash_find(mac
))) {
730 if ( ! macrowarn(m
, ln
, mac
))
735 /* The macro is sane. Jump to the next word. */
737 while (buf
[i
] && ' ' == buf
[i
])
741 * Begin recursive parse sequence. Since we're at the start of
742 * the line, we don't need to do callable/parseable checks.
744 if ( ! mdoc_macro(m
, c
, ln
, 1, &i
, buf
))
749 err
: /* Error out. */
751 m
->flags
|= MDOC_HALT
;