]>
git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
38f97ed16c9ba7df8fe1a69dd7a25c93d88ae49a
1 /* $Id: mdoc.c,v 1.130 2010/05/13 06:22:11 kristaps Exp $ */
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/types.h>
32 #include "libmandoc.h"
34 const char *const __mdoc_merrnames
[MERRMAX
] = {
35 "trailing whitespace", /* ETAILWS */
36 "unexpected quoted parameter", /* EQUOTPARM */
37 "unterminated quoted parameter", /* EQUOTTERM */
38 "argument parameter suggested", /* EARGVAL */
39 "macro disallowed in prologue", /* EBODYPROL */
40 "macro disallowed in body", /* EPROLBODY */
41 "text disallowed in prologue", /* ETEXTPROL */
42 "blank line disallowed", /* ENOBLANK */
43 "text parameter too long", /* ETOOLONG */
44 "invalid escape sequence", /* EESCAPE */
45 "invalid character", /* EPRINT */
46 "document has no body", /* ENODAT */
47 "document has no prologue", /* ENOPROLOGUE */
48 "expected line arguments", /* ELINE */
49 "invalid AT&T argument", /* EATT */
50 "default name not yet set", /* ENAME */
51 "missing list type", /* ELISTTYPE */
52 "missing display type", /* EDISPTYPE */
53 "too many display types", /* EMULTIDISP */
54 "too many list types", /* EMULTILIST */
55 "NAME section must be first", /* ESECNAME */
56 "badly-formed NAME section", /* ENAMESECINC */
57 "argument repeated", /* EARGREP */
58 "expected boolean parameter", /* EBOOL */
59 "inconsistent column syntax", /* ECOLMIS */
60 "nested display invalid", /* ENESTDISP */
61 "width argument missing", /* EMISSWIDTH */
62 "invalid section for this manual section", /* EWRONGMSEC */
63 "section out of conventional order", /* ESECOOO */
64 "section repeated", /* ESECREP */
65 "invalid standard argument", /* EBADSTAND */
66 "multi-line arguments discouraged", /* ENOMULTILINE */
67 "multi-line arguments suggested", /* EMULTILINE */
68 "line arguments discouraged", /* ENOLINE */
69 "prologue macro out of conventional order", /* EPROLOOO */
70 "prologue macro repeated", /* EPROLREP */
71 "invalid manual section", /* EBADMSEC */
72 "invalid section", /* EBADSEC */
73 "invalid font mode", /* EFONT */
74 "invalid date syntax", /* EBADDATE */
75 "invalid number format", /* ENUMFMT */
76 "superfluous width argument", /* ENOWIDTH */
77 "system: utsname error", /* EUTSNAME */
78 "obsolete macro", /* EOBS */
79 "end-of-line scope violation", /* EIMPBRK */
80 "empty macro ignored", /* EIGNE */
81 "unclosed explicit scope", /* EOPEN */
82 "unterminated quoted phrase", /* EQUOTPHR */
83 "closure macro without prior context", /* ENOCTX */
84 "no description found for library", /* ELIB */
85 "bad child for parent context", /* EBADCHILD */
86 "list arguments preceding type", /* ENOTYPE */
87 "deprecated comment style", /* EBADCOMMENT */
90 const char *const __mdoc_macronames
[MDOC_MAX
] = {
91 "Ap", "Dd", "Dt", "Os",
92 "Sh", "Ss", "Pp", "D1",
93 "Dl", "Bd", "Ed", "Bl",
94 "El", "It", "Ad", "An",
95 "Ar", "Cd", "Cm", "Dv",
96 "Er", "Ev", "Ex", "Fa",
97 "Fd", "Fl", "Fn", "Ft",
98 "Ic", "In", "Li", "Nd",
99 "Nm", "Op", "Ot", "Pa",
100 "Rv", "St", "Va", "Vt",
102 "Xr", "%A", "%B", "%D",
104 "%I", "%J", "%N", "%O",
106 "%P", "%R", "%T", "%V",
107 "Ac", "Ao", "Aq", "At",
108 "Bc", "Bf", "Bo", "Bq",
109 "Bsx", "Bx", "Db", "Dc",
110 "Do", "Dq", "Ec", "Ef",
111 "Em", "Eo", "Fx", "Ms",
112 "No", "Ns", "Nx", "Ox",
113 "Pc", "Pf", "Po", "Pq",
114 "Qc", "Ql", "Qo", "Qq",
115 "Re", "Rs", "Sc", "So",
116 "Sq", "Sm", "Sx", "Sy",
117 "Tn", "Ux", "Xc", "Xo",
118 "Fo", "Fc", "Oo", "Oc",
119 "Bk", "Ek", "Bt", "Hf",
120 "Fr", "Ud", "Lb", "Lp",
121 "Lk", "Mt", "Brq", "Bro",
123 "Brc", "%C", "Es", "En",
125 "Dx", "%Q", "br", "sp",
130 const char *const __mdoc_argnames
[MDOC_ARG_MAX
] = {
131 "split", "nosplit", "ragged",
132 "unfilled", "literal", "file",
133 "offset", "bullet", "dash",
134 "hyphen", "item", "enum",
135 "tag", "diag", "hang",
136 "ohang", "inset", "column",
137 "width", "compact", "std",
138 "filled", "words", "emphasis",
139 "symbolic", "nested", "centered"
142 const char * const *mdoc_macronames
= __mdoc_macronames
;
143 const char * const *mdoc_argnames
= __mdoc_argnames
;
145 static void mdoc_node_free(struct mdoc_node
*);
146 static void mdoc_node_unlink(struct mdoc
*,
148 static void mdoc_free1(struct mdoc
*);
149 static void mdoc_alloc1(struct mdoc
*);
150 static struct mdoc_node
*node_alloc(struct mdoc
*, int, int,
151 enum mdoct
, enum mdoc_type
);
152 static int node_append(struct mdoc
*,
154 static int mdoc_ptext(struct mdoc
*, int, char *);
155 static int mdoc_pmacro(struct mdoc
*, int, char *);
156 static int macrowarn(struct mdoc
*, int, const char *);
159 const struct mdoc_node
*
160 mdoc_node(const struct mdoc
*m
)
163 return(MDOC_HALT
& m
->flags
? NULL
: m
->first
);
167 const struct mdoc_meta
*
168 mdoc_meta(const struct mdoc
*m
)
171 return(MDOC_HALT
& m
->flags
? NULL
: &m
->meta
);
176 * Frees volatile resources (parse tree, meta-data, fields).
179 mdoc_free1(struct mdoc
*mdoc
)
183 mdoc_node_delete(mdoc
, mdoc
->first
);
184 if (mdoc
->meta
.title
)
185 free(mdoc
->meta
.title
);
189 free(mdoc
->meta
.name
);
191 free(mdoc
->meta
.arch
);
193 free(mdoc
->meta
.vol
);
198 * Allocate all volatile resources (parse tree, meta-data, fields).
201 mdoc_alloc1(struct mdoc
*mdoc
)
204 memset(&mdoc
->meta
, 0, sizeof(struct mdoc_meta
));
206 mdoc
->lastnamed
= mdoc
->lastsec
= SEC_NONE
;
207 mdoc
->last
= mandoc_calloc(1, sizeof(struct mdoc_node
));
208 mdoc
->first
= mdoc
->last
;
209 mdoc
->last
->type
= MDOC_ROOT
;
210 mdoc
->next
= MDOC_NEXT_CHILD
;
215 * Free up volatile resources (see mdoc_free1()), then re-initialise the
216 * data with mdoc_alloc1(). After invocation, parse data has been reset
217 * and the parser is ready for re-invocation on a new tree; however,
218 * cross-parse non-volatile data is kept intact.
221 mdoc_reset(struct mdoc
*mdoc
)
230 * Completely free up all volatile and non-volatile parse resources.
231 * After invocation, the pointer is no longer usable.
234 mdoc_free(struct mdoc
*mdoc
)
243 * Allocate volatile and non-volatile parse resources.
246 mdoc_alloc(void *data
, int pflags
, const struct mdoc_cb
*cb
)
250 p
= mandoc_calloc(1, sizeof(struct mdoc
));
253 memcpy(&p
->cb
, cb
, sizeof(struct mdoc_cb
));
265 * Climb back up the parse tree, validating open scopes. Mostly calls
266 * through to mdoc_macroend() in macro.c.
269 mdoc_endparse(struct mdoc
*m
)
272 if (MDOC_HALT
& m
->flags
)
274 else if (mdoc_macroend(m
))
276 m
->flags
|= MDOC_HALT
;
282 * Main parse routine. Parses a single line -- really just hands off to
283 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
286 mdoc_parseln(struct mdoc
*m
, int ln
, char *buf
)
289 if (MDOC_HALT
& m
->flags
)
292 m
->flags
|= MDOC_NEWLINE
;
294 mdoc_pmacro(m
, ln
, buf
) :
295 mdoc_ptext(m
, ln
, buf
));
300 mdoc_verr(struct mdoc
*mdoc
, int ln
, int pos
,
301 const char *fmt
, ...)
306 if (NULL
== mdoc
->cb
.mdoc_err
)
310 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
313 return((*mdoc
->cb
.mdoc_err
)(mdoc
->data
, ln
, pos
, buf
));
318 mdoc_vwarn(struct mdoc
*mdoc
, int ln
, int pos
, const char *fmt
, ...)
323 if (NULL
== mdoc
->cb
.mdoc_warn
)
327 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
330 return((*mdoc
->cb
.mdoc_warn
)(mdoc
->data
, ln
, pos
, buf
));
335 mdoc_err(struct mdoc
*m
, int line
, int pos
, int iserr
, enum merr type
)
339 p
= __mdoc_merrnames
[(int)type
];
343 return(mdoc_verr(m
, line
, pos
, p
));
345 return(mdoc_vwarn(m
, line
, pos
, p
));
350 mdoc_macro(struct mdoc
*m
, enum mdoct tok
,
351 int ln
, int pp
, int *pos
, char *buf
)
353 assert(tok
< MDOC_MAX
);
355 /* If we're in the body, deny prologue calls. */
357 if (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
&&
358 MDOC_PBODY
& m
->flags
)
359 return(mdoc_perr(m
, ln
, pp
, EPROLBODY
));
361 /* If we're in the prologue, deny "body" macros. */
363 if ( ! (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
) &&
364 ! (MDOC_PBODY
& m
->flags
)) {
365 if ( ! mdoc_pwarn(m
, ln
, pp
, EBODYPROL
))
367 if (NULL
== m
->meta
.title
)
368 m
->meta
.title
= mandoc_strdup("unknown");
369 if (NULL
== m
->meta
.vol
)
370 m
->meta
.vol
= mandoc_strdup("local");
371 if (NULL
== m
->meta
.os
)
372 m
->meta
.os
= mandoc_strdup("local");
373 if (0 == m
->meta
.date
)
374 m
->meta
.date
= time(NULL
);
375 m
->flags
|= MDOC_PBODY
;
378 return((*mdoc_macros
[tok
].fp
)(m
, tok
, ln
, pp
, pos
, buf
));
383 node_append(struct mdoc
*mdoc
, struct mdoc_node
*p
)
388 assert(MDOC_ROOT
!= p
->type
);
390 switch (mdoc
->next
) {
391 case (MDOC_NEXT_SIBLING
):
392 mdoc
->last
->next
= p
;
393 p
->prev
= mdoc
->last
;
394 p
->parent
= mdoc
->last
->parent
;
396 case (MDOC_NEXT_CHILD
):
397 mdoc
->last
->child
= p
;
398 p
->parent
= mdoc
->last
;
407 if ( ! mdoc_valid_pre(mdoc
, p
))
409 if ( ! mdoc_action_pre(mdoc
, p
))
414 assert(MDOC_BLOCK
== p
->parent
->type
);
418 assert(MDOC_BLOCK
== p
->parent
->type
);
422 assert(MDOC_BLOCK
== p
->parent
->type
);
433 if ( ! mdoc_valid_post(mdoc
))
435 if ( ! mdoc_action_post(mdoc
))
446 static struct mdoc_node
*
447 node_alloc(struct mdoc
*m
, int line
, int pos
,
448 enum mdoct tok
, enum mdoc_type type
)
452 p
= mandoc_calloc(1, sizeof(struct mdoc_node
));
458 if (MDOC_NEWLINE
& m
->flags
)
459 p
->flags
|= MDOC_LINE
;
460 m
->flags
&= ~MDOC_NEWLINE
;
466 mdoc_tail_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
470 p
= node_alloc(m
, line
, pos
, tok
, MDOC_TAIL
);
471 if ( ! node_append(m
, p
))
473 m
->next
= MDOC_NEXT_CHILD
;
479 mdoc_head_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
486 p
= node_alloc(m
, line
, pos
, tok
, MDOC_HEAD
);
487 if ( ! node_append(m
, p
))
489 m
->next
= MDOC_NEXT_CHILD
;
495 mdoc_body_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
499 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BODY
);
500 if ( ! node_append(m
, p
))
502 m
->next
= MDOC_NEXT_CHILD
;
508 mdoc_block_alloc(struct mdoc
*m
, int line
, int pos
,
509 enum mdoct tok
, struct mdoc_arg
*args
)
513 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BLOCK
);
517 if ( ! node_append(m
, p
))
519 m
->next
= MDOC_NEXT_CHILD
;
525 mdoc_elem_alloc(struct mdoc
*m
, int line
, int pos
,
526 enum mdoct tok
, struct mdoc_arg
*args
)
530 p
= node_alloc(m
, line
, pos
, tok
, MDOC_ELEM
);
534 if ( ! node_append(m
, p
))
536 m
->next
= MDOC_NEXT_CHILD
;
542 mdoc_word_alloc(struct mdoc
*m
, int line
, int pos
, const char *p
)
549 n
= node_alloc(m
, line
, pos
, MDOC_MAX
, MDOC_TEXT
);
550 n
->string
= mandoc_malloc(len
+ 1);
551 sv
= strlcpy(n
->string
, p
, len
+ 1);
553 /* Prohibit truncation. */
554 assert(sv
< len
+ 1);
556 if ( ! node_append(m
, n
))
559 m
->next
= MDOC_NEXT_SIBLING
;
565 mdoc_node_free(struct mdoc_node
*p
)
571 mdoc_argv_free(p
->args
);
577 mdoc_node_unlink(struct mdoc
*m
, struct mdoc_node
*n
)
580 /* Adjust siblings. */
583 n
->prev
->next
= n
->next
;
585 n
->next
->prev
= n
->prev
;
591 if (n
->parent
->child
== n
)
592 n
->parent
->child
= n
->prev
? n
->prev
: n
->next
;
595 /* Adjust parse point, if applicable. */
597 if (m
&& m
->last
== n
) {
600 m
->next
= MDOC_NEXT_SIBLING
;
603 m
->next
= MDOC_NEXT_CHILD
;
607 if (m
&& m
->first
== n
)
613 mdoc_node_delete(struct mdoc
*m
, struct mdoc_node
*p
)
618 mdoc_node_delete(m
, p
->child
);
620 assert(0 == p
->nchild
);
622 mdoc_node_unlink(m
, p
);
628 * Parse free-form text, that is, a line that does not begin with the control character.
632 mdoc_ptext(struct mdoc
*m
, int line
, char *buf
)
636 /* Ignore bogus comments. */
638 if ('\\' == buf
[0] && '.' == buf
[1] && '\"' == buf
[2])
639 return(mdoc_pwarn(m
, line
, 0, EBADCOMMENT
));
641 /* No text before an initial macro. */
643 if (SEC_NONE
== m
->lastnamed
)
644 return(mdoc_perr(m
, line
, 0, ETEXTPROL
));
646 /* Literal just gets pulled in as-is. */
648 if (MDOC_LITERAL
& m
->flags
)
649 return(mdoc_word_alloc(m
, line
, 0, buf
));
651 /* Check for a blank line, which may also consist of spaces. */
653 for (i
= 0; ' ' == buf
[i
]; i
++)
654 /* Skip to first non-space. */ ;
656 if ('\0' == buf
[i
]) {
657 if ( ! mdoc_pwarn(m
, line
, 0, ENOBLANK
))
661 * Insert a `Pp' in the case of a blank line. Technically,
662 * blank lines aren't allowed, but enough manuals assume this
663 * behaviour that we want to work around it.
665 if ( ! mdoc_elem_alloc(m
, line
, 0, MDOC_Pp
, NULL
))
668 m
->next
= MDOC_NEXT_SIBLING
;
673 * Warn if the last un-escaped character is whitespace. Then
674 * strip away the remaining spaces (tabs stay!).
677 i
= (int)strlen(buf
);
680 if (' ' == buf
[i
- 1] || '\t' == buf
[i
- 1]) {
681 if (i
> 1 && '\\' != buf
[i
- 2])
682 if ( ! mdoc_pwarn(m
, line
, i
- 1, ETAILWS
))
685 for (--i
; i
&& ' ' == buf
[i
]; i
--)
686 /* Spin back to non-space. */ ;
688 /* Jump ahead of escaped whitespace. */
689 i
+= '\\' == buf
[i
] ? 2 : 1;
694 /* Allocate the whole word. */
696 if ( ! mdoc_word_alloc(m
, line
, 0, buf
))
700 * End-of-sentence check. If the last character is an unescaped
701 * EOS character, then flag the node as being the end of a
702 * sentence. The front-end will know how to interpret this.
707 if (mandoc_eos(buf
, (size_t)i
))
708 m
->last
->flags
|= MDOC_EOS
;
715 macrowarn(struct mdoc
*m
, int ln
, const char *buf
)
717 if ( ! (MDOC_IGN_MACRO
& m
->pflags
))
718 return(mdoc_verr(m
, ln
, 0, "unknown macro: %s%s",
719 buf
, strlen(buf
) > 3 ? "..." : ""));
720 return(mdoc_vwarn(m
, ln
, 0, "unknown macro: %s%s",
721 buf
, strlen(buf
) > 3 ? "..." : ""));
726 * Parse a macro line, that is, a line beginning with the control character.
730 mdoc_pmacro(struct mdoc
*m
, int ln
, char *buf
)
736 /* Empty lines are ignored. */
743 /* Accept whitespace after the initial control char. */
747 while (buf
[i
] && ' ' == buf
[i
])
755 /* Copy the first word into a NUL-terminated buffer. */
757 for (j
= 0; j
< 4; j
++, i
++) {
758 if ('\0' == (mac
[j
] = buf
[i
]))
760 else if (' ' == buf
[i
])
763 /* Check for invalid characters. */
765 if (isgraph((u_char
)buf
[i
]))
767 return(mdoc_perr(m
, ln
, i
, EPRINT
));
772 if (j
== 4 || j
< 2) {
773 if ( ! macrowarn(m
, ln
, mac
))
778 if (MDOC_MAX
== (tok
= mdoc_hash_find(mac
))) {
779 if ( ! macrowarn(m
, ln
, mac
))
784 /* The macro is sane. Jump to the next word. */
786 while (buf
[i
] && ' ' == buf
[i
])
790 * Trailing whitespace. Note that tabs are allowed to be passed
791 * into the parser as "text", so we only warn about spaces here.
794 if ('\0' == buf
[i
] && ' ' == buf
[i
- 1])
795 if ( ! mdoc_pwarn(m
, ln
, i
- 1, ETAILWS
))
799 * Begin recursive parse sequence. Since we're at the start of
800 * the line, we don't need to do callable/parseable checks.
802 if ( ! mdoc_macro(m
, tok
, ln
, sv
, &i
, buf
))
807 err
: /* Error out. */
809 m
->flags
|= MDOC_HALT
;