]>
git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
/*	$Id: mdoc.c,v 1.134 2010/05/16 00:04:46 kristaps Exp $ */
/*
 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
21 #include <sys/types.h>
32 #include "libmandoc.h"
34 const char *const __mdoc_merrnames
[MERRMAX
] = {
35 "trailing whitespace", /* ETAILWS */
36 "unexpected quoted parameter", /* EQUOTPARM */
37 "unterminated quoted parameter", /* EQUOTTERM */
38 "argument parameter suggested", /* EARGVAL */
39 "macro disallowed in prologue", /* EBODYPROL */
40 "macro disallowed in body", /* EPROLBODY */
41 "text disallowed in prologue", /* ETEXTPROL */
42 "blank line disallowed", /* ENOBLANK */
43 "text parameter too long", /* ETOOLONG */
44 "invalid escape sequence", /* EESCAPE */
45 "invalid character", /* EPRINT */
46 "document has no body", /* ENODAT */
47 "document has no prologue", /* ENOPROLOGUE */
48 "expected line arguments", /* ELINE */
49 "invalid AT&T argument", /* EATT */
50 "default name not yet set", /* ENAME */
51 "missing list type", /* ELISTTYPE */
52 "missing display type", /* EDISPTYPE */
53 "too many display types", /* EMULTIDISP */
54 "too many list types", /* EMULTILIST */
55 "NAME section must be first", /* ESECNAME */
56 "badly-formed NAME section", /* ENAMESECINC */
57 "argument repeated", /* EARGREP */
58 "expected boolean parameter", /* EBOOL */
59 "inconsistent column syntax", /* ECOLMIS */
60 "nested display invalid", /* ENESTDISP */
61 "width argument missing", /* EMISSWIDTH */
62 "invalid section for this manual section", /* EWRONGMSEC */
63 "section out of conventional order", /* ESECOOO */
64 "section repeated", /* ESECREP */
65 "invalid standard argument", /* EBADSTAND */
66 "multi-line arguments discouraged", /* ENOMULTILINE */
67 "multi-line arguments suggested", /* EMULTILINE */
68 "line arguments discouraged", /* ENOLINE */
69 "prologue macro out of conventional order", /* EPROLOOO */
70 "prologue macro repeated", /* EPROLREP */
71 "invalid manual section", /* EBADMSEC */
72 "invalid font mode", /* EFONT */
73 "invalid date syntax", /* EBADDATE */
74 "invalid number format", /* ENUMFMT */
75 "superfluous width argument", /* ENOWIDTH */
76 "system: utsname error", /* EUTSNAME */
77 "obsolete macro", /* EOBS */
78 "end-of-line scope violation", /* EIMPBRK */
79 "empty macro ignored", /* EIGNE */
80 "unclosed explicit scope", /* EOPEN */
81 "unterminated quoted phrase", /* EQUOTPHR */
82 "closure macro without prior context", /* ENOCTX */
83 "no description found for library", /* ELIB */
84 "bad child for parent context", /* EBADCHILD */
85 "list arguments preceding type", /* ENOTYPE */
86 "deprecated comment style", /* EBADCOMMENT */
89 const char *const __mdoc_macronames
[MDOC_MAX
] = {
90 "Ap", "Dd", "Dt", "Os",
91 "Sh", "Ss", "Pp", "D1",
92 "Dl", "Bd", "Ed", "Bl",
93 "El", "It", "Ad", "An",
94 "Ar", "Cd", "Cm", "Dv",
95 "Er", "Ev", "Ex", "Fa",
96 "Fd", "Fl", "Fn", "Ft",
97 "Ic", "In", "Li", "Nd",
98 "Nm", "Op", "Ot", "Pa",
99 "Rv", "St", "Va", "Vt",
101 "Xr", "%A", "%B", "%D",
103 "%I", "%J", "%N", "%O",
105 "%P", "%R", "%T", "%V",
106 "Ac", "Ao", "Aq", "At",
107 "Bc", "Bf", "Bo", "Bq",
108 "Bsx", "Bx", "Db", "Dc",
109 "Do", "Dq", "Ec", "Ef",
110 "Em", "Eo", "Fx", "Ms",
111 "No", "Ns", "Nx", "Ox",
112 "Pc", "Pf", "Po", "Pq",
113 "Qc", "Ql", "Qo", "Qq",
114 "Re", "Rs", "Sc", "So",
115 "Sq", "Sm", "Sx", "Sy",
116 "Tn", "Ux", "Xc", "Xo",
117 "Fo", "Fc", "Oo", "Oc",
118 "Bk", "Ek", "Bt", "Hf",
119 "Fr", "Ud", "Lb", "Lp",
120 "Lk", "Mt", "Brq", "Bro",
122 "Brc", "%C", "Es", "En",
124 "Dx", "%Q", "br", "sp",
129 const char *const __mdoc_argnames
[MDOC_ARG_MAX
] = {
130 "split", "nosplit", "ragged",
131 "unfilled", "literal", "file",
132 "offset", "bullet", "dash",
133 "hyphen", "item", "enum",
134 "tag", "diag", "hang",
135 "ohang", "inset", "column",
136 "width", "compact", "std",
137 "filled", "words", "emphasis",
138 "symbolic", "nested", "centered"
141 const char * const *mdoc_macronames
= __mdoc_macronames
;
142 const char * const *mdoc_argnames
= __mdoc_argnames
;
/*
 * Prototypes for the file-local helpers.  Parameter names mirror the
 * corresponding definitions further down in this file.
 */
static	void		  mdoc_node_free(struct mdoc_node *p);
static	void		  mdoc_node_unlink(struct mdoc *m,
				struct mdoc_node *n);
static	void		  mdoc_free1(struct mdoc *mdoc);
static	void		  mdoc_alloc1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *m, int line, int pos,
				enum mdoct tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  mdoc_ptext(struct mdoc *m, int line, char *buf);
static	int		  mdoc_pmacro(struct mdoc *m, int ln, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln, const char *buf);
158 const struct mdoc_node
*
159 mdoc_node(const struct mdoc
*m
)
162 return(MDOC_HALT
& m
->flags
? NULL
: m
->first
);
166 const struct mdoc_meta
*
167 mdoc_meta(const struct mdoc
*m
)
170 return(MDOC_HALT
& m
->flags
? NULL
: &m
->meta
);
175 * Frees volatile resources (parse tree, meta-data, fields).
178 mdoc_free1(struct mdoc
*mdoc
)
182 mdoc_node_delete(mdoc
, mdoc
->first
);
183 if (mdoc
->meta
.title
)
184 free(mdoc
->meta
.title
);
188 free(mdoc
->meta
.name
);
190 free(mdoc
->meta
.arch
);
192 free(mdoc
->meta
.vol
);
194 free(mdoc
->meta
.msec
);
199 * Allocate all volatile resources (parse tree, meta-data, fields).
202 mdoc_alloc1(struct mdoc
*mdoc
)
205 memset(&mdoc
->meta
, 0, sizeof(struct mdoc_meta
));
207 mdoc
->lastnamed
= mdoc
->lastsec
= SEC_NONE
;
208 mdoc
->last
= mandoc_calloc(1, sizeof(struct mdoc_node
));
209 mdoc
->first
= mdoc
->last
;
210 mdoc
->last
->type
= MDOC_ROOT
;
211 mdoc
->next
= MDOC_NEXT_CHILD
;
216 * Free up volatile resources (see mdoc_free1()) then re-initialises the
217 * data with mdoc_alloc1(). After invocation, parse data has been reset
218 * and the parser is ready for re-invocation on a new tree; however,
219 * cross-parse non-volatile data is kept intact.
222 mdoc_reset(struct mdoc
*mdoc
)
231 * Completely free up all volatile and non-volatile parse resources.
232 * After invocation, the pointer is no longer usable.
235 mdoc_free(struct mdoc
*mdoc
)
244 * Allocate volatile and non-volatile parse resources.
247 mdoc_alloc(void *data
, int pflags
, const struct mdoc_cb
*cb
)
251 p
= mandoc_calloc(1, sizeof(struct mdoc
));
254 memcpy(&p
->cb
, cb
, sizeof(struct mdoc_cb
));
266 * Climb back up the parse tree, validating open scopes. Mostly calls
267 * through to macro_end() in macro.c.
270 mdoc_endparse(struct mdoc
*m
)
273 if (MDOC_HALT
& m
->flags
)
275 else if (mdoc_macroend(m
))
277 m
->flags
|= MDOC_HALT
;
283 * Main parse routine. Parses a single line -- really just hands off to
284 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
287 mdoc_parseln(struct mdoc
*m
, int ln
, char *buf
)
290 if (MDOC_HALT
& m
->flags
)
293 m
->flags
|= MDOC_NEWLINE
;
294 return(('.' == *buf
|| '\'' == *buf
) ?
295 mdoc_pmacro(m
, ln
, buf
) :
296 mdoc_ptext(m
, ln
, buf
));
301 mdoc_verr(struct mdoc
*mdoc
, int ln
, int pos
,
302 const char *fmt
, ...)
307 if (NULL
== mdoc
->cb
.mdoc_err
)
311 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
314 return((*mdoc
->cb
.mdoc_err
)(mdoc
->data
, ln
, pos
, buf
));
319 mdoc_vwarn(struct mdoc
*mdoc
, int ln
, int pos
, const char *fmt
, ...)
324 if (NULL
== mdoc
->cb
.mdoc_warn
)
328 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
331 return((*mdoc
->cb
.mdoc_warn
)(mdoc
->data
, ln
, pos
, buf
));
336 mdoc_err(struct mdoc
*m
, int line
, int pos
, int iserr
, enum merr type
)
340 p
= __mdoc_merrnames
[(int)type
];
344 return(mdoc_verr(m
, line
, pos
, p
));
346 return(mdoc_vwarn(m
, line
, pos
, p
));
351 mdoc_macro(struct mdoc
*m
, enum mdoct tok
,
352 int ln
, int pp
, int *pos
, char *buf
)
354 assert(tok
< MDOC_MAX
);
356 /* If we're in the body, deny prologue calls. */
358 if (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
&&
359 MDOC_PBODY
& m
->flags
)
360 return(mdoc_perr(m
, ln
, pp
, EPROLBODY
));
362 /* If we're in the prologue, deny "body" macros. */
364 if ( ! (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
) &&
365 ! (MDOC_PBODY
& m
->flags
)) {
366 if ( ! mdoc_pwarn(m
, ln
, pp
, EBODYPROL
))
368 if (NULL
== m
->meta
.title
)
369 m
->meta
.title
= mandoc_strdup("unknown");
370 if (NULL
== m
->meta
.vol
)
371 m
->meta
.vol
= mandoc_strdup("local");
372 if (NULL
== m
->meta
.os
)
373 m
->meta
.os
= mandoc_strdup("local");
374 if (0 == m
->meta
.date
)
375 m
->meta
.date
= time(NULL
);
376 m
->flags
|= MDOC_PBODY
;
379 return((*mdoc_macros
[tok
].fp
)(m
, tok
, ln
, pp
, pos
, buf
));
384 node_append(struct mdoc
*mdoc
, struct mdoc_node
*p
)
389 assert(MDOC_ROOT
!= p
->type
);
391 switch (mdoc
->next
) {
392 case (MDOC_NEXT_SIBLING
):
393 mdoc
->last
->next
= p
;
394 p
->prev
= mdoc
->last
;
395 p
->parent
= mdoc
->last
->parent
;
397 case (MDOC_NEXT_CHILD
):
398 mdoc
->last
->child
= p
;
399 p
->parent
= mdoc
->last
;
408 if ( ! mdoc_valid_pre(mdoc
, p
))
410 if ( ! mdoc_action_pre(mdoc
, p
))
415 assert(MDOC_BLOCK
== p
->parent
->type
);
419 assert(MDOC_BLOCK
== p
->parent
->type
);
423 assert(MDOC_BLOCK
== p
->parent
->type
);
434 if ( ! mdoc_valid_post(mdoc
))
436 if ( ! mdoc_action_post(mdoc
))
447 static struct mdoc_node
*
448 node_alloc(struct mdoc
*m
, int line
, int pos
,
449 enum mdoct tok
, enum mdoc_type type
)
453 p
= mandoc_calloc(1, sizeof(struct mdoc_node
));
459 if (MDOC_NEWLINE
& m
->flags
)
460 p
->flags
|= MDOC_LINE
;
461 m
->flags
&= ~MDOC_NEWLINE
;
467 mdoc_tail_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
471 p
= node_alloc(m
, line
, pos
, tok
, MDOC_TAIL
);
472 if ( ! node_append(m
, p
))
474 m
->next
= MDOC_NEXT_CHILD
;
480 mdoc_head_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
487 p
= node_alloc(m
, line
, pos
, tok
, MDOC_HEAD
);
488 if ( ! node_append(m
, p
))
490 m
->next
= MDOC_NEXT_CHILD
;
496 mdoc_body_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
500 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BODY
);
501 if ( ! node_append(m
, p
))
503 m
->next
= MDOC_NEXT_CHILD
;
509 mdoc_block_alloc(struct mdoc
*m
, int line
, int pos
,
510 enum mdoct tok
, struct mdoc_arg
*args
)
514 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BLOCK
);
518 if ( ! node_append(m
, p
))
520 m
->next
= MDOC_NEXT_CHILD
;
526 mdoc_elem_alloc(struct mdoc
*m
, int line
, int pos
,
527 enum mdoct tok
, struct mdoc_arg
*args
)
531 p
= node_alloc(m
, line
, pos
, tok
, MDOC_ELEM
);
535 if ( ! node_append(m
, p
))
537 m
->next
= MDOC_NEXT_CHILD
;
543 mdoc_word_alloc(struct mdoc
*m
, int line
, int pos
, const char *p
)
550 n
= node_alloc(m
, line
, pos
, MDOC_MAX
, MDOC_TEXT
);
551 n
->string
= mandoc_malloc(len
+ 1);
552 sv
= strlcpy(n
->string
, p
, len
+ 1);
554 /* Prohibit truncation. */
555 assert(sv
< len
+ 1);
557 if ( ! node_append(m
, n
))
560 m
->next
= MDOC_NEXT_SIBLING
;
566 mdoc_node_free(struct mdoc_node
*p
)
572 mdoc_argv_free(p
->args
);
578 mdoc_node_unlink(struct mdoc
*m
, struct mdoc_node
*n
)
581 /* Adjust siblings. */
584 n
->prev
->next
= n
->next
;
586 n
->next
->prev
= n
->prev
;
592 if (n
->parent
->child
== n
)
593 n
->parent
->child
= n
->prev
? n
->prev
: n
->next
;
596 /* Adjust parse point, if applicable. */
598 if (m
&& m
->last
== n
) {
601 m
->next
= MDOC_NEXT_SIBLING
;
604 m
->next
= MDOC_NEXT_CHILD
;
608 if (m
&& m
->first
== n
)
614 mdoc_node_delete(struct mdoc
*m
, struct mdoc_node
*p
)
619 mdoc_node_delete(m
, p
->child
);
621 assert(0 == p
->nchild
);
623 mdoc_node_unlink(m
, p
);
629 * Parse free-form text, that is, a line that does not begin with the
633 mdoc_ptext(struct mdoc
*m
, int line
, char *buf
)
637 /* Ignore bogus comments. */
639 if ('\\' == buf
[0] && '.' == buf
[1] && '\"' == buf
[2])
640 return(mdoc_pwarn(m
, line
, 0, EBADCOMMENT
));
642 /* No text before an initial macro. */
644 if (SEC_NONE
== m
->lastnamed
)
645 return(mdoc_perr(m
, line
, 0, ETEXTPROL
));
647 /* Literal just gets pulled in as-is. */
649 if (MDOC_LITERAL
& m
->flags
)
650 return(mdoc_word_alloc(m
, line
, 0, buf
));
652 /* Check for a blank line, which may also consist of spaces. */
654 for (i
= 0; ' ' == buf
[i
]; i
++)
655 /* Skip to first non-space. */ ;
657 if ('\0' == buf
[i
]) {
658 if ( ! mdoc_pwarn(m
, line
, 0, ENOBLANK
))
662 * Insert a `Pp' in the case of a blank line. Technically,
663 * blank lines aren't allowed, but enough manuals assume this
664 * behaviour that we want to work around it.
666 if ( ! mdoc_elem_alloc(m
, line
, 0, MDOC_Pp
, NULL
))
669 m
->next
= MDOC_NEXT_SIBLING
;
674 * Warn if the last un-escaped character is whitespace. Then
675 * strip away the remaining spaces (tabs stay!).
678 i
= (int)strlen(buf
);
681 if (' ' == buf
[i
- 1] || '\t' == buf
[i
- 1]) {
682 if (i
> 1 && '\\' != buf
[i
- 2])
683 if ( ! mdoc_pwarn(m
, line
, i
- 1, ETAILWS
))
686 for (--i
; i
&& ' ' == buf
[i
]; i
--)
687 /* Spin back to non-space. */ ;
689 /* Jump ahead of escaped whitespace. */
690 i
+= '\\' == buf
[i
] ? 2 : 1;
695 /* Allocate the whole word. */
697 if ( ! mdoc_word_alloc(m
, line
, 0, buf
))
701 * End-of-sentence check. If the last character is an unescaped
702 * EOS character, then flag the node as being the end of a
703 * sentence. The front-end will know how to interpret this.
706 /* FIXME: chain of close delims. */
710 if (mandoc_eos(buf
, (size_t)i
))
711 m
->last
->flags
|= MDOC_EOS
;
718 macrowarn(struct mdoc
*m
, int ln
, const char *buf
)
720 if ( ! (MDOC_IGN_MACRO
& m
->pflags
))
721 return(mdoc_verr(m
, ln
, 0, "unknown macro: %s%s",
722 buf
, strlen(buf
) > 3 ? "..." : ""));
723 return(mdoc_vwarn(m
, ln
, 0, "unknown macro: %s%s",
724 buf
, strlen(buf
) > 3 ? "..." : ""));
729 * Parse a macro line, that is, a line beginning with the control
733 mdoc_pmacro(struct mdoc
*m
, int ln
, char *buf
)
739 /* Empty lines are ignored. */
746 /* Accept whitespace after the initial control char. */
750 while (buf
[i
] && ' ' == buf
[i
])
758 /* Copy the first word into a nil-terminated buffer. */
760 for (j
= 0; j
< 4; j
++, i
++) {
761 if ('\0' == (mac
[j
] = buf
[i
]))
763 else if (' ' == buf
[i
])
766 /* Check for invalid characters. */
768 if (isgraph((u_char
)buf
[i
]))
770 return(mdoc_perr(m
, ln
, i
, EPRINT
));
775 if (j
== 4 || j
< 2) {
776 if ( ! macrowarn(m
, ln
, mac
))
781 if (MDOC_MAX
== (tok
= mdoc_hash_find(mac
))) {
782 if ( ! macrowarn(m
, ln
, mac
))
787 /* The macro is sane. Jump to the next word. */
789 while (buf
[i
] && ' ' == buf
[i
])
793 * Trailing whitespace. Note that tabs are allowed to be passed
794 * into the parser as "text", so we only warn about spaces here.
797 if ('\0' == buf
[i
] && ' ' == buf
[i
- 1])
798 if ( ! mdoc_pwarn(m
, ln
, i
- 1, ETAILWS
))
802 * Begin recursive parse sequence. Since we're at the start of
803 * the line, we don't need to do callable/parseable checks.
805 if ( ! mdoc_macro(m
, tok
, ln
, sv
, &i
, buf
))
810 err
: /* Error out. */
812 m
->flags
|= MDOC_HALT
;