]>
git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
/*	$Id: mdoc.c,v 1.135 2010/05/16 10:59:36 kristaps Exp $ */
/*
 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
21 #include <sys/types.h>
32 #include "libmandoc.h"
/*
 * Message text for each parser diagnostic.  The array is dimensioned by
 * MERRMAX and the comment at the end of every row names the code that
 * selects it; mdoc_err() indexes this table with the error type.
 */
34 const char *const __mdoc_merrnames
[MERRMAX
] = {
35 "trailing whitespace", /* ETAILWS */
36 "unexpected quoted parameter", /* EQUOTPARM */
37 "unterminated quoted parameter", /* EQUOTTERM */
38 "argument parameter suggested", /* EARGVAL */
39 "macro disallowed in prologue", /* EBODYPROL */
40 "macro disallowed in body", /* EPROLBODY */
41 "text disallowed in prologue", /* ETEXTPROL */
42 "blank line disallowed", /* ENOBLANK */
43 "text parameter too long", /* ETOOLONG */
44 "invalid escape sequence", /* EESCAPE */
45 "invalid character", /* EPRINT */
46 "document has no body", /* ENODAT */
47 "document has no prologue", /* ENOPROLOGUE */
48 "expected line arguments", /* ELINE */
49 "invalid AT&T argument", /* EATT */
50 "default name not yet set", /* ENAME */
51 "missing list type", /* ELISTTYPE */
52 "missing display type", /* EDISPTYPE */
53 "too many display types", /* EMULTIDISP */
54 "too many list types", /* EMULTILIST */
55 "NAME section must be first", /* ESECNAME */
56 "badly-formed NAME section", /* ENAMESECINC */
57 "argument repeated", /* EARGREP */
58 "expected boolean parameter", /* EBOOL */
59 "inconsistent column syntax", /* ECOLMIS */
60 "nested display invalid", /* ENESTDISP */
61 "width argument missing", /* EMISSWIDTH */
62 "invalid section for this manual section", /* EWRONGMSEC */
63 "section out of conventional order", /* ESECOOO */
64 "section repeated", /* ESECREP */
65 "invalid standard argument", /* EBADSTAND */
66 "multi-line arguments discouraged", /* ENOMULTILINE */
67 "multi-line arguments suggested", /* EMULTILINE */
68 "line arguments discouraged", /* ENOLINE */
69 "prologue macro out of conventional order", /* EPROLOOO */
70 "prologue macro repeated", /* EPROLREP */
71 "invalid manual section", /* EBADMSEC */
72 "invalid font mode", /* EFONT */
73 "invalid date syntax", /* EBADDATE */
74 "invalid number format", /* ENUMFMT */
75 "superfluous width argument", /* ENOWIDTH */
76 "system: utsname error", /* EUTSNAME */
77 "obsolete macro", /* EOBS */
78 "end-of-line scope violation", /* EIMPBRK */
79 "empty macro ignored", /* EIGNE */
80 "unclosed explicit scope", /* EOPEN */
81 "unterminated quoted phrase", /* EQUOTPHR */
82 "closure macro without prior context", /* ENOCTX */
83 "no description found for library", /* ELIB */
84 "bad child for parent context", /* EBADCHILD */
85 "list arguments preceding type", /* ENOTYPE */
86 "deprecated comment style", /* EBADCOMMENT */
/*
 * Printable names of the mdoc macros; the array is dimensioned by
 * MDOC_MAX.
 * NOTE(review): presumably the row order must match the macro token
 * enumeration -- confirm against the header.  The tail of this table
 * (any final entries and the closing brace) is not visible in this
 * copy of the file; verify against upstream before relying on it.
 */
89 const char *const __mdoc_macronames
[MDOC_MAX
] = {
90 "Ap", "Dd", "Dt", "Os",
91 "Sh", "Ss", "Pp", "D1",
92 "Dl", "Bd", "Ed", "Bl",
93 "El", "It", "Ad", "An",
94 "Ar", "Cd", "Cm", "Dv",
95 "Er", "Ev", "Ex", "Fa",
96 "Fd", "Fl", "Fn", "Ft",
97 "Ic", "In", "Li", "Nd",
98 "Nm", "Op", "Ot", "Pa",
99 "Rv", "St", "Va", "Vt",
101 "Xr", "%A", "%B", "%D",
103 "%I", "%J", "%N", "%O",
105 "%P", "%R", "%T", "%V",
106 "Ac", "Ao", "Aq", "At",
107 "Bc", "Bf", "Bo", "Bq",
108 "Bsx", "Bx", "Db", "Dc",
109 "Do", "Dq", "Ec", "Ef",
110 "Em", "Eo", "Fx", "Ms",
111 "No", "Ns", "Nx", "Ox",
112 "Pc", "Pf", "Po", "Pq",
113 "Qc", "Ql", "Qo", "Qq",
114 "Re", "Rs", "Sc", "So",
115 "Sq", "Sm", "Sx", "Sy",
116 "Tn", "Ux", "Xc", "Xo",
117 "Fo", "Fc", "Oo", "Oc",
118 "Bk", "Ek", "Bt", "Hf",
119 "Fr", "Ud", "Lb", "Lp",
120 "Lk", "Mt", "Brq", "Bro",
122 "Brc", "%C", "Es", "En",
124 "Dx", "%Q", "br", "sp",
/*
 * Printable names of the macro arguments (the words that follow a dash
 * on a macro line); the array is dimensioned by MDOC_ARG_MAX.
 * NOTE(review): presumably ordered to match the argument enumeration --
 * confirm against the header.
 */
129 const char *const __mdoc_argnames
[MDOC_ARG_MAX
] = {
130 "split", "nosplit", "ragged",
131 "unfilled", "literal", "file",
132 "offset", "bullet", "dash",
133 "hyphen", "item", "enum",
134 "tag", "diag", "hang",
135 "ohang", "inset", "column",
136 "width", "compact", "std",
137 "filled", "words", "emphasis",
138 "symbolic", "nested", "centered"
141 const char * const *mdoc_macronames
= __mdoc_macronames
;
142 const char * const *mdoc_argnames
= __mdoc_argnames
;
/*
 * Forward declarations of the file-local (static) helpers defined
 * further down in this file.
 * NOTE(review): some continuation lines of these prototypes are not
 * visible in this copy of the file.
 */
144 static void mdoc_node_free(struct mdoc_node
*);
145 static void mdoc_node_unlink(struct mdoc
*,
147 static void mdoc_free1(struct mdoc
*);
148 static void mdoc_alloc1(struct mdoc
*);
149 static struct mdoc_node
*node_alloc(struct mdoc
*, int, int,
150 enum mdoct
, enum mdoc_type
);
151 static int node_append(struct mdoc
*,
153 static int mdoc_ptext(struct mdoc
*, int, char *, int);
154 static int mdoc_pmacro(struct mdoc
*, int, char *, int);
155 static int macrowarn(struct mdoc
*, int,
159 const struct mdoc_node
*
160 mdoc_node(const struct mdoc
*m
)
163 return(MDOC_HALT
& m
->flags
? NULL
: m
->first
);
167 const struct mdoc_meta
*
168 mdoc_meta(const struct mdoc
*m
)
171 return(MDOC_HALT
& m
->flags
? NULL
: &m
->meta
);
/*
 * Frees volatile resources (parse tree, meta-data, fields).
 */
/*
 * Release the per-parse data held by the parser: the node tree is torn
 * down with mdoc_node_delete() starting at mdoc->first, and the
 * meta-data strings title, name, arch, vol and msec are handed to
 * free().
 * NOTE(review): free(NULL) is a no-op in standard C, so a NULL guard
 * in front of free() is not strictly required.
 */
179 mdoc_free1(struct mdoc
*mdoc
)
183 mdoc_node_delete(mdoc
, mdoc
->first
);
184 if (mdoc
->meta
.title
)
185 free(mdoc
->meta
.title
);
189 free(mdoc
->meta
.name
);
191 free(mdoc
->meta
.arch
);
193 free(mdoc
->meta
.vol
);
195 free(mdoc
->meta
.msec
);
/*
 * Allocate all volatile resources (parse tree, meta-data, fields).
 */
/*
 * Put the parser into its initial per-parse state: the meta-data is
 * zeroed, lastnamed and lastsec are set to SEC_NONE, a single node is
 * obtained from mandoc_calloc() and becomes both mdoc->first and
 * mdoc->last with type MDOC_ROOT, and the insertion mode is set to
 * MDOC_NEXT_CHILD so the next node is attached beneath that root.
 */
203 mdoc_alloc1(struct mdoc
*mdoc
)
206 memset(&mdoc
->meta
, 0, sizeof(struct mdoc_meta
));
208 mdoc
->lastnamed
= mdoc
->lastsec
= SEC_NONE
;
209 mdoc
->last
= mandoc_calloc(1, sizeof(struct mdoc_node
));
210 mdoc
->first
= mdoc
->last
;
211 mdoc
->last
->type
= MDOC_ROOT
;
212 mdoc
->next
= MDOC_NEXT_CHILD
;
/*
 * Free up volatile resources (see mdoc_free1()), then re-initialise the
 * data with mdoc_alloc1().  After invocation, parse data has been reset
 * and the parser is ready for re-invocation on a new tree; however,
 * cross-parse non-volatile data is kept intact.
 */
/*
 * Takes the parser handle to reset.
 * NOTE(review): the return type and the body of this function are not
 * visible in this copy of the file -- restore them from upstream.
 */
223 mdoc_reset(struct mdoc
*mdoc
)
/*
 * Completely free up all volatile and non-volatile parse resources.
 * After invocation, the pointer is no longer usable.
 */
/*
 * Takes the parser handle to destroy.
 * NOTE(review): the return type and the body of this function are not
 * visible in this copy of the file -- restore them from upstream.
 */
236 mdoc_free(struct mdoc
*mdoc
)
/*
 * Allocate volatile and non-volatile parse resources.
 */
/*
 * Create a parser handle: a struct mdoc is obtained from
 * mandoc_calloc() and the caller's callback table *cb is copied into
 * its cb member with memcpy().
 * NOTE(review): how data and pflags are stored is not visible here;
 * other functions in this file read them back as ->data and ->pflags.
 */
248 mdoc_alloc(void *data
, int pflags
, const struct mdoc_cb
*cb
)
252 p
= mandoc_calloc(1, sizeof(struct mdoc
));
255 memcpy(&p
->cb
, cb
, sizeof(struct mdoc_cb
));
/*
 * Climb back up the parse tree, validating open scopes.  Mostly calls
 * through to mdoc_macroend() in macro.c.
 */
/*
 * Finish a parse.  The MDOC_HALT flag is tested first; otherwise
 * mdoc_macroend() is invoked on the parser.  The remaining visible
 * statement sets MDOC_HALT in m->flags.
 * NOTE(review): the values returned on each path are not visible in
 * this copy of the file.
 */
271 mdoc_endparse(struct mdoc
*m
)
274 if (MDOC_HALT
& m
->flags
)
276 else if (mdoc_macroend(m
))
278 m
->flags
|= MDOC_HALT
;
/*
 * Main parse routine.  Parses a single line -- really just hands off to
 * the macro parser (mdoc_pmacro()) or the text parser (mdoc_ptext()).
 */
/*
 * Parse one input line held in buf, starting at index offs; ln is the
 * line number passed on to the sub-parsers.  The MDOC_HALT flag is
 * checked first.  MDOC_NEWLINE is then set in m->flags, and the line
 * is dispatched on its first character: a period or an apostrophe at
 * buf[offs] selects mdoc_pmacro(), anything else mdoc_ptext().  The
 * result of the selected parser is returned.
 */
288 mdoc_parseln(struct mdoc
*m
, int ln
, char *buf
, int offs
)
291 if (MDOC_HALT
& m
->flags
)
294 m
->flags
|= MDOC_NEWLINE
;
295 return(('.' == buf
[offs
] || '\'' == buf
[offs
]) ?
296 mdoc_pmacro(m
, ln
, buf
, offs
) :
297 mdoc_ptext(m
, ln
, buf
, offs
));
/*
 * Report an error with a printf-style message.  The client callback
 * mdoc->cb.mdoc_err is compared against NULL first.  The message is
 * formatted with vsnprintf() into the local buffer buf and passed,
 * together with mdoc->data, the line ln and the column pos, to the
 * callback, whose result is returned.
 * NOTE(review): vsnprintf() already counts the terminating NUL inside
 * the size it is given, so passing sizeof(buf) - 1 merely leaves the
 * last byte of the buffer unused.
 */
302 mdoc_verr(struct mdoc
*mdoc
, int ln
, int pos
,
303 const char *fmt
, ...)
308 if (NULL
== mdoc
->cb
.mdoc_err
)
312 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
315 return((*mdoc
->cb
.mdoc_err
)(mdoc
->data
, ln
, pos
, buf
));
/*
 * Report a warning with a printf-style message.  Mirrors mdoc_verr()
 * but uses the client callback mdoc->cb.mdoc_warn: the callback is
 * compared against NULL, the message is formatted with vsnprintf()
 * into the local buffer buf, and the callback is invoked with
 * mdoc->data, ln, pos and the formatted text.  Its result is returned.
 */
320 mdoc_vwarn(struct mdoc
*mdoc
, int ln
, int pos
, const char *fmt
, ...)
325 if (NULL
== mdoc
->cb
.mdoc_warn
)
329 (void)vsnprintf(buf
, sizeof(buf
) - 1, fmt
, ap
);
332 return((*mdoc
->cb
.mdoc_warn
)(mdoc
->data
, ln
, pos
, buf
));
/*
 * Report a canned diagnostic.  The message text is looked up in
 * __mdoc_merrnames using type as the index and is then reported
 * through either mdoc_verr() or mdoc_vwarn().
 * NOTE(review): presumably iserr selects between the two calls; the
 * selecting statement is not visible in this copy of the file.
 * NOTE(review): the table entry is passed in the format-string
 * position of both functions, which is only safe while no message in
 * the table contains a percent sign.
 */
337 mdoc_err(struct mdoc
*m
, int line
, int pos
, int iserr
, enum merr type
)
341 p
= __mdoc_merrnames
[(int)type
];
345 return(mdoc_verr(m
, line
, pos
, p
));
347 return(mdoc_vwarn(m
, line
, pos
, p
));
/*
 * Execute the macro tok found on line ln at column pp; pos and buf are
 * handed through to the macro handler unchanged.
 *
 * tok is asserted to be below MDOC_MAX.  A macro flagged MDOC_PROLOGUE
 * that appears once the body has started (MDOC_PBODY set) is reported
 * with mdoc_perr() as EPROLBODY.  A macro without the MDOC_PROLOGUE
 * flag that appears before the body has started raises an EBODYPROL
 * warning through mdoc_pwarn(); missing meta-data is then filled with
 * defaults (title "unknown", vol "local", os "local", date from
 * time(NULL)) and MDOC_PBODY is set.  Finally the handler stored in
 * mdoc_macros[tok].fp is called and its result returned.
 */
352 mdoc_macro(struct mdoc
*m
, enum mdoct tok
,
353 int ln
, int pp
, int *pos
, char *buf
)
355 assert(tok
< MDOC_MAX
);
357 /* If we're in the body, deny prologue calls. */
359 if (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
&&
360 MDOC_PBODY
& m
->flags
)
361 return(mdoc_perr(m
, ln
, pp
, EPROLBODY
));
363 /* If we're in the prologue, deny "body" macros. */
365 if ( ! (MDOC_PROLOGUE
& mdoc_macros
[tok
].flags
) &&
366 ! (MDOC_PBODY
& m
->flags
)) {
367 if ( ! mdoc_pwarn(m
, ln
, pp
, EBODYPROL
))
369 if (NULL
== m
->meta
.title
)
370 m
->meta
.title
= mandoc_strdup("unknown");
371 if (NULL
== m
->meta
.vol
)
372 m
->meta
.vol
= mandoc_strdup("local");
373 if (NULL
== m
->meta
.os
)
374 m
->meta
.os
= mandoc_strdup("local");
375 if (0 == m
->meta
.date
)
376 m
->meta
.date
= time(NULL
);
377 m
->flags
|= MDOC_PBODY
;
380 return((*mdoc_macros
[tok
].fp
)(m
, tok
, ln
, pp
, pos
, buf
));
/*
 * Link node p into the tree at the current insertion point mdoc->last.
 * p must not be of type MDOC_ROOT (asserted).  The insertion mode in
 * mdoc->next decides the placement: MDOC_NEXT_SIBLING chains p after
 * mdoc->last and gives it the same parent, MDOC_NEXT_CHILD makes p the
 * child of mdoc->last.  The pre-checks mdoc_valid_pre() and
 * mdoc_action_pre() are run on p, and the post-checks
 * mdoc_valid_post() and mdoc_action_post() are run on the parser.
 * In three places the parent of p is asserted to be an MDOC_BLOCK.
 * NOTE(review): the conditions selecting those assertions and the
 * post-checks, and the return statements, are not visible in this
 * copy of the file.
 */
385 node_append(struct mdoc
*mdoc
, struct mdoc_node
*p
)
390 assert(MDOC_ROOT
!= p
->type
);
392 switch (mdoc
->next
) {
393 case (MDOC_NEXT_SIBLING
):
394 mdoc
->last
->next
= p
;
395 p
->prev
= mdoc
->last
;
396 p
->parent
= mdoc
->last
->parent
;
398 case (MDOC_NEXT_CHILD
):
399 mdoc
->last
->child
= p
;
400 p
->parent
= mdoc
->last
;
409 if ( ! mdoc_valid_pre(mdoc
, p
))
411 if ( ! mdoc_action_pre(mdoc
, p
))
416 assert(MDOC_BLOCK
== p
->parent
->type
);
420 assert(MDOC_BLOCK
== p
->parent
->type
);
424 assert(MDOC_BLOCK
== p
->parent
->type
);
435 if ( ! mdoc_valid_post(mdoc
))
437 if ( ! mdoc_action_post(mdoc
))
/*
 * Obtain a new node from mandoc_calloc() for the given line, column,
 * macro token and node type.  When MDOC_NEWLINE is pending in the
 * parser flags the node is marked with MDOC_LINE; MDOC_NEWLINE is
 * cleared from m->flags.
 * NOTE(review): the statements that store line, pos, tok and type in
 * the node are not visible in this copy of the file.
 */
448 static struct mdoc_node
*
449 node_alloc(struct mdoc
*m
, int line
, int pos
,
450 enum mdoct tok
, enum mdoc_type type
)
454 p
= mandoc_calloc(1, sizeof(struct mdoc_node
));
460 if (MDOC_NEWLINE
& m
->flags
)
461 p
->flags
|= MDOC_LINE
;
462 m
->flags
&= ~MDOC_NEWLINE
;
/*
 * Create an MDOC_TAIL node for macro tok with node_alloc(), attach it
 * with node_append(), and set the insertion mode to MDOC_NEXT_CHILD.
 */
468 mdoc_tail_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
472 p
= node_alloc(m
, line
, pos
, tok
, MDOC_TAIL
);
473 if ( ! node_append(m
, p
))
475 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Create an MDOC_HEAD node for macro tok with node_alloc(), attach it
 * with node_append(), and set the insertion mode to MDOC_NEXT_CHILD.
 */
481 mdoc_head_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
488 p
= node_alloc(m
, line
, pos
, tok
, MDOC_HEAD
);
489 if ( ! node_append(m
, p
))
491 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Create an MDOC_BODY node for macro tok with node_alloc(), attach it
 * with node_append(), and set the insertion mode to MDOC_NEXT_CHILD.
 */
497 mdoc_body_alloc(struct mdoc
*m
, int line
, int pos
, enum mdoct tok
)
501 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BODY
);
502 if ( ! node_append(m
, p
))
504 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Create an MDOC_BLOCK node for macro tok with node_alloc(), attach it
 * with node_append(), and set the insertion mode to MDOC_NEXT_CHILD.
 * NOTE(review): presumably args is stored on the new node; that
 * statement is not visible in this copy of the file.
 */
510 mdoc_block_alloc(struct mdoc
*m
, int line
, int pos
,
511 enum mdoct tok
, struct mdoc_arg
*args
)
515 p
= node_alloc(m
, line
, pos
, tok
, MDOC_BLOCK
);
519 if ( ! node_append(m
, p
))
521 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Create an MDOC_ELEM node for macro tok with node_alloc(), attach it
 * with node_append(), and set the insertion mode to MDOC_NEXT_CHILD.
 * NOTE(review): presumably args is stored on the new node; that
 * statement is not visible in this copy of the file.
 */
527 mdoc_elem_alloc(struct mdoc
*m
, int line
, int pos
,
528 enum mdoct tok
, struct mdoc_arg
*args
)
532 p
= node_alloc(m
, line
, pos
, tok
, MDOC_ELEM
);
536 if ( ! node_append(m
, p
))
538 m
->next
= MDOC_NEXT_CHILD
;
/*
 * Create a text node holding a private copy of the string p.  The node
 * is allocated with type MDOC_TEXT and token MDOC_MAX; its string
 * buffer is len + 1 bytes from mandoc_malloc() and is filled with
 * strlcpy(), whose result is asserted to be below len + 1 (that is,
 * no truncation occurred).  The node is attached with node_append()
 * and the insertion mode is set to MDOC_NEXT_SIBLING.
 * NOTE(review): the computation of len is not visible in this copy of
 * the file -- presumably strlen(p).
 */
544 mdoc_word_alloc(struct mdoc
*m
, int line
, int pos
, const char *p
)
551 n
= node_alloc(m
, line
, pos
, MDOC_MAX
, MDOC_TEXT
);
552 n
->string
= mandoc_malloc(len
+ 1);
553 sv
= strlcpy(n
->string
, p
, len
+ 1);
555 /* Prohibit truncation. */
556 assert(sv
< len
+ 1);
558 if ( ! node_append(m
, n
))
561 m
->next
= MDOC_NEXT_SIBLING
;
/*
 * Release the storage owned by node p.  The visible statement frees
 * the argument vector with mdoc_argv_free(p->args).
 * NOTE(review): any further frees are not visible in this copy of the
 * file.
 */
567 mdoc_node_free(struct mdoc_node
*p
)
573 mdoc_argv_free(p
->args
);
/*
 * Detach node n from the tree.  The neighbouring siblings are linked
 * to each other, and if the parent's child pointer referred to n it is
 * moved to n->prev when that exists, otherwise to n->next.  When a
 * parser m is supplied (m is tested before every use, so it may be
 * NULL) and its insertion point m->last was n, the insertion mode is
 * re-set to MDOC_NEXT_SIBLING or MDOC_NEXT_CHILD; m->first is checked
 * against n as well.
 * NOTE(review): the guards around the sibling updates and the new
 * values of m->last and m->first are not visible in this copy of the
 * file.
 */
579 mdoc_node_unlink(struct mdoc
*m
, struct mdoc_node
*n
)
582 /* Adjust siblings. */
585 n
->prev
->next
= n
->next
;
587 n
->next
->prev
= n
->prev
;
593 if (n
->parent
->child
== n
)
594 n
->parent
->child
= n
->prev
? n
->prev
: n
->next
;
597 /* Adjust parse point, if applicable. */
599 if (m
&& m
->last
== n
) {
602 m
->next
= MDOC_NEXT_SIBLING
;
605 m
->next
= MDOC_NEXT_CHILD
;
609 if (m
&& m
->first
== n
)
/*
 * Delete node p together with everything beneath it: the function
 * calls itself on p->child, asserts that p->nchild has dropped to
 * zero, and then detaches p with mdoc_node_unlink().
 * NOTE(review): the loop over the children and the final release of p
 * are not visible in this copy of the file.
 */
615 mdoc_node_delete(struct mdoc
*m
, struct mdoc_node
*p
)
620 mdoc_node_delete(m
, p
->child
);
622 assert(0 == p
->nchild
);
624 mdoc_node_unlink(m
, p
);
/*
 * Parse free-form text, that is, a line that does not begin with a
 * control character ('.' or '\'').
 */
/*
 * Handle one line of text starting at buf[offs].  In order:
 *  - a line that opens with a backslash, a period and a double quote
 *    is reported as EBADCOMMENT through mdoc_pwarn() and goes no
 *    further;
 *  - text seen while m->lastnamed is still SEC_NONE is rejected with
 *    ETEXTPROL through mdoc_perr();
 *  - in literal mode (MDOC_LITERAL) the line is stored unchanged with
 *    mdoc_word_alloc();
 *  - a line made only of spaces raises an ENOBLANK warning and a Pp
 *    element is inserted in its place, with the insertion mode set to
 *    MDOC_NEXT_SIBLING;
 *  - otherwise a trailing space or tab not preceded by a backslash
 *    raises an ETAILWS warning, the index i is walked back over the
 *    trailing spaces, the text is stored with mdoc_word_alloc(), and
 *    the new node is flagged MDOC_EOS when mandoc_eos() reports an
 *    end of sentence.
 * NOTE(review): the return statements and the statement that cuts the
 * buffer at i are not visible in this copy of the file.
 */
634 mdoc_ptext(struct mdoc
*m
, int line
, char *buf
, int offs
)
638 /* Ignore bogus comments. */
640 if ('\\' == buf
[offs
] &&
641 '.' == buf
[offs
+ 1] &&
642 '"' == buf
[offs
+ 2])
643 return(mdoc_pwarn(m
, line
, offs
, EBADCOMMENT
));
645 /* No text before an initial macro. */
647 if (SEC_NONE
== m
->lastnamed
)
648 return(mdoc_perr(m
, line
, offs
, ETEXTPROL
));
650 /* Literal just gets pulled in as-is. */
652 if (MDOC_LITERAL
& m
->flags
)
653 return(mdoc_word_alloc(m
, line
, offs
, buf
+ offs
));
655 /* Check for a blank line, which may also consist of spaces. */
657 for (i
= offs
; ' ' == buf
[i
]; i
++)
658 /* Skip to first non-space. */ ;
660 if ('\0' == buf
[i
]) {
661 if ( ! mdoc_pwarn(m
, line
, offs
, ENOBLANK
))
665 * Insert a `Pp' in the case of a blank line. Technically,
666 * blank lines aren't allowed, but enough manuals assume this
667 * behaviour that we want to work around it.
669 if ( ! mdoc_elem_alloc(m
, line
, offs
, MDOC_Pp
, NULL
))
672 m
->next
= MDOC_NEXT_SIBLING
;
677 * Warn if the last un-escaped character is whitespace. Then
678 * strip away the remaining spaces (tabs stay!).
681 i
= (int)strlen(buf
);
684 if (' ' == buf
[i
- 1] || '\t' == buf
[i
- 1]) {
685 if (i
> 1 && '\\' != buf
[i
- 2])
686 if ( ! mdoc_pwarn(m
, line
, i
- 1, ETAILWS
))
689 for (--i
; i
&& ' ' == buf
[i
]; i
--)
690 /* Spin back to non-space. */ ;
692 /* Jump ahead of escaped whitespace. */
693 i
+= '\\' == buf
[i
] ? 2 : 1;
698 /* Allocate the whole word. */
700 if ( ! mdoc_word_alloc(m
, line
, offs
, buf
+ offs
))
704 * End-of-sentence check. If the last character is an unescaped
705 * EOS character, then flag the node as being the end of a
706 * sentence. The front-end will know how to interpret this.
710 if (mandoc_eos(buf
, (size_t)i
))
711 m
->last
->flags
|= MDOC_EOS
;
/*
 * Report an unknown macro whose name is in buf, found on line ln at
 * column offs.  The message has the form "unknown macro: NAME", with
 * "..." appended when the name is longer than three characters.
 * Unless MDOC_IGN_MACRO is set in m->pflags the report is an error
 * (mdoc_verr()); with the flag set it is a warning (mdoc_vwarn()).
 * The result of the reporting call is returned.
 */
718 macrowarn(struct mdoc
*m
, int ln
, const char *buf
, int offs
)
720 if ( ! (MDOC_IGN_MACRO
& m
->pflags
))
721 return(mdoc_verr(m
, ln
, offs
, "unknown macro: %s%s",
722 buf
, strlen(buf
) > 3 ? "..." : ""));
723 return(mdoc_vwarn(m
, ln
, offs
, "unknown macro: %s%s",
724 buf
, strlen(buf
) > 3 ? "..." : ""));
/*
 * Parse a macro line, that is, a line beginning with a control
 * character ('.' or '\'').
 */
/*
 * Handle one macro line.  An empty remainder is tested for first and
 * spaces after the control character are skipped.  Up to four
 * characters of the macro name are copied into mac, stopping at the
 * end of the buffer or at a space; each copied character is tested
 * with isgraph() and EPRINT is reported through mdoc_perr() on the
 * failing path.  A name length of four or of less than two, or a name
 * that mdoc_hash_find() maps to MDOC_MAX, is reported with
 * macrowarn().  Spaces after the name are skipped, and a line that
 * ends right after such a space raises an ETAILWS warning.  The macro
 * is then run through mdoc_macro().  The err label at the end sets
 * MDOC_HALT in m->flags.
 * NOTE(review): the return statements, the initialisation of i and
 * sv, and the branch bodies that contain only jumps are not visible
 * in this copy of the file.
 */
733 mdoc_pmacro(struct mdoc
*m
, int ln
, char *buf
, int offs
)
739 /* Empty lines are ignored. */
743 if ('\0' == buf
[offs
])
748 /* Accept whitespace after the initial control char. */
752 while (buf
[i
] && ' ' == buf
[i
])
760 /* Copy the first word into a nil-terminated buffer. */
762 for (j
= 0; j
< 4; j
++, i
++) {
763 if ('\0' == (mac
[j
] = buf
[i
]))
765 else if (' ' == buf
[i
])
768 /* Check for invalid characters. */
770 if (isgraph((u_char
)buf
[i
]))
772 return(mdoc_perr(m
, ln
, i
, EPRINT
));
777 if (j
== 4 || j
< 2) {
778 if ( ! macrowarn(m
, ln
, mac
, sv
))
783 if (MDOC_MAX
== (tok
= mdoc_hash_find(mac
))) {
784 if ( ! macrowarn(m
, ln
, mac
, sv
))
789 /* The macro is sane. Jump to the next word. */
791 while (buf
[i
] && ' ' == buf
[i
])
795 * Trailing whitespace. Note that tabs are allowed to be passed
796 * into the parser as "text", so we only warn about spaces here.
799 if ('\0' == buf
[i
] && ' ' == buf
[i
- 1])
800 if ( ! mdoc_pwarn(m
, ln
, i
- 1, ETAILWS
))
804 * Begin recursive parse sequence. Since we're at the start of
805 * the line, we don't need to do callable/parseable checks.
807 if ( ! mdoc_macro(m
, tok
, ln
, sv
, &i
, buf
))
812 err
: /* Error out. */
814 m
->flags
|= MDOC_HALT
;