]>
git.cameronkatri.com Git - mandoc.git/blob - mdoc.c
1 /* $Id: mdoc.c,v 1.241 2015/04/02 23:48:19 schwarze Exp $ */
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2012-2015 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
30 #include "mandoc_aux.h"
34 #include "libmandoc.h"
37 const char *const __mdoc_macronames
[MDOC_MAX
+ 1] = {
38 "Ap", "Dd", "Dt", "Os",
39 "Sh", "Ss", "Pp", "D1",
40 "Dl", "Bd", "Ed", "Bl",
41 "El", "It", "Ad", "An",
42 "Ar", "Cd", "Cm", "Dv",
43 "Er", "Ev", "Ex", "Fa",
44 "Fd", "Fl", "Fn", "Ft",
45 "Ic", "In", "Li", "Nd",
46 "Nm", "Op", "Ot", "Pa",
47 "Rv", "St", "Va", "Vt",
48 "Xr", "%A", "%B", "%D",
49 "%I", "%J", "%N", "%O",
50 "%P", "%R", "%T", "%V",
51 "Ac", "Ao", "Aq", "At",
52 "Bc", "Bf", "Bo", "Bq",
53 "Bsx", "Bx", "Db", "Dc",
54 "Do", "Dq", "Ec", "Ef",
55 "Em", "Eo", "Fx", "Ms",
56 "No", "Ns", "Nx", "Ox",
57 "Pc", "Pf", "Po", "Pq",
58 "Qc", "Ql", "Qo", "Qq",
59 "Re", "Rs", "Sc", "So",
60 "Sq", "Sm", "Sx", "Sy",
61 "Tn", "Ux", "Xc", "Xo",
62 "Fo", "Fc", "Oo", "Oc",
63 "Bk", "Ek", "Bt", "Hf",
64 "Fr", "Ud", "Lb", "Lp",
65 "Lk", "Mt", "Brq", "Bro",
66 "Brc", "%C", "Es", "En",
67 "Dx", "%Q", "br", "sp",
68 "%U", "Ta", "ll", "text",
71 const char *const __mdoc_argnames
[MDOC_ARG_MAX
] = {
72 "split", "nosplit", "ragged",
73 "unfilled", "literal", "file",
74 "offset", "bullet", "dash",
75 "hyphen", "item", "enum",
76 "tag", "diag", "hang",
77 "ohang", "inset", "column",
78 "width", "compact", "std",
79 "filled", "words", "emphasis",
80 "symbolic", "nested", "centered"
83 const char * const *mdoc_macronames
= __mdoc_macronames
;
84 const char * const *mdoc_argnames
= __mdoc_argnames
;
86 static void mdoc_node_free(struct roff_node
*);
87 static void mdoc_node_unlink(struct mdoc
*,
89 static void mdoc_free1(struct mdoc
*);
90 static void mdoc_alloc1(struct mdoc
*);
91 static struct roff_node
*node_alloc(struct mdoc
*, int, int,
93 static void node_append(struct mdoc
*, struct roff_node
*);
94 static int mdoc_ptext(struct mdoc
*, int, char *, int);
95 static int mdoc_pmacro(struct mdoc
*, int, char *, int);
98 const struct roff_node
*
99 mdoc_node(const struct mdoc
*mdoc
)
105 const struct roff_meta
*
106 mdoc_meta(const struct mdoc
*mdoc
)
113 * Frees volatile resources (parse tree, meta-data, fields).
116 mdoc_free1(struct mdoc
*mdoc
)
120 mdoc_node_delete(mdoc
, mdoc
->first
);
121 free(mdoc
->meta
.msec
);
122 free(mdoc
->meta
.vol
);
123 free(mdoc
->meta
.arch
);
124 free(mdoc
->meta
.date
);
125 free(mdoc
->meta
.title
);
127 free(mdoc
->meta
.name
);
131 * Allocate all volatile resources (parse tree, meta-data, fields).
134 mdoc_alloc1(struct mdoc
*mdoc
)
137 memset(&mdoc
->meta
, 0, sizeof(mdoc
->meta
));
139 mdoc
->lastnamed
= mdoc
->lastsec
= SEC_NONE
;
140 mdoc
->last
= mandoc_calloc(1, sizeof(*mdoc
->last
));
141 mdoc
->first
= mdoc
->last
;
142 mdoc
->last
->type
= ROFFT_ROOT
;
143 mdoc
->last
->tok
= MDOC_MAX
;
144 mdoc
->next
= MDOC_NEXT_CHILD
;
148 * Free up volatile resources (see mdoc_free1()) then re-initialises the
149 * data with mdoc_alloc1(). After invocation, parse data has been reset
150 * and the parser is ready for re-invocation on a new tree; however,
151 * cross-parse non-volatile data is kept intact.
154 mdoc_reset(struct mdoc
*mdoc
)
162 * Completely free up all volatile and non-volatile parse resources.
163 * After invocation, the pointer is no longer usable.
166 mdoc_free(struct mdoc
*mdoc
)
174 * Allocate volatile and non-volatile parse resources.
177 mdoc_alloc(struct roff
*roff
, struct mparse
*parse
,
178 const char *defos
, int quick
)
182 p
= mandoc_calloc(1, sizeof(struct mdoc
));
195 mdoc_endparse(struct mdoc
*mdoc
)
202 mdoc_addeqn(struct mdoc
*mdoc
, const struct eqn
*ep
)
206 n
= node_alloc(mdoc
, ep
->ln
, ep
->pos
, MDOC_MAX
, ROFFT_EQN
);
208 if (ep
->ln
> mdoc
->last
->line
)
209 n
->flags
|= MDOC_LINE
;
210 node_append(mdoc
, n
);
211 mdoc
->next
= MDOC_NEXT_SIBLING
;
215 mdoc_addspan(struct mdoc
*mdoc
, const struct tbl_span
*sp
)
219 n
= node_alloc(mdoc
, sp
->line
, 0, MDOC_MAX
, ROFFT_TBL
);
221 node_append(mdoc
, n
);
222 mdoc
->next
= MDOC_NEXT_SIBLING
;
226 * Main parse routine. Parses a single line -- really just hands off to
227 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
230 mdoc_parseln(struct mdoc
*mdoc
, int ln
, char *buf
, int offs
)
233 if (mdoc
->last
->type
!= ROFFT_EQN
|| ln
> mdoc
->last
->line
)
234 mdoc
->flags
|= MDOC_NEWLINE
;
237 * Let the roff nS register switch SYNOPSIS mode early,
238 * such that the parser knows at all times
239 * whether this mode is on or off.
240 * Note that this mode is also switched by the Sh macro.
242 if (roff_getreg(mdoc
->roff
, "nS"))
243 mdoc
->flags
|= MDOC_SYNOPSIS
;
245 mdoc
->flags
&= ~MDOC_SYNOPSIS
;
247 return(roff_getcontrol(mdoc
->roff
, buf
, &offs
) ?
248 mdoc_pmacro(mdoc
, ln
, buf
, offs
) :
249 mdoc_ptext(mdoc
, ln
, buf
, offs
));
253 mdoc_macro(MACRO_PROT_ARGS
)
255 assert(tok
< MDOC_MAX
);
257 if (mdoc
->flags
& MDOC_PBODY
) {
258 if (tok
== MDOC_Dt
) {
259 mandoc_vmsg(MANDOCERR_DT_LATE
,
260 mdoc
->parse
, line
, ppos
,
261 "Dt %s", buf
+ *pos
);
264 } else if ( ! (mdoc_macros
[tok
].flags
& MDOC_PROLOGUE
)) {
265 if (mdoc
->meta
.title
== NULL
) {
266 mandoc_vmsg(MANDOCERR_DT_NOTITLE
,
267 mdoc
->parse
, line
, ppos
, "%s %s",
268 mdoc_macronames
[tok
], buf
+ *pos
);
269 mdoc
->meta
.title
= mandoc_strdup("UNTITLED");
271 if (NULL
== mdoc
->meta
.vol
)
272 mdoc
->meta
.vol
= mandoc_strdup("LOCAL");
273 mdoc
->flags
|= MDOC_PBODY
;
275 (*mdoc_macros
[tok
].fp
)(mdoc
, tok
, line
, ppos
, pos
, buf
);
280 node_append(struct mdoc
*mdoc
, struct roff_node
*p
)
285 assert(p
->type
!= ROFFT_ROOT
);
287 switch (mdoc
->next
) {
288 case MDOC_NEXT_SIBLING
:
289 mdoc
->last
->next
= p
;
290 p
->prev
= mdoc
->last
;
291 p
->parent
= mdoc
->last
->parent
;
293 case MDOC_NEXT_CHILD
:
294 mdoc
->last
->child
= p
;
295 p
->parent
= mdoc
->last
;
305 * Copy over the normalised-data pointer of our parent. Not
306 * everybody has one, but copying a null pointer is fine.
311 if (ENDBODY_NOT
!= p
->end
)
317 p
->norm
= p
->parent
->norm
;
323 mdoc_valid_pre(mdoc
, p
);
327 assert(p
->parent
->type
== ROFFT_BLOCK
);
331 assert(p
->parent
->type
== ROFFT_BLOCK
);
337 assert(p
->parent
->type
== ROFFT_BLOCK
);
350 mdoc_valid_post(mdoc
);
357 static struct roff_node
*
358 node_alloc(struct mdoc
*mdoc
, int line
, int pos
,
359 int tok
, enum roff_type type
)
363 p
= mandoc_calloc(1, sizeof(*p
));
364 p
->sec
= mdoc
->lastsec
;
372 if (MDOC_SYNOPSIS
& mdoc
->flags
)
373 p
->flags
|= MDOC_SYNPRETTY
;
375 p
->flags
&= ~MDOC_SYNPRETTY
;
376 if (MDOC_NEWLINE
& mdoc
->flags
)
377 p
->flags
|= MDOC_LINE
;
378 mdoc
->flags
&= ~MDOC_NEWLINE
;
384 mdoc_tail_alloc(struct mdoc
*mdoc
, int line
, int pos
, int tok
)
388 p
= node_alloc(mdoc
, line
, pos
, tok
, ROFFT_TAIL
);
389 node_append(mdoc
, p
);
390 mdoc
->next
= MDOC_NEXT_CHILD
;
394 mdoc_head_alloc(struct mdoc
*mdoc
, int line
, int pos
, int tok
)
400 p
= node_alloc(mdoc
, line
, pos
, tok
, ROFFT_HEAD
);
401 node_append(mdoc
, p
);
402 mdoc
->next
= MDOC_NEXT_CHILD
;
407 mdoc_body_alloc(struct mdoc
*mdoc
, int line
, int pos
, int tok
)
411 p
= node_alloc(mdoc
, line
, pos
, tok
, ROFFT_BODY
);
412 node_append(mdoc
, p
);
413 mdoc
->next
= MDOC_NEXT_CHILD
;
418 mdoc_endbody_alloc(struct mdoc
*mdoc
, int line
, int pos
, int tok
,
419 struct roff_node
*body
, enum mdoc_endbody end
)
423 body
->flags
|= MDOC_ENDED
;
424 body
->parent
->flags
|= MDOC_ENDED
;
425 p
= node_alloc(mdoc
, line
, pos
, tok
, ROFFT_BODY
);
427 p
->norm
= body
->norm
;
429 node_append(mdoc
, p
);
430 mdoc
->next
= MDOC_NEXT_SIBLING
;
435 mdoc_block_alloc(struct mdoc
*mdoc
, int line
, int pos
,
436 int tok
, struct mdoc_arg
*args
)
440 p
= node_alloc(mdoc
, line
, pos
, tok
, ROFFT_BLOCK
);
455 p
->norm
= mandoc_calloc(1, sizeof(union mdoc_data
));
460 node_append(mdoc
, p
);
461 mdoc
->next
= MDOC_NEXT_CHILD
;
466 mdoc_elem_alloc(struct mdoc
*mdoc
, int line
, int pos
,
467 int tok
, struct mdoc_arg
*args
)
471 p
= node_alloc(mdoc
, line
, pos
, tok
, ROFFT_ELEM
);
478 p
->norm
= mandoc_calloc(1, sizeof(union mdoc_data
));
483 node_append(mdoc
, p
);
484 mdoc
->next
= MDOC_NEXT_CHILD
;
488 mdoc_word_alloc(struct mdoc
*mdoc
, int line
, int pos
, const char *p
)
492 n
= node_alloc(mdoc
, line
, pos
, MDOC_MAX
, ROFFT_TEXT
);
493 n
->string
= roff_strdup(mdoc
->roff
, p
);
494 node_append(mdoc
, n
);
495 mdoc
->next
= MDOC_NEXT_SIBLING
;
499 mdoc_word_append(struct mdoc
*mdoc
, const char *p
)
502 char *addstr
, *newstr
;
505 addstr
= roff_strdup(mdoc
->roff
, p
);
506 mandoc_asprintf(&newstr
, "%s %s", n
->string
, addstr
);
510 mdoc
->next
= MDOC_NEXT_SIBLING
;
514 mdoc_node_free(struct roff_node
*p
)
517 if (p
->type
== ROFFT_BLOCK
|| p
->type
== ROFFT_ELEM
)
522 mdoc_argv_free(p
->args
);
527 mdoc_node_unlink(struct mdoc
*mdoc
, struct roff_node
*n
)
530 /* Adjust siblings. */
533 n
->prev
->next
= n
->next
;
535 n
->next
->prev
= n
->prev
;
541 if (n
->parent
->child
== n
)
542 n
->parent
->child
= n
->prev
? n
->prev
: n
->next
;
543 if (n
->parent
->last
== n
)
544 n
->parent
->last
= n
->prev
? n
->prev
: NULL
;
547 /* Adjust parse point, if applicable. */
549 if (mdoc
&& mdoc
->last
== n
) {
551 mdoc
->last
= n
->prev
;
552 mdoc
->next
= MDOC_NEXT_SIBLING
;
554 mdoc
->last
= n
->parent
;
555 mdoc
->next
= MDOC_NEXT_CHILD
;
559 if (mdoc
&& mdoc
->first
== n
)
564 mdoc_node_delete(struct mdoc
*mdoc
, struct roff_node
*p
)
569 mdoc_node_delete(mdoc
, p
->child
);
571 assert(0 == p
->nchild
);
573 mdoc_node_unlink(mdoc
, p
);
578 mdoc_node_relink(struct mdoc
*mdoc
, struct roff_node
*p
)
581 mdoc_node_unlink(mdoc
, p
);
582 node_append(mdoc
, p
);
586 * Parse free-form text, that is, a line that does not begin with the
590 mdoc_ptext(struct mdoc
*mdoc
, int line
, char *buf
, int offs
)
599 * Divert directly to list processing if we're encountering a
600 * columnar ROFFT_BLOCK with or without a prior ROFFT_BLOCK entry
601 * (a ROFFT_BODY means it's already open, in which case we should
602 * process within its context in the normal way).
605 if (n
->tok
== MDOC_Bl
&& n
->type
== ROFFT_BODY
&&
606 n
->end
== ENDBODY_NOT
&& n
->norm
->Bl
.type
== LIST_column
) {
607 /* `Bl' is open without any children. */
608 mdoc
->flags
|= MDOC_FREECOL
;
609 mdoc_macro(mdoc
, MDOC_It
, line
, offs
, &offs
, buf
);
613 if (n
->tok
== MDOC_It
&& n
->type
== ROFFT_BLOCK
&&
615 MDOC_Bl
== n
->parent
->tok
&&
616 LIST_column
== n
->parent
->norm
->Bl
.type
) {
617 /* `Bl' has block-level `It' children. */
618 mdoc
->flags
|= MDOC_FREECOL
;
619 mdoc_macro(mdoc
, MDOC_It
, line
, offs
, &offs
, buf
);
624 * Search for the beginning of unescaped trailing whitespace (ws)
625 * and for the first character not to be output (end).
628 /* FIXME: replace with strcspn(). */
630 for (c
= end
= buf
+ offs
; *c
; c
++) {
638 * Always warn about trailing tabs,
639 * even outside literal context,
640 * where they should be put on the next line.
645 * Strip trailing tabs in literal context only;
646 * outside, they affect the next line.
648 if (MDOC_LITERAL
& mdoc
->flags
)
652 /* Skip the escaped character, too, if any. */
665 mandoc_msg(MANDOCERR_SPACE_EOL
, mdoc
->parse
,
666 line
, (int)(ws
-buf
), NULL
);
668 if (buf
[offs
] == '\0' && ! (mdoc
->flags
& MDOC_LITERAL
)) {
669 mandoc_msg(MANDOCERR_FI_BLANK
, mdoc
->parse
,
670 line
, (int)(c
- buf
), NULL
);
673 * Insert a `sp' in the case of a blank line. Technically,
674 * blank lines aren't allowed, but enough manuals assume this
675 * behaviour that we want to work around it.
677 mdoc_elem_alloc(mdoc
, line
, offs
, MDOC_sp
, NULL
);
678 mdoc
->next
= MDOC_NEXT_SIBLING
;
679 mdoc_valid_post(mdoc
);
683 mdoc_word_alloc(mdoc
, line
, offs
, buf
+offs
);
685 if (mdoc
->flags
& MDOC_LITERAL
)
689 * End-of-sentence check. If the last character is an unescaped
690 * EOS character, then flag the node as being the end of a
691 * sentence. The front-end will know how to interpret this.
696 if (mandoc_eos(buf
+offs
, (size_t)(end
-buf
-offs
)))
697 mdoc
->last
->flags
|= MDOC_EOS
;
702 * Parse a macro line, that is, a line beginning with the control
706 mdoc_pmacro(struct mdoc
*mdoc
, int ln
, char *buf
, int offs
)
717 * Copy the first word into a nil-terminated buffer.
718 * Stop when a space, tab, escape, or eoln is encountered.
722 while (i
< 4 && strchr(" \t\\", buf
[offs
]) == NULL
)
723 mac
[i
++] = buf
[offs
++];
727 tok
= (i
> 1 && i
< 4) ? mdoc_hash_find(mac
) : MDOC_MAX
;
729 if (tok
== MDOC_MAX
) {
730 mandoc_msg(MANDOCERR_MACRO
, mdoc
->parse
,
731 ln
, sv
, buf
+ sv
- 1);
735 /* Skip a leading escape sequence or tab. */
740 mandoc_escape(&cp
, NULL
, NULL
);
750 /* Jump to the next non-whitespace word. */
752 while (buf
[offs
] && ' ' == buf
[offs
])
756 * Trailing whitespace. Note that tabs are allowed to be passed
757 * into the parser as "text", so we only warn about spaces here.
760 if ('\0' == buf
[offs
] && ' ' == buf
[offs
- 1])
761 mandoc_msg(MANDOCERR_SPACE_EOL
, mdoc
->parse
,
765 * If an initial macro or a list invocation, divert directly
766 * into macro processing.
769 if (NULL
== mdoc
->last
|| MDOC_It
== tok
|| MDOC_El
== tok
) {
770 mdoc_macro(mdoc
, tok
, ln
, sv
, &offs
, buf
);
778 * If the first macro of a `Bl -column', open an `It' block
779 * context around the parsed macro.
782 if (n
->tok
== MDOC_Bl
&& n
->type
== ROFFT_BODY
&&
783 n
->end
== ENDBODY_NOT
&& n
->norm
->Bl
.type
== LIST_column
) {
784 mdoc
->flags
|= MDOC_FREECOL
;
785 mdoc_macro(mdoc
, MDOC_It
, ln
, sv
, &sv
, buf
);
790 * If we're following a block-level `It' within a `Bl -column'
791 * context (perhaps opened in the above block or in ptext()),
792 * then open an `It' block context around the parsed macro.
795 if (n
->tok
== MDOC_It
&& n
->type
== ROFFT_BLOCK
&&
797 MDOC_Bl
== n
->parent
->tok
&&
798 LIST_column
== n
->parent
->norm
->Bl
.type
) {
799 mdoc
->flags
|= MDOC_FREECOL
;
800 mdoc_macro(mdoc
, MDOC_It
, ln
, sv
, &sv
, buf
);
804 /* Normal processing of a macro. */
806 mdoc_macro(mdoc
, tok
, ln
, sv
, &offs
, buf
);
808 /* In quick mode (for mandocdb), abort after the NAME section. */
810 if (mdoc
->quick
&& MDOC_Sh
== tok
&&
811 SEC_NAME
!= mdoc
->last
->sec
)
818 mdoc_isdelim(const char *p
)
831 return(DELIM_MIDDLE
);
855 if (0 == strcmp(p
+ 1, "."))
857 if (0 == strcmp(p
+ 1, "fR|\\fP"))
858 return(DELIM_MIDDLE
);
864 mdoc_deroff(char **dest
, const struct roff_node
*n
)
869 if (n
->type
!= ROFFT_TEXT
) {
870 for (n
= n
->child
; n
; n
= n
->next
)
871 mdoc_deroff(dest
, n
);
875 /* Skip leading whitespace. */
877 for (cp
= n
->string
; '\0' != *cp
; cp
++)
878 if (0 == isspace((unsigned char)*cp
))
881 /* Skip trailing whitespace. */
883 for (sz
= strlen(cp
); sz
; sz
--)
884 if (0 == isspace((unsigned char)cp
[sz
-1]))
887 /* Skip empty strings. */
893 *dest
= mandoc_strndup(cp
, sz
);
897 mandoc_asprintf(&cp
, "%s %*s", *dest
, (int)sz
, cp
);