]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.192 2014/02/14 22:27:41 schwarze Exp $ */
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
30 #include "libmandoc.h"
32 /* Maximum number of nested if-else conditionals. */
33 #define RSTACK_MAX 128
35 /* Maximum number of string expansions per line, to break infinite loops. */
36 #define EXPAND_LIMIT 1000
84 * An incredibly-simple string buffer.
87 char *p
; /* nil-terminated buffer */
88 size_t sz
; /* saved strlen(p) */
92 * A key-value roffstr pair as part of a singly-linked list.
97 struct roffkv
*next
; /* next in list */
101 * A single number register as part of a singly-linked list.
106 struct roffreg
*next
;
110 enum mparset parsetype
; /* requested parse type */
111 struct mparse
*parse
; /* parse point */
112 int quick
; /* skip standard macro deletion */
113 struct roffnode
*last
; /* leaf of stack */
114 enum roffrule rstack
[RSTACK_MAX
]; /* stack of !`ie' rules */
115 char control
; /* control character */
116 int rstackpos
; /* position in rstack */
117 struct roffreg
*regtab
; /* number registers */
118 struct roffkv
*strtab
; /* user-defined strings & macros */
119 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
120 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
121 const char *current_string
; /* value of last called user macro */
122 struct tbl_node
*first_tbl
; /* first table parsed */
123 struct tbl_node
*last_tbl
; /* last table parsed */
124 struct tbl_node
*tbl
; /* current table being parsed */
125 struct eqn_node
*last_eqn
; /* last equation parsed */
126 struct eqn_node
*first_eqn
; /* first equation parsed */
127 struct eqn_node
*eqn
; /* current equation being parsed */
131 enum rofft tok
; /* type of node */
132 struct roffnode
*parent
; /* up one in stack */
133 int line
; /* parse line */
134 int col
; /* parse col */
135 char *name
; /* node name, e.g. macro name */
136 char *end
; /* end-rules: custom token */
137 int endspan
; /* end-rules: next-line or infty */
138 enum roffrule rule
; /* current evaluation rule */
141 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
142 enum rofft tok, /* tok of macro */ \
143 char **bufp, /* input buffer */ \
144 size_t *szp, /* size of input buffer */ \
145 int ln, /* parse line */ \
146 int ppos, /* original pos in buffer */ \
147 int pos, /* current pos in buffer */ \
148 int *offs /* reset offset of buffer data */
150 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
153 const char *name
; /* macro name */
154 roffproc proc
; /* process new macro */
155 roffproc text
; /* process as child text of macro */
156 roffproc sub
; /* process as child of macro */
158 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
159 struct roffmac
*next
;
163 const char *name
; /* predefined input name */
164 const char *str
; /* replacement symbol */
167 #define PREDEF(__name, __str) \
168 { (__name), (__str) },
170 static enum rofft
roffhash_find(const char *, size_t);
171 static void roffhash_init(void);
172 static void roffnode_cleanscope(struct roff
*);
173 static void roffnode_pop(struct roff
*);
174 static void roffnode_push(struct roff
*, enum rofft
,
175 const char *, int, int);
176 static enum rofferr
roff_block(ROFF_ARGS
);
177 static enum rofferr
roff_block_text(ROFF_ARGS
);
178 static enum rofferr
roff_block_sub(ROFF_ARGS
);
179 static enum rofferr
roff_cblock(ROFF_ARGS
);
180 static enum rofferr
roff_cc(ROFF_ARGS
);
181 static enum rofferr
roff_ccond(ROFF_ARGS
);
182 static enum rofferr
roff_cond(ROFF_ARGS
);
183 static enum rofferr
roff_cond_text(ROFF_ARGS
);
184 static enum rofferr
roff_cond_sub(ROFF_ARGS
);
185 static enum rofferr
roff_ds(ROFF_ARGS
);
186 static enum roffrule
roff_evalcond(const char *, int *);
187 static void roff_free1(struct roff
*);
188 static void roff_freereg(struct roffreg
*);
189 static void roff_freestr(struct roffkv
*);
190 static char *roff_getname(struct roff
*, char **, int, int);
191 static int roff_getnum(const char *, int *, int *);
192 static int roff_getop(const char *, int *, char *);
193 static int roff_getregn(const struct roff
*,
194 const char *, size_t);
195 static int roff_getregro(const char *name
);
196 static const char *roff_getstrn(const struct roff
*,
197 const char *, size_t);
198 static enum rofferr
roff_it(ROFF_ARGS
);
199 static enum rofferr
roff_line_ignore(ROFF_ARGS
);
200 static enum rofferr
roff_nr(ROFF_ARGS
);
201 static void roff_openeqn(struct roff
*, const char *,
202 int, int, const char *);
203 static enum rofft
roff_parse(struct roff
*, const char *, int *);
204 static enum rofferr
roff_parsetext(char **, size_t *, int, int *);
205 static enum rofferr
roff_res(struct roff
*,
206 char **, size_t *, int, int);
207 static enum rofferr
roff_rm(ROFF_ARGS
);
208 static void roff_setstr(struct roff
*,
209 const char *, const char *, int);
210 static void roff_setstrn(struct roffkv
**, const char *,
211 size_t, const char *, size_t, int);
212 static enum rofferr
roff_so(ROFF_ARGS
);
213 static enum rofferr
roff_tr(ROFF_ARGS
);
214 static enum rofferr
roff_Dd(ROFF_ARGS
);
215 static enum rofferr
roff_TH(ROFF_ARGS
);
216 static enum rofferr
roff_TE(ROFF_ARGS
);
217 static enum rofferr
roff_TS(ROFF_ARGS
);
218 static enum rofferr
roff_EQ(ROFF_ARGS
);
219 static enum rofferr
roff_EN(ROFF_ARGS
);
220 static enum rofferr
roff_T_(ROFF_ARGS
);
221 static enum rofferr
roff_userdef(ROFF_ARGS
);
223 /* See roffhash_find() */
227 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
229 static struct roffmac
*hash
[HASHWIDTH
];
231 static struct roffmac roffs
[ROFF_MAX
] = {
232 { "ad", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
233 { "am", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
234 { "ami", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
235 { "am1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
236 { "cc", roff_cc
, NULL
, NULL
, 0, NULL
},
237 { "de", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
238 { "dei", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
239 { "de1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
240 { "ds", roff_ds
, NULL
, NULL
, 0, NULL
},
241 { "el", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
242 { "fam", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
243 { "hw", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
244 { "hy", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
245 { "ie", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
246 { "if", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
247 { "ig", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
248 { "it", roff_it
, NULL
, NULL
, 0, NULL
},
249 { "ne", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
250 { "nh", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
251 { "nr", roff_nr
, NULL
, NULL
, 0, NULL
},
252 { "ns", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
253 { "ps", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
254 { "rm", roff_rm
, NULL
, NULL
, 0, NULL
},
255 { "so", roff_so
, NULL
, NULL
, 0, NULL
},
256 { "ta", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
257 { "tr", roff_tr
, NULL
, NULL
, 0, NULL
},
258 { "Dd", roff_Dd
, NULL
, NULL
, 0, NULL
},
259 { "TH", roff_TH
, NULL
, NULL
, 0, NULL
},
260 { "TS", roff_TS
, NULL
, NULL
, 0, NULL
},
261 { "TE", roff_TE
, NULL
, NULL
, 0, NULL
},
262 { "T&", roff_T_
, NULL
, NULL
, 0, NULL
},
263 { "EQ", roff_EQ
, NULL
, NULL
, 0, NULL
},
264 { "EN", roff_EN
, NULL
, NULL
, 0, NULL
},
265 { ".", roff_cblock
, NULL
, NULL
, 0, NULL
},
266 { "\\}", roff_ccond
, NULL
, NULL
, 0, NULL
},
267 { NULL
, roff_userdef
, NULL
, NULL
, 0, NULL
},
270 const char *const __mdoc_reserved
[] = {
271 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
272 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
273 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
274 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
275 "Ds", "Dt", "Dv", "Dx", "D1",
276 "Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
277 "En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
278 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
279 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
280 "Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
281 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
282 "Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
283 "Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
284 "Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
285 "Ss", "St", "Sx", "Sy",
286 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
287 "%A", "%B", "%D", "%I", "%J", "%N", "%O",
288 "%P", "%Q", "%R", "%T", "%U", "%V",
292 const char *const __man_reserved
[] = {
293 "AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
294 "EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
295 "LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
296 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
297 "TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
301 /* Array of injected predefined strings. */
302 #define PREDEFS_MAX 38
303 static const struct predef predefs
[PREDEFS_MAX
] = {
304 #include "predefs.in"
/*
 * Compute the hash bucket index for a macro name from its first
 * character, relative to ASCII_LO; see roffhash_find().
 * The argument is parenthesized so that pointer expressions such
 * as (name + 1) index the intended string.
 */
#define	ROFF_HASH(p) ((p)[0] - ASCII_LO)
310 static int roffit_lines
; /* number of lines to delay */
311 static char *roffit_macro
; /* nil-terminated macro line */
319 for (i
= 0; i
< (int)ROFF_USERDEF
; i
++) {
320 assert(roffs
[i
].name
[0] >= ASCII_LO
);
321 assert(roffs
[i
].name
[0] <= ASCII_HI
);
323 buc
= ROFF_HASH(roffs
[i
].name
);
325 if (NULL
!= (n
= hash
[buc
])) {
326 for ( ; n
->next
; n
= n
->next
)
330 hash
[buc
] = &roffs
[i
];
335 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
336 * the nil-terminated string name could be found.
339 roffhash_find(const char *p
, size_t s
)
345 * libroff has an extremely simple hashtable, for the time
346 * being, which simply keys on the first character, which must
347 * be printable, then walks a chain. It works well enough until
351 if (p
[0] < ASCII_LO
|| p
[0] > ASCII_HI
)
356 if (NULL
== (n
= hash
[buc
]))
358 for ( ; n
; n
= n
->next
)
359 if (0 == strncmp(n
->name
, p
, s
) && '\0' == n
->name
[(int)s
])
360 return((enum rofft
)(n
- roffs
));
367 * Pop the current node off of the stack of roff instructions currently
371 roffnode_pop(struct roff
*r
)
378 r
->last
= r
->last
->parent
;
386 * Push a roff node onto the instruction stack. This must later be
387 * removed with roffnode_pop().
390 roffnode_push(struct roff
*r
, enum rofft tok
, const char *name
,
395 p
= mandoc_calloc(1, sizeof(struct roffnode
));
398 p
->name
= mandoc_strdup(name
);
402 p
->rule
= p
->parent
? p
->parent
->rule
: ROFFRULE_DENY
;
409 roff_free1(struct roff
*r
)
411 struct tbl_node
*tbl
;
415 while (NULL
!= (tbl
= r
->first_tbl
)) {
416 r
->first_tbl
= tbl
->next
;
420 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
422 while (NULL
!= (e
= r
->first_eqn
)) {
423 r
->first_eqn
= e
->next
;
427 r
->first_eqn
= r
->last_eqn
= r
->eqn
= NULL
;
432 roff_freestr(r
->strtab
);
433 roff_freestr(r
->xmbtab
);
435 r
->strtab
= r
->xmbtab
= NULL
;
437 roff_freereg(r
->regtab
);
442 for (i
= 0; i
< 128; i
++)
450 roff_reset(struct roff
*r
)
459 roff_free(struct roff
*r
)
468 roff_alloc(enum mparset type
, struct mparse
*parse
, int quick
)
472 r
= mandoc_calloc(1, sizeof(struct roff
));
484 * In the current line, expand user-defined strings ("\*")
485 * and references to number registers ("\n").
486 * Also check the syntax of other escape sequences.
489 roff_res(struct roff
*r
, char **bufp
, size_t *szp
, int ln
, int pos
)
491 char ubuf
[12]; /* buffer to print the number */
492 const char *stesc
; /* start of an escape sequence ('\\') */
493 const char *stnam
; /* start of the name, after "[(*" */
494 const char *cp
; /* end of the name, e.g. before ']' */
495 const char *res
; /* the string to be substituted */
496 char *nbuf
; /* new buffer to copy bufp to */
497 size_t nsz
; /* size of the new buffer */
498 size_t maxl
; /* expected length of the escape name */
499 size_t naml
; /* actual length of the escape name */
500 int expand_count
; /* to avoid infinite loops */
506 while (NULL
!= (cp
= strchr(cp
, '\\'))) {
510 * The second character must be an asterisk or an n.
511 * If it isn't, skip it anyway: It is escaped,
512 * so it can't start another escape sequence.
526 if (ESCAPE_ERROR
!= mandoc_escape(&cp
, NULL
, NULL
))
529 (MANDOCERR_BADESCAPE
, r
->parse
,
530 ln
, (int)(stesc
- *bufp
), NULL
);
537 * The third character decides the length
538 * of the name of the string or register.
539 * Save a pointer to the name.
559 /* Advance to the end of the name. */
561 for (naml
= 0; 0 == maxl
|| naml
< maxl
; naml
++, cp
++) {
564 (MANDOCERR_BADESCAPE
,
566 (int)(stesc
- *bufp
), NULL
);
569 if (0 == maxl
&& ']' == *cp
)
574 * Retrieve the replacement string; if it is
575 * undefined, resume searching for escapes.
579 res
= roff_getstrn(r
, stnam
, naml
);
581 snprintf(ubuf
, sizeof(ubuf
), "%d",
582 roff_getregn(r
, stnam
, naml
));
586 (MANDOCERR_BADESCAPE
, r
->parse
,
587 ln
, (int)(stesc
- *bufp
), NULL
);
591 /* Replace the escape sequence by the string. */
595 nsz
= *szp
+ strlen(res
) + 1;
596 nbuf
= mandoc_malloc(nsz
);
598 strlcpy(nbuf
, *bufp
, (size_t)(stesc
- *bufp
+ 1));
599 strlcat(nbuf
, res
, nsz
);
600 strlcat(nbuf
, cp
+ (maxl
? 0 : 1), nsz
);
607 if (EXPAND_LIMIT
>= ++expand_count
)
610 /* Just leave the string unexpanded. */
611 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
, ln
, pos
, NULL
);
618 * Process text streams:
619 * Convert all breakable hyphens into ASCII_HYPH.
620 * Decrement and spring input line trap.
623 roff_parsetext(char **bufp
, size_t *szp
, int pos
, int *offs
)
631 start
= p
= *bufp
+ pos
;
634 sz
= strcspn(p
, "-\\");
641 /* Skip over escapes. */
643 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
644 if (ESCAPE_ERROR
== esc
)
647 } else if (p
== start
) {
652 if (isalpha((unsigned char)p
[-1]) &&
653 isalpha((unsigned char)p
[1]))
658 /* Spring the input line trap. */
659 if (1 == roffit_lines
) {
660 isz
= asprintf(&p
, "%s\n.%s", *bufp
, roffit_macro
);
663 exit((int)MANDOCLEVEL_SYSERR
);
671 return(ROFF_REPARSE
);
672 } else if (1 < roffit_lines
)
678 roff_parseln(struct roff
*r
, int ln
, char **bufp
,
679 size_t *szp
, int pos
, int *offs
)
686 * Run the reserved-word filter only if we have some reserved
690 e
= roff_res(r
, bufp
, szp
, ln
, pos
);
693 assert(ROFF_CONT
== e
);
696 ctl
= roff_getcontrol(r
, *bufp
, &pos
);
699 * First, if a scope is open and we're not a macro, pass the
700 * text through the macro's filter. If a scope isn't open and
701 * we're not a macro, just let it through.
702 * Finally, if there's an equation scope open, divert it into it
703 * no matter our state.
706 if (r
->last
&& ! ctl
) {
708 assert(roffs
[t
].text
);
710 (r
, t
, bufp
, szp
, ln
, pos
, pos
, offs
);
711 assert(ROFF_IGN
== e
|| ROFF_CONT
== e
);
716 return(eqn_read(&r
->eqn
, ln
, *bufp
, ppos
, offs
));
719 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
720 return(roff_parsetext(bufp
, szp
, pos
, offs
));
724 * If a scope is open, go to the child handler for that macro,
725 * as it may want to preprocess before doing anything with it.
726 * Don't do so if an equation is open.
731 assert(roffs
[t
].sub
);
732 return((*roffs
[t
].sub
)
734 ln
, ppos
, pos
, offs
));
738 * Lastly, as we've no scope open, try to look up and execute
739 * the new macro. If no macro is found, simply return and let
740 * the compilers handle it.
743 if (ROFF_MAX
== (t
= roff_parse(r
, *bufp
, &pos
)))
746 assert(roffs
[t
].proc
);
747 return((*roffs
[t
].proc
)
749 ln
, ppos
, pos
, offs
));
754 roff_endparse(struct roff
*r
)
758 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
759 r
->last
->line
, r
->last
->col
, NULL
);
762 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
763 r
->eqn
->eqn
.ln
, r
->eqn
->eqn
.pos
, NULL
);
768 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
769 r
->tbl
->line
, r
->tbl
->pos
, NULL
);
775 * Parse a roff node's type from the input buffer. This must be in the
776 * form of ".foo xxx" in the usual way.
779 roff_parse(struct roff
*r
, const char *buf
, int *pos
)
785 if ('\0' == buf
[*pos
] || '"' == buf
[*pos
] ||
786 '\t' == buf
[*pos
] || ' ' == buf
[*pos
])
790 * We stop the macro parse at an escape, tab, space, or nil.
791 * However, `\}' is also a valid macro, so make sure we don't
792 * clobber it by seeing the `\' as the end of token.
796 maclen
= strcspn(mac
+ 1, " \\\t\0") + 1;
798 t
= (r
->current_string
= roff_getstrn(r
, mac
, maclen
))
799 ? ROFF_USERDEF
: roffhash_find(mac
, maclen
);
803 while (buf
[*pos
] && ' ' == buf
[*pos
])
811 roff_cblock(ROFF_ARGS
)
815 * A block-close `..' should only be invoked as a child of an
816 * ignore macro, otherwise raise a warning and just ignore it.
819 if (NULL
== r
->last
) {
820 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
824 switch (r
->last
->tok
) {
832 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
839 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
844 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
847 roffnode_cleanscope(r
);
854 roffnode_cleanscope(struct roff
*r
)
858 if (--r
->last
->endspan
!= 0)
867 roff_ccond(ROFF_ARGS
)
870 if (NULL
== r
->last
) {
871 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
875 switch (r
->last
->tok
) {
883 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
887 if (r
->last
->endspan
> -1) {
888 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
893 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
896 roffnode_cleanscope(r
);
903 roff_block(ROFF_ARGS
)
911 if (ROFF_ig
!= tok
) {
912 if ('\0' == (*bufp
)[pos
]) {
913 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
918 * Re-write `de1', since we don't really care about
919 * groff's strange compatibility mode, into `de'.
927 mandoc_msg(MANDOCERR_REQUEST
, r
->parse
, ln
, ppos
,
930 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
933 while (isspace((unsigned char)(*bufp
)[pos
]))
934 (*bufp
)[pos
++] = '\0';
937 roffnode_push(r
, tok
, name
, ln
, ppos
);
940 * At the beginning of a `de' macro, clear the existing string
941 * with the same name, if there is one. New content will be
942 * added from roff_block_text() in multiline mode.
946 roff_setstr(r
, name
, "", 0);
948 if ('\0' == (*bufp
)[pos
])
951 /* If present, process the custom end-of-line marker. */
954 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
958 * Note: groff does NOT like escape characters in the input.
959 * Instead of detecting this, we're just going to let it fly and
964 sz
= (size_t)(pos
- sv
);
966 if (1 == sz
&& '.' == (*bufp
)[sv
])
969 r
->last
->end
= mandoc_malloc(sz
+ 1);
971 memcpy(r
->last
->end
, *bufp
+ sv
, sz
);
972 r
->last
->end
[(int)sz
] = '\0';
975 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
983 roff_block_sub(ROFF_ARGS
)
989 * First check whether a custom macro exists at this level. If
990 * it does, then check against it. This is one of groff's
991 * stranger behaviours. If we encountered a custom end-scope
992 * tag and that tag also happens to be a "real" macro, then we
993 * need to try interpreting it again as a real macro. If it's
994 * not, then return ignore. Else continue.
998 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
999 if ((*bufp
)[i
] != r
->last
->end
[j
])
1002 if ('\0' == r
->last
->end
[j
] &&
1003 ('\0' == (*bufp
)[i
] ||
1004 ' ' == (*bufp
)[i
] ||
1005 '\t' == (*bufp
)[i
])) {
1007 roffnode_cleanscope(r
);
1009 while (' ' == (*bufp
)[i
] || '\t' == (*bufp
)[i
])
1013 if (ROFF_MAX
!= roff_parse(r
, *bufp
, &pos
))
1020 * If we have no custom end-query or lookup failed, then try
1021 * pulling it out of the hashtable.
1024 t
= roff_parse(r
, *bufp
, &pos
);
1027 * Macros other than block-end are only significant
1028 * in `de' blocks; elsewhere, simply throw them away.
1030 if (ROFF_cblock
!= t
) {
1032 roff_setstr(r
, r
->last
->name
, *bufp
+ ppos
, 1);
1036 assert(roffs
[t
].proc
);
1037 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1038 ln
, ppos
, pos
, offs
));
1044 roff_block_text(ROFF_ARGS
)
1048 roff_setstr(r
, r
->last
->name
, *bufp
+ pos
, 1);
1056 roff_cond_sub(ROFF_ARGS
)
1063 roffnode_cleanscope(r
);
1064 t
= roff_parse(r
, *bufp
, &pos
);
1067 * Fully handle known macros when they are structurally
1068 * required or when the conditional evaluated to true.
1071 if ((ROFF_MAX
!= t
) &&
1072 (ROFF_ccond
== t
|| ROFFRULE_ALLOW
== rr
||
1073 ROFFMAC_STRUCT
& roffs
[t
].flags
)) {
1074 assert(roffs
[t
].proc
);
1075 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1076 ln
, ppos
, pos
, offs
));
1079 /* Always check for the closing delimiter `\}'. */
1082 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1087 * If we're at the end of line, then just chop
1088 * off the \} and resize the buffer.
1089 * If we aren't, then convert it to spaces.
1092 if ('\0' == *(ep
+ 1)) {
1096 *(ep
- 1) = *ep
= ' ';
1098 roff_ccond(r
, ROFF_ccond
, bufp
, szp
,
1099 ln
, pos
, pos
+ 2, offs
);
1102 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
1107 roff_cond_text(ROFF_ARGS
)
1113 roffnode_cleanscope(r
);
1116 for ( ; NULL
!= (ep
= strchr(ep
, '\\')); ep
++) {
1121 roff_ccond(r
, ROFF_ccond
, bufp
, szp
,
1122 ln
, pos
, pos
+ 2, offs
);
1124 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
/*
 * Parse a decimal integer, optionally preceded by a minus sign,
 * from the string v starting at index *pos.
 *
 * On success, store the value in *res, advance *pos to the first
 * character after the last digit, and return 1.
 * If no digit is found, return 0 and leave *pos unchanged;
 * *res is reset to 0 in that case.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int	 p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	/*
	 * Shift the value accumulated so far by one decimal place
	 * before adding the next digit.  This must be a plain
	 * assignment: using "+=" here would add the old value once
	 * more and yield 11 * old + digit.
	 */
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';

	/* A lone sign, or no digits at all, is not a number. */
	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1150 roff_getop(const char *v
, int *pos
, char *res
)
1155 e
= v
[*pos
+ 1] == '=';
1177 static enum roffrule
1178 roff_evalcond(const char *v
, int *pos
)
1186 return(ROFFRULE_ALLOW
);
1193 return(ROFFRULE_DENY
);
1203 if (!roff_getnum(v
, pos
, &lh
))
1204 return ROFFRULE_DENY
;
1205 if (!roff_getop(v
, pos
, &op
)) {
1210 if (!roff_getnum(v
, pos
, &rh
))
1211 return ROFFRULE_DENY
;
1229 return ROFFRULE_DENY
;
1234 return lh
? ROFFRULE_ALLOW
: ROFFRULE_DENY
;
1239 roff_line_ignore(ROFF_ARGS
)
1247 roff_cond(ROFF_ARGS
)
1250 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
1253 * An `.el' has no conditional body: it will consume the value
1254 * of the current rstack entry set in prior `ie' calls or
1257 * If we're not an `el', however, then evaluate the conditional.
1260 r
->last
->rule
= ROFF_el
== tok
?
1262 ROFFRULE_DENY
: r
->rstack
[r
->rstackpos
--]) :
1263 roff_evalcond(*bufp
, &pos
);
1266 * An if-else will put the NEGATION of the current evaluated
1267 * conditional into the stack of rules.
1270 if (ROFF_ie
== tok
) {
1271 if (r
->rstackpos
== RSTACK_MAX
- 1) {
1272 mandoc_msg(MANDOCERR_MEM
,
1273 r
->parse
, ln
, ppos
, NULL
);
1276 r
->rstack
[++r
->rstackpos
] =
1277 ROFFRULE_DENY
== r
->last
->rule
?
1278 ROFFRULE_ALLOW
: ROFFRULE_DENY
;
1281 /* If the parent has false as its rule, then so do we. */
1283 if (r
->last
->parent
&& ROFFRULE_DENY
== r
->last
->parent
->rule
)
1284 r
->last
->rule
= ROFFRULE_DENY
;
1288 * If there is nothing on the line after the conditional,
1289 * not even whitespace, use next-line scope.
1292 if ('\0' == (*bufp
)[pos
]) {
1293 r
->last
->endspan
= 2;
1297 while (' ' == (*bufp
)[pos
])
1300 /* An opening brace requests multiline scope. */
1302 if ('\\' == (*bufp
)[pos
] && '{' == (*bufp
)[pos
+ 1]) {
1303 r
->last
->endspan
= -1;
1309 * Anything else following the conditional causes
1310 * single-line scope. Warn if the scope contains
1311 * nothing but trailing whitespace.
1314 if ('\0' == (*bufp
)[pos
])
1315 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
1317 r
->last
->endspan
= 1;
1329 char *name
, *string
;
1332 * A symbol is named by the first word following the macro
1333 * invocation up to a space. Its value is anything after the
1334 * name's trailing whitespace and optional double-quote. Thus,
1338 * will have `bar " ' as its value.
1341 string
= *bufp
+ pos
;
1342 name
= roff_getname(r
, &string
, ln
, pos
);
1346 /* Read past initial double-quote. */
1350 /* The rest is the value. */
1351 roff_setstr(r
, name
, string
, 0);
1356 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
1358 struct roffreg
*reg
;
1360 /* Search for an existing register with the same name. */
1363 while (reg
&& strcmp(name
, reg
->key
.p
))
1367 /* Create a new register. */
1368 reg
= mandoc_malloc(sizeof(struct roffreg
));
1369 reg
->key
.p
= mandoc_strdup(name
);
1370 reg
->key
.sz
= strlen(name
);
1372 reg
->next
= r
->regtab
;
1378 else if ('-' == sign
)
1385 * Handle some predefined read-only number registers.
1386 * For now, return -1 if the requested register is not predefined;
1387 * in case a predefined read-only register having the value -1
1388 * were to turn up, another special value would have to be chosen.
1391 roff_getregro(const char *name
)
1395 case ('A'): /* ASCII approximation mode is always off. */
1397 case ('g'): /* Groff compatibility mode is always on. */
1399 case ('H'): /* Fixed horizontal resolution. */
1401 case ('j'): /* Always adjust left margin only. */
1403 case ('T'): /* Some output device is always defined. */
1405 case ('V'): /* Fixed vertical resolution. */
1413 roff_getreg(const struct roff
*r
, const char *name
)
1415 struct roffreg
*reg
;
1418 if ('.' == name
[0] && '\0' != name
[1] && '\0' == name
[2]) {
1419 val
= roff_getregro(name
+ 1);
1424 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1425 if (0 == strcmp(name
, reg
->key
.p
))
1432 roff_getregn(const struct roff
*r
, const char *name
, size_t len
)
1434 struct roffreg
*reg
;
1437 if ('.' == name
[0] && 2 == len
) {
1438 val
= roff_getregro(name
+ 1);
1443 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1444 if (len
== reg
->key
.sz
&&
1445 0 == strncmp(name
, reg
->key
.p
, len
))
1452 roff_freereg(struct roffreg
*reg
)
1454 struct roffreg
*old_reg
;
1456 while (NULL
!= reg
) {
1475 key
= roff_getname(r
, &val
, ln
, pos
);
1478 if ('+' == sign
|| '-' == sign
)
1481 sz
= strspn(val
, "0123456789");
1482 iv
= sz
? mandoc_strntoi(val
, sz
, 10) : 0;
1484 roff_setreg(r
, key
, iv
, sign
);
1497 while ('\0' != *cp
) {
1498 name
= roff_getname(r
, &cp
, ln
, (int)(cp
- *bufp
));
1500 roff_setstr(r
, name
, NULL
, 0);
1513 /* Parse the number of lines. */
1515 len
= strcspn(cp
, " \t");
1517 if ((iv
= mandoc_strntoi(cp
, len
, 10)) <= 0) {
1518 mandoc_msg(MANDOCERR_NUMERIC
, r
->parse
,
1519 ln
, ppos
, *bufp
+ 1);
1524 /* Arm the input line trap. */
1526 roffit_macro
= mandoc_strdup(cp
);
1534 const char *const *cp
;
1536 if (0 == r
->quick
&& MPARSE_MDOC
!= r
->parsetype
)
1537 for (cp
= __mdoc_reserved
; *cp
; cp
++)
1538 roff_setstr(r
, *cp
, NULL
, 0);
1547 const char *const *cp
;
1549 if (0 == r
->quick
&& MPARSE_MDOC
!= r
->parsetype
)
1550 for (cp
= __man_reserved
; *cp
; cp
++)
1551 roff_setstr(r
, *cp
, NULL
, 0);
1562 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1575 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1577 tbl_restart(ppos
, ln
, r
->tbl
);
1584 roff_closeeqn(struct roff
*r
)
1587 return(r
->eqn
&& ROFF_EQN
== eqn_end(&r
->eqn
) ? 1 : 0);
1592 roff_openeqn(struct roff
*r
, const char *name
, int line
,
1593 int offs
, const char *buf
)
1598 assert(NULL
== r
->eqn
);
1599 e
= eqn_alloc(name
, offs
, line
, r
->parse
);
1602 r
->last_eqn
->next
= e
;
1604 r
->first_eqn
= r
->last_eqn
= e
;
1606 r
->eqn
= r
->last_eqn
= e
;
1610 eqn_read(&r
->eqn
, line
, buf
, offs
, &poff
);
1619 roff_openeqn(r
, *bufp
+ pos
, ln
, ppos
, NULL
);
1628 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1636 struct tbl_node
*tbl
;
1639 mandoc_msg(MANDOCERR_SCOPEBROKEN
, r
->parse
, ln
, ppos
, NULL
);
1643 tbl
= tbl_alloc(ppos
, ln
, r
->parse
);
1646 r
->last_tbl
->next
= tbl
;
1648 r
->first_tbl
= r
->last_tbl
= tbl
;
1650 r
->tbl
= r
->last_tbl
= tbl
;
1662 if ('\0' == *p
|| '.' == (r
->control
= *p
++))
1666 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1675 const char *p
, *first
, *second
;
1677 enum mandoc_esc esc
;
1682 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1686 while ('\0' != *p
) {
1690 if ('\\' == *first
) {
1691 esc
= mandoc_escape(&p
, NULL
, NULL
);
1692 if (ESCAPE_ERROR
== esc
) {
1694 (MANDOCERR_BADESCAPE
, r
->parse
,
1695 ln
, (int)(p
- *bufp
), NULL
);
1698 fsz
= (size_t)(p
- first
);
1702 if ('\\' == *second
) {
1703 esc
= mandoc_escape(&p
, NULL
, NULL
);
1704 if (ESCAPE_ERROR
== esc
) {
1706 (MANDOCERR_BADESCAPE
, r
->parse
,
1707 ln
, (int)(p
- *bufp
), NULL
);
1710 ssz
= (size_t)(p
- second
);
1711 } else if ('\0' == *second
) {
1712 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
,
1713 ln
, (int)(p
- *bufp
), NULL
);
1719 roff_setstrn(&r
->xmbtab
, first
,
1720 fsz
, second
, ssz
, 0);
1724 if (NULL
== r
->xtab
)
1725 r
->xtab
= mandoc_calloc
1726 (128, sizeof(struct roffstr
));
1728 free(r
->xtab
[(int)*first
].p
);
1729 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
1730 r
->xtab
[(int)*first
].sz
= ssz
;
1742 mandoc_msg(MANDOCERR_SO
, r
->parse
, ln
, ppos
, NULL
);
1745 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1746 * opening anything that's not in our cwd or anything beneath
1747 * it. Thus, explicitly disallow traversing up the file-system
1748 * or using absolute paths.
1752 if ('/' == *name
|| strstr(name
, "../") || strstr(name
, "/..")) {
1753 mandoc_msg(MANDOCERR_SOPATH
, r
->parse
, ln
, pos
, NULL
);
1763 roff_userdef(ROFF_ARGS
)
1770 * Collect pointers to macro argument strings
1771 * and NUL-terminate them.
1774 for (i
= 0; i
< 9; i
++)
1775 arg
[i
] = '\0' == *cp
? "" :
1776 mandoc_getarg(r
->parse
, &cp
, ln
, &pos
);
1779 * Expand macro arguments.
1782 n1
= cp
= mandoc_strdup(r
->current_string
);
1783 while (NULL
!= (cp
= strstr(cp
, "\\$"))) {
1785 if (0 > i
|| 8 < i
) {
1786 /* Not an argument invocation. */
1791 *szp
= strlen(n1
) - 3 + strlen(arg
[i
]) + 1;
1792 n2
= mandoc_malloc(*szp
);
1794 strlcpy(n2
, n1
, (size_t)(cp
- n1
+ 1));
1795 strlcat(n2
, arg
[i
], *szp
);
1796 strlcat(n2
, cp
+ 3, *szp
);
1798 cp
= n2
+ (cp
- n1
);
1804 * Replace the macro invocation
1805 * by the expanded macro.
1810 *szp
= strlen(*bufp
) + 1;
1812 return(*szp
> 1 && '\n' == (*bufp
)[(int)*szp
- 2] ?
1813 ROFF_REPARSE
: ROFF_APPEND
);
1817 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
1825 /* Read until end of name. */
1826 for (cp
= name
; '\0' != *cp
&& ' ' != *cp
; cp
++) {
1832 mandoc_msg(MANDOCERR_NAMESC
, r
->parse
, ln
, pos
, NULL
);
1837 /* Nil-terminate name. */
1841 /* Read past spaces. */
1850 * Store *string into the user-defined string called *name.
1851 * In multiline mode, append to an existing entry and append '\n';
1852 * else replace the existing entry, if there is one.
1853 * To clear an existing entry, call with (*r, *name, NULL, 0).
1856 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
1860 roff_setstrn(&r
->strtab
, name
, strlen(name
), string
,
1861 string
? strlen(string
) : 0, multiline
);
/*
 * roff_setstrn: store a length-delimited value under a
 * length-delimited name in the key-value list *r.
 *
 * Visible behaviour:
 *  - The list is searched for an entry whose key compares equal to
 *    name; if none is found a new struct roffkv is allocated and its
 *    key is set to a copy of the first namesz bytes of name.
 *  - newch is the number of bytes needed for the new content:
 *    stringsz plus one terminator, plus one more byte for a newline
 *    in multiline mode.
 *  - The value buffer is allocated fresh when empty, otherwise grown
 *    to oldch + newch bytes, and new content is written after the
 *    first oldch bytes.
 *  - On exit val.sz is the distance from the buffer start to the
 *    write cursor c.
 *
 * NOTE(review): the lookup uses strcmp(name, ...), which reads name up
 * to its NUL, while the new key is created from only namesz bytes.
 * If any caller passes a name that is not terminated at namesz, the
 * two disagree and the entry can never be found again; verify callers.
 *
 * NOTE(review): the statements that link a new entry into the list,
 * compute oldch, perform the replace branch, and copy the bytes are
 * missing from this listing; confirm against the complete file.
 */
1865 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
1866 const char *string
, size_t stringsz
, int multiline
)
1871 size_t oldch
, newch
;
1873 /* Search for an existing string with the same name. */
1876 while (n
&& strcmp(name
, n
->key
.p
))
1880 /* Create a new string table entry. */
1881 n
= mandoc_malloc(sizeof(struct roffkv
));
1882 n
->key
.p
= mandoc_strndup(name
, namesz
);
1888 } else if (0 == multiline
) {
1889 /* In multiline mode, append; else replace. */
1899 * One additional byte for the '\n' in multiline mode,
1900 * and one for the terminating '\0'.
1902 newch
= stringsz
+ (multiline
? 2u : 1u);
1904 if (NULL
== n
->val
.p
) {
1905 n
->val
.p
= mandoc_malloc(newch
);
1910 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
1913 /* Skip existing content in the destination buffer. */
1914 c
= n
->val
.p
+ (int)oldch
;
1916 /* Append new content to the destination buffer. */
1918 while (i
< (int)stringsz
) {
1920 * Rudimentary roff copy mode:
1921 * Handle escaped backslashes.
/*
 * Two consecutive backslashes are detected here; presumably one of
 * them is dropped, but the action is not visible in this listing.
 */
1923 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
1928 /* Append terminating bytes. */
1933 n
->val
.sz
= (int)(c
- n
->val
.p
);
/*
 * roff_getstrn: look up a string by a length-delimited name.
 *
 * The user-defined table r->strtab is walked first; an entry matches
 * when its key agrees with name on the first len bytes and the key
 * ends exactly there.  The predefs[] table (PREDEFS_MAX entries) is
 * then checked with the same rule, and a match returns the
 * predefined string predefs[i].str.
 *
 * NOTE(review): the value returned for a match in r->strtab and the
 * value returned when nothing matches are missing from this listing;
 * confirm against the complete file.
 */
1937 roff_getstrn(const struct roff
*r
, const char *name
, size_t len
)
1939 const struct roffkv
*n
;
1942 for (n
= r
->strtab
; n
; n
= n
->next
)
1943 if (0 == strncmp(name
, n
->key
.p
, len
) &&
1944 '\0' == n
->key
.p
[(int)len
])
1947 for (i
= 0; i
< PREDEFS_MAX
; i
++)
1948 if (0 == strncmp(name
, predefs
[i
].name
, len
) &&
1949 '\0' == predefs
[i
].name
[(int)len
])
1950 return(predefs
[i
].str
);
/*
 * roff_freestr: walk the key-value list that starts at r.
 *
 * The loop advances through nn rather than n->next, which lets the
 * body dispose of the current node before moving on.
 *
 * NOTE(review): the loop body is missing from this listing;
 * presumably it saves n->next in nn and frees the key, the value and
 * the node itself.  Confirm against the complete file.
 */
1956 roff_freestr(struct roffkv
*r
)
1958 struct roffkv
*n
, *nn
;
1960 for (n
= r
; n
; n
= nn
) {
/*
 * roff_span: return the result of tbl_span() on r->tbl when r->tbl is
 * set, or NULL when it is not.
 */
1968 const struct tbl_span
*
1969 roff_span(const struct roff
*r
)
1972 return(r
->tbl
? tbl_span(r
->tbl
) : NULL
);
/*
 * roff_eqn: return the address of the eqn member of r->last_eqn when
 * r->last_eqn is set, or NULL when it is not.
 */
1976 roff_eqn(const struct roff
*r
)
1979 return(r
->last_eqn
? &r
->last_eqn
->eqn
: NULL
);
1983 * Duplicate an input string, making the appropriate character
1984 * conversions (as stipulated by `tr') along the way.
1985 * Returns a heap-allocated string with all the replacements made.
/*
 * roff_strdup: duplicate p while applying the character replacements
 * recorded in r->xtab (indexed by single character) and r->xmbtab
 * (list of multi-character keys).
 *
 * Visible behaviour:
 *  - With neither table present the result is a plain mandoc_strdup()
 *    of p; an empty p yields a copy of the empty string.
 *  - A non-backslash character with an xtab entry is replaced by that
 *    entry; any other non-backslash character is copied as is.
 *  - At a backslash the xmbtab keys are compared against the input
 *    with strncmp(); on a match the mapped value is appended and the
 *    input advances by the key length.
 *  - Otherwise the escape is parsed with mandoc_escape() and its
 *    bytes (starting at pp) are copied verbatim; ESCAPE_ERROR is
 *    handled in a separate branch.
 *  - The result is NUL-terminated at offset ssz.
 *
 * NOTE(review): xtab is indexed with (int)*p, where p points to plain
 * char.  On platforms where char is signed, bytes above 0x7f give a
 * negative index; verify how xtab is sized and whether a cast through
 * unsigned char is needed.
 *
 * NOTE(review): the updates of ssz, sz, p and pp, the break and
 * continue statements and the final return are missing from this
 * listing.  In particular, check that ssz is advanced after the copy
 * in the ESCAPE_ERROR branch, since the terminator is written at ssz.
 */
1988 roff_strdup(const struct roff
*r
, const char *p
)
1990 const struct roffkv
*cp
;
1994 enum mandoc_esc esc
;
1996 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
1997 return(mandoc_strdup(p
));
1998 else if ('\0' == *p
)
1999 return(mandoc_strdup(""));
2002 * Step through each character looking for term matches
2003 * (remember that a `tr' can be invoked with an escape, which is
2004 * a glyph but the escape is multi-character).
2005 * We only do this if the character hash has been initialised
2006 * and the string is >0 length.
2012 while ('\0' != *p
) {
2013 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(int)*p
].p
) {
2014 sz
= r
->xtab
[(int)*p
].sz
;
2015 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2016 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
2020 } else if ('\\' != *p
) {
2021 res
= mandoc_realloc(res
, ssz
+ 2);
2026 /* Search for term matches. */
2027 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
2028 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
2033 * A match has been found.
2034 * Append the match to the array and move
2035 * forward by its keysize.
2037 res
= mandoc_realloc
2038 (res
, ssz
+ cp
->val
.sz
+ 1);
2039 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
2041 p
+= (int)cp
->key
.sz
;
2046 * Handle escapes carefully: we need to copy
2047 * over just the escape itself, or else we might
2048 * do replacements within the escape itself.
2049 * Make sure to pass along the bogus string.
2052 esc
= mandoc_escape(&p
, NULL
, NULL
);
2053 if (ESCAPE_ERROR
== esc
) {
2055 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2056 memcpy(res
+ ssz
, pp
, sz
);
2060 * We bail out on bad escapes.
2061 * No need to warn: we already did so when
2062 * roff_res() was called.
2065 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2066 memcpy(res
+ ssz
, pp
, sz
);
2070 res
[(int)ssz
] = '\0';
2075 * Find out whether a line is a macro line or not.
2076 * If it is, adjust the current position and return one; if it isn't,
2077 * return zero and don't change the current position.
2078 * If the control character has been set with `.cc', then let that gain precedence.
2080 * This is slightly contrary to groff, where using the non-breaking
2081 * control character when `cc' has been invoked will cause the
2082 * non-breaking macro contents to be printed verbatim.
2085 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
2091 if (0 != r
->control
&& cp
[pos
] == r
->control
)
2093 else if (0 != r
->control
)
2095 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
2097 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
2102 while (' ' == cp
[pos
] || '\t' == cp
[pos
])