]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.178 2013/07/13 12:52:07 schwarze Exp $ */
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
30 #include "libmandoc.h"
32 /* Maximum number of nested if-else conditionals. */
33 #define RSTACK_MAX 128
35 /* Maximum number of string expansions per line, to break infinite loops. */
36 #define EXPAND_LIMIT 1000
82 * A single register entity. If "set" is zero, the value of the
83 * register should be the default one, which is per-register.
84 * Registers are assumed to be unsigned ints for now.
87 int set
; /* whether set or not */
88 unsigned int u
; /* unsigned integer */
92 * An incredibly-simple string buffer.
95 char *p
; /* nil-terminated buffer */
96 size_t sz
; /* saved strlen(p) */
100 * A key-value roffstr pair as part of a singly-linked list.
105 struct roffkv
*next
; /* next in list */
109 enum mparset parsetype
; /* requested parse type */
110 struct mparse
*parse
; /* parse point */
111 struct roffnode
*last
; /* leaf of stack */
112 enum roffrule rstack
[RSTACK_MAX
]; /* stack of !`ie' rules */
113 char control
; /* control character */
114 int rstackpos
; /* position in rstack */
115 struct reg regs
[REG__MAX
];
116 struct roffkv
*strtab
; /* user-defined strings & macros */
117 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
118 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
119 const char *current_string
; /* value of last called user macro */
120 struct tbl_node
*first_tbl
; /* first table parsed */
121 struct tbl_node
*last_tbl
; /* last table parsed */
122 struct tbl_node
*tbl
; /* current table being parsed */
123 struct eqn_node
*last_eqn
; /* last equation parsed */
124 struct eqn_node
*first_eqn
; /* first equation parsed */
125 struct eqn_node
*eqn
; /* current equation being parsed */
129 enum rofft tok
; /* type of node */
130 struct roffnode
*parent
; /* up one in stack */
131 int line
; /* parse line */
132 int col
; /* parse col */
133 char *name
; /* node name, e.g. macro name */
134 char *end
; /* end-rules: custom token */
135 int endspan
; /* end-rules: next-line or infty */
136 enum roffrule rule
; /* current evaluation rule */
139 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
140 enum rofft tok, /* tok of macro */ \
141 char **bufp, /* input buffer */ \
142 size_t *szp, /* size of input buffer */ \
143 int ln, /* parse line */ \
144 int ppos, /* original pos in buffer */ \
145 int pos, /* current pos in buffer */ \
146 int *offs /* reset offset of buffer data */
148 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
151 const char *name
; /* macro name */
152 roffproc proc
; /* process new macro */
153 roffproc text
; /* process as child text of macro */
154 roffproc sub
; /* process as child of macro */
156 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
157 struct roffmac
*next
;
161 const char *name
; /* predefined input name */
162 const char *str
; /* replacement symbol */
165 #define PREDEF(__name, __str) \
166 { (__name), (__str) },
168 static enum rofft
roffhash_find(const char *, size_t);
169 static void roffhash_init(void);
170 static void roffnode_cleanscope(struct roff
*);
171 static void roffnode_pop(struct roff
*);
172 static void roffnode_push(struct roff
*, enum rofft
,
173 const char *, int, int);
174 static enum rofferr
roff_block(ROFF_ARGS
);
175 static enum rofferr
roff_block_text(ROFF_ARGS
);
176 static enum rofferr
roff_block_sub(ROFF_ARGS
);
177 static enum rofferr
roff_cblock(ROFF_ARGS
);
178 static enum rofferr
roff_cc(ROFF_ARGS
);
179 static enum rofferr
roff_ccond(ROFF_ARGS
);
180 static enum rofferr
roff_cond(ROFF_ARGS
);
181 static enum rofferr
roff_cond_text(ROFF_ARGS
);
182 static enum rofferr
roff_cond_sub(ROFF_ARGS
);
183 static enum rofferr
roff_ds(ROFF_ARGS
);
184 static enum roffrule
roff_evalcond(const char *, int *);
185 static void roff_free1(struct roff
*);
186 static void roff_freestr(struct roffkv
*);
187 static char *roff_getname(struct roff
*, char **, int, int);
188 static const char *roff_getstrn(const struct roff
*,
189 const char *, size_t);
190 static enum rofferr
roff_it(ROFF_ARGS
);
191 static enum rofferr
roff_line_ignore(ROFF_ARGS
);
192 static enum rofferr
roff_nr(ROFF_ARGS
);
193 static void roff_openeqn(struct roff
*, const char *,
194 int, int, const char *);
195 static enum rofft
roff_parse(struct roff
*, const char *, int *);
196 static enum rofferr
roff_parsetext(char **, size_t *, int, int *);
197 static enum rofferr
roff_res(struct roff
*,
198 char **, size_t *, int, int);
199 static enum rofferr
roff_rm(ROFF_ARGS
);
200 static void roff_setstr(struct roff
*,
201 const char *, const char *, int);
202 static void roff_setstrn(struct roffkv
**, const char *,
203 size_t, const char *, size_t, int);
204 static enum rofferr
roff_so(ROFF_ARGS
);
205 static enum rofferr
roff_tr(ROFF_ARGS
);
206 static enum rofferr
roff_Dd(ROFF_ARGS
);
207 static enum rofferr
roff_TH(ROFF_ARGS
);
208 static enum rofferr
roff_TE(ROFF_ARGS
);
209 static enum rofferr
roff_TS(ROFF_ARGS
);
210 static enum rofferr
roff_EQ(ROFF_ARGS
);
211 static enum rofferr
roff_EN(ROFF_ARGS
);
212 static enum rofferr
roff_T_(ROFF_ARGS
);
213 static enum rofferr
roff_userdef(ROFF_ARGS
);
215 /* See roffhash_find() */
219 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
221 static struct roffmac
*hash
[HASHWIDTH
];
223 static struct roffmac roffs
[ROFF_MAX
] = {
224 { "ad", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
225 { "am", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
226 { "ami", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
227 { "am1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
228 { "cc", roff_cc
, NULL
, NULL
, 0, NULL
},
229 { "de", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
230 { "dei", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
231 { "de1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
232 { "ds", roff_ds
, NULL
, NULL
, 0, NULL
},
233 { "el", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
234 { "hy", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
235 { "ie", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
236 { "if", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
237 { "ig", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
238 { "it", roff_it
, NULL
, NULL
, 0, NULL
},
239 { "ne", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
240 { "nh", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
241 { "nr", roff_nr
, NULL
, NULL
, 0, NULL
},
242 { "ns", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
243 { "ps", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
244 { "rm", roff_rm
, NULL
, NULL
, 0, NULL
},
245 { "so", roff_so
, NULL
, NULL
, 0, NULL
},
246 { "ta", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
247 { "tr", roff_tr
, NULL
, NULL
, 0, NULL
},
248 { "Dd", roff_Dd
, NULL
, NULL
, 0, NULL
},
249 { "TH", roff_TH
, NULL
, NULL
, 0, NULL
},
250 { "TS", roff_TS
, NULL
, NULL
, 0, NULL
},
251 { "TE", roff_TE
, NULL
, NULL
, 0, NULL
},
252 { "T&", roff_T_
, NULL
, NULL
, 0, NULL
},
253 { "EQ", roff_EQ
, NULL
, NULL
, 0, NULL
},
254 { "EN", roff_EN
, NULL
, NULL
, 0, NULL
},
255 { ".", roff_cblock
, NULL
, NULL
, 0, NULL
},
256 { "\\}", roff_ccond
, NULL
, NULL
, 0, NULL
},
257 { NULL
, roff_userdef
, NULL
, NULL
, 0, NULL
},
260 const char *const __mdoc_reserved
[] = {
261 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
262 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
263 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
264 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
265 "Ds", "Dt", "Dv", "Dx", "D1",
266 "Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
267 "En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
268 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
269 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
270 "Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
271 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
272 "Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
273 "Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
274 "Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
275 "Ss", "St", "Sx", "Sy",
276 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
277 "%A", "%B", "%D", "%I", "%J", "%N", "%O",
278 "%P", "%Q", "%R", "%T", "%U", "%V",
282 const char *const __man_reserved
[] = {
283 "AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
284 "EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
285 "LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
286 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
287 "TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
291 /* Array of injected predefined strings. */
292 #define PREDEFS_MAX 38
293 static const struct predef predefs
[PREDEFS_MAX
] = {
294 #include "predefs.in"
297 /* See roffhash_find() */
298 #define ROFF_HASH(p) (p[0] - ASCII_LO)
300 static int roffit_lines
; /* number of lines to delay */
301 static char *roffit_macro
; /* nil-terminated macro line */
309 for (i
= 0; i
< (int)ROFF_USERDEF
; i
++) {
310 assert(roffs
[i
].name
[0] >= ASCII_LO
);
311 assert(roffs
[i
].name
[0] <= ASCII_HI
);
313 buc
= ROFF_HASH(roffs
[i
].name
);
315 if (NULL
!= (n
= hash
[buc
])) {
316 for ( ; n
->next
; n
= n
->next
)
320 hash
[buc
] = &roffs
[i
];
325 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
326 * the nil-terminated string name could be found.
329 roffhash_find(const char *p
, size_t s
)
335 * libroff has an extremely simple hashtable, for the time
336 * being, which simply keys on the first character, which must
337 * be printable, then walks a chain. It works well enough until
341 if (p
[0] < ASCII_LO
|| p
[0] > ASCII_HI
)
346 if (NULL
== (n
= hash
[buc
]))
348 for ( ; n
; n
= n
->next
)
349 if (0 == strncmp(n
->name
, p
, s
) && '\0' == n
->name
[(int)s
])
350 return((enum rofft
)(n
- roffs
));
357 * Pop the current node off of the stack of roff instructions currently
361 roffnode_pop(struct roff
*r
)
368 r
->last
= r
->last
->parent
;
376 * Push a roff node onto the instruction stack. This must later be
377 * removed with roffnode_pop().
380 roffnode_push(struct roff
*r
, enum rofft tok
, const char *name
,
385 p
= mandoc_calloc(1, sizeof(struct roffnode
));
388 p
->name
= mandoc_strdup(name
);
392 p
->rule
= p
->parent
? p
->parent
->rule
: ROFFRULE_DENY
;
399 roff_free1(struct roff
*r
)
401 struct tbl_node
*tbl
;
405 while (NULL
!= (tbl
= r
->first_tbl
)) {
406 r
->first_tbl
= tbl
->next
;
410 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
412 while (NULL
!= (e
= r
->first_eqn
)) {
413 r
->first_eqn
= e
->next
;
417 r
->first_eqn
= r
->last_eqn
= r
->eqn
= NULL
;
422 roff_freestr(r
->strtab
);
423 roff_freestr(r
->xmbtab
);
425 r
->strtab
= r
->xmbtab
= NULL
;
428 for (i
= 0; i
< 128; i
++)
436 roff_reset(struct roff
*r
)
443 memset(&r
->regs
, 0, sizeof(struct reg
) * REG__MAX
);
445 for (i
= 0; i
< PREDEFS_MAX
; i
++)
446 roff_setstr(r
, predefs
[i
].name
, predefs
[i
].str
, 0);
451 roff_free(struct roff
*r
)
460 roff_alloc(enum mparset type
, struct mparse
*parse
)
465 r
= mandoc_calloc(1, sizeof(struct roff
));
472 for (i
= 0; i
< PREDEFS_MAX
; i
++)
473 roff_setstr(r
, predefs
[i
].name
, predefs
[i
].str
, 0);
479 * Pre-filter each and every line for reserved words (ones beginning with
480 * `\*', e.g., `\*(ab'). These must be handled before the actual line
482 * This also checks the syntax of regular escapes.
485 roff_res(struct roff
*r
, char **bufp
, size_t *szp
, int ln
, int pos
)
488 const char *stesc
; /* start of an escape sequence ('\\') */
489 const char *stnam
; /* start of the name, after "[(*" */
490 const char *cp
; /* end of the name, e.g. before ']' */
491 const char *res
; /* the string to be substituted */
492 int i
, maxl
, expand_count
;
500 while (NULL
!= (cp
= strchr(cp
, '\\'))) {
504 * The second character must be an asterisk.
505 * If it isn't, skip it anyway: It is escaped,
506 * so it can't start another escape sequence.
514 esc
= mandoc_escape(&cp
, NULL
, NULL
);
515 if (ESCAPE_ERROR
!= esc
)
519 (MANDOCERR_BADESCAPE
, r
->parse
,
520 ln
, (int)(stesc
- *bufp
), NULL
);
527 * The third character decides the length
528 * of the name of the string.
529 * Save a pointer to the name.
549 /* Advance to the end of the name. */
551 for (i
= 0; 0 == maxl
|| i
< maxl
; i
++, cp
++) {
554 (MANDOCERR_BADESCAPE
,
556 (int)(stesc
- *bufp
), NULL
);
559 if (0 == maxl
&& ']' == *cp
)
564 * Retrieve the replacement string; if it is
565 * undefined, resume searching for escapes.
568 res
= roff_getstrn(r
, stnam
, (size_t)i
);
572 (MANDOCERR_BADESCAPE
, r
->parse
,
573 ln
, (int)(stesc
- *bufp
), NULL
);
577 /* Replace the escape sequence by the string. */
581 nsz
= *szp
+ strlen(res
) + 1;
582 n
= mandoc_malloc(nsz
);
584 strlcpy(n
, *bufp
, (size_t)(stesc
- *bufp
+ 1));
585 strlcat(n
, res
, nsz
);
586 strlcat(n
, cp
+ (maxl
? 0 : 1), nsz
);
593 if (EXPAND_LIMIT
>= ++expand_count
)
596 /* Just leave the string unexpanded. */
597 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
, ln
, pos
, NULL
);
604 * Process text streams:
605 * Convert all breakable hyphens into ASCII_HYPH.
606 * Decrement and spring input line trap.
609 roff_parsetext(char **bufp
, size_t *szp
, int pos
, int *offs
)
617 start
= p
= *bufp
+ pos
;
620 sz
= strcspn(p
, "-\\");
627 /* Skip over escapes. */
630 ((const char **)&p
, NULL
, NULL
);
631 if (ESCAPE_ERROR
== esc
)
634 } else if (p
== start
) {
639 if (isalpha((unsigned char)p
[-1]) &&
640 isalpha((unsigned char)p
[1]))
645 /* Spring the input line trap. */
646 if (1 == roffit_lines
) {
647 isz
= asprintf(&p
, "%s\n.%s", *bufp
, roffit_macro
);
650 exit((int)MANDOCLEVEL_SYSERR
);
658 return(ROFF_REPARSE
);
659 } else if (1 < roffit_lines
)
665 roff_parseln(struct roff
*r
, int ln
, char **bufp
,
666 size_t *szp
, int pos
, int *offs
)
673 * Run the reserved-word filter only if we have some reserved
677 e
= roff_res(r
, bufp
, szp
, ln
, pos
);
680 assert(ROFF_CONT
== e
);
683 ctl
= roff_getcontrol(r
, *bufp
, &pos
);
686 * First, if a scope is open and we're not a macro, pass the
687 * text through the macro's filter. If a scope isn't open and
688 * we're not a macro, just let it through.
689 * Finally, if there's an equation scope open, divert it into it
690 * no matter our state.
693 if (r
->last
&& ! ctl
) {
695 assert(roffs
[t
].text
);
697 (r
, t
, bufp
, szp
, ln
, pos
, pos
, offs
);
698 assert(ROFF_IGN
== e
|| ROFF_CONT
== e
);
702 return(eqn_read(&r
->eqn
, ln
, *bufp
, pos
, offs
));
704 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
705 return(roff_parsetext(bufp
, szp
, pos
, offs
));
708 return(eqn_read(&r
->eqn
, ln
, *bufp
, pos
, offs
));
710 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
711 return(roff_parsetext(bufp
, szp
, pos
, offs
));
713 return(eqn_read(&r
->eqn
, ln
, *bufp
, ppos
, offs
));
716 * If a scope is open, go to the child handler for that macro,
717 * as it may want to preprocess before doing anything with it.
718 * Don't do so if an equation is open.
723 assert(roffs
[t
].sub
);
724 return((*roffs
[t
].sub
)
726 ln
, ppos
, pos
, offs
));
730 * Lastly, as we've no scope open, try to look up and execute
731 * the new macro. If no macro is found, simply return and let
732 * the compilers handle it.
735 if (ROFF_MAX
== (t
= roff_parse(r
, *bufp
, &pos
)))
738 assert(roffs
[t
].proc
);
739 return((*roffs
[t
].proc
)
741 ln
, ppos
, pos
, offs
));
746 roff_endparse(struct roff
*r
)
750 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
751 r
->last
->line
, r
->last
->col
, NULL
);
754 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
755 r
->eqn
->eqn
.ln
, r
->eqn
->eqn
.pos
, NULL
);
760 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
761 r
->tbl
->line
, r
->tbl
->pos
, NULL
);
767 * Parse a roff node's type from the input buffer. This must be in the
768 * form of ".foo xxx" in the usual way.
771 roff_parse(struct roff
*r
, const char *buf
, int *pos
)
777 if ('\0' == buf
[*pos
] || '"' == buf
[*pos
] ||
778 '\t' == buf
[*pos
] || ' ' == buf
[*pos
])
782 * We stop the macro parse at an escape, tab, space, or nil.
783 * However, `\}' is also a valid macro, so make sure we don't
784 * clobber it by seeing the `\' as the end of token.
788 maclen
= strcspn(mac
+ 1, " \\\t\0") + 1;
790 t
= (r
->current_string
= roff_getstrn(r
, mac
, maclen
))
791 ? ROFF_USERDEF
: roffhash_find(mac
, maclen
);
795 while (buf
[*pos
] && ' ' == buf
[*pos
])
803 roff_cblock(ROFF_ARGS
)
807 * A block-close `..' should only be invoked as a child of an
808 * ignore macro, otherwise raise a warning and just ignore it.
811 if (NULL
== r
->last
) {
812 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
816 switch (r
->last
->tok
) {
824 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
831 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
836 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
839 roffnode_cleanscope(r
);
846 roffnode_cleanscope(struct roff
*r
)
850 if (--r
->last
->endspan
!= 0)
859 roff_ccond(ROFF_ARGS
)
862 if (NULL
== r
->last
) {
863 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
867 switch (r
->last
->tok
) {
875 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
879 if (r
->last
->endspan
> -1) {
880 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
885 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
888 roffnode_cleanscope(r
);
895 roff_block(ROFF_ARGS
)
903 if (ROFF_ig
!= tok
) {
904 if ('\0' == (*bufp
)[pos
]) {
905 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
910 * Re-write `de1', since we don't really care about
911 * groff's strange compatibility mode, into `de'.
919 mandoc_msg(MANDOCERR_REQUEST
, r
->parse
, ln
, ppos
,
922 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
925 while (isspace((unsigned char)(*bufp
)[pos
]))
926 (*bufp
)[pos
++] = '\0';
929 roffnode_push(r
, tok
, name
, ln
, ppos
);
932 * At the beginning of a `de' macro, clear the existing string
933 * with the same name, if there is one. New content will be
934 * added from roff_block_text() in multiline mode.
938 roff_setstr(r
, name
, "", 0);
940 if ('\0' == (*bufp
)[pos
])
943 /* If present, process the custom end-of-line marker. */
946 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
950 * Note: groff does NOT like escape characters in the input.
951 * Instead of detecting this, we're just going to let it fly and
956 sz
= (size_t)(pos
- sv
);
958 if (1 == sz
&& '.' == (*bufp
)[sv
])
961 r
->last
->end
= mandoc_malloc(sz
+ 1);
963 memcpy(r
->last
->end
, *bufp
+ sv
, sz
);
964 r
->last
->end
[(int)sz
] = '\0';
967 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
975 roff_block_sub(ROFF_ARGS
)
981 * First check whether a custom macro exists at this level. If
982 * it does, then check against it. This is one of groff's
983 * stranger behaviours. If we encountered a custom end-scope
984 * tag and that tag also happens to be a "real" macro, then we
985 * need to try interpreting it again as a real macro. If it's
986 * not, then return ignore. Else continue.
990 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
991 if ((*bufp
)[i
] != r
->last
->end
[j
])
994 if ('\0' == r
->last
->end
[j
] &&
995 ('\0' == (*bufp
)[i
] ||
997 '\t' == (*bufp
)[i
])) {
999 roffnode_cleanscope(r
);
1001 while (' ' == (*bufp
)[i
] || '\t' == (*bufp
)[i
])
1005 if (ROFF_MAX
!= roff_parse(r
, *bufp
, &pos
))
1012 * If we have no custom end-query or lookup failed, then try
1013 * pulling it out of the hashtable.
1016 t
= roff_parse(r
, *bufp
, &pos
);
1019 * Macros other than block-end are only significant
1020 * in `de' blocks; elsewhere, simply throw them away.
1022 if (ROFF_cblock
!= t
) {
1024 roff_setstr(r
, r
->last
->name
, *bufp
+ ppos
, 1);
1028 assert(roffs
[t
].proc
);
1029 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1030 ln
, ppos
, pos
, offs
));
1036 roff_block_text(ROFF_ARGS
)
1040 roff_setstr(r
, r
->last
->name
, *bufp
+ pos
, 1);
1048 roff_cond_sub(ROFF_ARGS
)
1055 roffnode_cleanscope(r
);
1056 t
= roff_parse(r
, *bufp
, &pos
);
1059 * Fully handle known macros when they are structurally
1060 * required or when the conditional evaluated to true.
1063 if ((ROFF_MAX
!= t
) &&
1064 (ROFF_ccond
== t
|| ROFFRULE_ALLOW
== rr
||
1065 ROFFMAC_STRUCT
& roffs
[t
].flags
)) {
1066 assert(roffs
[t
].proc
);
1067 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1068 ln
, ppos
, pos
, offs
));
1071 /* Always check for the closing delimiter `\}'. */
1074 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1079 * If we're at the end of line, then just chop
1080 * off the \} and resize the buffer.
1081 * If we aren't, then convert it to spaces.
1084 if ('\0' == *(ep
+ 1)) {
1088 *(ep
- 1) = *ep
= ' ';
1090 roff_ccond(r
, ROFF_ccond
, bufp
, szp
,
1091 ln
, pos
, pos
+ 2, offs
);
1094 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
1099 roff_cond_text(ROFF_ARGS
)
1105 roffnode_cleanscope(r
);
1108 for ( ; NULL
!= (ep
= strchr(ep
, '\\')); ep
++) {
1113 roff_ccond(r
, ROFF_ccond
, bufp
, szp
,
1114 ln
, pos
, pos
+ 2, offs
);
1116 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
1119 static enum roffrule
1120 roff_evalcond(const char *v
, int *pos
)
1126 return(ROFFRULE_ALLOW
);
1133 return(ROFFRULE_DENY
);
1138 while (v
[*pos
] && ' ' != v
[*pos
])
1140 return(ROFFRULE_DENY
);
1145 roff_line_ignore(ROFF_ARGS
)
1153 roff_cond(ROFF_ARGS
)
1156 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
1159 * An `.el' has no conditional body: it will consume the value
1160 * of the current rstack entry set in prior `ie' calls or
1163 * If we're not an `el', however, then evaluate the conditional.
1166 r
->last
->rule
= ROFF_el
== tok
?
1168 ROFFRULE_DENY
: r
->rstack
[r
->rstackpos
--]) :
1169 roff_evalcond(*bufp
, &pos
);
1172 * An if-else will put the NEGATION of the current evaluated
1173 * conditional into the stack of rules.
1176 if (ROFF_ie
== tok
) {
1177 if (r
->rstackpos
== RSTACK_MAX
- 1) {
1178 mandoc_msg(MANDOCERR_MEM
,
1179 r
->parse
, ln
, ppos
, NULL
);
1182 r
->rstack
[++r
->rstackpos
] =
1183 ROFFRULE_DENY
== r
->last
->rule
?
1184 ROFFRULE_ALLOW
: ROFFRULE_DENY
;
1187 /* If the parent has false as its rule, then so do we. */
1189 if (r
->last
->parent
&& ROFFRULE_DENY
== r
->last
->parent
->rule
)
1190 r
->last
->rule
= ROFFRULE_DENY
;
1194 * If there is nothing on the line after the conditional,
1195 * not even whitespace, use next-line scope.
1198 if ('\0' == (*bufp
)[pos
]) {
1199 r
->last
->endspan
= 2;
1203 while (' ' == (*bufp
)[pos
])
1206 /* An opening brace requests multiline scope. */
1208 if ('\\' == (*bufp
)[pos
] && '{' == (*bufp
)[pos
+ 1]) {
1209 r
->last
->endspan
= -1;
1215 * Anything else following the conditional causes
1216 * single-line scope. Warn if the scope contains
1217 * nothing but trailing whitespace.
1220 if ('\0' == (*bufp
)[pos
])
1221 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
1223 r
->last
->endspan
= 1;
1235 char *name
, *string
;
1238 * A symbol is named by the first word following the macro
1239 * invocation up to a space. Its value is anything after the
1240 * name's trailing whitespace and optional double-quote. Thus,
1244 * will have `bar " ' as its value.
1247 string
= *bufp
+ pos
;
1248 name
= roff_getname(r
, &string
, ln
, pos
);
1252 /* Read past initial double-quote. */
1256 /* The rest is the value. */
1257 roff_setstr(r
, name
, string
, 0);
1262 roff_regisset(const struct roff
*r
, enum regs reg
)
1265 return(r
->regs
[(int)reg
].set
);
1269 roff_regget(const struct roff
*r
, enum regs reg
)
1272 return(r
->regs
[(int)reg
].u
);
1276 roff_regunset(struct roff
*r
, enum regs reg
)
1279 r
->regs
[(int)reg
].set
= 0;
1291 key
= roff_getname(r
, &val
, ln
, pos
);
1293 if (0 == strcmp(key
, "nS")) {
1294 r
->regs
[(int)REG_nS
].set
= 1;
1295 if ((iv
= mandoc_strntoi(val
, strlen(val
), 10)) >= 0)
1296 r
->regs
[(int)REG_nS
].u
= (unsigned)iv
;
1298 r
->regs
[(int)REG_nS
].u
= 0u;
1312 while ('\0' != *cp
) {
1313 name
= roff_getname(r
, &cp
, ln
, (int)(cp
- *bufp
));
1315 roff_setstr(r
, name
, NULL
, 0);
1328 /* Parse the number of lines. */
1330 len
= strcspn(cp
, " \t");
1332 if ((iv
= mandoc_strntoi(cp
, len
, 10)) <= 0) {
1333 mandoc_msg(MANDOCERR_NUMERIC
, r
->parse
,
1334 ln
, ppos
, *bufp
+ 1);
1339 /* Arm the input line trap. */
1341 roffit_macro
= mandoc_strdup(cp
);
1349 const char *const *cp
;
1351 if (MPARSE_MDOC
!= r
->parsetype
)
1352 for (cp
= __mdoc_reserved
; *cp
; cp
++)
1353 roff_setstr(r
, *cp
, NULL
, 0);
1362 const char *const *cp
;
1364 if (MPARSE_MDOC
!= r
->parsetype
)
1365 for (cp
= __man_reserved
; *cp
; cp
++)
1366 roff_setstr(r
, *cp
, NULL
, 0);
1377 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1390 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1392 tbl_restart(ppos
, ln
, r
->tbl
);
1399 roff_closeeqn(struct roff
*r
)
1402 return(r
->eqn
&& ROFF_EQN
== eqn_end(&r
->eqn
) ? 1 : 0);
1407 roff_openeqn(struct roff
*r
, const char *name
, int line
,
1408 int offs
, const char *buf
)
1413 assert(NULL
== r
->eqn
);
1414 e
= eqn_alloc(name
, offs
, line
, r
->parse
);
1417 r
->last_eqn
->next
= e
;
1419 r
->first_eqn
= r
->last_eqn
= e
;
1421 r
->eqn
= r
->last_eqn
= e
;
1425 eqn_read(&r
->eqn
, line
, buf
, offs
, &poff
);
1434 roff_openeqn(r
, *bufp
+ pos
, ln
, ppos
, NULL
);
1443 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1451 struct tbl_node
*tbl
;
1454 mandoc_msg(MANDOCERR_SCOPEBROKEN
, r
->parse
, ln
, ppos
, NULL
);
1458 tbl
= tbl_alloc(ppos
, ln
, r
->parse
);
1461 r
->last_tbl
->next
= tbl
;
1463 r
->first_tbl
= r
->last_tbl
= tbl
;
1465 r
->tbl
= r
->last_tbl
= tbl
;
1477 if ('\0' == *p
|| '.' == (r
->control
= *p
++))
1481 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1490 const char *p
, *first
, *second
;
1492 enum mandoc_esc esc
;
1497 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1501 while ('\0' != *p
) {
1505 if ('\\' == *first
) {
1506 esc
= mandoc_escape(&p
, NULL
, NULL
);
1507 if (ESCAPE_ERROR
== esc
) {
1509 (MANDOCERR_BADESCAPE
, r
->parse
,
1510 ln
, (int)(p
- *bufp
), NULL
);
1513 fsz
= (size_t)(p
- first
);
1517 if ('\\' == *second
) {
1518 esc
= mandoc_escape(&p
, NULL
, NULL
);
1519 if (ESCAPE_ERROR
== esc
) {
1521 (MANDOCERR_BADESCAPE
, r
->parse
,
1522 ln
, (int)(p
- *bufp
), NULL
);
1525 ssz
= (size_t)(p
- second
);
1526 } else if ('\0' == *second
) {
1527 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
,
1528 ln
, (int)(p
- *bufp
), NULL
);
1534 roff_setstrn(&r
->xmbtab
, first
,
1535 fsz
, second
, ssz
, 0);
1539 if (NULL
== r
->xtab
)
1540 r
->xtab
= mandoc_calloc
1541 (128, sizeof(struct roffstr
));
1543 free(r
->xtab
[(int)*first
].p
);
1544 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
1545 r
->xtab
[(int)*first
].sz
= ssz
;
1557 mandoc_msg(MANDOCERR_SO
, r
->parse
, ln
, ppos
, NULL
);
1560 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1561 * opening anything that's not in our cwd or anything beneath
1562 * it. Thus, explicitly disallow traversing up the file-system
1563 * or using absolute paths.
1567 if ('/' == *name
|| strstr(name
, "../") || strstr(name
, "/..")) {
1568 mandoc_msg(MANDOCERR_SOPATH
, r
->parse
, ln
, pos
, NULL
);
1578 roff_userdef(ROFF_ARGS
)
1585 * Collect pointers to macro argument strings
1586 * and null-terminate them.
1589 for (i
= 0; i
< 9; i
++)
1590 arg
[i
] = '\0' == *cp
? "" :
1591 mandoc_getarg(r
->parse
, &cp
, ln
, &pos
);
1594 * Expand macro arguments.
1597 n1
= cp
= mandoc_strdup(r
->current_string
);
1598 while (NULL
!= (cp
= strstr(cp
, "\\$"))) {
1600 if (0 > i
|| 8 < i
) {
1601 /* Not an argument invocation. */
1606 *szp
= strlen(n1
) - 3 + strlen(arg
[i
]) + 1;
1607 n2
= mandoc_malloc(*szp
);
1609 strlcpy(n2
, n1
, (size_t)(cp
- n1
+ 1));
1610 strlcat(n2
, arg
[i
], *szp
);
1611 strlcat(n2
, cp
+ 3, *szp
);
1613 cp
= n2
+ (cp
- n1
);
1619 * Replace the macro invocation
1620 * by the expanded macro.
1625 *szp
= strlen(*bufp
) + 1;
1627 return(*szp
> 1 && '\n' == (*bufp
)[(int)*szp
- 2] ?
1628 ROFF_REPARSE
: ROFF_APPEND
);
1632 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
1640 /* Read until end of name. */
1641 for (cp
= name
; '\0' != *cp
&& ' ' != *cp
; cp
++) {
1647 mandoc_msg(MANDOCERR_NAMESC
, r
->parse
, ln
, pos
, NULL
);
1652 /* Nil-terminate name. */
1656 /* Read past spaces. */
1665 * Store *string into the user-defined string called *name.
1666 * In multiline mode, append to an existing entry and append '\n';
1667 * else replace the existing entry, if there is one.
1668 * To clear an existing entry, call with (*r, *name, NULL, 0).
1671 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
1675 roff_setstrn(&r
->strtab
, name
, strlen(name
), string
,
1676 string
? strlen(string
) : 0, multiline
);
1680 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
1681 const char *string
, size_t stringsz
, int multiline
)
1686 size_t oldch
, newch
;
1688 /* Search for an existing string with the same name. */
1691 while (n
&& strcmp(name
, n
->key
.p
))
1695 /* Create a new string table entry. */
1696 n
= mandoc_malloc(sizeof(struct roffkv
));
1697 n
->key
.p
= mandoc_strndup(name
, namesz
);
1703 } else if (0 == multiline
) {
1704 /* In multiline mode, append; else replace. */
1714 * One additional byte for the '\n' in multiline mode,
1715 * and one for the terminating '\0'.
1717 newch
= stringsz
+ (multiline
? 2u : 1u);
1719 if (NULL
== n
->val
.p
) {
1720 n
->val
.p
= mandoc_malloc(newch
);
1725 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
1728 /* Skip existing content in the destination buffer. */
1729 c
= n
->val
.p
+ (int)oldch
;
1731 /* Append new content to the destination buffer. */
1733 while (i
< (int)stringsz
) {
1735 * Rudimentary roff copy mode:
1736 * Handle escaped backslashes.
1738 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
1743 /* Append terminating bytes. */
1748 n
->val
.sz
= (int)(c
- n
->val
.p
);
1752 roff_getstrn(const struct roff
*r
, const char *name
, size_t len
)
1754 const struct roffkv
*n
;
1756 for (n
= r
->strtab
; n
; n
= n
->next
)
1757 if (0 == strncmp(name
, n
->key
.p
, len
) &&
1758 '\0' == n
->key
.p
[(int)len
])
1765 roff_freestr(struct roffkv
*r
)
1767 struct roffkv
*n
, *nn
;
1769 for (n
= r
; n
; n
= nn
) {
1777 const struct tbl_span
*
1778 roff_span(const struct roff
*r
)
1781 return(r
->tbl
? tbl_span(r
->tbl
) : NULL
);
1785 roff_eqn(const struct roff
*r
)
1788 return(r
->last_eqn
? &r
->last_eqn
->eqn
: NULL
);
1792 * Duplicate an input string, making the appropriate character
1793 * conversions (as stipulated by `tr') along the way.
1794 * Returns a heap-allocated string with all the replacements made.
1797 roff_strdup(const struct roff
*r
, const char *p
)
1799 const struct roffkv
*cp
;
1803 enum mandoc_esc esc
;
1805 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
1806 return(mandoc_strdup(p
));
1807 else if ('\0' == *p
)
1808 return(mandoc_strdup(""));
1811 * Step through each character looking for term matches
1812 * (remember that a `tr' can be invoked with an escape, which is
1813 * a glyph but the escape is multi-character).
1814 * We only do this if the character hash has been initialised
1815 * and the string is >0 length.
1821 while ('\0' != *p
) {
1822 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(int)*p
].p
) {
1823 sz
= r
->xtab
[(int)*p
].sz
;
1824 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
1825 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
1829 } else if ('\\' != *p
) {
1830 res
= mandoc_realloc(res
, ssz
+ 2);
1835 /* Search for term matches. */
1836 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
1837 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
1842 * A match has been found.
1843 * Append the match to the array and move
1844 * forward by its keysize.
1846 res
= mandoc_realloc
1847 (res
, ssz
+ cp
->val
.sz
+ 1);
1848 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
1850 p
+= (int)cp
->key
.sz
;
1855 * Handle escapes carefully: we need to copy
1856 * over just the escape itself, or else we might
1857 * do replacements within the escape itself.
1858 * Make sure to pass along the bogus string.
1861 esc
= mandoc_escape(&p
, NULL
, NULL
);
1862 if (ESCAPE_ERROR
== esc
) {
1864 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
1865 memcpy(res
+ ssz
, pp
, sz
);
1869 * We bail out on bad escapes.
1870 * No need to warn: we already did so when
1871 * roff_res() was called.
1874 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
1875 memcpy(res
+ ssz
, pp
, sz
);
1879 res
[(int)ssz
] = '\0';
1884 * Find out whether a line is a macro line or not.
1885 * If it is, adjust the current position and return one; if it isn't,
1886 * return zero and don't change the current position.
1887 * If the control character has been set with `.cc', then let that grain
1889 * This is slightly contrary to groff, where using the non-breaking
1890 * control character when `cc' has been invoked will cause the
1891 * non-breaking macro contents to be printed verbatim.
1894 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
1900 if (0 != r
->control
&& cp
[pos
] == r
->control
)
1902 else if (0 != r
->control
)
1904 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
1906 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
1911 while (' ' == cp
[pos
] || '\t' == cp
[pos
])