]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.188 2013/12/25 00:50:05 schwarze Exp $ */
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
30 #include "libmandoc.h"
32 /* Maximum number of nested if-else conditionals. */
33 #define RSTACK_MAX 128
35 /* Maximum number of string expansions per line, to break infinite loops. */
36 #define EXPAND_LIMIT 1000
84 * An incredibly-simple string buffer.
87 char *p
; /* nil-terminated buffer */
88 size_t sz
; /* saved strlen(p) */
92 * A key-value roffstr pair as part of a singly-linked list.
97 struct roffkv
*next
; /* next in list */
101 * A single number register as part of a singly-linked list.
106 struct roffreg
*next
;
110 enum mparset parsetype
; /* requested parse type */
111 struct mparse
*parse
; /* parse point */
112 struct roffnode
*last
; /* leaf of stack */
113 enum roffrule rstack
[RSTACK_MAX
]; /* stack of !`ie' rules */
114 char control
; /* control character */
115 int rstackpos
; /* position in rstack */
116 struct roffreg
*regtab
; /* number registers */
117 struct roffkv
*strtab
; /* user-defined strings & macros */
118 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
119 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
120 const char *current_string
; /* value of last called user macro */
121 struct tbl_node
*first_tbl
; /* first table parsed */
122 struct tbl_node
*last_tbl
; /* last table parsed */
123 struct tbl_node
*tbl
; /* current table being parsed */
124 struct eqn_node
*last_eqn
; /* last equation parsed */
125 struct eqn_node
*first_eqn
; /* first equation parsed */
126 struct eqn_node
*eqn
; /* current equation being parsed */
130 enum rofft tok
; /* type of node */
131 struct roffnode
*parent
; /* up one in stack */
132 int line
; /* parse line */
133 int col
; /* parse col */
134 char *name
; /* node name, e.g. macro name */
135 char *end
; /* end-rules: custom token */
136 int endspan
; /* end-rules: next-line or infty */
137 enum roffrule rule
; /* current evaluation rule */
140 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
141 enum rofft tok, /* tok of macro */ \
142 char **bufp, /* input buffer */ \
143 size_t *szp, /* size of input buffer */ \
144 int ln, /* parse line */ \
145 int ppos, /* original pos in buffer */ \
146 int pos, /* current pos in buffer */ \
147 int *offs /* reset offset of buffer data */
149 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
152 const char *name
; /* macro name */
153 roffproc proc
; /* process new macro */
154 roffproc text
; /* process as child text of macro */
155 roffproc sub
; /* process as child of macro */
157 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
158 struct roffmac
*next
;
162 const char *name
; /* predefined input name */
163 const char *str
; /* replacement symbol */
166 #define PREDEF(__name, __str) \
167 { (__name), (__str) },
169 static enum rofft
roffhash_find(const char *, size_t);
170 static void roffhash_init(void);
171 static void roffnode_cleanscope(struct roff
*);
172 static void roffnode_pop(struct roff
*);
173 static void roffnode_push(struct roff
*, enum rofft
,
174 const char *, int, int);
175 static enum rofferr
roff_block(ROFF_ARGS
);
176 static enum rofferr
roff_block_text(ROFF_ARGS
);
177 static enum rofferr
roff_block_sub(ROFF_ARGS
);
178 static enum rofferr
roff_cblock(ROFF_ARGS
);
179 static enum rofferr
roff_cc(ROFF_ARGS
);
180 static enum rofferr
roff_ccond(ROFF_ARGS
);
181 static enum rofferr
roff_cond(ROFF_ARGS
);
182 static enum rofferr
roff_cond_text(ROFF_ARGS
);
183 static enum rofferr
roff_cond_sub(ROFF_ARGS
);
184 static enum rofferr
roff_ds(ROFF_ARGS
);
185 static enum roffrule
roff_evalcond(const char *, int *);
186 static void roff_free1(struct roff
*);
187 static void roff_freereg(struct roffreg
*);
188 static void roff_freestr(struct roffkv
*);
189 static char *roff_getname(struct roff
*, char **, int, int);
190 static int roff_getnum(const char *, int *, int *);
191 static int roff_getop(const char *, int *, char *);
192 static int roff_getregn(const struct roff
*,
193 const char *, size_t);
194 static const char *roff_getstrn(const struct roff
*,
195 const char *, size_t);
196 static enum rofferr
roff_it(ROFF_ARGS
);
197 static enum rofferr
roff_line_ignore(ROFF_ARGS
);
198 static enum rofferr
roff_nr(ROFF_ARGS
);
199 static void roff_openeqn(struct roff
*, const char *,
200 int, int, const char *);
201 static enum rofft
roff_parse(struct roff
*, const char *, int *);
202 static enum rofferr
roff_parsetext(char **, size_t *, int, int *);
203 static enum rofferr
roff_res(struct roff
*,
204 char **, size_t *, int, int);
205 static enum rofferr
roff_rm(ROFF_ARGS
);
206 static void roff_setstr(struct roff
*,
207 const char *, const char *, int);
208 static void roff_setstrn(struct roffkv
**, const char *,
209 size_t, const char *, size_t, int);
210 static enum rofferr
roff_so(ROFF_ARGS
);
211 static enum rofferr
roff_tr(ROFF_ARGS
);
212 static enum rofferr
roff_Dd(ROFF_ARGS
);
213 static enum rofferr
roff_TH(ROFF_ARGS
);
214 static enum rofferr
roff_TE(ROFF_ARGS
);
215 static enum rofferr
roff_TS(ROFF_ARGS
);
216 static enum rofferr
roff_EQ(ROFF_ARGS
);
217 static enum rofferr
roff_EN(ROFF_ARGS
);
218 static enum rofferr
roff_T_(ROFF_ARGS
);
219 static enum rofferr
roff_userdef(ROFF_ARGS
);
221 /* See roffhash_find() */
225 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
227 static struct roffmac
*hash
[HASHWIDTH
];
229 static struct roffmac roffs
[ROFF_MAX
] = {
230 { "ad", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
231 { "am", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
232 { "ami", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
233 { "am1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
234 { "cc", roff_cc
, NULL
, NULL
, 0, NULL
},
235 { "de", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
236 { "dei", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
237 { "de1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
238 { "ds", roff_ds
, NULL
, NULL
, 0, NULL
},
239 { "el", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
240 { "fam", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
241 { "hw", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
242 { "hy", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
243 { "ie", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
244 { "if", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
245 { "ig", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
246 { "it", roff_it
, NULL
, NULL
, 0, NULL
},
247 { "ne", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
248 { "nh", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
249 { "nr", roff_nr
, NULL
, NULL
, 0, NULL
},
250 { "ns", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
251 { "ps", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
252 { "rm", roff_rm
, NULL
, NULL
, 0, NULL
},
253 { "so", roff_so
, NULL
, NULL
, 0, NULL
},
254 { "ta", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
255 { "tr", roff_tr
, NULL
, NULL
, 0, NULL
},
256 { "Dd", roff_Dd
, NULL
, NULL
, 0, NULL
},
257 { "TH", roff_TH
, NULL
, NULL
, 0, NULL
},
258 { "TS", roff_TS
, NULL
, NULL
, 0, NULL
},
259 { "TE", roff_TE
, NULL
, NULL
, 0, NULL
},
260 { "T&", roff_T_
, NULL
, NULL
, 0, NULL
},
261 { "EQ", roff_EQ
, NULL
, NULL
, 0, NULL
},
262 { "EN", roff_EN
, NULL
, NULL
, 0, NULL
},
263 { ".", roff_cblock
, NULL
, NULL
, 0, NULL
},
264 { "\\}", roff_ccond
, NULL
, NULL
, 0, NULL
},
265 { NULL
, roff_userdef
, NULL
, NULL
, 0, NULL
},
268 const char *const __mdoc_reserved
[] = {
269 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
270 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
271 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
272 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
273 "Ds", "Dt", "Dv", "Dx", "D1",
274 "Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
275 "En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
276 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
277 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
278 "Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
279 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
280 "Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
281 "Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
282 "Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
283 "Ss", "St", "Sx", "Sy",
284 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
285 "%A", "%B", "%D", "%I", "%J", "%N", "%O",
286 "%P", "%Q", "%R", "%T", "%U", "%V",
290 const char *const __man_reserved
[] = {
291 "AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
292 "EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
293 "LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
294 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
295 "TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
299 /* Array of injected predefined strings. */
300 #define PREDEFS_MAX 38
301 static const struct predef predefs
[PREDEFS_MAX
] = {
302 #include "predefs.in"
305 /* See roffhash_find() */
306 #define ROFF_HASH(p) (p[0] - ASCII_LO)
308 static int roffit_lines
; /* number of lines to delay */
309 static char *roffit_macro
; /* nil-terminated macro line */
317 for (i
= 0; i
< (int)ROFF_USERDEF
; i
++) {
318 assert(roffs
[i
].name
[0] >= ASCII_LO
);
319 assert(roffs
[i
].name
[0] <= ASCII_HI
);
321 buc
= ROFF_HASH(roffs
[i
].name
);
323 if (NULL
!= (n
= hash
[buc
])) {
324 for ( ; n
->next
; n
= n
->next
)
328 hash
[buc
] = &roffs
[i
];
333 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
334 * the nil-terminated string name could be found.
337 roffhash_find(const char *p
, size_t s
)
343 * libroff has an extremely simple hashtable, for the time
344 * being, which simply keys on the first character, which must
345 * be printable, then walks a chain. It works well enough until
349 if (p
[0] < ASCII_LO
|| p
[0] > ASCII_HI
)
354 if (NULL
== (n
= hash
[buc
]))
356 for ( ; n
; n
= n
->next
)
357 if (0 == strncmp(n
->name
, p
, s
) && '\0' == n
->name
[(int)s
])
358 return((enum rofft
)(n
- roffs
));
365 * Pop the current node off of the stack of roff instructions currently
369 roffnode_pop(struct roff
*r
)
376 r
->last
= r
->last
->parent
;
384 * Push a roff node onto the instruction stack. This must later be
385 * removed with roffnode_pop().
388 roffnode_push(struct roff
*r
, enum rofft tok
, const char *name
,
393 p
= mandoc_calloc(1, sizeof(struct roffnode
));
396 p
->name
= mandoc_strdup(name
);
400 p
->rule
= p
->parent
? p
->parent
->rule
: ROFFRULE_DENY
;
407 roff_free1(struct roff
*r
)
409 struct tbl_node
*tbl
;
413 while (NULL
!= (tbl
= r
->first_tbl
)) {
414 r
->first_tbl
= tbl
->next
;
418 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
420 while (NULL
!= (e
= r
->first_eqn
)) {
421 r
->first_eqn
= e
->next
;
425 r
->first_eqn
= r
->last_eqn
= r
->eqn
= NULL
;
430 roff_freestr(r
->strtab
);
431 roff_freestr(r
->xmbtab
);
433 r
->strtab
= r
->xmbtab
= NULL
;
435 roff_freereg(r
->regtab
);
440 for (i
= 0; i
< 128; i
++)
448 roff_reset(struct roff
*r
)
456 for (i
= 0; i
< PREDEFS_MAX
; i
++)
457 roff_setstr(r
, predefs
[i
].name
, predefs
[i
].str
, 0);
462 roff_free(struct roff
*r
)
471 roff_alloc(enum mparset type
, struct mparse
*parse
)
476 r
= mandoc_calloc(1, sizeof(struct roff
));
483 for (i
= 0; i
< PREDEFS_MAX
; i
++)
484 roff_setstr(r
, predefs
[i
].name
, predefs
[i
].str
, 0);
490 * In the current line, expand user-defined strings ("\*")
491 * and references to number registers ("\n").
492 * Also check the syntax of other escape sequences.
495 roff_res(struct roff
*r
, char **bufp
, size_t *szp
, int ln
, int pos
)
497 char ubuf
[12]; /* buffer to print the number */
498 const char *stesc
; /* start of an escape sequence ('\\') */
499 const char *stnam
; /* start of the name, after "[(*" */
500 const char *cp
; /* end of the name, e.g. before ']' */
501 const char *res
; /* the string to be substituted */
502 char *nbuf
; /* new buffer to copy bufp to */
503 size_t nsz
; /* size of the new buffer */
504 size_t maxl
; /* expected length of the escape name */
505 size_t naml
; /* actual length of the escape name */
506 int expand_count
; /* to avoid infinite loops */
512 while (NULL
!= (cp
= strchr(cp
, '\\'))) {
516 * The second character must be an asterisk or an n.
517 * If it isn't, skip it anyway: It is escaped,
518 * so it can't start another escape sequence.
532 if (ESCAPE_ERROR
!= mandoc_escape(&cp
, NULL
, NULL
))
535 (MANDOCERR_BADESCAPE
, r
->parse
,
536 ln
, (int)(stesc
- *bufp
), NULL
);
543 * The third character decides the length
544 * of the name of the string or register.
545 * Save a pointer to the name.
565 /* Advance to the end of the name. */
567 for (naml
= 0; 0 == maxl
|| naml
< maxl
; naml
++, cp
++) {
570 (MANDOCERR_BADESCAPE
,
572 (int)(stesc
- *bufp
), NULL
);
575 if (0 == maxl
&& ']' == *cp
)
580 * Retrieve the replacement string; if it is
581 * undefined, resume searching for escapes.
585 res
= roff_getstrn(r
, stnam
, naml
);
587 snprintf(ubuf
, sizeof(ubuf
), "%d",
588 roff_getregn(r
, stnam
, naml
));
592 (MANDOCERR_BADESCAPE
, r
->parse
,
593 ln
, (int)(stesc
- *bufp
), NULL
);
597 /* Replace the escape sequence by the string. */
601 nsz
= *szp
+ strlen(res
) + 1;
602 nbuf
= mandoc_malloc(nsz
);
604 strlcpy(nbuf
, *bufp
, (size_t)(stesc
- *bufp
+ 1));
605 strlcat(nbuf
, res
, nsz
);
606 strlcat(nbuf
, cp
+ (maxl
? 0 : 1), nsz
);
613 if (EXPAND_LIMIT
>= ++expand_count
)
616 /* Just leave the string unexpanded. */
617 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
, ln
, pos
, NULL
);
624 * Process text streams:
625 * Convert all breakable hyphens into ASCII_HYPH.
626 * Decrement and spring input line trap.
629 roff_parsetext(char **bufp
, size_t *szp
, int pos
, int *offs
)
637 start
= p
= *bufp
+ pos
;
640 sz
= strcspn(p
, "-\\");
647 /* Skip over escapes. */
650 ((const char const **)&p
, NULL
, NULL
);
651 if (ESCAPE_ERROR
== esc
)
654 } else if (p
== start
) {
659 if (isalpha((unsigned char)p
[-1]) &&
660 isalpha((unsigned char)p
[1]))
665 /* Spring the input line trap. */
666 if (1 == roffit_lines
) {
667 isz
= asprintf(&p
, "%s\n.%s", *bufp
, roffit_macro
);
670 exit((int)MANDOCLEVEL_SYSERR
);
678 return(ROFF_REPARSE
);
679 } else if (1 < roffit_lines
)
685 roff_parseln(struct roff
*r
, int ln
, char **bufp
,
686 size_t *szp
, int pos
, int *offs
)
693 * Run the reserved-word filter only if we have some reserved
697 e
= roff_res(r
, bufp
, szp
, ln
, pos
);
700 assert(ROFF_CONT
== e
);
703 ctl
= roff_getcontrol(r
, *bufp
, &pos
);
706 * First, if a scope is open and we're not a macro, pass the
707 * text through the macro's filter. If a scope isn't open and
708 * we're not a macro, just let it through.
709 * Finally, if there's an equation scope open, divert it into it
710 * no matter our state.
713 if (r
->last
&& ! ctl
) {
715 assert(roffs
[t
].text
);
717 (r
, t
, bufp
, szp
, ln
, pos
, pos
, offs
);
718 assert(ROFF_IGN
== e
|| ROFF_CONT
== e
);
723 return(eqn_read(&r
->eqn
, ln
, *bufp
, ppos
, offs
));
726 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
727 return(roff_parsetext(bufp
, szp
, pos
, offs
));
731 * If a scope is open, go to the child handler for that macro,
732 * as it may want to preprocess before doing anything with it.
733 * Don't do so if an equation is open.
738 assert(roffs
[t
].sub
);
739 return((*roffs
[t
].sub
)
741 ln
, ppos
, pos
, offs
));
745 * Lastly, as we've no scope open, try to look up and execute
746 * the new macro. If no macro is found, simply return and let
747 * the compilers handle it.
750 if (ROFF_MAX
== (t
= roff_parse(r
, *bufp
, &pos
)))
753 assert(roffs
[t
].proc
);
754 return((*roffs
[t
].proc
)
756 ln
, ppos
, pos
, offs
));
761 roff_endparse(struct roff
*r
)
765 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
766 r
->last
->line
, r
->last
->col
, NULL
);
769 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
770 r
->eqn
->eqn
.ln
, r
->eqn
->eqn
.pos
, NULL
);
775 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
776 r
->tbl
->line
, r
->tbl
->pos
, NULL
);
782 * Parse a roff node's type from the input buffer. This must be in the
783 * form of ".foo xxx" in the usual way.
786 roff_parse(struct roff
*r
, const char *buf
, int *pos
)
792 if ('\0' == buf
[*pos
] || '"' == buf
[*pos
] ||
793 '\t' == buf
[*pos
] || ' ' == buf
[*pos
])
797 * We stop the macro parse at an escape, tab, space, or nil.
798 * However, `\}' is also a valid macro, so make sure we don't
799 * clobber it by seeing the `\' as the end of token.
803 maclen
= strcspn(mac
+ 1, " \\\t\0") + 1;
805 t
= (r
->current_string
= roff_getstrn(r
, mac
, maclen
))
806 ? ROFF_USERDEF
: roffhash_find(mac
, maclen
);
810 while (buf
[*pos
] && ' ' == buf
[*pos
])
818 roff_cblock(ROFF_ARGS
)
822 * A block-close `..' should only be invoked as a child of an
823 * ignore macro, otherwise raise a warning and just ignore it.
826 if (NULL
== r
->last
) {
827 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
831 switch (r
->last
->tok
) {
839 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
846 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
851 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
854 roffnode_cleanscope(r
);
861 roffnode_cleanscope(struct roff
*r
)
865 if (--r
->last
->endspan
!= 0)
874 roff_ccond(ROFF_ARGS
)
877 if (NULL
== r
->last
) {
878 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
882 switch (r
->last
->tok
) {
890 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
894 if (r
->last
->endspan
> -1) {
895 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
900 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
903 roffnode_cleanscope(r
);
910 roff_block(ROFF_ARGS
)
918 if (ROFF_ig
!= tok
) {
919 if ('\0' == (*bufp
)[pos
]) {
920 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
925 * Re-write `de1', since we don't really care about
926 * groff's strange compatibility mode, into `de'.
934 mandoc_msg(MANDOCERR_REQUEST
, r
->parse
, ln
, ppos
,
937 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
940 while (isspace((unsigned char)(*bufp
)[pos
]))
941 (*bufp
)[pos
++] = '\0';
944 roffnode_push(r
, tok
, name
, ln
, ppos
);
947 * At the beginning of a `de' macro, clear the existing string
948 * with the same name, if there is one. New content will be
949 * added from roff_block_text() in multiline mode.
953 roff_setstr(r
, name
, "", 0);
955 if ('\0' == (*bufp
)[pos
])
958 /* If present, process the custom end-of-line marker. */
961 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
965 * Note: groff does NOT like escape characters in the input.
966 * Instead of detecting this, we're just going to let it fly and
971 sz
= (size_t)(pos
- sv
);
973 if (1 == sz
&& '.' == (*bufp
)[sv
])
976 r
->last
->end
= mandoc_malloc(sz
+ 1);
978 memcpy(r
->last
->end
, *bufp
+ sv
, sz
);
979 r
->last
->end
[(int)sz
] = '\0';
982 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
990 roff_block_sub(ROFF_ARGS
)
996 * First check whether a custom macro exists at this level. If
997 * it does, then check against it. This is one of groff's
998 * stranger behaviours. If we encountered a custom end-scope
999 * tag and that tag also happens to be a "real" macro, then we
1000 * need to try interpreting it again as a real macro. If it's
1001 * not, then return ignore. Else continue.
1005 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
1006 if ((*bufp
)[i
] != r
->last
->end
[j
])
1009 if ('\0' == r
->last
->end
[j
] &&
1010 ('\0' == (*bufp
)[i
] ||
1011 ' ' == (*bufp
)[i
] ||
1012 '\t' == (*bufp
)[i
])) {
1014 roffnode_cleanscope(r
);
1016 while (' ' == (*bufp
)[i
] || '\t' == (*bufp
)[i
])
1020 if (ROFF_MAX
!= roff_parse(r
, *bufp
, &pos
))
1027 * If we have no custom end-query or lookup failed, then try
1028 * pulling it out of the hashtable.
1031 t
= roff_parse(r
, *bufp
, &pos
);
1034 * Macros other than block-end are only significant
1035 * in `de' blocks; elsewhere, simply throw them away.
1037 if (ROFF_cblock
!= t
) {
1039 roff_setstr(r
, r
->last
->name
, *bufp
+ ppos
, 1);
1043 assert(roffs
[t
].proc
);
1044 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1045 ln
, ppos
, pos
, offs
));
1051 roff_block_text(ROFF_ARGS
)
1055 roff_setstr(r
, r
->last
->name
, *bufp
+ pos
, 1);
1063 roff_cond_sub(ROFF_ARGS
)
1070 roffnode_cleanscope(r
);
1071 t
= roff_parse(r
, *bufp
, &pos
);
1074 * Fully handle known macros when they are structurally
1075 * required or when the conditional evaluated to true.
1078 if ((ROFF_MAX
!= t
) &&
1079 (ROFF_ccond
== t
|| ROFFRULE_ALLOW
== rr
||
1080 ROFFMAC_STRUCT
& roffs
[t
].flags
)) {
1081 assert(roffs
[t
].proc
);
1082 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1083 ln
, ppos
, pos
, offs
));
1086 /* Always check for the closing delimiter `\}'. */
1089 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1094 * If we're at the end of line, then just chop
1095 * off the \} and resize the buffer.
1096 * If we aren't, then convert it to spaces.
1099 if ('\0' == *(ep
+ 1)) {
1103 *(ep
- 1) = *ep
= ' ';
1105 roff_ccond(r
, ROFF_ccond
, bufp
, szp
,
1106 ln
, pos
, pos
+ 2, offs
);
1109 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
1114 roff_cond_text(ROFF_ARGS
)
1120 roffnode_cleanscope(r
);
1123 for ( ; NULL
!= (ep
= strchr(ep
, '\\')); ep
++) {
1128 roff_ccond(r
, ROFF_ccond
, bufp
, szp
,
1129 ln
, pos
, pos
+ 2, offs
);
1131 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
1135 roff_getnum(const char *v, int *pos, int *res)
/*
 * Accumulate the run of decimal digits starting at v[p] into *res.
 * Each step multiplies the value collected so far by ten and adds the
 * new digit.  This must be a plain assignment: using `+=' would add
 * the previous value a second time, so that "12" would yield 13.
 */
1144 for (*res = 0; isdigit((unsigned char)v[p]); p++)
1145 *res = 10 * *res + v[p] - '0';
1157 roff_getop(const char *v
, int *pos
, char *res
)
1162 e
= v
[*pos
+ 1] == '=';
1184 static enum roffrule
1185 roff_evalcond(const char *v
, int *pos
)
1193 return(ROFFRULE_ALLOW
);
1200 return(ROFFRULE_DENY
);
1210 if (!roff_getnum(v
, pos
, &lh
))
1211 return ROFFRULE_DENY
;
1212 if (!roff_getop(v
, pos
, &op
)) {
1217 if (!roff_getnum(v
, pos
, &rh
))
1218 return ROFFRULE_DENY
;
1236 return ROFFRULE_DENY
;
1241 return lh
? ROFFRULE_ALLOW
: ROFFRULE_DENY
;
1246 roff_line_ignore(ROFF_ARGS
)
1254 roff_cond(ROFF_ARGS
)
1257 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
1260 * An `.el' has no conditional body: it will consume the value
1261 * of the current rstack entry set in prior `ie' calls or
1264 * If we're not an `el', however, then evaluate the conditional.
1267 r
->last
->rule
= ROFF_el
== tok
?
1269 ROFFRULE_DENY
: r
->rstack
[r
->rstackpos
--]) :
1270 roff_evalcond(*bufp
, &pos
);
1273 * An if-else will put the NEGATION of the current evaluated
1274 * conditional into the stack of rules.
1277 if (ROFF_ie
== tok
) {
1278 if (r
->rstackpos
== RSTACK_MAX
- 1) {
1279 mandoc_msg(MANDOCERR_MEM
,
1280 r
->parse
, ln
, ppos
, NULL
);
1283 r
->rstack
[++r
->rstackpos
] =
1284 ROFFRULE_DENY
== r
->last
->rule
?
1285 ROFFRULE_ALLOW
: ROFFRULE_DENY
;
1288 /* If the parent has false as its rule, then so do we. */
1290 if (r
->last
->parent
&& ROFFRULE_DENY
== r
->last
->parent
->rule
)
1291 r
->last
->rule
= ROFFRULE_DENY
;
1295 * If there is nothing on the line after the conditional,
1296 * not even whitespace, use next-line scope.
1299 if ('\0' == (*bufp
)[pos
]) {
1300 r
->last
->endspan
= 2;
1304 while (' ' == (*bufp
)[pos
])
1307 /* An opening brace requests multiline scope. */
1309 if ('\\' == (*bufp
)[pos
] && '{' == (*bufp
)[pos
+ 1]) {
1310 r
->last
->endspan
= -1;
1316 * Anything else following the conditional causes
1317 * single-line scope. Warn if the scope contains
1318 * nothing but trailing whitespace.
1321 if ('\0' == (*bufp
)[pos
])
1322 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
1324 r
->last
->endspan
= 1;
1336 char *name
, *string
;
1339 * A symbol is named by the first word following the macro
1340 * invocation up to a space. Its value is anything after the
1341 * name's trailing whitespace and optional double-quote. Thus,
1345 * will have `bar " ' as its value.
1348 string
= *bufp
+ pos
;
1349 name
= roff_getname(r
, &string
, ln
, pos
);
1353 /* Read past initial double-quote. */
1357 /* The rest is the value. */
1358 roff_setstr(r
, name
, string
, 0);
1363 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
1365 struct roffreg
*reg
;
1367 /* Search for an existing register with the same name. */
1370 while (reg
&& strcmp(name
, reg
->key
.p
))
1374 /* Create a new register. */
1375 reg
= mandoc_malloc(sizeof(struct roffreg
));
1376 reg
->key
.p
= mandoc_strdup(name
);
1377 reg
->key
.sz
= strlen(name
);
1379 reg
->next
= r
->regtab
;
1385 else if ('-' == sign
)
1392 roff_getreg(const struct roff
*r
, const char *name
)
1394 struct roffreg
*reg
;
1396 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1397 if (0 == strcmp(name
, reg
->key
.p
))
1404 roff_getregn(const struct roff
*r
, const char *name
, size_t len
)
1406 struct roffreg
*reg
;
1408 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1409 if (len
== reg
->key
.sz
&&
1410 0 == strncmp(name
, reg
->key
.p
, len
))
1417 roff_freereg(struct roffreg
*reg
)
1419 struct roffreg
*old_reg
;
1421 while (NULL
!= reg
) {
1440 key
= roff_getname(r
, &val
, ln
, pos
);
1443 if ('+' == sign
|| '-' == sign
)
1446 sz
= strspn(val
, "0123456789");
1447 iv
= sz
? mandoc_strntoi(val
, sz
, 10) : 0;
1449 roff_setreg(r
, key
, iv
, sign
);
1462 while ('\0' != *cp
) {
1463 name
= roff_getname(r
, &cp
, ln
, (int)(cp
- *bufp
));
1465 roff_setstr(r
, name
, NULL
, 0);
1478 /* Parse the number of lines. */
1480 len
= strcspn(cp
, " \t");
1482 if ((iv
= mandoc_strntoi(cp
, len
, 10)) <= 0) {
1483 mandoc_msg(MANDOCERR_NUMERIC
, r
->parse
,
1484 ln
, ppos
, *bufp
+ 1);
1489 /* Arm the input line trap. */
1491 roffit_macro
= mandoc_strdup(cp
);
1499 const char *const *cp
;
1501 if (MPARSE_MDOC
!= r
->parsetype
)
1502 for (cp
= __mdoc_reserved
; *cp
; cp
++)
1503 roff_setstr(r
, *cp
, NULL
, 0);
1512 const char *const *cp
;
1514 if (MPARSE_MDOC
!= r
->parsetype
)
1515 for (cp
= __man_reserved
; *cp
; cp
++)
1516 roff_setstr(r
, *cp
, NULL
, 0);
1527 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1540 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1542 tbl_restart(ppos
, ln
, r
->tbl
);
1549 roff_closeeqn(struct roff
*r
)
1552 return(r
->eqn
&& ROFF_EQN
== eqn_end(&r
->eqn
) ? 1 : 0);
1557 roff_openeqn(struct roff
*r
, const char *name
, int line
,
1558 int offs
, const char *buf
)
1563 assert(NULL
== r
->eqn
);
1564 e
= eqn_alloc(name
, offs
, line
, r
->parse
);
1567 r
->last_eqn
->next
= e
;
1569 r
->first_eqn
= r
->last_eqn
= e
;
1571 r
->eqn
= r
->last_eqn
= e
;
1575 eqn_read(&r
->eqn
, line
, buf
, offs
, &poff
);
1584 roff_openeqn(r
, *bufp
+ pos
, ln
, ppos
, NULL
);
1593 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1601 struct tbl_node
*tbl
;
1604 mandoc_msg(MANDOCERR_SCOPEBROKEN
, r
->parse
, ln
, ppos
, NULL
);
1608 tbl
= tbl_alloc(ppos
, ln
, r
->parse
);
1611 r
->last_tbl
->next
= tbl
;
1613 r
->first_tbl
= r
->last_tbl
= tbl
;
1615 r
->tbl
= r
->last_tbl
= tbl
;
1627 if ('\0' == *p
|| '.' == (r
->control
= *p
++))
1631 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1640 const char *p
, *first
, *second
;
1642 enum mandoc_esc esc
;
1647 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1651 while ('\0' != *p
) {
1655 if ('\\' == *first
) {
1656 esc
= mandoc_escape(&p
, NULL
, NULL
);
1657 if (ESCAPE_ERROR
== esc
) {
1659 (MANDOCERR_BADESCAPE
, r
->parse
,
1660 ln
, (int)(p
- *bufp
), NULL
);
1663 fsz
= (size_t)(p
- first
);
1667 if ('\\' == *second
) {
1668 esc
= mandoc_escape(&p
, NULL
, NULL
);
1669 if (ESCAPE_ERROR
== esc
) {
1671 (MANDOCERR_BADESCAPE
, r
->parse
,
1672 ln
, (int)(p
- *bufp
), NULL
);
1675 ssz
= (size_t)(p
- second
);
1676 } else if ('\0' == *second
) {
1677 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
,
1678 ln
, (int)(p
- *bufp
), NULL
);
1684 roff_setstrn(&r
->xmbtab
, first
,
1685 fsz
, second
, ssz
, 0);
1689 if (NULL
== r
->xtab
)
1690 r
->xtab
= mandoc_calloc
1691 (128, sizeof(struct roffstr
));
1693 free(r
->xtab
[(int)*first
].p
);
1694 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
1695 r
->xtab
[(int)*first
].sz
= ssz
;
1707 mandoc_msg(MANDOCERR_SO
, r
->parse
, ln
, ppos
, NULL
);
1710 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1711 * opening anything that's not in our cwd or anything beneath
1712 * it. Thus, explicitly disallow traversing up the file-system
1713 * or using absolute paths.
1717 if ('/' == *name
|| strstr(name
, "../") || strstr(name
, "/..")) {
1718 mandoc_msg(MANDOCERR_SOPATH
, r
->parse
, ln
, pos
, NULL
);
1728 roff_userdef(ROFF_ARGS
)
1735 * Collect pointers to macro argument strings
1736 * and NUL-terminate them.
1739 for (i
= 0; i
< 9; i
++)
1740 arg
[i
] = '\0' == *cp
? "" :
1741 mandoc_getarg(r
->parse
, &cp
, ln
, &pos
);
1744 * Expand macro arguments.
1747 n1
= cp
= mandoc_strdup(r
->current_string
);
1748 while (NULL
!= (cp
= strstr(cp
, "\\$"))) {
1750 if (0 > i
|| 8 < i
) {
1751 /* Not an argument invocation. */
1756 *szp
= strlen(n1
) - 3 + strlen(arg
[i
]) + 1;
1757 n2
= mandoc_malloc(*szp
);
1759 strlcpy(n2
, n1
, (size_t)(cp
- n1
+ 1));
1760 strlcat(n2
, arg
[i
], *szp
);
1761 strlcat(n2
, cp
+ 3, *szp
);
1763 cp
= n2
+ (cp
- n1
);
1769 * Replace the macro invocation
1770 * by the expanded macro.
1775 *szp
= strlen(*bufp
) + 1;
1777 return(*szp
> 1 && '\n' == (*bufp
)[(int)*szp
- 2] ?
1778 ROFF_REPARSE
: ROFF_APPEND
);
1782 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
1790 /* Read until end of name. */
1791 for (cp
= name
; '\0' != *cp
&& ' ' != *cp
; cp
++) {
1797 mandoc_msg(MANDOCERR_NAMESC
, r
->parse
, ln
, pos
, NULL
);
1802 /* Nil-terminate name. */
1806 /* Read past spaces. */
1815 * Store *string into the user-defined string called *name.
1816 * In multiline mode, append to an existing entry and append '\n';
1817 * else replace the existing entry, if there is one.
1818 * To clear an existing entry, call with (*r, *name, NULL, 0).
1821 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
1825 roff_setstrn(&r
->strtab
, name
, strlen(name
), string
,
1826 string
? strlen(string
) : 0, multiline
);
1830 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
1831 const char *string
, size_t stringsz
, int multiline
)
1836 size_t oldch
, newch
;
1838 /* Search for an existing string with the same name. */
1841 while (n
&& strcmp(name
, n
->key
.p
))
1845 /* Create a new string table entry. */
1846 n
= mandoc_malloc(sizeof(struct roffkv
));
1847 n
->key
.p
= mandoc_strndup(name
, namesz
);
1853 } else if (0 == multiline
) {
1854 /* In multiline mode, append; else replace. */
1864 * One additional byte for the '\n' in multiline mode,
1865 * and one for the terminating '\0'.
1867 newch
= stringsz
+ (multiline
? 2u : 1u);
1869 if (NULL
== n
->val
.p
) {
1870 n
->val
.p
= mandoc_malloc(newch
);
1875 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
1878 /* Skip existing content in the destination buffer. */
1879 c
= n
->val
.p
+ (int)oldch
;
1881 /* Append new content to the destination buffer. */
1883 while (i
< (int)stringsz
) {
1885 * Rudimentary roff copy mode:
1886 * Handle escaped backslashes.
1888 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
1893 /* Append terminating bytes. */
1898 n
->val
.sz
= (int)(c
- n
->val
.p
);
1902 roff_getstrn(const struct roff
*r
, const char *name
, size_t len
)
1904 const struct roffkv
*n
;
1906 for (n
= r
->strtab
; n
; n
= n
->next
)
1907 if (0 == strncmp(name
, n
->key
.p
, len
) &&
1908 '\0' == n
->key
.p
[(int)len
])
1915 roff_freestr(struct roffkv
*r
)
1917 struct roffkv
*n
, *nn
;
1919 for (n
= r
; n
; n
= nn
) {
1927 const struct tbl_span
*
1928 roff_span(const struct roff
*r
)
1931 return(r
->tbl
? tbl_span(r
->tbl
) : NULL
);
1935 roff_eqn(const struct roff
*r
)
1938 return(r
->last_eqn
? &r
->last_eqn
->eqn
: NULL
);
1942 * Duplicate an input string, making the appropriate character
1943 * conversions (as stipulated by `tr') along the way.
1944 * Returns a heap-allocated string with all the replacements made.
1947 roff_strdup(const struct roff
*r
, const char *p
)
1949 const struct roffkv
*cp
;
1953 enum mandoc_esc esc
;
1955 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
1956 return(mandoc_strdup(p
));
1957 else if ('\0' == *p
)
1958 return(mandoc_strdup(""));
1961 * Step through each character looking for term matches
1962 * (remember that a `tr' can be invoked with an escape, which is
1963 * a glyph but the escape is multi-character).
1964 * We only do this if the character hash has been initialised
1965 * and the string is >0 length.
1971 while ('\0' != *p
) {
1972 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(int)*p
].p
) {
1973 sz
= r
->xtab
[(int)*p
].sz
;
1974 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
1975 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
1979 } else if ('\\' != *p
) {
1980 res
= mandoc_realloc(res
, ssz
+ 2);
1985 /* Search for term matches. */
1986 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
1987 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
1992 * A match has been found.
1993 * Append the match to the array and move
1994 * forward by its keysize.
1996 res
= mandoc_realloc
1997 (res
, ssz
+ cp
->val
.sz
+ 1);
1998 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
2000 p
+= (int)cp
->key
.sz
;
2005 * Handle escapes carefully: we need to copy
2006 * over just the escape itself, or else we might
2007 * do replacements within the escape itself.
2008 * Make sure to pass along the bogus string.
2011 esc
= mandoc_escape(&p
, NULL
, NULL
);
2012 if (ESCAPE_ERROR
== esc
) {
2014 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2015 memcpy(res
+ ssz
, pp
, sz
);
2019 * We bail out on bad escapes.
2020 * No need to warn: we already did so when
2021 * roff_res() was called.
2024 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2025 memcpy(res
+ ssz
, pp
, sz
);
2029 res
[(int)ssz
] = '\0';
2034 * Find out whether a line is a macro line or not.
2035 * If it is, adjust the current position and return one; if it isn't,
2036 * return zero and don't change the current position.
2037 * If the control character has been set with `.cc', then let that gain
2039 * This is slightly contrary to groff, where using the non-breaking
2040 * control character when `cc' has been invoked will cause the
2041 * non-breaking macro contents to be printed verbatim.
2044 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
2050 if (0 != r
->control
&& cp
[pos
] == r
->control
)
2052 else if (0 != r
->control
)
2054 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
2056 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
2061 while (' ' == cp
[pos
] || '\t' == cp
[pos
])