]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.191 2014/01/06 23:46:07 schwarze Exp $ */
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
30 #include "libmandoc.h"
32 /* Maximum number of nested if-else conditionals. */
33 #define RSTACK_MAX 128
35 /* Maximum number of string expansions per line, to break infinite loops. */
36 #define EXPAND_LIMIT 1000
84 * An incredibly-simple string buffer.
87 char *p
; /* nil-terminated buffer */
88 size_t sz
; /* saved strlen(p) */
92 * A key-value roffstr pair as part of a singly-linked list.
97 struct roffkv
*next
; /* next in list */
101 * A single number register as part of a singly-linked list.
106 struct roffreg
*next
;
110 enum mparset parsetype
; /* requested parse type */
111 struct mparse
*parse
; /* parse point */
112 int quick
; /* skip standard macro deletion */
113 struct roffnode
*last
; /* leaf of stack */
114 enum roffrule rstack
[RSTACK_MAX
]; /* stack of !`ie' rules */
115 char control
; /* control character */
116 int rstackpos
; /* position in rstack */
117 struct roffreg
*regtab
; /* number registers */
118 struct roffkv
*strtab
; /* user-defined strings & macros */
119 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
120 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
121 const char *current_string
; /* value of last called user macro */
122 struct tbl_node
*first_tbl
; /* first table parsed */
123 struct tbl_node
*last_tbl
; /* last table parsed */
124 struct tbl_node
*tbl
; /* current table being parsed */
125 struct eqn_node
*last_eqn
; /* last equation parsed */
126 struct eqn_node
*first_eqn
; /* first equation parsed */
127 struct eqn_node
*eqn
; /* current equation being parsed */
131 enum rofft tok
; /* type of node */
132 struct roffnode
*parent
; /* up one in stack */
133 int line
; /* parse line */
134 int col
; /* parse col */
135 char *name
; /* node name, e.g. macro name */
136 char *end
; /* end-rules: custom token */
137 int endspan
; /* end-rules: next-line or infty */
138 enum roffrule rule
; /* current evaluation rule */
141 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
142 enum rofft tok, /* tok of macro */ \
143 char **bufp, /* input buffer */ \
144 size_t *szp, /* size of input buffer */ \
145 int ln, /* parse line */ \
146 int ppos, /* original pos in buffer */ \
147 int pos, /* current pos in buffer */ \
148 int *offs /* reset offset of buffer data */
150 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
153 const char *name
; /* macro name */
154 roffproc proc
; /* process new macro */
155 roffproc text
; /* process as child text of macro */
156 roffproc sub
; /* process as child of macro */
158 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
159 struct roffmac
*next
;
163 const char *name
; /* predefined input name */
164 const char *str
; /* replacement symbol */
167 #define PREDEF(__name, __str) \
168 { (__name), (__str) },
170 static enum rofft
roffhash_find(const char *, size_t);
171 static void roffhash_init(void);
172 static void roffnode_cleanscope(struct roff
*);
173 static void roffnode_pop(struct roff
*);
174 static void roffnode_push(struct roff
*, enum rofft
,
175 const char *, int, int);
176 static enum rofferr
roff_block(ROFF_ARGS
);
177 static enum rofferr
roff_block_text(ROFF_ARGS
);
178 static enum rofferr
roff_block_sub(ROFF_ARGS
);
179 static enum rofferr
roff_cblock(ROFF_ARGS
);
180 static enum rofferr
roff_cc(ROFF_ARGS
);
181 static enum rofferr
roff_ccond(ROFF_ARGS
);
182 static enum rofferr
roff_cond(ROFF_ARGS
);
183 static enum rofferr
roff_cond_text(ROFF_ARGS
);
184 static enum rofferr
roff_cond_sub(ROFF_ARGS
);
185 static enum rofferr
roff_ds(ROFF_ARGS
);
186 static enum roffrule
roff_evalcond(const char *, int *);
187 static void roff_free1(struct roff
*);
188 static void roff_freereg(struct roffreg
*);
189 static void roff_freestr(struct roffkv
*);
190 static char *roff_getname(struct roff
*, char **, int, int);
191 static int roff_getnum(const char *, int *, int *);
192 static int roff_getop(const char *, int *, char *);
193 static int roff_getregn(const struct roff
*,
194 const char *, size_t);
195 static const char *roff_getstrn(const struct roff
*,
196 const char *, size_t);
197 static enum rofferr
roff_it(ROFF_ARGS
);
198 static enum rofferr
roff_line_ignore(ROFF_ARGS
);
199 static enum rofferr
roff_nr(ROFF_ARGS
);
200 static void roff_openeqn(struct roff
*, const char *,
201 int, int, const char *);
202 static enum rofft
roff_parse(struct roff
*, const char *, int *);
203 static enum rofferr
roff_parsetext(char **, size_t *, int, int *);
204 static enum rofferr
roff_res(struct roff
*,
205 char **, size_t *, int, int);
206 static enum rofferr
roff_rm(ROFF_ARGS
);
207 static void roff_setstr(struct roff
*,
208 const char *, const char *, int);
209 static void roff_setstrn(struct roffkv
**, const char *,
210 size_t, const char *, size_t, int);
211 static enum rofferr
roff_so(ROFF_ARGS
);
212 static enum rofferr
roff_tr(ROFF_ARGS
);
213 static enum rofferr
roff_Dd(ROFF_ARGS
);
214 static enum rofferr
roff_TH(ROFF_ARGS
);
215 static enum rofferr
roff_TE(ROFF_ARGS
);
216 static enum rofferr
roff_TS(ROFF_ARGS
);
217 static enum rofferr
roff_EQ(ROFF_ARGS
);
218 static enum rofferr
roff_EN(ROFF_ARGS
);
219 static enum rofferr
roff_T_(ROFF_ARGS
);
220 static enum rofferr
roff_userdef(ROFF_ARGS
);
222 /* See roffhash_find() */
226 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
228 static struct roffmac
*hash
[HASHWIDTH
];
230 static struct roffmac roffs
[ROFF_MAX
] = {
231 { "ad", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
232 { "am", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
233 { "ami", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
234 { "am1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
235 { "cc", roff_cc
, NULL
, NULL
, 0, NULL
},
236 { "de", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
237 { "dei", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
238 { "de1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
239 { "ds", roff_ds
, NULL
, NULL
, 0, NULL
},
240 { "el", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
241 { "fam", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
242 { "hw", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
243 { "hy", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
244 { "ie", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
245 { "if", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
246 { "ig", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
247 { "it", roff_it
, NULL
, NULL
, 0, NULL
},
248 { "ne", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
249 { "nh", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
250 { "nr", roff_nr
, NULL
, NULL
, 0, NULL
},
251 { "ns", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
252 { "ps", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
253 { "rm", roff_rm
, NULL
, NULL
, 0, NULL
},
254 { "so", roff_so
, NULL
, NULL
, 0, NULL
},
255 { "ta", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
256 { "tr", roff_tr
, NULL
, NULL
, 0, NULL
},
257 { "Dd", roff_Dd
, NULL
, NULL
, 0, NULL
},
258 { "TH", roff_TH
, NULL
, NULL
, 0, NULL
},
259 { "TS", roff_TS
, NULL
, NULL
, 0, NULL
},
260 { "TE", roff_TE
, NULL
, NULL
, 0, NULL
},
261 { "T&", roff_T_
, NULL
, NULL
, 0, NULL
},
262 { "EQ", roff_EQ
, NULL
, NULL
, 0, NULL
},
263 { "EN", roff_EN
, NULL
, NULL
, 0, NULL
},
264 { ".", roff_cblock
, NULL
, NULL
, 0, NULL
},
265 { "\\}", roff_ccond
, NULL
, NULL
, 0, NULL
},
266 { NULL
, roff_userdef
, NULL
, NULL
, 0, NULL
},
269 const char *const __mdoc_reserved
[] = {
270 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
271 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
272 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
273 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
274 "Ds", "Dt", "Dv", "Dx", "D1",
275 "Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
276 "En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
277 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
278 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
279 "Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
280 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
281 "Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
282 "Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
283 "Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
284 "Ss", "St", "Sx", "Sy",
285 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
286 "%A", "%B", "%D", "%I", "%J", "%N", "%O",
287 "%P", "%Q", "%R", "%T", "%U", "%V",
291 const char *const __man_reserved
[] = {
292 "AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
293 "EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
294 "LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
295 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
296 "TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
300 /* Array of injected predefined strings. */
301 #define PREDEFS_MAX 38
302 static const struct predef predefs
[PREDEFS_MAX
] = {
303 #include "predefs.in"
306 /* See roffhash_find() */
/*
 * Map a macro name to its bucket in hash[]: the offset of the name's
 * first byte from ASCII_LO.  The argument is parenthesized so that
 * pointer expressions such as (s + 1) index correctly.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
309 static int roffit_lines
; /* number of lines to delay */
310 static char *roffit_macro
; /* nil-terminated macro line */
318 for (i
= 0; i
< (int)ROFF_USERDEF
; i
++) {
319 assert(roffs
[i
].name
[0] >= ASCII_LO
);
320 assert(roffs
[i
].name
[0] <= ASCII_HI
);
322 buc
= ROFF_HASH(roffs
[i
].name
);
324 if (NULL
!= (n
= hash
[buc
])) {
325 for ( ; n
->next
; n
= n
->next
)
329 hash
[buc
] = &roffs
[i
];
334 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
335 * the nil-terminated string name could be found.
338 roffhash_find(const char *p
, size_t s
)
344 * libroff has an extremely simple hashtable, for the time
345 * being, which simply keys on the first character, which must
346 * be printable, then walks a chain. It works well enough until
350 if (p
[0] < ASCII_LO
|| p
[0] > ASCII_HI
)
355 if (NULL
== (n
= hash
[buc
]))
357 for ( ; n
; n
= n
->next
)
358 if (0 == strncmp(n
->name
, p
, s
) && '\0' == n
->name
[(int)s
])
359 return((enum rofft
)(n
- roffs
));
366 * Pop the current node off of the stack of roff instructions currently
370 roffnode_pop(struct roff
*r
)
377 r
->last
= r
->last
->parent
;
385 * Push a roff node onto the instruction stack. This must later be
386 * removed with roffnode_pop().
389 roffnode_push(struct roff
*r
, enum rofft tok
, const char *name
,
394 p
= mandoc_calloc(1, sizeof(struct roffnode
));
397 p
->name
= mandoc_strdup(name
);
401 p
->rule
= p
->parent
? p
->parent
->rule
: ROFFRULE_DENY
;
408 roff_free1(struct roff
*r
)
410 struct tbl_node
*tbl
;
414 while (NULL
!= (tbl
= r
->first_tbl
)) {
415 r
->first_tbl
= tbl
->next
;
419 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
421 while (NULL
!= (e
= r
->first_eqn
)) {
422 r
->first_eqn
= e
->next
;
426 r
->first_eqn
= r
->last_eqn
= r
->eqn
= NULL
;
431 roff_freestr(r
->strtab
);
432 roff_freestr(r
->xmbtab
);
434 r
->strtab
= r
->xmbtab
= NULL
;
436 roff_freereg(r
->regtab
);
441 for (i
= 0; i
< 128; i
++)
449 roff_reset(struct roff
*r
)
458 roff_free(struct roff
*r
)
467 roff_alloc(enum mparset type
, struct mparse
*parse
, int quick
)
471 r
= mandoc_calloc(1, sizeof(struct roff
));
483 * In the current line, expand user-defined strings ("\*")
484 * and references to number registers ("\n").
485 * Also check the syntax of other escape sequences.
488 roff_res(struct roff
*r
, char **bufp
, size_t *szp
, int ln
, int pos
)
490 char ubuf
[12]; /* buffer to print the number */
491 const char *stesc
; /* start of an escape sequence ('\\') */
492 const char *stnam
; /* start of the name, after "[(*" */
493 const char *cp
; /* end of the name, e.g. before ']' */
494 const char *res
; /* the string to be substituted */
495 char *nbuf
; /* new buffer to copy bufp to */
496 size_t nsz
; /* size of the new buffer */
497 size_t maxl
; /* expected length of the escape name */
498 size_t naml
; /* actual length of the escape name */
499 int expand_count
; /* to avoid infinite loops */
505 while (NULL
!= (cp
= strchr(cp
, '\\'))) {
509 * The second character must be an asterisk or an n.
510 * If it isn't, skip it anyway: It is escaped,
511 * so it can't start another escape sequence.
525 if (ESCAPE_ERROR
!= mandoc_escape(&cp
, NULL
, NULL
))
528 (MANDOCERR_BADESCAPE
, r
->parse
,
529 ln
, (int)(stesc
- *bufp
), NULL
);
536 * The third character decides the length
537 * of the name of the string or register.
538 * Save a pointer to the name.
558 /* Advance to the end of the name. */
560 for (naml
= 0; 0 == maxl
|| naml
< maxl
; naml
++, cp
++) {
563 (MANDOCERR_BADESCAPE
,
565 (int)(stesc
- *bufp
), NULL
);
568 if (0 == maxl
&& ']' == *cp
)
573 * Retrieve the replacement string; if it is
574 * undefined, resume searching for escapes.
578 res
= roff_getstrn(r
, stnam
, naml
);
580 snprintf(ubuf
, sizeof(ubuf
), "%d",
581 roff_getregn(r
, stnam
, naml
));
585 (MANDOCERR_BADESCAPE
, r
->parse
,
586 ln
, (int)(stesc
- *bufp
), NULL
);
590 /* Replace the escape sequence by the string. */
594 nsz
= *szp
+ strlen(res
) + 1;
595 nbuf
= mandoc_malloc(nsz
);
597 strlcpy(nbuf
, *bufp
, (size_t)(stesc
- *bufp
+ 1));
598 strlcat(nbuf
, res
, nsz
);
599 strlcat(nbuf
, cp
+ (maxl
? 0 : 1), nsz
);
606 if (EXPAND_LIMIT
>= ++expand_count
)
609 /* Just leave the string unexpanded. */
610 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
, ln
, pos
, NULL
);
617 * Process text streams:
618 * Convert all breakable hyphens into ASCII_HYPH.
619 * Decrement and spring input line trap.
622 roff_parsetext(char **bufp
, size_t *szp
, int pos
, int *offs
)
630 start
= p
= *bufp
+ pos
;
633 sz
= strcspn(p
, "-\\");
640 /* Skip over escapes. */
642 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
643 if (ESCAPE_ERROR
== esc
)
646 } else if (p
== start
) {
651 if (isalpha((unsigned char)p
[-1]) &&
652 isalpha((unsigned char)p
[1]))
657 /* Spring the input line trap. */
658 if (1 == roffit_lines
) {
659 isz
= asprintf(&p
, "%s\n.%s", *bufp
, roffit_macro
);
662 exit((int)MANDOCLEVEL_SYSERR
);
670 return(ROFF_REPARSE
);
671 } else if (1 < roffit_lines
)
677 roff_parseln(struct roff
*r
, int ln
, char **bufp
,
678 size_t *szp
, int pos
, int *offs
)
685 * Run the reserved-word filter only if we have some reserved
689 e
= roff_res(r
, bufp
, szp
, ln
, pos
);
692 assert(ROFF_CONT
== e
);
695 ctl
= roff_getcontrol(r
, *bufp
, &pos
);
698 * First, if a scope is open and we're not a macro, pass the
699 * text through the macro's filter. If a scope isn't open and
700 * we're not a macro, just let it through.
701 * Finally, if there's an equation scope open, divert it into it
702 * no matter our state.
705 if (r
->last
&& ! ctl
) {
707 assert(roffs
[t
].text
);
709 (r
, t
, bufp
, szp
, ln
, pos
, pos
, offs
);
710 assert(ROFF_IGN
== e
|| ROFF_CONT
== e
);
715 return(eqn_read(&r
->eqn
, ln
, *bufp
, ppos
, offs
));
718 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
719 return(roff_parsetext(bufp
, szp
, pos
, offs
));
723 * If a scope is open, go to the child handler for that macro,
724 * as it may want to preprocess before doing anything with it.
725 * Don't do so if an equation is open.
730 assert(roffs
[t
].sub
);
731 return((*roffs
[t
].sub
)
733 ln
, ppos
, pos
, offs
));
737 * Lastly, as we've no scope open, try to look up and execute
738 * the new macro. If no macro is found, simply return and let
739 * the compilers handle it.
742 if (ROFF_MAX
== (t
= roff_parse(r
, *bufp
, &pos
)))
745 assert(roffs
[t
].proc
);
746 return((*roffs
[t
].proc
)
748 ln
, ppos
, pos
, offs
));
753 roff_endparse(struct roff
*r
)
757 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
758 r
->last
->line
, r
->last
->col
, NULL
);
761 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
762 r
->eqn
->eqn
.ln
, r
->eqn
->eqn
.pos
, NULL
);
767 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
768 r
->tbl
->line
, r
->tbl
->pos
, NULL
);
774 * Parse a roff node's type from the input buffer. This must be in the
775 * form of ".foo xxx" in the usual way.
778 roff_parse(struct roff
*r
, const char *buf
, int *pos
)
784 if ('\0' == buf
[*pos
] || '"' == buf
[*pos
] ||
785 '\t' == buf
[*pos
] || ' ' == buf
[*pos
])
789 * We stop the macro parse at an escape, tab, space, or nil.
790 * However, `\}' is also a valid macro, so make sure we don't
791 * clobber it by seeing the `\' as the end of token.
795 maclen
= strcspn(mac
+ 1, " \\\t\0") + 1;
797 t
= (r
->current_string
= roff_getstrn(r
, mac
, maclen
))
798 ? ROFF_USERDEF
: roffhash_find(mac
, maclen
);
802 while (buf
[*pos
] && ' ' == buf
[*pos
])
810 roff_cblock(ROFF_ARGS
)
814 * A block-close `..' should only be invoked as a child of an
815 * ignore macro, otherwise raise a warning and just ignore it.
818 if (NULL
== r
->last
) {
819 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
823 switch (r
->last
->tok
) {
831 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
838 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
843 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
846 roffnode_cleanscope(r
);
853 roffnode_cleanscope(struct roff
*r
)
857 if (--r
->last
->endspan
!= 0)
866 roff_ccond(ROFF_ARGS
)
869 if (NULL
== r
->last
) {
870 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
874 switch (r
->last
->tok
) {
882 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
886 if (r
->last
->endspan
> -1) {
887 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
892 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
895 roffnode_cleanscope(r
);
902 roff_block(ROFF_ARGS
)
910 if (ROFF_ig
!= tok
) {
911 if ('\0' == (*bufp
)[pos
]) {
912 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
917 * Re-write `de1', since we don't really care about
918 * groff's strange compatibility mode, into `de'.
926 mandoc_msg(MANDOCERR_REQUEST
, r
->parse
, ln
, ppos
,
929 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
932 while (isspace((unsigned char)(*bufp
)[pos
]))
933 (*bufp
)[pos
++] = '\0';
936 roffnode_push(r
, tok
, name
, ln
, ppos
);
939 * At the beginning of a `de' macro, clear the existing string
940 * with the same name, if there is one. New content will be
941 * added from roff_block_text() in multiline mode.
945 roff_setstr(r
, name
, "", 0);
947 if ('\0' == (*bufp
)[pos
])
950 /* If present, process the custom end-of-block marker. */
953 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
957 * Note: groff does NOT like escape characters in the input.
958 * Instead of detecting this, we're just going to let it fly and
963 sz
= (size_t)(pos
- sv
);
965 if (1 == sz
&& '.' == (*bufp
)[sv
])
968 r
->last
->end
= mandoc_malloc(sz
+ 1);
970 memcpy(r
->last
->end
, *bufp
+ sv
, sz
);
971 r
->last
->end
[(int)sz
] = '\0';
974 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
982 roff_block_sub(ROFF_ARGS
)
988 * First check whether a custom macro exists at this level. If
989 * it does, then check against it. This is one of groff's
990 * stranger behaviours. If we encountered a custom end-scope
991 * tag and that tag also happens to be a "real" macro, then we
992 * need to try interpreting it again as a real macro. If it's
993 * not, then return ignore. Else continue.
997 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
998 if ((*bufp
)[i
] != r
->last
->end
[j
])
1001 if ('\0' == r
->last
->end
[j
] &&
1002 ('\0' == (*bufp
)[i
] ||
1003 ' ' == (*bufp
)[i
] ||
1004 '\t' == (*bufp
)[i
])) {
1006 roffnode_cleanscope(r
);
1008 while (' ' == (*bufp
)[i
] || '\t' == (*bufp
)[i
])
1012 if (ROFF_MAX
!= roff_parse(r
, *bufp
, &pos
))
1019 * If we have no custom end-query or lookup failed, then try
1020 * pulling it out of the hashtable.
1023 t
= roff_parse(r
, *bufp
, &pos
);
1026 * Macros other than block-end are only significant
1027 * in `de' blocks; elsewhere, simply throw them away.
1029 if (ROFF_cblock
!= t
) {
1031 roff_setstr(r
, r
->last
->name
, *bufp
+ ppos
, 1);
1035 assert(roffs
[t
].proc
);
1036 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1037 ln
, ppos
, pos
, offs
));
1043 roff_block_text(ROFF_ARGS
)
1047 roff_setstr(r
, r
->last
->name
, *bufp
+ pos
, 1);
1055 roff_cond_sub(ROFF_ARGS
)
1062 roffnode_cleanscope(r
);
1063 t
= roff_parse(r
, *bufp
, &pos
);
1066 * Fully handle known macros when they are structurally
1067 * required or when the conditional evaluated to true.
1070 if ((ROFF_MAX
!= t
) &&
1071 (ROFF_ccond
== t
|| ROFFRULE_ALLOW
== rr
||
1072 ROFFMAC_STRUCT
& roffs
[t
].flags
)) {
1073 assert(roffs
[t
].proc
);
1074 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1075 ln
, ppos
, pos
, offs
));
1078 /* Always check for the closing delimiter `\}'. */
1081 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1086 * If we're at the end of line, then just chop
1087 * off the \} and resize the buffer.
1088 * If we aren't, then convert it to spaces.
1091 if ('\0' == *(ep
+ 1)) {
1095 *(ep
- 1) = *ep
= ' ';
1097 roff_ccond(r
, ROFF_ccond
, bufp
, szp
,
1098 ln
, pos
, pos
+ 2, offs
);
1101 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
1106 roff_cond_text(ROFF_ARGS
)
1112 roffnode_cleanscope(r
);
1115 for ( ; NULL
!= (ep
= strchr(ep
, '\\')); ep
++) {
1120 roff_ccond(r
, ROFF_ccond
, bufp
, szp
,
1121 ln
, pos
, pos
+ 2, offs
);
1123 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
1127 roff_getnum(const char *v
, int *pos
, int *res
)
1136 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
1137 *res
+= 10 * *res
+ v
[p
] - '0';
1149 roff_getop(const char *v
, int *pos
, char *res
)
1154 e
= v
[*pos
+ 1] == '=';
1176 static enum roffrule
1177 roff_evalcond(const char *v
, int *pos
)
1185 return(ROFFRULE_ALLOW
);
1192 return(ROFFRULE_DENY
);
1202 if (!roff_getnum(v
, pos
, &lh
))
1203 return ROFFRULE_DENY
;
1204 if (!roff_getop(v
, pos
, &op
)) {
1209 if (!roff_getnum(v
, pos
, &rh
))
1210 return ROFFRULE_DENY
;
1228 return ROFFRULE_DENY
;
1233 return lh
? ROFFRULE_ALLOW
: ROFFRULE_DENY
;
1238 roff_line_ignore(ROFF_ARGS
)
1246 roff_cond(ROFF_ARGS
)
1249 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
1252 * An `.el' has no conditional body: it will consume the value
1253 * of the current rstack entry set in prior `ie' calls or
1256 * If we're not an `el', however, then evaluate the conditional.
1259 r
->last
->rule
= ROFF_el
== tok
?
1261 ROFFRULE_DENY
: r
->rstack
[r
->rstackpos
--]) :
1262 roff_evalcond(*bufp
, &pos
);
1265 * An if-else will put the NEGATION of the current evaluated
1266 * conditional into the stack of rules.
1269 if (ROFF_ie
== tok
) {
1270 if (r
->rstackpos
== RSTACK_MAX
- 1) {
1271 mandoc_msg(MANDOCERR_MEM
,
1272 r
->parse
, ln
, ppos
, NULL
);
1275 r
->rstack
[++r
->rstackpos
] =
1276 ROFFRULE_DENY
== r
->last
->rule
?
1277 ROFFRULE_ALLOW
: ROFFRULE_DENY
;
1280 /* If the parent has false as its rule, then so do we. */
1282 if (r
->last
->parent
&& ROFFRULE_DENY
== r
->last
->parent
->rule
)
1283 r
->last
->rule
= ROFFRULE_DENY
;
1287 * If there is nothing on the line after the conditional,
1288 * not even whitespace, use next-line scope.
1291 if ('\0' == (*bufp
)[pos
]) {
1292 r
->last
->endspan
= 2;
1296 while (' ' == (*bufp
)[pos
])
1299 /* An opening brace requests multiline scope. */
1301 if ('\\' == (*bufp
)[pos
] && '{' == (*bufp
)[pos
+ 1]) {
1302 r
->last
->endspan
= -1;
1308 * Anything else following the conditional causes
1309 * single-line scope. Warn if the scope contains
1310 * nothing but trailing whitespace.
1313 if ('\0' == (*bufp
)[pos
])
1314 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
1316 r
->last
->endspan
= 1;
1328 char *name
, *string
;
1331 * A symbol is named by the first word following the macro
1332 * invocation up to a space. Its value is anything after the
1333 * name's trailing whitespace and optional double-quote. Thus,
1337 * will have `bar " ' as its value.
1340 string
= *bufp
+ pos
;
1341 name
= roff_getname(r
, &string
, ln
, pos
);
1345 /* Read past initial double-quote. */
1349 /* The rest is the value. */
1350 roff_setstr(r
, name
, string
, 0);
1355 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
1357 struct roffreg
*reg
;
1359 /* Search for an existing register with the same name. */
1362 while (reg
&& strcmp(name
, reg
->key
.p
))
1366 /* Create a new register. */
1367 reg
= mandoc_malloc(sizeof(struct roffreg
));
1368 reg
->key
.p
= mandoc_strdup(name
);
1369 reg
->key
.sz
= strlen(name
);
1371 reg
->next
= r
->regtab
;
1377 else if ('-' == sign
)
1384 roff_getreg(const struct roff
*r
, const char *name
)
1386 struct roffreg
*reg
;
1388 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1389 if (0 == strcmp(name
, reg
->key
.p
))
1396 roff_getregn(const struct roff
*r
, const char *name
, size_t len
)
1398 struct roffreg
*reg
;
1400 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1401 if (len
== reg
->key
.sz
&&
1402 0 == strncmp(name
, reg
->key
.p
, len
))
1409 roff_freereg(struct roffreg
*reg
)
1411 struct roffreg
*old_reg
;
1413 while (NULL
!= reg
) {
1432 key
= roff_getname(r
, &val
, ln
, pos
);
1435 if ('+' == sign
|| '-' == sign
)
1438 sz
= strspn(val
, "0123456789");
1439 iv
= sz
? mandoc_strntoi(val
, sz
, 10) : 0;
1441 roff_setreg(r
, key
, iv
, sign
);
1454 while ('\0' != *cp
) {
1455 name
= roff_getname(r
, &cp
, ln
, (int)(cp
- *bufp
));
1457 roff_setstr(r
, name
, NULL
, 0);
1470 /* Parse the number of lines. */
1472 len
= strcspn(cp
, " \t");
1474 if ((iv
= mandoc_strntoi(cp
, len
, 10)) <= 0) {
1475 mandoc_msg(MANDOCERR_NUMERIC
, r
->parse
,
1476 ln
, ppos
, *bufp
+ 1);
1481 /* Arm the input line trap. */
1483 roffit_macro
= mandoc_strdup(cp
);
1491 const char *const *cp
;
1493 if (0 == r
->quick
&& MPARSE_MDOC
!= r
->parsetype
)
1494 for (cp
= __mdoc_reserved
; *cp
; cp
++)
1495 roff_setstr(r
, *cp
, NULL
, 0);
1504 const char *const *cp
;
1506 if (0 == r
->quick
&& MPARSE_MDOC
!= r
->parsetype
)
1507 for (cp
= __man_reserved
; *cp
; cp
++)
1508 roff_setstr(r
, *cp
, NULL
, 0);
1519 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1532 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1534 tbl_restart(ppos
, ln
, r
->tbl
);
1541 roff_closeeqn(struct roff
*r
)
1544 return(r
->eqn
&& ROFF_EQN
== eqn_end(&r
->eqn
) ? 1 : 0);
1549 roff_openeqn(struct roff
*r
, const char *name
, int line
,
1550 int offs
, const char *buf
)
1555 assert(NULL
== r
->eqn
);
1556 e
= eqn_alloc(name
, offs
, line
, r
->parse
);
1559 r
->last_eqn
->next
= e
;
1561 r
->first_eqn
= r
->last_eqn
= e
;
1563 r
->eqn
= r
->last_eqn
= e
;
1567 eqn_read(&r
->eqn
, line
, buf
, offs
, &poff
);
1576 roff_openeqn(r
, *bufp
+ pos
, ln
, ppos
, NULL
);
1585 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1593 struct tbl_node
*tbl
;
1596 mandoc_msg(MANDOCERR_SCOPEBROKEN
, r
->parse
, ln
, ppos
, NULL
);
1600 tbl
= tbl_alloc(ppos
, ln
, r
->parse
);
1603 r
->last_tbl
->next
= tbl
;
1605 r
->first_tbl
= r
->last_tbl
= tbl
;
1607 r
->tbl
= r
->last_tbl
= tbl
;
1619 if ('\0' == *p
|| '.' == (r
->control
= *p
++))
1623 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1632 const char *p
, *first
, *second
;
1634 enum mandoc_esc esc
;
1639 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1643 while ('\0' != *p
) {
1647 if ('\\' == *first
) {
1648 esc
= mandoc_escape(&p
, NULL
, NULL
);
1649 if (ESCAPE_ERROR
== esc
) {
1651 (MANDOCERR_BADESCAPE
, r
->parse
,
1652 ln
, (int)(p
- *bufp
), NULL
);
1655 fsz
= (size_t)(p
- first
);
1659 if ('\\' == *second
) {
1660 esc
= mandoc_escape(&p
, NULL
, NULL
);
1661 if (ESCAPE_ERROR
== esc
) {
1663 (MANDOCERR_BADESCAPE
, r
->parse
,
1664 ln
, (int)(p
- *bufp
), NULL
);
1667 ssz
= (size_t)(p
- second
);
1668 } else if ('\0' == *second
) {
1669 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
,
1670 ln
, (int)(p
- *bufp
), NULL
);
1676 roff_setstrn(&r
->xmbtab
, first
,
1677 fsz
, second
, ssz
, 0);
1681 if (NULL
== r
->xtab
)
1682 r
->xtab
= mandoc_calloc
1683 (128, sizeof(struct roffstr
));
1685 free(r
->xtab
[(int)*first
].p
);
1686 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
1687 r
->xtab
[(int)*first
].sz
= ssz
;
1699 mandoc_msg(MANDOCERR_SO
, r
->parse
, ln
, ppos
, NULL
);
1702 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1703 * opening anything that's not in our cwd or anything beneath
1704 * it. Thus, explicitly disallow traversing up the file-system
1705 * or using absolute paths.
1709 if ('/' == *name
|| strstr(name
, "../") || strstr(name
, "/..")) {
1710 mandoc_msg(MANDOCERR_SOPATH
, r
->parse
, ln
, pos
, NULL
);
1720 roff_userdef(ROFF_ARGS
)
1727 * Collect pointers to macro argument strings
1728 * and NUL-terminate them.
1731 for (i
= 0; i
< 9; i
++)
1732 arg
[i
] = '\0' == *cp
? "" :
1733 mandoc_getarg(r
->parse
, &cp
, ln
, &pos
);
1736 * Expand macro arguments.
1739 n1
= cp
= mandoc_strdup(r
->current_string
);
1740 while (NULL
!= (cp
= strstr(cp
, "\\$"))) {
1742 if (0 > i
|| 8 < i
) {
1743 /* Not an argument invocation. */
1748 *szp
= strlen(n1
) - 3 + strlen(arg
[i
]) + 1;
1749 n2
= mandoc_malloc(*szp
);
1751 strlcpy(n2
, n1
, (size_t)(cp
- n1
+ 1));
1752 strlcat(n2
, arg
[i
], *szp
);
1753 strlcat(n2
, cp
+ 3, *szp
);
1755 cp
= n2
+ (cp
- n1
);
1761 * Replace the macro invocation
1762 * by the expanded macro.
1767 *szp
= strlen(*bufp
) + 1;
1769 return(*szp
> 1 && '\n' == (*bufp
)[(int)*szp
- 2] ?
1770 ROFF_REPARSE
: ROFF_APPEND
);
1774 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
1782 /* Read until end of name. */
1783 for (cp
= name
; '\0' != *cp
&& ' ' != *cp
; cp
++) {
1789 mandoc_msg(MANDOCERR_NAMESC
, r
->parse
, ln
, pos
, NULL
);
1794 /* Nil-terminate name. */
1798 /* Read past spaces. */
1807 * Store *string into the user-defined string called *name.
1808 * In multiline mode, append to an existing entry and append '\n';
1809 * else replace the existing entry, if there is one.
1810 * To clear an existing entry, call with (*r, *name, NULL, 0).
1813 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
1817 roff_setstrn(&r
->strtab
, name
, strlen(name
), string
,
1818 string
? strlen(string
) : 0, multiline
);
1822 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
1823 const char *string
, size_t stringsz
, int multiline
)
1828 size_t oldch
, newch
;
1830 /* Search for an existing string with the same name. */
1833 while (n
&& strcmp(name
, n
->key
.p
))
1837 /* Create a new string table entry. */
1838 n
= mandoc_malloc(sizeof(struct roffkv
));
1839 n
->key
.p
= mandoc_strndup(name
, namesz
);
1845 } else if (0 == multiline
) {
1846 /* In multiline mode, append; else replace. */
1856 * One additional byte for the '\n' in multiline mode,
1857 * and one for the terminating '\0'.
1859 newch
= stringsz
+ (multiline
? 2u : 1u);
1861 if (NULL
== n
->val
.p
) {
1862 n
->val
.p
= mandoc_malloc(newch
);
1867 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
1870 /* Skip existing content in the destination buffer. */
1871 c
= n
->val
.p
+ (int)oldch
;
1873 /* Append new content to the destination buffer. */
1875 while (i
< (int)stringsz
) {
1877 * Rudimentary roff copy mode:
1878 * Handle escaped backslashes.
1880 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
1885 /* Append terminating bytes. */
1890 n
->val
.sz
= (int)(c
- n
->val
.p
);
1894 roff_getstrn(const struct roff
*r
, const char *name
, size_t len
)
1896 const struct roffkv
*n
;
1899 for (n
= r
->strtab
; n
; n
= n
->next
)
1900 if (0 == strncmp(name
, n
->key
.p
, len
) &&
1901 '\0' == n
->key
.p
[(int)len
])
1904 for (i
= 0; i
< PREDEFS_MAX
; i
++)
1905 if (0 == strncmp(name
, predefs
[i
].name
, len
) &&
1906 '\0' == predefs
[i
].name
[(int)len
])
1907 return(predefs
[i
].str
);
1913 roff_freestr(struct roffkv
*r
)
1915 struct roffkv
*n
, *nn
;
1917 for (n
= r
; n
; n
= nn
) {
1925 const struct tbl_span
*
1926 roff_span(const struct roff
*r
)
1929 return(r
->tbl
? tbl_span(r
->tbl
) : NULL
);
1933 roff_eqn(const struct roff
*r
)
1936 return(r
->last_eqn
? &r
->last_eqn
->eqn
: NULL
);
1940 * Duplicate an input string, making the appropriate character
1941 * conversions (as stipulated by `tr') along the way.
1942 * Returns a heap-allocated string with all the replacements made.
1945 roff_strdup(const struct roff
*r
, const char *p
)
1947 const struct roffkv
*cp
;
1951 enum mandoc_esc esc
;
1953 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
1954 return(mandoc_strdup(p
));
1955 else if ('\0' == *p
)
1956 return(mandoc_strdup(""));
1959 * Step through each character looking for term matches
1960 * (remember that a `tr' can be invoked with an escape, which is
1961 * a glyph but the escape is multi-character).
1962 * We only do this if the character hash has been initialised
1963 * and the string is >0 length.
1969 while ('\0' != *p
) {
1970 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(int)*p
].p
) {
1971 sz
= r
->xtab
[(int)*p
].sz
;
1972 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
1973 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
1977 } else if ('\\' != *p
) {
1978 res
= mandoc_realloc(res
, ssz
+ 2);
1983 /* Search for term matches. */
1984 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
1985 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
1990 * A match has been found.
1991 * Append the match to the array and move
1992 * forward by its keysize.
1994 res
= mandoc_realloc
1995 (res
, ssz
+ cp
->val
.sz
+ 1);
1996 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
1998 p
+= (int)cp
->key
.sz
;
2003 * Handle escapes carefully: we need to copy
2004 * over just the escape itself, or else we might
2005 * do replacements within the escape itself.
2006 * Make sure to pass along the bogus string.
2009 esc
= mandoc_escape(&p
, NULL
, NULL
);
2010 if (ESCAPE_ERROR
== esc
) {
2012 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2013 memcpy(res
+ ssz
, pp
, sz
);
2017 * We bail out on bad escapes.
2018 * No need to warn: we already did so when
2019 * roff_res() was called.
2022 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2023 memcpy(res
+ ssz
, pp
, sz
);
2027 res
[(int)ssz
] = '\0';
2032 * Find out whether a line is a macro line or not.
2033 * If it is, adjust the current position and return one; if it isn't,
2034 * return zero and don't change the current position.
2035 * If the control character has been set with `.cc', then let that gain
2037 * This is slightly contrary to groff, where using the non-breaking
2038 * control character when `cc' has been invoked will cause the
2039 * non-breaking macro contents to be printed verbatim.
2042 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
2048 if (0 != r
->control
&& cp
[pos
] == r
->control
)
2050 else if (0 != r
->control
)
2052 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
2054 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
2059 while (' ' == cp
[pos
] || '\t' == cp
[pos
])