]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.176 2013/05/31 22:08:09 schwarze Exp $ */
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
29 #include "libmandoc.h"
31 /* Maximum number of nested if-else conditionals. */
32 #define RSTACK_MAX 128
34 /* Maximum number of string expansions per line, to break infinite loops. */
35 #define EXPAND_LIMIT 1000
81 * A single register entity. If "set" is zero, the value of the
82 * register should be the default one, which is per-register.
83 * Registers are assumed to be unsigned ints for now.
86 int set
; /* whether set or not */
87 unsigned int u
; /* unsigned integer */
91 * An incredibly-simple string buffer.
94 char *p
; /* nil-terminated buffer */
95 size_t sz
; /* saved strlen(p) */
99 * A key-value roffstr pair as part of a singly-linked list.
104 struct roffkv
*next
; /* next in list */
108 enum mparset parsetype
; /* requested parse type */
109 struct mparse
*parse
; /* parse point */
110 struct roffnode
*last
; /* leaf of stack */
111 enum roffrule rstack
[RSTACK_MAX
]; /* stack of !`ie' rules */
112 char control
; /* control character */
113 int rstackpos
; /* position in rstack */
114 struct reg regs
[REG__MAX
];
115 struct roffkv
*strtab
; /* user-defined strings & macros */
116 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
117 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
118 const char *current_string
; /* value of last called user macro */
119 struct tbl_node
*first_tbl
; /* first table parsed */
120 struct tbl_node
*last_tbl
; /* last table parsed */
121 struct tbl_node
*tbl
; /* current table being parsed */
122 struct eqn_node
*last_eqn
; /* last equation parsed */
123 struct eqn_node
*first_eqn
; /* first equation parsed */
124 struct eqn_node
*eqn
; /* current equation being parsed */
128 enum rofft tok
; /* type of node */
129 struct roffnode
*parent
; /* up one in stack */
130 int line
; /* parse line */
131 int col
; /* parse col */
132 char *name
; /* node name, e.g. macro name */
133 char *end
; /* end-rules: custom token */
134 int endspan
; /* end-rules: next-line or infty */
135 enum roffrule rule
; /* current evaluation rule */
138 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
139 enum rofft tok, /* tok of macro */ \
140 char **bufp, /* input buffer */ \
141 size_t *szp, /* size of input buffer */ \
142 int ln, /* parse line */ \
143 int ppos, /* original pos in buffer */ \
144 int pos, /* current pos in buffer */ \
145 int *offs /* reset offset of buffer data */
147 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
150 const char *name
; /* macro name */
151 roffproc proc
; /* process new macro */
152 roffproc text
; /* process as child text of macro */
153 roffproc sub
; /* process as child of macro */
155 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
156 struct roffmac
*next
;
160 const char *name
; /* predefined input name */
161 const char *str
; /* replacement symbol */
164 #define PREDEF(__name, __str) \
165 { (__name), (__str) },
167 static enum rofft
roffhash_find(const char *, size_t);
168 static void roffhash_init(void);
169 static void roffnode_cleanscope(struct roff
*);
170 static void roffnode_pop(struct roff
*);
171 static void roffnode_push(struct roff
*, enum rofft
,
172 const char *, int, int);
173 static enum rofferr
roff_block(ROFF_ARGS
);
174 static enum rofferr
roff_block_text(ROFF_ARGS
);
175 static enum rofferr
roff_block_sub(ROFF_ARGS
);
176 static enum rofferr
roff_cblock(ROFF_ARGS
);
177 static enum rofferr
roff_cc(ROFF_ARGS
);
178 static enum rofferr
roff_ccond(ROFF_ARGS
);
179 static enum rofferr
roff_cond(ROFF_ARGS
);
180 static enum rofferr
roff_cond_text(ROFF_ARGS
);
181 static enum rofferr
roff_cond_sub(ROFF_ARGS
);
182 static enum rofferr
roff_ds(ROFF_ARGS
);
183 static enum roffrule
roff_evalcond(const char *, int *);
184 static void roff_free1(struct roff
*);
185 static void roff_freestr(struct roffkv
*);
186 static char *roff_getname(struct roff
*, char **, int, int);
187 static const char *roff_getstrn(const struct roff
*,
188 const char *, size_t);
189 static enum rofferr
roff_line_ignore(ROFF_ARGS
);
190 static enum rofferr
roff_nr(ROFF_ARGS
);
191 static void roff_openeqn(struct roff
*, const char *,
192 int, int, const char *);
193 static enum rofft
roff_parse(struct roff
*, const char *, int *);
194 static enum rofferr
roff_parsetext(char *);
195 static enum rofferr
roff_res(struct roff
*,
196 char **, size_t *, int, int);
197 static enum rofferr
roff_rm(ROFF_ARGS
);
198 static void roff_setstr(struct roff
*,
199 const char *, const char *, int);
200 static void roff_setstrn(struct roffkv
**, const char *,
201 size_t, const char *, size_t, int);
202 static enum rofferr
roff_so(ROFF_ARGS
);
203 static enum rofferr
roff_tr(ROFF_ARGS
);
204 static enum rofferr
roff_Dd(ROFF_ARGS
);
205 static enum rofferr
roff_TH(ROFF_ARGS
);
206 static enum rofferr
roff_TE(ROFF_ARGS
);
207 static enum rofferr
roff_TS(ROFF_ARGS
);
208 static enum rofferr
roff_EQ(ROFF_ARGS
);
209 static enum rofferr
roff_EN(ROFF_ARGS
);
210 static enum rofferr
roff_T_(ROFF_ARGS
);
211 static enum rofferr
roff_userdef(ROFF_ARGS
);
213 /* See roffhash_find() */
217 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
219 static struct roffmac
*hash
[HASHWIDTH
];
221 static struct roffmac roffs
[ROFF_MAX
] = {
222 { "ad", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
223 { "am", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
224 { "ami", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
225 { "am1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
226 { "cc", roff_cc
, NULL
, NULL
, 0, NULL
},
227 { "de", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
228 { "dei", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
229 { "de1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
230 { "ds", roff_ds
, NULL
, NULL
, 0, NULL
},
231 { "el", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
232 { "hy", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
233 { "ie", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
234 { "if", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
235 { "ig", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
236 { "it", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
237 { "ne", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
238 { "nh", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
239 { "nr", roff_nr
, NULL
, NULL
, 0, NULL
},
240 { "ns", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
241 { "ps", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
242 { "rm", roff_rm
, NULL
, NULL
, 0, NULL
},
243 { "so", roff_so
, NULL
, NULL
, 0, NULL
},
244 { "ta", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
245 { "tr", roff_tr
, NULL
, NULL
, 0, NULL
},
246 { "Dd", roff_Dd
, NULL
, NULL
, 0, NULL
},
247 { "TH", roff_TH
, NULL
, NULL
, 0, NULL
},
248 { "TS", roff_TS
, NULL
, NULL
, 0, NULL
},
249 { "TE", roff_TE
, NULL
, NULL
, 0, NULL
},
250 { "T&", roff_T_
, NULL
, NULL
, 0, NULL
},
251 { "EQ", roff_EQ
, NULL
, NULL
, 0, NULL
},
252 { "EN", roff_EN
, NULL
, NULL
, 0, NULL
},
253 { ".", roff_cblock
, NULL
, NULL
, 0, NULL
},
254 { "\\}", roff_ccond
, NULL
, NULL
, 0, NULL
},
255 { NULL
, roff_userdef
, NULL
, NULL
, 0, NULL
},
258 const char *const __mdoc_reserved
[] = {
259 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
260 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
261 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
262 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
263 "Ds", "Dt", "Dv", "Dx", "D1",
264 "Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
265 "En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
266 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
267 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
268 "Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
269 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
270 "Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
271 "Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
272 "Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
273 "Ss", "St", "Sx", "Sy",
274 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
275 "%A", "%B", "%D", "%I", "%J", "%N", "%O",
276 "%P", "%Q", "%R", "%T", "%U", "%V",
280 const char *const __man_reserved
[] = {
281 "AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
282 "EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
283 "LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
284 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
285 "TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
289 /* Array of injected predefined strings. */
290 #define PREDEFS_MAX 38
291 static const struct predef predefs
[PREDEFS_MAX
] = {
292 #include "predefs.in"
295 /* See roffhash_find() */
296 #define ROFF_HASH(p) (p[0] - ASCII_LO)
304 for (i
= 0; i
< (int)ROFF_USERDEF
; i
++) {
305 assert(roffs
[i
].name
[0] >= ASCII_LO
);
306 assert(roffs
[i
].name
[0] <= ASCII_HI
);
308 buc
= ROFF_HASH(roffs
[i
].name
);
310 if (NULL
!= (n
= hash
[buc
])) {
311 for ( ; n
->next
; n
= n
->next
)
315 hash
[buc
] = &roffs
[i
];
320 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
321 * the nil-terminated string name could be found.
324 roffhash_find(const char *p
, size_t s
)
330 * libroff has an extremely simple hashtable, for the time
331 * being, which simply keys on the first character, which must
332 * be printable, then walks a chain. It works well enough until
336 if (p
[0] < ASCII_LO
|| p
[0] > ASCII_HI
)
341 if (NULL
== (n
= hash
[buc
]))
343 for ( ; n
; n
= n
->next
)
344 if (0 == strncmp(n
->name
, p
, s
) && '\0' == n
->name
[(int)s
])
345 return((enum rofft
)(n
- roffs
));
352 * Pop the current node off of the stack of roff instructions currently
356 roffnode_pop(struct roff
*r
)
363 r
->last
= r
->last
->parent
;
371 * Push a roff node onto the instruction stack. This must later be
372 * removed with roffnode_pop().
375 roffnode_push(struct roff
*r
, enum rofft tok
, const char *name
,
380 p
= mandoc_calloc(1, sizeof(struct roffnode
));
383 p
->name
= mandoc_strdup(name
);
387 p
->rule
= p
->parent
? p
->parent
->rule
: ROFFRULE_DENY
;
394 roff_free1(struct roff
*r
)
396 struct tbl_node
*tbl
;
400 while (NULL
!= (tbl
= r
->first_tbl
)) {
401 r
->first_tbl
= tbl
->next
;
405 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
407 while (NULL
!= (e
= r
->first_eqn
)) {
408 r
->first_eqn
= e
->next
;
412 r
->first_eqn
= r
->last_eqn
= r
->eqn
= NULL
;
417 roff_freestr(r
->strtab
);
418 roff_freestr(r
->xmbtab
);
420 r
->strtab
= r
->xmbtab
= NULL
;
423 for (i
= 0; i
< 128; i
++)
431 roff_reset(struct roff
*r
)
438 memset(&r
->regs
, 0, sizeof(struct reg
) * REG__MAX
);
440 for (i
= 0; i
< PREDEFS_MAX
; i
++)
441 roff_setstr(r
, predefs
[i
].name
, predefs
[i
].str
, 0);
446 roff_free(struct roff
*r
)
455 roff_alloc(enum mparset type
, struct mparse
*parse
)
460 r
= mandoc_calloc(1, sizeof(struct roff
));
467 for (i
= 0; i
< PREDEFS_MAX
; i
++)
468 roff_setstr(r
, predefs
[i
].name
, predefs
[i
].str
, 0);
474 * Pre-filter each and every line for reserved words (one beginning with
475 * `\*', e.g., `\*(ab'). These must be handled before the actual line
477 * This also checks the syntax of regular escapes.
480 roff_res(struct roff
*r
, char **bufp
, size_t *szp
, int ln
, int pos
)
483 const char *stesc
; /* start of an escape sequence ('\\') */
484 const char *stnam
; /* start of the name, after "[(*" */
485 const char *cp
; /* end of the name, e.g. before ']' */
486 const char *res
; /* the string to be substituted */
487 int i
, maxl
, expand_count
;
495 while (NULL
!= (cp
= strchr(cp
, '\\'))) {
499 * The second character must be an asterisk.
500 * If it isn't, skip it anyway: It is escaped,
501 * so it can't start another escape sequence.
509 esc
= mandoc_escape(&cp
, NULL
, NULL
);
510 if (ESCAPE_ERROR
!= esc
)
514 (MANDOCERR_BADESCAPE
, r
->parse
,
515 ln
, (int)(stesc
- *bufp
), NULL
);
522 * The third character decides the length
523 * of the name of the string.
524 * Save a pointer to the name.
544 /* Advance to the end of the name. */
546 for (i
= 0; 0 == maxl
|| i
< maxl
; i
++, cp
++) {
549 (MANDOCERR_BADESCAPE
,
551 (int)(stesc
- *bufp
), NULL
);
554 if (0 == maxl
&& ']' == *cp
)
559 * Retrieve the replacement string; if it is
560 * undefined, resume searching for escapes.
563 res
= roff_getstrn(r
, stnam
, (size_t)i
);
567 (MANDOCERR_BADESCAPE
, r
->parse
,
568 ln
, (int)(stesc
- *bufp
), NULL
);
572 /* Replace the escape sequence by the string. */
576 nsz
= *szp
+ strlen(res
) + 1;
577 n
= mandoc_malloc(nsz
);
579 strlcpy(n
, *bufp
, (size_t)(stesc
- *bufp
+ 1));
580 strlcat(n
, res
, nsz
);
581 strlcat(n
, cp
+ (maxl
? 0 : 1), nsz
);
588 if (EXPAND_LIMIT
>= ++expand_count
)
591 /* Just leave the string unexpanded. */
592 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
, ln
, pos
, NULL
);
599 * Process text streams: convert all breakable hyphens into ASCII_HYPH.
602 roff_parsetext(char *p
)
611 sz
= strcspn(p
, "-\\");
618 /* Skip over escapes. */
621 ((const char **)&p
, NULL
, NULL
);
622 if (ESCAPE_ERROR
== esc
)
625 } else if (p
== start
) {
630 if (isalpha((unsigned char)p
[-1]) &&
631 isalpha((unsigned char)p
[1]))
640 roff_parseln(struct roff
*r
, int ln
, char **bufp
,
641 size_t *szp
, int pos
, int *offs
)
648 * Run the reserved-word filter only if we have some reserved
652 e
= roff_res(r
, bufp
, szp
, ln
, pos
);
655 assert(ROFF_CONT
== e
);
658 ctl
= roff_getcontrol(r
, *bufp
, &pos
);
661 * First, if a scope is open and we're not a macro, pass the
662 * text through the macro's filter. If a scope isn't open and
663 * we're not a macro, just let it through.
664 * Finally, if there's an equation scope open, divert it into it
665 * no matter our state.
668 if (r
->last
&& ! ctl
) {
670 assert(roffs
[t
].text
);
672 (r
, t
, bufp
, szp
, ln
, pos
, pos
, offs
);
673 assert(ROFF_IGN
== e
|| ROFF_CONT
== e
);
677 return(eqn_read(&r
->eqn
, ln
, *bufp
, pos
, offs
));
679 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
680 return(roff_parsetext(*bufp
+ pos
));
683 return(eqn_read(&r
->eqn
, ln
, *bufp
, pos
, offs
));
685 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
686 return(roff_parsetext(*bufp
+ pos
));
688 return(eqn_read(&r
->eqn
, ln
, *bufp
, ppos
, offs
));
691 * If a scope is open, go to the child handler for that macro,
692 * as it may want to preprocess before doing anything with it.
693 * Don't do so if an equation is open.
698 assert(roffs
[t
].sub
);
699 return((*roffs
[t
].sub
)
701 ln
, ppos
, pos
, offs
));
705 * Lastly, as we've no scope open, try to look up and execute
706 * the new macro. If no macro is found, simply return and let
707 * the compilers handle it.
710 if (ROFF_MAX
== (t
= roff_parse(r
, *bufp
, &pos
)))
713 assert(roffs
[t
].proc
);
714 return((*roffs
[t
].proc
)
716 ln
, ppos
, pos
, offs
));
721 roff_endparse(struct roff
*r
)
725 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
726 r
->last
->line
, r
->last
->col
, NULL
);
729 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
730 r
->eqn
->eqn
.ln
, r
->eqn
->eqn
.pos
, NULL
);
735 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
736 r
->tbl
->line
, r
->tbl
->pos
, NULL
);
742 * Parse a roff node's type from the input buffer. This must be in the
743 * form of ".foo xxx" in the usual way.
746 roff_parse(struct roff
*r
, const char *buf
, int *pos
)
752 if ('\0' == buf
[*pos
] || '"' == buf
[*pos
] ||
753 '\t' == buf
[*pos
] || ' ' == buf
[*pos
])
757 * We stop the macro parse at an escape, tab, space, or nil.
758 * However, `\}' is also a valid macro, so make sure we don't
759 * clobber it by seeing the `\' as the end of token.
763 maclen
= strcspn(mac
+ 1, " \\\t\0") + 1;
765 t
= (r
->current_string
= roff_getstrn(r
, mac
, maclen
))
766 ? ROFF_USERDEF
: roffhash_find(mac
, maclen
);
770 while (buf
[*pos
] && ' ' == buf
[*pos
])
778 roff_cblock(ROFF_ARGS
)
782 * A block-close `..' should only be invoked as a child of an
783 * ignore macro, otherwise raise a warning and just ignore it.
786 if (NULL
== r
->last
) {
787 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
791 switch (r
->last
->tok
) {
799 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
806 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
811 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
814 roffnode_cleanscope(r
);
821 roffnode_cleanscope(struct roff
*r
)
825 if (--r
->last
->endspan
!= 0)
834 roff_ccond(ROFF_ARGS
)
837 if (NULL
== r
->last
) {
838 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
842 switch (r
->last
->tok
) {
850 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
854 if (r
->last
->endspan
> -1) {
855 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
860 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
863 roffnode_cleanscope(r
);
870 roff_block(ROFF_ARGS
)
878 if (ROFF_ig
!= tok
) {
879 if ('\0' == (*bufp
)[pos
]) {
880 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
885 * Re-write `de1', since we don't really care about
886 * groff's strange compatibility mode, into `de'.
894 mandoc_msg(MANDOCERR_REQUEST
, r
->parse
, ln
, ppos
,
897 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
900 while (isspace((unsigned char)(*bufp
)[pos
]))
901 (*bufp
)[pos
++] = '\0';
904 roffnode_push(r
, tok
, name
, ln
, ppos
);
907 * At the beginning of a `de' macro, clear the existing string
908 * with the same name, if there is one. New content will be
909 * added from roff_block_text() in multiline mode.
913 roff_setstr(r
, name
, "", 0);
915 if ('\0' == (*bufp
)[pos
])
918 /* If present, process the custom end-of-line marker. */
921 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
925 * Note: groff does NOT like escape characters in the input.
926 * Instead of detecting this, we're just going to let it fly and
931 sz
= (size_t)(pos
- sv
);
933 if (1 == sz
&& '.' == (*bufp
)[sv
])
936 r
->last
->end
= mandoc_malloc(sz
+ 1);
938 memcpy(r
->last
->end
, *bufp
+ sv
, sz
);
939 r
->last
->end
[(int)sz
] = '\0';
942 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
950 roff_block_sub(ROFF_ARGS
)
956 * First check whether a custom macro exists at this level. If
957 * it does, then check against it. This is some of groff's
958 * stranger behaviours (this is one of them). If we encountered a custom end-scope
959 * tag and that tag also happens to be a "real" macro, then we
960 * need to try interpreting it again as a real macro. If it's
961 * not, then return ignore. Else continue.
965 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
966 if ((*bufp
)[i
] != r
->last
->end
[j
])
969 if ('\0' == r
->last
->end
[j
] &&
970 ('\0' == (*bufp
)[i
] ||
972 '\t' == (*bufp
)[i
])) {
974 roffnode_cleanscope(r
);
976 while (' ' == (*bufp
)[i
] || '\t' == (*bufp
)[i
])
980 if (ROFF_MAX
!= roff_parse(r
, *bufp
, &pos
))
987 * If we have no custom end-query or lookup failed, then try
988 * pulling it out of the hashtable.
991 t
= roff_parse(r
, *bufp
, &pos
);
994 * Macros other than block-end are only significant
995 * in `de' blocks; elsewhere, simply throw them away.
997 if (ROFF_cblock
!= t
) {
999 roff_setstr(r
, r
->last
->name
, *bufp
+ ppos
, 1);
1003 assert(roffs
[t
].proc
);
1004 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1005 ln
, ppos
, pos
, offs
));
1011 roff_block_text(ROFF_ARGS
)
1015 roff_setstr(r
, r
->last
->name
, *bufp
+ pos
, 1);
1023 roff_cond_sub(ROFF_ARGS
)
1030 roffnode_cleanscope(r
);
1033 * If the macro is unknown, first check if it contains a closing
1034 * delimiter `\}'. If it does, close out our scope and return
1035 * the currently-scoped rule (ignore or continue). Else, drop
1036 * into the currently-scoped rule.
1039 if (ROFF_MAX
== (t
= roff_parse(r
, *bufp
, &pos
))) {
1041 for ( ; NULL
!= (ep
= strchr(ep
, '\\')); ep
++) {
1047 * Make the \} go away.
1048 * This is a little haphazard, as it's not quite
1049 * clear how nroff does this.
1050 * If we're at the end of line, then just chop
1051 * off the \} and resize the buffer.
1052 * If we aren't, then convert it to spaces.
1055 if ('\0' == *(ep
+ 1)) {
1059 *(ep
- 1) = *ep
= ' ';
1061 roff_ccond(r
, ROFF_ccond
, bufp
, szp
,
1062 ln
, pos
, pos
+ 2, offs
);
1065 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
1069 * A denied conditional must evaluate its children if and only
1070 * if they're either structurally required (such as loops and
1071 * conditionals) or a closing macro.
1074 if (ROFFRULE_DENY
== rr
)
1075 if ( ! (ROFFMAC_STRUCT
& roffs
[t
].flags
))
1076 if (ROFF_ccond
!= t
)
1079 assert(roffs
[t
].proc
);
1080 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1081 ln
, ppos
, pos
, offs
));
1086 roff_cond_text(ROFF_ARGS
)
1092 roffnode_cleanscope(r
);
1095 for ( ; NULL
!= (ep
= strchr(ep
, '\\')); ep
++) {
1100 roff_ccond(r
, ROFF_ccond
, bufp
, szp
,
1101 ln
, pos
, pos
+ 2, offs
);
1103 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
1106 static enum roffrule
1107 roff_evalcond(const char *v
, int *pos
)
1113 return(ROFFRULE_ALLOW
);
1120 return(ROFFRULE_DENY
);
1125 while (v
[*pos
] && ' ' != v
[*pos
])
1127 return(ROFFRULE_DENY
);
1132 roff_line_ignore(ROFF_ARGS
)
1136 mandoc_msg(MANDOCERR_REQUEST
, r
->parse
, ln
, ppos
, "it");
1143 roff_cond(ROFF_ARGS
)
1146 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
1149 * An `.el' has no conditional body: it will consume the value
1150 * of the current rstack entry set in prior `ie' calls or
1153 * If we're not an `el', however, then evaluate the conditional.
1156 r
->last
->rule
= ROFF_el
== tok
?
1158 ROFFRULE_DENY
: r
->rstack
[r
->rstackpos
--]) :
1159 roff_evalcond(*bufp
, &pos
);
1162 * An if-else will put the NEGATION of the current evaluated
1163 * conditional into the stack of rules.
1166 if (ROFF_ie
== tok
) {
1167 if (r
->rstackpos
== RSTACK_MAX
- 1) {
1168 mandoc_msg(MANDOCERR_MEM
,
1169 r
->parse
, ln
, ppos
, NULL
);
1172 r
->rstack
[++r
->rstackpos
] =
1173 ROFFRULE_DENY
== r
->last
->rule
?
1174 ROFFRULE_ALLOW
: ROFFRULE_DENY
;
1177 /* If the parent has false as its rule, then so do we. */
1179 if (r
->last
->parent
&& ROFFRULE_DENY
== r
->last
->parent
->rule
)
1180 r
->last
->rule
= ROFFRULE_DENY
;
1184 * If there is nothing on the line after the conditional,
1185 * not even whitespace, use next-line scope.
1188 if ('\0' == (*bufp
)[pos
]) {
1189 r
->last
->endspan
= 2;
1193 while (' ' == (*bufp
)[pos
])
1196 /* An opening brace requests multiline scope. */
1198 if ('\\' == (*bufp
)[pos
] && '{' == (*bufp
)[pos
+ 1]) {
1199 r
->last
->endspan
= -1;
1205 * Anything else following the conditional causes
1206 * single-line scope. Warn if the scope contains
1207 * nothing but trailing whitespace.
1210 if ('\0' == (*bufp
)[pos
])
1211 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
1213 r
->last
->endspan
= 1;
1225 char *name
, *string
;
1228 * A symbol is named by the first word following the macro
1229 * invocation up to a space. Its value is anything after the
1230 * name's trailing whitespace and optional double-quote. Thus,
1234 * will have `bar " ' as its value.
1237 string
= *bufp
+ pos
;
1238 name
= roff_getname(r
, &string
, ln
, pos
);
1242 /* Read past initial double-quote. */
1246 /* The rest is the value. */
1247 roff_setstr(r
, name
, string
, 0);
1252 roff_regisset(const struct roff
*r
, enum regs reg
)
1255 return(r
->regs
[(int)reg
].set
);
1259 roff_regget(const struct roff
*r
, enum regs reg
)
1262 return(r
->regs
[(int)reg
].u
);
1266 roff_regunset(struct roff
*r
, enum regs reg
)
1269 r
->regs
[(int)reg
].set
= 0;
1281 key
= roff_getname(r
, &val
, ln
, pos
);
1283 if (0 == strcmp(key
, "nS")) {
1284 r
->regs
[(int)REG_nS
].set
= 1;
1285 if ((iv
= mandoc_strntoi(val
, strlen(val
), 10)) >= 0)
1286 r
->regs
[(int)REG_nS
].u
= (unsigned)iv
;
1288 r
->regs
[(int)REG_nS
].u
= 0u;
1302 while ('\0' != *cp
) {
1303 name
= roff_getname(r
, &cp
, ln
, (int)(cp
- *bufp
));
1305 roff_setstr(r
, name
, NULL
, 0);
1314 const char *const *cp
;
1316 if (MPARSE_MDOC
!= r
->parsetype
)
1317 for (cp
= __mdoc_reserved
; *cp
; cp
++)
1318 roff_setstr(r
, *cp
, NULL
, 0);
1327 const char *const *cp
;
1329 if (MPARSE_MDOC
!= r
->parsetype
)
1330 for (cp
= __man_reserved
; *cp
; cp
++)
1331 roff_setstr(r
, *cp
, NULL
, 0);
1342 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1355 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1357 tbl_restart(ppos
, ln
, r
->tbl
);
1364 roff_closeeqn(struct roff
*r
)
1367 return(r
->eqn
&& ROFF_EQN
== eqn_end(&r
->eqn
) ? 1 : 0);
1372 roff_openeqn(struct roff
*r
, const char *name
, int line
,
1373 int offs
, const char *buf
)
1378 assert(NULL
== r
->eqn
);
1379 e
= eqn_alloc(name
, offs
, line
, r
->parse
);
1382 r
->last_eqn
->next
= e
;
1384 r
->first_eqn
= r
->last_eqn
= e
;
1386 r
->eqn
= r
->last_eqn
= e
;
1390 eqn_read(&r
->eqn
, line
, buf
, offs
, &poff
);
1399 roff_openeqn(r
, *bufp
+ pos
, ln
, ppos
, NULL
);
1408 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1416 struct tbl_node
*tbl
;
1419 mandoc_msg(MANDOCERR_SCOPEBROKEN
, r
->parse
, ln
, ppos
, NULL
);
1423 tbl
= tbl_alloc(ppos
, ln
, r
->parse
);
1426 r
->last_tbl
->next
= tbl
;
1428 r
->first_tbl
= r
->last_tbl
= tbl
;
1430 r
->tbl
= r
->last_tbl
= tbl
;
1442 if ('\0' == *p
|| '.' == (r
->control
= *p
++))
1446 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1455 const char *p
, *first
, *second
;
1457 enum mandoc_esc esc
;
1462 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1466 while ('\0' != *p
) {
1470 if ('\\' == *first
) {
1471 esc
= mandoc_escape(&p
, NULL
, NULL
);
1472 if (ESCAPE_ERROR
== esc
) {
1474 (MANDOCERR_BADESCAPE
, r
->parse
,
1475 ln
, (int)(p
- *bufp
), NULL
);
1478 fsz
= (size_t)(p
- first
);
1482 if ('\\' == *second
) {
1483 esc
= mandoc_escape(&p
, NULL
, NULL
);
1484 if (ESCAPE_ERROR
== esc
) {
1486 (MANDOCERR_BADESCAPE
, r
->parse
,
1487 ln
, (int)(p
- *bufp
), NULL
);
1490 ssz
= (size_t)(p
- second
);
1491 } else if ('\0' == *second
) {
1492 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
,
1493 ln
, (int)(p
- *bufp
), NULL
);
1499 roff_setstrn(&r
->xmbtab
, first
,
1500 fsz
, second
, ssz
, 0);
1504 if (NULL
== r
->xtab
)
1505 r
->xtab
= mandoc_calloc
1506 (128, sizeof(struct roffstr
));
1508 free(r
->xtab
[(int)*first
].p
);
1509 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
1510 r
->xtab
[(int)*first
].sz
= ssz
;
1522 mandoc_msg(MANDOCERR_SO
, r
->parse
, ln
, ppos
, NULL
);
1525 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1526 * opening anything that's not in our cwd or anything beneath
1527 * it. Thus, explicitly disallow traversing up the file-system
1528 * or using absolute paths.
1532 if ('/' == *name
|| strstr(name
, "../") || strstr(name
, "/..")) {
1533 mandoc_msg(MANDOCERR_SOPATH
, r
->parse
, ln
, pos
, NULL
);
1543 roff_userdef(ROFF_ARGS
)
1550 * Collect pointers to macro argument strings
1551 * and null-terminate them.
1554 for (i
= 0; i
< 9; i
++)
1555 arg
[i
] = '\0' == *cp
? "" :
1556 mandoc_getarg(r
->parse
, &cp
, ln
, &pos
);
1559 * Expand macro arguments.
1562 n1
= cp
= mandoc_strdup(r
->current_string
);
1563 while (NULL
!= (cp
= strstr(cp
, "\\$"))) {
1565 if (0 > i
|| 8 < i
) {
1566 /* Not an argument invocation. */
1571 *szp
= strlen(n1
) - 3 + strlen(arg
[i
]) + 1;
1572 n2
= mandoc_malloc(*szp
);
1574 strlcpy(n2
, n1
, (size_t)(cp
- n1
+ 1));
1575 strlcat(n2
, arg
[i
], *szp
);
1576 strlcat(n2
, cp
+ 3, *szp
);
1578 cp
= n2
+ (cp
- n1
);
1584 * Replace the macro invocation
1585 * by the expanded macro.
1590 *szp
= strlen(*bufp
) + 1;
1592 return(*szp
> 1 && '\n' == (*bufp
)[(int)*szp
- 2] ?
1593 ROFF_REPARSE
: ROFF_APPEND
);
1597 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
1605 /* Read until end of name. */
1606 for (cp
= name
; '\0' != *cp
&& ' ' != *cp
; cp
++) {
1612 mandoc_msg(MANDOCERR_NAMESC
, r
->parse
, ln
, pos
, NULL
);
1617 /* Nil-terminate name. */
1621 /* Read past spaces. */
1630 * Store *string into the user-defined string called *name.
1631 * In multiline mode, append to an existing entry and append '\n';
1632 * else replace the existing entry, if there is one.
1633 * To clear an existing entry, call with (*r, *name, NULL, 0).
1636 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
1640 roff_setstrn(&r
->strtab
, name
, strlen(name
), string
,
1641 string
? strlen(string
) : 0, multiline
);
1645 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
1646 const char *string
, size_t stringsz
, int multiline
)
1651 size_t oldch
, newch
;
1653 /* Search for an existing string with the same name. */
1656 while (n
&& strcmp(name
, n
->key
.p
))
1660 /* Create a new string table entry. */
1661 n
= mandoc_malloc(sizeof(struct roffkv
));
1662 n
->key
.p
= mandoc_strndup(name
, namesz
);
1668 } else if (0 == multiline
) {
1669 /* In multiline mode, append; else replace. */
1679 * One additional byte for the '\n' in multiline mode,
1680 * and one for the terminating '\0'.
1682 newch
= stringsz
+ (multiline
? 2u : 1u);
1684 if (NULL
== n
->val
.p
) {
1685 n
->val
.p
= mandoc_malloc(newch
);
1690 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
1693 /* Skip existing content in the destination buffer. */
1694 c
= n
->val
.p
+ (int)oldch
;
1696 /* Append new content to the destination buffer. */
1698 while (i
< (int)stringsz
) {
1700 * Rudimentary roff copy mode:
1701 * Handle escaped backslashes.
1703 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
1708 /* Append terminating bytes. */
1713 n
->val
.sz
= (int)(c
- n
->val
.p
);
1717 roff_getstrn(const struct roff
*r
, const char *name
, size_t len
)
1719 const struct roffkv
*n
;
1721 for (n
= r
->strtab
; n
; n
= n
->next
)
1722 if (0 == strncmp(name
, n
->key
.p
, len
) &&
1723 '\0' == n
->key
.p
[(int)len
])
1730 roff_freestr(struct roffkv
*r
)
1732 struct roffkv
*n
, *nn
;
1734 for (n
= r
; n
; n
= nn
) {
1742 const struct tbl_span
*
1743 roff_span(const struct roff
*r
)
1746 return(r
->tbl
? tbl_span(r
->tbl
) : NULL
);
1750 roff_eqn(const struct roff
*r
)
1753 return(r
->last_eqn
? &r
->last_eqn
->eqn
: NULL
);
1757 * Duplicate an input string, making the appropriate character
1758 * conversions (as stipulated by `tr') along the way.
1759 * Returns a heap-allocated string with all the replacements made.
1762 roff_strdup(const struct roff
*r
, const char *p
)
1764 const struct roffkv
*cp
;
1768 enum mandoc_esc esc
;
1770 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
1771 return(mandoc_strdup(p
));
1772 else if ('\0' == *p
)
1773 return(mandoc_strdup(""));
1776 * Step through each character looking for term matches
1777 * (remember that a `tr' can be invoked with an escape, which is
1778 * a glyph but the escape is multi-character).
1779 * We only do this if the character hash has been initialised
1780 * and the string is >0 length.
1786 while ('\0' != *p
) {
1787 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(int)*p
].p
) {
1788 sz
= r
->xtab
[(int)*p
].sz
;
1789 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
1790 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
1794 } else if ('\\' != *p
) {
1795 res
= mandoc_realloc(res
, ssz
+ 2);
1800 /* Search for term matches. */
1801 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
1802 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
1807 * A match has been found.
1808 * Append the match to the array and move
1809 * forward by its keysize.
1811 res
= mandoc_realloc
1812 (res
, ssz
+ cp
->val
.sz
+ 1);
1813 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
1815 p
+= (int)cp
->key
.sz
;
1820 * Handle escapes carefully: we need to copy
1821 * over just the escape itself, or else we might
1822 * do replacements within the escape itself.
1823 * Make sure to pass along the bogus string.
1826 esc
= mandoc_escape(&p
, NULL
, NULL
);
1827 if (ESCAPE_ERROR
== esc
) {
1829 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
1830 memcpy(res
+ ssz
, pp
, sz
);
1834 * We bail out on bad escapes.
1835 * No need to warn: we already did so when
1836 * roff_res() was called.
1839 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
1840 memcpy(res
+ ssz
, pp
, sz
);
1844 res
[(int)ssz
] = '\0';
1849 * Find out whether a line is a macro line or not.
1850 * If it is, adjust the current position and return one; if it isn't,
1851 * return zero and don't change the current position.
1852 * If the control character has been set with `.cc', then let that grain
1854 * This is slightly contrary to groff, where using the non-breaking
1855 * control character when `cc' has been invoked will cause the
1856 * non-breaking macro contents to be printed verbatim.
1859 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
1865 if (0 != r
->control
&& cp
[pos
] == r
->control
)
1867 else if (0 != r
->control
)
1869 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
1871 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
1876 while (' ' == cp
[pos
] || '\t' == cp
[pos
])