]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.184 2013/10/05 22:25:12 schwarze Exp $ */
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
30 #include "libmandoc.h"
32 /* Maximum number of nested if-else conditionals. */
33 #define RSTACK_MAX 128
35 /* Maximum number of string expansions per line, to break infinite loops. */
36 #define EXPAND_LIMIT 1000
82 * An incredibly-simple string buffer.
85 char *p
; /* nil-terminated buffer */
86 size_t sz
; /* saved strlen(p) */
90 * A key-value roffstr pair as part of a singly-linked list.
95 struct roffkv
*next
; /* next in list */
99 * A single number register as part of a singly-linked list.
104 struct roffreg
*next
;
108 enum mparset parsetype
; /* requested parse type */
109 struct mparse
*parse
; /* parse point */
110 struct roffnode
*last
; /* leaf of stack */
111 enum roffrule rstack
[RSTACK_MAX
]; /* stack of !`ie' rules */
112 char control
; /* control character */
113 int rstackpos
; /* position in rstack */
114 struct roffreg
*regtab
; /* number registers */
115 struct roffkv
*strtab
; /* user-defined strings & macros */
116 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
117 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
118 const char *current_string
; /* value of last called user macro */
119 struct tbl_node
*first_tbl
; /* first table parsed */
120 struct tbl_node
*last_tbl
; /* last table parsed */
121 struct tbl_node
*tbl
; /* current table being parsed */
122 struct eqn_node
*last_eqn
; /* last equation parsed */
123 struct eqn_node
*first_eqn
; /* first equation parsed */
124 struct eqn_node
*eqn
; /* current equation being parsed */
128 enum rofft tok
; /* type of node */
129 struct roffnode
*parent
; /* up one in stack */
130 int line
; /* parse line */
131 int col
; /* parse col */
132 char *name
; /* node name, e.g. macro name */
133 char *end
; /* end-rules: custom token */
134 int endspan
; /* end-rules: next-line or infty */
135 enum roffrule rule
; /* current evaluation rule */
138 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
139 enum rofft tok, /* tok of macro */ \
140 char **bufp, /* input buffer */ \
141 size_t *szp, /* size of input buffer */ \
142 int ln, /* parse line */ \
143 int ppos, /* original pos in buffer */ \
144 int pos, /* current pos in buffer */ \
145 int *offs /* reset offset of buffer data */
147 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
150 const char *name
; /* macro name */
151 roffproc proc
; /* process new macro */
152 roffproc text
; /* process as child text of macro */
153 roffproc sub
; /* process as child of macro */
155 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
156 struct roffmac
*next
;
160 const char *name
; /* predefined input name */
161 const char *str
; /* replacement symbol */
164 #define PREDEF(__name, __str) \
165 { (__name), (__str) },
167 static enum rofft
roffhash_find(const char *, size_t);
168 static void roffhash_init(void);
169 static void roffnode_cleanscope(struct roff
*);
170 static void roffnode_pop(struct roff
*);
171 static void roffnode_push(struct roff
*, enum rofft
,
172 const char *, int, int);
173 static enum rofferr
roff_block(ROFF_ARGS
);
174 static enum rofferr
roff_block_text(ROFF_ARGS
);
175 static enum rofferr
roff_block_sub(ROFF_ARGS
);
176 static enum rofferr
roff_cblock(ROFF_ARGS
);
177 static enum rofferr
roff_cc(ROFF_ARGS
);
178 static enum rofferr
roff_ccond(ROFF_ARGS
);
179 static enum rofferr
roff_cond(ROFF_ARGS
);
180 static enum rofferr
roff_cond_text(ROFF_ARGS
);
181 static enum rofferr
roff_cond_sub(ROFF_ARGS
);
182 static enum rofferr
roff_ds(ROFF_ARGS
);
183 static enum roffrule
roff_evalcond(const char *, int *);
184 static void roff_free1(struct roff
*);
185 static void roff_freereg(struct roffreg
*);
186 static void roff_freestr(struct roffkv
*);
187 static char *roff_getname(struct roff
*, char **, int, int);
188 static int roff_getnum(const char *, int *, int *);
189 static int roff_getop(const char *, int *, char *);
190 static int roff_getregn(const struct roff
*,
191 const char *, size_t);
192 static const char *roff_getstrn(const struct roff
*,
193 const char *, size_t);
194 static enum rofferr
roff_it(ROFF_ARGS
);
195 static enum rofferr
roff_line_ignore(ROFF_ARGS
);
196 static enum rofferr
roff_nr(ROFF_ARGS
);
197 static void roff_openeqn(struct roff
*, const char *,
198 int, int, const char *);
199 static enum rofft
roff_parse(struct roff
*, const char *, int *);
200 static enum rofferr
roff_parsetext(char **, size_t *, int, int *);
201 static enum rofferr
roff_res(struct roff
*,
202 char **, size_t *, int, int);
203 static enum rofferr
roff_rm(ROFF_ARGS
);
204 static void roff_setstr(struct roff
*,
205 const char *, const char *, int);
206 static void roff_setstrn(struct roffkv
**, const char *,
207 size_t, const char *, size_t, int);
208 static enum rofferr
roff_so(ROFF_ARGS
);
209 static enum rofferr
roff_tr(ROFF_ARGS
);
210 static enum rofferr
roff_Dd(ROFF_ARGS
);
211 static enum rofferr
roff_TH(ROFF_ARGS
);
212 static enum rofferr
roff_TE(ROFF_ARGS
);
213 static enum rofferr
roff_TS(ROFF_ARGS
);
214 static enum rofferr
roff_EQ(ROFF_ARGS
);
215 static enum rofferr
roff_EN(ROFF_ARGS
);
216 static enum rofferr
roff_T_(ROFF_ARGS
);
217 static enum rofferr
roff_userdef(ROFF_ARGS
);
219 /* See roffhash_find() */
223 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
225 static struct roffmac
*hash
[HASHWIDTH
];
227 static struct roffmac roffs
[ROFF_MAX
] = {
228 { "ad", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
229 { "am", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
230 { "ami", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
231 { "am1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
232 { "cc", roff_cc
, NULL
, NULL
, 0, NULL
},
233 { "de", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
234 { "dei", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
235 { "de1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
236 { "ds", roff_ds
, NULL
, NULL
, 0, NULL
},
237 { "el", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
238 { "hy", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
239 { "ie", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
240 { "if", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
241 { "ig", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
242 { "it", roff_it
, NULL
, NULL
, 0, NULL
},
243 { "ne", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
244 { "nh", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
245 { "nr", roff_nr
, NULL
, NULL
, 0, NULL
},
246 { "ns", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
247 { "ps", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
248 { "rm", roff_rm
, NULL
, NULL
, 0, NULL
},
249 { "so", roff_so
, NULL
, NULL
, 0, NULL
},
250 { "ta", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
251 { "tr", roff_tr
, NULL
, NULL
, 0, NULL
},
252 { "Dd", roff_Dd
, NULL
, NULL
, 0, NULL
},
253 { "TH", roff_TH
, NULL
, NULL
, 0, NULL
},
254 { "TS", roff_TS
, NULL
, NULL
, 0, NULL
},
255 { "TE", roff_TE
, NULL
, NULL
, 0, NULL
},
256 { "T&", roff_T_
, NULL
, NULL
, 0, NULL
},
257 { "EQ", roff_EQ
, NULL
, NULL
, 0, NULL
},
258 { "EN", roff_EN
, NULL
, NULL
, 0, NULL
},
259 { ".", roff_cblock
, NULL
, NULL
, 0, NULL
},
260 { "\\}", roff_ccond
, NULL
, NULL
, 0, NULL
},
261 { NULL
, roff_userdef
, NULL
, NULL
, 0, NULL
},
264 const char *const __mdoc_reserved
[] = {
265 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
266 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
267 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
268 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
269 "Ds", "Dt", "Dv", "Dx", "D1",
270 "Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
271 "En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
272 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
273 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
274 "Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
275 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
276 "Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
277 "Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
278 "Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
279 "Ss", "St", "Sx", "Sy",
280 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
281 "%A", "%B", "%D", "%I", "%J", "%N", "%O",
282 "%P", "%Q", "%R", "%T", "%U", "%V",
286 const char *const __man_reserved
[] = {
287 "AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
288 "EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
289 "LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
290 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
291 "TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
295 /* Array of injected predefined strings. */
296 #define PREDEFS_MAX 38
297 static const struct predef predefs
[PREDEFS_MAX
] = {
298 #include "predefs.in"
301 /* See roffhash_find() */
302 #define ROFF_HASH(p) (p[0] - ASCII_LO)
304 static int roffit_lines
; /* number of lines to delay */
305 static char *roffit_macro
; /* nil-terminated macro line */
313 for (i
= 0; i
< (int)ROFF_USERDEF
; i
++) {
314 assert(roffs
[i
].name
[0] >= ASCII_LO
);
315 assert(roffs
[i
].name
[0] <= ASCII_HI
);
317 buc
= ROFF_HASH(roffs
[i
].name
);
319 if (NULL
!= (n
= hash
[buc
])) {
320 for ( ; n
->next
; n
= n
->next
)
324 hash
[buc
] = &roffs
[i
];
329 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
330 * the nil-terminated string name could be found.
333 roffhash_find(const char *p
, size_t s
)
339 * libroff has an extremely simple hashtable, for the time
340 * being, which simply keys on the first character, which must
341 * be printable, then walks a chain. It works well enough until
345 if (p
[0] < ASCII_LO
|| p
[0] > ASCII_HI
)
350 if (NULL
== (n
= hash
[buc
]))
352 for ( ; n
; n
= n
->next
)
353 if (0 == strncmp(n
->name
, p
, s
) && '\0' == n
->name
[(int)s
])
354 return((enum rofft
)(n
- roffs
));
361 * Pop the current node off of the stack of roff instructions currently
365 roffnode_pop(struct roff
*r
)
372 r
->last
= r
->last
->parent
;
380 * Push a roff node onto the instruction stack. This must later be
381 * removed with roffnode_pop().
384 roffnode_push(struct roff
*r
, enum rofft tok
, const char *name
,
389 p
= mandoc_calloc(1, sizeof(struct roffnode
));
392 p
->name
= mandoc_strdup(name
);
396 p
->rule
= p
->parent
? p
->parent
->rule
: ROFFRULE_DENY
;
403 roff_free1(struct roff
*r
)
405 struct tbl_node
*tbl
;
409 while (NULL
!= (tbl
= r
->first_tbl
)) {
410 r
->first_tbl
= tbl
->next
;
414 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
416 while (NULL
!= (e
= r
->first_eqn
)) {
417 r
->first_eqn
= e
->next
;
421 r
->first_eqn
= r
->last_eqn
= r
->eqn
= NULL
;
426 roff_freestr(r
->strtab
);
427 roff_freestr(r
->xmbtab
);
429 r
->strtab
= r
->xmbtab
= NULL
;
431 roff_freereg(r
->regtab
);
436 for (i
= 0; i
< 128; i
++)
444 roff_reset(struct roff
*r
)
452 for (i
= 0; i
< PREDEFS_MAX
; i
++)
453 roff_setstr(r
, predefs
[i
].name
, predefs
[i
].str
, 0);
458 roff_free(struct roff
*r
)
467 roff_alloc(enum mparset type
, struct mparse
*parse
)
472 r
= mandoc_calloc(1, sizeof(struct roff
));
479 for (i
= 0; i
< PREDEFS_MAX
; i
++)
480 roff_setstr(r
, predefs
[i
].name
, predefs
[i
].str
, 0);
486 * In the current line, expand user-defined strings ("\*")
487 * and references to number registers ("\n").
488 * Also check the syntax of other escape sequences.
491 roff_res(struct roff
*r
, char **bufp
, size_t *szp
, int ln
, int pos
)
493 char ubuf
[12]; /* buffer to print the number */
494 const char *stesc
; /* start of an escape sequence ('\\') */
495 const char *stnam
; /* start of the name, after "[(*" */
496 const char *cp
; /* end of the name, e.g. before ']' */
497 const char *res
; /* the string to be substituted */
498 char *nbuf
; /* new buffer to copy bufp to */
499 size_t nsz
; /* size of the new buffer */
500 size_t maxl
; /* expected length of the escape name */
501 size_t naml
; /* actual length of the escape name */
502 int expand_count
; /* to avoid infinite loops */
508 while (NULL
!= (cp
= strchr(cp
, '\\'))) {
512 * The second character must be an asterisk or an n.
513 * If it isn't, skip it anyway: It is escaped,
514 * so it can't start another escape sequence.
528 if (ESCAPE_ERROR
!= mandoc_escape(&cp
, NULL
, NULL
))
531 (MANDOCERR_BADESCAPE
, r
->parse
,
532 ln
, (int)(stesc
- *bufp
), NULL
);
539 * The third character decides the length
540 * of the name of the string or register.
541 * Save a pointer to the name.
561 /* Advance to the end of the name. */
563 for (naml
= 0; 0 == maxl
|| naml
< maxl
; naml
++, cp
++) {
566 (MANDOCERR_BADESCAPE
,
568 (int)(stesc
- *bufp
), NULL
);
571 if (0 == maxl
&& ']' == *cp
)
576 * Retrieve the replacement string; if it is
577 * undefined, resume searching for escapes.
581 res
= roff_getstrn(r
, stnam
, naml
);
583 snprintf(ubuf
, sizeof(ubuf
), "%d",
584 roff_getregn(r
, stnam
, naml
));
588 (MANDOCERR_BADESCAPE
, r
->parse
,
589 ln
, (int)(stesc
- *bufp
), NULL
);
593 /* Replace the escape sequence by the string. */
597 nsz
= *szp
+ strlen(res
) + 1;
598 nbuf
= mandoc_malloc(nsz
);
600 strlcpy(nbuf
, *bufp
, (size_t)(stesc
- *bufp
+ 1));
601 strlcat(nbuf
, res
, nsz
);
602 strlcat(nbuf
, cp
+ (maxl
? 0 : 1), nsz
);
609 if (EXPAND_LIMIT
>= ++expand_count
)
612 /* Just leave the string unexpanded. */
613 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
, ln
, pos
, NULL
);
620 * Process text streams:
621 * Convert all breakable hyphens into ASCII_HYPH.
622 * Decrement and spring input line trap.
625 roff_parsetext(char **bufp
, size_t *szp
, int pos
, int *offs
)
633 start
= p
= *bufp
+ pos
;
636 sz
= strcspn(p
, "-\\");
643 /* Skip over escapes. */
646 ((const char const **)&p
, NULL
, NULL
);
647 if (ESCAPE_ERROR
== esc
)
650 } else if (p
== start
) {
655 if (isalpha((unsigned char)p
[-1]) &&
656 isalpha((unsigned char)p
[1]))
661 /* Spring the input line trap. */
662 if (1 == roffit_lines
) {
663 isz
= asprintf(&p
, "%s\n.%s", *bufp
, roffit_macro
);
666 exit((int)MANDOCLEVEL_SYSERR
);
674 return(ROFF_REPARSE
);
675 } else if (1 < roffit_lines
)
681 roff_parseln(struct roff
*r
, int ln
, char **bufp
,
682 size_t *szp
, int pos
, int *offs
)
689 * Run the reserved-word filter only if we have some reserved
693 e
= roff_res(r
, bufp
, szp
, ln
, pos
);
696 assert(ROFF_CONT
== e
);
699 ctl
= roff_getcontrol(r
, *bufp
, &pos
);
702 * First, if a scope is open and we're not a macro, pass the
703 * text through the macro's filter. If a scope isn't open and
704 * we're not a macro, just let it through.
705 * Finally, if there's an equation scope open, divert it into it
706 * no matter our state.
709 if (r
->last
&& ! ctl
) {
711 assert(roffs
[t
].text
);
713 (r
, t
, bufp
, szp
, ln
, pos
, pos
, offs
);
714 assert(ROFF_IGN
== e
|| ROFF_CONT
== e
);
719 return(eqn_read(&r
->eqn
, ln
, *bufp
, ppos
, offs
));
722 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
723 return(roff_parsetext(bufp
, szp
, pos
, offs
));
727 * If a scope is open, go to the child handler for that macro,
728 * as it may want to preprocess before doing anything with it.
729 * Don't do so if an equation is open.
734 assert(roffs
[t
].sub
);
735 return((*roffs
[t
].sub
)
737 ln
, ppos
, pos
, offs
));
741 * Lastly, as we've no scope open, try to look up and execute
742 * the new macro. If no macro is found, simply return and let
743 * the compilers handle it.
746 if (ROFF_MAX
== (t
= roff_parse(r
, *bufp
, &pos
)))
749 assert(roffs
[t
].proc
);
750 return((*roffs
[t
].proc
)
752 ln
, ppos
, pos
, offs
));
757 roff_endparse(struct roff
*r
)
761 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
762 r
->last
->line
, r
->last
->col
, NULL
);
765 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
766 r
->eqn
->eqn
.ln
, r
->eqn
->eqn
.pos
, NULL
);
771 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
772 r
->tbl
->line
, r
->tbl
->pos
, NULL
);
778 * Parse a roff node's type from the input buffer. This must be in the
779 * form of ".foo xxx" in the usual way.
782 roff_parse(struct roff
*r
, const char *buf
, int *pos
)
788 if ('\0' == buf
[*pos
] || '"' == buf
[*pos
] ||
789 '\t' == buf
[*pos
] || ' ' == buf
[*pos
])
793 * We stop the macro parse at an escape, tab, space, or nil.
794 * However, `\}' is also a valid macro, so make sure we don't
795 * clobber it by seeing the `\' as the end of token.
799 maclen
= strcspn(mac
+ 1, " \\\t\0") + 1;
801 t
= (r
->current_string
= roff_getstrn(r
, mac
, maclen
))
802 ? ROFF_USERDEF
: roffhash_find(mac
, maclen
);
806 while (buf
[*pos
] && ' ' == buf
[*pos
])
814 roff_cblock(ROFF_ARGS
)
818 * A block-close `..' should only be invoked as a child of an
819 * ignore macro, otherwise raise a warning and just ignore it.
822 if (NULL
== r
->last
) {
823 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
827 switch (r
->last
->tok
) {
835 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
842 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
847 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
850 roffnode_cleanscope(r
);
857 roffnode_cleanscope(struct roff
*r
)
861 if (--r
->last
->endspan
!= 0)
870 roff_ccond(ROFF_ARGS
)
873 if (NULL
== r
->last
) {
874 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
878 switch (r
->last
->tok
) {
886 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
890 if (r
->last
->endspan
> -1) {
891 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
896 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
899 roffnode_cleanscope(r
);
906 roff_block(ROFF_ARGS
)
914 if (ROFF_ig
!= tok
) {
915 if ('\0' == (*bufp
)[pos
]) {
916 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
921 * Re-write `de1', since we don't really care about
922 * groff's strange compatibility mode, into `de'.
930 mandoc_msg(MANDOCERR_REQUEST
, r
->parse
, ln
, ppos
,
933 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
936 while (isspace((unsigned char)(*bufp
)[pos
]))
937 (*bufp
)[pos
++] = '\0';
940 roffnode_push(r
, tok
, name
, ln
, ppos
);
943 * At the beginning of a `de' macro, clear the existing string
944 * with the same name, if there is one. New content will be
945 * added from roff_block_text() in multiline mode.
949 roff_setstr(r
, name
, "", 0);
951 if ('\0' == (*bufp
)[pos
])
954 /* If present, process the custom end-of-line marker. */
957 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
961 * Note: groff does NOT like escape characters in the input.
962 * Instead of detecting this, we're just going to let it fly and
967 sz
= (size_t)(pos
- sv
);
969 if (1 == sz
&& '.' == (*bufp
)[sv
])
972 r
->last
->end
= mandoc_malloc(sz
+ 1);
974 memcpy(r
->last
->end
, *bufp
+ sv
, sz
);
975 r
->last
->end
[(int)sz
] = '\0';
978 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
986 roff_block_sub(ROFF_ARGS
)
992 * First check whether a custom macro exists at this level. If
993 * it does, then check against it. This is one of groff's
994 * stranger behaviours. If we encountered a custom end-scope
995 * tag and that tag also happens to be a "real" macro, then we
996 * need to try interpreting it again as a real macro. If it's
997 * not, then return ignore. Else continue.
1001 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
1002 if ((*bufp
)[i
] != r
->last
->end
[j
])
1005 if ('\0' == r
->last
->end
[j
] &&
1006 ('\0' == (*bufp
)[i
] ||
1007 ' ' == (*bufp
)[i
] ||
1008 '\t' == (*bufp
)[i
])) {
1010 roffnode_cleanscope(r
);
1012 while (' ' == (*bufp
)[i
] || '\t' == (*bufp
)[i
])
1016 if (ROFF_MAX
!= roff_parse(r
, *bufp
, &pos
))
1023 * If we have no custom end-query or lookup failed, then try
1024 * pulling it out of the hashtable.
1027 t
= roff_parse(r
, *bufp
, &pos
);
1030 * Macros other than block-end are only significant
1031 * in `de' blocks; elsewhere, simply throw them away.
1033 if (ROFF_cblock
!= t
) {
1035 roff_setstr(r
, r
->last
->name
, *bufp
+ ppos
, 1);
1039 assert(roffs
[t
].proc
);
1040 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1041 ln
, ppos
, pos
, offs
));
1047 roff_block_text(ROFF_ARGS
)
1051 roff_setstr(r
, r
->last
->name
, *bufp
+ pos
, 1);
1059 roff_cond_sub(ROFF_ARGS
)
1066 roffnode_cleanscope(r
);
1067 t
= roff_parse(r
, *bufp
, &pos
);
1070 * Fully handle known macros when they are structurally
1071 * required or when the conditional evaluated to true.
1074 if ((ROFF_MAX
!= t
) &&
1075 (ROFF_ccond
== t
|| ROFFRULE_ALLOW
== rr
||
1076 ROFFMAC_STRUCT
& roffs
[t
].flags
)) {
1077 assert(roffs
[t
].proc
);
1078 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1079 ln
, ppos
, pos
, offs
));
1082 /* Always check for the closing delimiter `\}'. */
1085 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1090 * If we're at the end of line, then just chop
1091 * off the \} and resize the buffer.
1092 * If we aren't, then convert it to spaces.
1095 if ('\0' == *(ep
+ 1)) {
1099 *(ep
- 1) = *ep
= ' ';
1101 roff_ccond(r
, ROFF_ccond
, bufp
, szp
,
1102 ln
, pos
, pos
+ 2, offs
);
1105 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
1110 roff_cond_text(ROFF_ARGS
)
1116 roffnode_cleanscope(r
);
1119 for ( ; NULL
!= (ep
= strchr(ep
, '\\')); ep
++) {
1124 roff_ccond(r
, ROFF_ccond
, bufp
, szp
,
1125 ln
, pos
, pos
+ 2, offs
);
1127 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
1131 roff_getnum(const char *v
, int *pos
, int *res
)
1140 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
1141 *res
+= 10 * *res
+ v
[p
] - '0';
1153 roff_getop(const char *v
, int *pos
, char *res
)
1158 e
= v
[*pos
+ 1] == '=';
1180 static enum roffrule
1181 roff_evalcond(const char *v
, int *pos
)
1189 return(ROFFRULE_ALLOW
);
1196 return(ROFFRULE_DENY
);
1206 if (!roff_getnum(v
, pos
, &lh
))
1207 return ROFFRULE_DENY
;
1208 if (!roff_getop(v
, pos
, &op
)) {
1213 if (!roff_getnum(v
, pos
, &rh
))
1214 return ROFFRULE_DENY
;
1232 return ROFFRULE_DENY
;
1237 return lh
? ROFFRULE_ALLOW
: ROFFRULE_DENY
;
1242 roff_line_ignore(ROFF_ARGS
)
1250 roff_cond(ROFF_ARGS
)
1253 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
1256 * An `.el' has no conditional body: it will consume the value
1257 * of the current rstack entry set in prior `ie' calls or
1260 * If we're not an `el', however, then evaluate the conditional.
1263 r
->last
->rule
= ROFF_el
== tok
?
1265 ROFFRULE_DENY
: r
->rstack
[r
->rstackpos
--]) :
1266 roff_evalcond(*bufp
, &pos
);
1269 * An if-else will put the NEGATION of the current evaluated
1270 * conditional into the stack of rules.
1273 if (ROFF_ie
== tok
) {
1274 if (r
->rstackpos
== RSTACK_MAX
- 1) {
1275 mandoc_msg(MANDOCERR_MEM
,
1276 r
->parse
, ln
, ppos
, NULL
);
1279 r
->rstack
[++r
->rstackpos
] =
1280 ROFFRULE_DENY
== r
->last
->rule
?
1281 ROFFRULE_ALLOW
: ROFFRULE_DENY
;
1284 /* If the parent has false as its rule, then so do we. */
1286 if (r
->last
->parent
&& ROFFRULE_DENY
== r
->last
->parent
->rule
)
1287 r
->last
->rule
= ROFFRULE_DENY
;
1291 * If there is nothing on the line after the conditional,
1292 * not even whitespace, use next-line scope.
1295 if ('\0' == (*bufp
)[pos
]) {
1296 r
->last
->endspan
= 2;
1300 while (' ' == (*bufp
)[pos
])
1303 /* An opening brace requests multiline scope. */
1305 if ('\\' == (*bufp
)[pos
] && '{' == (*bufp
)[pos
+ 1]) {
1306 r
->last
->endspan
= -1;
1312 * Anything else following the conditional causes
1313 * single-line scope. Warn if the scope contains
1314 * nothing but trailing whitespace.
1317 if ('\0' == (*bufp
)[pos
])
1318 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
1320 r
->last
->endspan
= 1;
1332 char *name
, *string
;
1335 * A symbol is named by the first word following the macro
1336 * invocation up to a space. Its value is anything after the
1337 * name's trailing whitespace and optional double-quote. Thus,
1341 * will have `bar " ' as its value.
1344 string
= *bufp
+ pos
;
1345 name
= roff_getname(r
, &string
, ln
, pos
);
1349 /* Read past initial double-quote. */
1353 /* The rest is the value. */
1354 roff_setstr(r
, name
, string
, 0);
1359 roff_setreg(struct roff
*r
, const char *name
, int val
)
1361 struct roffreg
*reg
;
1363 /* Search for an existing register with the same name. */
1366 while (reg
&& strcmp(name
, reg
->key
.p
))
1370 /* Create a new register. */
1371 reg
= mandoc_malloc(sizeof(struct roffreg
));
1372 reg
->key
.p
= mandoc_strdup(name
);
1373 reg
->key
.sz
= strlen(name
);
1374 reg
->next
= r
->regtab
;
1382 roff_getreg(const struct roff
*r
, const char *name
)
1384 struct roffreg
*reg
;
1386 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1387 if (0 == strcmp(name
, reg
->key
.p
))
1394 roff_getregn(const struct roff
*r
, const char *name
, size_t len
)
1396 struct roffreg
*reg
;
1398 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1399 if (len
== reg
->key
.sz
&&
1400 0 == strncmp(name
, reg
->key
.p
, len
))
1407 roff_freereg(struct roffreg
*reg
)
1409 struct roffreg
*old_reg
;
1411 while (NULL
!= reg
) {
1428 key
= roff_getname(r
, &val
, ln
, pos
);
1430 iv
= mandoc_strntoi(val
, strlen(val
), 10);
1432 roff_setreg(r
, key
, iv
);
1445 while ('\0' != *cp
) {
1446 name
= roff_getname(r
, &cp
, ln
, (int)(cp
- *bufp
));
1448 roff_setstr(r
, name
, NULL
, 0);
1461 /* Parse the number of lines. */
1463 len
= strcspn(cp
, " \t");
1465 if ((iv
= mandoc_strntoi(cp
, len
, 10)) <= 0) {
1466 mandoc_msg(MANDOCERR_NUMERIC
, r
->parse
,
1467 ln
, ppos
, *bufp
+ 1);
1472 /* Arm the input line trap. */
1474 roffit_macro
= mandoc_strdup(cp
);
1482 const char *const *cp
;
1484 if (MPARSE_MDOC
!= r
->parsetype
)
1485 for (cp
= __mdoc_reserved
; *cp
; cp
++)
1486 roff_setstr(r
, *cp
, NULL
, 0);
1495 const char *const *cp
;
1497 if (MPARSE_MDOC
!= r
->parsetype
)
1498 for (cp
= __man_reserved
; *cp
; cp
++)
1499 roff_setstr(r
, *cp
, NULL
, 0);
1510 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1523 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1525 tbl_restart(ppos
, ln
, r
->tbl
);
1532 roff_closeeqn(struct roff
*r
)
1535 return(r
->eqn
&& ROFF_EQN
== eqn_end(&r
->eqn
) ? 1 : 0);
1540 roff_openeqn(struct roff
*r
, const char *name
, int line
,
1541 int offs
, const char *buf
)
1546 assert(NULL
== r
->eqn
);
1547 e
= eqn_alloc(name
, offs
, line
, r
->parse
);
1550 r
->last_eqn
->next
= e
;
1552 r
->first_eqn
= r
->last_eqn
= e
;
1554 r
->eqn
= r
->last_eqn
= e
;
1558 eqn_read(&r
->eqn
, line
, buf
, offs
, &poff
);
1567 roff_openeqn(r
, *bufp
+ pos
, ln
, ppos
, NULL
);
1576 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1584 struct tbl_node
*tbl
;
1587 mandoc_msg(MANDOCERR_SCOPEBROKEN
, r
->parse
, ln
, ppos
, NULL
);
1591 tbl
= tbl_alloc(ppos
, ln
, r
->parse
);
1594 r
->last_tbl
->next
= tbl
;
1596 r
->first_tbl
= r
->last_tbl
= tbl
;
1598 r
->tbl
= r
->last_tbl
= tbl
;
1610 if ('\0' == *p
|| '.' == (r
->control
= *p
++))
1614 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1623 const char *p
, *first
, *second
;
1625 enum mandoc_esc esc
;
1630 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1634 while ('\0' != *p
) {
1638 if ('\\' == *first
) {
1639 esc
= mandoc_escape(&p
, NULL
, NULL
);
1640 if (ESCAPE_ERROR
== esc
) {
1642 (MANDOCERR_BADESCAPE
, r
->parse
,
1643 ln
, (int)(p
- *bufp
), NULL
);
1646 fsz
= (size_t)(p
- first
);
1650 if ('\\' == *second
) {
1651 esc
= mandoc_escape(&p
, NULL
, NULL
);
1652 if (ESCAPE_ERROR
== esc
) {
1654 (MANDOCERR_BADESCAPE
, r
->parse
,
1655 ln
, (int)(p
- *bufp
), NULL
);
1658 ssz
= (size_t)(p
- second
);
1659 } else if ('\0' == *second
) {
1660 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
,
1661 ln
, (int)(p
- *bufp
), NULL
);
1667 roff_setstrn(&r
->xmbtab
, first
,
1668 fsz
, second
, ssz
, 0);
1672 if (NULL
== r
->xtab
)
1673 r
->xtab
= mandoc_calloc
1674 (128, sizeof(struct roffstr
));
1676 free(r
->xtab
[(int)*first
].p
);
1677 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
1678 r
->xtab
[(int)*first
].sz
= ssz
;
1690 mandoc_msg(MANDOCERR_SO
, r
->parse
, ln
, ppos
, NULL
);
1693 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1694 * opening anything that's not in our cwd or anything beneath
1695 * it. Thus, explicitly disallow traversing up the file-system
1696 * or using absolute paths.
1700 if ('/' == *name
|| strstr(name
, "../") || strstr(name
, "/..")) {
1701 mandoc_msg(MANDOCERR_SOPATH
, r
->parse
, ln
, pos
, NULL
);
1711 roff_userdef(ROFF_ARGS
)
1718 * Collect pointers to macro argument strings
1719 * and null-terminate them.
1722 for (i
= 0; i
< 9; i
++)
1723 arg
[i
] = '\0' == *cp
? "" :
1724 mandoc_getarg(r
->parse
, &cp
, ln
, &pos
);
1727 * Expand macro arguments.
1730 n1
= cp
= mandoc_strdup(r
->current_string
);
1731 while (NULL
!= (cp
= strstr(cp
, "\\$"))) {
1733 if (0 > i
|| 8 < i
) {
1734 /* Not an argument invocation. */
1739 *szp
= strlen(n1
) - 3 + strlen(arg
[i
]) + 1;
1740 n2
= mandoc_malloc(*szp
);
1742 strlcpy(n2
, n1
, (size_t)(cp
- n1
+ 1));
1743 strlcat(n2
, arg
[i
], *szp
);
1744 strlcat(n2
, cp
+ 3, *szp
);
1746 cp
= n2
+ (cp
- n1
);
1752 * Replace the macro invocation
1753 * by the expanded macro.
1758 *szp
= strlen(*bufp
) + 1;
1760 return(*szp
> 1 && '\n' == (*bufp
)[(int)*szp
- 2] ?
1761 ROFF_REPARSE
: ROFF_APPEND
);
1765 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
1773 /* Read until end of name. */
1774 for (cp
= name
; '\0' != *cp
&& ' ' != *cp
; cp
++) {
1780 mandoc_msg(MANDOCERR_NAMESC
, r
->parse
, ln
, pos
, NULL
);
1785 /* Nil-terminate name. */
1789 /* Read past spaces. */
1798 * Store *string into the user-defined string called *name.
1799 * In multiline mode, append to an existing entry and append '\n';
1800 * else replace the existing entry, if there is one.
1801 * To clear an existing entry, call with (*r, *name, NULL, 0).
1804 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
1808 roff_setstrn(&r
->strtab
, name
, strlen(name
), string
,
1809 string
? strlen(string
) : 0, multiline
);
1813 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
1814 const char *string
, size_t stringsz
, int multiline
)
1819 size_t oldch
, newch
;
1821 /* Search for an existing string with the same name. */
1824 while (n
&& strcmp(name
, n
->key
.p
))
1828 /* Create a new string table entry. */
1829 n
= mandoc_malloc(sizeof(struct roffkv
));
1830 n
->key
.p
= mandoc_strndup(name
, namesz
);
1836 } else if (0 == multiline
) {
1837 /* In multiline mode, append; else replace. */
1847 * One additional byte for the '\n' in multiline mode,
1848 * and one for the terminating '\0'.
1850 newch
= stringsz
+ (multiline
? 2u : 1u);
1852 if (NULL
== n
->val
.p
) {
1853 n
->val
.p
= mandoc_malloc(newch
);
1858 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
1861 /* Skip existing content in the destination buffer. */
1862 c
= n
->val
.p
+ (int)oldch
;
1864 /* Append new content to the destination buffer. */
1866 while (i
< (int)stringsz
) {
1868 * Rudimentary roff copy mode:
1869 * Handle escaped backslashes.
1871 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
1876 /* Append terminating bytes. */
1881 n
->val
.sz
= (int)(c
- n
->val
.p
);
1885 roff_getstrn(const struct roff
*r
, const char *name
, size_t len
)
1887 const struct roffkv
*n
;
1889 for (n
= r
->strtab
; n
; n
= n
->next
)
1890 if (0 == strncmp(name
, n
->key
.p
, len
) &&
1891 '\0' == n
->key
.p
[(int)len
])
1898 roff_freestr(struct roffkv
*r
)
1900 struct roffkv
*n
, *nn
;
1902 for (n
= r
; n
; n
= nn
) {
1910 const struct tbl_span
*
1911 roff_span(const struct roff
*r
)
1914 return(r
->tbl
? tbl_span(r
->tbl
) : NULL
);
1918 roff_eqn(const struct roff
*r
)
1921 return(r
->last_eqn
? &r
->last_eqn
->eqn
: NULL
);
1925 * Duplicate an input string, making the appropriate character
1926 * conversions (as stipulated by `tr') along the way.
1927 * Returns a heap-allocated string with all the replacements made.
1930 roff_strdup(const struct roff
*r
, const char *p
)
1932 const struct roffkv
*cp
;
1936 enum mandoc_esc esc
;
1938 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
1939 return(mandoc_strdup(p
));
1940 else if ('\0' == *p
)
1941 return(mandoc_strdup(""));
1944 * Step through each character looking for term matches
1945 * (remember that a `tr' can be invoked with an escape, which is
1946 * a glyph but the escape is multi-character).
1947 * We only do this if the character hash has been initialised
1948 * and the string is >0 length.
1954 while ('\0' != *p
) {
1955 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(int)*p
].p
) {
1956 sz
= r
->xtab
[(int)*p
].sz
;
1957 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
1958 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
1962 } else if ('\\' != *p
) {
1963 res
= mandoc_realloc(res
, ssz
+ 2);
1968 /* Search for term matches. */
1969 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
1970 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
1975 * A match has been found.
1976 * Append the match to the array and move
1977 * forward by its keysize.
1979 res
= mandoc_realloc
1980 (res
, ssz
+ cp
->val
.sz
+ 1);
1981 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
1983 p
+= (int)cp
->key
.sz
;
1988 * Handle escapes carefully: we need to copy
1989 * over just the escape itself, or else we might
1990 * do replacements within the escape itself.
1991 * Make sure to pass along the bogus string.
1994 esc
= mandoc_escape(&p
, NULL
, NULL
);
1995 if (ESCAPE_ERROR
== esc
) {
1997 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
1998 memcpy(res
+ ssz
, pp
, sz
);
2002 * We bail out on bad escapes.
2003 * No need to warn: we already did so when
2004 * roff_res() was called.
2007 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2008 memcpy(res
+ ssz
, pp
, sz
);
2012 res
[(int)ssz
] = '\0';
2017 * Find out whether a line is a macro line or not.
2018 * If it is, adjust the current position and return one; if it isn't,
2019 * return zero and don't change the current position.
2020 * If the control character has been set with `.cc', then let that grain
2022 * This is slightly contrary to groff, where using the non-breaking
2023 * control character when `cc' has been invoked will cause the
2024 * non-breaking macro contents to be printed verbatim.
2027 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
2033 if (0 != r
->control
&& cp
[pos
] == r
->control
)
2035 else if (0 != r
->control
)
2037 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
2039 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
2044 while (' ' == cp
[pos
] || '\t' == cp
[pos
])