]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.197 2014/03/07 18:37:37 schwarze Exp $ */
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
30 #include "libmandoc.h"
32 /* Maximum number of nested if-else conditionals. */
33 #define RSTACK_MAX 128
35 /* Maximum number of string expansions per line, to break infinite loops. */
36 #define EXPAND_LIMIT 1000
85 * An incredibly-simple string buffer.
88 char *p
; /* nil-terminated buffer */
89 size_t sz
; /* saved strlen(p) */
93 * A key-value roffstr pair as part of a singly-linked list.
98 struct roffkv
*next
; /* next in list */
102 * A single number register as part of a singly-linked list.
107 struct roffreg
*next
;
111 enum mparset parsetype
; /* requested parse type */
112 struct mparse
*parse
; /* parse point */
113 int quick
; /* skip standard macro deletion */
114 struct roffnode
*last
; /* leaf of stack */
115 enum roffrule rstack
[RSTACK_MAX
]; /* stack of !`ie' rules */
116 char control
; /* control character */
117 int rstackpos
; /* position in rstack */
118 struct roffreg
*regtab
; /* number registers */
119 struct roffkv
*strtab
; /* user-defined strings & macros */
120 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
121 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
122 const char *current_string
; /* value of last called user macro */
123 struct tbl_node
*first_tbl
; /* first table parsed */
124 struct tbl_node
*last_tbl
; /* last table parsed */
125 struct tbl_node
*tbl
; /* current table being parsed */
126 struct eqn_node
*last_eqn
; /* last equation parsed */
127 struct eqn_node
*first_eqn
; /* first equation parsed */
128 struct eqn_node
*eqn
; /* current equation being parsed */
132 enum rofft tok
; /* type of node */
133 struct roffnode
*parent
; /* up one in stack */
134 int line
; /* parse line */
135 int col
; /* parse col */
136 char *name
; /* node name, e.g. macro name */
137 char *end
; /* end-rules: custom token */
138 int endspan
; /* end-rules: next-line or infty */
139 enum roffrule rule
; /* current evaluation rule */
142 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
143 enum rofft tok, /* tok of macro */ \
144 char **bufp, /* input buffer */ \
145 size_t *szp, /* size of input buffer */ \
146 int ln, /* parse line */ \
147 int ppos, /* original pos in buffer */ \
148 int pos, /* current pos in buffer */ \
149 int *offs /* reset offset of buffer data */
151 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
154 const char *name
; /* macro name */
155 roffproc proc
; /* process new macro */
156 roffproc text
; /* process as child text of macro */
157 roffproc sub
; /* process as child of macro */
159 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
160 struct roffmac
*next
;
164 const char *name
; /* predefined input name */
165 const char *str
; /* replacement symbol */
168 #define PREDEF(__name, __str) \
169 { (__name), (__str) },
171 static enum rofft
roffhash_find(const char *, size_t);
172 static void roffhash_init(void);
173 static void roffnode_cleanscope(struct roff
*);
174 static void roffnode_pop(struct roff
*);
175 static void roffnode_push(struct roff
*, enum rofft
,
176 const char *, int, int);
177 static enum rofferr
roff_block(ROFF_ARGS
);
178 static enum rofferr
roff_block_text(ROFF_ARGS
);
179 static enum rofferr
roff_block_sub(ROFF_ARGS
);
180 static enum rofferr
roff_cblock(ROFF_ARGS
);
181 static enum rofferr
roff_cc(ROFF_ARGS
);
182 static void roff_ccond(struct roff
*, int, int);
183 static enum rofferr
roff_cond(ROFF_ARGS
);
184 static enum rofferr
roff_cond_text(ROFF_ARGS
);
185 static enum rofferr
roff_cond_sub(ROFF_ARGS
);
186 static enum rofferr
roff_ds(ROFF_ARGS
);
187 static enum roffrule
roff_evalcond(const char *, int *);
188 static void roff_free1(struct roff
*);
189 static void roff_freereg(struct roffreg
*);
190 static void roff_freestr(struct roffkv
*);
191 static char *roff_getname(struct roff
*, char **, int, int);
192 static int roff_getnum(const char *, int *, int *);
193 static int roff_getop(const char *, int *, char *);
194 static int roff_getregn(const struct roff
*,
195 const char *, size_t);
196 static int roff_getregro(const char *name
);
197 static const char *roff_getstrn(const struct roff
*,
198 const char *, size_t);
199 static enum rofferr
roff_it(ROFF_ARGS
);
200 static enum rofferr
roff_line_ignore(ROFF_ARGS
);
201 static enum rofferr
roff_nr(ROFF_ARGS
);
202 static void roff_openeqn(struct roff
*, const char *,
203 int, int, const char *);
204 static enum rofft
roff_parse(struct roff
*, const char *, int *);
205 static enum rofferr
roff_parsetext(char **, size_t *, int, int *);
206 static enum rofferr
roff_res(struct roff
*,
207 char **, size_t *, int, int);
208 static enum rofferr
roff_rm(ROFF_ARGS
);
209 static void roff_setstr(struct roff
*,
210 const char *, const char *, int);
211 static void roff_setstrn(struct roffkv
**, const char *,
212 size_t, const char *, size_t, int);
213 static enum rofferr
roff_so(ROFF_ARGS
);
214 static enum rofferr
roff_tr(ROFF_ARGS
);
215 static enum rofferr
roff_Dd(ROFF_ARGS
);
216 static enum rofferr
roff_TH(ROFF_ARGS
);
217 static enum rofferr
roff_TE(ROFF_ARGS
);
218 static enum rofferr
roff_TS(ROFF_ARGS
);
219 static enum rofferr
roff_EQ(ROFF_ARGS
);
220 static enum rofferr
roff_EN(ROFF_ARGS
);
221 static enum rofferr
roff_T_(ROFF_ARGS
);
222 static enum rofferr
roff_userdef(ROFF_ARGS
);
224 /* See roffhash_find() */
228 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
230 static struct roffmac
*hash
[HASHWIDTH
];
232 static struct roffmac roffs
[ROFF_MAX
] = {
233 { "ad", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
234 { "am", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
235 { "ami", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
236 { "am1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
237 { "as", roff_ds
, NULL
, NULL
, 0, NULL
},
238 { "cc", roff_cc
, NULL
, NULL
, 0, NULL
},
239 { "ce", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
240 { "de", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
241 { "dei", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
242 { "de1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
243 { "ds", roff_ds
, NULL
, NULL
, 0, NULL
},
244 { "el", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
245 { "fam", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
246 { "hw", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
247 { "hy", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
248 { "ie", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
249 { "if", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
250 { "ig", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
251 { "it", roff_it
, NULL
, NULL
, 0, NULL
},
252 { "ne", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
253 { "nh", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
254 { "nr", roff_nr
, NULL
, NULL
, 0, NULL
},
255 { "ns", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
256 { "ps", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
257 { "rm", roff_rm
, NULL
, NULL
, 0, NULL
},
258 { "so", roff_so
, NULL
, NULL
, 0, NULL
},
259 { "ta", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
260 { "tr", roff_tr
, NULL
, NULL
, 0, NULL
},
261 { "Dd", roff_Dd
, NULL
, NULL
, 0, NULL
},
262 { "TH", roff_TH
, NULL
, NULL
, 0, NULL
},
263 { "TS", roff_TS
, NULL
, NULL
, 0, NULL
},
264 { "TE", roff_TE
, NULL
, NULL
, 0, NULL
},
265 { "T&", roff_T_
, NULL
, NULL
, 0, NULL
},
266 { "EQ", roff_EQ
, NULL
, NULL
, 0, NULL
},
267 { "EN", roff_EN
, NULL
, NULL
, 0, NULL
},
268 { ".", roff_cblock
, NULL
, NULL
, 0, NULL
},
269 { NULL
, roff_userdef
, NULL
, NULL
, 0, NULL
},
272 const char *const __mdoc_reserved
[] = {
273 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
274 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
275 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
276 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
277 "Ds", "Dt", "Dv", "Dx", "D1",
278 "Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
279 "En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
280 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
281 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
282 "Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
283 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
284 "Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
285 "Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
286 "Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
287 "Ss", "St", "Sx", "Sy",
288 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
289 "%A", "%B", "%D", "%I", "%J", "%N", "%O",
290 "%P", "%Q", "%R", "%T", "%U", "%V",
294 const char *const __man_reserved
[] = {
295 "AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
296 "EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
297 "LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
298 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
299 "TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
303 /* Array of injected predefined strings. */
304 #define PREDEFS_MAX 38
305 static const struct predef predefs
[PREDEFS_MAX
] = {
306 #include "predefs.in"
309 /* See roffhash_find() */
310 #define ROFF_HASH(p) (p[0] - ASCII_LO)
312 static int roffit_lines
; /* number of lines to delay */
313 static char *roffit_macro
; /* nil-terminated macro line */
321 for (i
= 0; i
< (int)ROFF_USERDEF
; i
++) {
322 assert(roffs
[i
].name
[0] >= ASCII_LO
);
323 assert(roffs
[i
].name
[0] <= ASCII_HI
);
325 buc
= ROFF_HASH(roffs
[i
].name
);
327 if (NULL
!= (n
= hash
[buc
])) {
328 for ( ; n
->next
; n
= n
->next
)
332 hash
[buc
] = &roffs
[i
];
337 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
338 * the nil-terminated string name could be found.
341 roffhash_find(const char *p
, size_t s
)
347 * libroff has an extremely simple hashtable, for the time
348 * being, which simply keys on the first character, which must
349 * be printable, then walks a chain. It works well enough until
353 if (p
[0] < ASCII_LO
|| p
[0] > ASCII_HI
)
358 if (NULL
== (n
= hash
[buc
]))
360 for ( ; n
; n
= n
->next
)
361 if (0 == strncmp(n
->name
, p
, s
) && '\0' == n
->name
[(int)s
])
362 return((enum rofft
)(n
- roffs
));
369 * Pop the current node off of the stack of roff instructions currently
373 roffnode_pop(struct roff
*r
)
380 r
->last
= r
->last
->parent
;
388 * Push a roff node onto the instruction stack. This must later be
389 * removed with roffnode_pop().
392 roffnode_push(struct roff
*r
, enum rofft tok
, const char *name
,
397 p
= mandoc_calloc(1, sizeof(struct roffnode
));
400 p
->name
= mandoc_strdup(name
);
404 p
->rule
= p
->parent
? p
->parent
->rule
: ROFFRULE_DENY
;
411 roff_free1(struct roff
*r
)
413 struct tbl_node
*tbl
;
417 while (NULL
!= (tbl
= r
->first_tbl
)) {
418 r
->first_tbl
= tbl
->next
;
422 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
424 while (NULL
!= (e
= r
->first_eqn
)) {
425 r
->first_eqn
= e
->next
;
429 r
->first_eqn
= r
->last_eqn
= r
->eqn
= NULL
;
434 roff_freestr(r
->strtab
);
435 roff_freestr(r
->xmbtab
);
437 r
->strtab
= r
->xmbtab
= NULL
;
439 roff_freereg(r
->regtab
);
444 for (i
= 0; i
< 128; i
++)
452 roff_reset(struct roff
*r
)
461 roff_free(struct roff
*r
)
470 roff_alloc(enum mparset type
, struct mparse
*parse
, int quick
)
474 r
= mandoc_calloc(1, sizeof(struct roff
));
486 * In the current line, expand user-defined strings ("\*")
487 * and references to number registers ("\n").
488 * Also check the syntax of other escape sequences.
491 roff_res(struct roff
*r
, char **bufp
, size_t *szp
, int ln
, int pos
)
493 char ubuf
[12]; /* buffer to print the number */
494 const char *stesc
; /* start of an escape sequence ('\\') */
495 const char *stnam
; /* start of the name, after "[(*" */
496 const char *cp
; /* end of the name, e.g. before ']' */
497 const char *res
; /* the string to be substituted */
498 char *nbuf
; /* new buffer to copy bufp to */
499 size_t nsz
; /* size of the new buffer */
500 size_t maxl
; /* expected length of the escape name */
501 size_t naml
; /* actual length of the escape name */
502 int expand_count
; /* to avoid infinite loops */
508 while (NULL
!= (cp
= strchr(cp
, '\\'))) {
512 * The second character must be an asterisk or an n.
513 * If it isn't, skip it anyway: It is escaped,
514 * so it can't start another escape sequence.
528 if (ESCAPE_ERROR
!= mandoc_escape(&cp
, NULL
, NULL
))
531 (MANDOCERR_BADESCAPE
, r
->parse
,
532 ln
, (int)(stesc
- *bufp
), NULL
);
539 * The third character decides the length
540 * of the name of the string or register.
541 * Save a pointer to the name.
561 /* Advance to the end of the name. */
563 for (naml
= 0; 0 == maxl
|| naml
< maxl
; naml
++, cp
++) {
566 (MANDOCERR_BADESCAPE
,
568 (int)(stesc
- *bufp
), NULL
);
571 if (0 == maxl
&& ']' == *cp
)
576 * Retrieve the replacement string; if it is
577 * undefined, resume searching for escapes.
581 res
= roff_getstrn(r
, stnam
, naml
);
583 snprintf(ubuf
, sizeof(ubuf
), "%d",
584 roff_getregn(r
, stnam
, naml
));
588 (MANDOCERR_BADESCAPE
, r
->parse
,
589 ln
, (int)(stesc
- *bufp
), NULL
);
593 /* Replace the escape sequence by the string. */
597 nsz
= *szp
+ strlen(res
) + 1;
598 nbuf
= mandoc_malloc(nsz
);
600 strlcpy(nbuf
, *bufp
, (size_t)(stesc
- *bufp
+ 1));
601 strlcat(nbuf
, res
, nsz
);
602 strlcat(nbuf
, cp
+ (maxl
? 0 : 1), nsz
);
609 if (EXPAND_LIMIT
>= ++expand_count
)
612 /* Just leave the string unexpanded. */
613 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
, ln
, pos
, NULL
);
620 * Process text streams:
621 * Convert all breakable hyphens into ASCII_HYPH.
622 * Decrement and spring input line trap.
625 roff_parsetext(char **bufp
, size_t *szp
, int pos
, int *offs
)
633 start
= p
= *bufp
+ pos
;
636 sz
= strcspn(p
, "-\\");
643 /* Skip over escapes. */
645 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
646 if (ESCAPE_ERROR
== esc
)
649 } else if (p
== start
) {
654 if (isalpha((unsigned char)p
[-1]) &&
655 isalpha((unsigned char)p
[1]))
660 /* Spring the input line trap. */
661 if (1 == roffit_lines
) {
662 isz
= asprintf(&p
, "%s\n.%s", *bufp
, roffit_macro
);
665 exit((int)MANDOCLEVEL_SYSERR
);
673 return(ROFF_REPARSE
);
674 } else if (1 < roffit_lines
)
680 roff_parseln(struct roff
*r
, int ln
, char **bufp
,
681 size_t *szp
, int pos
, int *offs
)
688 * Run the reserved-word filter only if we have some reserved
692 e
= roff_res(r
, bufp
, szp
, ln
, pos
);
695 assert(ROFF_CONT
== e
);
698 ctl
= roff_getcontrol(r
, *bufp
, &pos
);
701 * First, if a scope is open and we're not a macro, pass the
702 * text through the macro's filter. If a scope isn't open and
703 * we're not a macro, just let it through.
704 * Finally, if there's an equation scope open, divert it into it
705 * no matter our state.
708 if (r
->last
&& ! ctl
) {
710 assert(roffs
[t
].text
);
712 (r
, t
, bufp
, szp
, ln
, pos
, pos
, offs
);
713 assert(ROFF_IGN
== e
|| ROFF_CONT
== e
);
718 return(eqn_read(&r
->eqn
, ln
, *bufp
, ppos
, offs
));
721 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
722 return(roff_parsetext(bufp
, szp
, pos
, offs
));
726 * If a scope is open, go to the child handler for that macro,
727 * as it may want to preprocess before doing anything with it.
728 * Don't do so if an equation is open.
733 assert(roffs
[t
].sub
);
734 return((*roffs
[t
].sub
)
736 ln
, ppos
, pos
, offs
));
740 * Lastly, as we've no scope open, try to look up and execute
741 * the new macro. If no macro is found, simply return and let
742 * the compilers handle it.
745 if (ROFF_MAX
== (t
= roff_parse(r
, *bufp
, &pos
)))
748 assert(roffs
[t
].proc
);
749 return((*roffs
[t
].proc
)
751 ln
, ppos
, pos
, offs
));
756 roff_endparse(struct roff
*r
)
760 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
761 r
->last
->line
, r
->last
->col
, NULL
);
764 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
765 r
->eqn
->eqn
.ln
, r
->eqn
->eqn
.pos
, NULL
);
770 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
771 r
->tbl
->line
, r
->tbl
->pos
, NULL
);
777 * Parse a roff node's type from the input buffer. This must be in the
778 * form of ".foo xxx" in the usual way.
781 roff_parse(struct roff
*r
, const char *buf
, int *pos
)
787 if ('\0' == buf
[*pos
] || '"' == buf
[*pos
] ||
788 '\t' == buf
[*pos
] || ' ' == buf
[*pos
])
791 /* We stop the macro parse at an escape, tab, space, or nil. */
794 maclen
= strcspn(mac
, " \\\t\0");
796 t
= (r
->current_string
= roff_getstrn(r
, mac
, maclen
))
797 ? ROFF_USERDEF
: roffhash_find(mac
, maclen
);
801 while (buf
[*pos
] && ' ' == buf
[*pos
])
809 roff_cblock(ROFF_ARGS
)
813 * A block-close `..' should only be invoked as a child of an
814 * ignore macro, otherwise raise a warning and just ignore it.
817 if (NULL
== r
->last
) {
818 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
822 switch (r
->last
->tok
) {
830 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
837 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
842 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
845 roffnode_cleanscope(r
);
852 roffnode_cleanscope(struct roff
*r
)
856 if (--r
->last
->endspan
!= 0)
864 roff_ccond(struct roff
*r
, int ln
, int ppos
)
867 if (NULL
== r
->last
) {
868 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
872 switch (r
->last
->tok
) {
880 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
884 if (r
->last
->endspan
> -1) {
885 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
890 roffnode_cleanscope(r
);
897 roff_block(ROFF_ARGS
)
905 if (ROFF_ig
!= tok
) {
906 if ('\0' == (*bufp
)[pos
]) {
907 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
912 * Re-write `de1', since we don't really care about
913 * groff's strange compatibility mode, into `de'.
921 mandoc_msg(MANDOCERR_REQUEST
, r
->parse
, ln
, ppos
,
924 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
927 while (isspace((unsigned char)(*bufp
)[pos
]))
928 (*bufp
)[pos
++] = '\0';
931 roffnode_push(r
, tok
, name
, ln
, ppos
);
934 * At the beginning of a `de' macro, clear the existing string
935 * with the same name, if there is one. New content will be
936 * appended from roff_block_text() in multiline mode.
940 roff_setstr(r
, name
, "", 0);
942 if ('\0' == (*bufp
)[pos
])
945 /* If present, process the custom end-of-line marker. */
948 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
952 * Note: groff does NOT like escape characters in the input.
953 * Instead of detecting this, we're just going to let it fly and
958 sz
= (size_t)(pos
- sv
);
960 if (1 == sz
&& '.' == (*bufp
)[sv
])
963 r
->last
->end
= mandoc_malloc(sz
+ 1);
965 memcpy(r
->last
->end
, *bufp
+ sv
, sz
);
966 r
->last
->end
[(int)sz
] = '\0';
969 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
977 roff_block_sub(ROFF_ARGS
)
983 * First check whether a custom macro exists at this level. If
984 * it does, then check against it. This is one of groff's
985 * stranger behaviours. If we encountered a custom end-scope
986 * tag and that tag also happens to be a "real" macro, then we
987 * need to try interpreting it again as a real macro. If it's
988 * not, then return ignore. Else continue.
992 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
993 if ((*bufp
)[i
] != r
->last
->end
[j
])
996 if ('\0' == r
->last
->end
[j
] &&
997 ('\0' == (*bufp
)[i
] ||
999 '\t' == (*bufp
)[i
])) {
1001 roffnode_cleanscope(r
);
1003 while (' ' == (*bufp
)[i
] || '\t' == (*bufp
)[i
])
1007 if (ROFF_MAX
!= roff_parse(r
, *bufp
, &pos
))
1014 * If we have no custom end-query or lookup failed, then try
1015 * pulling it out of the hashtable.
1018 t
= roff_parse(r
, *bufp
, &pos
);
1021 * Macros other than block-end are only significant
1022 * in `de' blocks; elsewhere, simply throw them away.
1024 if (ROFF_cblock
!= t
) {
1026 roff_setstr(r
, r
->last
->name
, *bufp
+ ppos
, 2);
1030 assert(roffs
[t
].proc
);
1031 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1032 ln
, ppos
, pos
, offs
));
1038 roff_block_text(ROFF_ARGS
)
1042 roff_setstr(r
, r
->last
->name
, *bufp
+ pos
, 2);
1050 roff_cond_sub(ROFF_ARGS
)
1057 roffnode_cleanscope(r
);
1058 t
= roff_parse(r
, *bufp
, &pos
);
1061 * Fully handle known macros when they are structurally
1062 * required or when the conditional evaluated to true.
1065 if ((ROFF_MAX
!= t
) &&
1066 (ROFFRULE_ALLOW
== rr
||
1067 ROFFMAC_STRUCT
& roffs
[t
].flags
)) {
1068 assert(roffs
[t
].proc
);
1069 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1070 ln
, ppos
, pos
, offs
));
1074 * If `\}' occurs on a macro line without a preceding macro,
1075 * drop the line completely.
1079 if ('\\' == ep
[0] && '}' == ep
[1])
1082 /* Always check for the closing delimiter `\}'. */
1084 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1085 if ('}' == *(++ep
)) {
1087 roff_ccond(r
, ln
, ep
- *bufp
- 1);
1091 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
1096 roff_cond_text(ROFF_ARGS
)
1102 roffnode_cleanscope(r
);
1105 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1106 if ('}' == *(++ep
)) {
1108 roff_ccond(r
, ln
, ep
- *bufp
- 1);
1112 return(ROFFRULE_DENY
== rr
? ROFF_IGN
: ROFF_CONT
);
1116 roff_getnum(const char *v
, int *pos
, int *res
)
1125 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
1126 *res
+= 10 * *res
+ v
[p
] - '0';
1138 roff_getop(const char *v
, int *pos
, char *res
)
1143 e
= v
[*pos
+ 1] == '=';
1165 static enum roffrule
1166 roff_evalcond(const char *v
, int *pos
)
1174 return(ROFFRULE_ALLOW
);
1181 return(ROFFRULE_DENY
);
1191 if (!roff_getnum(v
, pos
, &lh
))
1192 return ROFFRULE_DENY
;
1193 if (!roff_getop(v
, pos
, &op
)) {
1198 if (!roff_getnum(v
, pos
, &rh
))
1199 return ROFFRULE_DENY
;
1217 return ROFFRULE_DENY
;
1222 return lh
? ROFFRULE_ALLOW
: ROFFRULE_DENY
;
1227 roff_line_ignore(ROFF_ARGS
)
1235 roff_cond(ROFF_ARGS
)
1238 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
1241 * An `.el' has no conditional body: it will consume the value
1242 * of the current rstack entry set in prior `ie' calls or
1245 * If we're not an `el', however, then evaluate the conditional.
1248 r
->last
->rule
= ROFF_el
== tok
?
1250 ROFFRULE_DENY
: r
->rstack
[r
->rstackpos
--]) :
1251 roff_evalcond(*bufp
, &pos
);
1254 * An if-else will put the NEGATION of the current evaluated
1255 * conditional into the stack of rules.
1258 if (ROFF_ie
== tok
) {
1259 if (r
->rstackpos
== RSTACK_MAX
- 1) {
1260 mandoc_msg(MANDOCERR_MEM
,
1261 r
->parse
, ln
, ppos
, NULL
);
1264 r
->rstack
[++r
->rstackpos
] =
1265 ROFFRULE_DENY
== r
->last
->rule
?
1266 ROFFRULE_ALLOW
: ROFFRULE_DENY
;
1269 /* If the parent has false as its rule, then so do we. */
1271 if (r
->last
->parent
&& ROFFRULE_DENY
== r
->last
->parent
->rule
)
1272 r
->last
->rule
= ROFFRULE_DENY
;
1276 * If there is nothing on the line after the conditional,
1277 * not even whitespace, use next-line scope.
1280 if ('\0' == (*bufp
)[pos
]) {
1281 r
->last
->endspan
= 2;
1285 while (' ' == (*bufp
)[pos
])
1288 /* An opening brace requests multiline scope. */
1290 if ('\\' == (*bufp
)[pos
] && '{' == (*bufp
)[pos
+ 1]) {
1291 r
->last
->endspan
= -1;
1297 * Anything else following the conditional causes
1298 * single-line scope. Warn if the scope contains
1299 * nothing but trailing whitespace.
1302 if ('\0' == (*bufp
)[pos
])
1303 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
1305 r
->last
->endspan
= 1;
1317 char *name
, *string
;
1320 * A symbol is named by the first word following the macro
1321 * invocation up to a space. Its value is anything after the
1322 * name's trailing whitespace and optional double-quote. Thus,
1326 * will have `bar " ' as its value.
1329 string
= *bufp
+ pos
;
1330 name
= roff_getname(r
, &string
, ln
, pos
);
1334 /* Read past initial double-quote. */
1338 /* The rest is the value. */
1339 roff_setstr(r
, name
, string
, ROFF_as
== tok
);
1344 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
1346 struct roffreg
*reg
;
1348 /* Search for an existing register with the same name. */
1351 while (reg
&& strcmp(name
, reg
->key
.p
))
1355 /* Create a new register. */
1356 reg
= mandoc_malloc(sizeof(struct roffreg
));
1357 reg
->key
.p
= mandoc_strdup(name
);
1358 reg
->key
.sz
= strlen(name
);
1360 reg
->next
= r
->regtab
;
1366 else if ('-' == sign
)
1373 * Handle some predefined read-only number registers.
1374 * For now, return -1 if the requested register is not predefined;
1375 * in case a predefined read-only register having the value -1
1376 * were to turn up, another special value would have to be chosen.
1379 roff_getregro(const char *name
)
1383 case ('A'): /* ASCII approximation mode is always off. */
1385 case ('g'): /* Groff compatibility mode is always on. */
1387 case ('H'): /* Fixed horizontal resolution. */
1389 case ('j'): /* Always adjust left margin only. */
1391 case ('T'): /* Some output device is always defined. */
1393 case ('V'): /* Fixed vertical resolution. */
1401 roff_getreg(const struct roff
*r
, const char *name
)
1403 struct roffreg
*reg
;
1406 if ('.' == name
[0] && '\0' != name
[1] && '\0' == name
[2]) {
1407 val
= roff_getregro(name
+ 1);
1412 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1413 if (0 == strcmp(name
, reg
->key
.p
))
1420 roff_getregn(const struct roff
*r
, const char *name
, size_t len
)
1422 struct roffreg
*reg
;
1425 if ('.' == name
[0] && 2 == len
) {
1426 val
= roff_getregro(name
+ 1);
1431 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1432 if (len
== reg
->key
.sz
&&
1433 0 == strncmp(name
, reg
->key
.p
, len
))
1440 roff_freereg(struct roffreg
*reg
)
1442 struct roffreg
*old_reg
;
1444 while (NULL
!= reg
) {
1463 key
= roff_getname(r
, &val
, ln
, pos
);
1466 if ('+' == sign
|| '-' == sign
)
1469 sz
= strspn(val
, "0123456789");
1470 iv
= sz
? mandoc_strntoi(val
, sz
, 10) : 0;
1472 roff_setreg(r
, key
, iv
, sign
);
1485 while ('\0' != *cp
) {
1486 name
= roff_getname(r
, &cp
, ln
, (int)(cp
- *bufp
));
1488 roff_setstr(r
, name
, NULL
, 0);
1501 /* Parse the number of lines. */
1503 len
= strcspn(cp
, " \t");
1505 if ((iv
= mandoc_strntoi(cp
, len
, 10)) <= 0) {
1506 mandoc_msg(MANDOCERR_NUMERIC
, r
->parse
,
1507 ln
, ppos
, *bufp
+ 1);
1512 /* Arm the input line trap. */
1514 roffit_macro
= mandoc_strdup(cp
);
1522 const char *const *cp
;
1524 if (0 == r
->quick
&& MPARSE_MDOC
!= r
->parsetype
)
1525 for (cp
= __mdoc_reserved
; *cp
; cp
++)
1526 roff_setstr(r
, *cp
, NULL
, 0);
1535 const char *const *cp
;
1537 if (0 == r
->quick
&& MPARSE_MDOC
!= r
->parsetype
)
1538 for (cp
= __man_reserved
; *cp
; cp
++)
1539 roff_setstr(r
, *cp
, NULL
, 0);
1550 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1563 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1565 tbl_restart(ppos
, ln
, r
->tbl
);
1572 roff_closeeqn(struct roff
*r
)
1575 return(r
->eqn
&& ROFF_EQN
== eqn_end(&r
->eqn
) ? 1 : 0);
1580 roff_openeqn(struct roff
*r
, const char *name
, int line
,
1581 int offs
, const char *buf
)
1586 assert(NULL
== r
->eqn
);
1587 e
= eqn_alloc(name
, offs
, line
, r
->parse
);
1590 r
->last_eqn
->next
= e
;
1592 r
->first_eqn
= r
->last_eqn
= e
;
1594 r
->eqn
= r
->last_eqn
= e
;
1598 eqn_read(&r
->eqn
, line
, buf
, offs
, &poff
);
1607 roff_openeqn(r
, *bufp
+ pos
, ln
, ppos
, NULL
);
1616 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1624 struct tbl_node
*tbl
;
1627 mandoc_msg(MANDOCERR_SCOPEBROKEN
, r
->parse
, ln
, ppos
, NULL
);
1631 tbl
= tbl_alloc(ppos
, ln
, r
->parse
);
1634 r
->last_tbl
->next
= tbl
;
1636 r
->first_tbl
= r
->last_tbl
= tbl
;
1638 r
->tbl
= r
->last_tbl
= tbl
;
1650 if ('\0' == *p
|| '.' == (r
->control
= *p
++))
1654 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1663 const char *p
, *first
, *second
;
1665 enum mandoc_esc esc
;
1670 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1674 while ('\0' != *p
) {
1678 if ('\\' == *first
) {
1679 esc
= mandoc_escape(&p
, NULL
, NULL
);
1680 if (ESCAPE_ERROR
== esc
) {
1682 (MANDOCERR_BADESCAPE
, r
->parse
,
1683 ln
, (int)(p
- *bufp
), NULL
);
1686 fsz
= (size_t)(p
- first
);
1690 if ('\\' == *second
) {
1691 esc
= mandoc_escape(&p
, NULL
, NULL
);
1692 if (ESCAPE_ERROR
== esc
) {
1694 (MANDOCERR_BADESCAPE
, r
->parse
,
1695 ln
, (int)(p
- *bufp
), NULL
);
1698 ssz
= (size_t)(p
- second
);
1699 } else if ('\0' == *second
) {
1700 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
,
1701 ln
, (int)(p
- *bufp
), NULL
);
1707 roff_setstrn(&r
->xmbtab
, first
,
1708 fsz
, second
, ssz
, 0);
1712 if (NULL
== r
->xtab
)
1713 r
->xtab
= mandoc_calloc
1714 (128, sizeof(struct roffstr
));
1716 free(r
->xtab
[(int)*first
].p
);
1717 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
1718 r
->xtab
[(int)*first
].sz
= ssz
;
1730 mandoc_msg(MANDOCERR_SO
, r
->parse
, ln
, ppos
, NULL
);
1733 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1734 * opening anything that's not in our cwd or anything beneath
1735 * it. Thus, explicitly disallow traversing up the file-system
1736 * or using absolute paths.
1740 if ('/' == *name
|| strstr(name
, "../") || strstr(name
, "/..")) {
1741 mandoc_msg(MANDOCERR_SOPATH
, r
->parse
, ln
, pos
, NULL
);
1751 roff_userdef(ROFF_ARGS
)
1758 * Collect pointers to macro argument strings
1759 * and NUL-terminate them.
1762 for (i
= 0; i
< 9; i
++)
1763 arg
[i
] = '\0' == *cp
? "" :
1764 mandoc_getarg(r
->parse
, &cp
, ln
, &pos
);
1767 * Expand macro arguments.
1770 n1
= cp
= mandoc_strdup(r
->current_string
);
1771 while (NULL
!= (cp
= strstr(cp
, "\\$"))) {
1773 if (0 > i
|| 8 < i
) {
1774 /* Not an argument invocation. */
1779 *szp
= strlen(n1
) - 3 + strlen(arg
[i
]) + 1;
1780 n2
= mandoc_malloc(*szp
);
1782 strlcpy(n2
, n1
, (size_t)(cp
- n1
+ 1));
1783 strlcat(n2
, arg
[i
], *szp
);
1784 strlcat(n2
, cp
+ 3, *szp
);
1786 cp
= n2
+ (cp
- n1
);
1792 * Replace the macro invocation
1793 * by the expanded macro.
1798 *szp
= strlen(*bufp
) + 1;
1800 return(*szp
> 1 && '\n' == (*bufp
)[(int)*szp
- 2] ?
1801 ROFF_REPARSE
: ROFF_APPEND
);
1805 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
1813 /* Read until end of name. */
1814 for (cp
= name
; '\0' != *cp
&& ' ' != *cp
; cp
++) {
1820 mandoc_msg(MANDOCERR_NAMESC
, r
->parse
, ln
, pos
, NULL
);
1825 /* Nil-terminate name. */
1829 /* Read past spaces. */
1838 * Store *string into the user-defined string called *name.
1839 * To clear an existing entry, call with (*r, *name, NULL, 0).
1840 * append == 0: replace mode
1841 * append == 1: single-line append mode
1842 * append == 2: multiline append mode, append '\n' after each call
1845 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
1849 roff_setstrn(&r
->strtab
, name
, strlen(name
), string
,
1850 string
? strlen(string
) : 0, append
);
1854 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
1855 const char *string
, size_t stringsz
, int append
)
1860 size_t oldch
, newch
;
1862 /* Search for an existing string with the same name. */
1865 while (n
&& strcmp(name
, n
->key
.p
))
1869 /* Create a new string table entry. */
1870 n
= mandoc_malloc(sizeof(struct roffkv
));
1871 n
->key
.p
= mandoc_strndup(name
, namesz
);
1877 } else if (0 == append
) {
1887 * One additional byte for the '\n' in multiline mode,
1888 * and one for the terminating '\0'.
1890 newch
= stringsz
+ (1 < append
? 2u : 1u);
1892 if (NULL
== n
->val
.p
) {
1893 n
->val
.p
= mandoc_malloc(newch
);
1898 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
1901 /* Skip existing content in the destination buffer. */
1902 c
= n
->val
.p
+ (int)oldch
;
1904 /* Append new content to the destination buffer. */
1906 while (i
< (int)stringsz
) {
1908 * Rudimentary roff copy mode:
1909 * Handle escaped backslashes.
1911 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
1916 /* Append terminating bytes. */
1921 n
->val
.sz
= (int)(c
- n
->val
.p
);
1925 roff_getstrn(const struct roff
*r
, const char *name
, size_t len
)
1927 const struct roffkv
*n
;
1930 for (n
= r
->strtab
; n
; n
= n
->next
)
1931 if (0 == strncmp(name
, n
->key
.p
, len
) &&
1932 '\0' == n
->key
.p
[(int)len
])
1935 for (i
= 0; i
< PREDEFS_MAX
; i
++)
1936 if (0 == strncmp(name
, predefs
[i
].name
, len
) &&
1937 '\0' == predefs
[i
].name
[(int)len
])
1938 return(predefs
[i
].str
);
1944 roff_freestr(struct roffkv
*r
)
1946 struct roffkv
*n
, *nn
;
1948 for (n
= r
; n
; n
= nn
) {
1956 const struct tbl_span
*
1957 roff_span(const struct roff
*r
)
1960 return(r
->tbl
? tbl_span(r
->tbl
) : NULL
);
1964 roff_eqn(const struct roff
*r
)
1967 return(r
->last_eqn
? &r
->last_eqn
->eqn
: NULL
);
1971 * Duplicate an input string, making the appropriate character
1972 * conversions (as stipulated by `tr') along the way.
1973 * Returns a heap-allocated string with all the replacements made.
1976 roff_strdup(const struct roff
*r
, const char *p
)
1978 const struct roffkv
*cp
;
1982 enum mandoc_esc esc
;
1984 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
1985 return(mandoc_strdup(p
));
1986 else if ('\0' == *p
)
1987 return(mandoc_strdup(""));
1990 * Step through each character looking for term matches
1991 * (remember that a `tr' can be invoked with an escape, which is
1992 * a glyph but the escape is multi-character).
1993 * We only do this if the character hash has been initialised
1994 * and the string is >0 length.
2000 while ('\0' != *p
) {
2001 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(int)*p
].p
) {
2002 sz
= r
->xtab
[(int)*p
].sz
;
2003 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2004 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
2008 } else if ('\\' != *p
) {
2009 res
= mandoc_realloc(res
, ssz
+ 2);
2014 /* Search for term matches. */
2015 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
2016 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
2021 * A match has been found.
2022 * Append the match to the array and move
2023 * forward by its keysize.
2025 res
= mandoc_realloc
2026 (res
, ssz
+ cp
->val
.sz
+ 1);
2027 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
2029 p
+= (int)cp
->key
.sz
;
2034 * Handle escapes carefully: we need to copy
2035 * over just the escape itself, or else we might
2036 * do replacements within the escape itself.
2037 * Make sure to pass along the bogus string.
2040 esc
= mandoc_escape(&p
, NULL
, NULL
);
2041 if (ESCAPE_ERROR
== esc
) {
2043 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2044 memcpy(res
+ ssz
, pp
, sz
);
2048 * We bail out on bad escapes.
2049 * No need to warn: we already did so when
2050 * roff_res() was called.
2053 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2054 memcpy(res
+ ssz
, pp
, sz
);
2058 res
[(int)ssz
] = '\0';
2063 * Find out whether a line is a macro line or not.
2064 * If it is, adjust the current position and return one; if it isn't,
2065 * return zero and don't change the current position.
2066 * If the control character has been set with `.cc', then let that take precedence.
2068 * This is slightly contrary to groff, where using the non-breaking
2069 * control character when `cc' has been invoked will cause the
2070 * non-breaking macro contents to be printed verbatim.
2073 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
2079 if (0 != r
->control
&& cp
[pos
] == r
->control
)
2081 else if (0 != r
->control
)
2083 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
2085 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
2090 while (' ' == cp
[pos
] || '\t' == cp
[pos
])