]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1316bfa9d2569997100c8d17cdb66dc029060f35
1 /* $Id: roff.c,v 1.203 2014/04/05 20:34:57 schwarze Exp $ */
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
29 #include "mandoc_aux.h"
31 #include "libmandoc.h"
33 /* Maximum number of nested if-else conditionals. */
34 #define RSTACK_MAX 128
36 /* Maximum number of string expansions per line, to break infinite loops. */
37 #define EXPAND_LIMIT 1000
82 * An incredibly-simple string buffer.
85 char *p
; /* nil-terminated buffer */
86 size_t sz
; /* saved strlen(p) */
90 * A key-value roffstr pair as part of a singly-linked list.
95 struct roffkv
*next
; /* next in list */
99 * A single number register as part of a singly-linked list.
104 struct roffreg
*next
;
108 struct mparse
*parse
; /* parse point */
109 int options
; /* parse options */
110 struct roffnode
*last
; /* leaf of stack */
111 int rstack
[RSTACK_MAX
]; /* stack of !`ie' rules */
112 char control
; /* control character */
113 int rstackpos
; /* position in rstack */
114 struct roffreg
*regtab
; /* number registers */
115 struct roffkv
*strtab
; /* user-defined strings & macros */
116 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
117 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
118 const char *current_string
; /* value of last called user macro */
119 struct tbl_node
*first_tbl
; /* first table parsed */
120 struct tbl_node
*last_tbl
; /* last table parsed */
121 struct tbl_node
*tbl
; /* current table being parsed */
122 struct eqn_node
*last_eqn
; /* last equation parsed */
123 struct eqn_node
*first_eqn
; /* first equation parsed */
124 struct eqn_node
*eqn
; /* current equation being parsed */
128 enum rofft tok
; /* type of node */
129 struct roffnode
*parent
; /* up one in stack */
130 int line
; /* parse line */
131 int col
; /* parse col */
132 char *name
; /* node name, e.g. macro name */
133 char *end
; /* end-rules: custom token */
134 int endspan
; /* end-rules: next-line or infty */
135 int rule
; /* current evaluation rule */
138 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
139 enum rofft tok, /* tok of macro */ \
140 char **bufp, /* input buffer */ \
141 size_t *szp, /* size of input buffer */ \
142 int ln, /* parse line */ \
143 int ppos, /* original pos in buffer */ \
144 int pos, /* current pos in buffer */ \
145 int *offs /* reset offset of buffer data */
147 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
150 const char *name
; /* macro name */
151 roffproc proc
; /* process new macro */
152 roffproc text
; /* process as child text of macro */
153 roffproc sub
; /* process as child of macro */
155 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
156 struct roffmac
*next
;
160 const char *name
; /* predefined input name */
161 const char *str
; /* replacement symbol */
164 #define PREDEF(__name, __str) \
165 { (__name), (__str) },
167 static enum rofft
roffhash_find(const char *, size_t);
168 static void roffhash_init(void);
169 static void roffnode_cleanscope(struct roff
*);
170 static void roffnode_pop(struct roff
*);
171 static void roffnode_push(struct roff
*, enum rofft
,
172 const char *, int, int);
173 static enum rofferr
roff_block(ROFF_ARGS
);
174 static enum rofferr
roff_block_text(ROFF_ARGS
);
175 static enum rofferr
roff_block_sub(ROFF_ARGS
);
176 static enum rofferr
roff_cblock(ROFF_ARGS
);
177 static enum rofferr
roff_cc(ROFF_ARGS
);
178 static void roff_ccond(struct roff
*, int, int);
179 static enum rofferr
roff_cond(ROFF_ARGS
);
180 static enum rofferr
roff_cond_text(ROFF_ARGS
);
181 static enum rofferr
roff_cond_sub(ROFF_ARGS
);
182 static enum rofferr
roff_ds(ROFF_ARGS
);
183 static int roff_evalcond(const char *, int *);
184 static int roff_evalstrcond(const char *, int *);
185 static void roff_free1(struct roff
*);
186 static void roff_freereg(struct roffreg
*);
187 static void roff_freestr(struct roffkv
*);
188 static char *roff_getname(struct roff
*, char **, int, int);
189 static int roff_getnum(const char *, int *, int *);
190 static int roff_getop(const char *, int *, char *);
191 static int roff_getregn(const struct roff
*,
192 const char *, size_t);
193 static int roff_getregro(const char *name
);
194 static const char *roff_getstrn(const struct roff
*,
195 const char *, size_t);
196 static enum rofferr
roff_it(ROFF_ARGS
);
197 static enum rofferr
roff_line_ignore(ROFF_ARGS
);
198 static enum rofferr
roff_nr(ROFF_ARGS
);
199 static void roff_openeqn(struct roff
*, const char *,
200 int, int, const char *);
201 static enum rofft
roff_parse(struct roff
*, const char *, int *);
202 static enum rofferr
roff_parsetext(char **, size_t *, int, int *);
203 static enum rofferr
roff_res(struct roff
*,
204 char **, size_t *, int, int);
205 static enum rofferr
roff_rm(ROFF_ARGS
);
206 static enum rofferr
roff_rr(ROFF_ARGS
);
207 static void roff_setstr(struct roff
*,
208 const char *, const char *, int);
209 static void roff_setstrn(struct roffkv
**, const char *,
210 size_t, const char *, size_t, int);
211 static enum rofferr
roff_so(ROFF_ARGS
);
212 static enum rofferr
roff_tr(ROFF_ARGS
);
213 static enum rofferr
roff_Dd(ROFF_ARGS
);
214 static enum rofferr
roff_TH(ROFF_ARGS
);
215 static enum rofferr
roff_TE(ROFF_ARGS
);
216 static enum rofferr
roff_TS(ROFF_ARGS
);
217 static enum rofferr
roff_EQ(ROFF_ARGS
);
218 static enum rofferr
roff_EN(ROFF_ARGS
);
219 static enum rofferr
roff_T_(ROFF_ARGS
);
220 static enum rofferr
roff_userdef(ROFF_ARGS
);
222 /* See roffhash_find() */
226 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
228 static struct roffmac
*hash
[HASHWIDTH
];
230 static struct roffmac roffs
[ROFF_MAX
] = {
231 { "ad", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
232 { "am", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
233 { "ami", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
234 { "am1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
235 { "as", roff_ds
, NULL
, NULL
, 0, NULL
},
236 { "cc", roff_cc
, NULL
, NULL
, 0, NULL
},
237 { "ce", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
238 { "de", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
239 { "dei", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
240 { "de1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
241 { "ds", roff_ds
, NULL
, NULL
, 0, NULL
},
242 { "el", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
243 { "fam", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
244 { "hw", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
245 { "hy", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
246 { "ie", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
247 { "if", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
248 { "ig", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
249 { "it", roff_it
, NULL
, NULL
, 0, NULL
},
250 { "ne", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
251 { "nh", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
252 { "nr", roff_nr
, NULL
, NULL
, 0, NULL
},
253 { "ns", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
254 { "ps", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
255 { "rm", roff_rm
, NULL
, NULL
, 0, NULL
},
256 { "rr", roff_rr
, NULL
, NULL
, 0, NULL
},
257 { "so", roff_so
, NULL
, NULL
, 0, NULL
},
258 { "ta", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
259 { "tr", roff_tr
, NULL
, NULL
, 0, NULL
},
260 { "Dd", roff_Dd
, NULL
, NULL
, 0, NULL
},
261 { "TH", roff_TH
, NULL
, NULL
, 0, NULL
},
262 { "TS", roff_TS
, NULL
, NULL
, 0, NULL
},
263 { "TE", roff_TE
, NULL
, NULL
, 0, NULL
},
264 { "T&", roff_T_
, NULL
, NULL
, 0, NULL
},
265 { "EQ", roff_EQ
, NULL
, NULL
, 0, NULL
},
266 { "EN", roff_EN
, NULL
, NULL
, 0, NULL
},
267 { ".", roff_cblock
, NULL
, NULL
, 0, NULL
},
268 { NULL
, roff_userdef
, NULL
, NULL
, 0, NULL
},
271 /* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
272 const char *const __mdoc_reserved
[] = {
273 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
274 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
275 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
276 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
277 "Dt", "Dv", "Dx", "D1",
278 "Ec", "Ed", "Ef", "Ek", "El", "Em",
279 "En", "Eo", "Er", "Es", "Ev", "Ex",
280 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
281 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
282 "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
283 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
284 "Pa", "Pc", "Pf", "Po", "Pp", "Pq",
285 "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
286 "Sc", "Sh", "Sm", "So", "Sq",
287 "Ss", "St", "Sx", "Sy",
288 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
289 "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
290 "%P", "%Q", "%R", "%T", "%U", "%V",
294 /* not currently implemented: BT DE DS ME MT PT SY TQ YS */
295 const char *const __man_reserved
[] = {
296 "AT", "B", "BI", "BR", "DT",
297 "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
298 "LP", "OP", "P", "PD", "PP",
299 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
300 "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
304 /* Array of injected predefined strings. */
305 #define PREDEFS_MAX 38
306 static const struct predef predefs
[PREDEFS_MAX
] = {
307 #include "predefs.in"
310 /* See roffhash_find() */
311 #define ROFF_HASH(p) (p[0] - ASCII_LO)
313 static int roffit_lines
; /* number of lines to delay */
314 static char *roffit_macro
; /* nil-terminated macro line */
322 for (i
= 0; i
< (int)ROFF_USERDEF
; i
++) {
323 assert(roffs
[i
].name
[0] >= ASCII_LO
);
324 assert(roffs
[i
].name
[0] <= ASCII_HI
);
326 buc
= ROFF_HASH(roffs
[i
].name
);
328 if (NULL
!= (n
= hash
[buc
])) {
329 for ( ; n
->next
; n
= n
->next
)
333 hash
[buc
] = &roffs
[i
];
338 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
339 * the nil-terminated string name could be found.
342 roffhash_find(const char *p
, size_t s
)
348 * libroff has an extremely simple hashtable, for the time
349 * being, which simply keys on the first character, which must
350 * be printable, then walks a chain. It works well enough until
354 if (p
[0] < ASCII_LO
|| p
[0] > ASCII_HI
)
359 if (NULL
== (n
= hash
[buc
]))
361 for ( ; n
; n
= n
->next
)
362 if (0 == strncmp(n
->name
, p
, s
) && '\0' == n
->name
[(int)s
])
363 return((enum rofft
)(n
- roffs
));
370 * Pop the current node off of the stack of roff instructions currently
374 roffnode_pop(struct roff
*r
)
381 r
->last
= r
->last
->parent
;
389 * Push a roff node onto the instruction stack. This must later be
390 * removed with roffnode_pop().
393 roffnode_push(struct roff
*r
, enum rofft tok
, const char *name
,
398 p
= mandoc_calloc(1, sizeof(struct roffnode
));
401 p
->name
= mandoc_strdup(name
);
405 p
->rule
= p
->parent
? p
->parent
->rule
: 0;
412 roff_free1(struct roff
*r
)
414 struct tbl_node
*tbl
;
418 while (NULL
!= (tbl
= r
->first_tbl
)) {
419 r
->first_tbl
= tbl
->next
;
423 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
425 while (NULL
!= (e
= r
->first_eqn
)) {
426 r
->first_eqn
= e
->next
;
430 r
->first_eqn
= r
->last_eqn
= r
->eqn
= NULL
;
435 roff_freestr(r
->strtab
);
436 roff_freestr(r
->xmbtab
);
438 r
->strtab
= r
->xmbtab
= NULL
;
440 roff_freereg(r
->regtab
);
445 for (i
= 0; i
< 128; i
++)
453 roff_reset(struct roff
*r
)
462 roff_free(struct roff
*r
)
471 roff_alloc(struct mparse
*parse
, int options
)
475 r
= mandoc_calloc(1, sizeof(struct roff
));
477 r
->options
= options
;
486 * In the current line, expand user-defined strings ("\*")
487 * and references to number registers ("\n").
488 * Also check the syntax of other escape sequences.
491 roff_res(struct roff
*r
, char **bufp
, size_t *szp
, int ln
, int pos
)
493 char ubuf
[12]; /* buffer to print the number */
494 const char *stesc
; /* start of an escape sequence ('\\') */
495 const char *stnam
; /* start of the name, after "[(*" */
496 const char *cp
; /* end of the name, e.g. before ']' */
497 const char *res
; /* the string to be substituted */
498 char *nbuf
; /* new buffer to copy bufp to */
499 size_t nsz
; /* size of the new buffer */
500 size_t maxl
; /* expected length of the escape name */
501 size_t naml
; /* actual length of the escape name */
502 int expand_count
; /* to avoid infinite loops */
508 while (NULL
!= (cp
= strchr(cp
, '\\'))) {
512 * The second character must be an asterisk or an n.
513 * If it isn't, skip it anyway: It is escaped,
514 * so it can't start another escape sequence.
528 if (ESCAPE_ERROR
!= mandoc_escape(&cp
, NULL
, NULL
))
531 (MANDOCERR_BADESCAPE
, r
->parse
,
532 ln
, (int)(stesc
- *bufp
), NULL
);
539 * The third character decides the length
540 * of the name of the string or register.
541 * Save a pointer to the name.
561 /* Advance to the end of the name. */
563 for (naml
= 0; 0 == maxl
|| naml
< maxl
; naml
++, cp
++) {
566 (MANDOCERR_BADESCAPE
,
568 (int)(stesc
- *bufp
), NULL
);
571 if (0 == maxl
&& ']' == *cp
)
576 * Retrieve the replacement string; if it is
577 * undefined, resume searching for escapes.
581 res
= roff_getstrn(r
, stnam
, naml
);
583 snprintf(ubuf
, sizeof(ubuf
), "%d",
584 roff_getregn(r
, stnam
, naml
));
588 (MANDOCERR_BADESCAPE
, r
->parse
,
589 ln
, (int)(stesc
- *bufp
), NULL
);
593 /* Replace the escape sequence by the string. */
597 nsz
= *szp
+ strlen(res
) + 1;
598 nbuf
= mandoc_malloc(nsz
);
600 strlcpy(nbuf
, *bufp
, (size_t)(stesc
- *bufp
+ 1));
601 strlcat(nbuf
, res
, nsz
);
602 strlcat(nbuf
, cp
+ (maxl
? 0 : 1), nsz
);
609 if (EXPAND_LIMIT
>= ++expand_count
)
612 /* Just leave the string unexpanded. */
613 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
, ln
, pos
, NULL
);
620 * Process text streams:
621 * Convert all breakable hyphens into ASCII_HYPH.
622 * Decrement and spring input line trap.
625 roff_parsetext(char **bufp
, size_t *szp
, int pos
, int *offs
)
633 start
= p
= *bufp
+ pos
;
636 sz
= strcspn(p
, "-\\");
643 /* Skip over escapes. */
645 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
646 if (ESCAPE_ERROR
== esc
)
649 } else if (p
== start
) {
654 if (isalpha((unsigned char)p
[-1]) &&
655 isalpha((unsigned char)p
[1]))
660 /* Spring the input line trap. */
661 if (1 == roffit_lines
) {
662 isz
= mandoc_asprintf(&p
, "%s\n.%s", *bufp
, roffit_macro
);
669 return(ROFF_REPARSE
);
670 } else if (1 < roffit_lines
)
676 roff_parseln(struct roff
*r
, int ln
, char **bufp
,
677 size_t *szp
, int pos
, int *offs
)
684 * Run the reserved-word filter only if we have some reserved
688 e
= roff_res(r
, bufp
, szp
, ln
, pos
);
691 assert(ROFF_CONT
== e
);
694 ctl
= roff_getcontrol(r
, *bufp
, &pos
);
697 * First, if a scope is open and we're not a macro, pass the
698 * text through the macro's filter. If a scope isn't open and
699 * we're not a macro, just let it through.
700 * Finally, if there's an equation scope open, divert it into it
701 * no matter our state.
704 if (r
->last
&& ! ctl
) {
706 assert(roffs
[t
].text
);
708 (r
, t
, bufp
, szp
, ln
, pos
, pos
, offs
);
709 assert(ROFF_IGN
== e
|| ROFF_CONT
== e
);
714 return(eqn_read(&r
->eqn
, ln
, *bufp
, ppos
, offs
));
717 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
718 return(roff_parsetext(bufp
, szp
, pos
, offs
));
722 * If a scope is open, go to the child handler for that macro,
723 * as it may want to preprocess before doing anything with it.
724 * Don't do so if an equation is open.
729 assert(roffs
[t
].sub
);
730 return((*roffs
[t
].sub
)
732 ln
, ppos
, pos
, offs
));
736 * Lastly, as we've no scope open, try to look up and execute
737 * the new macro. If no macro is found, simply return and let
738 * the compilers handle it.
741 if (ROFF_MAX
== (t
= roff_parse(r
, *bufp
, &pos
)))
744 assert(roffs
[t
].proc
);
745 return((*roffs
[t
].proc
)
747 ln
, ppos
, pos
, offs
));
752 roff_endparse(struct roff
*r
)
756 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
757 r
->last
->line
, r
->last
->col
, NULL
);
760 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
761 r
->eqn
->eqn
.ln
, r
->eqn
->eqn
.pos
, NULL
);
766 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
767 r
->tbl
->line
, r
->tbl
->pos
, NULL
);
773 * Parse a roff node's type from the input buffer. This must be in the
774 * form of ".foo xxx" in the usual way.
777 roff_parse(struct roff
*r
, const char *buf
, int *pos
)
783 if ('\0' == buf
[*pos
] || '"' == buf
[*pos
] ||
784 '\t' == buf
[*pos
] || ' ' == buf
[*pos
])
787 /* We stop the macro parse at an escape, tab, space, or nil. */
790 maclen
= strcspn(mac
, " \\\t\0");
792 t
= (r
->current_string
= roff_getstrn(r
, mac
, maclen
))
793 ? ROFF_USERDEF
: roffhash_find(mac
, maclen
);
797 while (buf
[*pos
] && ' ' == buf
[*pos
])
805 roff_cblock(ROFF_ARGS
)
809 * A block-close `..' should only be invoked as a child of an
810 * ignore macro, otherwise raise a warning and just ignore it.
813 if (NULL
== r
->last
) {
814 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
818 switch (r
->last
->tok
) {
826 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
833 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
838 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
841 roffnode_cleanscope(r
);
848 roffnode_cleanscope(struct roff
*r
)
852 if (--r
->last
->endspan
!= 0)
860 roff_ccond(struct roff
*r
, int ln
, int ppos
)
863 if (NULL
== r
->last
) {
864 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
868 switch (r
->last
->tok
) {
876 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
880 if (r
->last
->endspan
> -1) {
881 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
886 roffnode_cleanscope(r
);
893 roff_block(ROFF_ARGS
)
901 if (ROFF_ig
!= tok
) {
902 if ('\0' == (*bufp
)[pos
]) {
903 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
908 * Re-write `de1', since we don't really care about
909 * groff's strange compatibility mode, into `de'.
917 mandoc_msg(MANDOCERR_REQUEST
, r
->parse
, ln
, ppos
,
920 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
923 while (isspace((unsigned char)(*bufp
)[pos
]))
924 (*bufp
)[pos
++] = '\0';
927 roffnode_push(r
, tok
, name
, ln
, ppos
);
930 * At the beginning of a `de' macro, clear the existing string
931 * with the same name, if there is one. New content will be
932 * appended from roff_block_text() in multiline mode.
936 roff_setstr(r
, name
, "", 0);
938 if ('\0' == (*bufp
)[pos
])
941 /* If present, process the custom end-of-line marker. */
944 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
948 * Note: groff does NOT like escape characters in the input.
949 * Instead of detecting this, we're just going to let it fly and
954 sz
= (size_t)(pos
- sv
);
956 if (1 == sz
&& '.' == (*bufp
)[sv
])
959 r
->last
->end
= mandoc_malloc(sz
+ 1);
961 memcpy(r
->last
->end
, *bufp
+ sv
, sz
);
962 r
->last
->end
[(int)sz
] = '\0';
965 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
973 roff_block_sub(ROFF_ARGS
)
979 * First check whether a custom macro exists at this level. If
980 * it does, then check against it. This is some of groff's
981 * stranger behaviours. If we encountered a custom end-scope
982 * tag and that tag also happens to be a "real" macro, then we
983 * need to try interpreting it again as a real macro. If it's
984 * not, then return ignore. Else continue.
988 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
989 if ((*bufp
)[i
] != r
->last
->end
[j
])
992 if ('\0' == r
->last
->end
[j
] &&
993 ('\0' == (*bufp
)[i
] ||
995 '\t' == (*bufp
)[i
])) {
997 roffnode_cleanscope(r
);
999 while (' ' == (*bufp
)[i
] || '\t' == (*bufp
)[i
])
1003 if (ROFF_MAX
!= roff_parse(r
, *bufp
, &pos
))
1010 * If we have no custom end-query or lookup failed, then try
1011 * pulling it out of the hashtable.
1014 t
= roff_parse(r
, *bufp
, &pos
);
1017 * Macros other than block-end are only significant
1018 * in `de' blocks; elsewhere, simply throw them away.
1020 if (ROFF_cblock
!= t
) {
1022 roff_setstr(r
, r
->last
->name
, *bufp
+ ppos
, 2);
1026 assert(roffs
[t
].proc
);
1027 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1028 ln
, ppos
, pos
, offs
));
1034 roff_block_text(ROFF_ARGS
)
1038 roff_setstr(r
, r
->last
->name
, *bufp
+ pos
, 2);
1046 roff_cond_sub(ROFF_ARGS
)
1053 roffnode_cleanscope(r
);
1054 t
= roff_parse(r
, *bufp
, &pos
);
1057 * Fully handle known macros when they are structurally
1058 * required or when the conditional evaluated to true.
1061 if ((ROFF_MAX
!= t
) &&
1062 (rr
|| ROFFMAC_STRUCT
& roffs
[t
].flags
)) {
1063 assert(roffs
[t
].proc
);
1064 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1065 ln
, ppos
, pos
, offs
));
1069 * If `\}' occurs on a macro line without a preceding macro,
1070 * drop the line completely.
1074 if ('\\' == ep
[0] && '}' == ep
[1])
1077 /* Always check for the closing delimiter `\}'. */
1079 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1080 if ('}' == *(++ep
)) {
1082 roff_ccond(r
, ln
, ep
- *bufp
- 1);
1086 return(rr
? ROFF_CONT
: ROFF_IGN
);
1091 roff_cond_text(ROFF_ARGS
)
1097 roffnode_cleanscope(r
);
1100 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1101 if ('}' == *(++ep
)) {
1103 roff_ccond(r
, ln
, ep
- *bufp
- 1);
1107 return(rr
? ROFF_CONT
: ROFF_IGN
);
1111 roff_getnum(const char *v
, int *pos
, int *res
)
1120 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
1121 *res
+= 10 * *res
+ v
[p
] - '0';
1133 roff_getop(const char *v
, int *pos
, char *res
)
1138 e
= v
[*pos
+ 1] == '=';
1161 * Evaluate a string comparison condition.
1162 * The first character is the delimiter.
1163 * Succeed if the string up to its second occurrence
1164 * matches the string up to its third occurence.
1165 * Advance the cursor after the third occurrence
1166 * or lacking that, to the end of the line.
1169 roff_evalstrcond(const char *v
, int *pos
)
1171 const char *s1
, *s2
, *s3
;
1175 s1
= v
+ *pos
; /* initial delimiter */
1176 s2
= s1
+ 1; /* for scanning the first string */
1177 s3
= strchr(s2
, *s1
); /* for scanning the second string */
1179 if (NULL
== s3
) /* found no middle delimiter */
1182 while ('\0' != *++s3
) {
1183 if (*s2
!= *s3
) { /* mismatch */
1184 s3
= strchr(s3
, *s1
);
1187 if (*s3
== *s1
) { /* found the final delimiter */
1196 s3
= strchr(s2
, '\0');
1204 roff_evalcond(const char *v
, int *pos
)
1206 int wanttrue
, lh
, rh
;
1209 if ('!' == v
[*pos
]) {
1236 if (!roff_getnum(v
, pos
, &lh
))
1237 return(roff_evalstrcond(v
, pos
) == wanttrue
);
1238 if (!roff_getop(v
, pos
, &op
))
1239 return((lh
> 0) == wanttrue
);
1240 if (!roff_getnum(v
, pos
, &rh
))
1245 return((lh
>= rh
) == wanttrue
);
1247 return((lh
<= rh
) == wanttrue
);
1249 return((lh
== rh
) == wanttrue
);
1251 return((lh
> rh
) == wanttrue
);
1253 return((lh
< rh
) == wanttrue
);
1261 roff_line_ignore(ROFF_ARGS
)
1269 roff_cond(ROFF_ARGS
)
1272 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
1275 * An `.el' has no conditional body: it will consume the value
1276 * of the current rstack entry set in prior `ie' calls or
1279 * If we're not an `el', however, then evaluate the conditional.
1282 r
->last
->rule
= ROFF_el
== tok
?
1283 (r
->rstackpos
< 0 ? 0 : r
->rstack
[r
->rstackpos
--]) :
1284 roff_evalcond(*bufp
, &pos
);
1287 * An if-else will put the NEGATION of the current evaluated
1288 * conditional into the stack of rules.
1291 if (ROFF_ie
== tok
) {
1292 if (r
->rstackpos
== RSTACK_MAX
- 1) {
1293 mandoc_msg(MANDOCERR_MEM
,
1294 r
->parse
, ln
, ppos
, NULL
);
1297 r
->rstack
[++r
->rstackpos
] = !r
->last
->rule
;
1300 /* If the parent has false as its rule, then so do we. */
1302 if (r
->last
->parent
&& !r
->last
->parent
->rule
)
1307 * If there is nothing on the line after the conditional,
1308 * not even whitespace, use next-line scope.
1311 if ('\0' == (*bufp
)[pos
]) {
1312 r
->last
->endspan
= 2;
1316 while (' ' == (*bufp
)[pos
])
1319 /* An opening brace requests multiline scope. */
1321 if ('\\' == (*bufp
)[pos
] && '{' == (*bufp
)[pos
+ 1]) {
1322 r
->last
->endspan
= -1;
1328 * Anything else following the conditional causes
1329 * single-line scope. Warn if the scope contains
1330 * nothing but trailing whitespace.
1333 if ('\0' == (*bufp
)[pos
])
1334 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
1336 r
->last
->endspan
= 1;
1348 char *name
, *string
;
1351 * A symbol is named by the first word following the macro
1352 * invocation up to a space. Its value is anything after the
1353 * name's trailing whitespace and optional double-quote. Thus,
1357 * will have `bar " ' as its value.
1360 string
= *bufp
+ pos
;
1361 name
= roff_getname(r
, &string
, ln
, pos
);
1365 /* Read past initial double-quote. */
1369 /* The rest is the value. */
1370 roff_setstr(r
, name
, string
, ROFF_as
== tok
);
1375 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
1377 struct roffreg
*reg
;
1379 /* Search for an existing register with the same name. */
1382 while (reg
&& strcmp(name
, reg
->key
.p
))
1386 /* Create a new register. */
1387 reg
= mandoc_malloc(sizeof(struct roffreg
));
1388 reg
->key
.p
= mandoc_strdup(name
);
1389 reg
->key
.sz
= strlen(name
);
1391 reg
->next
= r
->regtab
;
1397 else if ('-' == sign
)
1404 * Handle some predefined read-only number registers.
1405 * For now, return -1 if the requested register is not predefined;
1406 * in case a predefined read-only register having the value -1
1407 * were to turn up, another special value would have to be chosen.
1410 roff_getregro(const char *name
)
1414 case ('A'): /* ASCII approximation mode is always off. */
1416 case ('g'): /* Groff compatibility mode is always on. */
1418 case ('H'): /* Fixed horizontal resolution. */
1420 case ('j'): /* Always adjust left margin only. */
1422 case ('T'): /* Some output device is always defined. */
1424 case ('V'): /* Fixed vertical resolution. */
1432 roff_getreg(const struct roff
*r
, const char *name
)
1434 struct roffreg
*reg
;
1437 if ('.' == name
[0] && '\0' != name
[1] && '\0' == name
[2]) {
1438 val
= roff_getregro(name
+ 1);
1443 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1444 if (0 == strcmp(name
, reg
->key
.p
))
1451 roff_getregn(const struct roff
*r
, const char *name
, size_t len
)
1453 struct roffreg
*reg
;
1456 if ('.' == name
[0] && 2 == len
) {
1457 val
= roff_getregro(name
+ 1);
1462 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1463 if (len
== reg
->key
.sz
&&
1464 0 == strncmp(name
, reg
->key
.p
, len
))
1471 roff_freereg(struct roffreg
*reg
)
1473 struct roffreg
*old_reg
;
1475 while (NULL
!= reg
) {
1494 key
= roff_getname(r
, &val
, ln
, pos
);
1497 if ('+' == sign
|| '-' == sign
)
1500 sz
= strspn(val
, "0123456789");
1501 iv
= sz
? mandoc_strntoi(val
, sz
, 10) : 0;
1503 roff_setreg(r
, key
, iv
, sign
);
1511 struct roffreg
*reg
, **prev
;
1516 name
= roff_getname(r
, &cp
, ln
, pos
);
1521 if (NULL
== reg
|| !strcmp(name
, reg
->key
.p
))
1541 while ('\0' != *cp
) {
1542 name
= roff_getname(r
, &cp
, ln
, (int)(cp
- *bufp
));
1544 roff_setstr(r
, name
, NULL
, 0);
1557 /* Parse the number of lines. */
1559 len
= strcspn(cp
, " \t");
1561 if ((iv
= mandoc_strntoi(cp
, len
, 10)) <= 0) {
1562 mandoc_msg(MANDOCERR_NUMERIC
, r
->parse
,
1563 ln
, ppos
, *bufp
+ 1);
1568 /* Arm the input line trap. */
1570 roffit_macro
= mandoc_strdup(cp
);
1578 const char *const *cp
;
1580 if (0 == ((MPARSE_MDOC
| MPARSE_QUICK
) & r
->options
))
1581 for (cp
= __mdoc_reserved
; *cp
; cp
++)
1582 roff_setstr(r
, *cp
, NULL
, 0);
1591 const char *const *cp
;
1593 if (0 == (MPARSE_QUICK
& r
->options
))
1594 for (cp
= __man_reserved
; *cp
; cp
++)
1595 roff_setstr(r
, *cp
, NULL
, 0);
1606 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1619 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1621 tbl_restart(ppos
, ln
, r
->tbl
);
1628 roff_closeeqn(struct roff
*r
)
1631 return(r
->eqn
&& ROFF_EQN
== eqn_end(&r
->eqn
) ? 1 : 0);
1636 roff_openeqn(struct roff
*r
, const char *name
, int line
,
1637 int offs
, const char *buf
)
1642 assert(NULL
== r
->eqn
);
1643 e
= eqn_alloc(name
, offs
, line
, r
->parse
);
1646 r
->last_eqn
->next
= e
;
1648 r
->first_eqn
= r
->last_eqn
= e
;
1650 r
->eqn
= r
->last_eqn
= e
;
1654 eqn_read(&r
->eqn
, line
, buf
, offs
, &poff
);
1663 roff_openeqn(r
, *bufp
+ pos
, ln
, ppos
, NULL
);
1672 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1680 struct tbl_node
*tbl
;
1683 mandoc_msg(MANDOCERR_SCOPEBROKEN
, r
->parse
, ln
, ppos
, NULL
);
1687 tbl
= tbl_alloc(ppos
, ln
, r
->parse
);
1690 r
->last_tbl
->next
= tbl
;
1692 r
->first_tbl
= r
->last_tbl
= tbl
;
1694 r
->tbl
= r
->last_tbl
= tbl
;
1706 if ('\0' == *p
|| '.' == (r
->control
= *p
++))
1710 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1719 const char *p
, *first
, *second
;
1721 enum mandoc_esc esc
;
1726 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1730 while ('\0' != *p
) {
1734 if ('\\' == *first
) {
1735 esc
= mandoc_escape(&p
, NULL
, NULL
);
1736 if (ESCAPE_ERROR
== esc
) {
1738 (MANDOCERR_BADESCAPE
, r
->parse
,
1739 ln
, (int)(p
- *bufp
), NULL
);
1742 fsz
= (size_t)(p
- first
);
1746 if ('\\' == *second
) {
1747 esc
= mandoc_escape(&p
, NULL
, NULL
);
1748 if (ESCAPE_ERROR
== esc
) {
1750 (MANDOCERR_BADESCAPE
, r
->parse
,
1751 ln
, (int)(p
- *bufp
), NULL
);
1754 ssz
= (size_t)(p
- second
);
1755 } else if ('\0' == *second
) {
1756 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
,
1757 ln
, (int)(p
- *bufp
), NULL
);
1763 roff_setstrn(&r
->xmbtab
, first
,
1764 fsz
, second
, ssz
, 0);
1768 if (NULL
== r
->xtab
)
1769 r
->xtab
= mandoc_calloc
1770 (128, sizeof(struct roffstr
));
1772 free(r
->xtab
[(int)*first
].p
);
1773 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
1774 r
->xtab
[(int)*first
].sz
= ssz
;
1786 mandoc_msg(MANDOCERR_SO
, r
->parse
, ln
, ppos
, NULL
);
1789 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1790 * opening anything that's not in our cwd or anything beneath
1791 * it. Thus, explicitly disallow traversing up the file-system
1792 * or using absolute paths.
1796 if ('/' == *name
|| strstr(name
, "../") || strstr(name
, "/..")) {
1797 mandoc_msg(MANDOCERR_SOPATH
, r
->parse
, ln
, pos
, NULL
);
1807 roff_userdef(ROFF_ARGS
)
1814 * Collect pointers to macro argument strings
1815 * and NUL-terminate them.
1818 for (i
= 0; i
< 9; i
++)
1819 arg
[i
] = '\0' == *cp
? "" :
1820 mandoc_getarg(r
->parse
, &cp
, ln
, &pos
);
1823 * Expand macro arguments.
1826 n1
= cp
= mandoc_strdup(r
->current_string
);
1827 while (NULL
!= (cp
= strstr(cp
, "\\$"))) {
1829 if (0 > i
|| 8 < i
) {
1830 /* Not an argument invocation. */
1835 *szp
= strlen(n1
) - 3 + strlen(arg
[i
]) + 1;
1836 n2
= mandoc_malloc(*szp
);
1838 strlcpy(n2
, n1
, (size_t)(cp
- n1
+ 1));
1839 strlcat(n2
, arg
[i
], *szp
);
1840 strlcat(n2
, cp
+ 3, *szp
);
1842 cp
= n2
+ (cp
- n1
);
1848 * Replace the macro invocation
1849 * by the expanded macro.
1854 *szp
= strlen(*bufp
) + 1;
1856 return(*szp
> 1 && '\n' == (*bufp
)[(int)*szp
- 2] ?
1857 ROFF_REPARSE
: ROFF_APPEND
);
1861 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
1869 /* Read until end of name. */
1870 for (cp
= name
; '\0' != *cp
&& ' ' != *cp
; cp
++) {
1876 mandoc_msg(MANDOCERR_NAMESC
, r
->parse
, ln
, pos
, NULL
);
1881 /* Nil-terminate name. */
1885 /* Read past spaces. */
1894 * Store *string into the user-defined string called *name.
1895 * To clear an existing entry, call with (*r, *name, NULL, 0).
1896 * append == 0: replace mode
1897 * append == 1: single-line append mode
1898 * append == 2: multiline append mode, append '\n' after each call
1901 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
1905 roff_setstrn(&r
->strtab
, name
, strlen(name
), string
,
1906 string
? strlen(string
) : 0, append
);
1910 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
1911 const char *string
, size_t stringsz
, int append
)
1916 size_t oldch
, newch
;
1918 /* Search for an existing string with the same name. */
1921 while (n
&& strcmp(name
, n
->key
.p
))
1925 /* Create a new string table entry. */
1926 n
= mandoc_malloc(sizeof(struct roffkv
));
1927 n
->key
.p
= mandoc_strndup(name
, namesz
);
1933 } else if (0 == append
) {
1943 * One additional byte for the '\n' in multiline mode,
1944 * and one for the terminating '\0'.
1946 newch
= stringsz
+ (1 < append
? 2u : 1u);
1948 if (NULL
== n
->val
.p
) {
1949 n
->val
.p
= mandoc_malloc(newch
);
1954 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
1957 /* Skip existing content in the destination buffer. */
1958 c
= n
->val
.p
+ (int)oldch
;
1960 /* Append new content to the destination buffer. */
1962 while (i
< (int)stringsz
) {
1964 * Rudimentary roff copy mode:
1965 * Handle escaped backslashes.
1967 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
1972 /* Append terminating bytes. */
1977 n
->val
.sz
= (int)(c
- n
->val
.p
);
1981 roff_getstrn(const struct roff
*r
, const char *name
, size_t len
)
1983 const struct roffkv
*n
;
1986 for (n
= r
->strtab
; n
; n
= n
->next
)
1987 if (0 == strncmp(name
, n
->key
.p
, len
) &&
1988 '\0' == n
->key
.p
[(int)len
])
1991 for (i
= 0; i
< PREDEFS_MAX
; i
++)
1992 if (0 == strncmp(name
, predefs
[i
].name
, len
) &&
1993 '\0' == predefs
[i
].name
[(int)len
])
1994 return(predefs
[i
].str
);
2000 roff_freestr(struct roffkv
*r
)
2002 struct roffkv
*n
, *nn
;
2004 for (n
= r
; n
; n
= nn
) {
2012 const struct tbl_span
*
2013 roff_span(const struct roff
*r
)
2016 return(r
->tbl
? tbl_span(r
->tbl
) : NULL
);
2020 roff_eqn(const struct roff
*r
)
2023 return(r
->last_eqn
? &r
->last_eqn
->eqn
: NULL
);
2027 * Duplicate an input string, making the appropriate character
2028 * conversions (as stipulated by `tr') along the way.
2029 * Returns a heap-allocated string with all the replacements made.
2032 roff_strdup(const struct roff
*r
, const char *p
)
2034 const struct roffkv
*cp
;
2038 enum mandoc_esc esc
;
2040 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
2041 return(mandoc_strdup(p
));
2042 else if ('\0' == *p
)
2043 return(mandoc_strdup(""));
2046 * Step through each character looking for term matches
2047 * (remember that a `tr' can be invoked with an escape, which is
2048 * a glyph but the escape is multi-character).
2049 * We only do this if the character hash has been initialised
2050 * and the string is >0 length.
2056 while ('\0' != *p
) {
2057 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(int)*p
].p
) {
2058 sz
= r
->xtab
[(int)*p
].sz
;
2059 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2060 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
2064 } else if ('\\' != *p
) {
2065 res
= mandoc_realloc(res
, ssz
+ 2);
2070 /* Search for term matches. */
2071 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
2072 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
2077 * A match has been found.
2078 * Append the match to the array and move
2079 * forward by its keysize.
2081 res
= mandoc_realloc
2082 (res
, ssz
+ cp
->val
.sz
+ 1);
2083 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
2085 p
+= (int)cp
->key
.sz
;
2090 * Handle escapes carefully: we need to copy
2091 * over just the escape itself, or else we might
2092 * do replacements within the escape itself.
2093 * Make sure to pass along the bogus string.
2096 esc
= mandoc_escape(&p
, NULL
, NULL
);
2097 if (ESCAPE_ERROR
== esc
) {
2099 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2100 memcpy(res
+ ssz
, pp
, sz
);
2104 * We bail out on bad escapes.
2105 * No need to warn: we already did so when
2106 * roff_res() was called.
2109 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2110 memcpy(res
+ ssz
, pp
, sz
);
2114 res
[(int)ssz
] = '\0';
2119 * Find out whether a line is a macro line or not.
2120 * If it is, adjust the current position and return one; if it isn't,
2121 * return zero and don't change the current position.
2122 * If the control character has been set with `.cc', then let that gain
2124 * This is slightly contrary to groff, where using the non-breaking
2125 * control character when `cc' has been invoked will cause the
2126 * non-breaking macro contents to be printed verbatim.
2129 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
2135 if (0 != r
->control
&& cp
[pos
] == r
->control
)
2137 else if (0 != r
->control
)
2139 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
2141 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
2146 while (' ' == cp
[pos
] || '\t' == cp
[pos
])