]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.234 2014/10/20 19:04:45 kristaps Exp $ */
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
29 #include "mandoc_aux.h"
30 #include "libmandoc.h"
33 /* Maximum number of nested if-else conditionals. */
34 #define RSTACK_MAX 128
36 /* Maximum number of string expansions per line, to break infinite loops. */
37 #define EXPAND_LIMIT 1000
83 * An incredibly-simple string buffer.
86 char *p
; /* nil-terminated buffer */
87 size_t sz
; /* saved strlen(p) */
91 * A key-value roffstr pair as part of a singly-linked list.
96 struct roffkv
*next
; /* next in list */
100 * A single number register as part of a singly-linked list.
105 struct roffreg
*next
;
109 struct mparse
*parse
; /* parse point */
110 struct roffnode
*last
; /* leaf of stack */
111 int *rstack
; /* stack of inverted `ie' values */
112 struct roffreg
*regtab
; /* number registers */
113 struct roffkv
*strtab
; /* user-defined strings & macros */
114 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
115 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
116 const char *current_string
; /* value of last called user macro */
117 struct tbl_node
*first_tbl
; /* first table parsed */
118 struct tbl_node
*last_tbl
; /* last table parsed */
119 struct tbl_node
*tbl
; /* current table being parsed */
120 struct eqn_node
*last_eqn
; /* last equation parsed */
121 struct eqn_node
*first_eqn
; /* first equation parsed */
122 struct eqn_node
*eqn
; /* current equation being parsed */
123 int eqn_inline
; /* current equation is inline */
124 int options
; /* parse options */
125 int rstacksz
; /* current size limit of rstack */
126 int rstackpos
; /* position in rstack */
127 int format
; /* current file in mdoc or man format */
128 char control
; /* control character */
132 enum rofft tok
; /* type of node */
133 struct roffnode
*parent
; /* up one in stack */
134 int line
; /* parse line */
135 int col
; /* parse col */
136 char *name
; /* node name, e.g. macro name */
137 char *end
; /* end-rules: custom token */
138 int endspan
; /* end-rules: next-line or infty */
139 int rule
; /* current evaluation rule */
142 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
143 enum rofft tok, /* tok of macro */ \
144 char **bufp, /* input buffer */ \
145 size_t *szp, /* size of input buffer */ \
146 int ln, /* parse line */ \
147 int ppos, /* original pos in buffer */ \
148 int pos, /* current pos in buffer */ \
149 int *offs /* reset offset of buffer data */
151 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
154 const char *name
; /* macro name */
155 roffproc proc
; /* process new macro */
156 roffproc text
; /* process as child text of macro */
157 roffproc sub
; /* process as child of macro */
159 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
160 struct roffmac
*next
;
164 const char *name
; /* predefined input name */
165 const char *str
; /* replacement symbol */
168 #define PREDEF(__name, __str) \
169 { (__name), (__str) },
171 static enum rofft
roffhash_find(const char *, size_t);
172 static void roffhash_init(void);
173 static void roffnode_cleanscope(struct roff
*);
174 static void roffnode_pop(struct roff
*);
175 static void roffnode_push(struct roff
*, enum rofft
,
176 const char *, int, int);
177 static enum rofferr
roff_block(ROFF_ARGS
);
178 static enum rofferr
roff_block_text(ROFF_ARGS
);
179 static enum rofferr
roff_block_sub(ROFF_ARGS
);
180 static enum rofferr
roff_cblock(ROFF_ARGS
);
181 static enum rofferr
roff_cc(ROFF_ARGS
);
182 static void roff_ccond(struct roff
*, int, int);
183 static enum rofferr
roff_cond(ROFF_ARGS
);
184 static enum rofferr
roff_cond_text(ROFF_ARGS
);
185 static enum rofferr
roff_cond_sub(ROFF_ARGS
);
186 static enum rofferr
roff_ds(ROFF_ARGS
);
187 static enum rofferr
roff_eqndelim(struct roff
*,
188 char **, size_t *, int);
189 static int roff_evalcond(struct roff
*r
, int,
190 const char *, int *);
191 static int roff_evalnum(struct roff
*, int,
192 const char *, int *, int *, int);
193 static int roff_evalpar(struct roff
*, int,
194 const char *, int *, int *);
195 static int roff_evalstrcond(const char *, int *);
196 static void roff_free1(struct roff
*);
197 static void roff_freereg(struct roffreg
*);
198 static void roff_freestr(struct roffkv
*);
199 static size_t roff_getname(struct roff
*, char **, int, int);
200 static int roff_getnum(const char *, int *, int *);
201 static int roff_getop(const char *, int *, char *);
202 static int roff_getregn(const struct roff
*,
203 const char *, size_t);
204 static int roff_getregro(const char *name
);
205 static const char *roff_getstrn(const struct roff
*,
206 const char *, size_t);
207 static enum rofferr
roff_it(ROFF_ARGS
);
208 static enum rofferr
roff_line_ignore(ROFF_ARGS
);
209 static enum rofferr
roff_nr(ROFF_ARGS
);
210 static void roff_openeqn(struct roff
*, const char *,
211 int, int, const char *);
212 static enum rofft
roff_parse(struct roff
*, char *, int *,
214 static enum rofferr
roff_parsetext(char **, size_t *, int, int *);
215 static enum rofferr
roff_res(struct roff
*,
216 char **, size_t *, int, int);
217 static enum rofferr
roff_rm(ROFF_ARGS
);
218 static enum rofferr
roff_rr(ROFF_ARGS
);
219 static void roff_setstr(struct roff
*,
220 const char *, const char *, int);
221 static void roff_setstrn(struct roffkv
**, const char *,
222 size_t, const char *, size_t, int);
223 static enum rofferr
roff_so(ROFF_ARGS
);
224 static enum rofferr
roff_tr(ROFF_ARGS
);
225 static enum rofferr
roff_Dd(ROFF_ARGS
);
226 static enum rofferr
roff_TH(ROFF_ARGS
);
227 static enum rofferr
roff_TE(ROFF_ARGS
);
228 static enum rofferr
roff_TS(ROFF_ARGS
);
229 static enum rofferr
roff_EQ(ROFF_ARGS
);
230 static enum rofferr
roff_EN(ROFF_ARGS
);
231 static enum rofferr
roff_T_(ROFF_ARGS
);
232 static enum rofferr
roff_userdef(ROFF_ARGS
);
234 /* See roffhash_find() */
238 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
240 static struct roffmac
*hash
[HASHWIDTH
];
242 static struct roffmac roffs
[ROFF_MAX
] = {
243 { "ad", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
244 { "am", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
245 { "ami", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
246 { "am1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
247 { "as", roff_ds
, NULL
, NULL
, 0, NULL
},
248 { "cc", roff_cc
, NULL
, NULL
, 0, NULL
},
249 { "ce", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
250 { "de", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
251 { "dei", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
252 { "de1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
253 { "ds", roff_ds
, NULL
, NULL
, 0, NULL
},
254 { "el", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
255 { "fam", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
256 { "hw", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
257 { "hy", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
258 { "ie", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
259 { "if", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
260 { "ig", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
261 { "it", roff_it
, NULL
, NULL
, 0, NULL
},
262 { "ne", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
263 { "nh", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
264 { "nr", roff_nr
, NULL
, NULL
, 0, NULL
},
265 { "ns", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
266 { "pl", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
267 { "ps", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
268 { "rm", roff_rm
, NULL
, NULL
, 0, NULL
},
269 { "rr", roff_rr
, NULL
, NULL
, 0, NULL
},
270 { "so", roff_so
, NULL
, NULL
, 0, NULL
},
271 { "ta", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
272 { "tr", roff_tr
, NULL
, NULL
, 0, NULL
},
273 { "Dd", roff_Dd
, NULL
, NULL
, 0, NULL
},
274 { "TH", roff_TH
, NULL
, NULL
, 0, NULL
},
275 { "TS", roff_TS
, NULL
, NULL
, 0, NULL
},
276 { "TE", roff_TE
, NULL
, NULL
, 0, NULL
},
277 { "T&", roff_T_
, NULL
, NULL
, 0, NULL
},
278 { "EQ", roff_EQ
, NULL
, NULL
, 0, NULL
},
279 { "EN", roff_EN
, NULL
, NULL
, 0, NULL
},
280 { ".", roff_cblock
, NULL
, NULL
, 0, NULL
},
281 { NULL
, roff_userdef
, NULL
, NULL
, 0, NULL
},
284 /* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
285 const char *const __mdoc_reserved
[] = {
286 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
287 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
288 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
289 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
290 "Dt", "Dv", "Dx", "D1",
291 "Ec", "Ed", "Ef", "Ek", "El", "Em",
292 "En", "Eo", "Er", "Es", "Ev", "Ex",
293 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
294 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
295 "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
296 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
297 "Pa", "Pc", "Pf", "Po", "Pp", "Pq",
298 "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
299 "Sc", "Sh", "Sm", "So", "Sq",
300 "Ss", "St", "Sx", "Sy",
301 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
302 "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
303 "%P", "%Q", "%R", "%T", "%U", "%V",
307 /* not currently implemented: BT DE DS ME MT PT SY TQ YS */
308 const char *const __man_reserved
[] = {
309 "AT", "B", "BI", "BR", "DT",
310 "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
311 "LP", "OP", "P", "PD", "PP",
312 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
313 "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
317 /* Array of injected predefined strings. */
318 #define PREDEFS_MAX 38
319 static const struct predef predefs
[PREDEFS_MAX
] = {
320 #include "predefs.in"
323 /* See roffhash_find() */
324 #define ROFF_HASH(p) (p[0] - ASCII_LO)
326 static int roffit_lines
; /* number of lines to delay */
327 static char *roffit_macro
; /* nil-terminated macro line */
336 for (i
= 0; i
< (int)ROFF_USERDEF
; i
++) {
337 assert(roffs
[i
].name
[0] >= ASCII_LO
);
338 assert(roffs
[i
].name
[0] <= ASCII_HI
);
340 buc
= ROFF_HASH(roffs
[i
].name
);
342 if (NULL
!= (n
= hash
[buc
])) {
343 for ( ; n
->next
; n
= n
->next
)
347 hash
[buc
] = &roffs
[i
];
352 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
353 * the nil-terminated string name could be found.
356 roffhash_find(const char *p
, size_t s
)
362 * libroff has an extremely simple hashtable, for the time
363 * being, which simply keys on the first character, which must
364 * be printable, then walks a chain. It works well enough until
368 if (p
[0] < ASCII_LO
|| p
[0] > ASCII_HI
)
373 if (NULL
== (n
= hash
[buc
]))
375 for ( ; n
; n
= n
->next
)
376 if (0 == strncmp(n
->name
, p
, s
) && '\0' == n
->name
[(int)s
])
377 return((enum rofft
)(n
- roffs
));
383 * Pop the current node off of the stack of roff instructions currently
387 roffnode_pop(struct roff
*r
)
394 r
->last
= r
->last
->parent
;
401 * Push a roff node onto the instruction stack. This must later be
402 * removed with roffnode_pop().
405 roffnode_push(struct roff
*r
, enum rofft tok
, const char *name
,
410 p
= mandoc_calloc(1, sizeof(struct roffnode
));
413 p
->name
= mandoc_strdup(name
);
417 p
->rule
= p
->parent
? p
->parent
->rule
: 0;
423 roff_free1(struct roff
*r
)
425 struct tbl_node
*tbl
;
429 while (NULL
!= (tbl
= r
->first_tbl
)) {
430 r
->first_tbl
= tbl
->next
;
433 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
435 while (NULL
!= (e
= r
->first_eqn
)) {
436 r
->first_eqn
= e
->next
;
439 r
->first_eqn
= r
->last_eqn
= r
->eqn
= NULL
;
449 roff_freereg(r
->regtab
);
452 roff_freestr(r
->strtab
);
453 roff_freestr(r
->xmbtab
);
454 r
->strtab
= r
->xmbtab
= NULL
;
457 for (i
= 0; i
< 128; i
++)
464 roff_reset(struct roff
*r
)
468 r
->format
= r
->options
& (MPARSE_MDOC
| MPARSE_MAN
);
473 roff_free(struct roff
*r
)
481 roff_alloc(struct mparse
*parse
, int options
)
485 r
= mandoc_calloc(1, sizeof(struct roff
));
487 r
->options
= options
;
488 r
->format
= options
& (MPARSE_MDOC
| MPARSE_MAN
);
497 * In the current line, expand escape sequences that tend to get
498 * used in numerical expressions and conditional requests.
499 * Also check the syntax of the remaining escape sequences.
502 roff_res(struct roff
*r
, char **bufp
, size_t *szp
, int ln
, int pos
)
504 char ubuf
[24]; /* buffer to print the number */
505 const char *start
; /* start of the string to process */
506 char *stesc
; /* start of an escape sequence ('\\') */
507 const char *stnam
; /* start of the name, after "[(*" */
508 const char *cp
; /* end of the name, e.g. before ']' */
509 const char *res
; /* the string to be substituted */
510 char *nbuf
; /* new buffer to copy bufp to */
511 size_t maxl
; /* expected length of the escape name */
512 size_t naml
; /* actual length of the escape name */
513 int expand_count
; /* to avoid infinite loops */
514 int npos
; /* position in numeric expression */
515 int arg_complete
; /* argument not interrupted by eol */
516 char term
; /* character terminating the escape */
520 stesc
= strchr(start
, '\0') - 1;
521 while (stesc
-- > start
) {
523 /* Search backwards for the next backslash. */
528 /* If it is escaped, skip it. */
530 for (cp
= stesc
- 1; cp
>= start
; cp
--)
534 if (0 == (stesc
- cp
) % 2) {
539 /* Decide whether to expand or to check only. */
556 if (ESCAPE_ERROR
== mandoc_escape(&cp
, NULL
, NULL
))
557 mandoc_vmsg(MANDOCERR_ESC_BAD
,
558 r
->parse
, ln
, (int)(stesc
- *bufp
),
559 "%.*s", (int)(cp
- stesc
), stesc
);
563 if (EXPAND_LIMIT
< ++expand_count
) {
564 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
,
565 ln
, (int)(stesc
- *bufp
), NULL
);
570 * The third character decides the length
571 * of the name of the string or register.
572 * Save a pointer to the name.
599 /* Advance to the end of the name. */
602 for (naml
= 0; 0 == maxl
|| naml
< maxl
; naml
++, cp
++) {
604 mandoc_msg(MANDOCERR_ESC_BAD
, r
->parse
,
605 ln
, (int)(stesc
- *bufp
), stesc
);
609 if (0 == maxl
&& *cp
== term
) {
616 * Retrieve the replacement string; if it is
617 * undefined, resume searching for escapes.
623 res
= roff_getstrn(r
, stnam
, naml
);
627 ubuf
[0] = arg_complete
&&
628 roff_evalnum(r
, ln
, stnam
, &npos
, NULL
, 0) &&
629 stnam
+ npos
+ 1 == cp
? '1' : '0';
634 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
635 roff_getregn(r
, stnam
, naml
));
640 /* use even incomplete args */
641 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
647 mandoc_vmsg(MANDOCERR_STR_UNDEF
,
648 r
->parse
, ln
, (int)(stesc
- *bufp
),
649 "%.*s", (int)naml
, stnam
);
653 /* Replace the escape sequence by the string. */
656 *szp
= mandoc_asprintf(&nbuf
, "%s%s%s",
659 /* Prepare for the next replacement. */
662 stesc
= nbuf
+ (stesc
- *bufp
) + strlen(res
);
670 * Process text streams:
671 * Convert all breakable hyphens into ASCII_HYPH.
672 * Decrement and spring input line trap.
675 roff_parsetext(char **bufp
, size_t *szp
, int pos
, int *offs
)
683 start
= p
= *bufp
+ pos
;
686 sz
= strcspn(p
, "-\\");
693 /* Skip over escapes. */
695 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
696 if (ESCAPE_ERROR
== esc
)
699 } else if (p
== start
) {
704 if (isalpha((unsigned char)p
[-1]) &&
705 isalpha((unsigned char)p
[1]))
710 /* Spring the input line trap. */
711 if (1 == roffit_lines
) {
712 isz
= mandoc_asprintf(&p
, "%s\n.%s", *bufp
, roffit_macro
);
719 return(ROFF_REPARSE
);
720 } else if (1 < roffit_lines
)
726 roff_parseln(struct roff
*r
, int ln
, char **bufp
,
727 size_t *szp
, int pos
, int *offs
)
733 /* Handle in-line equation delimiters. */
735 if (r
->last_eqn
!= NULL
&& r
->last_eqn
->delim
&&
736 (r
->eqn
== NULL
|| r
->eqn_inline
)) {
737 e
= roff_eqndelim(r
, bufp
, szp
, pos
);
738 if (e
== ROFF_REPARSE
)
740 assert(e
== ROFF_CONT
);
743 /* Expand some escape sequences. */
745 e
= roff_res(r
, bufp
, szp
, ln
, pos
);
748 assert(ROFF_CONT
== e
);
751 ctl
= roff_getcontrol(r
, *bufp
, &pos
);
754 * First, if a scope is open and we're not a macro, pass the
755 * text through the macro's filter. If a scope isn't open and
756 * we're not a macro, just let it through.
757 * Finally, if there's an equation scope open, divert it into it
758 * no matter our state.
761 if (r
->last
&& ! ctl
) {
763 assert(roffs
[t
].text
);
764 e
= (*roffs
[t
].text
)(r
, t
, bufp
, szp
, ln
, pos
, pos
, offs
);
765 assert(ROFF_IGN
== e
|| ROFF_CONT
== e
);
770 return(eqn_read(&r
->eqn
, ln
, *bufp
, ppos
, offs
));
773 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
774 return(roff_parsetext(bufp
, szp
, pos
, offs
));
777 /* Skip empty request lines. */
779 if ((*bufp
)[pos
] == '"') {
780 mandoc_msg(MANDOCERR_COMMENT_BAD
, r
->parse
,
783 } else if ((*bufp
)[pos
] == '\0')
787 * If a scope is open, go to the child handler for that macro,
788 * as it may want to preprocess before doing anything with it.
789 * Don't do so if an equation is open.
794 assert(roffs
[t
].sub
);
795 return((*roffs
[t
].sub
)(r
, t
, bufp
, szp
,
796 ln
, ppos
, pos
, offs
));
800 * Lastly, as we've no scope open, try to look up and execute
801 * the new macro. If no macro is found, simply return and let
802 * the compilers handle it.
805 if (ROFF_MAX
== (t
= roff_parse(r
, *bufp
, &pos
, ln
, ppos
)))
808 assert(roffs
[t
].proc
);
809 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
, ln
, ppos
, pos
, offs
));
813 roff_endparse(struct roff
*r
)
817 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->parse
,
818 r
->last
->line
, r
->last
->col
,
819 roffs
[r
->last
->tok
].name
);
822 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->parse
,
823 r
->eqn
->eqn
.ln
, r
->eqn
->eqn
.pos
, "EQ");
828 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->parse
,
829 r
->tbl
->line
, r
->tbl
->pos
, "TS");
835 * Parse a roff node's type from the input buffer. This must be in the
836 * form of ".foo xxx" in the usual way.
839 roff_parse(struct roff
*r
, char *buf
, int *pos
, int ln
, int ppos
)
848 if ('\0' == *cp
|| '"' == *cp
|| '\t' == *cp
|| ' ' == *cp
)
852 maclen
= roff_getname(r
, &cp
, ln
, ppos
);
854 t
= (r
->current_string
= roff_getstrn(r
, mac
, maclen
))
855 ? ROFF_USERDEF
: roffhash_find(mac
, maclen
);
864 roff_cblock(ROFF_ARGS
)
868 * A block-close `..' should only be invoked as a child of an
869 * ignore macro, otherwise raise a warning and just ignore it.
872 if (NULL
== r
->last
) {
873 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
878 switch (r
->last
->tok
) {
880 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
885 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
892 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
898 mandoc_vmsg(MANDOCERR_ARG_SKIP
, r
->parse
, ln
, pos
,
899 ".. %s", *bufp
+ pos
);
902 roffnode_cleanscope(r
);
908 roffnode_cleanscope(struct roff
*r
)
912 if (--r
->last
->endspan
!= 0)
919 roff_ccond(struct roff
*r
, int ln
, int ppos
)
922 if (NULL
== r
->last
) {
923 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
928 switch (r
->last
->tok
) {
936 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
941 if (r
->last
->endspan
> -1) {
942 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
948 roffnode_cleanscope(r
);
953 roff_block(ROFF_ARGS
)
959 /* Ignore groff compatibility mode for now. */
963 else if (ROFF_am1
== tok
)
966 /* Parse the macro name argument. */
969 if (ROFF_ig
== tok
) {
974 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
975 iname
[namesz
] = '\0';
978 /* Resolve the macro name argument if it is indirect. */
980 if (namesz
&& (ROFF_dei
== tok
|| ROFF_ami
== tok
)) {
981 if (NULL
== (name
= roff_getstrn(r
, iname
, namesz
))) {
982 mandoc_vmsg(MANDOCERR_STR_UNDEF
,
983 r
->parse
, ln
, (int)(iname
- *bufp
),
984 "%.*s", (int)namesz
, iname
);
987 namesz
= strlen(name
);
991 if (0 == namesz
&& ROFF_ig
!= tok
) {
992 mandoc_msg(MANDOCERR_REQ_EMPTY
, r
->parse
,
993 ln
, ppos
, roffs
[tok
].name
);
997 roffnode_push(r
, tok
, name
, ln
, ppos
);
1000 * At the beginning of a `de' macro, clear the existing string
1001 * with the same name, if there is one. New content will be
1002 * appended from roff_block_text() in multiline mode.
1005 if (ROFF_de
== tok
|| ROFF_dei
== tok
)
1006 roff_setstrn(&r
->strtab
, name
, namesz
, "", 0, 0);
1011 /* Get the custom end marker. */
1014 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
1016 /* Resolve the end marker if it is indirect. */
1018 if (namesz
&& (ROFF_dei
== tok
|| ROFF_ami
== tok
)) {
1019 if (NULL
== (name
= roff_getstrn(r
, iname
, namesz
))) {
1020 mandoc_vmsg(MANDOCERR_STR_UNDEF
,
1021 r
->parse
, ln
, (int)(iname
- *bufp
),
1022 "%.*s", (int)namesz
, iname
);
1025 namesz
= strlen(name
);
1030 r
->last
->end
= mandoc_strndup(name
, namesz
);
1033 mandoc_vmsg(MANDOCERR_ARG_EXCESS
, r
->parse
,
1034 ln
, pos
, ".%s ... %s", roffs
[tok
].name
, cp
);
1040 roff_block_sub(ROFF_ARGS
)
1046 * First check whether a custom macro exists at this level. If
1047 * it does, then check against it. This is some of groff's
1048 * stranger behaviours. If we encountered a custom end-scope
1049 * tag and that tag also happens to be a "real" macro, then we
1050 * need to try interpreting it again as a real macro. If it's
1051 * not, then return ignore. Else continue.
1055 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
1056 if ((*bufp
)[i
] != r
->last
->end
[j
])
1059 if ('\0' == r
->last
->end
[j
] &&
1060 ('\0' == (*bufp
)[i
] ||
1061 ' ' == (*bufp
)[i
] ||
1062 '\t' == (*bufp
)[i
])) {
1064 roffnode_cleanscope(r
);
1066 while (' ' == (*bufp
)[i
] || '\t' == (*bufp
)[i
])
1070 if (ROFF_MAX
!= roff_parse(r
, *bufp
, &pos
, ln
, ppos
))
1077 * If we have no custom end-query or lookup failed, then try
1078 * pulling it out of the hashtable.
1081 t
= roff_parse(r
, *bufp
, &pos
, ln
, ppos
);
1083 if (ROFF_cblock
!= t
) {
1085 roff_setstr(r
, r
->last
->name
, *bufp
+ ppos
, 2);
1089 assert(roffs
[t
].proc
);
1090 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
, ln
, ppos
, pos
, offs
));
1094 roff_block_text(ROFF_ARGS
)
1098 roff_setstr(r
, r
->last
->name
, *bufp
+ pos
, 2);
1104 roff_cond_sub(ROFF_ARGS
)
1111 roffnode_cleanscope(r
);
1112 t
= roff_parse(r
, *bufp
, &pos
, ln
, ppos
);
1115 * Fully handle known macros when they are structurally
1116 * required or when the conditional evaluated to true.
1119 if ((ROFF_MAX
!= t
) &&
1120 (rr
|| ROFFMAC_STRUCT
& roffs
[t
].flags
)) {
1121 assert(roffs
[t
].proc
);
1122 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1123 ln
, ppos
, pos
, offs
));
1127 * If `\}' occurs on a macro line without a preceding macro,
1128 * drop the line completely.
1132 if ('\\' == ep
[0] && '}' == ep
[1])
1135 /* Always check for the closing delimiter `\}'. */
1137 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1138 if ('}' == *(++ep
)) {
1140 roff_ccond(r
, ln
, ep
- *bufp
- 1);
1144 return(rr
? ROFF_CONT
: ROFF_IGN
);
1148 roff_cond_text(ROFF_ARGS
)
1154 roffnode_cleanscope(r
);
1157 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1158 if ('}' == *(++ep
)) {
1160 roff_ccond(r
, ln
, ep
- *bufp
- 1);
1164 return(rr
? ROFF_CONT
: ROFF_IGN
);
1168 * Parse a single signed integer number. Stop at the first non-digit.
1169 * If there is at least one digit, return success and advance the
1170 * parse point, else return failure and leave the parse point unchanged.
1171 * Ignore overflows, treat them just like the C language.
1174 roff_getnum(const char *v
, int *pos
, int *res
)
1186 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
1187 *res
= 10 * *res
+ v
[p
] - '0';
1199 * Evaluate a string comparison condition.
1200 * The first character is the delimiter.
1201 * Succeed if the string up to its second occurrence
1202 * matches the string up to its third occurrence.
1203 * Advance the cursor after the third occurrence
1204 * or lacking that, to the end of the line.
1207 roff_evalstrcond(const char *v
, int *pos
)
1209 const char *s1
, *s2
, *s3
;
1213 s1
= v
+ *pos
; /* initial delimiter */
1214 s2
= s1
+ 1; /* for scanning the first string */
1215 s3
= strchr(s2
, *s1
); /* for scanning the second string */
1217 if (NULL
== s3
) /* found no middle delimiter */
1220 while ('\0' != *++s3
) {
1221 if (*s2
!= *s3
) { /* mismatch */
1222 s3
= strchr(s3
, *s1
);
1225 if (*s3
== *s1
) { /* found the final delimiter */
1234 s3
= strchr(s2
, '\0');
1242 * Evaluate an optionally negated single character, numerical,
1243 * or string condition.
1246 roff_evalcond(struct roff
*r
, int ln
, const char *v
, int *pos
)
1248 int wanttrue
, number
;
1250 if ('!' == v
[*pos
]) {
1277 if (roff_evalnum(r
, ln
, v
, pos
, &number
, 0))
1278 return((number
> 0) == wanttrue
);
1280 return(roff_evalstrcond(v
, pos
) == wanttrue
);
1284 roff_line_ignore(ROFF_ARGS
)
1291 roff_cond(ROFF_ARGS
)
1294 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
1297 * An `.el' has no conditional body: it will consume the value
1298 * of the current rstack entry set in prior `ie' calls or
1301 * If we're not an `el', however, then evaluate the conditional.
1304 r
->last
->rule
= ROFF_el
== tok
?
1305 (r
->rstackpos
< 0 ? 0 : r
->rstack
[r
->rstackpos
--]) :
1306 roff_evalcond(r
, ln
, *bufp
, &pos
);
1309 * An if-else will put the NEGATION of the current evaluated
1310 * conditional into the stack of rules.
1313 if (ROFF_ie
== tok
) {
1314 if (r
->rstackpos
+ 1 == r
->rstacksz
) {
1316 r
->rstack
= mandoc_reallocarray(r
->rstack
,
1317 r
->rstacksz
, sizeof(int));
1319 r
->rstack
[++r
->rstackpos
] = !r
->last
->rule
;
1322 /* If the parent has false as its rule, then so do we. */
1324 if (r
->last
->parent
&& !r
->last
->parent
->rule
)
1329 * If there is nothing on the line after the conditional,
1330 * not even whitespace, use next-line scope.
1333 if ('\0' == (*bufp
)[pos
]) {
1334 r
->last
->endspan
= 2;
1338 while (' ' == (*bufp
)[pos
])
1341 /* An opening brace requests multiline scope. */
1343 if ('\\' == (*bufp
)[pos
] && '{' == (*bufp
)[pos
+ 1]) {
1344 r
->last
->endspan
= -1;
1350 * Anything else following the conditional causes
1351 * single-line scope. Warn if the scope contains
1352 * nothing but trailing whitespace.
1355 if ('\0' == (*bufp
)[pos
])
1356 mandoc_msg(MANDOCERR_COND_EMPTY
, r
->parse
,
1357 ln
, ppos
, roffs
[tok
].name
);
1359 r
->last
->endspan
= 1;
1374 * The first word is the name of the string.
1375 * If it is empty or terminated by an escape sequence,
1376 * abort the `ds' request without defining anything.
1379 name
= string
= *bufp
+ pos
;
1383 namesz
= roff_getname(r
, &string
, ln
, pos
);
1384 if ('\\' == name
[namesz
])
1387 /* Read past the initial double-quote, if any. */
1391 /* The rest is the value. */
1392 roff_setstrn(&r
->strtab
, name
, namesz
, string
, strlen(string
),
1398 * Parse a single operator, one or two characters long.
1399 * If the operator is recognized, return success and advance the
1400 * parse point, else return failure and leave the parse point unchanged.
1403 roff_getop(const char *v
, int *pos
, char *res
)
1424 switch (v
[*pos
+ 1]) {
1442 switch (v
[*pos
+ 1]) {
1456 if ('=' == v
[*pos
+ 1])
1468 * Evaluate either a parenthesized numeric expression
1469 * or a single signed integer number.
1472 roff_evalpar(struct roff
*r
, int ln
,
1473 const char *v
, int *pos
, int *res
)
1477 return(roff_getnum(v
, pos
, res
));
1480 if ( ! roff_evalnum(r
, ln
, v
, pos
, res
, 1))
1484 * Omission of the closing parenthesis
1485 * is an error in validation mode,
1486 * but ignored in evaluation mode.
1491 else if (NULL
== res
)
1498 * Evaluate a complete numeric expression.
1499 * Proceed left to right; there is no concept of precedence.
1502 roff_evalnum(struct roff
*r
, int ln
, const char *v
,
1503 int *pos
, int *res
, int skipwhite
)
1505 int mypos
, operand2
;
1514 while (isspace((unsigned char)v
[*pos
]))
1517 if ( ! roff_evalpar(r
, ln
, v
, pos
, res
))
1522 while (isspace((unsigned char)v
[*pos
]))
1525 if ( ! roff_getop(v
, pos
, &operator))
1529 while (isspace((unsigned char)v
[*pos
]))
1532 if ( ! roff_evalpar(r
, ln
, v
, pos
, &operand2
))
1536 while (isspace((unsigned char)v
[*pos
]))
1553 if (0 == operand2
) {
1554 mandoc_msg(MANDOCERR_DIVZERO
,
1555 r
->parse
, ln
, *pos
, v
);
1565 *res
= *res
< operand2
;
1568 *res
= *res
> operand2
;
1571 *res
= *res
<= operand2
;
1574 *res
= *res
>= operand2
;
1577 *res
= *res
== operand2
;
1580 *res
= *res
!= operand2
;
1583 *res
= *res
&& operand2
;
1586 *res
= *res
|| operand2
;
1589 if (operand2
< *res
)
1593 if (operand2
> *res
)
1604 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
1606 struct roffreg
*reg
;
1608 /* Search for an existing register with the same name. */
1611 while (reg
&& strcmp(name
, reg
->key
.p
))
1615 /* Create a new register. */
1616 reg
= mandoc_malloc(sizeof(struct roffreg
));
1617 reg
->key
.p
= mandoc_strdup(name
);
1618 reg
->key
.sz
= strlen(name
);
1620 reg
->next
= r
->regtab
;
1626 else if ('-' == sign
)
1633 * Handle some predefined read-only number registers.
1634 * For now, return -1 if the requested register is not predefined;
1635 * in case a predefined read-only register having the value -1
1636 * were to turn up, another special value would have to be chosen.
1639 roff_getregro(const char *name
)
1643 case 'A': /* ASCII approximation mode is always off. */
1645 case 'g': /* Groff compatibility mode is always on. */
1647 case 'H': /* Fixed horizontal resolution. */
1649 case 'j': /* Always adjust left margin only. */
1651 case 'T': /* Some output device is always defined. */
1653 case 'V': /* Fixed vertical resolution. */
1661 roff_getreg(const struct roff
*r
, const char *name
)
1663 struct roffreg
*reg
;
1666 if ('.' == name
[0] && '\0' != name
[1] && '\0' == name
[2]) {
1667 val
= roff_getregro(name
+ 1);
1672 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1673 if (0 == strcmp(name
, reg
->key
.p
))
1680 roff_getregn(const struct roff
*r
, const char *name
, size_t len
)
1682 struct roffreg
*reg
;
1685 if ('.' == name
[0] && 2 == len
) {
1686 val
= roff_getregro(name
+ 1);
1691 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1692 if (len
== reg
->key
.sz
&&
1693 0 == strncmp(name
, reg
->key
.p
, len
))
1700 roff_freereg(struct roffreg
*reg
)
1702 struct roffreg
*old_reg
;
1704 while (NULL
!= reg
) {
1720 key
= val
= *bufp
+ pos
;
1724 keysz
= roff_getname(r
, &val
, ln
, pos
);
1725 if ('\\' == key
[keysz
])
1730 if ('+' == sign
|| '-' == sign
)
1733 if (roff_evalnum(r
, ln
, val
, NULL
, &iv
, 0))
1734 roff_setreg(r
, key
, iv
, sign
);
1742 struct roffreg
*reg
, **prev
;
1746 name
= cp
= *bufp
+ pos
;
1749 namesz
= roff_getname(r
, &cp
, ln
, pos
);
1750 name
[namesz
] = '\0';
1755 if (NULL
== reg
|| !strcmp(name
, reg
->key
.p
))
1775 while ('\0' != *cp
) {
1777 namesz
= roff_getname(r
, &cp
, ln
, (int)(cp
- *bufp
));
1778 roff_setstrn(&r
->strtab
, name
, namesz
, NULL
, 0, 0);
1779 if ('\\' == name
[namesz
])
1792 /* Parse the number of lines. */
1794 len
= strcspn(cp
, " \t");
1796 if ((iv
= mandoc_strntoi(cp
, len
, 10)) <= 0) {
1797 mandoc_msg(MANDOCERR_IT_NONUM
, r
->parse
,
1798 ln
, ppos
, *bufp
+ 1);
1803 /* Arm the input line trap. */
1805 roffit_macro
= mandoc_strdup(cp
);
1812 const char *const *cp
;
1814 if ((r
->options
& (MPARSE_MDOC
| MPARSE_QUICK
)) == 0)
1815 for (cp
= __mdoc_reserved
; *cp
; cp
++)
1816 roff_setstr(r
, *cp
, NULL
, 0);
1819 r
->format
= MPARSE_MDOC
;
1827 const char *const *cp
;
1829 if ((r
->options
& MPARSE_QUICK
) == 0)
1830 for (cp
= __man_reserved
; *cp
; cp
++)
1831 roff_setstr(r
, *cp
, NULL
, 0);
1834 r
->format
= MPARSE_MAN
;
1844 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
1857 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
1860 tbl_restart(ppos
, ln
, r
->tbl
);
1866 * Handle in-line equation delimiters.
1869 roff_eqndelim(struct roff
*r
, char **bufp
, size_t *szp
, int pos
)
1872 const char *bef_pr
, *bef_nl
, *mac
, *aft_nl
, *aft_pr
;
1875 * Outside equations, look for an opening delimiter.
1876 * If we are inside an equation, we already know it is
1877 * in-line, or this function wouldn't have been called;
1878 * so look for a closing delimiter.
1882 cp2
= strchr(cp1
, r
->eqn
== NULL
?
1883 r
->last_eqn
->odelim
: r
->last_eqn
->cdelim
);
1888 bef_pr
= bef_nl
= aft_nl
= aft_pr
= "";
1890 /* Handle preceding text, protecting whitespace. */
1892 if (**bufp
!= '\0') {
1899 * Prepare replacing the delimiter with an equation macro
1900 * and drop leading white space from the equation.
1903 if (r
->eqn
== NULL
) {
1910 /* Handle following text, protecting whitespace. */
1918 /* Do the actual replacement. */
1920 *szp
= mandoc_asprintf(&cp1
, "%s%s%s%s%s%s%s", *bufp
,
1921 bef_pr
, bef_nl
, mac
, aft_nl
, aft_pr
, cp2
) + 1;
1925 /* Toggle the in-line state of the eqn subsystem. */
1927 r
->eqn_inline
= r
->eqn
== NULL
;
1928 return(ROFF_REPARSE
);
1932 roff_openeqn(struct roff
*r
, const char *name
, int line
,
1933 int offs
, const char *buf
)
1938 assert(NULL
== r
->eqn
);
1939 e
= eqn_alloc(name
, offs
, line
, r
->parse
);
1942 r
->last_eqn
->next
= e
;
1943 e
->delim
= r
->last_eqn
->delim
;
1944 e
->odelim
= r
->last_eqn
->odelim
;
1945 e
->cdelim
= r
->last_eqn
->cdelim
;
1947 r
->first_eqn
= r
->last_eqn
= e
;
1949 r
->eqn
= r
->last_eqn
= e
;
1953 eqn_read(&r
->eqn
, line
, buf
, offs
, &poff
);
1961 roff_openeqn(r
, *bufp
+ pos
, ln
, ppos
, NULL
);
1969 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
, ln
, ppos
, "EN");
1976 struct tbl_node
*tbl
;
1979 mandoc_msg(MANDOCERR_BLK_BROKEN
, r
->parse
,
1980 ln
, ppos
, "TS breaks TS");
1984 tbl
= tbl_alloc(ppos
, ln
, r
->parse
);
1987 r
->last_tbl
->next
= tbl
;
1989 r
->first_tbl
= r
->last_tbl
= tbl
;
1991 r
->tbl
= r
->last_tbl
= tbl
;
2002 if ('\0' == *p
|| '.' == (r
->control
= *p
++))
2006 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
2014 const char *p
, *first
, *second
;
2016 enum mandoc_esc esc
;
2021 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
2025 while ('\0' != *p
) {
2029 if ('\\' == *first
) {
2030 esc
= mandoc_escape(&p
, NULL
, NULL
);
2031 if (ESCAPE_ERROR
== esc
) {
2032 mandoc_msg(MANDOCERR_ESC_BAD
, r
->parse
,
2033 ln
, (int)(p
- *bufp
), first
);
2036 fsz
= (size_t)(p
- first
);
2040 if ('\\' == *second
) {
2041 esc
= mandoc_escape(&p
, NULL
, NULL
);
2042 if (ESCAPE_ERROR
== esc
) {
2043 mandoc_msg(MANDOCERR_ESC_BAD
, r
->parse
,
2044 ln
, (int)(p
- *bufp
), second
);
2047 ssz
= (size_t)(p
- second
);
2048 } else if ('\0' == *second
) {
2049 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
,
2050 ln
, (int)(p
- *bufp
), NULL
);
2056 roff_setstrn(&r
->xmbtab
, first
, fsz
,
2061 if (NULL
== r
->xtab
)
2062 r
->xtab
= mandoc_calloc(128,
2063 sizeof(struct roffstr
));
2065 free(r
->xtab
[(int)*first
].p
);
2066 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
2067 r
->xtab
[(int)*first
].sz
= ssz
;
2079 mandoc_vmsg(MANDOCERR_SO
, r
->parse
, ln
, ppos
, "so %s", name
);
2082 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2083 * opening anything that's not in our cwd or anything beneath
2084 * it. Thus, explicitly disallow traversing up the file-system
2085 * or using absolute paths.
2088 if ('/' == *name
|| strstr(name
, "../") || strstr(name
, "/..")) {
2089 mandoc_vmsg(MANDOCERR_SO_PATH
, r
->parse
, ln
, ppos
,
2099 roff_userdef(ROFF_ARGS
)
2106 * Collect pointers to macro argument strings
2107 * and NUL-terminate them.
2110 for (i
= 0; i
< 9; i
++)
2111 arg
[i
] = '\0' == *cp
? "" :
2112 mandoc_getarg(r
->parse
, &cp
, ln
, &pos
);
2115 * Expand macro arguments.
2118 n1
= cp
= mandoc_strdup(r
->current_string
);
2119 while (NULL
!= (cp
= strstr(cp
, "\\$"))) {
2121 if (0 > i
|| 8 < i
) {
2122 /* Not an argument invocation. */
2127 *szp
= mandoc_asprintf(&n2
, "%s%s%s",
2128 n1
, arg
[i
], cp
+ 3) + 1;
2129 cp
= n2
+ (cp
- n1
);
2135 * Replace the macro invocation
2136 * by the expanded macro.
2141 *szp
= strlen(*bufp
) + 1;
2143 return(*szp
> 1 && '\n' == (*bufp
)[(int)*szp
- 2] ?
2144 ROFF_REPARSE
: ROFF_APPEND
);
2148 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
2157 /* Read until end of name and terminate it with NUL. */
2158 for (cp
= name
; 1; cp
++) {
2159 if ('\0' == *cp
|| ' ' == *cp
) {
2166 if ('{' == cp
[1] || '}' == cp
[1])
2171 mandoc_vmsg(MANDOCERR_NAMESC
, r
->parse
, ln
, pos
,
2172 "%.*s", (int)(cp
- name
+ 1), name
);
2173 mandoc_escape((const char **)&cp
, NULL
, NULL
);
2177 /* Read past spaces. */
2186 * Store *string into the user-defined string called *name.
2187 * To clear an existing entry, call with (*r, *name, NULL, 0).
2188 * append == 0: replace mode
2189 * append == 1: single-line append mode
2190 * append == 2: multiline append mode, append '\n' after each call
2193 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
2197 roff_setstrn(&r
->strtab
, name
, strlen(name
), string
,
2198 string
? strlen(string
) : 0, append
);
2202 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
2203 const char *string
, size_t stringsz
, int append
)
2208 size_t oldch
, newch
;
2210 /* Search for an existing string with the same name. */
2213 while (n
&& (namesz
!= n
->key
.sz
||
2214 strncmp(n
->key
.p
, name
, namesz
)))
2218 /* Create a new string table entry. */
2219 n
= mandoc_malloc(sizeof(struct roffkv
));
2220 n
->key
.p
= mandoc_strndup(name
, namesz
);
2226 } else if (0 == append
) {
2236 * One additional byte for the '\n' in multiline mode,
2237 * and one for the terminating '\0'.
2239 newch
= stringsz
+ (1 < append
? 2u : 1u);
2241 if (NULL
== n
->val
.p
) {
2242 n
->val
.p
= mandoc_malloc(newch
);
2247 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
2250 /* Skip existing content in the destination buffer. */
2251 c
= n
->val
.p
+ (int)oldch
;
2253 /* Append new content to the destination buffer. */
2255 while (i
< (int)stringsz
) {
2257 * Rudimentary roff copy mode:
2258 * Handle escaped backslashes.
2260 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
2265 /* Append terminating bytes. */
2270 n
->val
.sz
= (int)(c
- n
->val
.p
);
2274 roff_getstrn(const struct roff
*r
, const char *name
, size_t len
)
2276 const struct roffkv
*n
;
2279 for (n
= r
->strtab
; n
; n
= n
->next
)
2280 if (0 == strncmp(name
, n
->key
.p
, len
) &&
2281 '\0' == n
->key
.p
[(int)len
])
2284 for (i
= 0; i
< PREDEFS_MAX
; i
++)
2285 if (0 == strncmp(name
, predefs
[i
].name
, len
) &&
2286 '\0' == predefs
[i
].name
[(int)len
])
2287 return(predefs
[i
].str
);
2293 roff_freestr(struct roffkv
*r
)
2295 struct roffkv
*n
, *nn
;
2297 for (n
= r
; n
; n
= nn
) {
2305 const struct tbl_span
*
2306 roff_span(const struct roff
*r
)
2309 return(r
->tbl
? tbl_span(r
->tbl
) : NULL
);
2313 roff_eqn(const struct roff
*r
)
2316 return(r
->last_eqn
? &r
->last_eqn
->eqn
: NULL
);
2320 * Duplicate an input string, making the appropriate character
2321 * conversions (as stipulated by `tr') along the way.
2322 * Returns a heap-allocated string with all the replacements made.
2325 roff_strdup(const struct roff
*r
, const char *p
)
2327 const struct roffkv
*cp
;
2331 enum mandoc_esc esc
;
2333 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
2334 return(mandoc_strdup(p
));
2335 else if ('\0' == *p
)
2336 return(mandoc_strdup(""));
2339 * Step through each character looking for term matches
2340 * (remember that a `tr' can be invoked with an escape, which is
2341 * a glyph but the escape is multi-character).
2342 * We only do this if the character hash has been initialised
2343 * and the string is >0 length.
2349 while ('\0' != *p
) {
2350 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(int)*p
].p
) {
2351 sz
= r
->xtab
[(int)*p
].sz
;
2352 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2353 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
2357 } else if ('\\' != *p
) {
2358 res
= mandoc_realloc(res
, ssz
+ 2);
2363 /* Search for term matches. */
2364 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
2365 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
2370 * A match has been found.
2371 * Append the match to the array and move
2372 * forward by its keysize.
2374 res
= mandoc_realloc(res
,
2375 ssz
+ cp
->val
.sz
+ 1);
2376 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
2378 p
+= (int)cp
->key
.sz
;
2383 * Handle escapes carefully: we need to copy
2384 * over just the escape itself, or else we might
2385 * do replacements within the escape itself.
2386 * Make sure to pass along the bogus string.
2389 esc
= mandoc_escape(&p
, NULL
, NULL
);
2390 if (ESCAPE_ERROR
== esc
) {
2392 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2393 memcpy(res
+ ssz
, pp
, sz
);
2397 * We bail out on bad escapes.
2398 * No need to warn: we already did so when
2399 * roff_res() was called.
2402 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2403 memcpy(res
+ ssz
, pp
, sz
);
2407 res
[(int)ssz
] = '\0';
2412 roff_getformat(const struct roff
*r
)
2419 * Find out whether a line is a macro line or not.
2420 * If it is, adjust the current position and return one; if it isn't,
2421 * return zero and don't change the current position.
2422 * If the control character has been set with `.cc', then let that grain
2424 * This is slightly contrary to groff, where using the non-breaking
2425 * control character when `cc' has been invoked will cause the
2426 * non-breaking macro contents to be printed verbatim.
2429 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
2435 if (0 != r
->control
&& cp
[pos
] == r
->control
)
2437 else if (0 != r
->control
)
2439 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
2441 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
2446 while (' ' == cp
[pos
] || '\t' == cp
[pos
])