]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.236 2014/10/25 15:23:56 schwarze Exp $ */
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
29 #include "mandoc_aux.h"
30 #include "libmandoc.h"
33 /* Maximum number of nested if-else conditionals. */
34 #define RSTACK_MAX 128
36 /* Maximum number of string expansions per line, to break infinite loops. */
37 #define EXPAND_LIMIT 1000
83 * An incredibly-simple string buffer.
86 char *p
; /* nil-terminated buffer */
87 size_t sz
; /* saved strlen(p) */
91 * A key-value roffstr pair as part of a singly-linked list.
96 struct roffkv
*next
; /* next in list */
100 * A single number register as part of a singly-linked list.
105 struct roffreg
*next
;
109 struct mparse
*parse
; /* parse point */
110 struct roffnode
*last
; /* leaf of stack */
111 int *rstack
; /* stack of inverted `ie' values */
112 struct roffreg
*regtab
; /* number registers */
113 struct roffkv
*strtab
; /* user-defined strings & macros */
114 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
115 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
116 const char *current_string
; /* value of last called user macro */
117 struct tbl_node
*first_tbl
; /* first table parsed */
118 struct tbl_node
*last_tbl
; /* last table parsed */
119 struct tbl_node
*tbl
; /* current table being parsed */
120 struct eqn_node
*last_eqn
; /* last equation parsed */
121 struct eqn_node
*first_eqn
; /* first equation parsed */
122 struct eqn_node
*eqn
; /* current equation being parsed */
123 int eqn_inline
; /* current equation is inline */
124 int options
; /* parse options */
125 int rstacksz
; /* current size limit of rstack */
126 int rstackpos
; /* position in rstack */
127 int format
; /* current file in mdoc or man format */
128 char control
; /* control character */
132 enum rofft tok
; /* type of node */
133 struct roffnode
*parent
; /* up one in stack */
134 int line
; /* parse line */
135 int col
; /* parse col */
136 char *name
; /* node name, e.g. macro name */
137 char *end
; /* end-rules: custom token */
138 int endspan
; /* end-rules: next-line or infty */
139 int rule
; /* current evaluation rule */
142 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
143 enum rofft tok, /* tok of macro */ \
144 char **bufp, /* input buffer */ \
145 size_t *szp, /* size of input buffer */ \
146 int ln, /* parse line */ \
147 int ppos, /* original pos in buffer */ \
148 int pos, /* current pos in buffer */ \
149 int *offs /* reset offset of buffer data */
151 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
154 const char *name
; /* macro name */
155 roffproc proc
; /* process new macro */
156 roffproc text
; /* process as child text of macro */
157 roffproc sub
; /* process as child of macro */
159 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
160 struct roffmac
*next
;
164 const char *name
; /* predefined input name */
165 const char *str
; /* replacement symbol */
168 #define PREDEF(__name, __str) \
169 { (__name), (__str) },
171 static enum rofft
roffhash_find(const char *, size_t);
172 static void roffhash_init(void);
173 static void roffnode_cleanscope(struct roff
*);
174 static void roffnode_pop(struct roff
*);
175 static void roffnode_push(struct roff
*, enum rofft
,
176 const char *, int, int);
177 static enum rofferr
roff_block(ROFF_ARGS
);
178 static enum rofferr
roff_block_text(ROFF_ARGS
);
179 static enum rofferr
roff_block_sub(ROFF_ARGS
);
180 static enum rofferr
roff_cblock(ROFF_ARGS
);
181 static enum rofferr
roff_cc(ROFF_ARGS
);
182 static void roff_ccond(struct roff
*, int, int);
183 static enum rofferr
roff_cond(ROFF_ARGS
);
184 static enum rofferr
roff_cond_text(ROFF_ARGS
);
185 static enum rofferr
roff_cond_sub(ROFF_ARGS
);
186 static enum rofferr
roff_ds(ROFF_ARGS
);
187 static enum rofferr
roff_eqndelim(struct roff
*,
188 char **, size_t *, int);
189 static int roff_evalcond(struct roff
*r
, int,
190 const char *, int *);
191 static int roff_evalnum(struct roff
*, int,
192 const char *, int *, int *, int);
193 static int roff_evalpar(struct roff
*, int,
194 const char *, int *, int *);
195 static int roff_evalstrcond(const char *, int *);
196 static void roff_free1(struct roff
*);
197 static void roff_freereg(struct roffreg
*);
198 static void roff_freestr(struct roffkv
*);
199 static size_t roff_getname(struct roff
*, char **, int, int);
200 static int roff_getnum(const char *, int *, int *);
201 static int roff_getop(const char *, int *, char *);
202 static int roff_getregn(const struct roff
*,
203 const char *, size_t);
204 static int roff_getregro(const char *name
);
205 static const char *roff_getstrn(const struct roff
*,
206 const char *, size_t);
207 static enum rofferr
roff_it(ROFF_ARGS
);
208 static enum rofferr
roff_line_ignore(ROFF_ARGS
);
209 static enum rofferr
roff_nr(ROFF_ARGS
);
210 static enum rofft
roff_parse(struct roff
*, char *, int *,
212 static enum rofferr
roff_parsetext(char **, size_t *, int, int *);
213 static enum rofferr
roff_res(struct roff
*,
214 char **, size_t *, int, int);
215 static enum rofferr
roff_rm(ROFF_ARGS
);
216 static enum rofferr
roff_rr(ROFF_ARGS
);
217 static void roff_setstr(struct roff
*,
218 const char *, const char *, int);
219 static void roff_setstrn(struct roffkv
**, const char *,
220 size_t, const char *, size_t, int);
221 static enum rofferr
roff_so(ROFF_ARGS
);
222 static enum rofferr
roff_tr(ROFF_ARGS
);
223 static enum rofferr
roff_Dd(ROFF_ARGS
);
224 static enum rofferr
roff_TH(ROFF_ARGS
);
225 static enum rofferr
roff_TE(ROFF_ARGS
);
226 static enum rofferr
roff_TS(ROFF_ARGS
);
227 static enum rofferr
roff_EQ(ROFF_ARGS
);
228 static enum rofferr
roff_EN(ROFF_ARGS
);
229 static enum rofferr
roff_T_(ROFF_ARGS
);
230 static enum rofferr
roff_userdef(ROFF_ARGS
);
232 /* See roffhash_find() */
236 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
238 static struct roffmac
*hash
[HASHWIDTH
];
240 static struct roffmac roffs
[ROFF_MAX
] = {
241 { "ad", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
242 { "am", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
243 { "ami", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
244 { "am1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
245 { "as", roff_ds
, NULL
, NULL
, 0, NULL
},
246 { "cc", roff_cc
, NULL
, NULL
, 0, NULL
},
247 { "ce", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
248 { "de", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
249 { "dei", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
250 { "de1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
251 { "ds", roff_ds
, NULL
, NULL
, 0, NULL
},
252 { "el", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
253 { "fam", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
254 { "hw", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
255 { "hy", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
256 { "ie", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
257 { "if", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
258 { "ig", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
259 { "it", roff_it
, NULL
, NULL
, 0, NULL
},
260 { "ne", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
261 { "nh", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
262 { "nr", roff_nr
, NULL
, NULL
, 0, NULL
},
263 { "ns", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
264 { "pl", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
265 { "ps", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
266 { "rm", roff_rm
, NULL
, NULL
, 0, NULL
},
267 { "rr", roff_rr
, NULL
, NULL
, 0, NULL
},
268 { "so", roff_so
, NULL
, NULL
, 0, NULL
},
269 { "ta", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
270 { "tr", roff_tr
, NULL
, NULL
, 0, NULL
},
271 { "Dd", roff_Dd
, NULL
, NULL
, 0, NULL
},
272 { "TH", roff_TH
, NULL
, NULL
, 0, NULL
},
273 { "TS", roff_TS
, NULL
, NULL
, 0, NULL
},
274 { "TE", roff_TE
, NULL
, NULL
, 0, NULL
},
275 { "T&", roff_T_
, NULL
, NULL
, 0, NULL
},
276 { "EQ", roff_EQ
, NULL
, NULL
, 0, NULL
},
277 { "EN", roff_EN
, NULL
, NULL
, 0, NULL
},
278 { ".", roff_cblock
, NULL
, NULL
, 0, NULL
},
279 { NULL
, roff_userdef
, NULL
, NULL
, 0, NULL
},
282 /* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
283 const char *const __mdoc_reserved
[] = {
284 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
285 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
286 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
287 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
288 "Dt", "Dv", "Dx", "D1",
289 "Ec", "Ed", "Ef", "Ek", "El", "Em",
290 "En", "Eo", "Er", "Es", "Ev", "Ex",
291 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
292 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
293 "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
294 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
295 "Pa", "Pc", "Pf", "Po", "Pp", "Pq",
296 "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
297 "Sc", "Sh", "Sm", "So", "Sq",
298 "Ss", "St", "Sx", "Sy",
299 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
300 "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
301 "%P", "%Q", "%R", "%T", "%U", "%V",
305 /* not currently implemented: BT DE DS ME MT PT SY TQ YS */
306 const char *const __man_reserved
[] = {
307 "AT", "B", "BI", "BR", "DT",
308 "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
309 "LP", "OP", "P", "PD", "PP",
310 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
311 "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
315 /* Array of injected predefined strings. */
316 #define PREDEFS_MAX 38
317 static const struct predef predefs
[PREDEFS_MAX
] = {
318 #include "predefs.in"
321 /* See roffhash_find() */
322 #define ROFF_HASH(p) (p[0] - ASCII_LO)
324 static int roffit_lines
; /* number of lines to delay */
325 static char *roffit_macro
; /* nil-terminated macro line */
334 for (i
= 0; i
< (int)ROFF_USERDEF
; i
++) {
335 assert(roffs
[i
].name
[0] >= ASCII_LO
);
336 assert(roffs
[i
].name
[0] <= ASCII_HI
);
338 buc
= ROFF_HASH(roffs
[i
].name
);
340 if (NULL
!= (n
= hash
[buc
])) {
341 for ( ; n
->next
; n
= n
->next
)
345 hash
[buc
] = &roffs
[i
];
350 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
351 * the nil-terminated string name could be found.
354 roffhash_find(const char *p
, size_t s
)
360 * libroff has an extremely simple hashtable, for the time
361 * being, which simply keys on the first character, which must
362 * be printable, then walks a chain. It works well enough until
366 if (p
[0] < ASCII_LO
|| p
[0] > ASCII_HI
)
371 if (NULL
== (n
= hash
[buc
]))
373 for ( ; n
; n
= n
->next
)
374 if (0 == strncmp(n
->name
, p
, s
) && '\0' == n
->name
[(int)s
])
375 return((enum rofft
)(n
- roffs
));
381 * Pop the current node off of the stack of roff instructions currently
385 roffnode_pop(struct roff
*r
)
392 r
->last
= r
->last
->parent
;
399 * Push a roff node onto the instruction stack. This must later be
400 * removed with roffnode_pop().
403 roffnode_push(struct roff
*r
, enum rofft tok
, const char *name
,
408 p
= mandoc_calloc(1, sizeof(struct roffnode
));
411 p
->name
= mandoc_strdup(name
);
415 p
->rule
= p
->parent
? p
->parent
->rule
: 0;
421 roff_free1(struct roff
*r
)
423 struct tbl_node
*tbl
;
427 while (NULL
!= (tbl
= r
->first_tbl
)) {
428 r
->first_tbl
= tbl
->next
;
431 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
433 while (NULL
!= (e
= r
->first_eqn
)) {
434 r
->first_eqn
= e
->next
;
437 r
->first_eqn
= r
->last_eqn
= r
->eqn
= NULL
;
447 roff_freereg(r
->regtab
);
450 roff_freestr(r
->strtab
);
451 roff_freestr(r
->xmbtab
);
452 r
->strtab
= r
->xmbtab
= NULL
;
455 for (i
= 0; i
< 128; i
++)
462 roff_reset(struct roff
*r
)
466 r
->format
= r
->options
& (MPARSE_MDOC
| MPARSE_MAN
);
471 roff_free(struct roff
*r
)
479 roff_alloc(struct mparse
*parse
, int options
)
483 r
= mandoc_calloc(1, sizeof(struct roff
));
485 r
->options
= options
;
486 r
->format
= options
& (MPARSE_MDOC
| MPARSE_MAN
);
495 * In the current line, expand escape sequences that tend to get
496 * used in numerical expressions and conditional requests.
497 * Also check the syntax of the remaining escape sequences.
500 roff_res(struct roff
*r
, char **bufp
, size_t *szp
, int ln
, int pos
)
502 char ubuf
[24]; /* buffer to print the number */
503 const char *start
; /* start of the string to process */
504 char *stesc
; /* start of an escape sequence ('\\') */
505 const char *stnam
; /* start of the name, after "[(*" */
506 const char *cp
; /* end of the name, e.g. before ']' */
507 const char *res
; /* the string to be substituted */
508 char *nbuf
; /* new buffer to copy bufp to */
509 size_t maxl
; /* expected length of the escape name */
510 size_t naml
; /* actual length of the escape name */
511 int expand_count
; /* to avoid infinite loops */
512 int npos
; /* position in numeric expression */
513 int arg_complete
; /* argument not interrupted by eol */
514 char term
; /* character terminating the escape */
518 stesc
= strchr(start
, '\0') - 1;
519 while (stesc
-- > start
) {
521 /* Search backwards for the next backslash. */
526 /* If it is escaped, skip it. */
528 for (cp
= stesc
- 1; cp
>= start
; cp
--)
532 if (0 == (stesc
- cp
) % 2) {
537 /* Decide whether to expand or to check only. */
554 if (ESCAPE_ERROR
== mandoc_escape(&cp
, NULL
, NULL
))
555 mandoc_vmsg(MANDOCERR_ESC_BAD
,
556 r
->parse
, ln
, (int)(stesc
- *bufp
),
557 "%.*s", (int)(cp
- stesc
), stesc
);
561 if (EXPAND_LIMIT
< ++expand_count
) {
562 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
,
563 ln
, (int)(stesc
- *bufp
), NULL
);
568 * The third character decides the length
569 * of the name of the string or register.
570 * Save a pointer to the name.
597 /* Advance to the end of the name. */
600 for (naml
= 0; 0 == maxl
|| naml
< maxl
; naml
++, cp
++) {
602 mandoc_msg(MANDOCERR_ESC_BAD
, r
->parse
,
603 ln
, (int)(stesc
- *bufp
), stesc
);
607 if (0 == maxl
&& *cp
== term
) {
614 * Retrieve the replacement string; if it is
615 * undefined, resume searching for escapes.
621 res
= roff_getstrn(r
, stnam
, naml
);
625 ubuf
[0] = arg_complete
&&
626 roff_evalnum(r
, ln
, stnam
, &npos
, NULL
, 0) &&
627 stnam
+ npos
+ 1 == cp
? '1' : '0';
632 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
633 roff_getregn(r
, stnam
, naml
));
638 /* use even incomplete args */
639 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
645 mandoc_vmsg(MANDOCERR_STR_UNDEF
,
646 r
->parse
, ln
, (int)(stesc
- *bufp
),
647 "%.*s", (int)naml
, stnam
);
651 /* Replace the escape sequence by the string. */
654 *szp
= mandoc_asprintf(&nbuf
, "%s%s%s",
657 /* Prepare for the next replacement. */
660 stesc
= nbuf
+ (stesc
- *bufp
) + strlen(res
);
668 * Process text streams:
669 * Convert all breakable hyphens into ASCII_HYPH.
670 * Decrement and spring input line trap.
673 roff_parsetext(char **bufp
, size_t *szp
, int pos
, int *offs
)
681 start
= p
= *bufp
+ pos
;
684 sz
= strcspn(p
, "-\\");
691 /* Skip over escapes. */
693 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
694 if (ESCAPE_ERROR
== esc
)
697 } else if (p
== start
) {
702 if (isalpha((unsigned char)p
[-1]) &&
703 isalpha((unsigned char)p
[1]))
708 /* Spring the input line trap. */
709 if (1 == roffit_lines
) {
710 isz
= mandoc_asprintf(&p
, "%s\n.%s", *bufp
, roffit_macro
);
717 return(ROFF_REPARSE
);
718 } else if (1 < roffit_lines
)
724 roff_parseln(struct roff
*r
, int ln
, char **bufp
,
725 size_t *szp
, int pos
, int *offs
)
731 /* Handle in-line equation delimiters. */
733 if (r
->tbl
== NULL
&&
734 r
->last_eqn
!= NULL
&& r
->last_eqn
->delim
&&
735 (r
->eqn
== NULL
|| r
->eqn_inline
)) {
736 e
= roff_eqndelim(r
, bufp
, szp
, pos
);
737 if (e
== ROFF_REPARSE
)
739 assert(e
== ROFF_CONT
);
742 /* Expand some escape sequences. */
744 e
= roff_res(r
, bufp
, szp
, ln
, pos
);
747 assert(ROFF_CONT
== e
);
750 ctl
= roff_getcontrol(r
, *bufp
, &pos
);
753 * First, if a scope is open and we're not a macro, pass the
754 * text through the macro's filter. If a scope isn't open and
755 * we're not a macro, just let it through.
756 * Finally, if there's an equation scope open, divert it into it
757 * no matter our state.
760 if (r
->last
&& ! ctl
) {
762 assert(roffs
[t
].text
);
763 e
= (*roffs
[t
].text
)(r
, t
, bufp
, szp
, ln
, pos
, pos
, offs
);
764 assert(ROFF_IGN
== e
|| ROFF_CONT
== e
);
769 return(eqn_read(&r
->eqn
, ln
, *bufp
, ppos
, offs
));
772 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
773 return(roff_parsetext(bufp
, szp
, pos
, offs
));
776 /* Skip empty request lines. */
778 if ((*bufp
)[pos
] == '"') {
779 mandoc_msg(MANDOCERR_COMMENT_BAD
, r
->parse
,
782 } else if ((*bufp
)[pos
] == '\0')
786 * If a scope is open, go to the child handler for that macro,
787 * as it may want to preprocess before doing anything with it.
788 * Don't do so if an equation is open.
793 assert(roffs
[t
].sub
);
794 return((*roffs
[t
].sub
)(r
, t
, bufp
, szp
,
795 ln
, ppos
, pos
, offs
));
799 * Lastly, as we've no scope open, try to look up and execute
800 * the new macro. If no macro is found, simply return and let
801 * the compilers handle it.
804 if (ROFF_MAX
== (t
= roff_parse(r
, *bufp
, &pos
, ln
, ppos
)))
807 assert(roffs
[t
].proc
);
808 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
, ln
, ppos
, pos
, offs
));
812 roff_endparse(struct roff
*r
)
816 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->parse
,
817 r
->last
->line
, r
->last
->col
,
818 roffs
[r
->last
->tok
].name
);
821 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->parse
,
822 r
->eqn
->eqn
.ln
, r
->eqn
->eqn
.pos
, "EQ");
827 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->parse
,
828 r
->tbl
->line
, r
->tbl
->pos
, "TS");
834 * Parse a roff node's type from the input buffer. This must be in the
835 * form of ".foo xxx" in the usual way.
838 roff_parse(struct roff
*r
, char *buf
, int *pos
, int ln
, int ppos
)
847 if ('\0' == *cp
|| '"' == *cp
|| '\t' == *cp
|| ' ' == *cp
)
851 maclen
= roff_getname(r
, &cp
, ln
, ppos
);
853 t
= (r
->current_string
= roff_getstrn(r
, mac
, maclen
))
854 ? ROFF_USERDEF
: roffhash_find(mac
, maclen
);
863 roff_cblock(ROFF_ARGS
)
867 * A block-close `..' should only be invoked as a child of an
868 * ignore macro, otherwise raise a warning and just ignore it.
871 if (NULL
== r
->last
) {
872 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
877 switch (r
->last
->tok
) {
879 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
884 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
891 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
897 mandoc_vmsg(MANDOCERR_ARG_SKIP
, r
->parse
, ln
, pos
,
898 ".. %s", *bufp
+ pos
);
901 roffnode_cleanscope(r
);
907 roffnode_cleanscope(struct roff
*r
)
911 if (--r
->last
->endspan
!= 0)
918 roff_ccond(struct roff
*r
, int ln
, int ppos
)
921 if (NULL
== r
->last
) {
922 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
927 switch (r
->last
->tok
) {
935 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
940 if (r
->last
->endspan
> -1) {
941 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
947 roffnode_cleanscope(r
);
952 roff_block(ROFF_ARGS
)
958 /* Ignore groff compatibility mode for now. */
962 else if (ROFF_am1
== tok
)
965 /* Parse the macro name argument. */
968 if (ROFF_ig
== tok
) {
973 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
974 iname
[namesz
] = '\0';
977 /* Resolve the macro name argument if it is indirect. */
979 if (namesz
&& (ROFF_dei
== tok
|| ROFF_ami
== tok
)) {
980 if (NULL
== (name
= roff_getstrn(r
, iname
, namesz
))) {
981 mandoc_vmsg(MANDOCERR_STR_UNDEF
,
982 r
->parse
, ln
, (int)(iname
- *bufp
),
983 "%.*s", (int)namesz
, iname
);
986 namesz
= strlen(name
);
990 if (0 == namesz
&& ROFF_ig
!= tok
) {
991 mandoc_msg(MANDOCERR_REQ_EMPTY
, r
->parse
,
992 ln
, ppos
, roffs
[tok
].name
);
996 roffnode_push(r
, tok
, name
, ln
, ppos
);
999 * At the beginning of a `de' macro, clear the existing string
1000 * with the same name, if there is one. New content will be
1001 * appended from roff_block_text() in multiline mode.
1004 if (ROFF_de
== tok
|| ROFF_dei
== tok
)
1005 roff_setstrn(&r
->strtab
, name
, namesz
, "", 0, 0);
1010 /* Get the custom end marker. */
1013 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
1015 /* Resolve the end marker if it is indirect. */
1017 if (namesz
&& (ROFF_dei
== tok
|| ROFF_ami
== tok
)) {
1018 if (NULL
== (name
= roff_getstrn(r
, iname
, namesz
))) {
1019 mandoc_vmsg(MANDOCERR_STR_UNDEF
,
1020 r
->parse
, ln
, (int)(iname
- *bufp
),
1021 "%.*s", (int)namesz
, iname
);
1024 namesz
= strlen(name
);
1029 r
->last
->end
= mandoc_strndup(name
, namesz
);
1032 mandoc_vmsg(MANDOCERR_ARG_EXCESS
, r
->parse
,
1033 ln
, pos
, ".%s ... %s", roffs
[tok
].name
, cp
);
1039 roff_block_sub(ROFF_ARGS
)
1045 * First check whether a custom macro exists at this level. If
1046 * it does, then check against it. This is some of groff's
1047 * stranger behaviours. If we encountered a custom end-scope
1048 * tag and that tag also happens to be a "real" macro, then we
1049 * need to try interpreting it again as a real macro. If it's
1050 * not, then return ignore. Else continue.
1054 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
1055 if ((*bufp
)[i
] != r
->last
->end
[j
])
1058 if ('\0' == r
->last
->end
[j
] &&
1059 ('\0' == (*bufp
)[i
] ||
1060 ' ' == (*bufp
)[i
] ||
1061 '\t' == (*bufp
)[i
])) {
1063 roffnode_cleanscope(r
);
1065 while (' ' == (*bufp
)[i
] || '\t' == (*bufp
)[i
])
1069 if (ROFF_MAX
!= roff_parse(r
, *bufp
, &pos
, ln
, ppos
))
1076 * If we have no custom end-query or lookup failed, then try
1077 * pulling it out of the hashtable.
1080 t
= roff_parse(r
, *bufp
, &pos
, ln
, ppos
);
1082 if (ROFF_cblock
!= t
) {
1084 roff_setstr(r
, r
->last
->name
, *bufp
+ ppos
, 2);
1088 assert(roffs
[t
].proc
);
1089 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
, ln
, ppos
, pos
, offs
));
1093 roff_block_text(ROFF_ARGS
)
1097 roff_setstr(r
, r
->last
->name
, *bufp
+ pos
, 2);
1103 roff_cond_sub(ROFF_ARGS
)
1110 roffnode_cleanscope(r
);
1111 t
= roff_parse(r
, *bufp
, &pos
, ln
, ppos
);
1114 * Fully handle known macros when they are structurally
1115 * required or when the conditional evaluated to true.
1118 if ((ROFF_MAX
!= t
) &&
1119 (rr
|| ROFFMAC_STRUCT
& roffs
[t
].flags
)) {
1120 assert(roffs
[t
].proc
);
1121 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1122 ln
, ppos
, pos
, offs
));
1126 * If `\}' occurs on a macro line without a preceding macro,
1127 * drop the line completely.
1131 if ('\\' == ep
[0] && '}' == ep
[1])
1134 /* Always check for the closing delimiter `\}'. */
1136 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1137 if ('}' == *(++ep
)) {
1139 roff_ccond(r
, ln
, ep
- *bufp
- 1);
1143 return(rr
? ROFF_CONT
: ROFF_IGN
);
1147 roff_cond_text(ROFF_ARGS
)
1153 roffnode_cleanscope(r
);
1156 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1157 if ('}' == *(++ep
)) {
1159 roff_ccond(r
, ln
, ep
- *bufp
- 1);
1163 return(rr
? ROFF_CONT
: ROFF_IGN
);
1167 * Parse a single signed integer number. Stop at the first non-digit.
1168 * If there is at least one digit, return success and advance the
1169 * parse point, else return failure and leave the parse point unchanged.
1170 * Ignore overflows, treat them just like the C language.
1173 roff_getnum(const char *v
, int *pos
, int *res
)
1185 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
1186 *res
= 10 * *res
+ v
[p
] - '0';
1198 * Evaluate a string comparison condition.
1199 * The first character is the delimiter.
1200 * Succeed if the string up to its second occurrence
1201 * matches the string up to its third occurrence.
1202 * Advance the cursor after the third occurrence
1203 * or lacking that, to the end of the line.
1206 roff_evalstrcond(const char *v
, int *pos
)
1208 const char *s1
, *s2
, *s3
;
1212 s1
= v
+ *pos
; /* initial delimiter */
1213 s2
= s1
+ 1; /* for scanning the first string */
1214 s3
= strchr(s2
, *s1
); /* for scanning the second string */
1216 if (NULL
== s3
) /* found no middle delimiter */
1219 while ('\0' != *++s3
) {
1220 if (*s2
!= *s3
) { /* mismatch */
1221 s3
= strchr(s3
, *s1
);
1224 if (*s3
== *s1
) { /* found the final delimiter */
1233 s3
= strchr(s2
, '\0');
1241 * Evaluate an optionally negated single character, numerical,
1242 * or string condition.
1245 roff_evalcond(struct roff
*r
, int ln
, const char *v
, int *pos
)
1247 int wanttrue
, number
;
1249 if ('!' == v
[*pos
]) {
1276 if (roff_evalnum(r
, ln
, v
, pos
, &number
, 0))
1277 return((number
> 0) == wanttrue
);
1279 return(roff_evalstrcond(v
, pos
) == wanttrue
);
1283 roff_line_ignore(ROFF_ARGS
)
1290 roff_cond(ROFF_ARGS
)
1293 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
1296 * An `.el' has no conditional body: it will consume the value
1297 * of the current rstack entry set in prior `ie' calls or
1300 * If we're not an `el', however, then evaluate the conditional.
1303 r
->last
->rule
= ROFF_el
== tok
?
1304 (r
->rstackpos
< 0 ? 0 : r
->rstack
[r
->rstackpos
--]) :
1305 roff_evalcond(r
, ln
, *bufp
, &pos
);
1308 * An if-else will put the NEGATION of the current evaluated
1309 * conditional into the stack of rules.
1312 if (ROFF_ie
== tok
) {
1313 if (r
->rstackpos
+ 1 == r
->rstacksz
) {
1315 r
->rstack
= mandoc_reallocarray(r
->rstack
,
1316 r
->rstacksz
, sizeof(int));
1318 r
->rstack
[++r
->rstackpos
] = !r
->last
->rule
;
1321 /* If the parent has false as its rule, then so do we. */
1323 if (r
->last
->parent
&& !r
->last
->parent
->rule
)
1328 * If there is nothing on the line after the conditional,
1329 * not even whitespace, use next-line scope.
1332 if ('\0' == (*bufp
)[pos
]) {
1333 r
->last
->endspan
= 2;
1337 while (' ' == (*bufp
)[pos
])
1340 /* An opening brace requests multiline scope. */
1342 if ('\\' == (*bufp
)[pos
] && '{' == (*bufp
)[pos
+ 1]) {
1343 r
->last
->endspan
= -1;
1349 * Anything else following the conditional causes
1350 * single-line scope. Warn if the scope contains
1351 * nothing but trailing whitespace.
1354 if ('\0' == (*bufp
)[pos
])
1355 mandoc_msg(MANDOCERR_COND_EMPTY
, r
->parse
,
1356 ln
, ppos
, roffs
[tok
].name
);
1358 r
->last
->endspan
= 1;
1373 * The first word is the name of the string.
1374 * If it is empty or terminated by an escape sequence,
1375 * abort the `ds' request without defining anything.
1378 name
= string
= *bufp
+ pos
;
1382 namesz
= roff_getname(r
, &string
, ln
, pos
);
1383 if ('\\' == name
[namesz
])
1386 /* Read past the initial double-quote, if any. */
1390 /* The rest is the value. */
1391 roff_setstrn(&r
->strtab
, name
, namesz
, string
, strlen(string
),
1397 * Parse a single operator, one or two characters long.
1398 * If the operator is recognized, return success and advance the
1399 * parse point, else return failure and leave the parse point unchanged.
1402 roff_getop(const char *v
, int *pos
, char *res
)
1423 switch (v
[*pos
+ 1]) {
1441 switch (v
[*pos
+ 1]) {
1455 if ('=' == v
[*pos
+ 1])
1467 * Evaluate either a parenthesized numeric expression
1468 * or a single signed integer number.
1471 roff_evalpar(struct roff
*r
, int ln
,
1472 const char *v
, int *pos
, int *res
)
1476 return(roff_getnum(v
, pos
, res
));
1479 if ( ! roff_evalnum(r
, ln
, v
, pos
, res
, 1))
1483 * Omission of the closing parenthesis
1484 * is an error in validation mode,
1485 * but ignored in evaluation mode.
1490 else if (NULL
== res
)
1497 * Evaluate a complete numeric expression.
1498 * Proceed left to right, there is no concept of precedence.
1501 roff_evalnum(struct roff
*r
, int ln
, const char *v
,
1502 int *pos
, int *res
, int skipwhite
)
1504 int mypos
, operand2
;
1513 while (isspace((unsigned char)v
[*pos
]))
1516 if ( ! roff_evalpar(r
, ln
, v
, pos
, res
))
1521 while (isspace((unsigned char)v
[*pos
]))
1524 if ( ! roff_getop(v
, pos
, &operator))
1528 while (isspace((unsigned char)v
[*pos
]))
1531 if ( ! roff_evalpar(r
, ln
, v
, pos
, &operand2
))
1535 while (isspace((unsigned char)v
[*pos
]))
1552 if (0 == operand2
) {
1553 mandoc_msg(MANDOCERR_DIVZERO
,
1554 r
->parse
, ln
, *pos
, v
);
1564 *res
= *res
< operand2
;
1567 *res
= *res
> operand2
;
1570 *res
= *res
<= operand2
;
1573 *res
= *res
>= operand2
;
1576 *res
= *res
== operand2
;
1579 *res
= *res
!= operand2
;
1582 *res
= *res
&& operand2
;
1585 *res
= *res
|| operand2
;
1588 if (operand2
< *res
)
1592 if (operand2
> *res
)
1603 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
1605 struct roffreg
*reg
;
1607 /* Search for an existing register with the same name. */
1610 while (reg
&& strcmp(name
, reg
->key
.p
))
1614 /* Create a new register. */
1615 reg
= mandoc_malloc(sizeof(struct roffreg
));
1616 reg
->key
.p
= mandoc_strdup(name
);
1617 reg
->key
.sz
= strlen(name
);
1619 reg
->next
= r
->regtab
;
1625 else if ('-' == sign
)
1632 * Handle some predefined read-only number registers.
1633 * For now, return -1 if the requested register is not predefined;
1634 * in case a predefined read-only register having the value -1
1635 * were to turn up, another special value would have to be chosen.
1638 roff_getregro(const char *name
)
1642 case 'A': /* ASCII approximation mode is always off. */
1644 case 'g': /* Groff compatibility mode is always on. */
1646 case 'H': /* Fixed horizontal resolution. */
1648 case 'j': /* Always adjust left margin only. */
1650 case 'T': /* Some output device is always defined. */
1652 case 'V': /* Fixed vertical resolution. */
1660 roff_getreg(const struct roff
*r
, const char *name
)
1662 struct roffreg
*reg
;
1665 if ('.' == name
[0] && '\0' != name
[1] && '\0' == name
[2]) {
1666 val
= roff_getregro(name
+ 1);
1671 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1672 if (0 == strcmp(name
, reg
->key
.p
))
1679 roff_getregn(const struct roff
*r
, const char *name
, size_t len
)
1681 struct roffreg
*reg
;
1684 if ('.' == name
[0] && 2 == len
) {
1685 val
= roff_getregro(name
+ 1);
1690 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1691 if (len
== reg
->key
.sz
&&
1692 0 == strncmp(name
, reg
->key
.p
, len
))
1699 roff_freereg(struct roffreg
*reg
)
1701 struct roffreg
*old_reg
;
1703 while (NULL
!= reg
) {
1719 key
= val
= *bufp
+ pos
;
1723 keysz
= roff_getname(r
, &val
, ln
, pos
);
1724 if ('\\' == key
[keysz
])
1729 if ('+' == sign
|| '-' == sign
)
1732 if (roff_evalnum(r
, ln
, val
, NULL
, &iv
, 0))
1733 roff_setreg(r
, key
, iv
, sign
);
1741 struct roffreg
*reg
, **prev
;
1745 name
= cp
= *bufp
+ pos
;
1748 namesz
= roff_getname(r
, &cp
, ln
, pos
);
1749 name
[namesz
] = '\0';
1754 if (NULL
== reg
|| !strcmp(name
, reg
->key
.p
))
1774 while ('\0' != *cp
) {
1776 namesz
= roff_getname(r
, &cp
, ln
, (int)(cp
- *bufp
));
1777 roff_setstrn(&r
->strtab
, name
, namesz
, NULL
, 0, 0);
1778 if ('\\' == name
[namesz
])
1791 /* Parse the number of lines. */
1793 len
= strcspn(cp
, " \t");
1795 if ((iv
= mandoc_strntoi(cp
, len
, 10)) <= 0) {
1796 mandoc_msg(MANDOCERR_IT_NONUM
, r
->parse
,
1797 ln
, ppos
, *bufp
+ 1);
1802 /* Arm the input line trap. */
1804 roffit_macro
= mandoc_strdup(cp
);
1811 const char *const *cp
;
1813 if ((r
->options
& (MPARSE_MDOC
| MPARSE_QUICK
)) == 0)
1814 for (cp
= __mdoc_reserved
; *cp
; cp
++)
1815 roff_setstr(r
, *cp
, NULL
, 0);
1818 r
->format
= MPARSE_MDOC
;
1826 const char *const *cp
;
1828 if ((r
->options
& MPARSE_QUICK
) == 0)
1829 for (cp
= __man_reserved
; *cp
; cp
++)
1830 roff_setstr(r
, *cp
, NULL
, 0);
1833 r
->format
= MPARSE_MAN
;
1843 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
1856 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
1859 tbl_restart(ppos
, ln
, r
->tbl
);
1865 * Handle in-line equation delimiters.
1868 roff_eqndelim(struct roff
*r
, char **bufp
, size_t *szp
, int pos
)
1871 const char *bef_pr
, *bef_nl
, *mac
, *aft_nl
, *aft_pr
;
1874 * Outside equations, look for an opening delimiter.
1875 * If we are inside an equation, we already know it is
1876 * in-line, or this function wouldn't have been called;
1877 * so look for a closing delimiter.
1881 cp2
= strchr(cp1
, r
->eqn
== NULL
?
1882 r
->last_eqn
->odelim
: r
->last_eqn
->cdelim
);
1887 bef_pr
= bef_nl
= aft_nl
= aft_pr
= "";
1889 /* Handle preceding text, protecting whitespace. */
1891 if (**bufp
!= '\0') {
1898 * Prepare replacing the delimiter with an equation macro
1899 * and drop leading white space from the equation.
1902 if (r
->eqn
== NULL
) {
1909 /* Handle following text, protecting whitespace. */
1917 /* Do the actual replacement. */
1919 *szp
= mandoc_asprintf(&cp1
, "%s%s%s%s%s%s%s", *bufp
,
1920 bef_pr
, bef_nl
, mac
, aft_nl
, aft_pr
, cp2
) + 1;
1924 /* Toggle the in-line state of the eqn subsystem. */
1926 r
->eqn_inline
= r
->eqn
== NULL
;
1927 return(ROFF_REPARSE
);
1935 assert(NULL
== r
->eqn
);
1936 e
= eqn_alloc(ppos
, ln
, r
->parse
);
1939 r
->last_eqn
->next
= e
;
1940 e
->delim
= r
->last_eqn
->delim
;
1941 e
->odelim
= r
->last_eqn
->odelim
;
1942 e
->cdelim
= r
->last_eqn
->cdelim
;
1944 r
->first_eqn
= r
->last_eqn
= e
;
1946 r
->eqn
= r
->last_eqn
= e
;
1949 mandoc_vmsg(MANDOCERR_ARG_SKIP
, r
->parse
, ln
, pos
,
1950 ".EQ %s", *bufp
+ pos
);
1959 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
, ln
, ppos
, "EN");
1966 struct tbl_node
*tbl
;
1969 mandoc_msg(MANDOCERR_BLK_BROKEN
, r
->parse
,
1970 ln
, ppos
, "TS breaks TS");
1974 tbl
= tbl_alloc(ppos
, ln
, r
->parse
);
1977 r
->last_tbl
->next
= tbl
;
1979 r
->first_tbl
= r
->last_tbl
= tbl
;
1981 r
->tbl
= r
->last_tbl
= tbl
;
1992 if ('\0' == *p
|| '.' == (r
->control
= *p
++))
1996 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
2004 const char *p
, *first
, *second
;
2006 enum mandoc_esc esc
;
2011 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
2015 while ('\0' != *p
) {
2019 if ('\\' == *first
) {
2020 esc
= mandoc_escape(&p
, NULL
, NULL
);
2021 if (ESCAPE_ERROR
== esc
) {
2022 mandoc_msg(MANDOCERR_ESC_BAD
, r
->parse
,
2023 ln
, (int)(p
- *bufp
), first
);
2026 fsz
= (size_t)(p
- first
);
2030 if ('\\' == *second
) {
2031 esc
= mandoc_escape(&p
, NULL
, NULL
);
2032 if (ESCAPE_ERROR
== esc
) {
2033 mandoc_msg(MANDOCERR_ESC_BAD
, r
->parse
,
2034 ln
, (int)(p
- *bufp
), second
);
2037 ssz
= (size_t)(p
- second
);
2038 } else if ('\0' == *second
) {
2039 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
,
2040 ln
, (int)(p
- *bufp
), NULL
);
2046 roff_setstrn(&r
->xmbtab
, first
, fsz
,
2051 if (NULL
== r
->xtab
)
2052 r
->xtab
= mandoc_calloc(128,
2053 sizeof(struct roffstr
));
2055 free(r
->xtab
[(int)*first
].p
);
2056 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
2057 r
->xtab
[(int)*first
].sz
= ssz
;
2069 mandoc_vmsg(MANDOCERR_SO
, r
->parse
, ln
, ppos
, "so %s", name
);
2072 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2073 * opening anything that's not in our cwd or anything beneath
2074 * it. Thus, explicitly disallow traversing up the file-system
2075 * or using absolute paths.
2078 if ('/' == *name
|| strstr(name
, "../") || strstr(name
, "/..")) {
2079 mandoc_vmsg(MANDOCERR_SO_PATH
, r
->parse
, ln
, ppos
,
2089 roff_userdef(ROFF_ARGS
)
2096 * Collect pointers to macro argument strings
2097 * and NUL-terminate them.
2100 for (i
= 0; i
< 9; i
++)
2101 arg
[i
] = '\0' == *cp
? "" :
2102 mandoc_getarg(r
->parse
, &cp
, ln
, &pos
);
2105 * Expand macro arguments.
2108 n1
= cp
= mandoc_strdup(r
->current_string
);
2109 while (NULL
!= (cp
= strstr(cp
, "\\$"))) {
2111 if (0 > i
|| 8 < i
) {
2112 /* Not an argument invocation. */
2117 *szp
= mandoc_asprintf(&n2
, "%s%s%s",
2118 n1
, arg
[i
], cp
+ 3) + 1;
2119 cp
= n2
+ (cp
- n1
);
2125 * Replace the macro invocation
2126 * by the expanded macro.
2131 *szp
= strlen(*bufp
) + 1;
2133 return(*szp
> 1 && '\n' == (*bufp
)[(int)*szp
- 2] ?
2134 ROFF_REPARSE
: ROFF_APPEND
);
2138 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
2147 /* Read until end of name and terminate it with NUL. */
2148 for (cp
= name
; 1; cp
++) {
2149 if ('\0' == *cp
|| ' ' == *cp
) {
2156 if ('{' == cp
[1] || '}' == cp
[1])
2161 mandoc_vmsg(MANDOCERR_NAMESC
, r
->parse
, ln
, pos
,
2162 "%.*s", (int)(cp
- name
+ 1), name
);
2163 mandoc_escape((const char **)&cp
, NULL
, NULL
);
2167 /* Read past spaces. */
2176 * Store *string into the user-defined string called *name.
2177 * To clear an existing entry, call with (*r, *name, NULL, 0).
2178 * append == 0: replace mode
2179 * append == 1: single-line append mode
2180 * append == 2: multiline append mode, append '\n' after each call
2183 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
2187 roff_setstrn(&r
->strtab
, name
, strlen(name
), string
,
2188 string
? strlen(string
) : 0, append
);
2192 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
2193 const char *string
, size_t stringsz
, int append
)
2198 size_t oldch
, newch
;
2200 /* Search for an existing string with the same name. */
2203 while (n
&& (namesz
!= n
->key
.sz
||
2204 strncmp(n
->key
.p
, name
, namesz
)))
2208 /* Create a new string table entry. */
2209 n
= mandoc_malloc(sizeof(struct roffkv
));
2210 n
->key
.p
= mandoc_strndup(name
, namesz
);
2216 } else if (0 == append
) {
2226 * One additional byte for the '\n' in multiline mode,
2227 * and one for the terminating '\0'.
2229 newch
= stringsz
+ (1 < append
? 2u : 1u);
2231 if (NULL
== n
->val
.p
) {
2232 n
->val
.p
= mandoc_malloc(newch
);
2237 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
2240 /* Skip existing content in the destination buffer. */
2241 c
= n
->val
.p
+ (int)oldch
;
2243 /* Append new content to the destination buffer. */
2245 while (i
< (int)stringsz
) {
2247 * Rudimentary roff copy mode:
2248 * Handle escaped backslashes.
2250 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
2255 /* Append terminating bytes. */
2260 n
->val
.sz
= (int)(c
- n
->val
.p
);
2264 roff_getstrn(const struct roff
*r
, const char *name
, size_t len
)
2266 const struct roffkv
*n
;
2269 for (n
= r
->strtab
; n
; n
= n
->next
)
2270 if (0 == strncmp(name
, n
->key
.p
, len
) &&
2271 '\0' == n
->key
.p
[(int)len
])
2274 for (i
= 0; i
< PREDEFS_MAX
; i
++)
2275 if (0 == strncmp(name
, predefs
[i
].name
, len
) &&
2276 '\0' == predefs
[i
].name
[(int)len
])
2277 return(predefs
[i
].str
);
2283 roff_freestr(struct roffkv
*r
)
2285 struct roffkv
*n
, *nn
;
2287 for (n
= r
; n
; n
= nn
) {
2295 const struct tbl_span
*
2296 roff_span(const struct roff
*r
)
2299 return(r
->tbl
? tbl_span(r
->tbl
) : NULL
);
2303 roff_eqn(const struct roff
*r
)
2306 return(r
->last_eqn
? &r
->last_eqn
->eqn
: NULL
);
2310 * Duplicate an input string, making the appropriate character
2311 * conversions (as stipulated by `tr') along the way.
2312 * Returns a heap-allocated string with all the replacements made.
2315 roff_strdup(const struct roff
*r
, const char *p
)
2317 const struct roffkv
*cp
;
2321 enum mandoc_esc esc
;
2323 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
2324 return(mandoc_strdup(p
));
2325 else if ('\0' == *p
)
2326 return(mandoc_strdup(""));
2329 * Step through each character looking for term matches
2330 * (remember that a `tr' can be invoked with an escape, which is
2331 * a glyph but the escape is multi-character).
2332 * We only do this if the character hash has been initialised
2333 * and the string is >0 length.
2339 while ('\0' != *p
) {
2340 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(int)*p
].p
) {
2341 sz
= r
->xtab
[(int)*p
].sz
;
2342 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2343 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
2347 } else if ('\\' != *p
) {
2348 res
= mandoc_realloc(res
, ssz
+ 2);
2353 /* Search for term matches. */
2354 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
2355 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
2360 * A match has been found.
2361 * Append the match to the array and move
2362 * forward by its keysize.
2364 res
= mandoc_realloc(res
,
2365 ssz
+ cp
->val
.sz
+ 1);
2366 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
2368 p
+= (int)cp
->key
.sz
;
2373 * Handle escapes carefully: we need to copy
2374 * over just the escape itself, or else we might
2375 * do replacements within the escape itself.
2376 * Make sure to pass along the bogus string.
2379 esc
= mandoc_escape(&p
, NULL
, NULL
);
2380 if (ESCAPE_ERROR
== esc
) {
2382 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2383 memcpy(res
+ ssz
, pp
, sz
);
2387 * We bail out on bad escapes.
2388 * No need to warn: we already did so when
2389 * roff_res() was called.
2392 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2393 memcpy(res
+ ssz
, pp
, sz
);
2397 res
[(int)ssz
] = '\0';
2402 roff_getformat(const struct roff
*r
)
2409 * Find out whether a line is a macro line or not.
2410 * If it is, adjust the current position and return one; if it isn't,
2411 * return zero and don't change the current position.
2412 * If the control character has been set with `.cc', then let that grain
2414 * This is slightly contrary to groff, where using the non-breaking
2415 * control character when `cc' has been invoked will cause the
2416 * non-breaking macro contents to be printed verbatim.
2419 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
2425 if (0 != r
->control
&& cp
[pos
] == r
->control
)
2427 else if (0 != r
->control
)
2429 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
2431 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
2436 while (' ' == cp
[pos
] || '\t' == cp
[pos
])