]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.237 2014/10/28 17:36:19 schwarze Exp $ */
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
29 #include "mandoc_aux.h"
30 #include "libmandoc.h"
33 /* Maximum number of nested if-else conditionals. */
34 #define RSTACK_MAX 128
36 /* Maximum number of string expansions per line, to break infinite loops. */
37 #define EXPAND_LIMIT 1000
83 * An incredibly-simple string buffer.
86 char *p
; /* nil-terminated buffer */
87 size_t sz
; /* saved strlen(p) */
91 * A key-value roffstr pair as part of a singly-linked list.
96 struct roffkv
*next
; /* next in list */
100 * A single number register as part of a singly-linked list.
105 struct roffreg
*next
;
109 struct mparse
*parse
; /* parse point */
110 const struct mchars
*mchars
; /* character table */
111 struct roffnode
*last
; /* leaf of stack */
112 int *rstack
; /* stack of inverted `ie' values */
113 struct roffreg
*regtab
; /* number registers */
114 struct roffkv
*strtab
; /* user-defined strings & macros */
115 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
116 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
117 const char *current_string
; /* value of last called user macro */
118 struct tbl_node
*first_tbl
; /* first table parsed */
119 struct tbl_node
*last_tbl
; /* last table parsed */
120 struct tbl_node
*tbl
; /* current table being parsed */
121 struct eqn_node
*last_eqn
; /* last equation parsed */
122 struct eqn_node
*first_eqn
; /* first equation parsed */
123 struct eqn_node
*eqn
; /* current equation being parsed */
124 int eqn_inline
; /* current equation is inline */
125 int options
; /* parse options */
126 int rstacksz
; /* current size limit of rstack */
127 int rstackpos
; /* position in rstack */
128 int format
; /* current file in mdoc or man format */
129 char control
; /* control character */
133 enum rofft tok
; /* type of node */
134 struct roffnode
*parent
; /* up one in stack */
135 int line
; /* parse line */
136 int col
; /* parse col */
137 char *name
; /* node name, e.g. macro name */
138 char *end
; /* end-rules: custom token */
139 int endspan
; /* end-rules: next-line or infty */
140 int rule
; /* current evaluation rule */
143 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
144 enum rofft tok, /* tok of macro */ \
145 char **bufp, /* input buffer */ \
146 size_t *szp, /* size of input buffer */ \
147 int ln, /* parse line */ \
148 int ppos, /* original pos in buffer */ \
149 int pos, /* current pos in buffer */ \
150 int *offs /* reset offset of buffer data */
152 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
155 const char *name
; /* macro name */
156 roffproc proc
; /* process new macro */
157 roffproc text
; /* process as child text of macro */
158 roffproc sub
; /* process as child of macro */
160 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
161 struct roffmac
*next
;
165 const char *name
; /* predefined input name */
166 const char *str
; /* replacement symbol */
169 #define PREDEF(__name, __str) \
170 { (__name), (__str) },
172 static enum rofft
roffhash_find(const char *, size_t);
173 static void roffhash_init(void);
174 static void roffnode_cleanscope(struct roff
*);
175 static void roffnode_pop(struct roff
*);
176 static void roffnode_push(struct roff
*, enum rofft
,
177 const char *, int, int);
178 static enum rofferr
roff_block(ROFF_ARGS
);
179 static enum rofferr
roff_block_text(ROFF_ARGS
);
180 static enum rofferr
roff_block_sub(ROFF_ARGS
);
181 static enum rofferr
roff_cblock(ROFF_ARGS
);
182 static enum rofferr
roff_cc(ROFF_ARGS
);
183 static void roff_ccond(struct roff
*, int, int);
184 static enum rofferr
roff_cond(ROFF_ARGS
);
185 static enum rofferr
roff_cond_text(ROFF_ARGS
);
186 static enum rofferr
roff_cond_sub(ROFF_ARGS
);
187 static enum rofferr
roff_ds(ROFF_ARGS
);
188 static enum rofferr
roff_eqndelim(struct roff
*,
189 char **, size_t *, int);
190 static int roff_evalcond(struct roff
*r
, int,
191 const char *, int *);
192 static int roff_evalnum(struct roff
*, int,
193 const char *, int *, int *, int);
194 static int roff_evalpar(struct roff
*, int,
195 const char *, int *, int *);
196 static int roff_evalstrcond(const char *, int *);
197 static void roff_free1(struct roff
*);
198 static void roff_freereg(struct roffreg
*);
199 static void roff_freestr(struct roffkv
*);
200 static size_t roff_getname(struct roff
*, char **, int, int);
201 static int roff_getnum(const char *, int *, int *);
202 static int roff_getop(const char *, int *, char *);
203 static int roff_getregn(const struct roff
*,
204 const char *, size_t);
205 static int roff_getregro(const char *name
);
206 static const char *roff_getstrn(const struct roff
*,
207 const char *, size_t);
208 static enum rofferr
roff_it(ROFF_ARGS
);
209 static enum rofferr
roff_line_ignore(ROFF_ARGS
);
210 static enum rofferr
roff_nr(ROFF_ARGS
);
211 static enum rofft
roff_parse(struct roff
*, char *, int *,
213 static enum rofferr
roff_parsetext(char **, size_t *, int, int *);
214 static enum rofferr
roff_res(struct roff
*,
215 char **, size_t *, int, int);
216 static enum rofferr
roff_rm(ROFF_ARGS
);
217 static enum rofferr
roff_rr(ROFF_ARGS
);
218 static void roff_setstr(struct roff
*,
219 const char *, const char *, int);
220 static void roff_setstrn(struct roffkv
**, const char *,
221 size_t, const char *, size_t, int);
222 static enum rofferr
roff_so(ROFF_ARGS
);
223 static enum rofferr
roff_tr(ROFF_ARGS
);
224 static enum rofferr
roff_Dd(ROFF_ARGS
);
225 static enum rofferr
roff_TH(ROFF_ARGS
);
226 static enum rofferr
roff_TE(ROFF_ARGS
);
227 static enum rofferr
roff_TS(ROFF_ARGS
);
228 static enum rofferr
roff_EQ(ROFF_ARGS
);
229 static enum rofferr
roff_EN(ROFF_ARGS
);
230 static enum rofferr
roff_T_(ROFF_ARGS
);
231 static enum rofferr
roff_userdef(ROFF_ARGS
);
233 /* See roffhash_find() */
237 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
239 static struct roffmac
*hash
[HASHWIDTH
];
241 static struct roffmac roffs
[ROFF_MAX
] = {
242 { "ad", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
243 { "am", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
244 { "ami", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
245 { "am1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
246 { "as", roff_ds
, NULL
, NULL
, 0, NULL
},
247 { "cc", roff_cc
, NULL
, NULL
, 0, NULL
},
248 { "ce", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
249 { "de", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
250 { "dei", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
251 { "de1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
252 { "ds", roff_ds
, NULL
, NULL
, 0, NULL
},
253 { "el", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
254 { "fam", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
255 { "hw", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
256 { "hy", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
257 { "ie", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
258 { "if", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
259 { "ig", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
260 { "it", roff_it
, NULL
, NULL
, 0, NULL
},
261 { "ne", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
262 { "nh", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
263 { "nr", roff_nr
, NULL
, NULL
, 0, NULL
},
264 { "ns", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
265 { "pl", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
266 { "ps", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
267 { "rm", roff_rm
, NULL
, NULL
, 0, NULL
},
268 { "rr", roff_rr
, NULL
, NULL
, 0, NULL
},
269 { "so", roff_so
, NULL
, NULL
, 0, NULL
},
270 { "ta", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
271 { "tr", roff_tr
, NULL
, NULL
, 0, NULL
},
272 { "Dd", roff_Dd
, NULL
, NULL
, 0, NULL
},
273 { "TH", roff_TH
, NULL
, NULL
, 0, NULL
},
274 { "TS", roff_TS
, NULL
, NULL
, 0, NULL
},
275 { "TE", roff_TE
, NULL
, NULL
, 0, NULL
},
276 { "T&", roff_T_
, NULL
, NULL
, 0, NULL
},
277 { "EQ", roff_EQ
, NULL
, NULL
, 0, NULL
},
278 { "EN", roff_EN
, NULL
, NULL
, 0, NULL
},
279 { ".", roff_cblock
, NULL
, NULL
, 0, NULL
},
280 { NULL
, roff_userdef
, NULL
, NULL
, 0, NULL
},
283 /* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
284 const char *const __mdoc_reserved
[] = {
285 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
286 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
287 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
288 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
289 "Dt", "Dv", "Dx", "D1",
290 "Ec", "Ed", "Ef", "Ek", "El", "Em",
291 "En", "Eo", "Er", "Es", "Ev", "Ex",
292 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
293 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
294 "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
295 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
296 "Pa", "Pc", "Pf", "Po", "Pp", "Pq",
297 "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
298 "Sc", "Sh", "Sm", "So", "Sq",
299 "Ss", "St", "Sx", "Sy",
300 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
301 "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
302 "%P", "%Q", "%R", "%T", "%U", "%V",
306 /* not currently implemented: BT DE DS ME MT PT SY TQ YS */
307 const char *const __man_reserved
[] = {
308 "AT", "B", "BI", "BR", "DT",
309 "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
310 "LP", "OP", "P", "PD", "PP",
311 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
312 "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
316 /* Array of injected predefined strings. */
317 #define PREDEFS_MAX 38
318 static const struct predef predefs
[PREDEFS_MAX
] = {
319 #include "predefs.in"
322 /* See roffhash_find() */
323 #define ROFF_HASH(p) (p[0] - ASCII_LO)
325 static int roffit_lines
; /* number of lines to delay */
326 static char *roffit_macro
; /* nil-terminated macro line */
335 for (i
= 0; i
< (int)ROFF_USERDEF
; i
++) {
336 assert(roffs
[i
].name
[0] >= ASCII_LO
);
337 assert(roffs
[i
].name
[0] <= ASCII_HI
);
339 buc
= ROFF_HASH(roffs
[i
].name
);
341 if (NULL
!= (n
= hash
[buc
])) {
342 for ( ; n
->next
; n
= n
->next
)
346 hash
[buc
] = &roffs
[i
];
351 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
352 * the nil-terminated string name could be found.
355 roffhash_find(const char *p
, size_t s
)
361 * libroff has an extremely simple hashtable, for the time
362 * being, which simply keys on the first character, which must
363 * be printable, then walks a chain. It works well enough until
367 if (p
[0] < ASCII_LO
|| p
[0] > ASCII_HI
)
372 if (NULL
== (n
= hash
[buc
]))
374 for ( ; n
; n
= n
->next
)
375 if (0 == strncmp(n
->name
, p
, s
) && '\0' == n
->name
[(int)s
])
376 return((enum rofft
)(n
- roffs
));
382 * Pop the current node off of the stack of roff instructions currently
386 roffnode_pop(struct roff
*r
)
393 r
->last
= r
->last
->parent
;
400 * Push a roff node onto the instruction stack. This must later be
401 * removed with roffnode_pop().
404 roffnode_push(struct roff
*r
, enum rofft tok
, const char *name
,
409 p
= mandoc_calloc(1, sizeof(struct roffnode
));
412 p
->name
= mandoc_strdup(name
);
416 p
->rule
= p
->parent
? p
->parent
->rule
: 0;
422 roff_free1(struct roff
*r
)
424 struct tbl_node
*tbl
;
428 while (NULL
!= (tbl
= r
->first_tbl
)) {
429 r
->first_tbl
= tbl
->next
;
432 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
434 while (NULL
!= (e
= r
->first_eqn
)) {
435 r
->first_eqn
= e
->next
;
438 r
->first_eqn
= r
->last_eqn
= r
->eqn
= NULL
;
448 roff_freereg(r
->regtab
);
451 roff_freestr(r
->strtab
);
452 roff_freestr(r
->xmbtab
);
453 r
->strtab
= r
->xmbtab
= NULL
;
456 for (i
= 0; i
< 128; i
++)
463 roff_reset(struct roff
*r
)
467 r
->format
= r
->options
& (MPARSE_MDOC
| MPARSE_MAN
);
472 roff_free(struct roff
*r
)
480 roff_alloc(struct mparse
*parse
, const struct mchars
*mchars
, int options
)
484 r
= mandoc_calloc(1, sizeof(struct roff
));
487 r
->options
= options
;
488 r
->format
= options
& (MPARSE_MDOC
| MPARSE_MAN
);
497 * In the current line, expand escape sequences that tend to get
498 * used in numerical expressions and conditional requests.
499 * Also check the syntax of the remaining escape sequences.
502 roff_res(struct roff
*r
, char **bufp
, size_t *szp
, int ln
, int pos
)
504 char ubuf
[24]; /* buffer to print the number */
505 const char *start
; /* start of the string to process */
506 char *stesc
; /* start of an escape sequence ('\\') */
507 const char *stnam
; /* start of the name, after "[(*" */
508 const char *cp
; /* end of the name, e.g. before ']' */
509 const char *res
; /* the string to be substituted */
510 char *nbuf
; /* new buffer to copy bufp to */
511 size_t maxl
; /* expected length of the escape name */
512 size_t naml
; /* actual length of the escape name */
513 enum mandoc_esc esc
; /* type of the escape sequence */
514 int inaml
; /* length returned from mandoc_escape() */
515 int expand_count
; /* to avoid infinite loops */
516 int npos
; /* position in numeric expression */
517 int arg_complete
; /* argument not interrupted by eol */
518 char term
; /* character terminating the escape */
522 stesc
= strchr(start
, '\0') - 1;
523 while (stesc
-- > start
) {
525 /* Search backwards for the next backslash. */
530 /* If it is escaped, skip it. */
532 for (cp
= stesc
- 1; cp
>= start
; cp
--)
536 if (0 == (stesc
- cp
) % 2) {
541 /* Decide whether to expand or to check only. */
558 esc
= mandoc_escape(&cp
, &stnam
, &inaml
);
559 if (esc
== ESCAPE_ERROR
||
560 (esc
== ESCAPE_SPECIAL
&&
561 mchars_spec2cp(r
->mchars
, stnam
, inaml
) < 0))
562 mandoc_vmsg(MANDOCERR_ESC_BAD
,
563 r
->parse
, ln
, (int)(stesc
- *bufp
),
564 "%.*s", (int)(cp
- stesc
), stesc
);
568 if (EXPAND_LIMIT
< ++expand_count
) {
569 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
,
570 ln
, (int)(stesc
- *bufp
), NULL
);
575 * The third character decides the length
576 * of the name of the string or register.
577 * Save a pointer to the name.
604 /* Advance to the end of the name. */
607 for (naml
= 0; 0 == maxl
|| naml
< maxl
; naml
++, cp
++) {
609 mandoc_msg(MANDOCERR_ESC_BAD
, r
->parse
,
610 ln
, (int)(stesc
- *bufp
), stesc
);
614 if (0 == maxl
&& *cp
== term
) {
621 * Retrieve the replacement string; if it is
622 * undefined, resume searching for escapes.
628 res
= roff_getstrn(r
, stnam
, naml
);
632 ubuf
[0] = arg_complete
&&
633 roff_evalnum(r
, ln
, stnam
, &npos
, NULL
, 0) &&
634 stnam
+ npos
+ 1 == cp
? '1' : '0';
639 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
640 roff_getregn(r
, stnam
, naml
));
645 /* use even incomplete args */
646 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
652 mandoc_vmsg(MANDOCERR_STR_UNDEF
,
653 r
->parse
, ln
, (int)(stesc
- *bufp
),
654 "%.*s", (int)naml
, stnam
);
658 /* Replace the escape sequence by the string. */
661 *szp
= mandoc_asprintf(&nbuf
, "%s%s%s",
664 /* Prepare for the next replacement. */
667 stesc
= nbuf
+ (stesc
- *bufp
) + strlen(res
);
675 * Process text streams:
676 * Convert all breakable hyphens into ASCII_HYPH.
677 * Decrement and spring input line trap.
680 roff_parsetext(char **bufp
, size_t *szp
, int pos
, int *offs
)
688 start
= p
= *bufp
+ pos
;
691 sz
= strcspn(p
, "-\\");
698 /* Skip over escapes. */
700 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
701 if (ESCAPE_ERROR
== esc
)
704 } else if (p
== start
) {
709 if (isalpha((unsigned char)p
[-1]) &&
710 isalpha((unsigned char)p
[1]))
715 /* Spring the input line trap. */
716 if (1 == roffit_lines
) {
717 isz
= mandoc_asprintf(&p
, "%s\n.%s", *bufp
, roffit_macro
);
724 return(ROFF_REPARSE
);
725 } else if (1 < roffit_lines
)
731 roff_parseln(struct roff
*r
, int ln
, char **bufp
,
732 size_t *szp
, int pos
, int *offs
)
738 /* Handle in-line equation delimiters. */
740 if (r
->tbl
== NULL
&&
741 r
->last_eqn
!= NULL
&& r
->last_eqn
->delim
&&
742 (r
->eqn
== NULL
|| r
->eqn_inline
)) {
743 e
= roff_eqndelim(r
, bufp
, szp
, pos
);
744 if (e
== ROFF_REPARSE
)
746 assert(e
== ROFF_CONT
);
749 /* Expand some escape sequences. */
751 e
= roff_res(r
, bufp
, szp
, ln
, pos
);
754 assert(ROFF_CONT
== e
);
757 ctl
= roff_getcontrol(r
, *bufp
, &pos
);
760 * First, if a scope is open and we're not a macro, pass the
761 * text through the macro's filter. If a scope isn't open and
762 * we're not a macro, just let it through.
763 * Finally, if there's an equation scope open, divert it into it
764 * no matter our state.
767 if (r
->last
&& ! ctl
) {
769 assert(roffs
[t
].text
);
770 e
= (*roffs
[t
].text
)(r
, t
, bufp
, szp
, ln
, pos
, pos
, offs
);
771 assert(ROFF_IGN
== e
|| ROFF_CONT
== e
);
776 return(eqn_read(&r
->eqn
, ln
, *bufp
, ppos
, offs
));
779 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
780 return(roff_parsetext(bufp
, szp
, pos
, offs
));
783 /* Skip empty request lines. */
785 if ((*bufp
)[pos
] == '"') {
786 mandoc_msg(MANDOCERR_COMMENT_BAD
, r
->parse
,
789 } else if ((*bufp
)[pos
] == '\0')
793 * If a scope is open, go to the child handler for that macro,
794 * as it may want to preprocess before doing anything with it.
795 * Don't do so if an equation is open.
800 assert(roffs
[t
].sub
);
801 return((*roffs
[t
].sub
)(r
, t
, bufp
, szp
,
802 ln
, ppos
, pos
, offs
));
806 * Lastly, as we've no scope open, try to look up and execute
807 * the new macro. If no macro is found, simply return and let
808 * the compilers handle it.
811 if (ROFF_MAX
== (t
= roff_parse(r
, *bufp
, &pos
, ln
, ppos
)))
814 assert(roffs
[t
].proc
);
815 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
, ln
, ppos
, pos
, offs
));
819 roff_endparse(struct roff
*r
)
823 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->parse
,
824 r
->last
->line
, r
->last
->col
,
825 roffs
[r
->last
->tok
].name
);
828 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->parse
,
829 r
->eqn
->eqn
.ln
, r
->eqn
->eqn
.pos
, "EQ");
834 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->parse
,
835 r
->tbl
->line
, r
->tbl
->pos
, "TS");
841 * Parse a roff node's type from the input buffer. This must be in the
842 * form of ".foo xxx" in the usual way.
845 roff_parse(struct roff
*r
, char *buf
, int *pos
, int ln
, int ppos
)
854 if ('\0' == *cp
|| '"' == *cp
|| '\t' == *cp
|| ' ' == *cp
)
858 maclen
= roff_getname(r
, &cp
, ln
, ppos
);
860 t
= (r
->current_string
= roff_getstrn(r
, mac
, maclen
))
861 ? ROFF_USERDEF
: roffhash_find(mac
, maclen
);
870 roff_cblock(ROFF_ARGS
)
874 * A block-close `..' should only be invoked as a child of an
875 * ignore macro, otherwise raise a warning and just ignore it.
878 if (NULL
== r
->last
) {
879 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
884 switch (r
->last
->tok
) {
886 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
891 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
898 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
904 mandoc_vmsg(MANDOCERR_ARG_SKIP
, r
->parse
, ln
, pos
,
905 ".. %s", *bufp
+ pos
);
908 roffnode_cleanscope(r
);
914 roffnode_cleanscope(struct roff
*r
)
918 if (--r
->last
->endspan
!= 0)
925 roff_ccond(struct roff
*r
, int ln
, int ppos
)
928 if (NULL
== r
->last
) {
929 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
934 switch (r
->last
->tok
) {
942 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
947 if (r
->last
->endspan
> -1) {
948 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
954 roffnode_cleanscope(r
);
959 roff_block(ROFF_ARGS
)
965 /* Ignore groff compatibility mode for now. */
969 else if (ROFF_am1
== tok
)
972 /* Parse the macro name argument. */
975 if (ROFF_ig
== tok
) {
980 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
981 iname
[namesz
] = '\0';
984 /* Resolve the macro name argument if it is indirect. */
986 if (namesz
&& (ROFF_dei
== tok
|| ROFF_ami
== tok
)) {
987 if (NULL
== (name
= roff_getstrn(r
, iname
, namesz
))) {
988 mandoc_vmsg(MANDOCERR_STR_UNDEF
,
989 r
->parse
, ln
, (int)(iname
- *bufp
),
990 "%.*s", (int)namesz
, iname
);
993 namesz
= strlen(name
);
997 if (0 == namesz
&& ROFF_ig
!= tok
) {
998 mandoc_msg(MANDOCERR_REQ_EMPTY
, r
->parse
,
999 ln
, ppos
, roffs
[tok
].name
);
1003 roffnode_push(r
, tok
, name
, ln
, ppos
);
1006 * At the beginning of a `de' macro, clear the existing string
1007 * with the same name, if there is one. New content will be
1008 * appended from roff_block_text() in multiline mode.
1011 if (ROFF_de
== tok
|| ROFF_dei
== tok
)
1012 roff_setstrn(&r
->strtab
, name
, namesz
, "", 0, 0);
1017 /* Get the custom end marker. */
1020 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
1022 /* Resolve the end marker if it is indirect. */
1024 if (namesz
&& (ROFF_dei
== tok
|| ROFF_ami
== tok
)) {
1025 if (NULL
== (name
= roff_getstrn(r
, iname
, namesz
))) {
1026 mandoc_vmsg(MANDOCERR_STR_UNDEF
,
1027 r
->parse
, ln
, (int)(iname
- *bufp
),
1028 "%.*s", (int)namesz
, iname
);
1031 namesz
= strlen(name
);
1036 r
->last
->end
= mandoc_strndup(name
, namesz
);
1039 mandoc_vmsg(MANDOCERR_ARG_EXCESS
, r
->parse
,
1040 ln
, pos
, ".%s ... %s", roffs
[tok
].name
, cp
);
1046 roff_block_sub(ROFF_ARGS
)
1052 * First check whether a custom macro exists at this level. If
1053 * it does, then check against it. This is one of groff's
1054 * stranger behaviours. If we encountered a custom end-scope
1055 * tag and that tag also happens to be a "real" macro, then we
1056 * need to try interpreting it again as a real macro. If it's
1057 * not, then return ignore. Else continue.
1061 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
1062 if ((*bufp
)[i
] != r
->last
->end
[j
])
1065 if ('\0' == r
->last
->end
[j
] &&
1066 ('\0' == (*bufp
)[i
] ||
1067 ' ' == (*bufp
)[i
] ||
1068 '\t' == (*bufp
)[i
])) {
1070 roffnode_cleanscope(r
);
1072 while (' ' == (*bufp
)[i
] || '\t' == (*bufp
)[i
])
1076 if (ROFF_MAX
!= roff_parse(r
, *bufp
, &pos
, ln
, ppos
))
1083 * If we have no custom end-query or lookup failed, then try
1084 * pulling it out of the hashtable.
1087 t
= roff_parse(r
, *bufp
, &pos
, ln
, ppos
);
1089 if (ROFF_cblock
!= t
) {
1091 roff_setstr(r
, r
->last
->name
, *bufp
+ ppos
, 2);
1095 assert(roffs
[t
].proc
);
1096 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
, ln
, ppos
, pos
, offs
));
1100 roff_block_text(ROFF_ARGS
)
1104 roff_setstr(r
, r
->last
->name
, *bufp
+ pos
, 2);
1110 roff_cond_sub(ROFF_ARGS
)
1117 roffnode_cleanscope(r
);
1118 t
= roff_parse(r
, *bufp
, &pos
, ln
, ppos
);
1121 * Fully handle known macros when they are structurally
1122 * required or when the conditional evaluated to true.
1125 if ((ROFF_MAX
!= t
) &&
1126 (rr
|| ROFFMAC_STRUCT
& roffs
[t
].flags
)) {
1127 assert(roffs
[t
].proc
);
1128 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1129 ln
, ppos
, pos
, offs
));
1133 * If `\}' occurs on a macro line without a preceding macro,
1134 * drop the line completely.
1138 if ('\\' == ep
[0] && '}' == ep
[1])
1141 /* Always check for the closing delimiter `\}'. */
1143 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1144 if ('}' == *(++ep
)) {
1146 roff_ccond(r
, ln
, ep
- *bufp
- 1);
1150 return(rr
? ROFF_CONT
: ROFF_IGN
);
1154 roff_cond_text(ROFF_ARGS
)
1160 roffnode_cleanscope(r
);
1163 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1164 if ('}' == *(++ep
)) {
1166 roff_ccond(r
, ln
, ep
- *bufp
- 1);
1170 return(rr
? ROFF_CONT
: ROFF_IGN
);
1174 * Parse a single signed integer number. Stop at the first non-digit.
1175 * If there is at least one digit, return success and advance the
1176 * parse point, else return failure and leave the parse point unchanged.
1177 * Ignore overflows, treat them just like the C language.
1180 roff_getnum(const char *v
, int *pos
, int *res
)
1192 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
1193 *res
= 10 * *res
+ v
[p
] - '0';
1205 * Evaluate a string comparison condition.
1206 * The first character is the delimiter.
1207 * Succeed if the string up to its second occurrence
1208 * matches the string up to its third occurrence.
1209 * Advance the cursor after the third occurrence
1210 * or lacking that, to the end of the line.
1213 roff_evalstrcond(const char *v
, int *pos
)
1215 const char *s1
, *s2
, *s3
;
1219 s1
= v
+ *pos
; /* initial delimiter */
1220 s2
= s1
+ 1; /* for scanning the first string */
1221 s3
= strchr(s2
, *s1
); /* for scanning the second string */
1223 if (NULL
== s3
) /* found no middle delimiter */
1226 while ('\0' != *++s3
) {
1227 if (*s2
!= *s3
) { /* mismatch */
1228 s3
= strchr(s3
, *s1
);
1231 if (*s3
== *s1
) { /* found the final delimiter */
1240 s3
= strchr(s2
, '\0');
1248 * Evaluate an optionally negated single character, numerical,
1249 * or string condition.
1252 roff_evalcond(struct roff
*r
, int ln
, const char *v
, int *pos
)
1254 int wanttrue
, number
;
1256 if ('!' == v
[*pos
]) {
1283 if (roff_evalnum(r
, ln
, v
, pos
, &number
, 0))
1284 return((number
> 0) == wanttrue
);
1286 return(roff_evalstrcond(v
, pos
) == wanttrue
);
1290 roff_line_ignore(ROFF_ARGS
)
1297 roff_cond(ROFF_ARGS
)
1300 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
1303 * An `.el' has no conditional body: it will consume the value
1304 * of the current rstack entry set in prior `ie' calls or
1307 * If we're not an `el', however, then evaluate the conditional.
1310 r
->last
->rule
= ROFF_el
== tok
?
1311 (r
->rstackpos
< 0 ? 0 : r
->rstack
[r
->rstackpos
--]) :
1312 roff_evalcond(r
, ln
, *bufp
, &pos
);
1315 * An if-else will put the NEGATION of the current evaluated
1316 * conditional into the stack of rules.
1319 if (ROFF_ie
== tok
) {
1320 if (r
->rstackpos
+ 1 == r
->rstacksz
) {
1322 r
->rstack
= mandoc_reallocarray(r
->rstack
,
1323 r
->rstacksz
, sizeof(int));
1325 r
->rstack
[++r
->rstackpos
] = !r
->last
->rule
;
1328 /* If the parent has false as its rule, then so do we. */
1330 if (r
->last
->parent
&& !r
->last
->parent
->rule
)
1335 * If there is nothing on the line after the conditional,
1336 * not even whitespace, use next-line scope.
1339 if ('\0' == (*bufp
)[pos
]) {
1340 r
->last
->endspan
= 2;
1344 while (' ' == (*bufp
)[pos
])
1347 /* An opening brace requests multiline scope. */
1349 if ('\\' == (*bufp
)[pos
] && '{' == (*bufp
)[pos
+ 1]) {
1350 r
->last
->endspan
= -1;
1356 * Anything else following the conditional causes
1357 * single-line scope. Warn if the scope contains
1358 * nothing but trailing whitespace.
1361 if ('\0' == (*bufp
)[pos
])
1362 mandoc_msg(MANDOCERR_COND_EMPTY
, r
->parse
,
1363 ln
, ppos
, roffs
[tok
].name
);
1365 r
->last
->endspan
= 1;
1380 * The first word is the name of the string.
1381 * If it is empty or terminated by an escape sequence,
1382 * abort the `ds' request without defining anything.
1385 name
= string
= *bufp
+ pos
;
1389 namesz
= roff_getname(r
, &string
, ln
, pos
);
1390 if ('\\' == name
[namesz
])
1393 /* Read past the initial double-quote, if any. */
1397 /* The rest is the value. */
1398 roff_setstrn(&r
->strtab
, name
, namesz
, string
, strlen(string
),
1404 * Parse a single operator, one or two characters long.
1405 * If the operator is recognized, return success and advance the
1406 * parse point, else return failure and leave the parse point unchanged.
1409 roff_getop(const char *v
, int *pos
, char *res
)
1430 switch (v
[*pos
+ 1]) {
1448 switch (v
[*pos
+ 1]) {
1462 if ('=' == v
[*pos
+ 1])
1474 * Evaluate either a parenthesized numeric expression
1475 * or a single signed integer number.
1478 roff_evalpar(struct roff
*r
, int ln
,
1479 const char *v
, int *pos
, int *res
)
1483 return(roff_getnum(v
, pos
, res
));
1486 if ( ! roff_evalnum(r
, ln
, v
, pos
, res
, 1))
1490 * Omission of the closing parenthesis
1491 * is an error in validation mode,
1492 * but ignored in evaluation mode.
1497 else if (NULL
== res
)
1504 * Evaluate a complete numeric expression.
1505 * Proceed left to right, there is no concept of precedence.
1508 roff_evalnum(struct roff
*r
, int ln
, const char *v
,
1509 int *pos
, int *res
, int skipwhite
)
1511 int mypos
, operand2
;
1520 while (isspace((unsigned char)v
[*pos
]))
1523 if ( ! roff_evalpar(r
, ln
, v
, pos
, res
))
1528 while (isspace((unsigned char)v
[*pos
]))
1531 if ( ! roff_getop(v
, pos
, &operator))
1535 while (isspace((unsigned char)v
[*pos
]))
1538 if ( ! roff_evalpar(r
, ln
, v
, pos
, &operand2
))
1542 while (isspace((unsigned char)v
[*pos
]))
1559 if (0 == operand2
) {
1560 mandoc_msg(MANDOCERR_DIVZERO
,
1561 r
->parse
, ln
, *pos
, v
);
1571 *res
= *res
< operand2
;
1574 *res
= *res
> operand2
;
1577 *res
= *res
<= operand2
;
1580 *res
= *res
>= operand2
;
1583 *res
= *res
== operand2
;
1586 *res
= *res
!= operand2
;
1589 *res
= *res
&& operand2
;
1592 *res
= *res
|| operand2
;
1595 if (operand2
< *res
)
1599 if (operand2
> *res
)
1610 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
1612 struct roffreg
*reg
;
1614 /* Search for an existing register with the same name. */
1617 while (reg
&& strcmp(name
, reg
->key
.p
))
1621 /* Create a new register. */
1622 reg
= mandoc_malloc(sizeof(struct roffreg
));
1623 reg
->key
.p
= mandoc_strdup(name
);
1624 reg
->key
.sz
= strlen(name
);
1626 reg
->next
= r
->regtab
;
1632 else if ('-' == sign
)
1639 * Handle some predefined read-only number registers.
1640 * For now, return -1 if the requested register is not predefined;
1641 * in case a predefined read-only register having the value -1
1642 * were to turn up, another special value would have to be chosen.
1645 roff_getregro(const char *name
)
1649 case 'A': /* ASCII approximation mode is always off. */
1651 case 'g': /* Groff compatibility mode is always on. */
1653 case 'H': /* Fixed horizontal resolution. */
1655 case 'j': /* Always adjust left margin only. */
1657 case 'T': /* Some output device is always defined. */
1659 case 'V': /* Fixed vertical resolution. */
1667 roff_getreg(const struct roff
*r
, const char *name
)
1669 struct roffreg
*reg
;
1672 if ('.' == name
[0] && '\0' != name
[1] && '\0' == name
[2]) {
1673 val
= roff_getregro(name
+ 1);
1678 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1679 if (0 == strcmp(name
, reg
->key
.p
))
1686 roff_getregn(const struct roff
*r
, const char *name
, size_t len
)
1688 struct roffreg
*reg
;
1691 if ('.' == name
[0] && 2 == len
) {
1692 val
= roff_getregro(name
+ 1);
1697 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1698 if (len
== reg
->key
.sz
&&
1699 0 == strncmp(name
, reg
->key
.p
, len
))
1706 roff_freereg(struct roffreg
*reg
)
1708 struct roffreg
*old_reg
;
1710 while (NULL
!= reg
) {
1726 key
= val
= *bufp
+ pos
;
1730 keysz
= roff_getname(r
, &val
, ln
, pos
);
1731 if ('\\' == key
[keysz
])
1736 if ('+' == sign
|| '-' == sign
)
1739 if (roff_evalnum(r
, ln
, val
, NULL
, &iv
, 0))
1740 roff_setreg(r
, key
, iv
, sign
);
1748 struct roffreg
*reg
, **prev
;
1752 name
= cp
= *bufp
+ pos
;
1755 namesz
= roff_getname(r
, &cp
, ln
, pos
);
1756 name
[namesz
] = '\0';
1761 if (NULL
== reg
|| !strcmp(name
, reg
->key
.p
))
1781 while ('\0' != *cp
) {
1783 namesz
= roff_getname(r
, &cp
, ln
, (int)(cp
- *bufp
));
1784 roff_setstrn(&r
->strtab
, name
, namesz
, NULL
, 0, 0);
1785 if ('\\' == name
[namesz
])
1798 /* Parse the number of lines. */
1800 len
= strcspn(cp
, " \t");
1802 if ((iv
= mandoc_strntoi(cp
, len
, 10)) <= 0) {
1803 mandoc_msg(MANDOCERR_IT_NONUM
, r
->parse
,
1804 ln
, ppos
, *bufp
+ 1);
1809 /* Arm the input line trap. */
1811 roffit_macro
= mandoc_strdup(cp
);
1818 const char *const *cp
;
1820 if ((r
->options
& (MPARSE_MDOC
| MPARSE_QUICK
)) == 0)
1821 for (cp
= __mdoc_reserved
; *cp
; cp
++)
1822 roff_setstr(r
, *cp
, NULL
, 0);
1825 r
->format
= MPARSE_MDOC
;
1833 const char *const *cp
;
1835 if ((r
->options
& MPARSE_QUICK
) == 0)
1836 for (cp
= __man_reserved
; *cp
; cp
++)
1837 roff_setstr(r
, *cp
, NULL
, 0);
1840 r
->format
= MPARSE_MAN
;
1850 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
1863 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
1866 tbl_restart(ppos
, ln
, r
->tbl
);
1872 * Handle in-line equation delimiters.
1875 roff_eqndelim(struct roff
*r
, char **bufp
, size_t *szp
, int pos
)
1878 const char *bef_pr
, *bef_nl
, *mac
, *aft_nl
, *aft_pr
;
1881 * Outside equations, look for an opening delimiter.
1882 * If we are inside an equation, we already know it is
1883 * in-line, or this function wouldn't have been called;
1884 * so look for a closing delimiter.
1888 cp2
= strchr(cp1
, r
->eqn
== NULL
?
1889 r
->last_eqn
->odelim
: r
->last_eqn
->cdelim
);
1894 bef_pr
= bef_nl
= aft_nl
= aft_pr
= "";
1896 /* Handle preceding text, protecting whitespace. */
1898 if (**bufp
!= '\0') {
1905 * Prepare replacing the delimiter with an equation macro
1906 * and drop leading white space from the equation.
1909 if (r
->eqn
== NULL
) {
1916 /* Handle following text, protecting whitespace. */
1924 /* Do the actual replacement. */
1926 *szp
= mandoc_asprintf(&cp1
, "%s%s%s%s%s%s%s", *bufp
,
1927 bef_pr
, bef_nl
, mac
, aft_nl
, aft_pr
, cp2
) + 1;
1931 /* Toggle the in-line state of the eqn subsystem. */
1933 r
->eqn_inline
= r
->eqn
== NULL
;
1934 return(ROFF_REPARSE
);
1942 assert(NULL
== r
->eqn
);
1943 e
= eqn_alloc(ppos
, ln
, r
->parse
);
1946 r
->last_eqn
->next
= e
;
1947 e
->delim
= r
->last_eqn
->delim
;
1948 e
->odelim
= r
->last_eqn
->odelim
;
1949 e
->cdelim
= r
->last_eqn
->cdelim
;
1951 r
->first_eqn
= r
->last_eqn
= e
;
1953 r
->eqn
= r
->last_eqn
= e
;
1956 mandoc_vmsg(MANDOCERR_ARG_SKIP
, r
->parse
, ln
, pos
,
1957 ".EQ %s", *bufp
+ pos
);
1966 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
, ln
, ppos
, "EN");
1973 struct tbl_node
*tbl
;
1976 mandoc_msg(MANDOCERR_BLK_BROKEN
, r
->parse
,
1977 ln
, ppos
, "TS breaks TS");
1981 tbl
= tbl_alloc(ppos
, ln
, r
->parse
);
1984 r
->last_tbl
->next
= tbl
;
1986 r
->first_tbl
= r
->last_tbl
= tbl
;
1988 r
->tbl
= r
->last_tbl
= tbl
;
1999 if ('\0' == *p
|| '.' == (r
->control
= *p
++))
2003 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
2011 const char *p
, *first
, *second
;
2013 enum mandoc_esc esc
;
2018 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
2022 while ('\0' != *p
) {
2026 if ('\\' == *first
) {
2027 esc
= mandoc_escape(&p
, NULL
, NULL
);
2028 if (ESCAPE_ERROR
== esc
) {
2029 mandoc_msg(MANDOCERR_ESC_BAD
, r
->parse
,
2030 ln
, (int)(p
- *bufp
), first
);
2033 fsz
= (size_t)(p
- first
);
2037 if ('\\' == *second
) {
2038 esc
= mandoc_escape(&p
, NULL
, NULL
);
2039 if (ESCAPE_ERROR
== esc
) {
2040 mandoc_msg(MANDOCERR_ESC_BAD
, r
->parse
,
2041 ln
, (int)(p
- *bufp
), second
);
2044 ssz
= (size_t)(p
- second
);
2045 } else if ('\0' == *second
) {
2046 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
,
2047 ln
, (int)(p
- *bufp
), NULL
);
2053 roff_setstrn(&r
->xmbtab
, first
, fsz
,
2058 if (NULL
== r
->xtab
)
2059 r
->xtab
= mandoc_calloc(128,
2060 sizeof(struct roffstr
));
2062 free(r
->xtab
[(int)*first
].p
);
2063 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
2064 r
->xtab
[(int)*first
].sz
= ssz
;
2076 mandoc_vmsg(MANDOCERR_SO
, r
->parse
, ln
, ppos
, "so %s", name
);
2079 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2080 * opening anything that's not in our cwd or anything beneath
2081 * it. Thus, explicitly disallow traversing up the file-system
2082 * or using absolute paths.
2085 if ('/' == *name
|| strstr(name
, "../") || strstr(name
, "/..")) {
2086 mandoc_vmsg(MANDOCERR_SO_PATH
, r
->parse
, ln
, ppos
,
2096 roff_userdef(ROFF_ARGS
)
2103 * Collect pointers to macro argument strings
2104 * and NUL-terminate them.
2107 for (i
= 0; i
< 9; i
++)
2108 arg
[i
] = '\0' == *cp
? "" :
2109 mandoc_getarg(r
->parse
, &cp
, ln
, &pos
);
2112 * Expand macro arguments.
2115 n1
= cp
= mandoc_strdup(r
->current_string
);
2116 while (NULL
!= (cp
= strstr(cp
, "\\$"))) {
2118 if (0 > i
|| 8 < i
) {
2119 /* Not an argument invocation. */
2124 *szp
= mandoc_asprintf(&n2
, "%s%s%s",
2125 n1
, arg
[i
], cp
+ 3) + 1;
2126 cp
= n2
+ (cp
- n1
);
2132 * Replace the macro invocation
2133 * by the expanded macro.
2138 *szp
= strlen(*bufp
) + 1;
2140 return(*szp
> 1 && '\n' == (*bufp
)[(int)*szp
- 2] ?
2141 ROFF_REPARSE
: ROFF_APPEND
);
2145 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
2154 /* Read until end of name and terminate it with NUL. */
2155 for (cp
= name
; 1; cp
++) {
2156 if ('\0' == *cp
|| ' ' == *cp
) {
2163 if ('{' == cp
[1] || '}' == cp
[1])
2168 mandoc_vmsg(MANDOCERR_NAMESC
, r
->parse
, ln
, pos
,
2169 "%.*s", (int)(cp
- name
+ 1), name
);
2170 mandoc_escape((const char **)&cp
, NULL
, NULL
);
2174 /* Read past spaces. */
2183 * Store *string into the user-defined string called *name.
2184 * To clear an existing entry, call with (*r, *name, NULL, 0).
2185 * append == 0: replace mode
2186 * append == 1: single-line append mode
2187 * append == 2: multiline append mode, append '\n' after each call
2190 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
2194 roff_setstrn(&r
->strtab
, name
, strlen(name
), string
,
2195 string
? strlen(string
) : 0, append
);
2199 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
2200 const char *string
, size_t stringsz
, int append
)
2205 size_t oldch
, newch
;
2207 /* Search for an existing string with the same name. */
2210 while (n
&& (namesz
!= n
->key
.sz
||
2211 strncmp(n
->key
.p
, name
, namesz
)))
2215 /* Create a new string table entry. */
2216 n
= mandoc_malloc(sizeof(struct roffkv
));
2217 n
->key
.p
= mandoc_strndup(name
, namesz
);
2223 } else if (0 == append
) {
2233 * One additional byte for the '\n' in multiline mode,
2234 * and one for the terminating '\0'.
2236 newch
= stringsz
+ (1 < append
? 2u : 1u);
2238 if (NULL
== n
->val
.p
) {
2239 n
->val
.p
= mandoc_malloc(newch
);
2244 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
2247 /* Skip existing content in the destination buffer. */
2248 c
= n
->val
.p
+ (int)oldch
;
2250 /* Append new content to the destination buffer. */
2252 while (i
< (int)stringsz
) {
2254 * Rudimentary roff copy mode:
2255 * Handle escaped backslashes.
2257 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
2262 /* Append terminating bytes. */
2267 n
->val
.sz
= (int)(c
- n
->val
.p
);
2271 roff_getstrn(const struct roff
*r
, const char *name
, size_t len
)
2273 const struct roffkv
*n
;
2276 for (n
= r
->strtab
; n
; n
= n
->next
)
2277 if (0 == strncmp(name
, n
->key
.p
, len
) &&
2278 '\0' == n
->key
.p
[(int)len
])
2281 for (i
= 0; i
< PREDEFS_MAX
; i
++)
2282 if (0 == strncmp(name
, predefs
[i
].name
, len
) &&
2283 '\0' == predefs
[i
].name
[(int)len
])
2284 return(predefs
[i
].str
);
2290 roff_freestr(struct roffkv
*r
)
2292 struct roffkv
*n
, *nn
;
2294 for (n
= r
; n
; n
= nn
) {
2302 const struct tbl_span
*
2303 roff_span(const struct roff
*r
)
2306 return(r
->tbl
? tbl_span(r
->tbl
) : NULL
);
2310 roff_eqn(const struct roff
*r
)
2313 return(r
->last_eqn
? &r
->last_eqn
->eqn
: NULL
);
2317 * Duplicate an input string, making the appropriate character
2318 * conversions (as stipulated by `tr') along the way.
2319 * Returns a heap-allocated string with all the replacements made.
2322 roff_strdup(const struct roff
*r
, const char *p
)
2324 const struct roffkv
*cp
;
2328 enum mandoc_esc esc
;
2330 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
2331 return(mandoc_strdup(p
));
2332 else if ('\0' == *p
)
2333 return(mandoc_strdup(""));
2336 * Step through each character looking for term matches
2337 * (remember that a `tr' can be invoked with an escape, which is
2338 * a glyph but the escape is multi-character).
2339 * We only do this if the character hash has been initialised
2340 * and the string is >0 length.
2346 while ('\0' != *p
) {
2347 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(int)*p
].p
) {
2348 sz
= r
->xtab
[(int)*p
].sz
;
2349 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2350 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
2354 } else if ('\\' != *p
) {
2355 res
= mandoc_realloc(res
, ssz
+ 2);
2360 /* Search for term matches. */
2361 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
2362 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
2367 * A match has been found.
2368 * Append the match to the array and move
2369 * forward by its keysize.
2371 res
= mandoc_realloc(res
,
2372 ssz
+ cp
->val
.sz
+ 1);
2373 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
2375 p
+= (int)cp
->key
.sz
;
2380 * Handle escapes carefully: we need to copy
2381 * over just the escape itself, or else we might
2382 * do replacements within the escape itself.
2383 * Make sure to pass along the bogus string.
2386 esc
= mandoc_escape(&p
, NULL
, NULL
);
2387 if (ESCAPE_ERROR
== esc
) {
2389 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2390 memcpy(res
+ ssz
, pp
, sz
);
2394 * We bail out on bad escapes.
2395 * No need to warn: we already did so when
2396 * roff_res() was called.
2399 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2400 memcpy(res
+ ssz
, pp
, sz
);
2404 res
[(int)ssz
] = '\0';
2409 roff_getformat(const struct roff
*r
)
2416 * Find out whether a line is a macro line or not.
2417 * If it is, adjust the current position and return one; if it isn't,
2418 * return zero and don't change the current position.
2419 * If the control character has been set with `.cc', then let that take precedence.
2421 * This is slightly contrary to groff, where using the non-breaking
2422 * control character when `cc' has been invoked will cause the
2423 * non-breaking macro contents to be printed verbatim.
2426 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
2432 if (0 != r
->control
&& cp
[pos
] == r
->control
)
2434 else if (0 != r
->control
)
2436 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
2438 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
2443 while (' ' == cp
[pos
] || '\t' == cp
[pos
])