]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.246 2014/12/28 14:16:26 schwarze Exp $ */
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
30 #include "mandoc_aux.h"
31 #include "libmandoc.h"
34 /* Maximum number of nested if-else conditionals. */
35 #define RSTACK_MAX 128
37 /* Maximum number of string expansions per line, to break infinite loops. */
38 #define EXPAND_LIMIT 1000
84 * An incredibly simple string buffer.
87 char *p
; /* nil-terminated buffer */
88 size_t sz
; /* saved strlen(p) */
92 * A key-value roffstr pair as part of a singly-linked list.
97 struct roffkv
*next
; /* next in list */
101 * A single number register as part of a singly-linked list.
106 struct roffreg
*next
;
110 struct mparse
*parse
; /* parse point */
111 const struct mchars
*mchars
; /* character table */
112 struct roffnode
*last
; /* leaf of stack */
113 int *rstack
; /* stack of inverted `ie' values */
114 struct roffreg
*regtab
; /* number registers */
115 struct roffkv
*strtab
; /* user-defined strings & macros */
116 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
117 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
118 const char *current_string
; /* value of last called user macro */
119 struct tbl_node
*first_tbl
; /* first table parsed */
120 struct tbl_node
*last_tbl
; /* last table parsed */
121 struct tbl_node
*tbl
; /* current table being parsed */
122 struct eqn_node
*last_eqn
; /* last equation parsed */
123 struct eqn_node
*first_eqn
; /* first equation parsed */
124 struct eqn_node
*eqn
; /* current equation being parsed */
125 int eqn_inline
; /* current equation is inline */
126 int options
; /* parse options */
127 int rstacksz
; /* current size limit of rstack */
128 int rstackpos
; /* position in rstack */
129 int format
; /* current file in mdoc or man format */
130 char control
; /* control character */
134 enum rofft tok
; /* type of node */
135 struct roffnode
*parent
; /* up one in stack */
136 int line
; /* parse line */
137 int col
; /* parse col */
138 char *name
; /* node name, e.g. macro name */
139 char *end
; /* end-rules: custom token */
140 int endspan
; /* end-rules: next-line or infty */
141 int rule
; /* current evaluation rule */
144 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
145 enum rofft tok, /* tok of macro */ \
146 struct buf *buf, /* input buffer */ \
147 int ln, /* parse line */ \
148 int ppos, /* original pos in buffer */ \
149 int pos, /* current pos in buffer */ \
150 int *offs /* reset offset of buffer data */
152 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
155 const char *name
; /* macro name */
156 roffproc proc
; /* process new macro */
157 roffproc text
; /* process as child text of macro */
158 roffproc sub
; /* process as child of macro */
160 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
161 struct roffmac
*next
;
165 const char *name
; /* predefined input name */
166 const char *str
; /* replacement symbol */
169 #define PREDEF(__name, __str) \
170 { (__name), (__str) },
172 static enum rofft
roffhash_find(const char *, size_t);
173 static void roffhash_init(void);
174 static void roffnode_cleanscope(struct roff
*);
175 static void roffnode_pop(struct roff
*);
176 static void roffnode_push(struct roff
*, enum rofft
,
177 const char *, int, int);
178 static enum rofferr
roff_block(ROFF_ARGS
);
179 static enum rofferr
roff_block_text(ROFF_ARGS
);
180 static enum rofferr
roff_block_sub(ROFF_ARGS
);
181 static enum rofferr
roff_cblock(ROFF_ARGS
);
182 static enum rofferr
roff_cc(ROFF_ARGS
);
183 static void roff_ccond(struct roff
*, int, int);
184 static enum rofferr
roff_cond(ROFF_ARGS
);
185 static enum rofferr
roff_cond_text(ROFF_ARGS
);
186 static enum rofferr
roff_cond_sub(ROFF_ARGS
);
187 static enum rofferr
roff_ds(ROFF_ARGS
);
188 static enum rofferr
roff_eqndelim(struct roff
*, struct buf
*, int);
189 static int roff_evalcond(struct roff
*r
, int,
190 const char *, int *);
191 static int roff_evalnum(struct roff
*, int,
192 const char *, int *, int *, int);
193 static int roff_evalpar(struct roff
*, int,
194 const char *, int *, int *);
195 static int roff_evalstrcond(const char *, int *);
196 static void roff_free1(struct roff
*);
197 static void roff_freereg(struct roffreg
*);
198 static void roff_freestr(struct roffkv
*);
199 static size_t roff_getname(struct roff
*, char **, int, int);
200 static int roff_getnum(const char *, int *, int *);
201 static int roff_getop(const char *, int *, char *);
202 static int roff_getregn(const struct roff
*,
203 const char *, size_t);
204 static int roff_getregro(const char *name
);
205 static const char *roff_getstrn(const struct roff
*,
206 const char *, size_t);
207 static enum rofferr
roff_it(ROFF_ARGS
);
208 static enum rofferr
roff_line_ignore(ROFF_ARGS
);
209 static enum rofferr
roff_nr(ROFF_ARGS
);
210 static enum rofft
roff_parse(struct roff
*, char *, int *,
212 static enum rofferr
roff_parsetext(struct buf
*, int, int *);
213 static enum rofferr
roff_res(struct roff
*, struct buf
*, int, int);
214 static enum rofferr
roff_rm(ROFF_ARGS
);
215 static enum rofferr
roff_rr(ROFF_ARGS
);
216 static void roff_setstr(struct roff
*,
217 const char *, const char *, int);
218 static void roff_setstrn(struct roffkv
**, const char *,
219 size_t, const char *, size_t, int);
220 static enum rofferr
roff_so(ROFF_ARGS
);
221 static enum rofferr
roff_tr(ROFF_ARGS
);
222 static enum rofferr
roff_Dd(ROFF_ARGS
);
223 static enum rofferr
roff_TH(ROFF_ARGS
);
224 static enum rofferr
roff_TE(ROFF_ARGS
);
225 static enum rofferr
roff_TS(ROFF_ARGS
);
226 static enum rofferr
roff_EQ(ROFF_ARGS
);
227 static enum rofferr
roff_EN(ROFF_ARGS
);
228 static enum rofferr
roff_T_(ROFF_ARGS
);
229 static enum rofferr
roff_userdef(ROFF_ARGS
);
231 /* See roffhash_find() */
235 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
237 static struct roffmac
*hash
[HASHWIDTH
];
239 static struct roffmac roffs
[ROFF_MAX
] = {
240 { "ad", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
241 { "am", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
242 { "ami", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
243 { "am1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
244 { "as", roff_ds
, NULL
, NULL
, 0, NULL
},
245 { "cc", roff_cc
, NULL
, NULL
, 0, NULL
},
246 { "ce", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
247 { "de", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
248 { "dei", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
249 { "de1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
250 { "ds", roff_ds
, NULL
, NULL
, 0, NULL
},
251 { "el", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
252 { "fam", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
253 { "hw", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
254 { "hy", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
255 { "ie", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
256 { "if", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
257 { "ig", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
258 { "it", roff_it
, NULL
, NULL
, 0, NULL
},
259 { "ne", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
260 { "nh", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
261 { "nr", roff_nr
, NULL
, NULL
, 0, NULL
},
262 { "ns", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
263 { "pl", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
264 { "ps", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
265 { "rm", roff_rm
, NULL
, NULL
, 0, NULL
},
266 { "rr", roff_rr
, NULL
, NULL
, 0, NULL
},
267 { "so", roff_so
, NULL
, NULL
, 0, NULL
},
268 { "ta", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
269 { "tr", roff_tr
, NULL
, NULL
, 0, NULL
},
270 { "Dd", roff_Dd
, NULL
, NULL
, 0, NULL
},
271 { "TH", roff_TH
, NULL
, NULL
, 0, NULL
},
272 { "TS", roff_TS
, NULL
, NULL
, 0, NULL
},
273 { "TE", roff_TE
, NULL
, NULL
, 0, NULL
},
274 { "T&", roff_T_
, NULL
, NULL
, 0, NULL
},
275 { "EQ", roff_EQ
, NULL
, NULL
, 0, NULL
},
276 { "EN", roff_EN
, NULL
, NULL
, 0, NULL
},
277 { ".", roff_cblock
, NULL
, NULL
, 0, NULL
},
278 { NULL
, roff_userdef
, NULL
, NULL
, 0, NULL
},
281 /* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
282 const char *const __mdoc_reserved
[] = {
283 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
284 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
285 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
286 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
287 "Dt", "Dv", "Dx", "D1",
288 "Ec", "Ed", "Ef", "Ek", "El", "Em",
289 "En", "Eo", "Er", "Es", "Ev", "Ex",
290 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
291 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
292 "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
293 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
294 "Pa", "Pc", "Pf", "Po", "Pp", "Pq",
295 "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
296 "Sc", "Sh", "Sm", "So", "Sq",
297 "Ss", "St", "Sx", "Sy",
298 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
299 "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
300 "%P", "%Q", "%R", "%T", "%U", "%V",
304 /* not currently implemented: BT DE DS ME MT PT SY TQ YS */
305 const char *const __man_reserved
[] = {
306 "AT", "B", "BI", "BR", "DT",
307 "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
308 "LP", "OP", "P", "PD", "PP",
309 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
310 "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
314 /* Array of injected predefined strings. */
315 #define PREDEFS_MAX 38
316 static const struct predef predefs
[PREDEFS_MAX
] = {
317 #include "predefs.in"
320 /* See roffhash_find() */
321 #define ROFF_HASH(p) (p[0] - ASCII_LO)
323 static int roffit_lines
; /* number of lines to delay */
324 static char *roffit_macro
; /* nil-terminated macro line */
333 for (i
= 0; i
< (int)ROFF_USERDEF
; i
++) {
334 assert(roffs
[i
].name
[0] >= ASCII_LO
);
335 assert(roffs
[i
].name
[0] <= ASCII_HI
);
337 buc
= ROFF_HASH(roffs
[i
].name
);
339 if (NULL
!= (n
= hash
[buc
])) {
340 for ( ; n
->next
; n
= n
->next
)
344 hash
[buc
] = &roffs
[i
];
349 * Look up a roff token by its name. Returns ROFF_MAX if no macro
350 * name matches the first s bytes of p exactly.
353 roffhash_find(const char *p
, size_t s
)
359 * libroff has an extremely simple hashtable, for the time
360 * being, which simply keys on the first character, which must
361 * be printable, then walks a chain. It works well enough until
365 if (p
[0] < ASCII_LO
|| p
[0] > ASCII_HI
)
370 if (NULL
== (n
= hash
[buc
]))
372 for ( ; n
; n
= n
->next
)
373 if (0 == strncmp(n
->name
, p
, s
) && '\0' == n
->name
[(int)s
])
374 return((enum rofft
)(n
- roffs
));
380 * Pop the current node off of the stack of roff instructions currently
384 roffnode_pop(struct roff
*r
)
391 r
->last
= r
->last
->parent
;
398 * Push a roff node onto the instruction stack. This must later be
399 * removed with roffnode_pop().
402 roffnode_push(struct roff
*r
, enum rofft tok
, const char *name
,
407 p
= mandoc_calloc(1, sizeof(struct roffnode
));
410 p
->name
= mandoc_strdup(name
);
414 p
->rule
= p
->parent
? p
->parent
->rule
: 0;
420 roff_free1(struct roff
*r
)
422 struct tbl_node
*tbl
;
426 while (NULL
!= (tbl
= r
->first_tbl
)) {
427 r
->first_tbl
= tbl
->next
;
430 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
432 while (NULL
!= (e
= r
->first_eqn
)) {
433 r
->first_eqn
= e
->next
;
436 r
->first_eqn
= r
->last_eqn
= r
->eqn
= NULL
;
446 roff_freereg(r
->regtab
);
449 roff_freestr(r
->strtab
);
450 roff_freestr(r
->xmbtab
);
451 r
->strtab
= r
->xmbtab
= NULL
;
454 for (i
= 0; i
< 128; i
++)
461 roff_reset(struct roff
*r
)
465 r
->format
= r
->options
& (MPARSE_MDOC
| MPARSE_MAN
);
470 roff_free(struct roff
*r
)
478 roff_alloc(struct mparse
*parse
, const struct mchars
*mchars
, int options
)
482 r
= mandoc_calloc(1, sizeof(struct roff
));
485 r
->options
= options
;
486 r
->format
= options
& (MPARSE_MDOC
| MPARSE_MAN
);
495 * In the current line, expand escape sequences that tend to get
496 * used in numerical expressions and conditional requests.
497 * Also check the syntax of the remaining escape sequences.
500 roff_res(struct roff
*r
, struct buf
*buf
, int ln
, int pos
)
502 char ubuf
[24]; /* buffer to print the number */
503 const char *start
; /* start of the string to process */
504 char *stesc
; /* start of an escape sequence ('\\') */
505 const char *stnam
; /* start of the name, after "[(*" */
506 const char *cp
; /* end of the name, e.g. before ']' */
507 const char *res
; /* the string to be substituted */
508 char *nbuf
; /* new buffer to copy buf->buf to */
509 size_t maxl
; /* expected length of the escape name */
510 size_t naml
; /* actual length of the escape name */
511 enum mandoc_esc esc
; /* type of the escape sequence */
512 int inaml
; /* length returned from mandoc_escape() */
513 int expand_count
; /* to avoid infinite loops */
514 int npos
; /* position in numeric expression */
515 int arg_complete
; /* argument not interrupted by eol */
516 char term
; /* character terminating the escape */
519 start
= buf
->buf
+ pos
;
520 stesc
= strchr(start
, '\0') - 1;
521 while (stesc
-- > start
) {
523 /* Search backwards for the next backslash. */
528 /* If it is escaped, skip it. */
530 for (cp
= stesc
- 1; cp
>= start
; cp
--)
534 if ((stesc
- cp
) % 2 == 0) {
539 /* Decide whether to expand or to check only. */
556 esc
= mandoc_escape(&cp
, &stnam
, &inaml
);
557 if (esc
== ESCAPE_ERROR
||
558 (esc
== ESCAPE_SPECIAL
&&
559 mchars_spec2cp(r
->mchars
, stnam
, inaml
) < 0))
560 mandoc_vmsg(MANDOCERR_ESC_BAD
,
561 r
->parse
, ln
, (int)(stesc
- buf
->buf
),
562 "%.*s", (int)(cp
- stesc
), stesc
);
566 if (EXPAND_LIMIT
< ++expand_count
) {
567 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
,
568 ln
, (int)(stesc
- buf
->buf
), NULL
);
573 * The third character decides the length
574 * of the name of the string or register.
575 * Save a pointer to the name.
602 /* Advance to the end of the name. */
605 for (naml
= 0; maxl
== 0 || naml
< maxl
; naml
++, cp
++) {
607 mandoc_msg(MANDOCERR_ESC_BAD
, r
->parse
,
608 ln
, (int)(stesc
- buf
->buf
), stesc
);
612 if (maxl
== 0 && *cp
== term
) {
619 * Retrieve the replacement string; if it is
620 * undefined, resume searching for escapes.
626 res
= roff_getstrn(r
, stnam
, naml
);
630 ubuf
[0] = arg_complete
&&
631 roff_evalnum(r
, ln
, stnam
, &npos
, NULL
, 0) &&
632 stnam
+ npos
+ 1 == cp
? '1' : '0';
637 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
638 roff_getregn(r
, stnam
, naml
));
643 /* use even incomplete args */
644 (void)snprintf(ubuf
, sizeof(ubuf
), "%d",
650 mandoc_vmsg(MANDOCERR_STR_UNDEF
,
651 r
->parse
, ln
, (int)(stesc
- buf
->buf
),
652 "%.*s", (int)naml
, stnam
);
654 } else if (buf
->sz
+ strlen(res
) > SHRT_MAX
) {
655 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
,
656 ln
, (int)(stesc
- buf
->buf
), NULL
);
660 /* Replace the escape sequence by the string. */
663 buf
->sz
= mandoc_asprintf(&nbuf
, "%s%s%s",
664 buf
->buf
, res
, cp
) + 1;
666 /* Prepare for the next replacement. */
669 stesc
= nbuf
+ (stesc
- buf
->buf
) + strlen(res
);
677 * Process text streams:
678 * Convert all breakable hyphens into ASCII_HYPH.
679 * Decrement and spring input line trap.
682 roff_parsetext(struct buf
*buf
, int pos
, int *offs
)
690 start
= p
= buf
->buf
+ pos
;
693 sz
= strcspn(p
, "-\\");
700 /* Skip over escapes. */
702 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
703 if (esc
== ESCAPE_ERROR
)
706 } else if (p
== start
) {
711 if (isalpha((unsigned char)p
[-1]) &&
712 isalpha((unsigned char)p
[1]))
717 /* Spring the input line trap. */
718 if (roffit_lines
== 1) {
719 isz
= mandoc_asprintf(&p
, "%s\n.%s", buf
->buf
, roffit_macro
);
726 return(ROFF_REPARSE
);
727 } else if (roffit_lines
> 1)
733 roff_parseln(struct roff
*r
, int ln
, struct buf
*buf
, int *offs
)
737 int pos
; /* parse point */
738 int spos
; /* saved parse point for messages */
739 int ppos
; /* original offset in buf->buf */
740 int ctl
; /* macro line (boolean) */
744 /* Handle in-line equation delimiters. */
746 if (r
->tbl
== NULL
&&
747 r
->last_eqn
!= NULL
&& r
->last_eqn
->delim
&&
748 (r
->eqn
== NULL
|| r
->eqn_inline
)) {
749 e
= roff_eqndelim(r
, buf
, pos
);
750 if (e
== ROFF_REPARSE
)
752 assert(e
== ROFF_CONT
);
755 /* Expand some escape sequences. */
757 e
= roff_res(r
, buf
, ln
, pos
);
760 assert(e
== ROFF_CONT
);
762 ctl
= roff_getcontrol(r
, buf
->buf
, &pos
);
765 * First, if a scope is open and we're not a macro, pass the
766 * text through the macro's filter. If a scope isn't open and
767 * we're not a macro, just let it through.
768 * Finally, if there's an equation scope open, divert it into it
769 * no matter our state.
772 if (r
->last
&& ! ctl
) {
774 assert(roffs
[t
].text
);
775 e
= (*roffs
[t
].text
)(r
, t
, buf
, ln
, pos
, pos
, offs
);
776 assert(e
== ROFF_IGN
|| e
== ROFF_CONT
);
781 return(eqn_read(&r
->eqn
, ln
, buf
->buf
, ppos
, offs
));
784 return(tbl_read(r
->tbl
, ln
, buf
->buf
, pos
));
785 return(roff_parsetext(buf
, pos
, offs
));
788 /* Skip empty request lines. */
790 if (buf
->buf
[pos
] == '"') {
791 mandoc_msg(MANDOCERR_COMMENT_BAD
, r
->parse
,
794 } else if (buf
->buf
[pos
] == '\0')
798 * If a scope is open, go to the child handler for that macro,
799 * as it may want to preprocess before doing anything with it.
800 * Don't do so if an equation is open.
805 assert(roffs
[t
].sub
);
806 return((*roffs
[t
].sub
)(r
, t
, buf
, ln
, ppos
, pos
, offs
));
809 /* No scope is open. This is a new request or macro. */
812 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
814 /* Tables ignore most macros. */
816 if (r
->tbl
!= NULL
&& (t
== ROFF_MAX
|| t
== ROFF_TS
)) {
817 mandoc_msg(MANDOCERR_TBLMACRO
, r
->parse
,
818 ln
, pos
, buf
->buf
+ spos
);
823 * This is neither a roff request nor a user-defined macro.
824 * Let the standard macro set parsers handle it.
830 /* Execute a roff request or a user defined macro. */
832 assert(roffs
[t
].proc
);
833 return((*roffs
[t
].proc
)(r
, t
, buf
, ln
, ppos
, pos
, offs
));
837 roff_endparse(struct roff
*r
)
841 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->parse
,
842 r
->last
->line
, r
->last
->col
,
843 roffs
[r
->last
->tok
].name
);
846 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->parse
,
847 r
->eqn
->eqn
.ln
, r
->eqn
->eqn
.pos
, "EQ");
852 mandoc_msg(MANDOCERR_BLK_NOEND
, r
->parse
,
853 r
->tbl
->line
, r
->tbl
->pos
, "TS");
859 * Parse a roff node's type from the input buffer. This must be in the
860 * form of ".foo xxx" in the usual way.
863 roff_parse(struct roff
*r
, char *buf
, int *pos
, int ln
, int ppos
)
872 if ('\0' == *cp
|| '"' == *cp
|| '\t' == *cp
|| ' ' == *cp
)
876 maclen
= roff_getname(r
, &cp
, ln
, ppos
);
878 t
= (r
->current_string
= roff_getstrn(r
, mac
, maclen
))
879 ? ROFF_USERDEF
: roffhash_find(mac
, maclen
);
888 roff_cblock(ROFF_ARGS
)
892 * A block-close `..' should only be invoked as a child of an
893 * ignore macro, otherwise raise a warning and just ignore it.
896 if (r
->last
== NULL
) {
897 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
902 switch (r
->last
->tok
) {
904 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
909 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
916 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
921 if (buf
->buf
[pos
] != '\0')
922 mandoc_vmsg(MANDOCERR_ARG_SKIP
, r
->parse
, ln
, pos
,
923 ".. %s", buf
->buf
+ pos
);
926 roffnode_cleanscope(r
);
932 roffnode_cleanscope(struct roff
*r
)
936 if (--r
->last
->endspan
!= 0)
943 roff_ccond(struct roff
*r
, int ln
, int ppos
)
946 if (NULL
== r
->last
) {
947 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
952 switch (r
->last
->tok
) {
960 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
965 if (r
->last
->endspan
> -1) {
966 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
972 roffnode_cleanscope(r
);
977 roff_block(ROFF_ARGS
)
983 /* Ignore groff compatibility mode for now. */
987 else if (tok
== ROFF_am1
)
990 /* Parse the macro name argument. */
993 if (tok
== ROFF_ig
) {
998 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
999 iname
[namesz
] = '\0';
1002 /* Resolve the macro name argument if it is indirect. */
1004 if (namesz
&& (tok
== ROFF_dei
|| tok
== ROFF_ami
)) {
1005 if ((name
= roff_getstrn(r
, iname
, namesz
)) == NULL
) {
1006 mandoc_vmsg(MANDOCERR_STR_UNDEF
,
1007 r
->parse
, ln
, (int)(iname
- buf
->buf
),
1008 "%.*s", (int)namesz
, iname
);
1011 namesz
= strlen(name
);
1015 if (namesz
== 0 && tok
!= ROFF_ig
) {
1016 mandoc_msg(MANDOCERR_REQ_EMPTY
, r
->parse
,
1017 ln
, ppos
, roffs
[tok
].name
);
1021 roffnode_push(r
, tok
, name
, ln
, ppos
);
1024 * At the beginning of a `de' macro, clear the existing string
1025 * with the same name, if there is one. New content will be
1026 * appended from roff_block_text() in multiline mode.
1029 if (tok
== ROFF_de
|| tok
== ROFF_dei
)
1030 roff_setstrn(&r
->strtab
, name
, namesz
, "", 0, 0);
1035 /* Get the custom end marker. */
1038 namesz
= roff_getname(r
, &cp
, ln
, ppos
);
1040 /* Resolve the end marker if it is indirect. */
1042 if (namesz
&& (tok
== ROFF_dei
|| tok
== ROFF_ami
)) {
1043 if ((name
= roff_getstrn(r
, iname
, namesz
)) == NULL
) {
1044 mandoc_vmsg(MANDOCERR_STR_UNDEF
,
1045 r
->parse
, ln
, (int)(iname
- buf
->buf
),
1046 "%.*s", (int)namesz
, iname
);
1049 namesz
= strlen(name
);
1054 r
->last
->end
= mandoc_strndup(name
, namesz
);
1057 mandoc_vmsg(MANDOCERR_ARG_EXCESS
, r
->parse
,
1058 ln
, pos
, ".%s ... %s", roffs
[tok
].name
, cp
);
1064 roff_block_sub(ROFF_ARGS
)
1070 * First check whether a custom macro exists at this level. If
1071 * it does, then check against it. This is one of groff's
1072 * stranger behaviours. If we encountered a custom end-scope
1073 * tag and that tag also happens to be a "real" macro, then we
1074 * need to try interpreting it again as a real macro. If it's
1075 * not, then return ignore. Else continue.
1079 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
1080 if (buf
->buf
[i
] != r
->last
->end
[j
])
1083 if (r
->last
->end
[j
] == '\0' &&
1084 (buf
->buf
[i
] == '\0' ||
1085 buf
->buf
[i
] == ' ' ||
1086 buf
->buf
[i
] == '\t')) {
1088 roffnode_cleanscope(r
);
1090 while (buf
->buf
[i
] == ' ' || buf
->buf
[i
] == '\t')
1094 if (roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
) !=
1102 * If we have no custom end-query or lookup failed, then try
1103 * pulling it out of the hashtable.
1106 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
1108 if (t
!= ROFF_cblock
) {
1110 roff_setstr(r
, r
->last
->name
, buf
->buf
+ ppos
, 2);
1114 assert(roffs
[t
].proc
);
1115 return((*roffs
[t
].proc
)(r
, t
, buf
, ln
, ppos
, pos
, offs
));
1119 roff_block_text(ROFF_ARGS
)
1123 roff_setstr(r
, r
->last
->name
, buf
->buf
+ pos
, 2);
1129 roff_cond_sub(ROFF_ARGS
)
1136 roffnode_cleanscope(r
);
1137 t
= roff_parse(r
, buf
->buf
, &pos
, ln
, ppos
);
1140 * Fully handle known macros when they are structurally
1141 * required or when the conditional evaluated to true.
1144 if ((t
!= ROFF_MAX
) &&
1145 (rr
|| roffs
[t
].flags
& ROFFMAC_STRUCT
)) {
1146 assert(roffs
[t
].proc
);
1147 return((*roffs
[t
].proc
)(r
, t
, buf
, ln
, ppos
, pos
, offs
));
1151 * If `\}' occurs on a macro line without a preceding macro,
1152 * drop the line completely.
1155 ep
= buf
->buf
+ pos
;
1156 if (ep
[0] == '\\' && ep
[1] == '}')
1159 /* Always check for the closing delimiter `\}'. */
1161 while ((ep
= strchr(ep
, '\\')) != NULL
) {
1162 if (*(++ep
) == '}') {
1164 roff_ccond(r
, ln
, ep
- buf
->buf
- 1);
1168 return(rr
? ROFF_CONT
: ROFF_IGN
);
1172 roff_cond_text(ROFF_ARGS
)
1178 roffnode_cleanscope(r
);
1180 ep
= buf
->buf
+ pos
;
1181 while ((ep
= strchr(ep
, '\\')) != NULL
) {
1182 if (*(++ep
) == '}') {
1184 roff_ccond(r
, ln
, ep
- buf
->buf
- 1);
1188 return(rr
? ROFF_CONT
: ROFF_IGN
);
1192 * Parse a single signed integer number. Stop at the first non-digit.
1193 * If there is at least one digit, return success and advance the
1194 * parse point, else return failure and leave the parse point unchanged.
1195 * Ignore overflows, treat them just like the C language.
1198 roff_getnum(const char *v
, int *pos
, int *res
)
1210 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
1211 *res
= 10 * *res
+ v
[p
] - '0';
1223 * Evaluate a string comparison condition.
1224 * The first character is the delimiter.
1225 * Succeed if the string up to its second occurrence
1226 * matches the string up to its third occurrence.
1227 * Advance the cursor after the third occurrence
1228 * or lacking that, to the end of the line.
1231 roff_evalstrcond(const char *v
, int *pos
)
1233 const char *s1
, *s2
, *s3
;
1237 s1
= v
+ *pos
; /* initial delimiter */
1238 s2
= s1
+ 1; /* for scanning the first string */
1239 s3
= strchr(s2
, *s1
); /* for scanning the second string */
1241 if (NULL
== s3
) /* found no middle delimiter */
1244 while ('\0' != *++s3
) {
1245 if (*s2
!= *s3
) { /* mismatch */
1246 s3
= strchr(s3
, *s1
);
1249 if (*s3
== *s1
) { /* found the final delimiter */
1258 s3
= strchr(s2
, '\0');
1259 else if (*s3
!= '\0')
1266 * Evaluate an optionally negated single character, numerical,
1267 * or string condition.
1270 roff_evalcond(struct roff
*r
, int ln
, const char *v
, int *pos
)
1272 int number
, savepos
, wanttrue
;
1274 if ('!' == v
[*pos
]) {
1306 if (roff_evalnum(r
, ln
, v
, pos
, &number
, 0))
1307 return((number
> 0) == wanttrue
);
1308 else if (*pos
== savepos
)
1309 return(roff_evalstrcond(v
, pos
) == wanttrue
);
1315 roff_line_ignore(ROFF_ARGS
)
1322 roff_cond(ROFF_ARGS
)
1325 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
1328 * An `.el' has no conditional body: it will consume the value
1329 * of the current rstack entry set in prior `ie' calls or
1332 * If we're not an `el', however, then evaluate the conditional.
1335 r
->last
->rule
= tok
== ROFF_el
?
1336 (r
->rstackpos
< 0 ? 0 : r
->rstack
[r
->rstackpos
--]) :
1337 roff_evalcond(r
, ln
, buf
->buf
, &pos
);
1340 * An if-else will put the NEGATION of the current evaluated
1341 * conditional into the stack of rules.
1344 if (tok
== ROFF_ie
) {
1345 if (r
->rstackpos
+ 1 == r
->rstacksz
) {
1347 r
->rstack
= mandoc_reallocarray(r
->rstack
,
1348 r
->rstacksz
, sizeof(int));
1350 r
->rstack
[++r
->rstackpos
] = !r
->last
->rule
;
1353 /* If the parent has false as its rule, then so do we. */
1355 if (r
->last
->parent
&& !r
->last
->parent
->rule
)
1360 * If there is nothing on the line after the conditional,
1361 * not even whitespace, use next-line scope.
1364 if (buf
->buf
[pos
] == '\0') {
1365 r
->last
->endspan
= 2;
1369 while (buf
->buf
[pos
] == ' ')
1372 /* An opening brace requests multiline scope. */
1374 if (buf
->buf
[pos
] == '\\' && buf
->buf
[pos
+ 1] == '{') {
1375 r
->last
->endspan
= -1;
1381 * Anything else following the conditional causes
1382 * single-line scope. Warn if the scope contains
1383 * nothing but trailing whitespace.
1386 if (buf
->buf
[pos
] == '\0')
1387 mandoc_msg(MANDOCERR_COND_EMPTY
, r
->parse
,
1388 ln
, ppos
, roffs
[tok
].name
);
1390 r
->last
->endspan
= 1;
1405 * The first word is the name of the string.
1406 * If it is empty or terminated by an escape sequence,
1407 * abort the `ds' request without defining anything.
1410 name
= string
= buf
->buf
+ pos
;
1414 namesz
= roff_getname(r
, &string
, ln
, pos
);
1415 if (name
[namesz
] == '\\')
1418 /* Read past the initial double-quote, if any. */
1422 /* The rest is the value. */
1423 roff_setstrn(&r
->strtab
, name
, namesz
, string
, strlen(string
),
1429 * Parse a single operator, one or two characters long.
1430 * If the operator is recognized, return success and advance the
1431 * parse point, else return failure and leave the parse point unchanged.
1434 roff_getop(const char *v
, int *pos
, char *res
)
1455 switch (v
[*pos
+ 1]) {
1473 switch (v
[*pos
+ 1]) {
1487 if ('=' == v
[*pos
+ 1])
1499 * Evaluate either a parenthesized numeric expression
1500 * or a single signed integer number.
1503 roff_evalpar(struct roff
*r
, int ln
,
1504 const char *v
, int *pos
, int *res
)
1508 return(roff_getnum(v
, pos
, res
));
1511 if ( ! roff_evalnum(r
, ln
, v
, pos
, res
, 1))
1515 * Omission of the closing parenthesis
1516 * is an error in validation mode,
1517 * but ignored in evaluation mode.
1522 else if (NULL
== res
)
1529 * Evaluate a complete numeric expression.
1530 * Proceed left to right; there is no concept of precedence.
1533 roff_evalnum(struct roff
*r
, int ln
, const char *v
,
1534 int *pos
, int *res
, int skipwhite
)
1536 int mypos
, operand2
;
1545 while (isspace((unsigned char)v
[*pos
]))
1548 if ( ! roff_evalpar(r
, ln
, v
, pos
, res
))
1553 while (isspace((unsigned char)v
[*pos
]))
1556 if ( ! roff_getop(v
, pos
, &operator))
1560 while (isspace((unsigned char)v
[*pos
]))
1563 if ( ! roff_evalpar(r
, ln
, v
, pos
, &operand2
))
1567 while (isspace((unsigned char)v
[*pos
]))
1584 if (operand2
== 0) {
1585 mandoc_msg(MANDOCERR_DIVZERO
,
1586 r
->parse
, ln
, *pos
, v
);
1593 if (operand2
== 0) {
1594 mandoc_msg(MANDOCERR_DIVZERO
,
1595 r
->parse
, ln
, *pos
, v
);
1602 *res
= *res
< operand2
;
1605 *res
= *res
> operand2
;
1608 *res
= *res
<= operand2
;
1611 *res
= *res
>= operand2
;
1614 *res
= *res
== operand2
;
1617 *res
= *res
!= operand2
;
1620 *res
= *res
&& operand2
;
1623 *res
= *res
|| operand2
;
1626 if (operand2
< *res
)
1630 if (operand2
> *res
)
1641 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
1643 struct roffreg
*reg
;
1645 /* Search for an existing register with the same name. */
1648 while (reg
&& strcmp(name
, reg
->key
.p
))
1652 /* Create a new register. */
1653 reg
= mandoc_malloc(sizeof(struct roffreg
));
1654 reg
->key
.p
= mandoc_strdup(name
);
1655 reg
->key
.sz
= strlen(name
);
1657 reg
->next
= r
->regtab
;
1663 else if ('-' == sign
)
1670 * Handle some predefined read-only number registers.
1671 * For now, return -1 if the requested register is not predefined;
1672 * in case a predefined read-only register having the value -1
1673 * were to turn up, another special value would have to be chosen.
1676 roff_getregro(const char *name
)
1680 case 'A': /* ASCII approximation mode is always off. */
1682 case 'g': /* Groff compatibility mode is always on. */
1684 case 'H': /* Fixed horizontal resolution. */
1686 case 'j': /* Always adjust left margin only. */
1688 case 'T': /* Some output device is always defined. */
1690 case 'V': /* Fixed vertical resolution. */
1698 roff_getreg(const struct roff
*r
, const char *name
)
1700 struct roffreg
*reg
;
1703 if ('.' == name
[0] && '\0' != name
[1] && '\0' == name
[2]) {
1704 val
= roff_getregro(name
+ 1);
1709 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1710 if (0 == strcmp(name
, reg
->key
.p
))
1717 roff_getregn(const struct roff
*r
, const char *name
, size_t len
)
1719 struct roffreg
*reg
;
1722 if ('.' == name
[0] && 2 == len
) {
1723 val
= roff_getregro(name
+ 1);
1728 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1729 if (len
== reg
->key
.sz
&&
1730 0 == strncmp(name
, reg
->key
.p
, len
))
1737 roff_freereg(struct roffreg
*reg
)
1739 struct roffreg
*old_reg
;
1741 while (NULL
!= reg
) {
1757 key
= val
= buf
->buf
+ pos
;
1761 keysz
= roff_getname(r
, &val
, ln
, pos
);
1762 if (key
[keysz
] == '\\')
1767 if (sign
== '+' || sign
== '-')
1770 if (roff_evalnum(r
, ln
, val
, NULL
, &iv
, 0))
1771 roff_setreg(r
, key
, iv
, sign
);
1779 struct roffreg
*reg
, **prev
;
1783 name
= cp
= buf
->buf
+ pos
;
1786 namesz
= roff_getname(r
, &cp
, ln
, pos
);
1787 name
[namesz
] = '\0';
1792 if (reg
== NULL
|| !strcmp(name
, reg
->key
.p
))
1811 cp
= buf
->buf
+ pos
;
1812 while (*cp
!= '\0') {
1814 namesz
= roff_getname(r
, &cp
, ln
, (int)(cp
- buf
->buf
));
1815 roff_setstrn(&r
->strtab
, name
, namesz
, NULL
, 0, 0);
1816 if (name
[namesz
] == '\\')
1829 /* Parse the number of lines. */
1830 cp
= buf
->buf
+ pos
;
1831 len
= strcspn(cp
, " \t");
1833 if ((iv
= mandoc_strntoi(cp
, len
, 10)) <= 0) {
1834 mandoc_msg(MANDOCERR_IT_NONUM
, r
->parse
,
1835 ln
, ppos
, buf
->buf
+ 1);
1840 /* Arm the input line trap. */
1842 roffit_macro
= mandoc_strdup(cp
);
1849 const char *const *cp
;
1851 if ((r
->options
& (MPARSE_MDOC
| MPARSE_QUICK
)) == 0)
1852 for (cp
= __mdoc_reserved
; *cp
; cp
++)
1853 roff_setstr(r
, *cp
, NULL
, 0);
1856 r
->format
= MPARSE_MDOC
;
1864 const char *const *cp
;
1866 if ((r
->options
& MPARSE_QUICK
) == 0)
1867 for (cp
= __man_reserved
; *cp
; cp
++)
1868 roff_setstr(r
, *cp
, NULL
, 0);
1871 r
->format
= MPARSE_MAN
;
1881 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
1894 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
,
1897 tbl_restart(ppos
, ln
, r
->tbl
);
1903 * Handle in-line equation delimiters.
1906 roff_eqndelim(struct roff
*r
, struct buf
*buf
, int pos
)
1909 const char *bef_pr
, *bef_nl
, *mac
, *aft_nl
, *aft_pr
;
1912 * Outside equations, look for an opening delimiter.
1913 * If we are inside an equation, we already know it is
1914 * in-line, or this function wouldn't have been called;
1915 * so look for a closing delimiter.
1918 cp1
= buf
->buf
+ pos
;
1919 cp2
= strchr(cp1
, r
->eqn
== NULL
?
1920 r
->last_eqn
->odelim
: r
->last_eqn
->cdelim
);
1925 bef_pr
= bef_nl
= aft_nl
= aft_pr
= "";
1927 /* Handle preceding text, protecting whitespace. */
1929 if (*buf
->buf
!= '\0') {
1936 * Prepare replacing the delimiter with an equation macro
1937 * and drop leading white space from the equation.
1940 if (r
->eqn
== NULL
) {
1947 /* Handle following text, protecting whitespace. */
1955 /* Do the actual replacement. */
1957 buf
->sz
= mandoc_asprintf(&cp1
, "%s%s%s%s%s%s%s", buf
->buf
,
1958 bef_pr
, bef_nl
, mac
, aft_nl
, aft_pr
, cp2
) + 1;
1962 /* Toggle the in-line state of the eqn subsystem. */
1964 r
->eqn_inline
= r
->eqn
== NULL
;
1965 return(ROFF_REPARSE
);
1973 assert(r
->eqn
== NULL
);
1974 e
= eqn_alloc(ppos
, ln
, r
->parse
);
1977 r
->last_eqn
->next
= e
;
1978 e
->delim
= r
->last_eqn
->delim
;
1979 e
->odelim
= r
->last_eqn
->odelim
;
1980 e
->cdelim
= r
->last_eqn
->cdelim
;
1982 r
->first_eqn
= r
->last_eqn
= e
;
1984 r
->eqn
= r
->last_eqn
= e
;
1986 if (buf
->buf
[pos
] != '\0')
1987 mandoc_vmsg(MANDOCERR_ARG_SKIP
, r
->parse
, ln
, pos
,
1988 ".EQ %s", buf
->buf
+ pos
);
1997 mandoc_msg(MANDOCERR_BLK_NOTOPEN
, r
->parse
, ln
, ppos
, "EN");
2004 struct tbl_node
*tbl
;
2007 mandoc_msg(MANDOCERR_BLK_BROKEN
, r
->parse
,
2008 ln
, ppos
, "TS breaks TS");
2012 tbl
= tbl_alloc(ppos
, ln
, r
->parse
);
2015 r
->last_tbl
->next
= tbl
;
2017 r
->first_tbl
= r
->last_tbl
= tbl
;
2019 r
->tbl
= r
->last_tbl
= tbl
;
2030 if (*p
== '\0' || (r
->control
= *p
++) == '.')
2034 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
2042 const char *p
, *first
, *second
;
2044 enum mandoc_esc esc
;
2049 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
2053 while (*p
!= '\0') {
2057 if (*first
== '\\') {
2058 esc
= mandoc_escape(&p
, NULL
, NULL
);
2059 if (esc
== ESCAPE_ERROR
) {
2060 mandoc_msg(MANDOCERR_ESC_BAD
, r
->parse
,
2061 ln
, (int)(p
- buf
->buf
), first
);
2064 fsz
= (size_t)(p
- first
);
2068 if (*second
== '\\') {
2069 esc
= mandoc_escape(&p
, NULL
, NULL
);
2070 if (esc
== ESCAPE_ERROR
) {
2071 mandoc_msg(MANDOCERR_ESC_BAD
, r
->parse
,
2072 ln
, (int)(p
- buf
->buf
), second
);
2075 ssz
= (size_t)(p
- second
);
2076 } else if (*second
== '\0') {
2077 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
,
2078 ln
, (int)(p
- buf
->buf
), NULL
);
2084 roff_setstrn(&r
->xmbtab
, first
, fsz
,
2089 if (r
->xtab
== NULL
)
2090 r
->xtab
= mandoc_calloc(128,
2091 sizeof(struct roffstr
));
2093 free(r
->xtab
[(int)*first
].p
);
2094 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
2095 r
->xtab
[(int)*first
].sz
= ssz
;
2106 name
= buf
->buf
+ pos
;
2107 mandoc_vmsg(MANDOCERR_SO
, r
->parse
, ln
, ppos
, "so %s", name
);
2110 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2111 * opening anything that's not in our cwd or anything beneath
2112 * it. Thus, explicitly disallow traversing up the file-system
2113 * or using absolute paths.
2116 if (*name
== '/' || strstr(name
, "../") || strstr(name
, "/..")) {
2117 mandoc_vmsg(MANDOCERR_SO_PATH
, r
->parse
, ln
, ppos
,
2127 roff_userdef(ROFF_ARGS
)
2134 * Collect pointers to macro argument strings
2135 * and NUL-terminate them.
2137 cp
= buf
->buf
+ pos
;
2138 for (i
= 0; i
< 9; i
++)
2139 arg
[i
] = *cp
== '\0' ? "" :
2140 mandoc_getarg(r
->parse
, &cp
, ln
, &pos
);
2143 * Expand macro arguments.
2146 n1
= cp
= mandoc_strdup(r
->current_string
);
2147 while ((cp
= strstr(cp
, "\\$")) != NULL
) {
2149 if (0 > i
|| 8 < i
) {
2150 /* Not an argument invocation. */
2155 buf
->sz
= mandoc_asprintf(&n2
, "%s%s%s",
2156 n1
, arg
[i
], cp
+ 3) + 1;
2157 cp
= n2
+ (cp
- n1
);
2163 * Replace the macro invocation
2164 * by the expanded macro.
2169 buf
->sz
= strlen(buf
->buf
) + 1;
2171 return(buf
->sz
> 1 && buf
->buf
[buf
->sz
- 2] == '\n' ?
2172 ROFF_REPARSE
: ROFF_APPEND
);
2176 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
2185 /* Read until end of name and terminate it with NUL. */
2186 for (cp
= name
; 1; cp
++) {
2187 if ('\0' == *cp
|| ' ' == *cp
) {
2194 if ('{' == cp
[1] || '}' == cp
[1])
2199 mandoc_vmsg(MANDOCERR_NAMESC
, r
->parse
, ln
, pos
,
2200 "%.*s", (int)(cp
- name
+ 1), name
);
2201 mandoc_escape((const char **)&cp
, NULL
, NULL
);
2205 /* Read past spaces. */
2214 * Store *string into the user-defined string called *name.
2215 * To clear an existing entry, call with (*r, *name, NULL, 0).
2216 * append == 0: replace mode
2217 * append == 1: single-line append mode
2218 * append == 2: multiline append mode, append '\n' after each call
2221 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
2225 roff_setstrn(&r
->strtab
, name
, strlen(name
), string
,
2226 string
? strlen(string
) : 0, append
);
2230 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
2231 const char *string
, size_t stringsz
, int append
)
2236 size_t oldch
, newch
;
2238 /* Search for an existing string with the same name. */
2241 while (n
&& (namesz
!= n
->key
.sz
||
2242 strncmp(n
->key
.p
, name
, namesz
)))
2246 /* Create a new string table entry. */
2247 n
= mandoc_malloc(sizeof(struct roffkv
));
2248 n
->key
.p
= mandoc_strndup(name
, namesz
);
2254 } else if (0 == append
) {
2264 * One additional byte for the '\n' in multiline mode,
2265 * and one for the terminating '\0'.
2267 newch
= stringsz
+ (1 < append
? 2u : 1u);
2269 if (NULL
== n
->val
.p
) {
2270 n
->val
.p
= mandoc_malloc(newch
);
2275 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
2278 /* Skip existing content in the destination buffer. */
2279 c
= n
->val
.p
+ (int)oldch
;
2281 /* Append new content to the destination buffer. */
2283 while (i
< (int)stringsz
) {
2285 * Rudimentary roff copy mode:
2286 * Handle escaped backslashes.
2288 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
2293 /* Append terminating bytes. */
2298 n
->val
.sz
= (int)(c
- n
->val
.p
);
2302 roff_getstrn(const struct roff
*r
, const char *name
, size_t len
)
2304 const struct roffkv
*n
;
2307 for (n
= r
->strtab
; n
; n
= n
->next
)
2308 if (0 == strncmp(name
, n
->key
.p
, len
) &&
2309 '\0' == n
->key
.p
[(int)len
])
2312 for (i
= 0; i
< PREDEFS_MAX
; i
++)
2313 if (0 == strncmp(name
, predefs
[i
].name
, len
) &&
2314 '\0' == predefs
[i
].name
[(int)len
])
2315 return(predefs
[i
].str
);
2321 roff_freestr(struct roffkv
*r
)
2323 struct roffkv
*n
, *nn
;
2325 for (n
= r
; n
; n
= nn
) {
2333 const struct tbl_span
*
2334 roff_span(const struct roff
*r
)
2337 return(r
->tbl
? tbl_span(r
->tbl
) : NULL
);
2341 roff_eqn(const struct roff
*r
)
2344 return(r
->last_eqn
? &r
->last_eqn
->eqn
: NULL
);
2348 * Duplicate an input string, making the appropriate character
2349 * conversions (as stipulated by `tr') along the way.
2350 * Returns a heap-allocated string with all the replacements made.
2353 roff_strdup(const struct roff
*r
, const char *p
)
2355 const struct roffkv
*cp
;
2359 enum mandoc_esc esc
;
2361 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
2362 return(mandoc_strdup(p
));
2363 else if ('\0' == *p
)
2364 return(mandoc_strdup(""));
2367 * Step through each character looking for term matches
2368 * (remember that a `tr' can be invoked with an escape, which is
2369 * a glyph but the escape is multi-character).
2370 * We only do this if the character hash has been initialised
2371 * and the string is >0 length.
2377 while ('\0' != *p
) {
2378 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(int)*p
].p
) {
2379 sz
= r
->xtab
[(int)*p
].sz
;
2380 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2381 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
2385 } else if ('\\' != *p
) {
2386 res
= mandoc_realloc(res
, ssz
+ 2);
2391 /* Search for term matches. */
2392 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
2393 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
2398 * A match has been found.
2399 * Append the match to the array and move
2400 * forward by its keysize.
2402 res
= mandoc_realloc(res
,
2403 ssz
+ cp
->val
.sz
+ 1);
2404 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
2406 p
+= (int)cp
->key
.sz
;
2411 * Handle escapes carefully: we need to copy
2412 * over just the escape itself, or else we might
2413 * do replacements within the escape itself.
2414 * Make sure to pass along the bogus string.
2417 esc
= mandoc_escape(&p
, NULL
, NULL
);
2418 if (ESCAPE_ERROR
== esc
) {
2420 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2421 memcpy(res
+ ssz
, pp
, sz
);
2425 * We bail out on bad escapes.
2426 * No need to warn: we already did so when
2427 * roff_res() was called.
2430 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2431 memcpy(res
+ ssz
, pp
, sz
);
2435 res
[(int)ssz
] = '\0';
2440 roff_getformat(const struct roff
*r
)
2447 * Find out whether a line is a macro line or not.
2448 * If it is, adjust the current position and return one; if it isn't,
2449 * return zero and don't change the current position.
2450 * If the control character has been set with `.cc', then let that gain
2452 * This is slightly contrary to groff, where using the non-breaking
2453 * control character when `cc' has been invoked will cause the
2454 * non-breaking macro contents to be printed verbatim.
2457 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
2463 if (0 != r
->control
&& cp
[pos
] == r
->control
)
2465 else if (0 != r
->control
)
2467 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
2469 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
2474 while (' ' == cp
[pos
] || '\t' == cp
[pos
])