]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Explicit block closure macros clobber next-line block head scope,
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.242 2014/12/16 03:53:43 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "mandoc_aux.h"
30 #include "libmandoc.h"
31 #include "libroff.h"
32
33 /* Maximum number of nested if-else conditionals. */
34 #define RSTACK_MAX 128
35
36 /* Maximum number of string expansions per line, to break infinite loops. */
37 #define EXPAND_LIMIT 1000
38
/*
 * Identifiers for the roff requests and macros handled in this file.
 * The values are used as indices into the roffs[] table, so the
 * enumerators and the table entries must stay in the same order.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_pl,
	ROFF_ps,
	ROFF_rm,
	ROFF_rr,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* block closure; its roffs[] entry is named "." */
	ROFF_USERDEF,	/* user-defined macro; its roffs[] entry has no name */
	ROFF_MAX	/* table size; also the "no such macro" result */
};
81
/*
 * An incredibly-simple string buffer:
 * a pointer to the text plus its cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
89
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff uses such lists for strtab and xmbtab.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
98
/*
 * A single number register as part of a singly-linked list.
 * struct roff keeps the head of that list in regtab.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current register value */
	struct roffreg	*next; /* next in list */
};
107
/*
 * The roff parser context.  One is created by roff_alloc(), wiped
 * for reuse by roff_reset() and destroyed by roff_free().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	const struct mchars *mchars; /* character table */
	struct roffnode	*last; /* leaf of stack */
	int		*rstack; /* stack of inverted `ie' values */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
};
131
/*
 * One entry of the stack of open blocks and conditionals.
 * Entries are created by roffnode_push() and released by
 * roffnode_pop(); r->last always points to the innermost one.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};
142
/*
 * Common parameter list shared by all request handlers, so that
 * they can be stored in and called through struct roffmac.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
152
/*
 * Description of one request: its name, its handlers, and the
 * link used to chain entries sharing a bucket of hash[].
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next entry in the same hash bucket */
};
162
/*
 * One predefined string: a name and its replacement.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initializer, including the trailing
 * comma, so that the lines of "predefs.in" can fill the predefs[] array.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
170
171 static enum rofft roffhash_find(const char *, size_t);
172 static void roffhash_init(void);
173 static void roffnode_cleanscope(struct roff *);
174 static void roffnode_pop(struct roff *);
175 static void roffnode_push(struct roff *, enum rofft,
176 const char *, int, int);
177 static enum rofferr roff_block(ROFF_ARGS);
178 static enum rofferr roff_block_text(ROFF_ARGS);
179 static enum rofferr roff_block_sub(ROFF_ARGS);
180 static enum rofferr roff_cblock(ROFF_ARGS);
181 static enum rofferr roff_cc(ROFF_ARGS);
182 static void roff_ccond(struct roff *, int, int);
183 static enum rofferr roff_cond(ROFF_ARGS);
184 static enum rofferr roff_cond_text(ROFF_ARGS);
185 static enum rofferr roff_cond_sub(ROFF_ARGS);
186 static enum rofferr roff_ds(ROFF_ARGS);
187 static enum rofferr roff_eqndelim(struct roff *, struct buf *, int);
188 static int roff_evalcond(struct roff *r, int,
189 const char *, int *);
190 static int roff_evalnum(struct roff *, int,
191 const char *, int *, int *, int);
192 static int roff_evalpar(struct roff *, int,
193 const char *, int *, int *);
194 static int roff_evalstrcond(const char *, int *);
195 static void roff_free1(struct roff *);
196 static void roff_freereg(struct roffreg *);
197 static void roff_freestr(struct roffkv *);
198 static size_t roff_getname(struct roff *, char **, int, int);
199 static int roff_getnum(const char *, int *, int *);
200 static int roff_getop(const char *, int *, char *);
201 static int roff_getregn(const struct roff *,
202 const char *, size_t);
203 static int roff_getregro(const char *name);
204 static const char *roff_getstrn(const struct roff *,
205 const char *, size_t);
206 static enum rofferr roff_it(ROFF_ARGS);
207 static enum rofferr roff_line_ignore(ROFF_ARGS);
208 static enum rofferr roff_nr(ROFF_ARGS);
209 static enum rofft roff_parse(struct roff *, char *, int *,
210 int, int);
211 static enum rofferr roff_parsetext(struct buf *, int, int *);
212 static enum rofferr roff_res(struct roff *, struct buf *, int, int);
213 static enum rofferr roff_rm(ROFF_ARGS);
214 static enum rofferr roff_rr(ROFF_ARGS);
215 static void roff_setstr(struct roff *,
216 const char *, const char *, int);
217 static void roff_setstrn(struct roffkv **, const char *,
218 size_t, const char *, size_t, int);
219 static enum rofferr roff_so(ROFF_ARGS);
220 static enum rofferr roff_tr(ROFF_ARGS);
221 static enum rofferr roff_Dd(ROFF_ARGS);
222 static enum rofferr roff_TH(ROFF_ARGS);
223 static enum rofferr roff_TE(ROFF_ARGS);
224 static enum rofferr roff_TS(ROFF_ARGS);
225 static enum rofferr roff_EQ(ROFF_ARGS);
226 static enum rofferr roff_EN(ROFF_ARGS);
227 static enum rofferr roff_T_(ROFF_ARGS);
228 static enum rofferr roff_userdef(ROFF_ARGS);
229
/*
 * See roffhash_find().
 * Buckets are keyed on the first character of the request name,
 * which must lie in the range ASCII_LO to ASCII_HI, inclusive.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; entries are chained through roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
237
/*
 * Table of builtin requests, indexed by enum rofft.
 * Columns: name, proc, text, sub, flags, next.
 * Only block and conditional requests provide text and sub handlers,
 * which roff_parseln() requires while such a scope is open.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "as", roff_ds, NULL, NULL, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "rr", roff_rr, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
279
/*
 * NULL-terminated list of mdoc macro names.
 * NOTE(review): the users of this list are not visible in this part
 * of the file; confirm before changing entries.
 */
/* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
302
/*
 * NULL-terminated list of man macro names.
 * NOTE(review): the users of this list are not visible in this part
 * of the file; confirm before changing entries.
 */
/* not currently implemented: BT DE DS ME MT PT SY TQ YS */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
312
/*
 * Array of injected predefined strings.
 * The initializers come from "predefs.in", one PREDEF() line each;
 * PREDEFS_MAX has to be kept in sync with that file.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};

/*
 * See roffhash_find().
 * Maps the first character of a name to its bucket in hash[];
 * callers must check that the character is within ASCII_LO..ASCII_HI.
 */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)

/*
 * State of the input line trap.  It is shared by all parser
 * instances because it lives at file scope.
 */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
324
325
326 static void
327 roffhash_init(void)
328 {
329 struct roffmac *n;
330 int buc, i;
331
332 for (i = 0; i < (int)ROFF_USERDEF; i++) {
333 assert(roffs[i].name[0] >= ASCII_LO);
334 assert(roffs[i].name[0] <= ASCII_HI);
335
336 buc = ROFF_HASH(roffs[i].name);
337
338 if (NULL != (n = hash[buc])) {
339 for ( ; n->next; n = n->next)
340 /* Do nothing. */ ;
341 n->next = &roffs[i];
342 } else
343 hash[buc] = &roffs[i];
344 }
345 }
346
347 /*
348 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
349 * the nil-terminated string name could be found.
350 */
351 static enum rofft
352 roffhash_find(const char *p, size_t s)
353 {
354 int buc;
355 struct roffmac *n;
356
357 /*
358 * libroff has an extremely simple hashtable, for the time
359 * being, which simply keys on the first character, which must
360 * be printable, then walks a chain. It works well enough until
361 * optimised.
362 */
363
364 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
365 return(ROFF_MAX);
366
367 buc = ROFF_HASH(p);
368
369 if (NULL == (n = hash[buc]))
370 return(ROFF_MAX);
371 for ( ; n; n = n->next)
372 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
373 return((enum rofft)(n - roffs));
374
375 return(ROFF_MAX);
376 }
377
378 /*
379 * Pop the current node off of the stack of roff instructions currently
380 * pending.
381 */
382 static void
383 roffnode_pop(struct roff *r)
384 {
385 struct roffnode *p;
386
387 assert(r->last);
388 p = r->last;
389
390 r->last = r->last->parent;
391 free(p->name);
392 free(p->end);
393 free(p);
394 }
395
396 /*
397 * Push a roff node onto the instruction stack. This must later be
398 * removed with roffnode_pop().
399 */
400 static void
401 roffnode_push(struct roff *r, enum rofft tok, const char *name,
402 int line, int col)
403 {
404 struct roffnode *p;
405
406 p = mandoc_calloc(1, sizeof(struct roffnode));
407 p->tok = tok;
408 if (name)
409 p->name = mandoc_strdup(name);
410 p->parent = r->last;
411 p->line = line;
412 p->col = col;
413 p->rule = p->parent ? p->parent->rule : 0;
414
415 r->last = p;
416 }
417
418 static void
419 roff_free1(struct roff *r)
420 {
421 struct tbl_node *tbl;
422 struct eqn_node *e;
423 int i;
424
425 while (NULL != (tbl = r->first_tbl)) {
426 r->first_tbl = tbl->next;
427 tbl_free(tbl);
428 }
429 r->first_tbl = r->last_tbl = r->tbl = NULL;
430
431 while (NULL != (e = r->first_eqn)) {
432 r->first_eqn = e->next;
433 eqn_free(e);
434 }
435 r->first_eqn = r->last_eqn = r->eqn = NULL;
436
437 while (r->last)
438 roffnode_pop(r);
439
440 free (r->rstack);
441 r->rstack = NULL;
442 r->rstacksz = 0;
443 r->rstackpos = -1;
444
445 roff_freereg(r->regtab);
446 r->regtab = NULL;
447
448 roff_freestr(r->strtab);
449 roff_freestr(r->xmbtab);
450 r->strtab = r->xmbtab = NULL;
451
452 if (r->xtab)
453 for (i = 0; i < 128; i++)
454 free(r->xtab[i].p);
455 free(r->xtab);
456 r->xtab = NULL;
457 }
458
459 void
460 roff_reset(struct roff *r)
461 {
462
463 roff_free1(r);
464 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
465 r->control = 0;
466 }
467
/*
 * Destroy a parser context obtained from roff_alloc().
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);	/* first everything the context owns ... */
	free(r);	/* ... then the context itself */
}
475
476 struct roff *
477 roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
478 {
479 struct roff *r;
480
481 r = mandoc_calloc(1, sizeof(struct roff));
482 r->parse = parse;
483 r->mchars = mchars;
484 r->options = options;
485 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
486 r->rstackpos = -1;
487
488 roffhash_init();
489
490 return(r);
491 }
492
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The escapes expanded are \* (user-defined string), \n (number
 * register), \B (validity of a numeric expression) and \w (width).
 * The line is scanned from its end towards buf->buf + pos, and each
 * expansion replaces buf->buf by a newly allocated buffer.
 *
 * Returns ROFF_IGN if more than EXPAND_LIMIT expansions were needed
 * on this line, ROFF_CONT otherwise.
 */
static enum rofferr
roff_res(struct roff *r, struct buf *buf, int ln, int pos)
{
	char		 ubuf[24]; /* buffer to print the number */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	enum mandoc_esc	 esc;	/* type of the escape sequence */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	char		 term;	/* character terminating the escape */

	expand_count = 0;
	start = buf->buf + pos;
	/* The very last character cannot start an escape sequence. */
	stesc = strchr(start, '\0') - 1;
	while (stesc-- > start) {

		/* Search backwards for the next backslash. */

		if (*stesc != '\\')
			continue;

		/* If it is escaped, skip it. */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != '\\')
				break;

		/* An even distance means an odd run of backslashes before. */
		if ((stesc - cp) % 2 == 0) {
			stesc = (char *)cp;
			continue;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
			/* Looked up below; NULL means "undefined". */
			res = NULL;
			break;
		case 'B':
			/* FALLTHROUGH */
		case 'w':
			/* The argument is enclosed in a delimiter pair. */
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			res = ubuf;
			break;
		default:
			/* Not expanded here; only validate the syntax. */
			esc = mandoc_escape(&cp, &stnam, &inaml);
			if (esc == ESCAPE_ERROR ||
			    (esc == ESCAPE_SPECIAL &&
			     mchars_spec2cp(r->mchars, stnam, inaml) < 0))
				mandoc_vmsg(MANDOCERR_ESC_BAD,
				    r->parse, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return(ROFF_IGN);
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* Skip the escape letter and the delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		arg_complete = 1;
		/* maxl == 0 means: read up to the terminator. */
		for (naml = 0; maxl == 0 || naml < maxl; naml++, cp++) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(stesc - buf->buf), stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			if (arg_complete)
				res = roff_getstrn(r, stnam, naml);
			break;
		case 'B':
			/*
			 * '1' if the whole argument up to the closing
			 * delimiter is a valid numeric expression.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos, NULL, 0) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			/* A fixed width of 24 units per byte is assumed. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(stesc - buf->buf),
			    "%.*s", (int)naml, stnam);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		/* Cut the old buffer at the backslash, then join. */
		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/* Prepare for the next replacement. */

		/*
		 * Resume right after the inserted text, such that
		 * escapes inside the replacement get expanded, too.
		 */
		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return(ROFF_CONT);
}
670
671 /*
672 * Process text streams:
673 * Convert all breakable hyphens into ASCII_HYPH.
674 * Decrement and spring input line trap.
675 */
676 static enum rofferr
677 roff_parsetext(struct buf *buf, int pos, int *offs)
678 {
679 size_t sz;
680 const char *start;
681 char *p;
682 int isz;
683 enum mandoc_esc esc;
684
685 start = p = buf->buf + pos;
686
687 while (*p != '\0') {
688 sz = strcspn(p, "-\\");
689 p += sz;
690
691 if (*p == '\0')
692 break;
693
694 if (*p == '\\') {
695 /* Skip over escapes. */
696 p++;
697 esc = mandoc_escape((const char **)&p, NULL, NULL);
698 if (esc == ESCAPE_ERROR)
699 break;
700 continue;
701 } else if (p == start) {
702 p++;
703 continue;
704 }
705
706 if (isalpha((unsigned char)p[-1]) &&
707 isalpha((unsigned char)p[1]))
708 *p = ASCII_HYPH;
709 p++;
710 }
711
712 /* Spring the input line trap. */
713 if (roffit_lines == 1) {
714 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
715 free(buf->buf);
716 buf->buf = p;
717 buf->sz = isz + 1;
718 *offs = 0;
719 free(roffit_macro);
720 roffit_lines = 0;
721 return(ROFF_REPARSE);
722 } else if (roffit_lines > 1)
723 --roffit_lines;
724 return(ROFF_CONT);
725 }
726
/*
 * Main entry point of the roff parser: handle one input line.
 *
 * r is the parser context, ln the line number, buf the input buffer,
 * which may get replaced, and *offs the offset where the line starts
 * in the buffer.
 *
 * The return value is the result of whichever handler the line was
 * dispatched to, or ROFF_CONT if this file does not handle the line
 * and it is left to the caller.
 */
enum rofferr
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum rofft	 t;
	enum rofferr	 e;
	int		 pos;	/* parse point */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return(e);
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_res(r, buf, ln, pos);
	if (e == ROFF_IGN)
		return(e);
	assert(e == ROFF_CONT);

	/* May advance pos; non-zero for request and macro lines. */
	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.  If a scope isn't open and
	 * we're not a macro, just let it through.
	 * Finally, if there's an equation scope open, divert it into it
	 * no matter our state.
	 */

	if (r->last && ! ctl) {
		t = r->last->tok;
		assert(roffs[t].text);
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		assert(e == ROFF_IGN || e == ROFF_CONT);
		if (e != ROFF_CONT)
			return(e);
	}
	if (r->eqn)
		return(eqn_read(&r->eqn, ln, buf->buf, ppos, offs));
	if ( ! ctl) {
		if (r->tbl)
			return(tbl_read(r->tbl, ln, buf->buf, pos));
		return(roff_parsetext(buf, pos, offs));
	}

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
		    ln, pos, NULL);
		return(ROFF_IGN);
	} else if (buf->buf[pos] == '\0')
		return(ROFF_IGN);

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		assert(roffs[t].sub);
		return((*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs));
	}

	/*
	 * Lastly, as we've no scope open, try to look up and execute
	 * the new macro.  If no macro is found, simply return and let
	 * the compilers handle it.
	 */

	if ((t = roff_parse(r, buf->buf, &pos, ln, ppos)) == ROFF_MAX)
		return(ROFF_CONT);

	assert(roffs[t].proc);
	return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
}
815
816 void
817 roff_endparse(struct roff *r)
818 {
819
820 if (r->last)
821 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
822 r->last->line, r->last->col,
823 roffs[r->last->tok].name);
824
825 if (r->eqn) {
826 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
827 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
828 eqn_end(&r->eqn);
829 }
830
831 if (r->tbl) {
832 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
833 r->tbl->line, r->tbl->pos, "TS");
834 tbl_end(&r->tbl);
835 }
836 }
837
838 /*
839 * Parse a roff node's type from the input buffer. This must be in the
840 * form of ".foo xxx" in the usual way.
841 */
842 static enum rofft
843 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
844 {
845 char *cp;
846 const char *mac;
847 size_t maclen;
848 enum rofft t;
849
850 cp = buf + *pos;
851
852 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
853 return(ROFF_MAX);
854
855 mac = cp;
856 maclen = roff_getname(r, &cp, ln, ppos);
857
858 t = (r->current_string = roff_getstrn(r, mac, maclen))
859 ? ROFF_USERDEF : roffhash_find(mac, maclen);
860
861 if (ROFF_MAX != t)
862 *pos = cp - buf;
863
864 return(t);
865 }
866
867 static enum rofferr
868 roff_cblock(ROFF_ARGS)
869 {
870
871 /*
872 * A block-close `..' should only be invoked as a child of an
873 * ignore macro, otherwise raise a warning and just ignore it.
874 */
875
876 if (r->last == NULL) {
877 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
878 ln, ppos, "..");
879 return(ROFF_IGN);
880 }
881
882 switch (r->last->tok) {
883 case ROFF_am:
884 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
885 /* FALLTHROUGH */
886 case ROFF_ami:
887 /* FALLTHROUGH */
888 case ROFF_de:
889 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
890 /* FALLTHROUGH */
891 case ROFF_dei:
892 /* FALLTHROUGH */
893 case ROFF_ig:
894 break;
895 default:
896 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
897 ln, ppos, "..");
898 return(ROFF_IGN);
899 }
900
901 if (buf->buf[pos] != '\0')
902 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
903 ".. %s", buf->buf + pos);
904
905 roffnode_pop(r);
906 roffnode_cleanscope(r);
907 return(ROFF_IGN);
908
909 }
910
911 static void
912 roffnode_cleanscope(struct roff *r)
913 {
914
915 while (r->last) {
916 if (--r->last->endspan != 0)
917 break;
918 roffnode_pop(r);
919 }
920 }
921
922 static void
923 roff_ccond(struct roff *r, int ln, int ppos)
924 {
925
926 if (NULL == r->last) {
927 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
928 ln, ppos, "\\}");
929 return;
930 }
931
932 switch (r->last->tok) {
933 case ROFF_el:
934 /* FALLTHROUGH */
935 case ROFF_ie:
936 /* FALLTHROUGH */
937 case ROFF_if:
938 break;
939 default:
940 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
941 ln, ppos, "\\}");
942 return;
943 }
944
945 if (r->last->endspan > -1) {
946 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
947 ln, ppos, "\\}");
948 return;
949 }
950
951 roffnode_pop(r);
952 roffnode_cleanscope(r);
953 return;
954 }
955
/*
 * Open a block: a macro definition (de, dei, de1), a macro
 * extension (am, ami, am1) or an ignore block (ig).
 * The first argument names the macro, except for ig, which has no
 * name argument; the next argument, if any, is a custom end marker.
 * For dei and ami, both are names of strings holding the real values.
 * A node is pushed onto the scope stack unless the required name is
 * missing.  The request line itself is always dropped (ROFF_IGN).
 */
static enum rofferr
roff_block(ROFF_ARGS)
{
	const char	*name;
	char		*iname, *cp;
	size_t		 namesz;

	/* Ignore groff compatibility mode for now. */

	if (tok == ROFF_de1)
		tok = ROFF_de;
	else if (tok == ROFF_am1)
		tok = ROFF_am;

	/* Parse the macro name argument. */

	cp = buf->buf + pos;
	if (tok == ROFF_ig) {
		iname = NULL;
		namesz = 0;
	} else {
		iname = cp;
		namesz = roff_getname(r, &cp, ln, ppos);
		/* Terminate the name in place, inside the input buffer. */
		iname[namesz] = '\0';
	}

	/* Resolve the macro name argument if it is indirect. */

	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
		if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(iname - buf->buf),
			    "%.*s", (int)namesz, iname);
			namesz = 0;
		} else
			namesz = strlen(name);
	} else
		name = iname;

	if (namesz == 0 && tok != ROFF_ig) {
		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
		    ln, ppos, roffs[tok].name);
		return(ROFF_IGN);
	}

	roffnode_push(r, tok, name, ln, ppos);

	/*
	 * At the beginning of a `de' macro, clear the existing string
	 * with the same name, if there is one.  New content will be
	 * appended from roff_block_text() in multiline mode.
	 */

	if (tok == ROFF_de || tok == ROFF_dei)
		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);

	/* Without further arguments, only `..' ends the block. */
	if (*cp == '\0')
		return(ROFF_IGN);

	/* Get the custom end marker. */

	iname = cp;
	namesz = roff_getname(r, &cp, ln, ppos);

	/* Resolve the end marker if it is indirect. */

	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
		if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(iname - buf->buf),
			    "%.*s", (int)namesz, iname);
			namesz = 0;
		} else
			namesz = strlen(name);
	} else
		name = iname;

	/* The node keeps its own copy; roffnode_pop() frees it. */
	if (namesz)
		r->last->end = mandoc_strndup(name, namesz);

	if (*cp != '\0')
		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
		    ln, pos, ".%s ... %s", roffs[tok].name, cp);

	return(ROFF_IGN);
}
1042
1043 static enum rofferr
1044 roff_block_sub(ROFF_ARGS)
1045 {
1046 enum rofft t;
1047 int i, j;
1048
1049 /*
1050 * First check whether a custom macro exists at this level. If
1051 * it does, then check against it. This is some of groff's
1052 * stranger behaviours. If we encountered a custom end-scope
1053 * tag and that tag also happens to be a "real" macro, then we
1054 * need to try interpreting it again as a real macro. If it's
1055 * not, then return ignore. Else continue.
1056 */
1057
1058 if (r->last->end) {
1059 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1060 if (buf->buf[i] != r->last->end[j])
1061 break;
1062
1063 if (r->last->end[j] == '\0' &&
1064 (buf->buf[i] == '\0' ||
1065 buf->buf[i] == ' ' ||
1066 buf->buf[i] == '\t')) {
1067 roffnode_pop(r);
1068 roffnode_cleanscope(r);
1069
1070 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1071 i++;
1072
1073 pos = i;
1074 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1075 ROFF_MAX)
1076 return(ROFF_RERUN);
1077 return(ROFF_IGN);
1078 }
1079 }
1080
1081 /*
1082 * If we have no custom end-query or lookup failed, then try
1083 * pulling it out of the hashtable.
1084 */
1085
1086 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1087
1088 if (t != ROFF_cblock) {
1089 if (tok != ROFF_ig)
1090 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1091 return(ROFF_IGN);
1092 }
1093
1094 assert(roffs[t].proc);
1095 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1096 }
1097
1098 static enum rofferr
1099 roff_block_text(ROFF_ARGS)
1100 {
1101
1102 if (tok != ROFF_ig)
1103 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1104
1105 return(ROFF_IGN);
1106 }
1107
1108 static enum rofferr
1109 roff_cond_sub(ROFF_ARGS)
1110 {
1111 enum rofft t;
1112 char *ep;
1113 int rr;
1114
1115 rr = r->last->rule;
1116 roffnode_cleanscope(r);
1117 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1118
1119 /*
1120 * Fully handle known macros when they are structurally
1121 * required or when the conditional evaluated to true.
1122 */
1123
1124 if ((t != ROFF_MAX) &&
1125 (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1126 assert(roffs[t].proc);
1127 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1128 }
1129
1130 /*
1131 * If `\}' occurs on a macro line without a preceding macro,
1132 * drop the line completely.
1133 */
1134
1135 ep = buf->buf + pos;
1136 if (ep[0] == '\\' && ep[1] == '}')
1137 rr = 0;
1138
1139 /* Always check for the closing delimiter `\}'. */
1140
1141 while ((ep = strchr(ep, '\\')) != NULL) {
1142 if (*(++ep) == '}') {
1143 *ep = '&';
1144 roff_ccond(r, ln, ep - buf->buf - 1);
1145 }
1146 ++ep;
1147 }
1148 return(rr ? ROFF_CONT : ROFF_IGN);
1149 }
1150
1151 static enum rofferr
1152 roff_cond_text(ROFF_ARGS)
1153 {
1154 char *ep;
1155 int rr;
1156
1157 rr = r->last->rule;
1158 roffnode_cleanscope(r);
1159
1160 ep = buf->buf + pos;
1161 while ((ep = strchr(ep, '\\')) != NULL) {
1162 if (*(++ep) == '}') {
1163 *ep = '&';
1164 roff_ccond(r, ln, ep - buf->buf - 1);
1165 }
1166 ++ep;
1167 }
1168 return(rr ? ROFF_CONT : ROFF_IGN);
1169 }
1170
/*
 * Parse a single signed integer number.  Stop at the first non-digit.
 * If there is at least one digit, return success and advance the
 * parse point, else return failure and let the parse point unchanged.
 *
 * v is the string to parse, *pos the offset to start at, and the
 * result is stored into *res unless res is NULL.  On failure,
 * *res is set to 0.
 *
 * Overflows are ignored: the value wraps around.  The arithmetic is
 * done in an unsigned type because signed overflow is undefined
 * behaviour in C, whereas unsigned arithmetic is defined to wrap.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	unsigned int	 acc;
	int		 myres, n, p;

	if (NULL == res)
		res = &myres;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	for (acc = 0; isdigit((unsigned char)v[p]); p++)
		acc = 10 * acc + (unsigned int)(v[p] - '0');

	/* No digit at all, with or without a sign. */
	if (p == *pos + n) {
		*res = 0;
		return 0;
	}

	/* Negate in the unsigned domain to stay well-defined. */
	if (n)
		acc = 0U - acc;

	*res = (int)acc;
	*pos = p;
	return 1;
}
1201
/*
 * Evaluate a string comparison condition.
 * The character at the cursor is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* walks the first string */
	rhs = strchr(lhs, *delim);	/* walks the second string */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: skip to the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended together. */
				equal = 1;
				break;
			}
		}
	}

	/* Position the cursor behind the condition. */
	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;

	*pos = rhs - v;
	return(equal);
}
1244
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition starting at v[*pos].
 * The cursor is advanced past whatever was consumed.
 * Returns non-zero if the condition holds.
 */
static int
roff_evalcond(struct roff *r, int ln, const char *v, int *pos)
{
	int		 number, savepos, wanttrue;
	char		 c;

	/* A leading `!' inverts the sense of the test. */
	wanttrue = v[*pos] != '!';
	if ( ! wanttrue)
		(*pos)++;

	c = v[*pos];

	/* An empty condition is false, negated or not. */
	if (c == '\0')
		return(0);

	/* Single-letter conditions treated as true. */
	if (c == 'n' || c == 'o') {
		(*pos)++;
		return(wanttrue);
	}

	/* Single-letter conditions treated as false. */
	if (c == 'c' || c == 'd' || c == 'e' ||
	    c == 'r' || c == 't' || c == 'v') {
		(*pos)++;
		return( ! wanttrue);
	}

	/*
	 * Try a numerical expression first.  Fall back to a string
	 * comparison only if the numerical parser consumed nothing.
	 */
	savepos = *pos;
	if (roff_evalnum(r, ln, v, pos, &number, 0))
		return((number > 0) == wanttrue);
	if (*pos != savepos)
		return(0);
	return(roff_evalstrcond(v, pos) == wanttrue);
}
1293
1294 static enum rofferr
1295 roff_line_ignore(ROFF_ARGS)
1296 {
1297
1298 return(ROFF_IGN);
1299 }
1300
1301 static enum rofferr
1302 roff_cond(ROFF_ARGS)
1303 {
1304
1305 roffnode_push(r, tok, NULL, ln, ppos);
1306
1307 /*
1308 * An `.el' has no conditional body: it will consume the value
1309 * of the current rstack entry set in prior `ie' calls or
1310 * defaults to DENY.
1311 *
1312 * If we're not an `el', however, then evaluate the conditional.
1313 */
1314
1315 r->last->rule = tok == ROFF_el ?
1316 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1317 roff_evalcond(r, ln, buf->buf, &pos);
1318
1319 /*
1320 * An if-else will put the NEGATION of the current evaluated
1321 * conditional into the stack of rules.
1322 */
1323
1324 if (tok == ROFF_ie) {
1325 if (r->rstackpos + 1 == r->rstacksz) {
1326 r->rstacksz += 16;
1327 r->rstack = mandoc_reallocarray(r->rstack,
1328 r->rstacksz, sizeof(int));
1329 }
1330 r->rstack[++r->rstackpos] = !r->last->rule;
1331 }
1332
1333 /* If the parent has false as its rule, then so do we. */
1334
1335 if (r->last->parent && !r->last->parent->rule)
1336 r->last->rule = 0;
1337
1338 /*
1339 * Determine scope.
1340 * If there is nothing on the line after the conditional,
1341 * not even whitespace, use next-line scope.
1342 */
1343
1344 if (buf->buf[pos] == '\0') {
1345 r->last->endspan = 2;
1346 goto out;
1347 }
1348
1349 while (buf->buf[pos] == ' ')
1350 pos++;
1351
1352 /* An opening brace requests multiline scope. */
1353
1354 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
1355 r->last->endspan = -1;
1356 pos += 2;
1357 goto out;
1358 }
1359
1360 /*
1361 * Anything else following the conditional causes
1362 * single-line scope. Warn if the scope contains
1363 * nothing but trailing whitespace.
1364 */
1365
1366 if (buf->buf[pos] == '\0')
1367 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
1368 ln, ppos, roffs[tok].name);
1369
1370 r->last->endspan = 1;
1371
1372 out:
1373 *offs = pos;
1374 return(ROFF_RERUN);
1375 }
1376
1377 static enum rofferr
1378 roff_ds(ROFF_ARGS)
1379 {
1380 char *string;
1381 const char *name;
1382 size_t namesz;
1383
1384 /*
1385 * The first word is the name of the string.
1386 * If it is empty or terminated by an escape sequence,
1387 * abort the `ds' request without defining anything.
1388 */
1389
1390 name = string = buf->buf + pos;
1391 if (*name == '\0')
1392 return(ROFF_IGN);
1393
1394 namesz = roff_getname(r, &string, ln, pos);
1395 if (name[namesz] == '\\')
1396 return(ROFF_IGN);
1397
1398 /* Read past the initial double-quote, if any. */
1399 if (*string == '"')
1400 string++;
1401
1402 /* The rest is the value. */
1403 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1404 ROFF_as == tok);
1405 return(ROFF_IGN);
1406 }
1407
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its one-character code
 * in *res, advance the parse point, and return that code (which
 * is non-zero); else return 0 and leave the parse point unchanged.
 * In either case *res is first set to the character at the
 * parse point.
 *
 * Two-character operators are encoded as follows:
 * `<=' as 'l', `>=' as 'g', `<>' as '!', `<?' as 'i', `>?' as 'a',
 * and `==' as '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 len;

	cp = v + *pos;
	*res = cp[0];
	len = 1;

	if (cp[0] == '<') {
		if (cp[1] == '=') {
			*res = 'l';
			len = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			len = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			len = 2;
		}
	} else if (cp[0] == '>') {
		if (cp[1] == '=') {
			*res = 'g';
			len = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			len = 2;
		}
	} else if (cp[0] == '=') {
		if (cp[1] == '=')
			len = 2;
	} else if (cp[0] != '+' && cp[0] != '-' && cp[0] != '*' &&
	    cp[0] != '/' && cp[0] != '%' && cp[0] != '&' &&
	    cp[0] != ':') {
		/* Not an operator at all. */
		return(0);
	}

	*pos += len;
	return(*res);
}
1477
1478 /*
1479 * Evaluate either a parenthesized numeric expression
1480 * or a single signed integer number.
1481 */
1482 static int
1483 roff_evalpar(struct roff *r, int ln,
1484 const char *v, int *pos, int *res)
1485 {
1486
1487 if ('(' != v[*pos])
1488 return(roff_getnum(v, pos, res));
1489
1490 (*pos)++;
1491 if ( ! roff_evalnum(r, ln, v, pos, res, 1))
1492 return(0);
1493
1494 /*
1495 * Omission of the closing parenthesis
1496 * is an error in validation mode,
1497 * but ignored in evaluation mode.
1498 */
1499
1500 if (')' == v[*pos])
1501 (*pos)++;
1502 else if (NULL == res)
1503 return(0);
1504
1505 return(1);
1506 }
1507
1508 /*
1509 * Evaluate a complete numeric expression.
1510 * Proceed left to right, there is no concept of precedence.
1511 */
1512 static int
1513 roff_evalnum(struct roff *r, int ln, const char *v,
1514 int *pos, int *res, int skipwhite)
1515 {
1516 int mypos, operand2;
1517 char operator;
1518
1519 if (NULL == pos) {
1520 mypos = 0;
1521 pos = &mypos;
1522 }
1523
1524 if (skipwhite)
1525 while (isspace((unsigned char)v[*pos]))
1526 (*pos)++;
1527
1528 if ( ! roff_evalpar(r, ln, v, pos, res))
1529 return(0);
1530
1531 while (1) {
1532 if (skipwhite)
1533 while (isspace((unsigned char)v[*pos]))
1534 (*pos)++;
1535
1536 if ( ! roff_getop(v, pos, &operator))
1537 break;
1538
1539 if (skipwhite)
1540 while (isspace((unsigned char)v[*pos]))
1541 (*pos)++;
1542
1543 if ( ! roff_evalpar(r, ln, v, pos, &operand2))
1544 return(0);
1545
1546 if (skipwhite)
1547 while (isspace((unsigned char)v[*pos]))
1548 (*pos)++;
1549
1550 if (NULL == res)
1551 continue;
1552
1553 switch (operator) {
1554 case '+':
1555 *res += operand2;
1556 break;
1557 case '-':
1558 *res -= operand2;
1559 break;
1560 case '*':
1561 *res *= operand2;
1562 break;
1563 case '/':
1564 if (0 == operand2) {
1565 mandoc_msg(MANDOCERR_DIVZERO,
1566 r->parse, ln, *pos, v);
1567 *res = 0;
1568 break;
1569 }
1570 *res /= operand2;
1571 break;
1572 case '%':
1573 *res %= operand2;
1574 break;
1575 case '<':
1576 *res = *res < operand2;
1577 break;
1578 case '>':
1579 *res = *res > operand2;
1580 break;
1581 case 'l':
1582 *res = *res <= operand2;
1583 break;
1584 case 'g':
1585 *res = *res >= operand2;
1586 break;
1587 case '=':
1588 *res = *res == operand2;
1589 break;
1590 case '!':
1591 *res = *res != operand2;
1592 break;
1593 case '&':
1594 *res = *res && operand2;
1595 break;
1596 case ':':
1597 *res = *res || operand2;
1598 break;
1599 case 'i':
1600 if (operand2 < *res)
1601 *res = operand2;
1602 break;
1603 case 'a':
1604 if (operand2 > *res)
1605 *res = operand2;
1606 break;
1607 default:
1608 abort();
1609 }
1610 }
1611 return(1);
1612 }
1613
1614 void
1615 roff_setreg(struct roff *r, const char *name, int val, char sign)
1616 {
1617 struct roffreg *reg;
1618
1619 /* Search for an existing register with the same name. */
1620 reg = r->regtab;
1621
1622 while (reg && strcmp(name, reg->key.p))
1623 reg = reg->next;
1624
1625 if (NULL == reg) {
1626 /* Create a new register. */
1627 reg = mandoc_malloc(sizeof(struct roffreg));
1628 reg->key.p = mandoc_strdup(name);
1629 reg->key.sz = strlen(name);
1630 reg->val = 0;
1631 reg->next = r->regtab;
1632 r->regtab = reg;
1633 }
1634
1635 if ('+' == sign)
1636 reg->val += val;
1637 else if ('-' == sign)
1638 reg->val -= val;
1639 else
1640 reg->val = val;
1641 }
1642
/*
 * Handle some predefined read-only number registers.
 * Only the first character of name is inspected.
 * For now, return -1 if the requested register is not predefined;
 * in case a predefined read-only register having the value -1
 * were to turn up, another special value would have to be chosen.
 */
static int
roff_getregro(const char *name)
{
	int		 val;

	val = -1;

	if (*name == 'A')	/* ASCII approximation mode is always off. */
		val = 0;
	else if (*name == 'g')	/* Groff compatibility mode is always on. */
		val = 1;
	else if (*name == 'H')	/* Fixed horizontal resolution. */
		val = 24;
	else if (*name == 'j')	/* Always adjust left margin only. */
		val = 0;
	else if (*name == 'T')	/* Some output device is always defined. */
		val = 1;
	else if (*name == 'V')	/* Fixed vertical resolution. */
		val = 40;

	return(val);
}
1670
1671 int
1672 roff_getreg(const struct roff *r, const char *name)
1673 {
1674 struct roffreg *reg;
1675 int val;
1676
1677 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1678 val = roff_getregro(name + 1);
1679 if (-1 != val)
1680 return (val);
1681 }
1682
1683 for (reg = r->regtab; reg; reg = reg->next)
1684 if (0 == strcmp(name, reg->key.p))
1685 return(reg->val);
1686
1687 return(0);
1688 }
1689
1690 static int
1691 roff_getregn(const struct roff *r, const char *name, size_t len)
1692 {
1693 struct roffreg *reg;
1694 int val;
1695
1696 if ('.' == name[0] && 2 == len) {
1697 val = roff_getregro(name + 1);
1698 if (-1 != val)
1699 return (val);
1700 }
1701
1702 for (reg = r->regtab; reg; reg = reg->next)
1703 if (len == reg->key.sz &&
1704 0 == strncmp(name, reg->key.p, len))
1705 return(reg->val);
1706
1707 return(0);
1708 }
1709
1710 static void
1711 roff_freereg(struct roffreg *reg)
1712 {
1713 struct roffreg *old_reg;
1714
1715 while (NULL != reg) {
1716 free(reg->key.p);
1717 old_reg = reg;
1718 reg = reg->next;
1719 free(old_reg);
1720 }
1721 }
1722
1723 static enum rofferr
1724 roff_nr(ROFF_ARGS)
1725 {
1726 char *key, *val;
1727 size_t keysz;
1728 int iv;
1729 char sign;
1730
1731 key = val = buf->buf + pos;
1732 if (*key == '\0')
1733 return(ROFF_IGN);
1734
1735 keysz = roff_getname(r, &val, ln, pos);
1736 if (key[keysz] == '\\')
1737 return(ROFF_IGN);
1738 key[keysz] = '\0';
1739
1740 sign = *val;
1741 if (sign == '+' || sign == '-')
1742 val++;
1743
1744 if (roff_evalnum(r, ln, val, NULL, &iv, 0))
1745 roff_setreg(r, key, iv, sign);
1746
1747 return(ROFF_IGN);
1748 }
1749
1750 static enum rofferr
1751 roff_rr(ROFF_ARGS)
1752 {
1753 struct roffreg *reg, **prev;
1754 char *name, *cp;
1755 size_t namesz;
1756
1757 name = cp = buf->buf + pos;
1758 if (*name == '\0')
1759 return(ROFF_IGN);
1760 namesz = roff_getname(r, &cp, ln, pos);
1761 name[namesz] = '\0';
1762
1763 prev = &r->regtab;
1764 while (1) {
1765 reg = *prev;
1766 if (reg == NULL || !strcmp(name, reg->key.p))
1767 break;
1768 prev = &reg->next;
1769 }
1770 if (reg != NULL) {
1771 *prev = reg->next;
1772 free(reg->key.p);
1773 free(reg);
1774 }
1775 return(ROFF_IGN);
1776 }
1777
1778 static enum rofferr
1779 roff_rm(ROFF_ARGS)
1780 {
1781 const char *name;
1782 char *cp;
1783 size_t namesz;
1784
1785 cp = buf->buf + pos;
1786 while (*cp != '\0') {
1787 name = cp;
1788 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
1789 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
1790 if (name[namesz] == '\\')
1791 break;
1792 }
1793 return(ROFF_IGN);
1794 }
1795
1796 static enum rofferr
1797 roff_it(ROFF_ARGS)
1798 {
1799 char *cp;
1800 size_t len;
1801 int iv;
1802
1803 /* Parse the number of lines. */
1804 cp = buf->buf + pos;
1805 len = strcspn(cp, " \t");
1806 cp[len] = '\0';
1807 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1808 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
1809 ln, ppos, buf->buf + 1);
1810 return(ROFF_IGN);
1811 }
1812 cp += len + 1;
1813
1814 /* Arm the input line trap. */
1815 roffit_lines = iv;
1816 roffit_macro = mandoc_strdup(cp);
1817 return(ROFF_IGN);
1818 }
1819
1820 static enum rofferr
1821 roff_Dd(ROFF_ARGS)
1822 {
1823 const char *const *cp;
1824
1825 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
1826 for (cp = __mdoc_reserved; *cp; cp++)
1827 roff_setstr(r, *cp, NULL, 0);
1828
1829 if (r->format == 0)
1830 r->format = MPARSE_MDOC;
1831
1832 return(ROFF_CONT);
1833 }
1834
1835 static enum rofferr
1836 roff_TH(ROFF_ARGS)
1837 {
1838 const char *const *cp;
1839
1840 if ((r->options & MPARSE_QUICK) == 0)
1841 for (cp = __man_reserved; *cp; cp++)
1842 roff_setstr(r, *cp, NULL, 0);
1843
1844 if (r->format == 0)
1845 r->format = MPARSE_MAN;
1846
1847 return(ROFF_CONT);
1848 }
1849
1850 static enum rofferr
1851 roff_TE(ROFF_ARGS)
1852 {
1853
1854 if (NULL == r->tbl)
1855 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1856 ln, ppos, "TE");
1857 else
1858 tbl_end(&r->tbl);
1859
1860 return(ROFF_IGN);
1861 }
1862
1863 static enum rofferr
1864 roff_T_(ROFF_ARGS)
1865 {
1866
1867 if (NULL == r->tbl)
1868 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1869 ln, ppos, "T&");
1870 else
1871 tbl_restart(ppos, ln, r->tbl);
1872
1873 return(ROFF_IGN);
1874 }
1875
1876 /*
1877 * Handle in-line equation delimiters.
1878 */
1879 static enum rofferr
1880 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
1881 {
1882 char *cp1, *cp2;
1883 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
1884
1885 /*
1886 * Outside equations, look for an opening delimiter.
1887 * If we are inside an equation, we already know it is
1888 * in-line, or this function wouldn't have been called;
1889 * so look for a closing delimiter.
1890 */
1891
1892 cp1 = buf->buf + pos;
1893 cp2 = strchr(cp1, r->eqn == NULL ?
1894 r->last_eqn->odelim : r->last_eqn->cdelim);
1895 if (cp2 == NULL)
1896 return(ROFF_CONT);
1897
1898 *cp2++ = '\0';
1899 bef_pr = bef_nl = aft_nl = aft_pr = "";
1900
1901 /* Handle preceding text, protecting whitespace. */
1902
1903 if (*buf->buf != '\0') {
1904 if (r->eqn == NULL)
1905 bef_pr = "\\&";
1906 bef_nl = "\n";
1907 }
1908
1909 /*
1910 * Prepare replacing the delimiter with an equation macro
1911 * and drop leading white space from the equation.
1912 */
1913
1914 if (r->eqn == NULL) {
1915 while (*cp2 == ' ')
1916 cp2++;
1917 mac = ".EQ";
1918 } else
1919 mac = ".EN";
1920
1921 /* Handle following text, protecting whitespace. */
1922
1923 if (*cp2 != '\0') {
1924 aft_nl = "\n";
1925 if (r->eqn != NULL)
1926 aft_pr = "\\&";
1927 }
1928
1929 /* Do the actual replacement. */
1930
1931 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
1932 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
1933 free(buf->buf);
1934 buf->buf = cp1;
1935
1936 /* Toggle the in-line state of the eqn subsystem. */
1937
1938 r->eqn_inline = r->eqn == NULL;
1939 return(ROFF_REPARSE);
1940 }
1941
1942 static enum rofferr
1943 roff_EQ(ROFF_ARGS)
1944 {
1945 struct eqn_node *e;
1946
1947 assert(r->eqn == NULL);
1948 e = eqn_alloc(ppos, ln, r->parse);
1949
1950 if (r->last_eqn) {
1951 r->last_eqn->next = e;
1952 e->delim = r->last_eqn->delim;
1953 e->odelim = r->last_eqn->odelim;
1954 e->cdelim = r->last_eqn->cdelim;
1955 } else
1956 r->first_eqn = r->last_eqn = e;
1957
1958 r->eqn = r->last_eqn = e;
1959
1960 if (buf->buf[pos] != '\0')
1961 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1962 ".EQ %s", buf->buf + pos);
1963
1964 return(ROFF_IGN);
1965 }
1966
1967 static enum rofferr
1968 roff_EN(ROFF_ARGS)
1969 {
1970
1971 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
1972 return(ROFF_IGN);
1973 }
1974
1975 static enum rofferr
1976 roff_TS(ROFF_ARGS)
1977 {
1978 struct tbl_node *tbl;
1979
1980 if (r->tbl) {
1981 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
1982 ln, ppos, "TS breaks TS");
1983 tbl_end(&r->tbl);
1984 }
1985
1986 tbl = tbl_alloc(ppos, ln, r->parse);
1987
1988 if (r->last_tbl)
1989 r->last_tbl->next = tbl;
1990 else
1991 r->first_tbl = r->last_tbl = tbl;
1992
1993 r->tbl = r->last_tbl = tbl;
1994 return(ROFF_IGN);
1995 }
1996
1997 static enum rofferr
1998 roff_cc(ROFF_ARGS)
1999 {
2000 const char *p;
2001
2002 p = buf->buf + pos;
2003
2004 if (*p == '\0' || (r->control = *p++) == '.')
2005 r->control = 0;
2006
2007 if (*p != '\0')
2008 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
2009
2010 return(ROFF_IGN);
2011 }
2012
2013 static enum rofferr
2014 roff_tr(ROFF_ARGS)
2015 {
2016 const char *p, *first, *second;
2017 size_t fsz, ssz;
2018 enum mandoc_esc esc;
2019
2020 p = buf->buf + pos;
2021
2022 if (*p == '\0') {
2023 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
2024 return(ROFF_IGN);
2025 }
2026
2027 while (*p != '\0') {
2028 fsz = ssz = 1;
2029
2030 first = p++;
2031 if (*first == '\\') {
2032 esc = mandoc_escape(&p, NULL, NULL);
2033 if (esc == ESCAPE_ERROR) {
2034 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2035 ln, (int)(p - buf->buf), first);
2036 return(ROFF_IGN);
2037 }
2038 fsz = (size_t)(p - first);
2039 }
2040
2041 second = p++;
2042 if (*second == '\\') {
2043 esc = mandoc_escape(&p, NULL, NULL);
2044 if (esc == ESCAPE_ERROR) {
2045 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2046 ln, (int)(p - buf->buf), second);
2047 return(ROFF_IGN);
2048 }
2049 ssz = (size_t)(p - second);
2050 } else if (*second == '\0') {
2051 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
2052 ln, (int)(p - buf->buf), NULL);
2053 second = " ";
2054 p--;
2055 }
2056
2057 if (fsz > 1) {
2058 roff_setstrn(&r->xmbtab, first, fsz,
2059 second, ssz, 0);
2060 continue;
2061 }
2062
2063 if (r->xtab == NULL)
2064 r->xtab = mandoc_calloc(128,
2065 sizeof(struct roffstr));
2066
2067 free(r->xtab[(int)*first].p);
2068 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2069 r->xtab[(int)*first].sz = ssz;
2070 }
2071
2072 return(ROFF_IGN);
2073 }
2074
2075 static enum rofferr
2076 roff_so(ROFF_ARGS)
2077 {
2078 char *name;
2079
2080 name = buf->buf + pos;
2081 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
2082
2083 /*
2084 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2085 * opening anything that's not in our cwd or anything beneath
2086 * it. Thus, explicitly disallow traversing up the file-system
2087 * or using absolute paths.
2088 */
2089
2090 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
2091 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
2092 ".so %s", name);
2093 return(ROFF_ERR);
2094 }
2095
2096 *offs = pos;
2097 return(ROFF_SO);
2098 }
2099
2100 static enum rofferr
2101 roff_userdef(ROFF_ARGS)
2102 {
2103 const char *arg[9];
2104 char *cp, *n1, *n2;
2105 int i;
2106
2107 /*
2108 * Collect pointers to macro argument strings
2109 * and NUL-terminate them.
2110 */
2111 cp = buf->buf + pos;
2112 for (i = 0; i < 9; i++)
2113 arg[i] = *cp == '\0' ? "" :
2114 mandoc_getarg(r->parse, &cp, ln, &pos);
2115
2116 /*
2117 * Expand macro arguments.
2118 */
2119 buf->sz = 0;
2120 n1 = cp = mandoc_strdup(r->current_string);
2121 while ((cp = strstr(cp, "\\$")) != NULL) {
2122 i = cp[2] - '1';
2123 if (0 > i || 8 < i) {
2124 /* Not an argument invocation. */
2125 cp += 2;
2126 continue;
2127 }
2128 *cp = '\0';
2129 buf->sz = mandoc_asprintf(&n2, "%s%s%s",
2130 n1, arg[i], cp + 3) + 1;
2131 cp = n2 + (cp - n1);
2132 free(n1);
2133 n1 = n2;
2134 }
2135
2136 /*
2137 * Replace the macro invocation
2138 * by the expanded macro.
2139 */
2140 free(buf->buf);
2141 buf->buf = n1;
2142 if (buf->sz == 0)
2143 buf->sz = strlen(buf->buf) + 1;
2144
2145 return(buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
2146 ROFF_REPARSE : ROFF_APPEND);
2147 }
2148
2149 static size_t
2150 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2151 {
2152 char *name, *cp;
2153 size_t namesz;
2154
2155 name = *cpp;
2156 if ('\0' == *name)
2157 return(0);
2158
2159 /* Read until end of name and terminate it with NUL. */
2160 for (cp = name; 1; cp++) {
2161 if ('\0' == *cp || ' ' == *cp) {
2162 namesz = cp - name;
2163 break;
2164 }
2165 if ('\\' != *cp)
2166 continue;
2167 namesz = cp - name;
2168 if ('{' == cp[1] || '}' == cp[1])
2169 break;
2170 cp++;
2171 if ('\\' == *cp)
2172 continue;
2173 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
2174 "%.*s", (int)(cp - name + 1), name);
2175 mandoc_escape((const char **)&cp, NULL, NULL);
2176 break;
2177 }
2178
2179 /* Read past spaces. */
2180 while (' ' == *cp)
2181 cp++;
2182
2183 *cpp = cp;
2184 return(namesz);
2185 }
2186
2187 /*
2188 * Store *string into the user-defined string called *name.
2189 * To clear an existing entry, call with (*r, *name, NULL, 0).
2190 * append == 0: replace mode
2191 * append == 1: single-line append mode
2192 * append == 2: multiline append mode, append '\n' after each call
2193 */
2194 static void
2195 roff_setstr(struct roff *r, const char *name, const char *string,
2196 int append)
2197 {
2198
2199 roff_setstrn(&r->strtab, name, strlen(name), string,
2200 string ? strlen(string) : 0, append);
2201 }
2202
2203 static void
2204 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2205 const char *string, size_t stringsz, int append)
2206 {
2207 struct roffkv *n;
2208 char *c;
2209 int i;
2210 size_t oldch, newch;
2211
2212 /* Search for an existing string with the same name. */
2213 n = *r;
2214
2215 while (n && (namesz != n->key.sz ||
2216 strncmp(n->key.p, name, namesz)))
2217 n = n->next;
2218
2219 if (NULL == n) {
2220 /* Create a new string table entry. */
2221 n = mandoc_malloc(sizeof(struct roffkv));
2222 n->key.p = mandoc_strndup(name, namesz);
2223 n->key.sz = namesz;
2224 n->val.p = NULL;
2225 n->val.sz = 0;
2226 n->next = *r;
2227 *r = n;
2228 } else if (0 == append) {
2229 free(n->val.p);
2230 n->val.p = NULL;
2231 n->val.sz = 0;
2232 }
2233
2234 if (NULL == string)
2235 return;
2236
2237 /*
2238 * One additional byte for the '\n' in multiline mode,
2239 * and one for the terminating '\0'.
2240 */
2241 newch = stringsz + (1 < append ? 2u : 1u);
2242
2243 if (NULL == n->val.p) {
2244 n->val.p = mandoc_malloc(newch);
2245 *n->val.p = '\0';
2246 oldch = 0;
2247 } else {
2248 oldch = n->val.sz;
2249 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2250 }
2251
2252 /* Skip existing content in the destination buffer. */
2253 c = n->val.p + (int)oldch;
2254
2255 /* Append new content to the destination buffer. */
2256 i = 0;
2257 while (i < (int)stringsz) {
2258 /*
2259 * Rudimentary roff copy mode:
2260 * Handle escaped backslashes.
2261 */
2262 if ('\\' == string[i] && '\\' == string[i + 1])
2263 i++;
2264 *c++ = string[i++];
2265 }
2266
2267 /* Append terminating bytes. */
2268 if (1 < append)
2269 *c++ = '\n';
2270
2271 *c = '\0';
2272 n->val.sz = (int)(c - n->val.p);
2273 }
2274
2275 static const char *
2276 roff_getstrn(const struct roff *r, const char *name, size_t len)
2277 {
2278 const struct roffkv *n;
2279 int i;
2280
2281 for (n = r->strtab; n; n = n->next)
2282 if (0 == strncmp(name, n->key.p, len) &&
2283 '\0' == n->key.p[(int)len])
2284 return(n->val.p);
2285
2286 for (i = 0; i < PREDEFS_MAX; i++)
2287 if (0 == strncmp(name, predefs[i].name, len) &&
2288 '\0' == predefs[i].name[(int)len])
2289 return(predefs[i].str);
2290
2291 return(NULL);
2292 }
2293
2294 static void
2295 roff_freestr(struct roffkv *r)
2296 {
2297 struct roffkv *n, *nn;
2298
2299 for (n = r; n; n = nn) {
2300 free(n->key.p);
2301 free(n->val.p);
2302 nn = n->next;
2303 free(n);
2304 }
2305 }
2306
2307 const struct tbl_span *
2308 roff_span(const struct roff *r)
2309 {
2310
2311 return(r->tbl ? tbl_span(r->tbl) : NULL);
2312 }
2313
2314 const struct eqn *
2315 roff_eqn(const struct roff *r)
2316 {
2317
2318 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2319 }
2320
2321 /*
2322 * Duplicate an input string, making the appropriate character
2323 * conversations (as stipulated by `tr') along the way.
2324 * Returns a heap-allocated string with all the replacements made.
2325 */
2326 char *
2327 roff_strdup(const struct roff *r, const char *p)
2328 {
2329 const struct roffkv *cp;
2330 char *res;
2331 const char *pp;
2332 size_t ssz, sz;
2333 enum mandoc_esc esc;
2334
2335 if (NULL == r->xmbtab && NULL == r->xtab)
2336 return(mandoc_strdup(p));
2337 else if ('\0' == *p)
2338 return(mandoc_strdup(""));
2339
2340 /*
2341 * Step through each character looking for term matches
2342 * (remember that a `tr' can be invoked with an escape, which is
2343 * a glyph but the escape is multi-character).
2344 * We only do this if the character hash has been initialised
2345 * and the string is >0 length.
2346 */
2347
2348 res = NULL;
2349 ssz = 0;
2350
2351 while ('\0' != *p) {
2352 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2353 sz = r->xtab[(int)*p].sz;
2354 res = mandoc_realloc(res, ssz + sz + 1);
2355 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2356 ssz += sz;
2357 p++;
2358 continue;
2359 } else if ('\\' != *p) {
2360 res = mandoc_realloc(res, ssz + 2);
2361 res[ssz++] = *p++;
2362 continue;
2363 }
2364
2365 /* Search for term matches. */
2366 for (cp = r->xmbtab; cp; cp = cp->next)
2367 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2368 break;
2369
2370 if (NULL != cp) {
2371 /*
2372 * A match has been found.
2373 * Append the match to the array and move
2374 * forward by its keysize.
2375 */
2376 res = mandoc_realloc(res,
2377 ssz + cp->val.sz + 1);
2378 memcpy(res + ssz, cp->val.p, cp->val.sz);
2379 ssz += cp->val.sz;
2380 p += (int)cp->key.sz;
2381 continue;
2382 }
2383
2384 /*
2385 * Handle escapes carefully: we need to copy
2386 * over just the escape itself, or else we might
2387 * do replacements within the escape itself.
2388 * Make sure to pass along the bogus string.
2389 */
2390 pp = p++;
2391 esc = mandoc_escape(&p, NULL, NULL);
2392 if (ESCAPE_ERROR == esc) {
2393 sz = strlen(pp);
2394 res = mandoc_realloc(res, ssz + sz + 1);
2395 memcpy(res + ssz, pp, sz);
2396 break;
2397 }
2398 /*
2399 * We bail out on bad escapes.
2400 * No need to warn: we already did so when
2401 * roff_res() was called.
2402 */
2403 sz = (int)(p - pp);
2404 res = mandoc_realloc(res, ssz + sz + 1);
2405 memcpy(res + ssz, pp, sz);
2406 ssz += sz;
2407 }
2408
2409 res[(int)ssz] = '\0';
2410 return(res);
2411 }
2412
2413 int
2414 roff_getformat(const struct roff *r)
2415 {
2416
2417 return(r->format);
2418 }
2419
2420 /*
2421 * Find out whether a line is a macro line or not.
2422 * If it is, adjust the current position and return one; if it isn't,
2423 * return zero and don't change the current position.
2424 * If the control character has been set with `.cc', then let that grain
2425 * precedence.
2426 * This is slighly contrary to groff, where using the non-breaking
2427 * control character when `cc' has been invoked will cause the
2428 * non-breaking macro contents to be printed verbatim.
2429 */
2430 int
2431 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2432 {
2433 int pos;
2434
2435 pos = *ppos;
2436
2437 if (0 != r->control && cp[pos] == r->control)
2438 pos++;
2439 else if (0 != r->control)
2440 return(0);
2441 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2442 pos += 2;
2443 else if ('.' == cp[pos] || '\'' == cp[pos])
2444 pos++;
2445 else
2446 return(0);
2447
2448 while (' ' == cp[pos] || '\t' == cp[pos])
2449 pos++;
2450
2451 *ppos = pos;
2452 return(1);
2453 }