]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Let .Do .Dq .Ql .So .Sq generate the correct roff(7) character escape
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.176 2013/05/31 22:08:09 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdlib.h>
25 #include <string.h>
26
27 #include "mandoc.h"
28 #include "libroff.h"
29 #include "libmandoc.h"
30
31 /* Maximum number of nested if-else conditionals. */
32 #define RSTACK_MAX 128
33
34 /* Maximum number of string expansions per line, to break infinite loops. */
35 #define EXPAND_LIMIT 1000
36
/*
 * Identifiers of the requests and macros handled in this file.
 * The values index the roffs[] dispatch table, so the order of the
 * enumerators must match the order of the entries in that table.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_cc,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* block close, named "." in roffs[] */
	ROFF_ccond,	/* conditional close, named "\}" in roffs[] */
	ROFF_USERDEF,	/* user-defined macro; entries from here on are not hashed */
	ROFF_MAX	/* number of entries; also returned for "not found" */
};
74
/*
 * Result of evaluating a conditional.  The conditional handlers
 * return ROFF_IGN for lines under a DENY rule and ROFF_CONT otherwise.
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
79
/*
 * A single register entity.  If "set" is zero, the value of the
 * register should be the default one, which is per-register.
 * Registers are assumed to be unsigned ints for now.
 * Accessed through roff_regisset(), roff_regget() and roff_regunset().
 */
struct	reg {
	int		 set; /* whether set or not */
	unsigned int	 u; /* unsigned integer */
};
89
/*
 * An incredibly-simple string buffer: a pointer together with the
 * cached length of the string it points to.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
97
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps two such lists: strtab and xmbtab.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
106
/*
 * State of one roff parsing context.  Allocated by roff_alloc(),
 * returned to its initial per-document state by roff_reset() and
 * released by roff_free().
 */
struct	roff {
	enum mparset	 parsetype; /* requested parse type */
	struct mparse	*parse; /* parse point */
	struct roffnode	*last; /* leaf of stack */
	enum roffrule	 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	char		 control; /* control character */
	int		 rstackpos; /* position in rstack; -1 when empty */
	struct reg	 regs[REG__MAX];
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
};
126
/*
 * One open block or conditional scope.  Nodes form a stack linked
 * through "parent"; they are created by roffnode_push() and destroyed
 * by roffnode_pop(), which frees "name" and "end".
 *
 * "endspan" is set by roff_cond(): 2 for next-line scope, 1 for
 * single-line scope and -1 for a brace-delimited multi-line scope.
 * roffnode_cleanscope() decrements it and pops the node when it
 * reaches zero.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	enum roffrule	 rule; /* current evaluation rule */
};
137
/*
 * Parameter list shared by all request handlers, so that every
 * handler has the roffproc signature and can be stored in roffs[].
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler taking the ROFF_ARGS parameters. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
148
/*
 * One entry of the roffs[] dispatch table.  "proc" runs when the
 * request itself is encountered; "text" and "sub" run for text lines
 * and control lines, respectively, while a scope opened by this
 * request is the innermost one (see roff_parseln()).
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next entry in the same hash bucket */
};
158
/*
 * A predefined string: roff_alloc() and roff_reset() install each
 * one with roff_setstr().
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/* Expands to one struct predef initializer, followed by a comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
166
/*
 * Forward declarations of the file-local helpers and request
 * handlers; the handlers are referenced by the roffs[] table below.
 */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freestr(struct roffkv *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
212
/* See roffhash_find() */

/*
 * The hash table has one bucket per character in the range
 * ASCII_LO..ASCII_HI; a name is filed under its first character.
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; entries are chained through roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
220
/*
 * Dispatch table, indexed by enum rofft: the entries must appear in
 * the same order as the enumerators.  Initializer fields are
 * name, proc, text, sub, flags, next.  The final entry (ROFF_USERDEF)
 * has no name and is never entered into the hash table.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
257
/*
 * NULL-terminated list of names walked by roff_Dd(), which passes
 * each one to roff_setstr() with a NULL value (the same call that
 * roff_rm() makes for `rm').
 * NOTE(review): identifiers starting with two underscores are
 * reserved for the C implementation; renaming is not attempted here
 * because the array has external linkage and may be used elsewhere.
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Ds", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
	"En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
	"Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
	"Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
279
/*
 * NULL-terminated list of names walked by roff_TH(), which passes
 * each one to roff_setstr() with a NULL value (the same call that
 * roff_rm() makes for `rm').
 * NOTE(review): identifiers starting with two underscores are
 * reserved for the C implementation; renaming is not attempted here
 * because the array has external linkage and may be used elsewhere.
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
	"EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
	"LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
	"TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
	NULL
};
288
/*
 * Array of injected predefined strings.
 * The entries come from predefs.in through the PREDEF() macro.
 * NOTE(review): PREDEFS_MAX is a hand-maintained count and
 * presumably has to equal the number of PREDEF() lines in
 * predefs.in; verify when that file changes.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
294
/*
 * See roffhash_find().
 * Map a name to its bucket in hash[]: the offset of the name's first
 * character from ASCII_LO.  The caller is responsible for checking
 * that this character lies within ASCII_LO..ASCII_HI.
 * The argument is parenthesised so that an expression such as
 * `s + 1' is indexed as a whole.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
297
298 static void
299 roffhash_init(void)
300 {
301 struct roffmac *n;
302 int buc, i;
303
304 for (i = 0; i < (int)ROFF_USERDEF; i++) {
305 assert(roffs[i].name[0] >= ASCII_LO);
306 assert(roffs[i].name[0] <= ASCII_HI);
307
308 buc = ROFF_HASH(roffs[i].name);
309
310 if (NULL != (n = hash[buc])) {
311 for ( ; n->next; n = n->next)
312 /* Do nothing. */ ;
313 n->next = &roffs[i];
314 } else
315 hash[buc] = &roffs[i];
316 }
317 }
318
319 /*
320 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
321 * the nil-terminated string name could be found.
322 */
323 static enum rofft
324 roffhash_find(const char *p, size_t s)
325 {
326 int buc;
327 struct roffmac *n;
328
329 /*
330 * libroff has an extremely simple hashtable, for the time
331 * being, which simply keys on the first character, which must
332 * be printable, then walks a chain. It works well enough until
333 * optimised.
334 */
335
336 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
337 return(ROFF_MAX);
338
339 buc = ROFF_HASH(p);
340
341 if (NULL == (n = hash[buc]))
342 return(ROFF_MAX);
343 for ( ; n; n = n->next)
344 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
345 return((enum rofft)(n - roffs));
346
347 return(ROFF_MAX);
348 }
349
350
351 /*
352 * Pop the current node off of the stack of roff instructions currently
353 * pending.
354 */
355 static void
356 roffnode_pop(struct roff *r)
357 {
358 struct roffnode *p;
359
360 assert(r->last);
361 p = r->last;
362
363 r->last = r->last->parent;
364 free(p->name);
365 free(p->end);
366 free(p);
367 }
368
369
370 /*
371 * Push a roff node onto the instruction stack. This must later be
372 * removed with roffnode_pop().
373 */
374 static void
375 roffnode_push(struct roff *r, enum rofft tok, const char *name,
376 int line, int col)
377 {
378 struct roffnode *p;
379
380 p = mandoc_calloc(1, sizeof(struct roffnode));
381 p->tok = tok;
382 if (name)
383 p->name = mandoc_strdup(name);
384 p->parent = r->last;
385 p->line = line;
386 p->col = col;
387 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
388
389 r->last = p;
390 }
391
392
393 static void
394 roff_free1(struct roff *r)
395 {
396 struct tbl_node *tbl;
397 struct eqn_node *e;
398 int i;
399
400 while (NULL != (tbl = r->first_tbl)) {
401 r->first_tbl = tbl->next;
402 tbl_free(tbl);
403 }
404
405 r->first_tbl = r->last_tbl = r->tbl = NULL;
406
407 while (NULL != (e = r->first_eqn)) {
408 r->first_eqn = e->next;
409 eqn_free(e);
410 }
411
412 r->first_eqn = r->last_eqn = r->eqn = NULL;
413
414 while (r->last)
415 roffnode_pop(r);
416
417 roff_freestr(r->strtab);
418 roff_freestr(r->xmbtab);
419
420 r->strtab = r->xmbtab = NULL;
421
422 if (r->xtab)
423 for (i = 0; i < 128; i++)
424 free(r->xtab[i].p);
425
426 free(r->xtab);
427 r->xtab = NULL;
428 }
429
430 void
431 roff_reset(struct roff *r)
432 {
433 int i;
434
435 roff_free1(r);
436
437 r->control = 0;
438 memset(&r->regs, 0, sizeof(struct reg) * REG__MAX);
439
440 for (i = 0; i < PREDEFS_MAX; i++)
441 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
442 }
443
444
/*
 * Destroy a parse context: release everything it holds, then the
 * context structure itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
452
453
454 struct roff *
455 roff_alloc(enum mparset type, struct mparse *parse)
456 {
457 struct roff *r;
458 int i;
459
460 r = mandoc_calloc(1, sizeof(struct roff));
461 r->parsetype = type;
462 r->parse = parse;
463 r->rstackpos = -1;
464
465 roffhash_init();
466
467 for (i = 0; i < PREDEFS_MAX; i++)
468 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
469
470 return(r);
471 }
472
473 /*
474 * Pre-filter each and every line for reserved words (one beginning with
475 * `\*', e.g., `\*(ab'). These must be handled before the actual line
476 * is processed.
477 * This also checks the syntax of regular escapes.
478 */
479 static enum rofferr
480 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
481 {
482 enum mandoc_esc esc;
483 const char *stesc; /* start of an escape sequence ('\\') */
484 const char *stnam; /* start of the name, after "[(*" */
485 const char *cp; /* end of the name, e.g. before ']' */
486 const char *res; /* the string to be substituted */
487 int i, maxl, expand_count;
488 size_t nsz;
489 char *n;
490
491 expand_count = 0;
492
493 again:
494 cp = *bufp + pos;
495 while (NULL != (cp = strchr(cp, '\\'))) {
496 stesc = cp++;
497
498 /*
499 * The second character must be an asterisk.
500 * If it isn't, skip it anyway: It is escaped,
501 * so it can't start another escape sequence.
502 */
503
504 if ('\0' == *cp)
505 return(ROFF_CONT);
506
507 if ('*' != *cp) {
508 res = cp;
509 esc = mandoc_escape(&cp, NULL, NULL);
510 if (ESCAPE_ERROR != esc)
511 continue;
512 cp = res;
513 mandoc_msg
514 (MANDOCERR_BADESCAPE, r->parse,
515 ln, (int)(stesc - *bufp), NULL);
516 return(ROFF_CONT);
517 }
518
519 cp++;
520
521 /*
522 * The third character decides the length
523 * of the name of the string.
524 * Save a pointer to the name.
525 */
526
527 switch (*cp) {
528 case ('\0'):
529 return(ROFF_CONT);
530 case ('('):
531 cp++;
532 maxl = 2;
533 break;
534 case ('['):
535 cp++;
536 maxl = 0;
537 break;
538 default:
539 maxl = 1;
540 break;
541 }
542 stnam = cp;
543
544 /* Advance to the end of the name. */
545
546 for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
547 if ('\0' == *cp) {
548 mandoc_msg
549 (MANDOCERR_BADESCAPE,
550 r->parse, ln,
551 (int)(stesc - *bufp), NULL);
552 return(ROFF_CONT);
553 }
554 if (0 == maxl && ']' == *cp)
555 break;
556 }
557
558 /*
559 * Retrieve the replacement string; if it is
560 * undefined, resume searching for escapes.
561 */
562
563 res = roff_getstrn(r, stnam, (size_t)i);
564
565 if (NULL == res) {
566 mandoc_msg
567 (MANDOCERR_BADESCAPE, r->parse,
568 ln, (int)(stesc - *bufp), NULL);
569 res = "";
570 }
571
572 /* Replace the escape sequence by the string. */
573
574 pos = stesc - *bufp;
575
576 nsz = *szp + strlen(res) + 1;
577 n = mandoc_malloc(nsz);
578
579 strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
580 strlcat(n, res, nsz);
581 strlcat(n, cp + (maxl ? 0 : 1), nsz);
582
583 free(*bufp);
584
585 *bufp = n;
586 *szp = nsz;
587
588 if (EXPAND_LIMIT >= ++expand_count)
589 goto again;
590
591 /* Just leave the string unexpanded. */
592 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
593 return(ROFF_IGN);
594 }
595 return(ROFF_CONT);
596 }
597
598 /*
599 * Process text streams: convert all breakable hyphens into ASCII_HYPH.
600 */
601 static enum rofferr
602 roff_parsetext(char *p)
603 {
604 size_t sz;
605 const char *start;
606 enum mandoc_esc esc;
607
608 start = p;
609
610 while ('\0' != *p) {
611 sz = strcspn(p, "-\\");
612 p += sz;
613
614 if ('\0' == *p)
615 break;
616
617 if ('\\' == *p) {
618 /* Skip over escapes. */
619 p++;
620 esc = mandoc_escape
621 ((const char **)&p, NULL, NULL);
622 if (ESCAPE_ERROR == esc)
623 break;
624 continue;
625 } else if (p == start) {
626 p++;
627 continue;
628 }
629
630 if (isalpha((unsigned char)p[-1]) &&
631 isalpha((unsigned char)p[1]))
632 *p = ASCII_HYPH;
633 p++;
634 }
635
636 return(ROFF_CONT);
637 }
638
639 enum rofferr
640 roff_parseln(struct roff *r, int ln, char **bufp,
641 size_t *szp, int pos, int *offs)
642 {
643 enum rofft t;
644 enum rofferr e;
645 int ppos, ctl;
646
647 /*
648 * Run the reserved-word filter only if we have some reserved
649 * words to fill in.
650 */
651
652 e = roff_res(r, bufp, szp, ln, pos);
653 if (ROFF_IGN == e)
654 return(e);
655 assert(ROFF_CONT == e);
656
657 ppos = pos;
658 ctl = roff_getcontrol(r, *bufp, &pos);
659
660 /*
661 * First, if a scope is open and we're not a macro, pass the
662 * text through the macro's filter. If a scope isn't open and
663 * we're not a macro, just let it through.
664 * Finally, if there's an equation scope open, divert it into it
665 * no matter our state.
666 */
667
668 if (r->last && ! ctl) {
669 t = r->last->tok;
670 assert(roffs[t].text);
671 e = (*roffs[t].text)
672 (r, t, bufp, szp, ln, pos, pos, offs);
673 assert(ROFF_IGN == e || ROFF_CONT == e);
674 if (ROFF_CONT != e)
675 return(e);
676 if (r->eqn)
677 return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
678 if (r->tbl)
679 return(tbl_read(r->tbl, ln, *bufp, pos));
680 return(roff_parsetext(*bufp + pos));
681 } else if ( ! ctl) {
682 if (r->eqn)
683 return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
684 if (r->tbl)
685 return(tbl_read(r->tbl, ln, *bufp, pos));
686 return(roff_parsetext(*bufp + pos));
687 } else if (r->eqn)
688 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
689
690 /*
691 * If a scope is open, go to the child handler for that macro,
692 * as it may want to preprocess before doing anything with it.
693 * Don't do so if an equation is open.
694 */
695
696 if (r->last) {
697 t = r->last->tok;
698 assert(roffs[t].sub);
699 return((*roffs[t].sub)
700 (r, t, bufp, szp,
701 ln, ppos, pos, offs));
702 }
703
704 /*
705 * Lastly, as we've no scope open, try to look up and execute
706 * the new macro. If no macro is found, simply return and let
707 * the compilers handle it.
708 */
709
710 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
711 return(ROFF_CONT);
712
713 assert(roffs[t].proc);
714 return((*roffs[t].proc)
715 (r, t, bufp, szp,
716 ln, ppos, pos, offs));
717 }
718
719
720 void
721 roff_endparse(struct roff *r)
722 {
723
724 if (r->last)
725 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
726 r->last->line, r->last->col, NULL);
727
728 if (r->eqn) {
729 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
730 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
731 eqn_end(&r->eqn);
732 }
733
734 if (r->tbl) {
735 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
736 r->tbl->line, r->tbl->pos, NULL);
737 tbl_end(&r->tbl);
738 }
739 }
740
741 /*
742 * Parse a roff node's type from the input buffer. This must be in the
743 * form of ".foo xxx" in the usual way.
744 */
745 static enum rofft
746 roff_parse(struct roff *r, const char *buf, int *pos)
747 {
748 const char *mac;
749 size_t maclen;
750 enum rofft t;
751
752 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
753 '\t' == buf[*pos] || ' ' == buf[*pos])
754 return(ROFF_MAX);
755
756 /*
757 * We stop the macro parse at an escape, tab, space, or nil.
758 * However, `\}' is also a valid macro, so make sure we don't
759 * clobber it by seeing the `\' as the end of token.
760 */
761
762 mac = buf + *pos;
763 maclen = strcspn(mac + 1, " \\\t\0") + 1;
764
765 t = (r->current_string = roff_getstrn(r, mac, maclen))
766 ? ROFF_USERDEF : roffhash_find(mac, maclen);
767
768 *pos += (int)maclen;
769
770 while (buf[*pos] && ' ' == buf[*pos])
771 (*pos)++;
772
773 return(t);
774 }
775
776 /* ARGSUSED */
777 static enum rofferr
778 roff_cblock(ROFF_ARGS)
779 {
780
781 /*
782 * A block-close `..' should only be invoked as a child of an
783 * ignore macro, otherwise raise a warning and just ignore it.
784 */
785
786 if (NULL == r->last) {
787 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
788 return(ROFF_IGN);
789 }
790
791 switch (r->last->tok) {
792 case (ROFF_am):
793 /* FALLTHROUGH */
794 case (ROFF_ami):
795 /* FALLTHROUGH */
796 case (ROFF_am1):
797 /* FALLTHROUGH */
798 case (ROFF_de):
799 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
800 /* FALLTHROUGH */
801 case (ROFF_dei):
802 /* FALLTHROUGH */
803 case (ROFF_ig):
804 break;
805 default:
806 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
807 return(ROFF_IGN);
808 }
809
810 if ((*bufp)[pos])
811 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
812
813 roffnode_pop(r);
814 roffnode_cleanscope(r);
815 return(ROFF_IGN);
816
817 }
818
819
820 static void
821 roffnode_cleanscope(struct roff *r)
822 {
823
824 while (r->last) {
825 if (--r->last->endspan != 0)
826 break;
827 roffnode_pop(r);
828 }
829 }
830
831
832 /* ARGSUSED */
833 static enum rofferr
834 roff_ccond(ROFF_ARGS)
835 {
836
837 if (NULL == r->last) {
838 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
839 return(ROFF_IGN);
840 }
841
842 switch (r->last->tok) {
843 case (ROFF_el):
844 /* FALLTHROUGH */
845 case (ROFF_ie):
846 /* FALLTHROUGH */
847 case (ROFF_if):
848 break;
849 default:
850 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
851 return(ROFF_IGN);
852 }
853
854 if (r->last->endspan > -1) {
855 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
856 return(ROFF_IGN);
857 }
858
859 if ((*bufp)[pos])
860 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
861
862 roffnode_pop(r);
863 roffnode_cleanscope(r);
864 return(ROFF_IGN);
865 }
866
867
868 /* ARGSUSED */
869 static enum rofferr
870 roff_block(ROFF_ARGS)
871 {
872 int sv;
873 size_t sz;
874 char *name;
875
876 name = NULL;
877
878 if (ROFF_ig != tok) {
879 if ('\0' == (*bufp)[pos]) {
880 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
881 return(ROFF_IGN);
882 }
883
884 /*
885 * Re-write `de1', since we don't really care about
886 * groff's strange compatibility mode, into `de'.
887 */
888
889 if (ROFF_de1 == tok)
890 tok = ROFF_de;
891 if (ROFF_de == tok)
892 name = *bufp + pos;
893 else
894 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
895 roffs[tok].name);
896
897 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
898 pos++;
899
900 while (isspace((unsigned char)(*bufp)[pos]))
901 (*bufp)[pos++] = '\0';
902 }
903
904 roffnode_push(r, tok, name, ln, ppos);
905
906 /*
907 * At the beginning of a `de' macro, clear the existing string
908 * with the same name, if there is one. New content will be
909 * added from roff_block_text() in multiline mode.
910 */
911
912 if (ROFF_de == tok)
913 roff_setstr(r, name, "", 0);
914
915 if ('\0' == (*bufp)[pos])
916 return(ROFF_IGN);
917
918 /* If present, process the custom end-of-line marker. */
919
920 sv = pos;
921 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
922 pos++;
923
924 /*
925 * Note: groff does NOT like escape characters in the input.
926 * Instead of detecting this, we're just going to let it fly and
927 * to hell with it.
928 */
929
930 assert(pos > sv);
931 sz = (size_t)(pos - sv);
932
933 if (1 == sz && '.' == (*bufp)[sv])
934 return(ROFF_IGN);
935
936 r->last->end = mandoc_malloc(sz + 1);
937
938 memcpy(r->last->end, *bufp + sv, sz);
939 r->last->end[(int)sz] = '\0';
940
941 if ((*bufp)[pos])
942 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
943
944 return(ROFF_IGN);
945 }
946
947
948 /* ARGSUSED */
949 static enum rofferr
950 roff_block_sub(ROFF_ARGS)
951 {
952 enum rofft t;
953 int i, j;
954
955 /*
956 * First check whether a custom macro exists at this level. If
957 * it does, then check against it. This is some of groff's
958 * stranger behaviours. If we encountered a custom end-scope
959 * tag and that tag also happens to be a "real" macro, then we
960 * need to try interpreting it again as a real macro. If it's
961 * not, then return ignore. Else continue.
962 */
963
964 if (r->last->end) {
965 for (i = pos, j = 0; r->last->end[j]; j++, i++)
966 if ((*bufp)[i] != r->last->end[j])
967 break;
968
969 if ('\0' == r->last->end[j] &&
970 ('\0' == (*bufp)[i] ||
971 ' ' == (*bufp)[i] ||
972 '\t' == (*bufp)[i])) {
973 roffnode_pop(r);
974 roffnode_cleanscope(r);
975
976 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
977 i++;
978
979 pos = i;
980 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
981 return(ROFF_RERUN);
982 return(ROFF_IGN);
983 }
984 }
985
986 /*
987 * If we have no custom end-query or lookup failed, then try
988 * pulling it out of the hashtable.
989 */
990
991 t = roff_parse(r, *bufp, &pos);
992
993 /*
994 * Macros other than block-end are only significant
995 * in `de' blocks; elsewhere, simply throw them away.
996 */
997 if (ROFF_cblock != t) {
998 if (ROFF_de == tok)
999 roff_setstr(r, r->last->name, *bufp + ppos, 1);
1000 return(ROFF_IGN);
1001 }
1002
1003 assert(roffs[t].proc);
1004 return((*roffs[t].proc)(r, t, bufp, szp,
1005 ln, ppos, pos, offs));
1006 }
1007
1008
1009 /* ARGSUSED */
1010 static enum rofferr
1011 roff_block_text(ROFF_ARGS)
1012 {
1013
1014 if (ROFF_de == tok)
1015 roff_setstr(r, r->last->name, *bufp + pos, 1);
1016
1017 return(ROFF_IGN);
1018 }
1019
1020
1021 /* ARGSUSED */
1022 static enum rofferr
1023 roff_cond_sub(ROFF_ARGS)
1024 {
1025 enum rofft t;
1026 enum roffrule rr;
1027 char *ep;
1028
1029 rr = r->last->rule;
1030 roffnode_cleanscope(r);
1031
1032 /*
1033 * If the macro is unknown, first check if it contains a closing
1034 * delimiter `\}'. If it does, close out our scope and return
1035 * the currently-scoped rule (ignore or continue). Else, drop
1036 * into the currently-scoped rule.
1037 */
1038
1039 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
1040 ep = &(*bufp)[pos];
1041 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1042 ep++;
1043 if ('}' != *ep)
1044 continue;
1045
1046 /*
1047 * Make the \} go away.
1048 * This is a little haphazard, as it's not quite
1049 * clear how nroff does this.
1050 * If we're at the end of line, then just chop
1051 * off the \} and resize the buffer.
1052 * If we aren't, then conver it to spaces.
1053 */
1054
1055 if ('\0' == *(ep + 1)) {
1056 *--ep = '\0';
1057 *szp -= 2;
1058 } else
1059 *(ep - 1) = *ep = ' ';
1060
1061 roff_ccond(r, ROFF_ccond, bufp, szp,
1062 ln, pos, pos + 2, offs);
1063 break;
1064 }
1065 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1066 }
1067
1068 /*
1069 * A denied conditional must evaluate its children if and only
1070 * if they're either structurally required (such as loops and
1071 * conditionals) or a closing macro.
1072 */
1073
1074 if (ROFFRULE_DENY == rr)
1075 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
1076 if (ROFF_ccond != t)
1077 return(ROFF_IGN);
1078
1079 assert(roffs[t].proc);
1080 return((*roffs[t].proc)(r, t, bufp, szp,
1081 ln, ppos, pos, offs));
1082 }
1083
1084 /* ARGSUSED */
1085 static enum rofferr
1086 roff_cond_text(ROFF_ARGS)
1087 {
1088 char *ep;
1089 enum roffrule rr;
1090
1091 rr = r->last->rule;
1092 roffnode_cleanscope(r);
1093
1094 ep = &(*bufp)[pos];
1095 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1096 ep++;
1097 if ('}' != *ep)
1098 continue;
1099 *ep = '&';
1100 roff_ccond(r, ROFF_ccond, bufp, szp,
1101 ln, pos, pos + 2, offs);
1102 }
1103 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1104 }
1105
1106 static enum roffrule
1107 roff_evalcond(const char *v, int *pos)
1108 {
1109
1110 switch (v[*pos]) {
1111 case ('n'):
1112 (*pos)++;
1113 return(ROFFRULE_ALLOW);
1114 case ('e'):
1115 /* FALLTHROUGH */
1116 case ('o'):
1117 /* FALLTHROUGH */
1118 case ('t'):
1119 (*pos)++;
1120 return(ROFFRULE_DENY);
1121 default:
1122 break;
1123 }
1124
1125 while (v[*pos] && ' ' != v[*pos])
1126 (*pos)++;
1127 return(ROFFRULE_DENY);
1128 }
1129
1130 /* ARGSUSED */
1131 static enum rofferr
1132 roff_line_ignore(ROFF_ARGS)
1133 {
1134
1135 if (ROFF_it == tok)
1136 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it");
1137
1138 return(ROFF_IGN);
1139 }
1140
1141 /* ARGSUSED */
1142 static enum rofferr
1143 roff_cond(ROFF_ARGS)
1144 {
1145
1146 roffnode_push(r, tok, NULL, ln, ppos);
1147
1148 /*
1149 * An `.el' has no conditional body: it will consume the value
1150 * of the current rstack entry set in prior `ie' calls or
1151 * defaults to DENY.
1152 *
1153 * If we're not an `el', however, then evaluate the conditional.
1154 */
1155
1156 r->last->rule = ROFF_el == tok ?
1157 (r->rstackpos < 0 ?
1158 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1159 roff_evalcond(*bufp, &pos);
1160
1161 /*
1162 * An if-else will put the NEGATION of the current evaluated
1163 * conditional into the stack of rules.
1164 */
1165
1166 if (ROFF_ie == tok) {
1167 if (r->rstackpos == RSTACK_MAX - 1) {
1168 mandoc_msg(MANDOCERR_MEM,
1169 r->parse, ln, ppos, NULL);
1170 return(ROFF_ERR);
1171 }
1172 r->rstack[++r->rstackpos] =
1173 ROFFRULE_DENY == r->last->rule ?
1174 ROFFRULE_ALLOW : ROFFRULE_DENY;
1175 }
1176
1177 /* If the parent has false as its rule, then so do we. */
1178
1179 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1180 r->last->rule = ROFFRULE_DENY;
1181
1182 /*
1183 * Determine scope.
1184 * If there is nothing on the line after the conditional,
1185 * not even whitespace, use next-line scope.
1186 */
1187
1188 if ('\0' == (*bufp)[pos]) {
1189 r->last->endspan = 2;
1190 goto out;
1191 }
1192
1193 while (' ' == (*bufp)[pos])
1194 pos++;
1195
1196 /* An opening brace requests multiline scope. */
1197
1198 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1199 r->last->endspan = -1;
1200 pos += 2;
1201 goto out;
1202 }
1203
1204 /*
1205 * Anything else following the conditional causes
1206 * single-line scope. Warn if the scope contains
1207 * nothing but trailing whitespace.
1208 */
1209
1210 if ('\0' == (*bufp)[pos])
1211 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1212
1213 r->last->endspan = 1;
1214
1215 out:
1216 *offs = pos;
1217 return(ROFF_RERUN);
1218 }
1219
1220
1221 /* ARGSUSED */
1222 static enum rofferr
1223 roff_ds(ROFF_ARGS)
1224 {
1225 char *name, *string;
1226
1227 /*
1228 * A symbol is named by the first word following the macro
1229 * invocation up to a space. Its value is anything after the
1230 * name's trailing whitespace and optional double-quote. Thus,
1231 *
1232 * [.ds foo "bar " ]
1233 *
1234 * will have `bar " ' as its value.
1235 */
1236
1237 string = *bufp + pos;
1238 name = roff_getname(r, &string, ln, pos);
1239 if ('\0' == *name)
1240 return(ROFF_IGN);
1241
1242 /* Read past initial double-quote. */
1243 if ('"' == *string)
1244 string++;
1245
1246 /* The rest is the value. */
1247 roff_setstr(r, name, string, 0);
1248 return(ROFF_IGN);
1249 }
1250
1251 int
1252 roff_regisset(const struct roff *r, enum regs reg)
1253 {
1254
1255 return(r->regs[(int)reg].set);
1256 }
1257
1258 unsigned int
1259 roff_regget(const struct roff *r, enum regs reg)
1260 {
1261
1262 return(r->regs[(int)reg].u);
1263 }
1264
1265 void
1266 roff_regunset(struct roff *r, enum regs reg)
1267 {
1268
1269 r->regs[(int)reg].set = 0;
1270 }
1271
1272 /* ARGSUSED */
1273 static enum rofferr
1274 roff_nr(ROFF_ARGS)
1275 {
1276 const char *key;
1277 char *val;
1278 int iv;
1279
1280 val = *bufp + pos;
1281 key = roff_getname(r, &val, ln, pos);
1282
1283 if (0 == strcmp(key, "nS")) {
1284 r->regs[(int)REG_nS].set = 1;
1285 if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0)
1286 r->regs[(int)REG_nS].u = (unsigned)iv;
1287 else
1288 r->regs[(int)REG_nS].u = 0u;
1289 }
1290
1291 return(ROFF_IGN);
1292 }
1293
1294 /* ARGSUSED */
1295 static enum rofferr
1296 roff_rm(ROFF_ARGS)
1297 {
1298 const char *name;
1299 char *cp;
1300
1301 cp = *bufp + pos;
1302 while ('\0' != *cp) {
1303 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1304 if ('\0' != *name)
1305 roff_setstr(r, name, NULL, 0);
1306 }
1307 return(ROFF_IGN);
1308 }
1309
1310 /* ARGSUSED */
1311 static enum rofferr
1312 roff_Dd(ROFF_ARGS)
1313 {
1314 const char *const *cp;
1315
1316 if (MPARSE_MDOC != r->parsetype)
1317 for (cp = __mdoc_reserved; *cp; cp++)
1318 roff_setstr(r, *cp, NULL, 0);
1319
1320 return(ROFF_CONT);
1321 }
1322
1323 /* ARGSUSED */
1324 static enum rofferr
1325 roff_TH(ROFF_ARGS)
1326 {
1327 const char *const *cp;
1328
1329 if (MPARSE_MDOC != r->parsetype)
1330 for (cp = __man_reserved; *cp; cp++)
1331 roff_setstr(r, *cp, NULL, 0);
1332
1333 return(ROFF_CONT);
1334 }
1335
1336 /* ARGSUSED */
1337 static enum rofferr
1338 roff_TE(ROFF_ARGS)
1339 {
1340
1341 if (NULL == r->tbl)
1342 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1343 else
1344 tbl_end(&r->tbl);
1345
1346 return(ROFF_IGN);
1347 }
1348
1349 /* ARGSUSED */
1350 static enum rofferr
1351 roff_T_(ROFF_ARGS)
1352 {
1353
1354 if (NULL == r->tbl)
1355 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1356 else
1357 tbl_restart(ppos, ln, r->tbl);
1358
1359 return(ROFF_IGN);
1360 }
1361
#if 0
/*
 * Currently compiled out.
 * Return 1 if an equation is open and eqn_end() on it yields
 * ROFF_EQN, else 0.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return(0);

	return(ROFF_EQN == eqn_end(&r->eqn) ? 1 : 0);
}
#endif
1370
1371 static void
1372 roff_openeqn(struct roff *r, const char *name, int line,
1373 int offs, const char *buf)
1374 {
1375 struct eqn_node *e;
1376 int poff;
1377
1378 assert(NULL == r->eqn);
1379 e = eqn_alloc(name, offs, line, r->parse);
1380
1381 if (r->last_eqn)
1382 r->last_eqn->next = e;
1383 else
1384 r->first_eqn = r->last_eqn = e;
1385
1386 r->eqn = r->last_eqn = e;
1387
1388 if (buf) {
1389 poff = 0;
1390 eqn_read(&r->eqn, line, buf, offs, &poff);
1391 }
1392 }
1393
1394 /* ARGSUSED */
1395 static enum rofferr
1396 roff_EQ(ROFF_ARGS)
1397 {
1398
1399 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1400 return(ROFF_IGN);
1401 }
1402
1403 /* ARGSUSED */
1404 static enum rofferr
1405 roff_EN(ROFF_ARGS)
1406 {
1407
1408 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1409 return(ROFF_IGN);
1410 }
1411
1412 /* ARGSUSED */
1413 static enum rofferr
1414 roff_TS(ROFF_ARGS)
1415 {
1416 struct tbl_node *tbl;
1417
1418 if (r->tbl) {
1419 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1420 tbl_end(&r->tbl);
1421 }
1422
1423 tbl = tbl_alloc(ppos, ln, r->parse);
1424
1425 if (r->last_tbl)
1426 r->last_tbl->next = tbl;
1427 else
1428 r->first_tbl = r->last_tbl = tbl;
1429
1430 r->tbl = r->last_tbl = tbl;
1431 return(ROFF_IGN);
1432 }
1433
1434 /* ARGSUSED */
1435 static enum rofferr
1436 roff_cc(ROFF_ARGS)
1437 {
1438 const char *p;
1439
1440 p = *bufp + pos;
1441
1442 if ('\0' == *p || '.' == (r->control = *p++))
1443 r->control = 0;
1444
1445 if ('\0' != *p)
1446 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1447
1448 return(ROFF_IGN);
1449 }
1450
1451 /* ARGSUSED */
1452 static enum rofferr
1453 roff_tr(ROFF_ARGS)
1454 {
1455 const char *p, *first, *second;
1456 size_t fsz, ssz;
1457 enum mandoc_esc esc;
1458
1459 p = *bufp + pos;
1460
1461 if ('\0' == *p) {
1462 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1463 return(ROFF_IGN);
1464 }
1465
1466 while ('\0' != *p) {
1467 fsz = ssz = 1;
1468
1469 first = p++;
1470 if ('\\' == *first) {
1471 esc = mandoc_escape(&p, NULL, NULL);
1472 if (ESCAPE_ERROR == esc) {
1473 mandoc_msg
1474 (MANDOCERR_BADESCAPE, r->parse,
1475 ln, (int)(p - *bufp), NULL);
1476 return(ROFF_IGN);
1477 }
1478 fsz = (size_t)(p - first);
1479 }
1480
1481 second = p++;
1482 if ('\\' == *second) {
1483 esc = mandoc_escape(&p, NULL, NULL);
1484 if (ESCAPE_ERROR == esc) {
1485 mandoc_msg
1486 (MANDOCERR_BADESCAPE, r->parse,
1487 ln, (int)(p - *bufp), NULL);
1488 return(ROFF_IGN);
1489 }
1490 ssz = (size_t)(p - second);
1491 } else if ('\0' == *second) {
1492 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1493 ln, (int)(p - *bufp), NULL);
1494 second = " ";
1495 p--;
1496 }
1497
1498 if (fsz > 1) {
1499 roff_setstrn(&r->xmbtab, first,
1500 fsz, second, ssz, 0);
1501 continue;
1502 }
1503
1504 if (NULL == r->xtab)
1505 r->xtab = mandoc_calloc
1506 (128, sizeof(struct roffstr));
1507
1508 free(r->xtab[(int)*first].p);
1509 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1510 r->xtab[(int)*first].sz = ssz;
1511 }
1512
1513 return(ROFF_IGN);
1514 }
1515
1516 /* ARGSUSED */
1517 static enum rofferr
1518 roff_so(ROFF_ARGS)
1519 {
1520 char *name;
1521
1522 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1523
1524 /*
1525 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1526 * opening anything that's not in our cwd or anything beneath
1527 * it. Thus, explicitly disallow traversing up the file-system
1528 * or using absolute paths.
1529 */
1530
1531 name = *bufp + pos;
1532 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1533 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1534 return(ROFF_ERR);
1535 }
1536
1537 *offs = pos;
1538 return(ROFF_SO);
1539 }
1540
1541 /* ARGSUSED */
1542 static enum rofferr
1543 roff_userdef(ROFF_ARGS)
1544 {
1545 const char *arg[9];
1546 char *cp, *n1, *n2;
1547 int i;
1548
1549 /*
1550 * Collect pointers to macro argument strings
1551 * and null-terminate them.
1552 */
1553 cp = *bufp + pos;
1554 for (i = 0; i < 9; i++)
1555 arg[i] = '\0' == *cp ? "" :
1556 mandoc_getarg(r->parse, &cp, ln, &pos);
1557
1558 /*
1559 * Expand macro arguments.
1560 */
1561 *szp = 0;
1562 n1 = cp = mandoc_strdup(r->current_string);
1563 while (NULL != (cp = strstr(cp, "\\$"))) {
1564 i = cp[2] - '1';
1565 if (0 > i || 8 < i) {
1566 /* Not an argument invocation. */
1567 cp += 2;
1568 continue;
1569 }
1570
1571 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1572 n2 = mandoc_malloc(*szp);
1573
1574 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1575 strlcat(n2, arg[i], *szp);
1576 strlcat(n2, cp + 3, *szp);
1577
1578 cp = n2 + (cp - n1);
1579 free(n1);
1580 n1 = n2;
1581 }
1582
1583 /*
1584 * Replace the macro invocation
1585 * by the expanded macro.
1586 */
1587 free(*bufp);
1588 *bufp = n1;
1589 if (0 == *szp)
1590 *szp = strlen(*bufp) + 1;
1591
1592 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1593 ROFF_REPARSE : ROFF_APPEND);
1594 }
1595
1596 static char *
1597 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1598 {
1599 char *name, *cp;
1600
1601 name = *cpp;
1602 if ('\0' == *name)
1603 return(name);
1604
1605 /* Read until end of name. */
1606 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1607 if ('\\' != *cp)
1608 continue;
1609 cp++;
1610 if ('\\' == *cp)
1611 continue;
1612 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1613 *cp = '\0';
1614 name = cp;
1615 }
1616
1617 /* Nil-terminate name. */
1618 if ('\0' != *cp)
1619 *(cp++) = '\0';
1620
1621 /* Read past spaces. */
1622 while (' ' == *cp)
1623 cp++;
1624
1625 *cpp = cp;
1626 return(name);
1627 }
1628
1629 /*
1630 * Store *string into the user-defined string called *name.
1631 * In multiline mode, append to an existing entry and append '\n';
1632 * else replace the existing entry, if there is one.
1633 * To clear an existing entry, call with (*r, *name, NULL, 0).
1634 */
1635 static void
1636 roff_setstr(struct roff *r, const char *name, const char *string,
1637 int multiline)
1638 {
1639
1640 roff_setstrn(&r->strtab, name, strlen(name), string,
1641 string ? strlen(string) : 0, multiline);
1642 }
1643
1644 static void
1645 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1646 const char *string, size_t stringsz, int multiline)
1647 {
1648 struct roffkv *n;
1649 char *c;
1650 int i;
1651 size_t oldch, newch;
1652
1653 /* Search for an existing string with the same name. */
1654 n = *r;
1655
1656 while (n && strcmp(name, n->key.p))
1657 n = n->next;
1658
1659 if (NULL == n) {
1660 /* Create a new string table entry. */
1661 n = mandoc_malloc(sizeof(struct roffkv));
1662 n->key.p = mandoc_strndup(name, namesz);
1663 n->key.sz = namesz;
1664 n->val.p = NULL;
1665 n->val.sz = 0;
1666 n->next = *r;
1667 *r = n;
1668 } else if (0 == multiline) {
1669 /* In multiline mode, append; else replace. */
1670 free(n->val.p);
1671 n->val.p = NULL;
1672 n->val.sz = 0;
1673 }
1674
1675 if (NULL == string)
1676 return;
1677
1678 /*
1679 * One additional byte for the '\n' in multiline mode,
1680 * and one for the terminating '\0'.
1681 */
1682 newch = stringsz + (multiline ? 2u : 1u);
1683
1684 if (NULL == n->val.p) {
1685 n->val.p = mandoc_malloc(newch);
1686 *n->val.p = '\0';
1687 oldch = 0;
1688 } else {
1689 oldch = n->val.sz;
1690 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1691 }
1692
1693 /* Skip existing content in the destination buffer. */
1694 c = n->val.p + (int)oldch;
1695
1696 /* Append new content to the destination buffer. */
1697 i = 0;
1698 while (i < (int)stringsz) {
1699 /*
1700 * Rudimentary roff copy mode:
1701 * Handle escaped backslashes.
1702 */
1703 if ('\\' == string[i] && '\\' == string[i + 1])
1704 i++;
1705 *c++ = string[i++];
1706 }
1707
1708 /* Append terminating bytes. */
1709 if (multiline)
1710 *c++ = '\n';
1711
1712 *c = '\0';
1713 n->val.sz = (int)(c - n->val.p);
1714 }
1715
1716 static const char *
1717 roff_getstrn(const struct roff *r, const char *name, size_t len)
1718 {
1719 const struct roffkv *n;
1720
1721 for (n = r->strtab; n; n = n->next)
1722 if (0 == strncmp(name, n->key.p, len) &&
1723 '\0' == n->key.p[(int)len])
1724 return(n->val.p);
1725
1726 return(NULL);
1727 }
1728
1729 static void
1730 roff_freestr(struct roffkv *r)
1731 {
1732 struct roffkv *n, *nn;
1733
1734 for (n = r; n; n = nn) {
1735 free(n->key.p);
1736 free(n->val.p);
1737 nn = n->next;
1738 free(n);
1739 }
1740 }
1741
1742 const struct tbl_span *
1743 roff_span(const struct roff *r)
1744 {
1745
1746 return(r->tbl ? tbl_span(r->tbl) : NULL);
1747 }
1748
1749 const struct eqn *
1750 roff_eqn(const struct roff *r)
1751 {
1752
1753 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1754 }
1755
1756 /*
1757 * Duplicate an input string, making the appropriate character
1758 * conversations (as stipulated by `tr') along the way.
1759 * Returns a heap-allocated string with all the replacements made.
1760 */
1761 char *
1762 roff_strdup(const struct roff *r, const char *p)
1763 {
1764 const struct roffkv *cp;
1765 char *res;
1766 const char *pp;
1767 size_t ssz, sz;
1768 enum mandoc_esc esc;
1769
1770 if (NULL == r->xmbtab && NULL == r->xtab)
1771 return(mandoc_strdup(p));
1772 else if ('\0' == *p)
1773 return(mandoc_strdup(""));
1774
1775 /*
1776 * Step through each character looking for term matches
1777 * (remember that a `tr' can be invoked with an escape, which is
1778 * a glyph but the escape is multi-character).
1779 * We only do this if the character hash has been initialised
1780 * and the string is >0 length.
1781 */
1782
1783 res = NULL;
1784 ssz = 0;
1785
1786 while ('\0' != *p) {
1787 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1788 sz = r->xtab[(int)*p].sz;
1789 res = mandoc_realloc(res, ssz + sz + 1);
1790 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1791 ssz += sz;
1792 p++;
1793 continue;
1794 } else if ('\\' != *p) {
1795 res = mandoc_realloc(res, ssz + 2);
1796 res[ssz++] = *p++;
1797 continue;
1798 }
1799
1800 /* Search for term matches. */
1801 for (cp = r->xmbtab; cp; cp = cp->next)
1802 if (0 == strncmp(p, cp->key.p, cp->key.sz))
1803 break;
1804
1805 if (NULL != cp) {
1806 /*
1807 * A match has been found.
1808 * Append the match to the array and move
1809 * forward by its keysize.
1810 */
1811 res = mandoc_realloc
1812 (res, ssz + cp->val.sz + 1);
1813 memcpy(res + ssz, cp->val.p, cp->val.sz);
1814 ssz += cp->val.sz;
1815 p += (int)cp->key.sz;
1816 continue;
1817 }
1818
1819 /*
1820 * Handle escapes carefully: we need to copy
1821 * over just the escape itself, or else we might
1822 * do replacements within the escape itself.
1823 * Make sure to pass along the bogus string.
1824 */
1825 pp = p++;
1826 esc = mandoc_escape(&p, NULL, NULL);
1827 if (ESCAPE_ERROR == esc) {
1828 sz = strlen(pp);
1829 res = mandoc_realloc(res, ssz + sz + 1);
1830 memcpy(res + ssz, pp, sz);
1831 break;
1832 }
1833 /*
1834 * We bail out on bad escapes.
1835 * No need to warn: we already did so when
1836 * roff_res() was called.
1837 */
1838 sz = (int)(p - pp);
1839 res = mandoc_realloc(res, ssz + sz + 1);
1840 memcpy(res + ssz, pp, sz);
1841 ssz += sz;
1842 }
1843
1844 res[(int)ssz] = '\0';
1845 return(res);
1846 }
1847
1848 /*
1849 * Find out whether a line is a macro line or not.
1850 * If it is, adjust the current position and return one; if it isn't,
1851 * return zero and don't change the current position.
1852 * If the control character has been set with `.cc', then let that grain
1853 * precedence.
1854 * This is slighly contrary to groff, where using the non-breaking
1855 * control character when `cc' has been invoked will cause the
1856 * non-breaking macro contents to be printed verbatim.
1857 */
1858 int
1859 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
1860 {
1861 int pos;
1862
1863 pos = *ppos;
1864
1865 if (0 != r->control && cp[pos] == r->control)
1866 pos++;
1867 else if (0 != r->control)
1868 return(0);
1869 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
1870 pos += 2;
1871 else if ('.' == cp[pos] || '\'' == cp[pos])
1872 pos++;
1873 else
1874 return(0);
1875
1876 while (' ' == cp[pos] || '\t' == cp[pos])
1877 pos++;
1878
1879 *ppos = pos;
1880 return(1);
1881 }