]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Handle some predefined read-only number registers, e.g. .H and .V.
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.192 2014/02/14 22:27:41 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "libroff.h"
30 #include "libmandoc.h"
31
32 /* Maximum number of nested if-else conditionals. */
33 #define RSTACK_MAX 128
34
35 /* Maximum number of string expansions per line, to break infinite loops. */
36 #define EXPAND_LIMIT 1000
37
/*
 * Tokens for the requests and macros handled by this parser.
 * The values are used as indices into the roffs[] table
 * (e.g. roffs[t].proc), so the order here must stay in step
 * with the order of the initialisers in that table.
 * ROFF_cblock and ROFF_ccond are the block closers `.' and `\}'.
 * ROFF_USERDEF stands for any user-defined macro and has no name.
 * ROFF_MAX is the number of tokens and doubles as the "no such
 * macro" result of roffhash_find() and roff_parse().
 */
38 enum rofft {
39 ROFF_ad,
40 ROFF_am,
41 ROFF_ami,
42 ROFF_am1,
43 ROFF_cc,
44 ROFF_de,
45 ROFF_dei,
46 ROFF_de1,
47 ROFF_ds,
48 ROFF_el,
49 ROFF_fam,
50 ROFF_hw,
51 ROFF_hy,
52 ROFF_ie,
53 ROFF_if,
54 ROFF_ig,
55 ROFF_it,
56 ROFF_ne,
57 ROFF_nh,
58 ROFF_nr,
59 ROFF_ns,
60 ROFF_ps,
61 ROFF_rm,
62 ROFF_so,
63 ROFF_ta,
64 ROFF_tr,
65 ROFF_Dd,
66 ROFF_TH,
67 ROFF_TS,
68 ROFF_TE,
69 ROFF_T_,
70 ROFF_EQ,
71 ROFF_EN,
72 ROFF_cblock,
73 ROFF_ccond,
74 ROFF_USERDEF,
75 ROFF_MAX
76 };
77
/*
 * Result of evaluating a conditional (`if', `ie', `el').
 * Lines inside a scope whose rule is ROFFRULE_DENY are dropped
 * (the conditional handlers return ROFF_IGN for them); lines inside
 * a ROFFRULE_ALLOW scope are passed on (ROFF_CONT).
 */
78 enum roffrule {
79 ROFFRULE_DENY,
80 ROFFRULE_ALLOW
81 };
82
83 /*
84 * An incredibly-simple string buffer:
 * a pointer to the text together with its cached length.
85 */
86 struct roffstr {
87 char *p; /* nil-terminated buffer */
88 size_t sz; /* saved strlen(p) */
89 };
90
91 /*
92 * A key-value roffstr pair as part of a singly-linked list.
 * Used for the table of user-defined strings and macros and for
 * the multi-byte translation table in struct roff.
93 */
94 struct roffkv {
95 struct roffstr key; /* name to look up */
96 struct roffstr val; /* text associated with the name */
97 struct roffkv *next; /* next in list */
98 };
99
100 /*
101 * A single number register as part of a singly-linked list.
102 */
103 struct roffreg {
104 struct roffstr key; /* register name */
105 int val; /* current value of the register */
106 struct roffreg *next; /* next in list */
107 };
108
/*
 * State of one roff parser instance, created by roff_alloc(),
 * emptied by roff_reset() and destroyed by roff_free().
 */
109 struct roff {
110 enum mparset parsetype; /* requested parse type */
111 struct mparse *parse; /* parse point */
112 int quick; /* skip standard macro deletion */
113 struct roffnode *last; /* leaf of stack */
114 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
115 char control; /* control character */
116 int rstackpos; /* top of rstack, -1 when it is empty */
117 struct roffreg *regtab; /* number registers */
118 struct roffkv *strtab; /* user-defined strings & macros */
119 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
120 struct roffstr *xtab; /* single-byte trans table (`tr') */
121 const char *current_string; /* value of last called user macro */
122 struct tbl_node *first_tbl; /* first table parsed */
123 struct tbl_node *last_tbl; /* last table parsed */
124 struct tbl_node *tbl; /* current table being parsed */
125 struct eqn_node *last_eqn; /* last equation parsed */
126 struct eqn_node *first_eqn; /* first equation parsed */
127 struct eqn_node *eqn; /* current equation being parsed */
128 };
129
/*
 * One open block or conditional scope.  Nodes form a stack linked
 * through `parent'; they are created by roffnode_push() and released
 * by roffnode_pop(), which also frees `name' and `end'.
 */
130 struct roffnode {
131 enum rofft tok; /* type of node */
132 struct roffnode *parent; /* up one in stack */
133 int line; /* parse line */
134 int col; /* parse col */
135 char *name; /* node name, e.g. macro name */
136 char *end; /* end-rules: custom token */
137 int endspan; /* end-rules: next-line or infty */
138 enum roffrule rule; /* current evaluation rule */
139 };
140
141 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
142 enum rofft tok, /* tok of macro */ \
143 char **bufp, /* input buffer */ \
144 size_t *szp, /* size of input buffer */ \
145 int ln, /* parse line */ \
146 int ppos, /* original pos in buffer */ \
147 int pos, /* current pos in buffer */ \
148 int *offs /* reset offset of buffer data */
149
150 typedef enum rofferr (*roffproc)(ROFF_ARGS);
151
/*
 * Description of one request or macro: its name and the handlers
 * to run when it is invoked (`proc'), when a text line is seen while
 * its scope is open (`text'), and when a control line is seen while
 * its scope is open (`sub').
 */
152 struct roffmac {
153 const char *name; /* macro name */
154 roffproc proc; /* process new macro */
155 roffproc text; /* process as child text of macro */
156 roffproc sub; /* process as child of macro */
157 int flags; /* bit mask of ROFFMAC_* values */
158 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
159 struct roffmac *next; /* next entry in the same hash bucket */
160 };
161
162 struct predef {
163 const char *name; /* predefined input name */
164 const char *str; /* replacement symbol */
165 };
166
167 #define PREDEF(__name, __str) \
168 { (__name), (__str) },
169
170 static enum rofft roffhash_find(const char *, size_t);
171 static void roffhash_init(void);
172 static void roffnode_cleanscope(struct roff *);
173 static void roffnode_pop(struct roff *);
174 static void roffnode_push(struct roff *, enum rofft,
175 const char *, int, int);
176 static enum rofferr roff_block(ROFF_ARGS);
177 static enum rofferr roff_block_text(ROFF_ARGS);
178 static enum rofferr roff_block_sub(ROFF_ARGS);
179 static enum rofferr roff_cblock(ROFF_ARGS);
180 static enum rofferr roff_cc(ROFF_ARGS);
181 static enum rofferr roff_ccond(ROFF_ARGS);
182 static enum rofferr roff_cond(ROFF_ARGS);
183 static enum rofferr roff_cond_text(ROFF_ARGS);
184 static enum rofferr roff_cond_sub(ROFF_ARGS);
185 static enum rofferr roff_ds(ROFF_ARGS);
186 static enum roffrule roff_evalcond(const char *, int *);
187 static void roff_free1(struct roff *);
188 static void roff_freereg(struct roffreg *);
189 static void roff_freestr(struct roffkv *);
190 static char *roff_getname(struct roff *, char **, int, int);
191 static int roff_getnum(const char *, int *, int *);
192 static int roff_getop(const char *, int *, char *);
193 static int roff_getregn(const struct roff *,
194 const char *, size_t);
195 static int roff_getregro(const char *name);
196 static const char *roff_getstrn(const struct roff *,
197 const char *, size_t);
198 static enum rofferr roff_it(ROFF_ARGS);
199 static enum rofferr roff_line_ignore(ROFF_ARGS);
200 static enum rofferr roff_nr(ROFF_ARGS);
201 static void roff_openeqn(struct roff *, const char *,
202 int, int, const char *);
203 static enum rofft roff_parse(struct roff *, const char *, int *);
204 static enum rofferr roff_parsetext(char **, size_t *, int, int *);
205 static enum rofferr roff_res(struct roff *,
206 char **, size_t *, int, int);
207 static enum rofferr roff_rm(ROFF_ARGS);
208 static void roff_setstr(struct roff *,
209 const char *, const char *, int);
210 static void roff_setstrn(struct roffkv **, const char *,
211 size_t, const char *, size_t, int);
212 static enum rofferr roff_so(ROFF_ARGS);
213 static enum rofferr roff_tr(ROFF_ARGS);
214 static enum rofferr roff_Dd(ROFF_ARGS);
215 static enum rofferr roff_TH(ROFF_ARGS);
216 static enum rofferr roff_TE(ROFF_ARGS);
217 static enum rofferr roff_TS(ROFF_ARGS);
218 static enum rofferr roff_EQ(ROFF_ARGS);
219 static enum rofferr roff_EN(ROFF_ARGS);
220 static enum rofferr roff_T_(ROFF_ARGS);
221 static enum rofferr roff_userdef(ROFF_ARGS);
222
223 /* See roffhash_find() */
224
225 #define ASCII_HI 126
226 #define ASCII_LO 33
227 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
228
229 static struct roffmac *hash[HASHWIDTH];
230
/*
 * Table of all requests and macros, indexed by enum rofft; the
 * initialisers must stay in the same order as the enum constants.
 * The final entry (ROFF_USERDEF) has no name: roffhash_init() stops
 * before it, so it is never entered into the hash table.
 * The trailing `next' members are filled in by roffhash_init().
 */
231 static struct roffmac roffs[ROFF_MAX] = {
232 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
233 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
234 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
235 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
236 { "cc", roff_cc, NULL, NULL, 0, NULL },
237 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
238 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
239 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
240 { "ds", roff_ds, NULL, NULL, 0, NULL },
241 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
242 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
243 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
244 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
245 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
246 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
247 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
248 { "it", roff_it, NULL, NULL, 0, NULL },
249 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
250 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
251 { "nr", roff_nr, NULL, NULL, 0, NULL },
252 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
253 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
254 { "rm", roff_rm, NULL, NULL, 0, NULL },
255 { "so", roff_so, NULL, NULL, 0, NULL },
256 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
257 { "tr", roff_tr, NULL, NULL, 0, NULL },
258 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
259 { "TH", roff_TH, NULL, NULL, 0, NULL },
260 { "TS", roff_TS, NULL, NULL, 0, NULL },
261 { "TE", roff_TE, NULL, NULL, 0, NULL },
262 { "T&", roff_T_, NULL, NULL, 0, NULL },
263 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
264 { "EN", roff_EN, NULL, NULL, 0, NULL },
265 { ".", roff_cblock, NULL, NULL, 0, NULL },
266 { "\\}", roff_ccond, NULL, NULL, 0, NULL },
267 { NULL, roff_userdef, NULL, NULL, 0, NULL },
268 };
269
/*
 * NULL-terminated list of macro names.
 * NOTE(review): judging by the identifier these are the names
 * reserved for the mdoc language; the code using this list is not
 * in this part of the file - confirm against the callers.
 */
270 const char *const __mdoc_reserved[] = {
271 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
272 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
273 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
274 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
275 "Ds", "Dt", "Dv", "Dx", "D1",
276 "Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
277 "En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
278 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
279 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
280 "Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
281 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
282 "Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
283 "Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
284 "Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
285 "Ss", "St", "Sx", "Sy",
286 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
287 "%A", "%B", "%D", "%I", "%J", "%N", "%O",
288 "%P", "%Q", "%R", "%T", "%U", "%V",
289 NULL
290 };
291
/*
 * NULL-terminated list of macro names.
 * NOTE(review): judging by the identifier these are the names
 * reserved for the man language; the code using this list is not
 * in this part of the file - confirm against the callers.
 */
292 const char *const __man_reserved[] = {
293 "AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
294 "EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
295 "LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
296 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
297 "TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
298 NULL
299 };
300
301 /* Array of injected predefined strings. */
302 #define PREDEFS_MAX 38
303 static const struct predef predefs[PREDEFS_MAX] = {
304 #include "predefs.in"
305 };
306
307 /* See roffhash_find() */
308 #define ROFF_HASH(p) (p[0] - ASCII_LO)
309
310 static int roffit_lines; /* number of lines to delay */
311 static char *roffit_macro; /* nil-terminated macro line */
312
313 static void
314 roffhash_init(void)
315 {
316 struct roffmac *n;
317 int buc, i;
318
319 for (i = 0; i < (int)ROFF_USERDEF; i++) {
320 assert(roffs[i].name[0] >= ASCII_LO);
321 assert(roffs[i].name[0] <= ASCII_HI);
322
323 buc = ROFF_HASH(roffs[i].name);
324
325 if (NULL != (n = hash[buc])) {
326 for ( ; n->next; n = n->next)
327 /* Do nothing. */ ;
328 n->next = &roffs[i];
329 } else
330 hash[buc] = &roffs[i];
331 }
332 }
333
334 /*
335 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
336 * the nil-terminated string name could be found.
337 */
338 static enum rofft
339 roffhash_find(const char *p, size_t s)
340 {
341 int buc;
342 struct roffmac *n;
343
344 /*
345 * libroff has an extremely simple hashtable, for the time
346 * being, which simply keys on the first character, which must
347 * be printable, then walks a chain. It works well enough until
348 * optimised.
349 */
350
351 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
352 return(ROFF_MAX);
353
354 buc = ROFF_HASH(p);
355
356 if (NULL == (n = hash[buc]))
357 return(ROFF_MAX);
358 for ( ; n; n = n->next)
359 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
360 return((enum rofft)(n - roffs));
361
362 return(ROFF_MAX);
363 }
364
365
366 /*
367 * Pop the current node off of the stack of roff instructions currently
368 * pending.
369 */
370 static void
371 roffnode_pop(struct roff *r)
372 {
373 struct roffnode *p;
374
375 assert(r->last);
376 p = r->last;
377
378 r->last = r->last->parent;
379 free(p->name);
380 free(p->end);
381 free(p);
382 }
383
384
385 /*
386 * Push a roff node onto the instruction stack. This must later be
387 * removed with roffnode_pop().
388 */
389 static void
390 roffnode_push(struct roff *r, enum rofft tok, const char *name,
391 int line, int col)
392 {
393 struct roffnode *p;
394
395 p = mandoc_calloc(1, sizeof(struct roffnode));
396 p->tok = tok;
397 if (name)
398 p->name = mandoc_strdup(name);
399 p->parent = r->last;
400 p->line = line;
401 p->col = col;
402 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
403
404 r->last = p;
405 }
406
407
408 static void
409 roff_free1(struct roff *r)
410 {
411 struct tbl_node *tbl;
412 struct eqn_node *e;
413 int i;
414
415 while (NULL != (tbl = r->first_tbl)) {
416 r->first_tbl = tbl->next;
417 tbl_free(tbl);
418 }
419
420 r->first_tbl = r->last_tbl = r->tbl = NULL;
421
422 while (NULL != (e = r->first_eqn)) {
423 r->first_eqn = e->next;
424 eqn_free(e);
425 }
426
427 r->first_eqn = r->last_eqn = r->eqn = NULL;
428
429 while (r->last)
430 roffnode_pop(r);
431
432 roff_freestr(r->strtab);
433 roff_freestr(r->xmbtab);
434
435 r->strtab = r->xmbtab = NULL;
436
437 roff_freereg(r->regtab);
438
439 r->regtab = NULL;
440
441 if (r->xtab)
442 for (i = 0; i < 128; i++)
443 free(r->xtab[i].p);
444
445 free(r->xtab);
446 r->xtab = NULL;
447 }
448
449 void
450 roff_reset(struct roff *r)
451 {
452
453 roff_free1(r);
454 r->control = 0;
455 }
456
457
/*
 * Destroy a parser: release everything it holds, then the parser
 * structure itself.  The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
465
466
467 struct roff *
468 roff_alloc(enum mparset type, struct mparse *parse, int quick)
469 {
470 struct roff *r;
471
472 r = mandoc_calloc(1, sizeof(struct roff));
473 r->parsetype = type;
474 r->parse = parse;
475 r->quick = quick;
476 r->rstackpos = -1;
477
478 roffhash_init();
479
480 return(r);
481 }
482
483 /*
484 * In the current line, expand user-defined strings ("\*")
485 * and references to number registers ("\n").
486 * Also check the syntax of other escape sequences.
487 */
488 static enum rofferr
489 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
490 {
491 char ubuf[12]; /* buffer to print the number */
492 const char *stesc; /* start of an escape sequence ('\\') */
493 const char *stnam; /* start of the name, after "[(*" */
494 const char *cp; /* end of the name, e.g. before ']' */
495 const char *res; /* the string to be substituted */
496 char *nbuf; /* new buffer to copy bufp to */
497 size_t nsz; /* size of the new buffer */
498 size_t maxl; /* expected length of the escape name */
499 size_t naml; /* actual length of the escape name */
500 int expand_count; /* to avoid infinite loops */
501
502 expand_count = 0;
503
504 again:
505 cp = *bufp + pos;
506 while (NULL != (cp = strchr(cp, '\\'))) {
507 stesc = cp++;
508
509 /*
510 * The second character must be an asterisk or an n.
511 * If it isn't, skip it anyway: It is escaped,
512 * so it can't start another escape sequence.
513 */
514
515 if ('\0' == *cp)
516 return(ROFF_CONT);
517
518 switch (*cp) {
519 case ('*'):
520 res = NULL;
521 break;
522 case ('n'):
523 res = ubuf;
524 break;
525 default:
526 if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
527 continue;
528 mandoc_msg
529 (MANDOCERR_BADESCAPE, r->parse,
530 ln, (int)(stesc - *bufp), NULL);
531 return(ROFF_CONT);
532 }
533
534 cp++;
535
536 /*
537 * The third character decides the length
538 * of the name of the string or register.
539 * Save a pointer to the name.
540 */
541
542 switch (*cp) {
543 case ('\0'):
544 return(ROFF_CONT);
545 case ('('):
546 cp++;
547 maxl = 2;
548 break;
549 case ('['):
550 cp++;
551 maxl = 0;
552 break;
553 default:
554 maxl = 1;
555 break;
556 }
557 stnam = cp;
558
559 /* Advance to the end of the name. */
560
561 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
562 if ('\0' == *cp) {
563 mandoc_msg
564 (MANDOCERR_BADESCAPE,
565 r->parse, ln,
566 (int)(stesc - *bufp), NULL);
567 return(ROFF_CONT);
568 }
569 if (0 == maxl && ']' == *cp)
570 break;
571 }
572
573 /*
574 * Retrieve the replacement string; if it is
575 * undefined, resume searching for escapes.
576 */
577
578 if (NULL == res)
579 res = roff_getstrn(r, stnam, naml);
580 else
581 snprintf(ubuf, sizeof(ubuf), "%d",
582 roff_getregn(r, stnam, naml));
583
584 if (NULL == res) {
585 mandoc_msg
586 (MANDOCERR_BADESCAPE, r->parse,
587 ln, (int)(stesc - *bufp), NULL);
588 res = "";
589 }
590
591 /* Replace the escape sequence by the string. */
592
593 pos = stesc - *bufp;
594
595 nsz = *szp + strlen(res) + 1;
596 nbuf = mandoc_malloc(nsz);
597
598 strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
599 strlcat(nbuf, res, nsz);
600 strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);
601
602 free(*bufp);
603
604 *bufp = nbuf;
605 *szp = nsz;
606
607 if (EXPAND_LIMIT >= ++expand_count)
608 goto again;
609
610 /* Just leave the string unexpanded. */
611 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
612 return(ROFF_IGN);
613 }
614 return(ROFF_CONT);
615 }
616
617 /*
618 * Process text streams:
619 * Convert all breakable hyphens into ASCII_HYPH.
620 * Decrement and spring input line trap.
621 */
622 static enum rofferr
623 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
624 {
625 size_t sz;
626 const char *start;
627 char *p;
628 int isz;
629 enum mandoc_esc esc;
630
631 start = p = *bufp + pos;
632
633 while ('\0' != *p) {
634 sz = strcspn(p, "-\\");
635 p += sz;
636
637 if ('\0' == *p)
638 break;
639
640 if ('\\' == *p) {
641 /* Skip over escapes. */
642 p++;
643 esc = mandoc_escape((const char **)&p, NULL, NULL);
644 if (ESCAPE_ERROR == esc)
645 break;
646 continue;
647 } else if (p == start) {
648 p++;
649 continue;
650 }
651
652 if (isalpha((unsigned char)p[-1]) &&
653 isalpha((unsigned char)p[1]))
654 *p = ASCII_HYPH;
655 p++;
656 }
657
658 /* Spring the input line trap. */
659 if (1 == roffit_lines) {
660 isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
661 if (-1 == isz) {
662 perror(NULL);
663 exit((int)MANDOCLEVEL_SYSERR);
664 }
665 free(*bufp);
666 *bufp = p;
667 *szp = isz + 1;
668 *offs = 0;
669 free(roffit_macro);
670 roffit_lines = 0;
671 return(ROFF_REPARSE);
672 } else if (1 < roffit_lines)
673 --roffit_lines;
674 return(ROFF_CONT);
675 }
676
677 enum rofferr
678 roff_parseln(struct roff *r, int ln, char **bufp,
679 size_t *szp, int pos, int *offs)
680 {
681 enum rofft t;
682 enum rofferr e;
683 int ppos, ctl;
684
685 /*
686 * Run the reserved-word filter only if we have some reserved
687 * words to fill in.
688 */
689
690 e = roff_res(r, bufp, szp, ln, pos);
691 if (ROFF_IGN == e)
692 return(e);
693 assert(ROFF_CONT == e);
694
695 ppos = pos;
696 ctl = roff_getcontrol(r, *bufp, &pos);
697
698 /*
699 * First, if a scope is open and we're not a macro, pass the
700 * text through the macro's filter. If a scope isn't open and
701 * we're not a macro, just let it through.
702 * Finally, if there's an equation scope open, divert it into it
703 * no matter our state.
704 */
705
706 if (r->last && ! ctl) {
707 t = r->last->tok;
708 assert(roffs[t].text);
709 e = (*roffs[t].text)
710 (r, t, bufp, szp, ln, pos, pos, offs);
711 assert(ROFF_IGN == e || ROFF_CONT == e);
712 if (ROFF_CONT != e)
713 return(e);
714 }
715 if (r->eqn)
716 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
717 if ( ! ctl) {
718 if (r->tbl)
719 return(tbl_read(r->tbl, ln, *bufp, pos));
720 return(roff_parsetext(bufp, szp, pos, offs));
721 }
722
723 /*
724 * If a scope is open, go to the child handler for that macro,
725 * as it may want to preprocess before doing anything with it.
726 * Don't do so if an equation is open.
727 */
728
729 if (r->last) {
730 t = r->last->tok;
731 assert(roffs[t].sub);
732 return((*roffs[t].sub)
733 (r, t, bufp, szp,
734 ln, ppos, pos, offs));
735 }
736
737 /*
738 * Lastly, as we've no scope open, try to look up and execute
739 * the new macro. If no macro is found, simply return and let
740 * the compilers handle it.
741 */
742
743 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
744 return(ROFF_CONT);
745
746 assert(roffs[t].proc);
747 return((*roffs[t].proc)
748 (r, t, bufp, szp,
749 ln, ppos, pos, offs));
750 }
751
752
753 void
754 roff_endparse(struct roff *r)
755 {
756
757 if (r->last)
758 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
759 r->last->line, r->last->col, NULL);
760
761 if (r->eqn) {
762 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
763 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
764 eqn_end(&r->eqn);
765 }
766
767 if (r->tbl) {
768 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
769 r->tbl->line, r->tbl->pos, NULL);
770 tbl_end(&r->tbl);
771 }
772 }
773
774 /*
775 * Parse a roff node's type from the input buffer. This must be in the
776 * form of ".foo xxx" in the usual way.
777 */
778 static enum rofft
779 roff_parse(struct roff *r, const char *buf, int *pos)
780 {
781 const char *mac;
782 size_t maclen;
783 enum rofft t;
784
785 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
786 '\t' == buf[*pos] || ' ' == buf[*pos])
787 return(ROFF_MAX);
788
789 /*
790 * We stop the macro parse at an escape, tab, space, or nil.
791 * However, `\}' is also a valid macro, so make sure we don't
792 * clobber it by seeing the `\' as the end of token.
793 */
794
795 mac = buf + *pos;
796 maclen = strcspn(mac + 1, " \\\t\0") + 1;
797
798 t = (r->current_string = roff_getstrn(r, mac, maclen))
799 ? ROFF_USERDEF : roffhash_find(mac, maclen);
800
801 *pos += (int)maclen;
802
803 while (buf[*pos] && ' ' == buf[*pos])
804 (*pos)++;
805
806 return(t);
807 }
808
809 /* ARGSUSED */
810 static enum rofferr
811 roff_cblock(ROFF_ARGS)
812 {
813
814 /*
815 * A block-close `..' should only be invoked as a child of an
816 * ignore macro, otherwise raise a warning and just ignore it.
817 */
818
819 if (NULL == r->last) {
820 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
821 return(ROFF_IGN);
822 }
823
824 switch (r->last->tok) {
825 case (ROFF_am):
826 /* FALLTHROUGH */
827 case (ROFF_ami):
828 /* FALLTHROUGH */
829 case (ROFF_am1):
830 /* FALLTHROUGH */
831 case (ROFF_de):
832 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
833 /* FALLTHROUGH */
834 case (ROFF_dei):
835 /* FALLTHROUGH */
836 case (ROFF_ig):
837 break;
838 default:
839 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
840 return(ROFF_IGN);
841 }
842
843 if ((*bufp)[pos])
844 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
845
846 roffnode_pop(r);
847 roffnode_cleanscope(r);
848 return(ROFF_IGN);
849
850 }
851
852
853 static void
854 roffnode_cleanscope(struct roff *r)
855 {
856
857 while (r->last) {
858 if (--r->last->endspan != 0)
859 break;
860 roffnode_pop(r);
861 }
862 }
863
864
865 /* ARGSUSED */
866 static enum rofferr
867 roff_ccond(ROFF_ARGS)
868 {
869
870 if (NULL == r->last) {
871 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
872 return(ROFF_IGN);
873 }
874
875 switch (r->last->tok) {
876 case (ROFF_el):
877 /* FALLTHROUGH */
878 case (ROFF_ie):
879 /* FALLTHROUGH */
880 case (ROFF_if):
881 break;
882 default:
883 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
884 return(ROFF_IGN);
885 }
886
887 if (r->last->endspan > -1) {
888 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
889 return(ROFF_IGN);
890 }
891
892 if ((*bufp)[pos])
893 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
894
895 roffnode_pop(r);
896 roffnode_cleanscope(r);
897 return(ROFF_IGN);
898 }
899
900
901 /* ARGSUSED */
902 static enum rofferr
903 roff_block(ROFF_ARGS)
904 {
905 int sv;
906 size_t sz;
907 char *name;
908
909 name = NULL;
910
911 if (ROFF_ig != tok) {
912 if ('\0' == (*bufp)[pos]) {
913 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
914 return(ROFF_IGN);
915 }
916
917 /*
918 * Re-write `de1', since we don't really care about
919 * groff's strange compatibility mode, into `de'.
920 */
921
922 if (ROFF_de1 == tok)
923 tok = ROFF_de;
924 if (ROFF_de == tok)
925 name = *bufp + pos;
926 else
927 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
928 roffs[tok].name);
929
930 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
931 pos++;
932
933 while (isspace((unsigned char)(*bufp)[pos]))
934 (*bufp)[pos++] = '\0';
935 }
936
937 roffnode_push(r, tok, name, ln, ppos);
938
939 /*
940 * At the beginning of a `de' macro, clear the existing string
941 * with the same name, if there is one. New content will be
942 * added from roff_block_text() in multiline mode.
943 */
944
945 if (ROFF_de == tok)
946 roff_setstr(r, name, "", 0);
947
948 if ('\0' == (*bufp)[pos])
949 return(ROFF_IGN);
950
951 /* If present, process the custom end-of-line marker. */
952
953 sv = pos;
954 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
955 pos++;
956
957 /*
958 * Note: groff does NOT like escape characters in the input.
959 * Instead of detecting this, we're just going to let it fly and
960 * to hell with it.
961 */
962
963 assert(pos > sv);
964 sz = (size_t)(pos - sv);
965
966 if (1 == sz && '.' == (*bufp)[sv])
967 return(ROFF_IGN);
968
969 r->last->end = mandoc_malloc(sz + 1);
970
971 memcpy(r->last->end, *bufp + sv, sz);
972 r->last->end[(int)sz] = '\0';
973
974 if ((*bufp)[pos])
975 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
976
977 return(ROFF_IGN);
978 }
979
980
981 /* ARGSUSED */
982 static enum rofferr
983 roff_block_sub(ROFF_ARGS)
984 {
985 enum rofft t;
986 int i, j;
987
988 /*
989 * First check whether a custom macro exists at this level. If
990 * it does, then check against it. This is some of groff's
991 * stranger behaviours. If we encountered a custom end-scope
992 * tag and that tag also happens to be a "real" macro, then we
993 * need to try interpreting it again as a real macro. If it's
994 * not, then return ignore. Else continue.
995 */
996
997 if (r->last->end) {
998 for (i = pos, j = 0; r->last->end[j]; j++, i++)
999 if ((*bufp)[i] != r->last->end[j])
1000 break;
1001
1002 if ('\0' == r->last->end[j] &&
1003 ('\0' == (*bufp)[i] ||
1004 ' ' == (*bufp)[i] ||
1005 '\t' == (*bufp)[i])) {
1006 roffnode_pop(r);
1007 roffnode_cleanscope(r);
1008
1009 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1010 i++;
1011
1012 pos = i;
1013 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1014 return(ROFF_RERUN);
1015 return(ROFF_IGN);
1016 }
1017 }
1018
1019 /*
1020 * If we have no custom end-query or lookup failed, then try
1021 * pulling it out of the hashtable.
1022 */
1023
1024 t = roff_parse(r, *bufp, &pos);
1025
1026 /*
1027 * Macros other than block-end are only significant
1028 * in `de' blocks; elsewhere, simply throw them away.
1029 */
1030 if (ROFF_cblock != t) {
1031 if (ROFF_de == tok)
1032 roff_setstr(r, r->last->name, *bufp + ppos, 1);
1033 return(ROFF_IGN);
1034 }
1035
1036 assert(roffs[t].proc);
1037 return((*roffs[t].proc)(r, t, bufp, szp,
1038 ln, ppos, pos, offs));
1039 }
1040
1041
1042 /* ARGSUSED */
1043 static enum rofferr
1044 roff_block_text(ROFF_ARGS)
1045 {
1046
1047 if (ROFF_de == tok)
1048 roff_setstr(r, r->last->name, *bufp + pos, 1);
1049
1050 return(ROFF_IGN);
1051 }
1052
1053
1054 /* ARGSUSED */
1055 static enum rofferr
1056 roff_cond_sub(ROFF_ARGS)
1057 {
1058 enum rofft t;
1059 enum roffrule rr;
1060 char *ep;
1061
1062 rr = r->last->rule;
1063 roffnode_cleanscope(r);
1064 t = roff_parse(r, *bufp, &pos);
1065
1066 /*
1067 * Fully handle known macros when they are structurally
1068 * required or when the conditional evaluated to true.
1069 */
1070
1071 if ((ROFF_MAX != t) &&
1072 (ROFF_ccond == t || ROFFRULE_ALLOW == rr ||
1073 ROFFMAC_STRUCT & roffs[t].flags)) {
1074 assert(roffs[t].proc);
1075 return((*roffs[t].proc)(r, t, bufp, szp,
1076 ln, ppos, pos, offs));
1077 }
1078
1079 /* Always check for the closing delimiter `\}'. */
1080
1081 ep = &(*bufp)[pos];
1082 while (NULL != (ep = strchr(ep, '\\'))) {
1083 if ('}' != *(++ep))
1084 continue;
1085
1086 /*
1087 * If we're at the end of line, then just chop
1088 * off the \} and resize the buffer.
1089 * If we aren't, then convert it to spaces.
1090 */
1091
1092 if ('\0' == *(ep + 1)) {
1093 *--ep = '\0';
1094 *szp -= 2;
1095 } else
1096 *(ep - 1) = *ep = ' ';
1097
1098 roff_ccond(r, ROFF_ccond, bufp, szp,
1099 ln, pos, pos + 2, offs);
1100 break;
1101 }
1102 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1103 }
1104
1105 /* ARGSUSED */
1106 static enum rofferr
1107 roff_cond_text(ROFF_ARGS)
1108 {
1109 char *ep;
1110 enum roffrule rr;
1111
1112 rr = r->last->rule;
1113 roffnode_cleanscope(r);
1114
1115 ep = &(*bufp)[pos];
1116 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1117 ep++;
1118 if ('}' != *ep)
1119 continue;
1120 *ep = '&';
1121 roff_ccond(r, ROFF_ccond, bufp, szp,
1122 ln, pos, pos + 2, offs);
1123 }
1124 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1125 }
1126
/*
 * Parse an optionally negative decimal integer at v[*pos].
 * On success, store the value in *res, move *pos past the last
 * digit and return 1.  If there are no digits, return 0 and leave
 * *pos alone; *res is set to 0 in that case.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int p, n;

	p = *pos;
	n = v[p] == '-';	/* 1 if there is a minus sign */
	if (n)
		p++;

	/* Plain assignment: `+=' would add the old value once more. */
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';

	/* Nothing but, at most, a sign. */
	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1148
/*
 * Parse a comparison operator at v[*pos].
 * The character found is always stored in *res.  If it is one of
 * `=', `<' or `>', *pos is moved past the operator, including a
 * directly following `=', and the operator code is returned:
 * '=' for both "=" and "==", '<', '>', 'l' for "<=", 'g' for ">=".
 * Otherwise, 0 is returned and *pos is left alone.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int e;	/* the operator is followed by `=' */

	*res = v[*pos];

	switch (*res) {
	case '=':
		/* FALLTHROUGH */
	case '>':
		/* FALLTHROUGH */
	case '<':
		break;
	default:
		return(0);
	}

	/*
	 * Only look ahead now that v[*pos] is known to be an
	 * operator and hence not the terminator; peeking earlier
	 * would read past the end of a string ending at *pos.
	 */

	e = v[*pos + 1] == '=';
	if (e) {
		if ('>' == *res)
			*res = 'g';
		else if ('<' == *res)
			*res = 'l';
	}

	*pos += 1 + e;

	return(*res);
}
1176
1177 static enum roffrule
1178 roff_evalcond(const char *v, int *pos)
1179 {
1180 int not, lh, rh;
1181 char op;
1182
1183 switch (v[*pos]) {
1184 case ('n'):
1185 (*pos)++;
1186 return(ROFFRULE_ALLOW);
1187 case ('e'):
1188 /* FALLTHROUGH */
1189 case ('o'):
1190 /* FALLTHROUGH */
1191 case ('t'):
1192 (*pos)++;
1193 return(ROFFRULE_DENY);
1194 case ('!'):
1195 (*pos)++;
1196 not = 1;
1197 break;
1198 default:
1199 not = 0;
1200 break;
1201 }
1202
1203 if (!roff_getnum(v, pos, &lh))
1204 return ROFFRULE_DENY;
1205 if (!roff_getop(v, pos, &op)) {
1206 if (lh < 0)
1207 lh = 0;
1208 goto out;
1209 }
1210 if (!roff_getnum(v, pos, &rh))
1211 return ROFFRULE_DENY;
1212 switch (op) {
1213 case 'g':
1214 lh = lh >= rh;
1215 break;
1216 case 'l':
1217 lh = lh <= rh;
1218 break;
1219 case '=':
1220 lh = lh == rh;
1221 break;
1222 case '>':
1223 lh = lh > rh;
1224 break;
1225 case '<':
1226 lh = lh < rh;
1227 break;
1228 default:
1229 return ROFFRULE_DENY;
1230 }
1231 out:
1232 if (not)
1233 lh = !lh;
1234 return lh ? ROFFRULE_ALLOW : ROFFRULE_DENY;
1235 }
1236
1237 /* ARGSUSED */
1238 static enum rofferr
1239 roff_line_ignore(ROFF_ARGS)
1240 {
1241
1242 return(ROFF_IGN);
1243 }
1244
1245 /* ARGSUSED */
1246 static enum rofferr
1247 roff_cond(ROFF_ARGS)
1248 {
1249
1250 roffnode_push(r, tok, NULL, ln, ppos);
1251
1252 /*
1253 * An `.el' has no conditional body: it will consume the value
1254 * of the current rstack entry set in prior `ie' calls or
1255 * defaults to DENY.
1256 *
1257 * If we're not an `el', however, then evaluate the conditional.
1258 */
1259
1260 r->last->rule = ROFF_el == tok ?
1261 (r->rstackpos < 0 ?
1262 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1263 roff_evalcond(*bufp, &pos);
1264
1265 /*
1266 * An if-else will put the NEGATION of the current evaluated
1267 * conditional into the stack of rules.
1268 */
1269
1270 if (ROFF_ie == tok) {
1271 if (r->rstackpos == RSTACK_MAX - 1) {
1272 mandoc_msg(MANDOCERR_MEM,
1273 r->parse, ln, ppos, NULL);
1274 return(ROFF_ERR);
1275 }
1276 r->rstack[++r->rstackpos] =
1277 ROFFRULE_DENY == r->last->rule ?
1278 ROFFRULE_ALLOW : ROFFRULE_DENY;
1279 }
1280
1281 /* If the parent has false as its rule, then so do we. */
1282
1283 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1284 r->last->rule = ROFFRULE_DENY;
1285
1286 /*
1287 * Determine scope.
1288 * If there is nothing on the line after the conditional,
1289 * not even whitespace, use next-line scope.
1290 */
1291
1292 if ('\0' == (*bufp)[pos]) {
1293 r->last->endspan = 2;
1294 goto out;
1295 }
1296
1297 while (' ' == (*bufp)[pos])
1298 pos++;
1299
1300 /* An opening brace requests multiline scope. */
1301
1302 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1303 r->last->endspan = -1;
1304 pos += 2;
1305 goto out;
1306 }
1307
1308 /*
1309 * Anything else following the conditional causes
1310 * single-line scope. Warn if the scope contains
1311 * nothing but trailing whitespace.
1312 */
1313
1314 if ('\0' == (*bufp)[pos])
1315 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1316
1317 r->last->endspan = 1;
1318
1319 out:
1320 *offs = pos;
1321 return(ROFF_RERUN);
1322 }
1323
1324
1325 /* ARGSUSED */
1326 static enum rofferr
1327 roff_ds(ROFF_ARGS)
1328 {
1329 char *name, *string;
1330
1331 /*
1332 * A symbol is named by the first word following the macro
1333 * invocation up to a space. Its value is anything after the
1334 * name's trailing whitespace and optional double-quote. Thus,
1335 *
1336 * [.ds foo "bar " ]
1337 *
1338 * will have `bar " ' as its value.
1339 */
1340
1341 string = *bufp + pos;
1342 name = roff_getname(r, &string, ln, pos);
1343 if ('\0' == *name)
1344 return(ROFF_IGN);
1345
1346 /* Read past initial double-quote. */
1347 if ('"' == *string)
1348 string++;
1349
1350 /* The rest is the value. */
1351 roff_setstr(r, name, string, 0);
1352 return(ROFF_IGN);
1353 }
1354
1355 void
1356 roff_setreg(struct roff *r, const char *name, int val, char sign)
1357 {
1358 struct roffreg *reg;
1359
1360 /* Search for an existing register with the same name. */
1361 reg = r->regtab;
1362
1363 while (reg && strcmp(name, reg->key.p))
1364 reg = reg->next;
1365
1366 if (NULL == reg) {
1367 /* Create a new register. */
1368 reg = mandoc_malloc(sizeof(struct roffreg));
1369 reg->key.p = mandoc_strdup(name);
1370 reg->key.sz = strlen(name);
1371 reg->val = 0;
1372 reg->next = r->regtab;
1373 r->regtab = reg;
1374 }
1375
1376 if ('+' == sign)
1377 reg->val += val;
1378 else if ('-' == sign)
1379 reg->val -= val;
1380 else
1381 reg->val = val;
1382 }
1383
/*
 * Look up a predefined read-only number register.
 * Only the first character of name is inspected; the callers pass
 * the character following the leading dot of a two-character name.
 * Returns the fixed value of the register, or -1 if the character
 * does not name a predefined register.  Should a predefined
 * register with the value -1 ever be needed, a different
 * "not found" marker will have to be chosen.
 */
static int
roff_getregro(const char *name)
{
	static const struct {
		char	 letter;
		int	 value;
	} ro[] = {
		{ 'A',  0 },	/* ASCII approximation mode is always off. */
		{ 'g',  1 },	/* Groff compatibility mode is always on. */
		{ 'H', 24 },	/* Fixed horizontal resolution. */
		{ 'j',  0 },	/* Always adjust left margin only. */
		{ 'T',  1 },	/* Some output device is always defined. */
		{ 'V', 40 }	/* Fixed vertical resolution. */
	};
	unsigned int	 i;

	for (i = 0; i < sizeof(ro) / sizeof(ro[0]); i++)
		if (ro[i].letter == *name)
			return ro[i].value;

	return -1;
}
1411
1412 int
1413 roff_getreg(const struct roff *r, const char *name)
1414 {
1415 struct roffreg *reg;
1416 int val;
1417
1418 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1419 val = roff_getregro(name + 1);
1420 if (-1 != val)
1421 return (val);
1422 }
1423
1424 for (reg = r->regtab; reg; reg = reg->next)
1425 if (0 == strcmp(name, reg->key.p))
1426 return(reg->val);
1427
1428 return(0);
1429 }
1430
1431 static int
1432 roff_getregn(const struct roff *r, const char *name, size_t len)
1433 {
1434 struct roffreg *reg;
1435 int val;
1436
1437 if ('.' == name[0] && 2 == len) {
1438 val = roff_getregro(name + 1);
1439 if (-1 != val)
1440 return (val);
1441 }
1442
1443 for (reg = r->regtab; reg; reg = reg->next)
1444 if (len == reg->key.sz &&
1445 0 == strncmp(name, reg->key.p, len))
1446 return(reg->val);
1447
1448 return(0);
1449 }
1450
1451 static void
1452 roff_freereg(struct roffreg *reg)
1453 {
1454 struct roffreg *old_reg;
1455
1456 while (NULL != reg) {
1457 free(reg->key.p);
1458 old_reg = reg;
1459 reg = reg->next;
1460 free(old_reg);
1461 }
1462 }
1463
1464 /* ARGSUSED */
1465 static enum rofferr
1466 roff_nr(ROFF_ARGS)
1467 {
1468 const char *key;
1469 char *val;
1470 size_t sz;
1471 int iv;
1472 char sign;
1473
1474 val = *bufp + pos;
1475 key = roff_getname(r, &val, ln, pos);
1476
1477 sign = *val;
1478 if ('+' == sign || '-' == sign)
1479 val++;
1480
1481 sz = strspn(val, "0123456789");
1482 iv = sz ? mandoc_strntoi(val, sz, 10) : 0;
1483
1484 roff_setreg(r, key, iv, sign);
1485
1486 return(ROFF_IGN);
1487 }
1488
1489 /* ARGSUSED */
1490 static enum rofferr
1491 roff_rm(ROFF_ARGS)
1492 {
1493 const char *name;
1494 char *cp;
1495
1496 cp = *bufp + pos;
1497 while ('\0' != *cp) {
1498 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1499 if ('\0' != *name)
1500 roff_setstr(r, name, NULL, 0);
1501 }
1502 return(ROFF_IGN);
1503 }
1504
1505 /* ARGSUSED */
1506 static enum rofferr
1507 roff_it(ROFF_ARGS)
1508 {
1509 char *cp;
1510 size_t len;
1511 int iv;
1512
1513 /* Parse the number of lines. */
1514 cp = *bufp + pos;
1515 len = strcspn(cp, " \t");
1516 cp[len] = '\0';
1517 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1518 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1519 ln, ppos, *bufp + 1);
1520 return(ROFF_IGN);
1521 }
1522 cp += len + 1;
1523
1524 /* Arm the input line trap. */
1525 roffit_lines = iv;
1526 roffit_macro = mandoc_strdup(cp);
1527 return(ROFF_IGN);
1528 }
1529
1530 /* ARGSUSED */
1531 static enum rofferr
1532 roff_Dd(ROFF_ARGS)
1533 {
1534 const char *const *cp;
1535
1536 if (0 == r->quick && MPARSE_MDOC != r->parsetype)
1537 for (cp = __mdoc_reserved; *cp; cp++)
1538 roff_setstr(r, *cp, NULL, 0);
1539
1540 return(ROFF_CONT);
1541 }
1542
1543 /* ARGSUSED */
1544 static enum rofferr
1545 roff_TH(ROFF_ARGS)
1546 {
1547 const char *const *cp;
1548
1549 if (0 == r->quick && MPARSE_MDOC != r->parsetype)
1550 for (cp = __man_reserved; *cp; cp++)
1551 roff_setstr(r, *cp, NULL, 0);
1552
1553 return(ROFF_CONT);
1554 }
1555
1556 /* ARGSUSED */
1557 static enum rofferr
1558 roff_TE(ROFF_ARGS)
1559 {
1560
1561 if (NULL == r->tbl)
1562 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1563 else
1564 tbl_end(&r->tbl);
1565
1566 return(ROFF_IGN);
1567 }
1568
1569 /* ARGSUSED */
1570 static enum rofferr
1571 roff_T_(ROFF_ARGS)
1572 {
1573
1574 if (NULL == r->tbl)
1575 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1576 else
1577 tbl_restart(ppos, ln, r->tbl);
1578
1579 return(ROFF_IGN);
1580 }
1581
#if 0
/*
 * Currently compiled out.
 * Close the open equation, if any; returns 1 when eqn_end()
 * reports ROFF_EQN and 0 otherwise, including when no equation
 * is open.
 */
static int
roff_closeeqn(struct roff *r)
{
	if (r->eqn == NULL)
		return 0;

	return eqn_end(&r->eqn) == ROFF_EQN ? 1 : 0;
}
#endif
1590
1591 static void
1592 roff_openeqn(struct roff *r, const char *name, int line,
1593 int offs, const char *buf)
1594 {
1595 struct eqn_node *e;
1596 int poff;
1597
1598 assert(NULL == r->eqn);
1599 e = eqn_alloc(name, offs, line, r->parse);
1600
1601 if (r->last_eqn)
1602 r->last_eqn->next = e;
1603 else
1604 r->first_eqn = r->last_eqn = e;
1605
1606 r->eqn = r->last_eqn = e;
1607
1608 if (buf) {
1609 poff = 0;
1610 eqn_read(&r->eqn, line, buf, offs, &poff);
1611 }
1612 }
1613
1614 /* ARGSUSED */
1615 static enum rofferr
1616 roff_EQ(ROFF_ARGS)
1617 {
1618
1619 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1620 return(ROFF_IGN);
1621 }
1622
1623 /* ARGSUSED */
1624 static enum rofferr
1625 roff_EN(ROFF_ARGS)
1626 {
1627
1628 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1629 return(ROFF_IGN);
1630 }
1631
1632 /* ARGSUSED */
1633 static enum rofferr
1634 roff_TS(ROFF_ARGS)
1635 {
1636 struct tbl_node *tbl;
1637
1638 if (r->tbl) {
1639 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1640 tbl_end(&r->tbl);
1641 }
1642
1643 tbl = tbl_alloc(ppos, ln, r->parse);
1644
1645 if (r->last_tbl)
1646 r->last_tbl->next = tbl;
1647 else
1648 r->first_tbl = r->last_tbl = tbl;
1649
1650 r->tbl = r->last_tbl = tbl;
1651 return(ROFF_IGN);
1652 }
1653
1654 /* ARGSUSED */
1655 static enum rofferr
1656 roff_cc(ROFF_ARGS)
1657 {
1658 const char *p;
1659
1660 p = *bufp + pos;
1661
1662 if ('\0' == *p || '.' == (r->control = *p++))
1663 r->control = 0;
1664
1665 if ('\0' != *p)
1666 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1667
1668 return(ROFF_IGN);
1669 }
1670
1671 /* ARGSUSED */
1672 static enum rofferr
1673 roff_tr(ROFF_ARGS)
1674 {
1675 const char *p, *first, *second;
1676 size_t fsz, ssz;
1677 enum mandoc_esc esc;
1678
1679 p = *bufp + pos;
1680
1681 if ('\0' == *p) {
1682 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1683 return(ROFF_IGN);
1684 }
1685
1686 while ('\0' != *p) {
1687 fsz = ssz = 1;
1688
1689 first = p++;
1690 if ('\\' == *first) {
1691 esc = mandoc_escape(&p, NULL, NULL);
1692 if (ESCAPE_ERROR == esc) {
1693 mandoc_msg
1694 (MANDOCERR_BADESCAPE, r->parse,
1695 ln, (int)(p - *bufp), NULL);
1696 return(ROFF_IGN);
1697 }
1698 fsz = (size_t)(p - first);
1699 }
1700
1701 second = p++;
1702 if ('\\' == *second) {
1703 esc = mandoc_escape(&p, NULL, NULL);
1704 if (ESCAPE_ERROR == esc) {
1705 mandoc_msg
1706 (MANDOCERR_BADESCAPE, r->parse,
1707 ln, (int)(p - *bufp), NULL);
1708 return(ROFF_IGN);
1709 }
1710 ssz = (size_t)(p - second);
1711 } else if ('\0' == *second) {
1712 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1713 ln, (int)(p - *bufp), NULL);
1714 second = " ";
1715 p--;
1716 }
1717
1718 if (fsz > 1) {
1719 roff_setstrn(&r->xmbtab, first,
1720 fsz, second, ssz, 0);
1721 continue;
1722 }
1723
1724 if (NULL == r->xtab)
1725 r->xtab = mandoc_calloc
1726 (128, sizeof(struct roffstr));
1727
1728 free(r->xtab[(int)*first].p);
1729 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1730 r->xtab[(int)*first].sz = ssz;
1731 }
1732
1733 return(ROFF_IGN);
1734 }
1735
1736 /* ARGSUSED */
1737 static enum rofferr
1738 roff_so(ROFF_ARGS)
1739 {
1740 char *name;
1741
1742 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1743
1744 /*
1745 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1746 * opening anything that's not in our cwd or anything beneath
1747 * it. Thus, explicitly disallow traversing up the file-system
1748 * or using absolute paths.
1749 */
1750
1751 name = *bufp + pos;
1752 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1753 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1754 return(ROFF_ERR);
1755 }
1756
1757 *offs = pos;
1758 return(ROFF_SO);
1759 }
1760
1761 /* ARGSUSED */
1762 static enum rofferr
1763 roff_userdef(ROFF_ARGS)
1764 {
1765 const char *arg[9];
1766 char *cp, *n1, *n2;
1767 int i;
1768
1769 /*
1770 * Collect pointers to macro argument strings
1771 * and NUL-terminate them.
1772 */
1773 cp = *bufp + pos;
1774 for (i = 0; i < 9; i++)
1775 arg[i] = '\0' == *cp ? "" :
1776 mandoc_getarg(r->parse, &cp, ln, &pos);
1777
1778 /*
1779 * Expand macro arguments.
1780 */
1781 *szp = 0;
1782 n1 = cp = mandoc_strdup(r->current_string);
1783 while (NULL != (cp = strstr(cp, "\\$"))) {
1784 i = cp[2] - '1';
1785 if (0 > i || 8 < i) {
1786 /* Not an argument invocation. */
1787 cp += 2;
1788 continue;
1789 }
1790
1791 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1792 n2 = mandoc_malloc(*szp);
1793
1794 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1795 strlcat(n2, arg[i], *szp);
1796 strlcat(n2, cp + 3, *szp);
1797
1798 cp = n2 + (cp - n1);
1799 free(n1);
1800 n1 = n2;
1801 }
1802
1803 /*
1804 * Replace the macro invocation
1805 * by the expanded macro.
1806 */
1807 free(*bufp);
1808 *bufp = n1;
1809 if (0 == *szp)
1810 *szp = strlen(*bufp) + 1;
1811
1812 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1813 ROFF_REPARSE : ROFF_APPEND);
1814 }
1815
1816 static char *
1817 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1818 {
1819 char *name, *cp;
1820
1821 name = *cpp;
1822 if ('\0' == *name)
1823 return(name);
1824
1825 /* Read until end of name. */
1826 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1827 if ('\\' != *cp)
1828 continue;
1829 cp++;
1830 if ('\\' == *cp)
1831 continue;
1832 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1833 *cp = '\0';
1834 name = cp;
1835 }
1836
1837 /* Nil-terminate name. */
1838 if ('\0' != *cp)
1839 *(cp++) = '\0';
1840
1841 /* Read past spaces. */
1842 while (' ' == *cp)
1843 cp++;
1844
1845 *cpp = cp;
1846 return(name);
1847 }
1848
1849 /*
1850 * Store *string into the user-defined string called *name.
1851 * In multiline mode, append to an existing entry and append '\n';
1852 * else replace the existing entry, if there is one.
1853 * To clear an existing entry, call with (*r, *name, NULL, 0).
1854 */
1855 static void
1856 roff_setstr(struct roff *r, const char *name, const char *string,
1857 int multiline)
1858 {
1859
1860 roff_setstrn(&r->strtab, name, strlen(name), string,
1861 string ? strlen(string) : 0, multiline);
1862 }
1863
1864 static void
1865 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1866 const char *string, size_t stringsz, int multiline)
1867 {
1868 struct roffkv *n;
1869 char *c;
1870 int i;
1871 size_t oldch, newch;
1872
1873 /* Search for an existing string with the same name. */
1874 n = *r;
1875
1876 while (n && strcmp(name, n->key.p))
1877 n = n->next;
1878
1879 if (NULL == n) {
1880 /* Create a new string table entry. */
1881 n = mandoc_malloc(sizeof(struct roffkv));
1882 n->key.p = mandoc_strndup(name, namesz);
1883 n->key.sz = namesz;
1884 n->val.p = NULL;
1885 n->val.sz = 0;
1886 n->next = *r;
1887 *r = n;
1888 } else if (0 == multiline) {
1889 /* In multiline mode, append; else replace. */
1890 free(n->val.p);
1891 n->val.p = NULL;
1892 n->val.sz = 0;
1893 }
1894
1895 if (NULL == string)
1896 return;
1897
1898 /*
1899 * One additional byte for the '\n' in multiline mode,
1900 * and one for the terminating '\0'.
1901 */
1902 newch = stringsz + (multiline ? 2u : 1u);
1903
1904 if (NULL == n->val.p) {
1905 n->val.p = mandoc_malloc(newch);
1906 *n->val.p = '\0';
1907 oldch = 0;
1908 } else {
1909 oldch = n->val.sz;
1910 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1911 }
1912
1913 /* Skip existing content in the destination buffer. */
1914 c = n->val.p + (int)oldch;
1915
1916 /* Append new content to the destination buffer. */
1917 i = 0;
1918 while (i < (int)stringsz) {
1919 /*
1920 * Rudimentary roff copy mode:
1921 * Handle escaped backslashes.
1922 */
1923 if ('\\' == string[i] && '\\' == string[i + 1])
1924 i++;
1925 *c++ = string[i++];
1926 }
1927
1928 /* Append terminating bytes. */
1929 if (multiline)
1930 *c++ = '\n';
1931
1932 *c = '\0';
1933 n->val.sz = (int)(c - n->val.p);
1934 }
1935
1936 static const char *
1937 roff_getstrn(const struct roff *r, const char *name, size_t len)
1938 {
1939 const struct roffkv *n;
1940 int i;
1941
1942 for (n = r->strtab; n; n = n->next)
1943 if (0 == strncmp(name, n->key.p, len) &&
1944 '\0' == n->key.p[(int)len])
1945 return(n->val.p);
1946
1947 for (i = 0; i < PREDEFS_MAX; i++)
1948 if (0 == strncmp(name, predefs[i].name, len) &&
1949 '\0' == predefs[i].name[(int)len])
1950 return(predefs[i].str);
1951
1952 return(NULL);
1953 }
1954
1955 static void
1956 roff_freestr(struct roffkv *r)
1957 {
1958 struct roffkv *n, *nn;
1959
1960 for (n = r; n; n = nn) {
1961 free(n->key.p);
1962 free(n->val.p);
1963 nn = n->next;
1964 free(n);
1965 }
1966 }
1967
1968 const struct tbl_span *
1969 roff_span(const struct roff *r)
1970 {
1971
1972 return(r->tbl ? tbl_span(r->tbl) : NULL);
1973 }
1974
1975 const struct eqn *
1976 roff_eqn(const struct roff *r)
1977 {
1978
1979 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1980 }
1981
1982 /*
1983 * Duplicate an input string, making the appropriate character
1984 * conversations (as stipulated by `tr') along the way.
1985 * Returns a heap-allocated string with all the replacements made.
1986 */
1987 char *
1988 roff_strdup(const struct roff *r, const char *p)
1989 {
1990 const struct roffkv *cp;
1991 char *res;
1992 const char *pp;
1993 size_t ssz, sz;
1994 enum mandoc_esc esc;
1995
1996 if (NULL == r->xmbtab && NULL == r->xtab)
1997 return(mandoc_strdup(p));
1998 else if ('\0' == *p)
1999 return(mandoc_strdup(""));
2000
2001 /*
2002 * Step through each character looking for term matches
2003 * (remember that a `tr' can be invoked with an escape, which is
2004 * a glyph but the escape is multi-character).
2005 * We only do this if the character hash has been initialised
2006 * and the string is >0 length.
2007 */
2008
2009 res = NULL;
2010 ssz = 0;
2011
2012 while ('\0' != *p) {
2013 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2014 sz = r->xtab[(int)*p].sz;
2015 res = mandoc_realloc(res, ssz + sz + 1);
2016 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2017 ssz += sz;
2018 p++;
2019 continue;
2020 } else if ('\\' != *p) {
2021 res = mandoc_realloc(res, ssz + 2);
2022 res[ssz++] = *p++;
2023 continue;
2024 }
2025
2026 /* Search for term matches. */
2027 for (cp = r->xmbtab; cp; cp = cp->next)
2028 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2029 break;
2030
2031 if (NULL != cp) {
2032 /*
2033 * A match has been found.
2034 * Append the match to the array and move
2035 * forward by its keysize.
2036 */
2037 res = mandoc_realloc
2038 (res, ssz + cp->val.sz + 1);
2039 memcpy(res + ssz, cp->val.p, cp->val.sz);
2040 ssz += cp->val.sz;
2041 p += (int)cp->key.sz;
2042 continue;
2043 }
2044
2045 /*
2046 * Handle escapes carefully: we need to copy
2047 * over just the escape itself, or else we might
2048 * do replacements within the escape itself.
2049 * Make sure to pass along the bogus string.
2050 */
2051 pp = p++;
2052 esc = mandoc_escape(&p, NULL, NULL);
2053 if (ESCAPE_ERROR == esc) {
2054 sz = strlen(pp);
2055 res = mandoc_realloc(res, ssz + sz + 1);
2056 memcpy(res + ssz, pp, sz);
2057 break;
2058 }
2059 /*
2060 * We bail out on bad escapes.
2061 * No need to warn: we already did so when
2062 * roff_res() was called.
2063 */
2064 sz = (int)(p - pp);
2065 res = mandoc_realloc(res, ssz + sz + 1);
2066 memcpy(res + ssz, pp, sz);
2067 ssz += sz;
2068 }
2069
2070 res[(int)ssz] = '\0';
2071 return(res);
2072 }
2073
2074 /*
2075 * Find out whether a line is a macro line or not.
2076 * If it is, adjust the current position and return one; if it isn't,
2077 * return zero and don't change the current position.
2078 * If the control character has been set with `.cc', then let that grain
2079 * precedence.
2080 * This is slighly contrary to groff, where using the non-breaking
2081 * control character when `cc' has been invoked will cause the
2082 * non-breaking macro contents to be printed verbatim.
2083 */
2084 int
2085 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2086 {
2087 int pos;
2088
2089 pos = *ppos;
2090
2091 if (0 != r->control && cp[pos] == r->control)
2092 pos++;
2093 else if (0 != r->control)
2094 return(0);
2095 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2096 pos += 2;
2097 else if ('.' == cp[pos] || '\'' == cp[pos])
2098 pos++;
2099 else
2100 return(0);
2101
2102 while (' ' == cp[pos] || '\t' == cp[pos])
2103 pos++;
2104
2105 *ppos = pos;
2106 return(1);
2107 }