]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Three bugfixes related to the closing of conditional blocks:
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.195 2014/03/07 02:22:05 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "libroff.h"
30 #include "libmandoc.h"
31
/*
 * Capacity of the stack that remembers the outcome of pending
 * `ie' requests until the matching `el' consumes them.
 */
#define	RSTACK_MAX	128

/*
 * Upper bound on the number of string and register expansions
 * performed on one input line; exceeding it is reported as a loop.
 */
#define	EXPAND_LIMIT	1000
37
/*
 * Identifiers of all requests and macros handled at the roff level.
 * The values index the roffs[] table, so the order of the entries
 * here and in that table must stay in sync.
 */
enum	rofft {
	ROFF_ad,	/* "ad" */
	ROFF_am,	/* "am" */
	ROFF_ami,	/* "ami" */
	ROFF_am1,	/* "am1" */
	ROFF_as,	/* "as" */
	ROFF_cc,	/* "cc" */
	ROFF_ce,	/* "ce" */
	ROFF_de,	/* "de" */
	ROFF_dei,	/* "dei" */
	ROFF_de1,	/* "de1" */
	ROFF_ds,	/* "ds" */
	ROFF_el,	/* "el" */
	ROFF_fam,	/* "fam" */
	ROFF_hw,	/* "hw" */
	ROFF_hy,	/* "hy" */
	ROFF_ie,	/* "ie" */
	ROFF_if,	/* "if" */
	ROFF_ig,	/* "ig" */
	ROFF_it,	/* "it" */
	ROFF_ne,	/* "ne" */
	ROFF_nh,	/* "nh" */
	ROFF_nr,	/* "nr" */
	ROFF_ns,	/* "ns" */
	ROFF_ps,	/* "ps" */
	ROFF_rm,	/* "rm" */
	ROFF_so,	/* "so" */
	ROFF_ta,	/* "ta" */
	ROFF_tr,	/* "tr" */
	ROFF_Dd,	/* "Dd" */
	ROFF_TH,	/* "TH" */
	ROFF_TS,	/* "TS" */
	ROFF_TE,	/* "TE" */
	ROFF_T_,	/* "T&" */
	ROFF_EQ,	/* "EQ" */
	ROFF_EN,	/* "EN" */
	ROFF_cblock,	/* "." as in "..", closing a block */
	ROFF_USERDEF,	/* user-defined macro; first non-hashed entry */
	ROFF_MAX	/* number of entries; also means "not found" */
};
78
/*
 * Result of evaluating a conditional request.
 */
enum	roffrule {
	ROFFRULE_DENY,	/* condition is false: the body is skipped */
	ROFFRULE_ALLOW	/* condition is true: the body is processed */
};
83
/*
 * Minimal string buffer: a pointer together with its cached length.
 */
struct	roffstr {
	char		*p;	/* nil-terminated buffer */
	size_t		 sz;	/* saved strlen(p) */
};
91
92 /*
93 * A key-value roffstr pair as part of a singly-linked list.
94 */
95 struct roffkv {
96 struct roffstr key;
97 struct roffstr val;
98 struct roffkv *next; /* next in list */
99 };
100
101 /*
102 * A single number register as part of a singly-linked list.
103 */
104 struct roffreg {
105 struct roffstr key;
106 int val;
107 struct roffreg *next;
108 };
109
110 struct roff {
111 enum mparset parsetype; /* requested parse type */
112 struct mparse *parse; /* parse point */
113 int quick; /* skip standard macro deletion */
114 struct roffnode *last; /* leaf of stack */
115 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
116 char control; /* control character */
117 int rstackpos; /* position in rstack */
118 struct roffreg *regtab; /* number registers */
119 struct roffkv *strtab; /* user-defined strings & macros */
120 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
121 struct roffstr *xtab; /* single-byte trans table (`tr') */
122 const char *current_string; /* value of last called user macro */
123 struct tbl_node *first_tbl; /* first table parsed */
124 struct tbl_node *last_tbl; /* last table parsed */
125 struct tbl_node *tbl; /* current table being parsed */
126 struct eqn_node *last_eqn; /* last equation parsed */
127 struct eqn_node *first_eqn; /* first equation parsed */
128 struct eqn_node *eqn; /* current equation being parsed */
129 };
130
131 struct roffnode {
132 enum rofft tok; /* type of node */
133 struct roffnode *parent; /* up one in stack */
134 int line; /* parse line */
135 int col; /* parse col */
136 char *name; /* node name, e.g. macro name */
137 char *end; /* end-rules: custom token */
138 int endspan; /* end-rules: next-line or infty */
139 enum roffrule rule; /* current evaluation rule */
140 };
141
/*
 * Parameter list shared by all request handlers, so that they can
 * be stored in and called through the roffproc function pointer.
 */
#define	ROFF_ARGS \
	struct roff *r,	/* parse ctx */ \
	enum rofft tok,	/* tok of macro */ \
	char **bufp,	/* input buffer */ \
	size_t *szp,	/* size of input buffer */ \
	int ln,		/* parse line */ \
	int ppos,	/* original pos in buffer */ \
	int pos,	/* current pos in buffer */ \
	int *offs	/* reset offset of buffer data */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
152
153 struct roffmac {
154 const char *name; /* macro name */
155 roffproc proc; /* process new macro */
156 roffproc text; /* process as child text of macro */
157 roffproc sub; /* process as child of macro */
158 int flags;
159 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
160 struct roffmac *next;
161 };
162
/*
 * A predefined string: its name and the text it stands for.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/* Build one struct predef initializer, including the trailing comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
170
/* Lookup table of built-in requests. */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);

/* Stack of open blocks and conditionals. */
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);

/* Handlers and helpers, in alphabetical order. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);

/* Handlers for macros that are intercepted at the roff level. */
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
223
/*
 * The request table is hashed on the first character of the name,
 * which has to lie in the range ASCII_LO..ASCII_HI;
 * see roffhash_find().
 */
#define	ASCII_HI	126
#define	ASCII_LO	33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* One chain of requests per possible first character. */
static	struct roffmac	*hash[HASHWIDTH];
231
232 static struct roffmac roffs[ROFF_MAX] = {
233 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
234 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
235 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
236 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
237 { "as", roff_ds, NULL, NULL, 0, NULL },
238 { "cc", roff_cc, NULL, NULL, 0, NULL },
239 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
240 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
241 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
242 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
243 { "ds", roff_ds, NULL, NULL, 0, NULL },
244 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
245 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
246 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
247 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
248 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
249 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
250 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
251 { "it", roff_it, NULL, NULL, 0, NULL },
252 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
253 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
254 { "nr", roff_nr, NULL, NULL, 0, NULL },
255 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
256 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
257 { "rm", roff_rm, NULL, NULL, 0, NULL },
258 { "so", roff_so, NULL, NULL, 0, NULL },
259 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
260 { "tr", roff_tr, NULL, NULL, 0, NULL },
261 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
262 { "TH", roff_TH, NULL, NULL, 0, NULL },
263 { "TS", roff_TS, NULL, NULL, 0, NULL },
264 { "TE", roff_TE, NULL, NULL, 0, NULL },
265 { "T&", roff_T_, NULL, NULL, 0, NULL },
266 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
267 { "EN", roff_EN, NULL, NULL, 0, NULL },
268 { ".", roff_cblock, NULL, NULL, 0, NULL },
269 { NULL, roff_userdef, NULL, NULL, 0, NULL },
270 };
271
/*
 * NULL-terminated list of mdoc macro names.
 */
const	char *const __mdoc_reserved[] = {
	/* A */
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	/* B */
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	/* C, D */
	"Cd", "Cm",
	"Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Ds", "Dt", "Dv", "Dx", "D1",
	/* E */
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
	"En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
	/* F */
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	/* H, I, L */
	"Hf",
	"Ic", "In", "It",
	"Lb", "Li", "Lk", "Lp", "LP",
	/* M, N */
	"Me", "Ms", "Mt",
	"Nd", "Nm", "No", "Ns", "Nx",
	/* O */
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	/* P */
	"Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
	/* Q, R */
	"Qc", "Ql", "Qo", "Qq", "Or",
	"Rd", "Re", "Rs", "Rv",
	/* S */
	"Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	/* T, U, V, X */
	"Ta", "Tn",
	"Ud", "Ux",
	"Va", "Vt",
	"Xc", "Xo", "Xr",
	/* Bibliographic %-macros */
	"%A", "%B", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
293
/*
 * NULL-terminated list of man macro names.
 */
const	char *const __man_reserved[] = {
	"AT",
	"B", "BI", "BR", "BT",
	"DE", "DS", "DT",
	"EE", "EN", "EQ", "EX",
	"HF", "HP",
	"I", "IB", "IP", "IR",
	"LP",
	"ME", "MT",
	"OP",
	"P", "PD", "PP", "PT",
	"R", "RB", "RE", "RI", "RS",
	"SB", "SH", "SM", "SS", "SY",
	"TE", "TH", "TP", "TQ", "TS", "T&",
	"UC", "UE", "UR",
	"YS",
	NULL
};
302
303 /* Array of injected predefined strings. */
304 #define PREDEFS_MAX 38
305 static const struct predef predefs[PREDEFS_MAX] = {
306 #include "predefs.in"
307 };
308
/*
 * Map a request name to its hash bucket: the offset of its first
 * character from ASCII_LO.  See roffhash_find().
 * The argument is parenthesized such that pointer expressions
 * like "s + 1" can safely be passed.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
311
/*
 * State of the input line trap; see roff_parsetext().
 */
static	int	 roffit_lines;	/* number of lines to delay */
static	char	*roffit_macro;	/* nil-terminated macro line */
314
315 static void
316 roffhash_init(void)
317 {
318 struct roffmac *n;
319 int buc, i;
320
321 for (i = 0; i < (int)ROFF_USERDEF; i++) {
322 assert(roffs[i].name[0] >= ASCII_LO);
323 assert(roffs[i].name[0] <= ASCII_HI);
324
325 buc = ROFF_HASH(roffs[i].name);
326
327 if (NULL != (n = hash[buc])) {
328 for ( ; n->next; n = n->next)
329 /* Do nothing. */ ;
330 n->next = &roffs[i];
331 } else
332 hash[buc] = &roffs[i];
333 }
334 }
335
336 /*
337 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
338 * the nil-terminated string name could be found.
339 */
340 static enum rofft
341 roffhash_find(const char *p, size_t s)
342 {
343 int buc;
344 struct roffmac *n;
345
346 /*
347 * libroff has an extremely simple hashtable, for the time
348 * being, which simply keys on the first character, which must
349 * be printable, then walks a chain. It works well enough until
350 * optimised.
351 */
352
353 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
354 return(ROFF_MAX);
355
356 buc = ROFF_HASH(p);
357
358 if (NULL == (n = hash[buc]))
359 return(ROFF_MAX);
360 for ( ; n; n = n->next)
361 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
362 return((enum rofft)(n - roffs));
363
364 return(ROFF_MAX);
365 }
366
367
368 /*
369 * Pop the current node off of the stack of roff instructions currently
370 * pending.
371 */
372 static void
373 roffnode_pop(struct roff *r)
374 {
375 struct roffnode *p;
376
377 assert(r->last);
378 p = r->last;
379
380 r->last = r->last->parent;
381 free(p->name);
382 free(p->end);
383 free(p);
384 }
385
386
387 /*
388 * Push a roff node onto the instruction stack. This must later be
389 * removed with roffnode_pop().
390 */
391 static void
392 roffnode_push(struct roff *r, enum rofft tok, const char *name,
393 int line, int col)
394 {
395 struct roffnode *p;
396
397 p = mandoc_calloc(1, sizeof(struct roffnode));
398 p->tok = tok;
399 if (name)
400 p->name = mandoc_strdup(name);
401 p->parent = r->last;
402 p->line = line;
403 p->col = col;
404 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
405
406 r->last = p;
407 }
408
409
410 static void
411 roff_free1(struct roff *r)
412 {
413 struct tbl_node *tbl;
414 struct eqn_node *e;
415 int i;
416
417 while (NULL != (tbl = r->first_tbl)) {
418 r->first_tbl = tbl->next;
419 tbl_free(tbl);
420 }
421
422 r->first_tbl = r->last_tbl = r->tbl = NULL;
423
424 while (NULL != (e = r->first_eqn)) {
425 r->first_eqn = e->next;
426 eqn_free(e);
427 }
428
429 r->first_eqn = r->last_eqn = r->eqn = NULL;
430
431 while (r->last)
432 roffnode_pop(r);
433
434 roff_freestr(r->strtab);
435 roff_freestr(r->xmbtab);
436
437 r->strtab = r->xmbtab = NULL;
438
439 roff_freereg(r->regtab);
440
441 r->regtab = NULL;
442
443 if (r->xtab)
444 for (i = 0; i < 128; i++)
445 free(r->xtab[i].p);
446
447 free(r->xtab);
448 r->xtab = NULL;
449 }
450
451 void
452 roff_reset(struct roff *r)
453 {
454
455 roff_free1(r);
456 r->control = 0;
457 }
458
459
/*
 * Destroy a parser: release all data it owns, then the parser itself.
 * The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
467
468
469 struct roff *
470 roff_alloc(enum mparset type, struct mparse *parse, int quick)
471 {
472 struct roff *r;
473
474 r = mandoc_calloc(1, sizeof(struct roff));
475 r->parsetype = type;
476 r->parse = parse;
477 r->quick = quick;
478 r->rstackpos = -1;
479
480 roffhash_init();
481
482 return(r);
483 }
484
485 /*
486 * In the current line, expand user-defined strings ("\*")
487 * and references to number registers ("\n").
488 * Also check the syntax of other escape sequences.
489 */
490 static enum rofferr
491 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
492 {
493 char ubuf[12]; /* buffer to print the number */
494 const char *stesc; /* start of an escape sequence ('\\') */
495 const char *stnam; /* start of the name, after "[(*" */
496 const char *cp; /* end of the name, e.g. before ']' */
497 const char *res; /* the string to be substituted */
498 char *nbuf; /* new buffer to copy bufp to */
499 size_t nsz; /* size of the new buffer */
500 size_t maxl; /* expected length of the escape name */
501 size_t naml; /* actual length of the escape name */
502 int expand_count; /* to avoid infinite loops */
503
504 expand_count = 0;
505
506 again:
507 cp = *bufp + pos;
508 while (NULL != (cp = strchr(cp, '\\'))) {
509 stesc = cp++;
510
511 /*
512 * The second character must be an asterisk or an n.
513 * If it isn't, skip it anyway: It is escaped,
514 * so it can't start another escape sequence.
515 */
516
517 if ('\0' == *cp)
518 return(ROFF_CONT);
519
520 switch (*cp) {
521 case ('*'):
522 res = NULL;
523 break;
524 case ('n'):
525 res = ubuf;
526 break;
527 default:
528 if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
529 continue;
530 mandoc_msg
531 (MANDOCERR_BADESCAPE, r->parse,
532 ln, (int)(stesc - *bufp), NULL);
533 return(ROFF_CONT);
534 }
535
536 cp++;
537
538 /*
539 * The third character decides the length
540 * of the name of the string or register.
541 * Save a pointer to the name.
542 */
543
544 switch (*cp) {
545 case ('\0'):
546 return(ROFF_CONT);
547 case ('('):
548 cp++;
549 maxl = 2;
550 break;
551 case ('['):
552 cp++;
553 maxl = 0;
554 break;
555 default:
556 maxl = 1;
557 break;
558 }
559 stnam = cp;
560
561 /* Advance to the end of the name. */
562
563 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
564 if ('\0' == *cp) {
565 mandoc_msg
566 (MANDOCERR_BADESCAPE,
567 r->parse, ln,
568 (int)(stesc - *bufp), NULL);
569 return(ROFF_CONT);
570 }
571 if (0 == maxl && ']' == *cp)
572 break;
573 }
574
575 /*
576 * Retrieve the replacement string; if it is
577 * undefined, resume searching for escapes.
578 */
579
580 if (NULL == res)
581 res = roff_getstrn(r, stnam, naml);
582 else
583 snprintf(ubuf, sizeof(ubuf), "%d",
584 roff_getregn(r, stnam, naml));
585
586 if (NULL == res) {
587 mandoc_msg
588 (MANDOCERR_BADESCAPE, r->parse,
589 ln, (int)(stesc - *bufp), NULL);
590 res = "";
591 }
592
593 /* Replace the escape sequence by the string. */
594
595 pos = stesc - *bufp;
596
597 nsz = *szp + strlen(res) + 1;
598 nbuf = mandoc_malloc(nsz);
599
600 strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
601 strlcat(nbuf, res, nsz);
602 strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);
603
604 free(*bufp);
605
606 *bufp = nbuf;
607 *szp = nsz;
608
609 if (EXPAND_LIMIT >= ++expand_count)
610 goto again;
611
612 /* Just leave the string unexpanded. */
613 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
614 return(ROFF_IGN);
615 }
616 return(ROFF_CONT);
617 }
618
619 /*
620 * Process text streams:
621 * Convert all breakable hyphens into ASCII_HYPH.
622 * Decrement and spring input line trap.
623 */
624 static enum rofferr
625 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
626 {
627 size_t sz;
628 const char *start;
629 char *p;
630 int isz;
631 enum mandoc_esc esc;
632
633 start = p = *bufp + pos;
634
635 while ('\0' != *p) {
636 sz = strcspn(p, "-\\");
637 p += sz;
638
639 if ('\0' == *p)
640 break;
641
642 if ('\\' == *p) {
643 /* Skip over escapes. */
644 p++;
645 esc = mandoc_escape((const char **)&p, NULL, NULL);
646 if (ESCAPE_ERROR == esc)
647 break;
648 continue;
649 } else if (p == start) {
650 p++;
651 continue;
652 }
653
654 if (isalpha((unsigned char)p[-1]) &&
655 isalpha((unsigned char)p[1]))
656 *p = ASCII_HYPH;
657 p++;
658 }
659
660 /* Spring the input line trap. */
661 if (1 == roffit_lines) {
662 isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
663 if (-1 == isz) {
664 perror(NULL);
665 exit((int)MANDOCLEVEL_SYSERR);
666 }
667 free(*bufp);
668 *bufp = p;
669 *szp = isz + 1;
670 *offs = 0;
671 free(roffit_macro);
672 roffit_lines = 0;
673 return(ROFF_REPARSE);
674 } else if (1 < roffit_lines)
675 --roffit_lines;
676 return(ROFF_CONT);
677 }
678
679 enum rofferr
680 roff_parseln(struct roff *r, int ln, char **bufp,
681 size_t *szp, int pos, int *offs)
682 {
683 enum rofft t;
684 enum rofferr e;
685 int ppos, ctl;
686
687 /*
688 * Run the reserved-word filter only if we have some reserved
689 * words to fill in.
690 */
691
692 e = roff_res(r, bufp, szp, ln, pos);
693 if (ROFF_IGN == e)
694 return(e);
695 assert(ROFF_CONT == e);
696
697 ppos = pos;
698 ctl = roff_getcontrol(r, *bufp, &pos);
699
700 /*
701 * First, if a scope is open and we're not a macro, pass the
702 * text through the macro's filter. If a scope isn't open and
703 * we're not a macro, just let it through.
704 * Finally, if there's an equation scope open, divert it into it
705 * no matter our state.
706 */
707
708 if (r->last && ! ctl) {
709 t = r->last->tok;
710 assert(roffs[t].text);
711 e = (*roffs[t].text)
712 (r, t, bufp, szp, ln, pos, pos, offs);
713 assert(ROFF_IGN == e || ROFF_CONT == e);
714 if (ROFF_CONT != e)
715 return(e);
716 }
717 if (r->eqn)
718 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
719 if ( ! ctl) {
720 if (r->tbl)
721 return(tbl_read(r->tbl, ln, *bufp, pos));
722 return(roff_parsetext(bufp, szp, pos, offs));
723 }
724
725 /*
726 * If a scope is open, go to the child handler for that macro,
727 * as it may want to preprocess before doing anything with it.
728 * Don't do so if an equation is open.
729 */
730
731 if (r->last) {
732 t = r->last->tok;
733 assert(roffs[t].sub);
734 return((*roffs[t].sub)
735 (r, t, bufp, szp,
736 ln, ppos, pos, offs));
737 }
738
739 /*
740 * Lastly, as we've no scope open, try to look up and execute
741 * the new macro. If no macro is found, simply return and let
742 * the compilers handle it.
743 */
744
745 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
746 return(ROFF_CONT);
747
748 assert(roffs[t].proc);
749 return((*roffs[t].proc)
750 (r, t, bufp, szp,
751 ln, ppos, pos, offs));
752 }
753
754
755 void
756 roff_endparse(struct roff *r)
757 {
758
759 if (r->last)
760 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
761 r->last->line, r->last->col, NULL);
762
763 if (r->eqn) {
764 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
765 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
766 eqn_end(&r->eqn);
767 }
768
769 if (r->tbl) {
770 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
771 r->tbl->line, r->tbl->pos, NULL);
772 tbl_end(&r->tbl);
773 }
774 }
775
776 /*
777 * Parse a roff node's type from the input buffer. This must be in the
778 * form of ".foo xxx" in the usual way.
779 */
780 static enum rofft
781 roff_parse(struct roff *r, const char *buf, int *pos)
782 {
783 const char *mac;
784 size_t maclen;
785 enum rofft t;
786
787 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
788 '\t' == buf[*pos] || ' ' == buf[*pos])
789 return(ROFF_MAX);
790
791 /* We stop the macro parse at an escape, tab, space, or nil. */
792
793 mac = buf + *pos;
794 maclen = strcspn(mac, " \\\t\0");
795
796 t = (r->current_string = roff_getstrn(r, mac, maclen))
797 ? ROFF_USERDEF : roffhash_find(mac, maclen);
798
799 *pos += (int)maclen;
800
801 while (buf[*pos] && ' ' == buf[*pos])
802 (*pos)++;
803
804 return(t);
805 }
806
807 /* ARGSUSED */
808 static enum rofferr
809 roff_cblock(ROFF_ARGS)
810 {
811
812 /*
813 * A block-close `..' should only be invoked as a child of an
814 * ignore macro, otherwise raise a warning and just ignore it.
815 */
816
817 if (NULL == r->last) {
818 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
819 return(ROFF_IGN);
820 }
821
822 switch (r->last->tok) {
823 case (ROFF_am):
824 /* FALLTHROUGH */
825 case (ROFF_ami):
826 /* FALLTHROUGH */
827 case (ROFF_am1):
828 /* FALLTHROUGH */
829 case (ROFF_de):
830 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
831 /* FALLTHROUGH */
832 case (ROFF_dei):
833 /* FALLTHROUGH */
834 case (ROFF_ig):
835 break;
836 default:
837 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
838 return(ROFF_IGN);
839 }
840
841 if ((*bufp)[pos])
842 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
843
844 roffnode_pop(r);
845 roffnode_cleanscope(r);
846 return(ROFF_IGN);
847
848 }
849
850
851 static void
852 roffnode_cleanscope(struct roff *r)
853 {
854
855 while (r->last) {
856 if (--r->last->endspan != 0)
857 break;
858 roffnode_pop(r);
859 }
860 }
861
862
863 static void
864 roff_ccond(struct roff *r, int ln, int ppos)
865 {
866
867 if (NULL == r->last) {
868 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
869 return;
870 }
871
872 switch (r->last->tok) {
873 case (ROFF_el):
874 /* FALLTHROUGH */
875 case (ROFF_ie):
876 /* FALLTHROUGH */
877 case (ROFF_if):
878 break;
879 default:
880 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
881 return;
882 }
883
884 if (r->last->endspan > -1) {
885 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
886 return;
887 }
888
889 roffnode_pop(r);
890 roffnode_cleanscope(r);
891 return;
892 }
893
894
895 /* ARGSUSED */
896 static enum rofferr
897 roff_block(ROFF_ARGS)
898 {
899 int sv;
900 size_t sz;
901 char *name;
902
903 name = NULL;
904
905 if (ROFF_ig != tok) {
906 if ('\0' == (*bufp)[pos]) {
907 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
908 return(ROFF_IGN);
909 }
910
911 /*
912 * Re-write `de1', since we don't really care about
913 * groff's strange compatibility mode, into `de'.
914 */
915
916 if (ROFF_de1 == tok)
917 tok = ROFF_de;
918 if (ROFF_de == tok)
919 name = *bufp + pos;
920 else
921 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
922 roffs[tok].name);
923
924 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
925 pos++;
926
927 while (isspace((unsigned char)(*bufp)[pos]))
928 (*bufp)[pos++] = '\0';
929 }
930
931 roffnode_push(r, tok, name, ln, ppos);
932
933 /*
934 * At the beginning of a `de' macro, clear the existing string
935 * with the same name, if there is one. New content will be
936 * appended from roff_block_text() in multiline mode.
937 */
938
939 if (ROFF_de == tok)
940 roff_setstr(r, name, "", 0);
941
942 if ('\0' == (*bufp)[pos])
943 return(ROFF_IGN);
944
945 /* If present, process the custom end-of-line marker. */
946
947 sv = pos;
948 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
949 pos++;
950
951 /*
952 * Note: groff does NOT like escape characters in the input.
953 * Instead of detecting this, we're just going to let it fly and
954 * to hell with it.
955 */
956
957 assert(pos > sv);
958 sz = (size_t)(pos - sv);
959
960 if (1 == sz && '.' == (*bufp)[sv])
961 return(ROFF_IGN);
962
963 r->last->end = mandoc_malloc(sz + 1);
964
965 memcpy(r->last->end, *bufp + sv, sz);
966 r->last->end[(int)sz] = '\0';
967
968 if ((*bufp)[pos])
969 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
970
971 return(ROFF_IGN);
972 }
973
974
975 /* ARGSUSED */
976 static enum rofferr
977 roff_block_sub(ROFF_ARGS)
978 {
979 enum rofft t;
980 int i, j;
981
982 /*
983 * First check whether a custom macro exists at this level. If
984 * it does, then check against it. This is some of groff's
985 * stranger behaviours. If we encountered a custom end-scope
986 * tag and that tag also happens to be a "real" macro, then we
987 * need to try interpreting it again as a real macro. If it's
988 * not, then return ignore. Else continue.
989 */
990
991 if (r->last->end) {
992 for (i = pos, j = 0; r->last->end[j]; j++, i++)
993 if ((*bufp)[i] != r->last->end[j])
994 break;
995
996 if ('\0' == r->last->end[j] &&
997 ('\0' == (*bufp)[i] ||
998 ' ' == (*bufp)[i] ||
999 '\t' == (*bufp)[i])) {
1000 roffnode_pop(r);
1001 roffnode_cleanscope(r);
1002
1003 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1004 i++;
1005
1006 pos = i;
1007 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1008 return(ROFF_RERUN);
1009 return(ROFF_IGN);
1010 }
1011 }
1012
1013 /*
1014 * If we have no custom end-query or lookup failed, then try
1015 * pulling it out of the hashtable.
1016 */
1017
1018 t = roff_parse(r, *bufp, &pos);
1019
1020 /*
1021 * Macros other than block-end are only significant
1022 * in `de' blocks; elsewhere, simply throw them away.
1023 */
1024 if (ROFF_cblock != t) {
1025 if (ROFF_de == tok)
1026 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1027 return(ROFF_IGN);
1028 }
1029
1030 assert(roffs[t].proc);
1031 return((*roffs[t].proc)(r, t, bufp, szp,
1032 ln, ppos, pos, offs));
1033 }
1034
1035
1036 /* ARGSUSED */
1037 static enum rofferr
1038 roff_block_text(ROFF_ARGS)
1039 {
1040
1041 if (ROFF_de == tok)
1042 roff_setstr(r, r->last->name, *bufp + pos, 2);
1043
1044 return(ROFF_IGN);
1045 }
1046
1047
1048 /* ARGSUSED */
1049 static enum rofferr
1050 roff_cond_sub(ROFF_ARGS)
1051 {
1052 enum rofft t;
1053 enum roffrule rr;
1054 char *ep;
1055
1056 rr = r->last->rule;
1057 roffnode_cleanscope(r);
1058 t = roff_parse(r, *bufp, &pos);
1059
1060 /*
1061 * Fully handle known macros when they are structurally
1062 * required or when the conditional evaluated to true.
1063 */
1064
1065 if ((ROFF_MAX != t) &&
1066 (ROFFRULE_ALLOW == rr ||
1067 ROFFMAC_STRUCT & roffs[t].flags)) {
1068 assert(roffs[t].proc);
1069 return((*roffs[t].proc)(r, t, bufp, szp,
1070 ln, ppos, pos, offs));
1071 }
1072
1073 /* Always check for the closing delimiter `\}'. */
1074
1075 ep = &(*bufp)[pos];
1076 while (NULL != (ep = strchr(ep, '\\'))) {
1077 if ('}' != *(++ep))
1078 continue;
1079
1080 /*
1081 * If we're at the end of line, then just chop
1082 * off the \} and resize the buffer.
1083 * If we aren't, then convert it to spaces.
1084 */
1085
1086 if ('\0' == *(ep + 1)) {
1087 *--ep = '\0';
1088 *szp -= 2;
1089 } else
1090 *(ep - 1) = *ep = ' ';
1091
1092 roff_ccond(r, ln, pos);
1093 }
1094 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1095 }
1096
1097 /* ARGSUSED */
1098 static enum rofferr
1099 roff_cond_text(ROFF_ARGS)
1100 {
1101 char *ep;
1102 enum roffrule rr;
1103
1104 rr = r->last->rule;
1105 roffnode_cleanscope(r);
1106
1107 ep = &(*bufp)[pos];
1108 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1109 ep++;
1110 if ('}' != *ep)
1111 continue;
1112 *ep = '&';
1113 roff_ccond(r, ln, pos);
1114 }
1115 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1116 }
1117
/*
 * Parse an optionally negative decimal integer from the string v,
 * starting at index *pos.
 * On success, store the value in *res, advance *pos to the first
 * character after the number, and return 1.
 * If there is no digit, return 0 and leave *pos unchanged;
 * *res is 0 in that case.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	/*
	 * Shift the digits collected so far by one decimal place,
	 * then add the new digit.  This must be a plain assignment:
	 * using += here would add the old value once more.
	 */

	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';

	/* Nothing but possibly a minus sign was consumed. */

	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1139
/*
 * Parse a comparison operator from the string v at index *pos.
 * Recognized are "=", "==", "<", ">", "<=", and ">=".
 * The operator is stored in *res, with 'l' standing for "<="
 * and 'g' for ">=", *pos is advanced past it, and the operator
 * character is returned.
 * If there is no operator, the character found is stored in *res,
 * *pos remains unchanged, and 0 is returned.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int e;

	*res = v[*pos];

	switch (*res) {
	case '=':
		/* FALLTHROUGH */
	case '>':
		/* FALLTHROUGH */
	case '<':
		break;
	default:
		return(0);
	}

	/*
	 * Only look at the following character now that the
	 * current one is known not to be the terminating nil;
	 * peeking earlier would read beyond the end of the string
	 * when the operator is missing at the end of the line.
	 */

	e = v[*pos + 1] == '=';

	if (e) {
		if ('>' == *res)
			*res = 'g';
		else if ('<' == *res)
			*res = 'l';
	}

	*pos += 1 + e;

	return(*res);
}
1167
1168 static enum roffrule
1169 roff_evalcond(const char *v, int *pos)
1170 {
1171 int not, lh, rh;
1172 char op;
1173
1174 switch (v[*pos]) {
1175 case ('n'):
1176 (*pos)++;
1177 return(ROFFRULE_ALLOW);
1178 case ('e'):
1179 /* FALLTHROUGH */
1180 case ('o'):
1181 /* FALLTHROUGH */
1182 case ('t'):
1183 (*pos)++;
1184 return(ROFFRULE_DENY);
1185 case ('!'):
1186 (*pos)++;
1187 not = 1;
1188 break;
1189 default:
1190 not = 0;
1191 break;
1192 }
1193
1194 if (!roff_getnum(v, pos, &lh))
1195 return ROFFRULE_DENY;
1196 if (!roff_getop(v, pos, &op)) {
1197 if (lh < 0)
1198 lh = 0;
1199 goto out;
1200 }
1201 if (!roff_getnum(v, pos, &rh))
1202 return ROFFRULE_DENY;
1203 switch (op) {
1204 case 'g':
1205 lh = lh >= rh;
1206 break;
1207 case 'l':
1208 lh = lh <= rh;
1209 break;
1210 case '=':
1211 lh = lh == rh;
1212 break;
1213 case '>':
1214 lh = lh > rh;
1215 break;
1216 case '<':
1217 lh = lh < rh;
1218 break;
1219 default:
1220 return ROFFRULE_DENY;
1221 }
1222 out:
1223 if (not)
1224 lh = !lh;
1225 return lh ? ROFFRULE_ALLOW : ROFFRULE_DENY;
1226 }
1227
1228 /* ARGSUSED */
1229 static enum rofferr
1230 roff_line_ignore(ROFF_ARGS)
1231 {
1232
1233 return(ROFF_IGN);
1234 }
1235
1236 /* ARGSUSED */
1237 static enum rofferr
1238 roff_cond(ROFF_ARGS)
1239 {
1240
1241 roffnode_push(r, tok, NULL, ln, ppos);
1242
1243 /*
1244 * An `.el' has no conditional body: it will consume the value
1245 * of the current rstack entry set in prior `ie' calls or
1246 * defaults to DENY.
1247 *
1248 * If we're not an `el', however, then evaluate the conditional.
1249 */
1250
1251 r->last->rule = ROFF_el == tok ?
1252 (r->rstackpos < 0 ?
1253 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1254 roff_evalcond(*bufp, &pos);
1255
1256 /*
1257 * An if-else will put the NEGATION of the current evaluated
1258 * conditional into the stack of rules.
1259 */
1260
1261 if (ROFF_ie == tok) {
1262 if (r->rstackpos == RSTACK_MAX - 1) {
1263 mandoc_msg(MANDOCERR_MEM,
1264 r->parse, ln, ppos, NULL);
1265 return(ROFF_ERR);
1266 }
1267 r->rstack[++r->rstackpos] =
1268 ROFFRULE_DENY == r->last->rule ?
1269 ROFFRULE_ALLOW : ROFFRULE_DENY;
1270 }
1271
1272 /* If the parent has false as its rule, then so do we. */
1273
1274 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1275 r->last->rule = ROFFRULE_DENY;
1276
1277 /*
1278 * Determine scope.
1279 * If there is nothing on the line after the conditional,
1280 * not even whitespace, use next-line scope.
1281 */
1282
1283 if ('\0' == (*bufp)[pos]) {
1284 r->last->endspan = 2;
1285 goto out;
1286 }
1287
1288 while (' ' == (*bufp)[pos])
1289 pos++;
1290
1291 /* An opening brace requests multiline scope. */
1292
1293 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1294 r->last->endspan = -1;
1295 pos += 2;
1296 goto out;
1297 }
1298
1299 /*
1300 * Anything else following the conditional causes
1301 * single-line scope. Warn if the scope contains
1302 * nothing but trailing whitespace.
1303 */
1304
1305 if ('\0' == (*bufp)[pos])
1306 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1307
1308 r->last->endspan = 1;
1309
1310 out:
1311 *offs = pos;
1312 return(ROFF_RERUN);
1313 }
1314
1315
1316 /* ARGSUSED */
1317 static enum rofferr
1318 roff_ds(ROFF_ARGS)
1319 {
1320 char *name, *string;
1321
1322 /*
1323 * A symbol is named by the first word following the macro
1324 * invocation up to a space. Its value is anything after the
1325 * name's trailing whitespace and optional double-quote. Thus,
1326 *
1327 * [.ds foo "bar " ]
1328 *
1329 * will have `bar " ' as its value.
1330 */
1331
1332 string = *bufp + pos;
1333 name = roff_getname(r, &string, ln, pos);
1334 if ('\0' == *name)
1335 return(ROFF_IGN);
1336
1337 /* Read past initial double-quote. */
1338 if ('"' == *string)
1339 string++;
1340
1341 /* The rest is the value. */
1342 roff_setstr(r, name, string, ROFF_as == tok);
1343 return(ROFF_IGN);
1344 }
1345
1346 void
1347 roff_setreg(struct roff *r, const char *name, int val, char sign)
1348 {
1349 struct roffreg *reg;
1350
1351 /* Search for an existing register with the same name. */
1352 reg = r->regtab;
1353
1354 while (reg && strcmp(name, reg->key.p))
1355 reg = reg->next;
1356
1357 if (NULL == reg) {
1358 /* Create a new register. */
1359 reg = mandoc_malloc(sizeof(struct roffreg));
1360 reg->key.p = mandoc_strdup(name);
1361 reg->key.sz = strlen(name);
1362 reg->val = 0;
1363 reg->next = r->regtab;
1364 r->regtab = reg;
1365 }
1366
1367 if ('+' == sign)
1368 reg->val += val;
1369 else if ('-' == sign)
1370 reg->val -= val;
1371 else
1372 reg->val = val;
1373 }
1374
/*
 * Look up a predefined read-only number register by the first
 * character of *name.
 * Returns the fixed value of the register, or -1 if the character
 * does not name a predefined register.  Should a predefined register
 * with the value -1 ever be needed, a different "not found" marker
 * would have to be chosen.
 */
static int
roff_getregro(const char *name)
{

	switch (name[0]) {
	case 'g':	/* Groff compatibility mode is always on. */
	case 'T':	/* Some output device is always defined. */
		return(1);
	case 'A':	/* ASCII approximation mode is always off. */
	case 'j':	/* Always adjust left margin only. */
		return(0);
	case 'H':	/* Fixed horizontal resolution. */
		return(24);
	case 'V':	/* Fixed vertical resolution. */
		return(40);
	default:
		break;
	}
	return(-1);
}
1402
1403 int
1404 roff_getreg(const struct roff *r, const char *name)
1405 {
1406 struct roffreg *reg;
1407 int val;
1408
1409 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1410 val = roff_getregro(name + 1);
1411 if (-1 != val)
1412 return (val);
1413 }
1414
1415 for (reg = r->regtab; reg; reg = reg->next)
1416 if (0 == strcmp(name, reg->key.p))
1417 return(reg->val);
1418
1419 return(0);
1420 }
1421
1422 static int
1423 roff_getregn(const struct roff *r, const char *name, size_t len)
1424 {
1425 struct roffreg *reg;
1426 int val;
1427
1428 if ('.' == name[0] && 2 == len) {
1429 val = roff_getregro(name + 1);
1430 if (-1 != val)
1431 return (val);
1432 }
1433
1434 for (reg = r->regtab; reg; reg = reg->next)
1435 if (len == reg->key.sz &&
1436 0 == strncmp(name, reg->key.p, len))
1437 return(reg->val);
1438
1439 return(0);
1440 }
1441
1442 static void
1443 roff_freereg(struct roffreg *reg)
1444 {
1445 struct roffreg *old_reg;
1446
1447 while (NULL != reg) {
1448 free(reg->key.p);
1449 old_reg = reg;
1450 reg = reg->next;
1451 free(old_reg);
1452 }
1453 }
1454
1455 /* ARGSUSED */
1456 static enum rofferr
1457 roff_nr(ROFF_ARGS)
1458 {
1459 const char *key;
1460 char *val;
1461 size_t sz;
1462 int iv;
1463 char sign;
1464
1465 val = *bufp + pos;
1466 key = roff_getname(r, &val, ln, pos);
1467
1468 sign = *val;
1469 if ('+' == sign || '-' == sign)
1470 val++;
1471
1472 sz = strspn(val, "0123456789");
1473 iv = sz ? mandoc_strntoi(val, sz, 10) : 0;
1474
1475 roff_setreg(r, key, iv, sign);
1476
1477 return(ROFF_IGN);
1478 }
1479
1480 /* ARGSUSED */
1481 static enum rofferr
1482 roff_rm(ROFF_ARGS)
1483 {
1484 const char *name;
1485 char *cp;
1486
1487 cp = *bufp + pos;
1488 while ('\0' != *cp) {
1489 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1490 if ('\0' != *name)
1491 roff_setstr(r, name, NULL, 0);
1492 }
1493 return(ROFF_IGN);
1494 }
1495
1496 /* ARGSUSED */
1497 static enum rofferr
1498 roff_it(ROFF_ARGS)
1499 {
1500 char *cp;
1501 size_t len;
1502 int iv;
1503
1504 /* Parse the number of lines. */
1505 cp = *bufp + pos;
1506 len = strcspn(cp, " \t");
1507 cp[len] = '\0';
1508 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1509 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1510 ln, ppos, *bufp + 1);
1511 return(ROFF_IGN);
1512 }
1513 cp += len + 1;
1514
1515 /* Arm the input line trap. */
1516 roffit_lines = iv;
1517 roffit_macro = mandoc_strdup(cp);
1518 return(ROFF_IGN);
1519 }
1520
1521 /* ARGSUSED */
1522 static enum rofferr
1523 roff_Dd(ROFF_ARGS)
1524 {
1525 const char *const *cp;
1526
1527 if (0 == r->quick && MPARSE_MDOC != r->parsetype)
1528 for (cp = __mdoc_reserved; *cp; cp++)
1529 roff_setstr(r, *cp, NULL, 0);
1530
1531 return(ROFF_CONT);
1532 }
1533
1534 /* ARGSUSED */
1535 static enum rofferr
1536 roff_TH(ROFF_ARGS)
1537 {
1538 const char *const *cp;
1539
1540 if (0 == r->quick && MPARSE_MDOC != r->parsetype)
1541 for (cp = __man_reserved; *cp; cp++)
1542 roff_setstr(r, *cp, NULL, 0);
1543
1544 return(ROFF_CONT);
1545 }
1546
1547 /* ARGSUSED */
1548 static enum rofferr
1549 roff_TE(ROFF_ARGS)
1550 {
1551
1552 if (NULL == r->tbl)
1553 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1554 else
1555 tbl_end(&r->tbl);
1556
1557 return(ROFF_IGN);
1558 }
1559
1560 /* ARGSUSED */
1561 static enum rofferr
1562 roff_T_(ROFF_ARGS)
1563 {
1564
1565 if (NULL == r->tbl)
1566 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1567 else
1568 tbl_restart(ppos, ln, r->tbl);
1569
1570 return(ROFF_IGN);
1571 }
1572
#if 0
/*
 * Currently compiled out.
 * End the equation in progress, if there is one.
 * Returns 1 if an equation was open and eqn_end() returned
 * ROFF_EQN, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return(0);
	if (ROFF_EQN != eqn_end(&r->eqn))
		return(0);
	return(1);
}
#endif
1581
1582 static void
1583 roff_openeqn(struct roff *r, const char *name, int line,
1584 int offs, const char *buf)
1585 {
1586 struct eqn_node *e;
1587 int poff;
1588
1589 assert(NULL == r->eqn);
1590 e = eqn_alloc(name, offs, line, r->parse);
1591
1592 if (r->last_eqn)
1593 r->last_eqn->next = e;
1594 else
1595 r->first_eqn = r->last_eqn = e;
1596
1597 r->eqn = r->last_eqn = e;
1598
1599 if (buf) {
1600 poff = 0;
1601 eqn_read(&r->eqn, line, buf, offs, &poff);
1602 }
1603 }
1604
1605 /* ARGSUSED */
1606 static enum rofferr
1607 roff_EQ(ROFF_ARGS)
1608 {
1609
1610 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1611 return(ROFF_IGN);
1612 }
1613
1614 /* ARGSUSED */
1615 static enum rofferr
1616 roff_EN(ROFF_ARGS)
1617 {
1618
1619 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1620 return(ROFF_IGN);
1621 }
1622
1623 /* ARGSUSED */
1624 static enum rofferr
1625 roff_TS(ROFF_ARGS)
1626 {
1627 struct tbl_node *tbl;
1628
1629 if (r->tbl) {
1630 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1631 tbl_end(&r->tbl);
1632 }
1633
1634 tbl = tbl_alloc(ppos, ln, r->parse);
1635
1636 if (r->last_tbl)
1637 r->last_tbl->next = tbl;
1638 else
1639 r->first_tbl = r->last_tbl = tbl;
1640
1641 r->tbl = r->last_tbl = tbl;
1642 return(ROFF_IGN);
1643 }
1644
1645 /* ARGSUSED */
1646 static enum rofferr
1647 roff_cc(ROFF_ARGS)
1648 {
1649 const char *p;
1650
1651 p = *bufp + pos;
1652
1653 if ('\0' == *p || '.' == (r->control = *p++))
1654 r->control = 0;
1655
1656 if ('\0' != *p)
1657 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1658
1659 return(ROFF_IGN);
1660 }
1661
1662 /* ARGSUSED */
1663 static enum rofferr
1664 roff_tr(ROFF_ARGS)
1665 {
1666 const char *p, *first, *second;
1667 size_t fsz, ssz;
1668 enum mandoc_esc esc;
1669
1670 p = *bufp + pos;
1671
1672 if ('\0' == *p) {
1673 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1674 return(ROFF_IGN);
1675 }
1676
1677 while ('\0' != *p) {
1678 fsz = ssz = 1;
1679
1680 first = p++;
1681 if ('\\' == *first) {
1682 esc = mandoc_escape(&p, NULL, NULL);
1683 if (ESCAPE_ERROR == esc) {
1684 mandoc_msg
1685 (MANDOCERR_BADESCAPE, r->parse,
1686 ln, (int)(p - *bufp), NULL);
1687 return(ROFF_IGN);
1688 }
1689 fsz = (size_t)(p - first);
1690 }
1691
1692 second = p++;
1693 if ('\\' == *second) {
1694 esc = mandoc_escape(&p, NULL, NULL);
1695 if (ESCAPE_ERROR == esc) {
1696 mandoc_msg
1697 (MANDOCERR_BADESCAPE, r->parse,
1698 ln, (int)(p - *bufp), NULL);
1699 return(ROFF_IGN);
1700 }
1701 ssz = (size_t)(p - second);
1702 } else if ('\0' == *second) {
1703 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1704 ln, (int)(p - *bufp), NULL);
1705 second = " ";
1706 p--;
1707 }
1708
1709 if (fsz > 1) {
1710 roff_setstrn(&r->xmbtab, first,
1711 fsz, second, ssz, 0);
1712 continue;
1713 }
1714
1715 if (NULL == r->xtab)
1716 r->xtab = mandoc_calloc
1717 (128, sizeof(struct roffstr));
1718
1719 free(r->xtab[(int)*first].p);
1720 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1721 r->xtab[(int)*first].sz = ssz;
1722 }
1723
1724 return(ROFF_IGN);
1725 }
1726
1727 /* ARGSUSED */
1728 static enum rofferr
1729 roff_so(ROFF_ARGS)
1730 {
1731 char *name;
1732
1733 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1734
1735 /*
1736 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1737 * opening anything that's not in our cwd or anything beneath
1738 * it. Thus, explicitly disallow traversing up the file-system
1739 * or using absolute paths.
1740 */
1741
1742 name = *bufp + pos;
1743 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1744 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1745 return(ROFF_ERR);
1746 }
1747
1748 *offs = pos;
1749 return(ROFF_SO);
1750 }
1751
1752 /* ARGSUSED */
1753 static enum rofferr
1754 roff_userdef(ROFF_ARGS)
1755 {
1756 const char *arg[9];
1757 char *cp, *n1, *n2;
1758 int i;
1759
1760 /*
1761 * Collect pointers to macro argument strings
1762 * and NUL-terminate them.
1763 */
1764 cp = *bufp + pos;
1765 for (i = 0; i < 9; i++)
1766 arg[i] = '\0' == *cp ? "" :
1767 mandoc_getarg(r->parse, &cp, ln, &pos);
1768
1769 /*
1770 * Expand macro arguments.
1771 */
1772 *szp = 0;
1773 n1 = cp = mandoc_strdup(r->current_string);
1774 while (NULL != (cp = strstr(cp, "\\$"))) {
1775 i = cp[2] - '1';
1776 if (0 > i || 8 < i) {
1777 /* Not an argument invocation. */
1778 cp += 2;
1779 continue;
1780 }
1781
1782 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1783 n2 = mandoc_malloc(*szp);
1784
1785 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1786 strlcat(n2, arg[i], *szp);
1787 strlcat(n2, cp + 3, *szp);
1788
1789 cp = n2 + (cp - n1);
1790 free(n1);
1791 n1 = n2;
1792 }
1793
1794 /*
1795 * Replace the macro invocation
1796 * by the expanded macro.
1797 */
1798 free(*bufp);
1799 *bufp = n1;
1800 if (0 == *szp)
1801 *szp = strlen(*bufp) + 1;
1802
1803 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1804 ROFF_REPARSE : ROFF_APPEND);
1805 }
1806
1807 static char *
1808 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1809 {
1810 char *name, *cp;
1811
1812 name = *cpp;
1813 if ('\0' == *name)
1814 return(name);
1815
1816 /* Read until end of name. */
1817 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1818 if ('\\' != *cp)
1819 continue;
1820 cp++;
1821 if ('\\' == *cp)
1822 continue;
1823 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1824 *cp = '\0';
1825 name = cp;
1826 }
1827
1828 /* Nil-terminate name. */
1829 if ('\0' != *cp)
1830 *(cp++) = '\0';
1831
1832 /* Read past spaces. */
1833 while (' ' == *cp)
1834 cp++;
1835
1836 *cpp = cp;
1837 return(name);
1838 }
1839
1840 /*
1841 * Store *string into the user-defined string called *name.
1842 * To clear an existing entry, call with (*r, *name, NULL, 0).
1843 * append == 0: replace mode
1844 * append == 1: single-line append mode
1845 * append == 2: multiline append mode, append '\n' after each call
1846 */
1847 static void
1848 roff_setstr(struct roff *r, const char *name, const char *string,
1849 int append)
1850 {
1851
1852 roff_setstrn(&r->strtab, name, strlen(name), string,
1853 string ? strlen(string) : 0, append);
1854 }
1855
1856 static void
1857 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1858 const char *string, size_t stringsz, int append)
1859 {
1860 struct roffkv *n;
1861 char *c;
1862 int i;
1863 size_t oldch, newch;
1864
1865 /* Search for an existing string with the same name. */
1866 n = *r;
1867
1868 while (n && strcmp(name, n->key.p))
1869 n = n->next;
1870
1871 if (NULL == n) {
1872 /* Create a new string table entry. */
1873 n = mandoc_malloc(sizeof(struct roffkv));
1874 n->key.p = mandoc_strndup(name, namesz);
1875 n->key.sz = namesz;
1876 n->val.p = NULL;
1877 n->val.sz = 0;
1878 n->next = *r;
1879 *r = n;
1880 } else if (0 == append) {
1881 free(n->val.p);
1882 n->val.p = NULL;
1883 n->val.sz = 0;
1884 }
1885
1886 if (NULL == string)
1887 return;
1888
1889 /*
1890 * One additional byte for the '\n' in multiline mode,
1891 * and one for the terminating '\0'.
1892 */
1893 newch = stringsz + (1 < append ? 2u : 1u);
1894
1895 if (NULL == n->val.p) {
1896 n->val.p = mandoc_malloc(newch);
1897 *n->val.p = '\0';
1898 oldch = 0;
1899 } else {
1900 oldch = n->val.sz;
1901 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1902 }
1903
1904 /* Skip existing content in the destination buffer. */
1905 c = n->val.p + (int)oldch;
1906
1907 /* Append new content to the destination buffer. */
1908 i = 0;
1909 while (i < (int)stringsz) {
1910 /*
1911 * Rudimentary roff copy mode:
1912 * Handle escaped backslashes.
1913 */
1914 if ('\\' == string[i] && '\\' == string[i + 1])
1915 i++;
1916 *c++ = string[i++];
1917 }
1918
1919 /* Append terminating bytes. */
1920 if (1 < append)
1921 *c++ = '\n';
1922
1923 *c = '\0';
1924 n->val.sz = (int)(c - n->val.p);
1925 }
1926
1927 static const char *
1928 roff_getstrn(const struct roff *r, const char *name, size_t len)
1929 {
1930 const struct roffkv *n;
1931 int i;
1932
1933 for (n = r->strtab; n; n = n->next)
1934 if (0 == strncmp(name, n->key.p, len) &&
1935 '\0' == n->key.p[(int)len])
1936 return(n->val.p);
1937
1938 for (i = 0; i < PREDEFS_MAX; i++)
1939 if (0 == strncmp(name, predefs[i].name, len) &&
1940 '\0' == predefs[i].name[(int)len])
1941 return(predefs[i].str);
1942
1943 return(NULL);
1944 }
1945
1946 static void
1947 roff_freestr(struct roffkv *r)
1948 {
1949 struct roffkv *n, *nn;
1950
1951 for (n = r; n; n = nn) {
1952 free(n->key.p);
1953 free(n->val.p);
1954 nn = n->next;
1955 free(n);
1956 }
1957 }
1958
1959 const struct tbl_span *
1960 roff_span(const struct roff *r)
1961 {
1962
1963 return(r->tbl ? tbl_span(r->tbl) : NULL);
1964 }
1965
1966 const struct eqn *
1967 roff_eqn(const struct roff *r)
1968 {
1969
1970 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1971 }
1972
1973 /*
1974 * Duplicate an input string, making the appropriate character
1975 * conversations (as stipulated by `tr') along the way.
1976 * Returns a heap-allocated string with all the replacements made.
1977 */
1978 char *
1979 roff_strdup(const struct roff *r, const char *p)
1980 {
1981 const struct roffkv *cp;
1982 char *res;
1983 const char *pp;
1984 size_t ssz, sz;
1985 enum mandoc_esc esc;
1986
1987 if (NULL == r->xmbtab && NULL == r->xtab)
1988 return(mandoc_strdup(p));
1989 else if ('\0' == *p)
1990 return(mandoc_strdup(""));
1991
1992 /*
1993 * Step through each character looking for term matches
1994 * (remember that a `tr' can be invoked with an escape, which is
1995 * a glyph but the escape is multi-character).
1996 * We only do this if the character hash has been initialised
1997 * and the string is >0 length.
1998 */
1999
2000 res = NULL;
2001 ssz = 0;
2002
2003 while ('\0' != *p) {
2004 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2005 sz = r->xtab[(int)*p].sz;
2006 res = mandoc_realloc(res, ssz + sz + 1);
2007 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2008 ssz += sz;
2009 p++;
2010 continue;
2011 } else if ('\\' != *p) {
2012 res = mandoc_realloc(res, ssz + 2);
2013 res[ssz++] = *p++;
2014 continue;
2015 }
2016
2017 /* Search for term matches. */
2018 for (cp = r->xmbtab; cp; cp = cp->next)
2019 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2020 break;
2021
2022 if (NULL != cp) {
2023 /*
2024 * A match has been found.
2025 * Append the match to the array and move
2026 * forward by its keysize.
2027 */
2028 res = mandoc_realloc
2029 (res, ssz + cp->val.sz + 1);
2030 memcpy(res + ssz, cp->val.p, cp->val.sz);
2031 ssz += cp->val.sz;
2032 p += (int)cp->key.sz;
2033 continue;
2034 }
2035
2036 /*
2037 * Handle escapes carefully: we need to copy
2038 * over just the escape itself, or else we might
2039 * do replacements within the escape itself.
2040 * Make sure to pass along the bogus string.
2041 */
2042 pp = p++;
2043 esc = mandoc_escape(&p, NULL, NULL);
2044 if (ESCAPE_ERROR == esc) {
2045 sz = strlen(pp);
2046 res = mandoc_realloc(res, ssz + sz + 1);
2047 memcpy(res + ssz, pp, sz);
2048 break;
2049 }
2050 /*
2051 * We bail out on bad escapes.
2052 * No need to warn: we already did so when
2053 * roff_res() was called.
2054 */
2055 sz = (int)(p - pp);
2056 res = mandoc_realloc(res, ssz + sz + 1);
2057 memcpy(res + ssz, pp, sz);
2058 ssz += sz;
2059 }
2060
2061 res[(int)ssz] = '\0';
2062 return(res);
2063 }
2064
2065 /*
2066 * Find out whether a line is a macro line or not.
2067 * If it is, adjust the current position and return one; if it isn't,
2068 * return zero and don't change the current position.
2069 * If the control character has been set with `.cc', then let that grain
2070 * precedence.
2071 * This is slighly contrary to groff, where using the non-breaking
2072 * control character when `cc' has been invoked will cause the
2073 * non-breaking macro contents to be printed verbatim.
2074 */
2075 int
2076 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2077 {
2078 int pos;
2079
2080 pos = *ppos;
2081
2082 if (0 != r->control && cp[pos] == r->control)
2083 pos++;
2084 else if (0 != r->control)
2085 return(0);
2086 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2087 pos += 2;
2088 else if ('.' == cp[pos] || '\'' == cp[pos])
2089 pos++;
2090 else
2091 return(0);
2092
2093 while (' ' == cp[pos] || '\t' == cp[pos])
2094 pos++;
2095
2096 *ppos = pos;
2097 return(1);
2098 }