]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
In roff_cond_sub(), make sure that the incorrect input sequence `\\}',
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.197 2014/03/07 18:37:37 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "libroff.h"
30 #include "libmandoc.h"
31
/* Capacity of the rule stack that `ie' pushes to and `el' pops from. */
#define	RSTACK_MAX	128

/* Cap on expansions performed for one input line; breaks infinite loops. */
#define	EXPAND_LIMIT	1000
37
/*
 * Request tokens.  The order must stay in sync with the roffs[] table,
 * which is indexed by these values.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,	/* the "T&" request */
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* the "." name, i.e. a `..' block close */
	ROFF_USERDEF,	/* a user-defined macro; has no name in roffs[] */
	ROFF_MAX	/* number of tokens; also means "not found" */
};
78
/* Outcome of a conditional: whether the guarded input is processed. */
enum	roffrule {
	ROFFRULE_DENY,	/* condition false: content is ignored */
	ROFFRULE_ALLOW	/* condition true: content is passed on */
};
83
/*
 * Minimal string buffer: a pointer together with its cached length.
 */
struct	roffstr {
	char		*p;	/* NUL-terminated buffer */
	size_t		 sz;	/* saved strlen(p) */
};
91
92 /*
93 * A key-value roffstr pair as part of a singly-linked list.
94 */
95 struct roffkv {
96 struct roffstr key;
97 struct roffstr val;
98 struct roffkv *next; /* next in list */
99 };
100
101 /*
102 * A single number register as part of a singly-linked list.
103 */
104 struct roffreg {
105 struct roffstr key;
106 int val;
107 struct roffreg *next;
108 };
109
110 struct roff {
111 enum mparset parsetype; /* requested parse type */
112 struct mparse *parse; /* parse point */
113 int quick; /* skip standard macro deletion */
114 struct roffnode *last; /* leaf of stack */
115 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
116 char control; /* control character */
117 int rstackpos; /* position in rstack */
118 struct roffreg *regtab; /* number registers */
119 struct roffkv *strtab; /* user-defined strings & macros */
120 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
121 struct roffstr *xtab; /* single-byte trans table (`tr') */
122 const char *current_string; /* value of last called user macro */
123 struct tbl_node *first_tbl; /* first table parsed */
124 struct tbl_node *last_tbl; /* last table parsed */
125 struct tbl_node *tbl; /* current table being parsed */
126 struct eqn_node *last_eqn; /* last equation parsed */
127 struct eqn_node *first_eqn; /* first equation parsed */
128 struct eqn_node *eqn; /* current equation being parsed */
129 };
130
131 struct roffnode {
132 enum rofft tok; /* type of node */
133 struct roffnode *parent; /* up one in stack */
134 int line; /* parse line */
135 int col; /* parse col */
136 char *name; /* node name, e.g. macro name */
137 char *end; /* end-rules: custom token */
138 int endspan; /* end-rules: next-line or infty */
139 enum roffrule rule; /* current evaluation rule */
140 };
141
/* Parameter list shared by every request handler (see roffproc). */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
152
153 struct roffmac {
154 const char *name; /* macro name */
155 roffproc proc; /* process new macro */
156 roffproc text; /* process as child text of macro */
157 roffproc sub; /* process as child of macro */
158 int flags;
159 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
160 struct roffmac *next;
161 };
162
/* A predefined string: its input name and the text it stands for. */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one initializer (with trailing comma) for a predef table. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
170
/* Name lookup and the scope stack. */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);

/* Request handlers; all share the ROFF_ARGS parameter list. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);

/* Conditional evaluation. */
static	void		 roff_ccond(struct roff *, int, int);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	int		 roff_getnum(const char *, int *, int *);
static	int		 roff_getop(const char *, int *, char *);

/* Strings, registers and cleanup. */
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);

/* Line-level parsing. */
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
223
/*
 * Bounds of the first-character range that roffhash_find() accepts;
 * the table has one bucket per character in that range.
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; entries are chained through roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
231
232 static struct roffmac roffs[ROFF_MAX] = {
233 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
234 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
235 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
236 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
237 { "as", roff_ds, NULL, NULL, 0, NULL },
238 { "cc", roff_cc, NULL, NULL, 0, NULL },
239 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
240 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
241 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
242 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
243 { "ds", roff_ds, NULL, NULL, 0, NULL },
244 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
245 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
246 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
247 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
248 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
249 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
250 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
251 { "it", roff_it, NULL, NULL, 0, NULL },
252 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
253 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
254 { "nr", roff_nr, NULL, NULL, 0, NULL },
255 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
256 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
257 { "rm", roff_rm, NULL, NULL, 0, NULL },
258 { "so", roff_so, NULL, NULL, 0, NULL },
259 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
260 { "tr", roff_tr, NULL, NULL, 0, NULL },
261 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
262 { "TH", roff_TH, NULL, NULL, 0, NULL },
263 { "TS", roff_TS, NULL, NULL, 0, NULL },
264 { "TE", roff_TE, NULL, NULL, 0, NULL },
265 { "T&", roff_T_, NULL, NULL, 0, NULL },
266 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
267 { "EN", roff_EN, NULL, NULL, 0, NULL },
268 { ".", roff_cblock, NULL, NULL, 0, NULL },
269 { NULL, roff_userdef, NULL, NULL, 0, NULL },
270 };
271
/* NULL-terminated list of reserved mdoc macro names. */
const	char *const __mdoc_reserved[] = {
	/* A - B */
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	/* C - D */
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Ds", "Dt", "Dv", "Dx", "D1",
	/* E */
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
	"En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
	/* F */
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	/* H - L */
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
	/* M - N */
	"Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	/* O */
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	/* P */
	"Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
	/* Q - R */
	"Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
	/* S */
	"Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	/* T - X */
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	/* percent macros */
	"%A", "%B", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
293
/* NULL-terminated list of reserved man macro names. */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "BT",
	"DE", "DS", "DT",
	"EE", "EN", "EQ", "EX",
	"HF", "HP", "I", "IB", "IP", "IR",
	"LP", "ME", "MT", "OP",
	"P", "PD", "PP", "PT",
	"R", "RB", "RE", "RI", "RS",
	"SB", "SH", "SM", "SS", "SY",
	"TE", "TH", "TP", "TQ", "TS", "T&",
	"UC", "UE", "UR", "YS",
	NULL
};
302
303 /* Array of injected predefined strings. */
304 #define PREDEFS_MAX 38
305 static const struct predef predefs[PREDEFS_MAX] = {
306 #include "predefs.in"
307 };
308
/*
 * Bucket index for a name: its first character relative to ASCII_LO.
 * See roffhash_find().  The argument is parenthesized so that pointer
 * expressions such as `s + 1' expand correctly.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
311
/* State of the input line trap, consumed by roff_parsetext(). */
static	int	 roffit_lines;	/* number of lines to delay */
static	char	*roffit_macro;	/* nil-terminated macro line */
314
315 static void
316 roffhash_init(void)
317 {
318 struct roffmac *n;
319 int buc, i;
320
321 for (i = 0; i < (int)ROFF_USERDEF; i++) {
322 assert(roffs[i].name[0] >= ASCII_LO);
323 assert(roffs[i].name[0] <= ASCII_HI);
324
325 buc = ROFF_HASH(roffs[i].name);
326
327 if (NULL != (n = hash[buc])) {
328 for ( ; n->next; n = n->next)
329 /* Do nothing. */ ;
330 n->next = &roffs[i];
331 } else
332 hash[buc] = &roffs[i];
333 }
334 }
335
336 /*
337 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
338 * the nil-terminated string name could be found.
339 */
340 static enum rofft
341 roffhash_find(const char *p, size_t s)
342 {
343 int buc;
344 struct roffmac *n;
345
346 /*
347 * libroff has an extremely simple hashtable, for the time
348 * being, which simply keys on the first character, which must
349 * be printable, then walks a chain. It works well enough until
350 * optimised.
351 */
352
353 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
354 return(ROFF_MAX);
355
356 buc = ROFF_HASH(p);
357
358 if (NULL == (n = hash[buc]))
359 return(ROFF_MAX);
360 for ( ; n; n = n->next)
361 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
362 return((enum rofft)(n - roffs));
363
364 return(ROFF_MAX);
365 }
366
367
368 /*
369 * Pop the current node off of the stack of roff instructions currently
370 * pending.
371 */
372 static void
373 roffnode_pop(struct roff *r)
374 {
375 struct roffnode *p;
376
377 assert(r->last);
378 p = r->last;
379
380 r->last = r->last->parent;
381 free(p->name);
382 free(p->end);
383 free(p);
384 }
385
386
387 /*
388 * Push a roff node onto the instruction stack. This must later be
389 * removed with roffnode_pop().
390 */
391 static void
392 roffnode_push(struct roff *r, enum rofft tok, const char *name,
393 int line, int col)
394 {
395 struct roffnode *p;
396
397 p = mandoc_calloc(1, sizeof(struct roffnode));
398 p->tok = tok;
399 if (name)
400 p->name = mandoc_strdup(name);
401 p->parent = r->last;
402 p->line = line;
403 p->col = col;
404 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
405
406 r->last = p;
407 }
408
409
410 static void
411 roff_free1(struct roff *r)
412 {
413 struct tbl_node *tbl;
414 struct eqn_node *e;
415 int i;
416
417 while (NULL != (tbl = r->first_tbl)) {
418 r->first_tbl = tbl->next;
419 tbl_free(tbl);
420 }
421
422 r->first_tbl = r->last_tbl = r->tbl = NULL;
423
424 while (NULL != (e = r->first_eqn)) {
425 r->first_eqn = e->next;
426 eqn_free(e);
427 }
428
429 r->first_eqn = r->last_eqn = r->eqn = NULL;
430
431 while (r->last)
432 roffnode_pop(r);
433
434 roff_freestr(r->strtab);
435 roff_freestr(r->xmbtab);
436
437 r->strtab = r->xmbtab = NULL;
438
439 roff_freereg(r->regtab);
440
441 r->regtab = NULL;
442
443 if (r->xtab)
444 for (i = 0; i < 128; i++)
445 free(r->xtab[i].p);
446
447 free(r->xtab);
448 r->xtab = NULL;
449 }
450
451 void
452 roff_reset(struct roff *r)
453 {
454
455 roff_free1(r);
456 r->control = 0;
457 }
458
459
/*
 * Destroy a parser: release everything it owns, then the struct
 * itself.  The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
467
468
469 struct roff *
470 roff_alloc(enum mparset type, struct mparse *parse, int quick)
471 {
472 struct roff *r;
473
474 r = mandoc_calloc(1, sizeof(struct roff));
475 r->parsetype = type;
476 r->parse = parse;
477 r->quick = quick;
478 r->rstackpos = -1;
479
480 roffhash_init();
481
482 return(r);
483 }
484
485 /*
486 * In the current line, expand user-defined strings ("\*")
487 * and references to number registers ("\n").
488 * Also check the syntax of other escape sequences.
489 */
490 static enum rofferr
491 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
492 {
493 char ubuf[12]; /* buffer to print the number */
494 const char *stesc; /* start of an escape sequence ('\\') */
495 const char *stnam; /* start of the name, after "[(*" */
496 const char *cp; /* end of the name, e.g. before ']' */
497 const char *res; /* the string to be substituted */
498 char *nbuf; /* new buffer to copy bufp to */
499 size_t nsz; /* size of the new buffer */
500 size_t maxl; /* expected length of the escape name */
501 size_t naml; /* actual length of the escape name */
502 int expand_count; /* to avoid infinite loops */
503
504 expand_count = 0;
505
506 again:
507 cp = *bufp + pos;
508 while (NULL != (cp = strchr(cp, '\\'))) {
509 stesc = cp++;
510
511 /*
512 * The second character must be an asterisk or an n.
513 * If it isn't, skip it anyway: It is escaped,
514 * so it can't start another escape sequence.
515 */
516
517 if ('\0' == *cp)
518 return(ROFF_CONT);
519
520 switch (*cp) {
521 case ('*'):
522 res = NULL;
523 break;
524 case ('n'):
525 res = ubuf;
526 break;
527 default:
528 if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
529 continue;
530 mandoc_msg
531 (MANDOCERR_BADESCAPE, r->parse,
532 ln, (int)(stesc - *bufp), NULL);
533 return(ROFF_CONT);
534 }
535
536 cp++;
537
538 /*
539 * The third character decides the length
540 * of the name of the string or register.
541 * Save a pointer to the name.
542 */
543
544 switch (*cp) {
545 case ('\0'):
546 return(ROFF_CONT);
547 case ('('):
548 cp++;
549 maxl = 2;
550 break;
551 case ('['):
552 cp++;
553 maxl = 0;
554 break;
555 default:
556 maxl = 1;
557 break;
558 }
559 stnam = cp;
560
561 /* Advance to the end of the name. */
562
563 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
564 if ('\0' == *cp) {
565 mandoc_msg
566 (MANDOCERR_BADESCAPE,
567 r->parse, ln,
568 (int)(stesc - *bufp), NULL);
569 return(ROFF_CONT);
570 }
571 if (0 == maxl && ']' == *cp)
572 break;
573 }
574
575 /*
576 * Retrieve the replacement string; if it is
577 * undefined, resume searching for escapes.
578 */
579
580 if (NULL == res)
581 res = roff_getstrn(r, stnam, naml);
582 else
583 snprintf(ubuf, sizeof(ubuf), "%d",
584 roff_getregn(r, stnam, naml));
585
586 if (NULL == res) {
587 mandoc_msg
588 (MANDOCERR_BADESCAPE, r->parse,
589 ln, (int)(stesc - *bufp), NULL);
590 res = "";
591 }
592
593 /* Replace the escape sequence by the string. */
594
595 pos = stesc - *bufp;
596
597 nsz = *szp + strlen(res) + 1;
598 nbuf = mandoc_malloc(nsz);
599
600 strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
601 strlcat(nbuf, res, nsz);
602 strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);
603
604 free(*bufp);
605
606 *bufp = nbuf;
607 *szp = nsz;
608
609 if (EXPAND_LIMIT >= ++expand_count)
610 goto again;
611
612 /* Just leave the string unexpanded. */
613 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
614 return(ROFF_IGN);
615 }
616 return(ROFF_CONT);
617 }
618
619 /*
620 * Process text streams:
621 * Convert all breakable hyphens into ASCII_HYPH.
622 * Decrement and spring input line trap.
623 */
624 static enum rofferr
625 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
626 {
627 size_t sz;
628 const char *start;
629 char *p;
630 int isz;
631 enum mandoc_esc esc;
632
633 start = p = *bufp + pos;
634
635 while ('\0' != *p) {
636 sz = strcspn(p, "-\\");
637 p += sz;
638
639 if ('\0' == *p)
640 break;
641
642 if ('\\' == *p) {
643 /* Skip over escapes. */
644 p++;
645 esc = mandoc_escape((const char **)&p, NULL, NULL);
646 if (ESCAPE_ERROR == esc)
647 break;
648 continue;
649 } else if (p == start) {
650 p++;
651 continue;
652 }
653
654 if (isalpha((unsigned char)p[-1]) &&
655 isalpha((unsigned char)p[1]))
656 *p = ASCII_HYPH;
657 p++;
658 }
659
660 /* Spring the input line trap. */
661 if (1 == roffit_lines) {
662 isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
663 if (-1 == isz) {
664 perror(NULL);
665 exit((int)MANDOCLEVEL_SYSERR);
666 }
667 free(*bufp);
668 *bufp = p;
669 *szp = isz + 1;
670 *offs = 0;
671 free(roffit_macro);
672 roffit_lines = 0;
673 return(ROFF_REPARSE);
674 } else if (1 < roffit_lines)
675 --roffit_lines;
676 return(ROFF_CONT);
677 }
678
679 enum rofferr
680 roff_parseln(struct roff *r, int ln, char **bufp,
681 size_t *szp, int pos, int *offs)
682 {
683 enum rofft t;
684 enum rofferr e;
685 int ppos, ctl;
686
687 /*
688 * Run the reserved-word filter only if we have some reserved
689 * words to fill in.
690 */
691
692 e = roff_res(r, bufp, szp, ln, pos);
693 if (ROFF_IGN == e)
694 return(e);
695 assert(ROFF_CONT == e);
696
697 ppos = pos;
698 ctl = roff_getcontrol(r, *bufp, &pos);
699
700 /*
701 * First, if a scope is open and we're not a macro, pass the
702 * text through the macro's filter. If a scope isn't open and
703 * we're not a macro, just let it through.
704 * Finally, if there's an equation scope open, divert it into it
705 * no matter our state.
706 */
707
708 if (r->last && ! ctl) {
709 t = r->last->tok;
710 assert(roffs[t].text);
711 e = (*roffs[t].text)
712 (r, t, bufp, szp, ln, pos, pos, offs);
713 assert(ROFF_IGN == e || ROFF_CONT == e);
714 if (ROFF_CONT != e)
715 return(e);
716 }
717 if (r->eqn)
718 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
719 if ( ! ctl) {
720 if (r->tbl)
721 return(tbl_read(r->tbl, ln, *bufp, pos));
722 return(roff_parsetext(bufp, szp, pos, offs));
723 }
724
725 /*
726 * If a scope is open, go to the child handler for that macro,
727 * as it may want to preprocess before doing anything with it.
728 * Don't do so if an equation is open.
729 */
730
731 if (r->last) {
732 t = r->last->tok;
733 assert(roffs[t].sub);
734 return((*roffs[t].sub)
735 (r, t, bufp, szp,
736 ln, ppos, pos, offs));
737 }
738
739 /*
740 * Lastly, as we've no scope open, try to look up and execute
741 * the new macro. If no macro is found, simply return and let
742 * the compilers handle it.
743 */
744
745 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
746 return(ROFF_CONT);
747
748 assert(roffs[t].proc);
749 return((*roffs[t].proc)
750 (r, t, bufp, szp,
751 ln, ppos, pos, offs));
752 }
753
754
755 void
756 roff_endparse(struct roff *r)
757 {
758
759 if (r->last)
760 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
761 r->last->line, r->last->col, NULL);
762
763 if (r->eqn) {
764 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
765 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
766 eqn_end(&r->eqn);
767 }
768
769 if (r->tbl) {
770 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
771 r->tbl->line, r->tbl->pos, NULL);
772 tbl_end(&r->tbl);
773 }
774 }
775
776 /*
777 * Parse a roff node's type from the input buffer. This must be in the
778 * form of ".foo xxx" in the usual way.
779 */
780 static enum rofft
781 roff_parse(struct roff *r, const char *buf, int *pos)
782 {
783 const char *mac;
784 size_t maclen;
785 enum rofft t;
786
787 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
788 '\t' == buf[*pos] || ' ' == buf[*pos])
789 return(ROFF_MAX);
790
791 /* We stop the macro parse at an escape, tab, space, or nil. */
792
793 mac = buf + *pos;
794 maclen = strcspn(mac, " \\\t\0");
795
796 t = (r->current_string = roff_getstrn(r, mac, maclen))
797 ? ROFF_USERDEF : roffhash_find(mac, maclen);
798
799 *pos += (int)maclen;
800
801 while (buf[*pos] && ' ' == buf[*pos])
802 (*pos)++;
803
804 return(t);
805 }
806
807 /* ARGSUSED */
808 static enum rofferr
809 roff_cblock(ROFF_ARGS)
810 {
811
812 /*
813 * A block-close `..' should only be invoked as a child of an
814 * ignore macro, otherwise raise a warning and just ignore it.
815 */
816
817 if (NULL == r->last) {
818 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
819 return(ROFF_IGN);
820 }
821
822 switch (r->last->tok) {
823 case (ROFF_am):
824 /* FALLTHROUGH */
825 case (ROFF_ami):
826 /* FALLTHROUGH */
827 case (ROFF_am1):
828 /* FALLTHROUGH */
829 case (ROFF_de):
830 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
831 /* FALLTHROUGH */
832 case (ROFF_dei):
833 /* FALLTHROUGH */
834 case (ROFF_ig):
835 break;
836 default:
837 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
838 return(ROFF_IGN);
839 }
840
841 if ((*bufp)[pos])
842 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
843
844 roffnode_pop(r);
845 roffnode_cleanscope(r);
846 return(ROFF_IGN);
847
848 }
849
850
851 static void
852 roffnode_cleanscope(struct roff *r)
853 {
854
855 while (r->last) {
856 if (--r->last->endspan != 0)
857 break;
858 roffnode_pop(r);
859 }
860 }
861
862
863 static void
864 roff_ccond(struct roff *r, int ln, int ppos)
865 {
866
867 if (NULL == r->last) {
868 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
869 return;
870 }
871
872 switch (r->last->tok) {
873 case (ROFF_el):
874 /* FALLTHROUGH */
875 case (ROFF_ie):
876 /* FALLTHROUGH */
877 case (ROFF_if):
878 break;
879 default:
880 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
881 return;
882 }
883
884 if (r->last->endspan > -1) {
885 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
886 return;
887 }
888
889 roffnode_pop(r);
890 roffnode_cleanscope(r);
891 return;
892 }
893
894
895 /* ARGSUSED */
896 static enum rofferr
897 roff_block(ROFF_ARGS)
898 {
899 int sv;
900 size_t sz;
901 char *name;
902
903 name = NULL;
904
905 if (ROFF_ig != tok) {
906 if ('\0' == (*bufp)[pos]) {
907 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
908 return(ROFF_IGN);
909 }
910
911 /*
912 * Re-write `de1', since we don't really care about
913 * groff's strange compatibility mode, into `de'.
914 */
915
916 if (ROFF_de1 == tok)
917 tok = ROFF_de;
918 if (ROFF_de == tok)
919 name = *bufp + pos;
920 else
921 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
922 roffs[tok].name);
923
924 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
925 pos++;
926
927 while (isspace((unsigned char)(*bufp)[pos]))
928 (*bufp)[pos++] = '\0';
929 }
930
931 roffnode_push(r, tok, name, ln, ppos);
932
933 /*
934 * At the beginning of a `de' macro, clear the existing string
935 * with the same name, if there is one. New content will be
936 * appended from roff_block_text() in multiline mode.
937 */
938
939 if (ROFF_de == tok)
940 roff_setstr(r, name, "", 0);
941
942 if ('\0' == (*bufp)[pos])
943 return(ROFF_IGN);
944
945 /* If present, process the custom end-of-line marker. */
946
947 sv = pos;
948 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
949 pos++;
950
951 /*
952 * Note: groff does NOT like escape characters in the input.
953 * Instead of detecting this, we're just going to let it fly and
954 * to hell with it.
955 */
956
957 assert(pos > sv);
958 sz = (size_t)(pos - sv);
959
960 if (1 == sz && '.' == (*bufp)[sv])
961 return(ROFF_IGN);
962
963 r->last->end = mandoc_malloc(sz + 1);
964
965 memcpy(r->last->end, *bufp + sv, sz);
966 r->last->end[(int)sz] = '\0';
967
968 if ((*bufp)[pos])
969 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
970
971 return(ROFF_IGN);
972 }
973
974
975 /* ARGSUSED */
976 static enum rofferr
977 roff_block_sub(ROFF_ARGS)
978 {
979 enum rofft t;
980 int i, j;
981
982 /*
983 * First check whether a custom macro exists at this level. If
984 * it does, then check against it. This is some of groff's
985 * stranger behaviours. If we encountered a custom end-scope
986 * tag and that tag also happens to be a "real" macro, then we
987 * need to try interpreting it again as a real macro. If it's
988 * not, then return ignore. Else continue.
989 */
990
991 if (r->last->end) {
992 for (i = pos, j = 0; r->last->end[j]; j++, i++)
993 if ((*bufp)[i] != r->last->end[j])
994 break;
995
996 if ('\0' == r->last->end[j] &&
997 ('\0' == (*bufp)[i] ||
998 ' ' == (*bufp)[i] ||
999 '\t' == (*bufp)[i])) {
1000 roffnode_pop(r);
1001 roffnode_cleanscope(r);
1002
1003 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1004 i++;
1005
1006 pos = i;
1007 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1008 return(ROFF_RERUN);
1009 return(ROFF_IGN);
1010 }
1011 }
1012
1013 /*
1014 * If we have no custom end-query or lookup failed, then try
1015 * pulling it out of the hashtable.
1016 */
1017
1018 t = roff_parse(r, *bufp, &pos);
1019
1020 /*
1021 * Macros other than block-end are only significant
1022 * in `de' blocks; elsewhere, simply throw them away.
1023 */
1024 if (ROFF_cblock != t) {
1025 if (ROFF_de == tok)
1026 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1027 return(ROFF_IGN);
1028 }
1029
1030 assert(roffs[t].proc);
1031 return((*roffs[t].proc)(r, t, bufp, szp,
1032 ln, ppos, pos, offs));
1033 }
1034
1035
1036 /* ARGSUSED */
1037 static enum rofferr
1038 roff_block_text(ROFF_ARGS)
1039 {
1040
1041 if (ROFF_de == tok)
1042 roff_setstr(r, r->last->name, *bufp + pos, 2);
1043
1044 return(ROFF_IGN);
1045 }
1046
1047
1048 /* ARGSUSED */
1049 static enum rofferr
1050 roff_cond_sub(ROFF_ARGS)
1051 {
1052 enum rofft t;
1053 enum roffrule rr;
1054 char *ep;
1055
1056 rr = r->last->rule;
1057 roffnode_cleanscope(r);
1058 t = roff_parse(r, *bufp, &pos);
1059
1060 /*
1061 * Fully handle known macros when they are structurally
1062 * required or when the conditional evaluated to true.
1063 */
1064
1065 if ((ROFF_MAX != t) &&
1066 (ROFFRULE_ALLOW == rr ||
1067 ROFFMAC_STRUCT & roffs[t].flags)) {
1068 assert(roffs[t].proc);
1069 return((*roffs[t].proc)(r, t, bufp, szp,
1070 ln, ppos, pos, offs));
1071 }
1072
1073 /*
1074 * If `\}' occurs on a macro line without a preceding macro,
1075 * drop the line completely.
1076 */
1077
1078 ep = *bufp + pos;
1079 if ('\\' == ep[0] && '}' == ep[1])
1080 rr = ROFFRULE_DENY;
1081
1082 /* Always check for the closing delimiter `\}'. */
1083
1084 while (NULL != (ep = strchr(ep, '\\'))) {
1085 if ('}' == *(++ep)) {
1086 *ep = '&';
1087 roff_ccond(r, ln, ep - *bufp - 1);
1088 }
1089 ++ep;
1090 }
1091 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1092 }
1093
1094 /* ARGSUSED */
1095 static enum rofferr
1096 roff_cond_text(ROFF_ARGS)
1097 {
1098 char *ep;
1099 enum roffrule rr;
1100
1101 rr = r->last->rule;
1102 roffnode_cleanscope(r);
1103
1104 ep = *bufp + pos;
1105 while (NULL != (ep = strchr(ep, '\\'))) {
1106 if ('}' == *(++ep)) {
1107 *ep = '&';
1108 roff_ccond(r, ln, ep - *bufp - 1);
1109 }
1110 ++ep;
1111 }
1112 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1113 }
1114
/*
 * Parse an optionally negative decimal integer at v[*pos].
 *
 * On success, store the value in *res, advance *pos past the number
 * and return 1.  If there is no digit at that place (a lone `-'
 * included), return 0 and leave *pos alone; *res is clobbered.
 * Overflow is not detected.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int	 p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	/* Plain assignment: `+=' here would yield 11 * *res + digit. */
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';

	/* No digit consumed: not a number. */
	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1136
/*
 * Parse a comparison operator at v[*pos].
 *
 * Recognized are `=', `==', `<', `>', `<=' and `>='.  The operator is
 * stored in *res as '=', '<', '>', 'l' (for <=) or 'g' (for >=), *pos
 * is advanced past it, and the stored character is returned.
 * If there is no operator, 0 is returned and *pos is left alone;
 * *res then holds the offending character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 eq;

	*res = v[*pos];

	switch (*res) {
	case '=':
		/* FALLTHROUGH */
	case '>':
		/* FALLTHROUGH */
	case '<':
		break;
	default:
		return(0);
	}

	/*
	 * Peek at the next character only now: v[*pos] is known not
	 * to be the terminating NUL, so v[*pos + 1] is still inside
	 * the string.  Peeking unconditionally would read one byte
	 * past the terminator when *pos is at the end of the input.
	 */

	eq = '=' == v[*pos + 1];

	if (eq) {
		if ('>' == *res)
			*res = 'g';
		else if ('<' == *res)
			*res = 'l';
	}

	*pos += 1 + eq;

	return(*res);
}
1164
1165 static enum roffrule
1166 roff_evalcond(const char *v, int *pos)
1167 {
1168 int not, lh, rh;
1169 char op;
1170
1171 switch (v[*pos]) {
1172 case ('n'):
1173 (*pos)++;
1174 return(ROFFRULE_ALLOW);
1175 case ('e'):
1176 /* FALLTHROUGH */
1177 case ('o'):
1178 /* FALLTHROUGH */
1179 case ('t'):
1180 (*pos)++;
1181 return(ROFFRULE_DENY);
1182 case ('!'):
1183 (*pos)++;
1184 not = 1;
1185 break;
1186 default:
1187 not = 0;
1188 break;
1189 }
1190
1191 if (!roff_getnum(v, pos, &lh))
1192 return ROFFRULE_DENY;
1193 if (!roff_getop(v, pos, &op)) {
1194 if (lh < 0)
1195 lh = 0;
1196 goto out;
1197 }
1198 if (!roff_getnum(v, pos, &rh))
1199 return ROFFRULE_DENY;
1200 switch (op) {
1201 case 'g':
1202 lh = lh >= rh;
1203 break;
1204 case 'l':
1205 lh = lh <= rh;
1206 break;
1207 case '=':
1208 lh = lh == rh;
1209 break;
1210 case '>':
1211 lh = lh > rh;
1212 break;
1213 case '<':
1214 lh = lh < rh;
1215 break;
1216 default:
1217 return ROFFRULE_DENY;
1218 }
1219 out:
1220 if (not)
1221 lh = !lh;
1222 return lh ? ROFFRULE_ALLOW : ROFFRULE_DENY;
1223 }
1224
1225 /* ARGSUSED */
1226 static enum rofferr
1227 roff_line_ignore(ROFF_ARGS)
1228 {
1229
1230 return(ROFF_IGN);
1231 }
1232
1233 /* ARGSUSED */
1234 static enum rofferr
1235 roff_cond(ROFF_ARGS)
1236 {
1237
1238 roffnode_push(r, tok, NULL, ln, ppos);
1239
1240 /*
1241 * An `.el' has no conditional body: it will consume the value
1242 * of the current rstack entry set in prior `ie' calls or
1243 * defaults to DENY.
1244 *
1245 * If we're not an `el', however, then evaluate the conditional.
1246 */
1247
1248 r->last->rule = ROFF_el == tok ?
1249 (r->rstackpos < 0 ?
1250 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1251 roff_evalcond(*bufp, &pos);
1252
1253 /*
1254 * An if-else will put the NEGATION of the current evaluated
1255 * conditional into the stack of rules.
1256 */
1257
1258 if (ROFF_ie == tok) {
1259 if (r->rstackpos == RSTACK_MAX - 1) {
1260 mandoc_msg(MANDOCERR_MEM,
1261 r->parse, ln, ppos, NULL);
1262 return(ROFF_ERR);
1263 }
1264 r->rstack[++r->rstackpos] =
1265 ROFFRULE_DENY == r->last->rule ?
1266 ROFFRULE_ALLOW : ROFFRULE_DENY;
1267 }
1268
1269 /* If the parent has false as its rule, then so do we. */
1270
1271 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1272 r->last->rule = ROFFRULE_DENY;
1273
1274 /*
1275 * Determine scope.
1276 * If there is nothing on the line after the conditional,
1277 * not even whitespace, use next-line scope.
1278 */
1279
1280 if ('\0' == (*bufp)[pos]) {
1281 r->last->endspan = 2;
1282 goto out;
1283 }
1284
1285 while (' ' == (*bufp)[pos])
1286 pos++;
1287
1288 /* An opening brace requests multiline scope. */
1289
1290 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1291 r->last->endspan = -1;
1292 pos += 2;
1293 goto out;
1294 }
1295
1296 /*
1297 * Anything else following the conditional causes
1298 * single-line scope. Warn if the scope contains
1299 * nothing but trailing whitespace.
1300 */
1301
1302 if ('\0' == (*bufp)[pos])
1303 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1304
1305 r->last->endspan = 1;
1306
1307 out:
1308 *offs = pos;
1309 return(ROFF_RERUN);
1310 }
1311
1312
1313 /* ARGSUSED */
1314 static enum rofferr
1315 roff_ds(ROFF_ARGS)
1316 {
1317 char *name, *string;
1318
1319 /*
1320 * A symbol is named by the first word following the macro
1321 * invocation up to a space. Its value is anything after the
1322 * name's trailing whitespace and optional double-quote. Thus,
1323 *
1324 * [.ds foo "bar " ]
1325 *
1326 * will have `bar " ' as its value.
1327 */
1328
1329 string = *bufp + pos;
1330 name = roff_getname(r, &string, ln, pos);
1331 if ('\0' == *name)
1332 return(ROFF_IGN);
1333
1334 /* Read past initial double-quote. */
1335 if ('"' == *string)
1336 string++;
1337
1338 /* The rest is the value. */
1339 roff_setstr(r, name, string, ROFF_as == tok);
1340 return(ROFF_IGN);
1341 }
1342
1343 void
1344 roff_setreg(struct roff *r, const char *name, int val, char sign)
1345 {
1346 struct roffreg *reg;
1347
1348 /* Search for an existing register with the same name. */
1349 reg = r->regtab;
1350
1351 while (reg && strcmp(name, reg->key.p))
1352 reg = reg->next;
1353
1354 if (NULL == reg) {
1355 /* Create a new register. */
1356 reg = mandoc_malloc(sizeof(struct roffreg));
1357 reg->key.p = mandoc_strdup(name);
1358 reg->key.sz = strlen(name);
1359 reg->val = 0;
1360 reg->next = r->regtab;
1361 r->regtab = reg;
1362 }
1363
1364 if ('+' == sign)
1365 reg->val += val;
1366 else if ('-' == sign)
1367 reg->val -= val;
1368 else
1369 reg->val = val;
1370 }
1371
/*
 * Look up a predefined read-only number register.
 * Only the first byte of name is inspected.
 * Returns the fixed value of the register, or -1 if the register
 * is not predefined; should a predefined register with the value -1
 * ever be needed, a different marker will have to be chosen.
 */
static int
roff_getregro(const char *name)
{
	int	 val;

	switch (*name) {
	case 'A':	/* ASCII approximation mode is always off. */
	case 'j':	/* Always adjust left margin only. */
		val = 0;
		break;
	case 'g':	/* Groff compatibility mode is always on. */
	case 'T':	/* Some output device is always defined. */
		val = 1;
		break;
	case 'H':	/* Fixed horizontal resolution. */
		val = 24;
		break;
	case 'V':	/* Fixed vertical resolution. */
		val = 40;
		break;
	default:	/* Not a predefined register. */
		val = -1;
		break;
	}
	return(val);
}
1399
1400 int
1401 roff_getreg(const struct roff *r, const char *name)
1402 {
1403 struct roffreg *reg;
1404 int val;
1405
1406 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1407 val = roff_getregro(name + 1);
1408 if (-1 != val)
1409 return (val);
1410 }
1411
1412 for (reg = r->regtab; reg; reg = reg->next)
1413 if (0 == strcmp(name, reg->key.p))
1414 return(reg->val);
1415
1416 return(0);
1417 }
1418
1419 static int
1420 roff_getregn(const struct roff *r, const char *name, size_t len)
1421 {
1422 struct roffreg *reg;
1423 int val;
1424
1425 if ('.' == name[0] && 2 == len) {
1426 val = roff_getregro(name + 1);
1427 if (-1 != val)
1428 return (val);
1429 }
1430
1431 for (reg = r->regtab; reg; reg = reg->next)
1432 if (len == reg->key.sz &&
1433 0 == strncmp(name, reg->key.p, len))
1434 return(reg->val);
1435
1436 return(0);
1437 }
1438
1439 static void
1440 roff_freereg(struct roffreg *reg)
1441 {
1442 struct roffreg *old_reg;
1443
1444 while (NULL != reg) {
1445 free(reg->key.p);
1446 old_reg = reg;
1447 reg = reg->next;
1448 free(old_reg);
1449 }
1450 }
1451
1452 /* ARGSUSED */
1453 static enum rofferr
1454 roff_nr(ROFF_ARGS)
1455 {
1456 const char *key;
1457 char *val;
1458 size_t sz;
1459 int iv;
1460 char sign;
1461
1462 val = *bufp + pos;
1463 key = roff_getname(r, &val, ln, pos);
1464
1465 sign = *val;
1466 if ('+' == sign || '-' == sign)
1467 val++;
1468
1469 sz = strspn(val, "0123456789");
1470 iv = sz ? mandoc_strntoi(val, sz, 10) : 0;
1471
1472 roff_setreg(r, key, iv, sign);
1473
1474 return(ROFF_IGN);
1475 }
1476
1477 /* ARGSUSED */
1478 static enum rofferr
1479 roff_rm(ROFF_ARGS)
1480 {
1481 const char *name;
1482 char *cp;
1483
1484 cp = *bufp + pos;
1485 while ('\0' != *cp) {
1486 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1487 if ('\0' != *name)
1488 roff_setstr(r, name, NULL, 0);
1489 }
1490 return(ROFF_IGN);
1491 }
1492
1493 /* ARGSUSED */
1494 static enum rofferr
1495 roff_it(ROFF_ARGS)
1496 {
1497 char *cp;
1498 size_t len;
1499 int iv;
1500
1501 /* Parse the number of lines. */
1502 cp = *bufp + pos;
1503 len = strcspn(cp, " \t");
1504 cp[len] = '\0';
1505 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1506 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1507 ln, ppos, *bufp + 1);
1508 return(ROFF_IGN);
1509 }
1510 cp += len + 1;
1511
1512 /* Arm the input line trap. */
1513 roffit_lines = iv;
1514 roffit_macro = mandoc_strdup(cp);
1515 return(ROFF_IGN);
1516 }
1517
1518 /* ARGSUSED */
1519 static enum rofferr
1520 roff_Dd(ROFF_ARGS)
1521 {
1522 const char *const *cp;
1523
1524 if (0 == r->quick && MPARSE_MDOC != r->parsetype)
1525 for (cp = __mdoc_reserved; *cp; cp++)
1526 roff_setstr(r, *cp, NULL, 0);
1527
1528 return(ROFF_CONT);
1529 }
1530
1531 /* ARGSUSED */
1532 static enum rofferr
1533 roff_TH(ROFF_ARGS)
1534 {
1535 const char *const *cp;
1536
1537 if (0 == r->quick && MPARSE_MDOC != r->parsetype)
1538 for (cp = __man_reserved; *cp; cp++)
1539 roff_setstr(r, *cp, NULL, 0);
1540
1541 return(ROFF_CONT);
1542 }
1543
1544 /* ARGSUSED */
1545 static enum rofferr
1546 roff_TE(ROFF_ARGS)
1547 {
1548
1549 if (NULL == r->tbl)
1550 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1551 else
1552 tbl_end(&r->tbl);
1553
1554 return(ROFF_IGN);
1555 }
1556
1557 /* ARGSUSED */
1558 static enum rofferr
1559 roff_T_(ROFF_ARGS)
1560 {
1561
1562 if (NULL == r->tbl)
1563 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1564 else
1565 tbl_restart(ppos, ln, r->tbl);
1566
1567 return(ROFF_IGN);
1568 }
1569
1570 #if 0
1571 static int
1572 roff_closeeqn(struct roff *r)
1573 {
1574
1575 return(r->eqn && ROFF_EQN == eqn_end(&r->eqn) ? 1 : 0);
1576 }
1577 #endif
1578
1579 static void
1580 roff_openeqn(struct roff *r, const char *name, int line,
1581 int offs, const char *buf)
1582 {
1583 struct eqn_node *e;
1584 int poff;
1585
1586 assert(NULL == r->eqn);
1587 e = eqn_alloc(name, offs, line, r->parse);
1588
1589 if (r->last_eqn)
1590 r->last_eqn->next = e;
1591 else
1592 r->first_eqn = r->last_eqn = e;
1593
1594 r->eqn = r->last_eqn = e;
1595
1596 if (buf) {
1597 poff = 0;
1598 eqn_read(&r->eqn, line, buf, offs, &poff);
1599 }
1600 }
1601
1602 /* ARGSUSED */
1603 static enum rofferr
1604 roff_EQ(ROFF_ARGS)
1605 {
1606
1607 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1608 return(ROFF_IGN);
1609 }
1610
1611 /* ARGSUSED */
1612 static enum rofferr
1613 roff_EN(ROFF_ARGS)
1614 {
1615
1616 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1617 return(ROFF_IGN);
1618 }
1619
1620 /* ARGSUSED */
1621 static enum rofferr
1622 roff_TS(ROFF_ARGS)
1623 {
1624 struct tbl_node *tbl;
1625
1626 if (r->tbl) {
1627 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1628 tbl_end(&r->tbl);
1629 }
1630
1631 tbl = tbl_alloc(ppos, ln, r->parse);
1632
1633 if (r->last_tbl)
1634 r->last_tbl->next = tbl;
1635 else
1636 r->first_tbl = r->last_tbl = tbl;
1637
1638 r->tbl = r->last_tbl = tbl;
1639 return(ROFF_IGN);
1640 }
1641
1642 /* ARGSUSED */
1643 static enum rofferr
1644 roff_cc(ROFF_ARGS)
1645 {
1646 const char *p;
1647
1648 p = *bufp + pos;
1649
1650 if ('\0' == *p || '.' == (r->control = *p++))
1651 r->control = 0;
1652
1653 if ('\0' != *p)
1654 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1655
1656 return(ROFF_IGN);
1657 }
1658
1659 /* ARGSUSED */
1660 static enum rofferr
1661 roff_tr(ROFF_ARGS)
1662 {
1663 const char *p, *first, *second;
1664 size_t fsz, ssz;
1665 enum mandoc_esc esc;
1666
1667 p = *bufp + pos;
1668
1669 if ('\0' == *p) {
1670 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1671 return(ROFF_IGN);
1672 }
1673
1674 while ('\0' != *p) {
1675 fsz = ssz = 1;
1676
1677 first = p++;
1678 if ('\\' == *first) {
1679 esc = mandoc_escape(&p, NULL, NULL);
1680 if (ESCAPE_ERROR == esc) {
1681 mandoc_msg
1682 (MANDOCERR_BADESCAPE, r->parse,
1683 ln, (int)(p - *bufp), NULL);
1684 return(ROFF_IGN);
1685 }
1686 fsz = (size_t)(p - first);
1687 }
1688
1689 second = p++;
1690 if ('\\' == *second) {
1691 esc = mandoc_escape(&p, NULL, NULL);
1692 if (ESCAPE_ERROR == esc) {
1693 mandoc_msg
1694 (MANDOCERR_BADESCAPE, r->parse,
1695 ln, (int)(p - *bufp), NULL);
1696 return(ROFF_IGN);
1697 }
1698 ssz = (size_t)(p - second);
1699 } else if ('\0' == *second) {
1700 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1701 ln, (int)(p - *bufp), NULL);
1702 second = " ";
1703 p--;
1704 }
1705
1706 if (fsz > 1) {
1707 roff_setstrn(&r->xmbtab, first,
1708 fsz, second, ssz, 0);
1709 continue;
1710 }
1711
1712 if (NULL == r->xtab)
1713 r->xtab = mandoc_calloc
1714 (128, sizeof(struct roffstr));
1715
1716 free(r->xtab[(int)*first].p);
1717 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1718 r->xtab[(int)*first].sz = ssz;
1719 }
1720
1721 return(ROFF_IGN);
1722 }
1723
1724 /* ARGSUSED */
1725 static enum rofferr
1726 roff_so(ROFF_ARGS)
1727 {
1728 char *name;
1729
1730 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1731
1732 /*
1733 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1734 * opening anything that's not in our cwd or anything beneath
1735 * it. Thus, explicitly disallow traversing up the file-system
1736 * or using absolute paths.
1737 */
1738
1739 name = *bufp + pos;
1740 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1741 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1742 return(ROFF_ERR);
1743 }
1744
1745 *offs = pos;
1746 return(ROFF_SO);
1747 }
1748
1749 /* ARGSUSED */
1750 static enum rofferr
1751 roff_userdef(ROFF_ARGS)
1752 {
1753 const char *arg[9];
1754 char *cp, *n1, *n2;
1755 int i;
1756
1757 /*
1758 * Collect pointers to macro argument strings
1759 * and NUL-terminate them.
1760 */
1761 cp = *bufp + pos;
1762 for (i = 0; i < 9; i++)
1763 arg[i] = '\0' == *cp ? "" :
1764 mandoc_getarg(r->parse, &cp, ln, &pos);
1765
1766 /*
1767 * Expand macro arguments.
1768 */
1769 *szp = 0;
1770 n1 = cp = mandoc_strdup(r->current_string);
1771 while (NULL != (cp = strstr(cp, "\\$"))) {
1772 i = cp[2] - '1';
1773 if (0 > i || 8 < i) {
1774 /* Not an argument invocation. */
1775 cp += 2;
1776 continue;
1777 }
1778
1779 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1780 n2 = mandoc_malloc(*szp);
1781
1782 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1783 strlcat(n2, arg[i], *szp);
1784 strlcat(n2, cp + 3, *szp);
1785
1786 cp = n2 + (cp - n1);
1787 free(n1);
1788 n1 = n2;
1789 }
1790
1791 /*
1792 * Replace the macro invocation
1793 * by the expanded macro.
1794 */
1795 free(*bufp);
1796 *bufp = n1;
1797 if (0 == *szp)
1798 *szp = strlen(*bufp) + 1;
1799
1800 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1801 ROFF_REPARSE : ROFF_APPEND);
1802 }
1803
1804 static char *
1805 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1806 {
1807 char *name, *cp;
1808
1809 name = *cpp;
1810 if ('\0' == *name)
1811 return(name);
1812
1813 /* Read until end of name. */
1814 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1815 if ('\\' != *cp)
1816 continue;
1817 cp++;
1818 if ('\\' == *cp)
1819 continue;
1820 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1821 *cp = '\0';
1822 name = cp;
1823 }
1824
1825 /* Nil-terminate name. */
1826 if ('\0' != *cp)
1827 *(cp++) = '\0';
1828
1829 /* Read past spaces. */
1830 while (' ' == *cp)
1831 cp++;
1832
1833 *cpp = cp;
1834 return(name);
1835 }
1836
1837 /*
1838 * Store *string into the user-defined string called *name.
1839 * To clear an existing entry, call with (*r, *name, NULL, 0).
1840 * append == 0: replace mode
1841 * append == 1: single-line append mode
1842 * append == 2: multiline append mode, append '\n' after each call
1843 */
1844 static void
1845 roff_setstr(struct roff *r, const char *name, const char *string,
1846 int append)
1847 {
1848
1849 roff_setstrn(&r->strtab, name, strlen(name), string,
1850 string ? strlen(string) : 0, append);
1851 }
1852
1853 static void
1854 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1855 const char *string, size_t stringsz, int append)
1856 {
1857 struct roffkv *n;
1858 char *c;
1859 int i;
1860 size_t oldch, newch;
1861
1862 /* Search for an existing string with the same name. */
1863 n = *r;
1864
1865 while (n && strcmp(name, n->key.p))
1866 n = n->next;
1867
1868 if (NULL == n) {
1869 /* Create a new string table entry. */
1870 n = mandoc_malloc(sizeof(struct roffkv));
1871 n->key.p = mandoc_strndup(name, namesz);
1872 n->key.sz = namesz;
1873 n->val.p = NULL;
1874 n->val.sz = 0;
1875 n->next = *r;
1876 *r = n;
1877 } else if (0 == append) {
1878 free(n->val.p);
1879 n->val.p = NULL;
1880 n->val.sz = 0;
1881 }
1882
1883 if (NULL == string)
1884 return;
1885
1886 /*
1887 * One additional byte for the '\n' in multiline mode,
1888 * and one for the terminating '\0'.
1889 */
1890 newch = stringsz + (1 < append ? 2u : 1u);
1891
1892 if (NULL == n->val.p) {
1893 n->val.p = mandoc_malloc(newch);
1894 *n->val.p = '\0';
1895 oldch = 0;
1896 } else {
1897 oldch = n->val.sz;
1898 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1899 }
1900
1901 /* Skip existing content in the destination buffer. */
1902 c = n->val.p + (int)oldch;
1903
1904 /* Append new content to the destination buffer. */
1905 i = 0;
1906 while (i < (int)stringsz) {
1907 /*
1908 * Rudimentary roff copy mode:
1909 * Handle escaped backslashes.
1910 */
1911 if ('\\' == string[i] && '\\' == string[i + 1])
1912 i++;
1913 *c++ = string[i++];
1914 }
1915
1916 /* Append terminating bytes. */
1917 if (1 < append)
1918 *c++ = '\n';
1919
1920 *c = '\0';
1921 n->val.sz = (int)(c - n->val.p);
1922 }
1923
1924 static const char *
1925 roff_getstrn(const struct roff *r, const char *name, size_t len)
1926 {
1927 const struct roffkv *n;
1928 int i;
1929
1930 for (n = r->strtab; n; n = n->next)
1931 if (0 == strncmp(name, n->key.p, len) &&
1932 '\0' == n->key.p[(int)len])
1933 return(n->val.p);
1934
1935 for (i = 0; i < PREDEFS_MAX; i++)
1936 if (0 == strncmp(name, predefs[i].name, len) &&
1937 '\0' == predefs[i].name[(int)len])
1938 return(predefs[i].str);
1939
1940 return(NULL);
1941 }
1942
1943 static void
1944 roff_freestr(struct roffkv *r)
1945 {
1946 struct roffkv *n, *nn;
1947
1948 for (n = r; n; n = nn) {
1949 free(n->key.p);
1950 free(n->val.p);
1951 nn = n->next;
1952 free(n);
1953 }
1954 }
1955
1956 const struct tbl_span *
1957 roff_span(const struct roff *r)
1958 {
1959
1960 return(r->tbl ? tbl_span(r->tbl) : NULL);
1961 }
1962
1963 const struct eqn *
1964 roff_eqn(const struct roff *r)
1965 {
1966
1967 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1968 }
1969
1970 /*
1971 * Duplicate an input string, making the appropriate character
1972 * conversations (as stipulated by `tr') along the way.
1973 * Returns a heap-allocated string with all the replacements made.
1974 */
1975 char *
1976 roff_strdup(const struct roff *r, const char *p)
1977 {
1978 const struct roffkv *cp;
1979 char *res;
1980 const char *pp;
1981 size_t ssz, sz;
1982 enum mandoc_esc esc;
1983
1984 if (NULL == r->xmbtab && NULL == r->xtab)
1985 return(mandoc_strdup(p));
1986 else if ('\0' == *p)
1987 return(mandoc_strdup(""));
1988
1989 /*
1990 * Step through each character looking for term matches
1991 * (remember that a `tr' can be invoked with an escape, which is
1992 * a glyph but the escape is multi-character).
1993 * We only do this if the character hash has been initialised
1994 * and the string is >0 length.
1995 */
1996
1997 res = NULL;
1998 ssz = 0;
1999
2000 while ('\0' != *p) {
2001 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2002 sz = r->xtab[(int)*p].sz;
2003 res = mandoc_realloc(res, ssz + sz + 1);
2004 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2005 ssz += sz;
2006 p++;
2007 continue;
2008 } else if ('\\' != *p) {
2009 res = mandoc_realloc(res, ssz + 2);
2010 res[ssz++] = *p++;
2011 continue;
2012 }
2013
2014 /* Search for term matches. */
2015 for (cp = r->xmbtab; cp; cp = cp->next)
2016 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2017 break;
2018
2019 if (NULL != cp) {
2020 /*
2021 * A match has been found.
2022 * Append the match to the array and move
2023 * forward by its keysize.
2024 */
2025 res = mandoc_realloc
2026 (res, ssz + cp->val.sz + 1);
2027 memcpy(res + ssz, cp->val.p, cp->val.sz);
2028 ssz += cp->val.sz;
2029 p += (int)cp->key.sz;
2030 continue;
2031 }
2032
2033 /*
2034 * Handle escapes carefully: we need to copy
2035 * over just the escape itself, or else we might
2036 * do replacements within the escape itself.
2037 * Make sure to pass along the bogus string.
2038 */
2039 pp = p++;
2040 esc = mandoc_escape(&p, NULL, NULL);
2041 if (ESCAPE_ERROR == esc) {
2042 sz = strlen(pp);
2043 res = mandoc_realloc(res, ssz + sz + 1);
2044 memcpy(res + ssz, pp, sz);
2045 break;
2046 }
2047 /*
2048 * We bail out on bad escapes.
2049 * No need to warn: we already did so when
2050 * roff_res() was called.
2051 */
2052 sz = (int)(p - pp);
2053 res = mandoc_realloc(res, ssz + sz + 1);
2054 memcpy(res + ssz, pp, sz);
2055 ssz += sz;
2056 }
2057
2058 res[(int)ssz] = '\0';
2059 return(res);
2060 }
2061
2062 /*
2063 * Find out whether a line is a macro line or not.
2064 * If it is, adjust the current position and return one; if it isn't,
2065 * return zero and don't change the current position.
2066 * If the control character has been set with `.cc', then let that grain
2067 * precedence.
2068 * This is slighly contrary to groff, where using the non-breaking
2069 * control character when `cc' has been invoked will cause the
2070 * non-breaking macro contents to be printed verbatim.
2071 */
2072 int
2073 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2074 {
2075 int pos;
2076
2077 pos = *ppos;
2078
2079 if (0 != r->control && cp[pos] == r->control)
2080 pos++;
2081 else if (0 != r->control)
2082 return(0);
2083 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2084 pos += 2;
2085 else if ('.' == cp[pos] || '\'' == cp[pos])
2086 pos++;
2087 else
2088 return(0);
2089
2090 while (' ' == cp[pos] || '\t' == cp[pos])
2091 pos++;
2092
2093 *ppos = pos;
2094 return(1);
2095 }