git.cameronkatri.com Git - mandoc.git/blob - roff.c
Rudimentary implementation of the .it request (input line trap).
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.178 2013/07/13 12:52:07 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "libroff.h"
30 #include "libmandoc.h"
31
/*
 * Maximum number of nested if-else conditionals.
 * This is the size of the rstack array in struct roff.
 */
#define	RSTACK_MAX	128

/*
 * Maximum number of string expansions per line, to break infinite loops.
 * roff_res() compares its expansion counter against this limit.
 */
#define	EXPAND_LIMIT	1000
37
/*
 * Identifiers for the roff requests and macros handled in this file.
 * The values are used as indices into the roffs[] table, so the
 * order here has to match the order of the initializers there.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_cc,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* block closure; its roffs[] name is "." */
	ROFF_ccond,	/* conditional closure; its roffs[] name is "\\}" */
	ROFF_USERDEF,	/* user-defined macro; never put into the hash table */
	ROFF_MAX	/* table size; also returned for "no such macro" */
};
75
/*
 * Evaluation rule attached to a node on the roff stack.
 * The conditional handlers return ROFF_IGN for lines below a
 * ROFFRULE_DENY node and ROFF_CONT otherwise.
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
80
/*
 * A single register entity.  If "set" is zero, the value of the
 * register should be the default one, which is per-register.
 * Registers are assumed to be unsigned ints for now.
 * Accessed through roff_regisset(), roff_regget() and roff_regunset().
 */
struct	reg {
	int		 set; /* whether set or not */
	unsigned int	 u; /* unsigned integer */
};
90
/*
 * An incredibly-simple string buffer:
 * a pointer to the text together with its cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
98
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * The string table and the multi-byte translation table in
 * struct roff are lists of these.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
107
/*
 * The complete state of one roff parser instance.
 * Allocated by roff_alloc(), cleared by roff_reset(),
 * and released by roff_free().
 */
struct	roff {
	enum mparset	 parsetype; /* requested parse type */
	struct mparse	*parse; /* parse point */
	struct roffnode	*last; /* leaf of stack */
	enum roffrule	 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	char		 control; /* control character */
	int		 rstackpos; /* position in rstack; -1 when empty */
	struct reg	 regs[REG__MAX];
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
};
127
/*
 * One open block or conditional on the stack of pending roff
 * instructions.  Created by roffnode_push(), destroyed by
 * roffnode_pop(), which also frees "name" and "end".
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	enum roffrule	 rule; /* current evaluation rule */
};
138
/*
 * The parameter list shared by all request handlers, and the
 * function pointer type for such handlers as stored in roffs[].
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
149
/*
 * Description of one request: its name, its handlers,
 * and the link used for chaining within a hash bucket.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next entry in the same hash bucket */
};
159
/*
 * A predefined string: roff_alloc() and roff_reset() pass each
 * name/str pair of the predefs[] table to roff_setstr().
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/* Expands to one initializer of struct predef, with trailing comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
167
/* Forward declarations of the file-local functions. */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freestr(struct roffkv *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
214
/*
 * See roffhash_find().
 * Buckets are selected by the first character of a name, which has
 * to lie in the range from ASCII_LO to ASCII_HI inclusive; there is
 * one bucket per character of that range.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; the entries are chained through roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
222
/*
 * The table of requests, indexed by enum rofft.
 * Fields: name, handler for the request itself, handler for text
 * lines inside its scope, handler for control lines inside its
 * scope, flags, and the hash chain link (filled in at run time).
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
259
/*
 * NULL-terminated list of names.
 * NOTE(review): presumably the macro names reserved by the mdoc
 * language; nothing in the visible part of this file uses the
 * array, so confirm against its users.
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Ds", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
	"En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
	"Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
	"Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
281
/*
 * NULL-terminated list of names.
 * NOTE(review): presumably the macro names reserved by the man
 * language; nothing in the visible part of this file uses the
 * array, so confirm against its users.
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
	"EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
	"LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
	"TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
	NULL
};
290
/*
 * Array of injected predefined strings.
 * The initializers are pulled in from predefs.in; roff_alloc() and
 * roff_reset() iterate over exactly PREDEFS_MAX entries.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
296
/* See roffhash_find(): the bucket index is the offset from ASCII_LO. */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)

/*
 * State of the input line trap (`it' request), shared by all
 * parser instances because it is kept in file-scope variables.
 * roff_parsetext() springs the trap when roffit_lines is 1.
 */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
302
303 static void
304 roffhash_init(void)
305 {
306 struct roffmac *n;
307 int buc, i;
308
309 for (i = 0; i < (int)ROFF_USERDEF; i++) {
310 assert(roffs[i].name[0] >= ASCII_LO);
311 assert(roffs[i].name[0] <= ASCII_HI);
312
313 buc = ROFF_HASH(roffs[i].name);
314
315 if (NULL != (n = hash[buc])) {
316 for ( ; n->next; n = n->next)
317 /* Do nothing. */ ;
318 n->next = &roffs[i];
319 } else
320 hash[buc] = &roffs[i];
321 }
322 }
323
324 /*
325 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
326 * the nil-terminated string name could be found.
327 */
328 static enum rofft
329 roffhash_find(const char *p, size_t s)
330 {
331 int buc;
332 struct roffmac *n;
333
334 /*
335 * libroff has an extremely simple hashtable, for the time
336 * being, which simply keys on the first character, which must
337 * be printable, then walks a chain. It works well enough until
338 * optimised.
339 */
340
341 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
342 return(ROFF_MAX);
343
344 buc = ROFF_HASH(p);
345
346 if (NULL == (n = hash[buc]))
347 return(ROFF_MAX);
348 for ( ; n; n = n->next)
349 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
350 return((enum rofft)(n - roffs));
351
352 return(ROFF_MAX);
353 }
354
355
356 /*
357 * Pop the current node off of the stack of roff instructions currently
358 * pending.
359 */
360 static void
361 roffnode_pop(struct roff *r)
362 {
363 struct roffnode *p;
364
365 assert(r->last);
366 p = r->last;
367
368 r->last = r->last->parent;
369 free(p->name);
370 free(p->end);
371 free(p);
372 }
373
374
375 /*
376 * Push a roff node onto the instruction stack. This must later be
377 * removed with roffnode_pop().
378 */
379 static void
380 roffnode_push(struct roff *r, enum rofft tok, const char *name,
381 int line, int col)
382 {
383 struct roffnode *p;
384
385 p = mandoc_calloc(1, sizeof(struct roffnode));
386 p->tok = tok;
387 if (name)
388 p->name = mandoc_strdup(name);
389 p->parent = r->last;
390 p->line = line;
391 p->col = col;
392 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
393
394 r->last = p;
395 }
396
397
398 static void
399 roff_free1(struct roff *r)
400 {
401 struct tbl_node *tbl;
402 struct eqn_node *e;
403 int i;
404
405 while (NULL != (tbl = r->first_tbl)) {
406 r->first_tbl = tbl->next;
407 tbl_free(tbl);
408 }
409
410 r->first_tbl = r->last_tbl = r->tbl = NULL;
411
412 while (NULL != (e = r->first_eqn)) {
413 r->first_eqn = e->next;
414 eqn_free(e);
415 }
416
417 r->first_eqn = r->last_eqn = r->eqn = NULL;
418
419 while (r->last)
420 roffnode_pop(r);
421
422 roff_freestr(r->strtab);
423 roff_freestr(r->xmbtab);
424
425 r->strtab = r->xmbtab = NULL;
426
427 if (r->xtab)
428 for (i = 0; i < 128; i++)
429 free(r->xtab[i].p);
430
431 free(r->xtab);
432 r->xtab = NULL;
433 }
434
435 void
436 roff_reset(struct roff *r)
437 {
438 int i;
439
440 roff_free1(r);
441
442 r->control = 0;
443 memset(&r->regs, 0, sizeof(struct reg) * REG__MAX);
444
445 for (i = 0; i < PREDEFS_MAX; i++)
446 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
447 }
448
449
/*
 * Destroy a parser: release all data it holds by way of
 * roff_free1(), then the parser structure itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
457
458
459 struct roff *
460 roff_alloc(enum mparset type, struct mparse *parse)
461 {
462 struct roff *r;
463 int i;
464
465 r = mandoc_calloc(1, sizeof(struct roff));
466 r->parsetype = type;
467 r->parse = parse;
468 r->rstackpos = -1;
469
470 roffhash_init();
471
472 for (i = 0; i < PREDEFS_MAX; i++)
473 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
474
475 return(r);
476 }
477
/*
 * Pre-filter each and every line for reserved words (one beginning with
 * `\*', e.g., `\*(ab').  These must be handled before the actual line
 * is processed.
 * This also checks the syntax of regular escapes.
 *
 * Each `\*' reference is replaced by the value roff_getstrn() returns
 * for its name, or by nothing if that is NULL.  The result is built in
 * a newly allocated buffer that takes the place of *bufp, with *szp
 * updated to match.  Scanning then restarts at the position of the
 * replaced escape, so the replacement text is scanned as well.
 *
 * Returns ROFF_CONT when the end of the line is reached or when
 * scanning stops at a malformed escape, and ROFF_IGN once more than
 * EXPAND_LIMIT expansions have been done on this line.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	enum mandoc_esc	 esc;
	const char	*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	int		 i, maxl, expand_count;
	size_t		 nsz;
	char		*n;

	expand_count = 0;

again:
	cp = *bufp + pos;
	while (NULL != (cp = strchr(cp, '\\'))) {
		stesc = cp++;

		/*
		 * The second character must be an asterisk.
		 * If it isn't, skip it anyway:  It is escaped,
		 * so it can't start another escape sequence.
		 */

		if ('\0' == *cp)
			return(ROFF_CONT);

		if ('*' != *cp) {
			/* Let mandoc_escape() advance cp past the escape. */
			res = cp;
			esc = mandoc_escape(&cp, NULL, NULL);
			if (ESCAPE_ERROR != esc)
				continue;
			cp = res;
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			return(ROFF_CONT);
		}

		cp++;

		/*
		 * The third character decides the length
		 * of the name of the string.
		 * Save a pointer to the name.
		 * A maxl of zero means "up to the closing bracket".
		 */

		switch (*cp) {
		case ('\0'):
			return(ROFF_CONT);
		case ('('):
			cp++;
			maxl = 2;
			break;
		case ('['):
			cp++;
			maxl = 0;
			break;
		default:
			maxl = 1;
			break;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg
					(MANDOCERR_BADESCAPE,
					 r->parse, ln,
					 (int)(stesc - *bufp), NULL);
				return(ROFF_CONT);
			}
			if (0 == maxl && ']' == *cp)
				break;
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, report that and substitute nothing.
		 */

		res = roff_getstrn(r, stnam, (size_t)i);

		if (NULL == res) {
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		pos = stesc - *bufp;

		nsz = *szp + strlen(res) + 1;
		n = mandoc_malloc(nsz);

		/*
		 * Copy what precedes the escape, then the replacement,
		 * then the rest of the line; in the bracketed form,
		 * cp still points at the `]', which is skipped.
		 */

		strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
		strlcat(n, res, nsz);
		strlcat(n, cp + (maxl ? 0 : 1), nsz);

		free(*bufp);

		*bufp = n;
		*szp = nsz;

		if (EXPAND_LIMIT >= ++expand_count)
			goto again;

		/* Just leave the string unexpanded. */
		mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
		return(ROFF_IGN);
	}
	return(ROFF_CONT);
}
602
603 /*
604 * Process text streams:
605 * Convert all breakable hyphens into ASCII_HYPH.
606 * Decrement and spring input line trap.
607 */
608 static enum rofferr
609 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
610 {
611 size_t sz;
612 const char *start;
613 char *p;
614 int isz;
615 enum mandoc_esc esc;
616
617 start = p = *bufp + pos;
618
619 while ('\0' != *p) {
620 sz = strcspn(p, "-\\");
621 p += sz;
622
623 if ('\0' == *p)
624 break;
625
626 if ('\\' == *p) {
627 /* Skip over escapes. */
628 p++;
629 esc = mandoc_escape
630 ((const char **)&p, NULL, NULL);
631 if (ESCAPE_ERROR == esc)
632 break;
633 continue;
634 } else if (p == start) {
635 p++;
636 continue;
637 }
638
639 if (isalpha((unsigned char)p[-1]) &&
640 isalpha((unsigned char)p[1]))
641 *p = ASCII_HYPH;
642 p++;
643 }
644
645 /* Spring the input line trap. */
646 if (1 == roffit_lines) {
647 isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
648 if (-1 == isz) {
649 perror(NULL);
650 exit((int)MANDOCLEVEL_SYSERR);
651 }
652 free(*bufp);
653 *bufp = p;
654 *szp = isz + 1;
655 *offs = 0;
656 free(roffit_macro);
657 roffit_lines = 0;
658 return(ROFF_REPARSE);
659 } else if (1 < roffit_lines)
660 --roffit_lines;
661 return(ROFF_CONT);
662 }
663
664 enum rofferr
665 roff_parseln(struct roff *r, int ln, char **bufp,
666 size_t *szp, int pos, int *offs)
667 {
668 enum rofft t;
669 enum rofferr e;
670 int ppos, ctl;
671
672 /*
673 * Run the reserved-word filter only if we have some reserved
674 * words to fill in.
675 */
676
677 e = roff_res(r, bufp, szp, ln, pos);
678 if (ROFF_IGN == e)
679 return(e);
680 assert(ROFF_CONT == e);
681
682 ppos = pos;
683 ctl = roff_getcontrol(r, *bufp, &pos);
684
685 /*
686 * First, if a scope is open and we're not a macro, pass the
687 * text through the macro's filter. If a scope isn't open and
688 * we're not a macro, just let it through.
689 * Finally, if there's an equation scope open, divert it into it
690 * no matter our state.
691 */
692
693 if (r->last && ! ctl) {
694 t = r->last->tok;
695 assert(roffs[t].text);
696 e = (*roffs[t].text)
697 (r, t, bufp, szp, ln, pos, pos, offs);
698 assert(ROFF_IGN == e || ROFF_CONT == e);
699 if (ROFF_CONT != e)
700 return(e);
701 if (r->eqn)
702 return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
703 if (r->tbl)
704 return(tbl_read(r->tbl, ln, *bufp, pos));
705 return(roff_parsetext(bufp, szp, pos, offs));
706 } else if ( ! ctl) {
707 if (r->eqn)
708 return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
709 if (r->tbl)
710 return(tbl_read(r->tbl, ln, *bufp, pos));
711 return(roff_parsetext(bufp, szp, pos, offs));
712 } else if (r->eqn)
713 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
714
715 /*
716 * If a scope is open, go to the child handler for that macro,
717 * as it may want to preprocess before doing anything with it.
718 * Don't do so if an equation is open.
719 */
720
721 if (r->last) {
722 t = r->last->tok;
723 assert(roffs[t].sub);
724 return((*roffs[t].sub)
725 (r, t, bufp, szp,
726 ln, ppos, pos, offs));
727 }
728
729 /*
730 * Lastly, as we've no scope open, try to look up and execute
731 * the new macro. If no macro is found, simply return and let
732 * the compilers handle it.
733 */
734
735 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
736 return(ROFF_CONT);
737
738 assert(roffs[t].proc);
739 return((*roffs[t].proc)
740 (r, t, bufp, szp,
741 ln, ppos, pos, offs));
742 }
743
744
745 void
746 roff_endparse(struct roff *r)
747 {
748
749 if (r->last)
750 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
751 r->last->line, r->last->col, NULL);
752
753 if (r->eqn) {
754 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
755 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
756 eqn_end(&r->eqn);
757 }
758
759 if (r->tbl) {
760 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
761 r->tbl->line, r->tbl->pos, NULL);
762 tbl_end(&r->tbl);
763 }
764 }
765
766 /*
767 * Parse a roff node's type from the input buffer. This must be in the
768 * form of ".foo xxx" in the usual way.
769 */
770 static enum rofft
771 roff_parse(struct roff *r, const char *buf, int *pos)
772 {
773 const char *mac;
774 size_t maclen;
775 enum rofft t;
776
777 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
778 '\t' == buf[*pos] || ' ' == buf[*pos])
779 return(ROFF_MAX);
780
781 /*
782 * We stop the macro parse at an escape, tab, space, or nil.
783 * However, `\}' is also a valid macro, so make sure we don't
784 * clobber it by seeing the `\' as the end of token.
785 */
786
787 mac = buf + *pos;
788 maclen = strcspn(mac + 1, " \\\t\0") + 1;
789
790 t = (r->current_string = roff_getstrn(r, mac, maclen))
791 ? ROFF_USERDEF : roffhash_find(mac, maclen);
792
793 *pos += (int)maclen;
794
795 while (buf[*pos] && ' ' == buf[*pos])
796 (*pos)++;
797
798 return(t);
799 }
800
801 /* ARGSUSED */
802 static enum rofferr
803 roff_cblock(ROFF_ARGS)
804 {
805
806 /*
807 * A block-close `..' should only be invoked as a child of an
808 * ignore macro, otherwise raise a warning and just ignore it.
809 */
810
811 if (NULL == r->last) {
812 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
813 return(ROFF_IGN);
814 }
815
816 switch (r->last->tok) {
817 case (ROFF_am):
818 /* FALLTHROUGH */
819 case (ROFF_ami):
820 /* FALLTHROUGH */
821 case (ROFF_am1):
822 /* FALLTHROUGH */
823 case (ROFF_de):
824 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
825 /* FALLTHROUGH */
826 case (ROFF_dei):
827 /* FALLTHROUGH */
828 case (ROFF_ig):
829 break;
830 default:
831 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
832 return(ROFF_IGN);
833 }
834
835 if ((*bufp)[pos])
836 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
837
838 roffnode_pop(r);
839 roffnode_cleanscope(r);
840 return(ROFF_IGN);
841
842 }
843
844
845 static void
846 roffnode_cleanscope(struct roff *r)
847 {
848
849 while (r->last) {
850 if (--r->last->endspan != 0)
851 break;
852 roffnode_pop(r);
853 }
854 }
855
856
857 /* ARGSUSED */
858 static enum rofferr
859 roff_ccond(ROFF_ARGS)
860 {
861
862 if (NULL == r->last) {
863 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
864 return(ROFF_IGN);
865 }
866
867 switch (r->last->tok) {
868 case (ROFF_el):
869 /* FALLTHROUGH */
870 case (ROFF_ie):
871 /* FALLTHROUGH */
872 case (ROFF_if):
873 break;
874 default:
875 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
876 return(ROFF_IGN);
877 }
878
879 if (r->last->endspan > -1) {
880 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
881 return(ROFF_IGN);
882 }
883
884 if ((*bufp)[pos])
885 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
886
887 roffnode_pop(r);
888 roffnode_cleanscope(r);
889 return(ROFF_IGN);
890 }
891
892
893 /* ARGSUSED */
894 static enum rofferr
895 roff_block(ROFF_ARGS)
896 {
897 int sv;
898 size_t sz;
899 char *name;
900
901 name = NULL;
902
903 if (ROFF_ig != tok) {
904 if ('\0' == (*bufp)[pos]) {
905 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
906 return(ROFF_IGN);
907 }
908
909 /*
910 * Re-write `de1', since we don't really care about
911 * groff's strange compatibility mode, into `de'.
912 */
913
914 if (ROFF_de1 == tok)
915 tok = ROFF_de;
916 if (ROFF_de == tok)
917 name = *bufp + pos;
918 else
919 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
920 roffs[tok].name);
921
922 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
923 pos++;
924
925 while (isspace((unsigned char)(*bufp)[pos]))
926 (*bufp)[pos++] = '\0';
927 }
928
929 roffnode_push(r, tok, name, ln, ppos);
930
931 /*
932 * At the beginning of a `de' macro, clear the existing string
933 * with the same name, if there is one. New content will be
934 * added from roff_block_text() in multiline mode.
935 */
936
937 if (ROFF_de == tok)
938 roff_setstr(r, name, "", 0);
939
940 if ('\0' == (*bufp)[pos])
941 return(ROFF_IGN);
942
943 /* If present, process the custom end-of-line marker. */
944
945 sv = pos;
946 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
947 pos++;
948
949 /*
950 * Note: groff does NOT like escape characters in the input.
951 * Instead of detecting this, we're just going to let it fly and
952 * to hell with it.
953 */
954
955 assert(pos > sv);
956 sz = (size_t)(pos - sv);
957
958 if (1 == sz && '.' == (*bufp)[sv])
959 return(ROFF_IGN);
960
961 r->last->end = mandoc_malloc(sz + 1);
962
963 memcpy(r->last->end, *bufp + sv, sz);
964 r->last->end[(int)sz] = '\0';
965
966 if ((*bufp)[pos])
967 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
968
969 return(ROFF_IGN);
970 }
971
972
973 /* ARGSUSED */
974 static enum rofferr
975 roff_block_sub(ROFF_ARGS)
976 {
977 enum rofft t;
978 int i, j;
979
980 /*
981 * First check whether a custom macro exists at this level. If
982 * it does, then check against it. This is some of groff's
983 * stranger behaviours. If we encountered a custom end-scope
984 * tag and that tag also happens to be a "real" macro, then we
985 * need to try interpreting it again as a real macro. If it's
986 * not, then return ignore. Else continue.
987 */
988
989 if (r->last->end) {
990 for (i = pos, j = 0; r->last->end[j]; j++, i++)
991 if ((*bufp)[i] != r->last->end[j])
992 break;
993
994 if ('\0' == r->last->end[j] &&
995 ('\0' == (*bufp)[i] ||
996 ' ' == (*bufp)[i] ||
997 '\t' == (*bufp)[i])) {
998 roffnode_pop(r);
999 roffnode_cleanscope(r);
1000
1001 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1002 i++;
1003
1004 pos = i;
1005 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1006 return(ROFF_RERUN);
1007 return(ROFF_IGN);
1008 }
1009 }
1010
1011 /*
1012 * If we have no custom end-query or lookup failed, then try
1013 * pulling it out of the hashtable.
1014 */
1015
1016 t = roff_parse(r, *bufp, &pos);
1017
1018 /*
1019 * Macros other than block-end are only significant
1020 * in `de' blocks; elsewhere, simply throw them away.
1021 */
1022 if (ROFF_cblock != t) {
1023 if (ROFF_de == tok)
1024 roff_setstr(r, r->last->name, *bufp + ppos, 1);
1025 return(ROFF_IGN);
1026 }
1027
1028 assert(roffs[t].proc);
1029 return((*roffs[t].proc)(r, t, bufp, szp,
1030 ln, ppos, pos, offs));
1031 }
1032
1033
1034 /* ARGSUSED */
1035 static enum rofferr
1036 roff_block_text(ROFF_ARGS)
1037 {
1038
1039 if (ROFF_de == tok)
1040 roff_setstr(r, r->last->name, *bufp + pos, 1);
1041
1042 return(ROFF_IGN);
1043 }
1044
1045
1046 /* ARGSUSED */
1047 static enum rofferr
1048 roff_cond_sub(ROFF_ARGS)
1049 {
1050 enum rofft t;
1051 enum roffrule rr;
1052 char *ep;
1053
1054 rr = r->last->rule;
1055 roffnode_cleanscope(r);
1056 t = roff_parse(r, *bufp, &pos);
1057
1058 /*
1059 * Fully handle known macros when they are structurally
1060 * required or when the conditional evaluated to true.
1061 */
1062
1063 if ((ROFF_MAX != t) &&
1064 (ROFF_ccond == t || ROFFRULE_ALLOW == rr ||
1065 ROFFMAC_STRUCT & roffs[t].flags)) {
1066 assert(roffs[t].proc);
1067 return((*roffs[t].proc)(r, t, bufp, szp,
1068 ln, ppos, pos, offs));
1069 }
1070
1071 /* Always check for the closing delimiter `\}'. */
1072
1073 ep = &(*bufp)[pos];
1074 while (NULL != (ep = strchr(ep, '\\'))) {
1075 if ('}' != *(++ep))
1076 continue;
1077
1078 /*
1079 * If we're at the end of line, then just chop
1080 * off the \} and resize the buffer.
1081 * If we aren't, then convert it to spaces.
1082 */
1083
1084 if ('\0' == *(ep + 1)) {
1085 *--ep = '\0';
1086 *szp -= 2;
1087 } else
1088 *(ep - 1) = *ep = ' ';
1089
1090 roff_ccond(r, ROFF_ccond, bufp, szp,
1091 ln, pos, pos + 2, offs);
1092 break;
1093 }
1094 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1095 }
1096
1097 /* ARGSUSED */
1098 static enum rofferr
1099 roff_cond_text(ROFF_ARGS)
1100 {
1101 char *ep;
1102 enum roffrule rr;
1103
1104 rr = r->last->rule;
1105 roffnode_cleanscope(r);
1106
1107 ep = &(*bufp)[pos];
1108 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1109 ep++;
1110 if ('}' != *ep)
1111 continue;
1112 *ep = '&';
1113 roff_ccond(r, ROFF_ccond, bufp, szp,
1114 ln, pos, pos + 2, offs);
1115 }
1116 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1117 }
1118
1119 static enum roffrule
1120 roff_evalcond(const char *v, int *pos)
1121 {
1122
1123 switch (v[*pos]) {
1124 case ('n'):
1125 (*pos)++;
1126 return(ROFFRULE_ALLOW);
1127 case ('e'):
1128 /* FALLTHROUGH */
1129 case ('o'):
1130 /* FALLTHROUGH */
1131 case ('t'):
1132 (*pos)++;
1133 return(ROFFRULE_DENY);
1134 default:
1135 break;
1136 }
1137
1138 while (v[*pos] && ' ' != v[*pos])
1139 (*pos)++;
1140 return(ROFFRULE_DENY);
1141 }
1142
1143 /* ARGSUSED */
1144 static enum rofferr
1145 roff_line_ignore(ROFF_ARGS)
1146 {
1147
1148 return(ROFF_IGN);
1149 }
1150
/*
 * Open the scope of a conditional request: el, ie or if.
 *
 * A node is pushed for the request and its rule is set from the
 * evaluation of the condition or, for `el', from the result saved
 * by an earlier `ie'.  The extent of the scope is then taken from
 * what follows the condition: nothing at all, an opening brace,
 * or anything else.
 *
 * Returns ROFF_RERUN with *offs set to the position after the
 * condition and the opening brace, if any.  Returns ROFF_ERR if
 * an `ie' finds the stack of saved results to be full.
 */
/* ARGSUSED */
static enum rofferr
roff_cond(ROFF_ARGS)
{

	roffnode_push(r, tok, NULL, ln, ppos);

	/*
	 * An `.el' has no conditional body: it will consume the value
	 * of the current rstack entry set in prior `ie' calls or
	 * defaults to DENY.
	 *
	 * If we're not an `el', however, then evaluate the conditional.
	 */

	r->last->rule = ROFF_el == tok ?
		(r->rstackpos < 0 ?
		 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
		roff_evalcond(*bufp, &pos);

	/*
	 * An if-else will put the NEGATION of the current evaluated
	 * conditional into the stack of rules.
	 */

	if (ROFF_ie == tok) {
		if (r->rstackpos == RSTACK_MAX - 1) {
			mandoc_msg(MANDOCERR_MEM,
				r->parse, ln, ppos, NULL);
			return(ROFF_ERR);
		}
		r->rstack[++r->rstackpos] =
			ROFFRULE_DENY == r->last->rule ?
			ROFFRULE_ALLOW : ROFFRULE_DENY;
	}

	/* If the parent has false as its rule, then so do we. */

	if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
		r->last->rule = ROFFRULE_DENY;

	/*
	 * Determine scope.
	 * If there is nothing on the line after the conditional,
	 * not even whitespace, use next-line scope.
	 * The endspan values are counted down by roffnode_cleanscope().
	 */

	if ('\0' == (*bufp)[pos]) {
		r->last->endspan = 2;
		goto out;
	}

	while (' ' == (*bufp)[pos])
		pos++;

	/*
	 * An opening brace requests multiline scope.
	 * The negative endspan is what roff_ccond() requires
	 * for accepting a closing brace.
	 */

	if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
		r->last->endspan = -1;
		pos += 2;
		goto out;
	}

	/*
	 * Anything else following the conditional causes
	 * single-line scope.  Warn if the scope contains
	 * nothing but trailing whitespace.
	 */

	if ('\0' == (*bufp)[pos])
		mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);

	r->last->endspan = 1;

out:
	/* Have the remainder of the line parsed as a line of its own. */
	*offs = pos;
	return(ROFF_RERUN);
}
1229
1230
1231 /* ARGSUSED */
1232 static enum rofferr
1233 roff_ds(ROFF_ARGS)
1234 {
1235 char *name, *string;
1236
1237 /*
1238 * A symbol is named by the first word following the macro
1239 * invocation up to a space. Its value is anything after the
1240 * name's trailing whitespace and optional double-quote. Thus,
1241 *
1242 * [.ds foo "bar " ]
1243 *
1244 * will have `bar " ' as its value.
1245 */
1246
1247 string = *bufp + pos;
1248 name = roff_getname(r, &string, ln, pos);
1249 if ('\0' == *name)
1250 return(ROFF_IGN);
1251
1252 /* Read past initial double-quote. */
1253 if ('"' == *string)
1254 string++;
1255
1256 /* The rest is the value. */
1257 roff_setstr(r, name, string, 0);
1258 return(ROFF_IGN);
1259 }
1260
1261 int
1262 roff_regisset(const struct roff *r, enum regs reg)
1263 {
1264
1265 return(r->regs[(int)reg].set);
1266 }
1267
1268 unsigned int
1269 roff_regget(const struct roff *r, enum regs reg)
1270 {
1271
1272 return(r->regs[(int)reg].u);
1273 }
1274
1275 void
1276 roff_regunset(struct roff *r, enum regs reg)
1277 {
1278
1279 r->regs[(int)reg].set = 0;
1280 }
1281
1282 /* ARGSUSED */
1283 static enum rofferr
1284 roff_nr(ROFF_ARGS)
1285 {
1286 const char *key;
1287 char *val;
1288 int iv;
1289
1290 val = *bufp + pos;
1291 key = roff_getname(r, &val, ln, pos);
1292
1293 if (0 == strcmp(key, "nS")) {
1294 r->regs[(int)REG_nS].set = 1;
1295 if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0)
1296 r->regs[(int)REG_nS].u = (unsigned)iv;
1297 else
1298 r->regs[(int)REG_nS].u = 0u;
1299 }
1300
1301 return(ROFF_IGN);
1302 }
1303
1304 /* ARGSUSED */
1305 static enum rofferr
1306 roff_rm(ROFF_ARGS)
1307 {
1308 const char *name;
1309 char *cp;
1310
1311 cp = *bufp + pos;
1312 while ('\0' != *cp) {
1313 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1314 if ('\0' != *name)
1315 roff_setstr(r, name, NULL, 0);
1316 }
1317 return(ROFF_IGN);
1318 }
1319
1320 /* ARGSUSED */
1321 static enum rofferr
1322 roff_it(ROFF_ARGS)
1323 {
1324 char *cp;
1325 size_t len;
1326 int iv;
1327
1328 /* Parse the number of lines. */
1329 cp = *bufp + pos;
1330 len = strcspn(cp, " \t");
1331 cp[len] = '\0';
1332 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1333 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1334 ln, ppos, *bufp + 1);
1335 return(ROFF_IGN);
1336 }
1337 cp += len + 1;
1338
1339 /* Arm the input line trap. */
1340 roffit_lines = iv;
1341 roffit_macro = mandoc_strdup(cp);
1342 return(ROFF_IGN);
1343 }
1344
1345 /* ARGSUSED */
1346 static enum rofferr
1347 roff_Dd(ROFF_ARGS)
1348 {
1349 const char *const *cp;
1350
1351 if (MPARSE_MDOC != r->parsetype)
1352 for (cp = __mdoc_reserved; *cp; cp++)
1353 roff_setstr(r, *cp, NULL, 0);
1354
1355 return(ROFF_CONT);
1356 }
1357
1358 /* ARGSUSED */
1359 static enum rofferr
1360 roff_TH(ROFF_ARGS)
1361 {
1362 const char *const *cp;
1363
1364 if (MPARSE_MDOC != r->parsetype)
1365 for (cp = __man_reserved; *cp; cp++)
1366 roff_setstr(r, *cp, NULL, 0);
1367
1368 return(ROFF_CONT);
1369 }
1370
1371 /* ARGSUSED */
1372 static enum rofferr
1373 roff_TE(ROFF_ARGS)
1374 {
1375
1376 if (NULL == r->tbl)
1377 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1378 else
1379 tbl_end(&r->tbl);
1380
1381 return(ROFF_IGN);
1382 }
1383
1384 /* ARGSUSED */
1385 static enum rofferr
1386 roff_T_(ROFF_ARGS)
1387 {
1388
1389 if (NULL == r->tbl)
1390 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1391 else
1392 tbl_restart(ppos, ln, r->tbl);
1393
1394 return(ROFF_IGN);
1395 }
1396
#if 0
/*
 * Currently compiled out.
 * Returns 1 if an equation is open and eqn_end() on it yields
 * ROFF_EQN, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (r->eqn == NULL)
		return 0;

	return eqn_end(&r->eqn) == ROFF_EQN ? 1 : 0;
}
#endif
1405
1406 static void
1407 roff_openeqn(struct roff *r, const char *name, int line,
1408 int offs, const char *buf)
1409 {
1410 struct eqn_node *e;
1411 int poff;
1412
1413 assert(NULL == r->eqn);
1414 e = eqn_alloc(name, offs, line, r->parse);
1415
1416 if (r->last_eqn)
1417 r->last_eqn->next = e;
1418 else
1419 r->first_eqn = r->last_eqn = e;
1420
1421 r->eqn = r->last_eqn = e;
1422
1423 if (buf) {
1424 poff = 0;
1425 eqn_read(&r->eqn, line, buf, offs, &poff);
1426 }
1427 }
1428
1429 /* ARGSUSED */
1430 static enum rofferr
1431 roff_EQ(ROFF_ARGS)
1432 {
1433
1434 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1435 return(ROFF_IGN);
1436 }
1437
1438 /* ARGSUSED */
1439 static enum rofferr
1440 roff_EN(ROFF_ARGS)
1441 {
1442
1443 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1444 return(ROFF_IGN);
1445 }
1446
1447 /* ARGSUSED */
1448 static enum rofferr
1449 roff_TS(ROFF_ARGS)
1450 {
1451 struct tbl_node *tbl;
1452
1453 if (r->tbl) {
1454 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1455 tbl_end(&r->tbl);
1456 }
1457
1458 tbl = tbl_alloc(ppos, ln, r->parse);
1459
1460 if (r->last_tbl)
1461 r->last_tbl->next = tbl;
1462 else
1463 r->first_tbl = r->last_tbl = tbl;
1464
1465 r->tbl = r->last_tbl = tbl;
1466 return(ROFF_IGN);
1467 }
1468
1469 /* ARGSUSED */
1470 static enum rofferr
1471 roff_cc(ROFF_ARGS)
1472 {
1473 const char *p;
1474
1475 p = *bufp + pos;
1476
1477 if ('\0' == *p || '.' == (r->control = *p++))
1478 r->control = 0;
1479
1480 if ('\0' != *p)
1481 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1482
1483 return(ROFF_IGN);
1484 }
1485
1486 /* ARGSUSED */
1487 static enum rofferr
1488 roff_tr(ROFF_ARGS)
1489 {
1490 const char *p, *first, *second;
1491 size_t fsz, ssz;
1492 enum mandoc_esc esc;
1493
1494 p = *bufp + pos;
1495
1496 if ('\0' == *p) {
1497 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1498 return(ROFF_IGN);
1499 }
1500
1501 while ('\0' != *p) {
1502 fsz = ssz = 1;
1503
1504 first = p++;
1505 if ('\\' == *first) {
1506 esc = mandoc_escape(&p, NULL, NULL);
1507 if (ESCAPE_ERROR == esc) {
1508 mandoc_msg
1509 (MANDOCERR_BADESCAPE, r->parse,
1510 ln, (int)(p - *bufp), NULL);
1511 return(ROFF_IGN);
1512 }
1513 fsz = (size_t)(p - first);
1514 }
1515
1516 second = p++;
1517 if ('\\' == *second) {
1518 esc = mandoc_escape(&p, NULL, NULL);
1519 if (ESCAPE_ERROR == esc) {
1520 mandoc_msg
1521 (MANDOCERR_BADESCAPE, r->parse,
1522 ln, (int)(p - *bufp), NULL);
1523 return(ROFF_IGN);
1524 }
1525 ssz = (size_t)(p - second);
1526 } else if ('\0' == *second) {
1527 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1528 ln, (int)(p - *bufp), NULL);
1529 second = " ";
1530 p--;
1531 }
1532
1533 if (fsz > 1) {
1534 roff_setstrn(&r->xmbtab, first,
1535 fsz, second, ssz, 0);
1536 continue;
1537 }
1538
1539 if (NULL == r->xtab)
1540 r->xtab = mandoc_calloc
1541 (128, sizeof(struct roffstr));
1542
1543 free(r->xtab[(int)*first].p);
1544 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1545 r->xtab[(int)*first].sz = ssz;
1546 }
1547
1548 return(ROFF_IGN);
1549 }
1550
1551 /* ARGSUSED */
1552 static enum rofferr
1553 roff_so(ROFF_ARGS)
1554 {
1555 char *name;
1556
1557 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1558
1559 /*
1560 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1561 * opening anything that's not in our cwd or anything beneath
1562 * it. Thus, explicitly disallow traversing up the file-system
1563 * or using absolute paths.
1564 */
1565
1566 name = *bufp + pos;
1567 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1568 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1569 return(ROFF_ERR);
1570 }
1571
1572 *offs = pos;
1573 return(ROFF_SO);
1574 }
1575
1576 /* ARGSUSED */
1577 static enum rofferr
1578 roff_userdef(ROFF_ARGS)
1579 {
1580 const char *arg[9];
1581 char *cp, *n1, *n2;
1582 int i;
1583
1584 /*
1585 * Collect pointers to macro argument strings
1586 * and null-terminate them.
1587 */
1588 cp = *bufp + pos;
1589 for (i = 0; i < 9; i++)
1590 arg[i] = '\0' == *cp ? "" :
1591 mandoc_getarg(r->parse, &cp, ln, &pos);
1592
1593 /*
1594 * Expand macro arguments.
1595 */
1596 *szp = 0;
1597 n1 = cp = mandoc_strdup(r->current_string);
1598 while (NULL != (cp = strstr(cp, "\\$"))) {
1599 i = cp[2] - '1';
1600 if (0 > i || 8 < i) {
1601 /* Not an argument invocation. */
1602 cp += 2;
1603 continue;
1604 }
1605
1606 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1607 n2 = mandoc_malloc(*szp);
1608
1609 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1610 strlcat(n2, arg[i], *szp);
1611 strlcat(n2, cp + 3, *szp);
1612
1613 cp = n2 + (cp - n1);
1614 free(n1);
1615 n1 = n2;
1616 }
1617
1618 /*
1619 * Replace the macro invocation
1620 * by the expanded macro.
1621 */
1622 free(*bufp);
1623 *bufp = n1;
1624 if (0 == *szp)
1625 *szp = strlen(*bufp) + 1;
1626
1627 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1628 ROFF_REPARSE : ROFF_APPEND);
1629 }
1630
1631 static char *
1632 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1633 {
1634 char *name, *cp;
1635
1636 name = *cpp;
1637 if ('\0' == *name)
1638 return(name);
1639
1640 /* Read until end of name. */
1641 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1642 if ('\\' != *cp)
1643 continue;
1644 cp++;
1645 if ('\\' == *cp)
1646 continue;
1647 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1648 *cp = '\0';
1649 name = cp;
1650 }
1651
1652 /* Nil-terminate name. */
1653 if ('\0' != *cp)
1654 *(cp++) = '\0';
1655
1656 /* Read past spaces. */
1657 while (' ' == *cp)
1658 cp++;
1659
1660 *cpp = cp;
1661 return(name);
1662 }
1663
1664 /*
1665 * Store *string into the user-defined string called *name.
1666 * In multiline mode, append to an existing entry and append '\n';
1667 * else replace the existing entry, if there is one.
1668 * To clear an existing entry, call with (*r, *name, NULL, 0).
1669 */
1670 static void
1671 roff_setstr(struct roff *r, const char *name, const char *string,
1672 int multiline)
1673 {
1674
1675 roff_setstrn(&r->strtab, name, strlen(name), string,
1676 string ? strlen(string) : 0, multiline);
1677 }
1678
1679 static void
1680 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1681 const char *string, size_t stringsz, int multiline)
1682 {
1683 struct roffkv *n;
1684 char *c;
1685 int i;
1686 size_t oldch, newch;
1687
1688 /* Search for an existing string with the same name. */
1689 n = *r;
1690
1691 while (n && strcmp(name, n->key.p))
1692 n = n->next;
1693
1694 if (NULL == n) {
1695 /* Create a new string table entry. */
1696 n = mandoc_malloc(sizeof(struct roffkv));
1697 n->key.p = mandoc_strndup(name, namesz);
1698 n->key.sz = namesz;
1699 n->val.p = NULL;
1700 n->val.sz = 0;
1701 n->next = *r;
1702 *r = n;
1703 } else if (0 == multiline) {
1704 /* In multiline mode, append; else replace. */
1705 free(n->val.p);
1706 n->val.p = NULL;
1707 n->val.sz = 0;
1708 }
1709
1710 if (NULL == string)
1711 return;
1712
1713 /*
1714 * One additional byte for the '\n' in multiline mode,
1715 * and one for the terminating '\0'.
1716 */
1717 newch = stringsz + (multiline ? 2u : 1u);
1718
1719 if (NULL == n->val.p) {
1720 n->val.p = mandoc_malloc(newch);
1721 *n->val.p = '\0';
1722 oldch = 0;
1723 } else {
1724 oldch = n->val.sz;
1725 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1726 }
1727
1728 /* Skip existing content in the destination buffer. */
1729 c = n->val.p + (int)oldch;
1730
1731 /* Append new content to the destination buffer. */
1732 i = 0;
1733 while (i < (int)stringsz) {
1734 /*
1735 * Rudimentary roff copy mode:
1736 * Handle escaped backslashes.
1737 */
1738 if ('\\' == string[i] && '\\' == string[i + 1])
1739 i++;
1740 *c++ = string[i++];
1741 }
1742
1743 /* Append terminating bytes. */
1744 if (multiline)
1745 *c++ = '\n';
1746
1747 *c = '\0';
1748 n->val.sz = (int)(c - n->val.p);
1749 }
1750
1751 static const char *
1752 roff_getstrn(const struct roff *r, const char *name, size_t len)
1753 {
1754 const struct roffkv *n;
1755
1756 for (n = r->strtab; n; n = n->next)
1757 if (0 == strncmp(name, n->key.p, len) &&
1758 '\0' == n->key.p[(int)len])
1759 return(n->val.p);
1760
1761 return(NULL);
1762 }
1763
1764 static void
1765 roff_freestr(struct roffkv *r)
1766 {
1767 struct roffkv *n, *nn;
1768
1769 for (n = r; n; n = nn) {
1770 free(n->key.p);
1771 free(n->val.p);
1772 nn = n->next;
1773 free(n);
1774 }
1775 }
1776
1777 const struct tbl_span *
1778 roff_span(const struct roff *r)
1779 {
1780
1781 return(r->tbl ? tbl_span(r->tbl) : NULL);
1782 }
1783
1784 const struct eqn *
1785 roff_eqn(const struct roff *r)
1786 {
1787
1788 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1789 }
1790
1791 /*
1792 * Duplicate an input string, making the appropriate character
1793 * conversations (as stipulated by `tr') along the way.
1794 * Returns a heap-allocated string with all the replacements made.
1795 */
1796 char *
1797 roff_strdup(const struct roff *r, const char *p)
1798 {
1799 const struct roffkv *cp;
1800 char *res;
1801 const char *pp;
1802 size_t ssz, sz;
1803 enum mandoc_esc esc;
1804
1805 if (NULL == r->xmbtab && NULL == r->xtab)
1806 return(mandoc_strdup(p));
1807 else if ('\0' == *p)
1808 return(mandoc_strdup(""));
1809
1810 /*
1811 * Step through each character looking for term matches
1812 * (remember that a `tr' can be invoked with an escape, which is
1813 * a glyph but the escape is multi-character).
1814 * We only do this if the character hash has been initialised
1815 * and the string is >0 length.
1816 */
1817
1818 res = NULL;
1819 ssz = 0;
1820
1821 while ('\0' != *p) {
1822 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1823 sz = r->xtab[(int)*p].sz;
1824 res = mandoc_realloc(res, ssz + sz + 1);
1825 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1826 ssz += sz;
1827 p++;
1828 continue;
1829 } else if ('\\' != *p) {
1830 res = mandoc_realloc(res, ssz + 2);
1831 res[ssz++] = *p++;
1832 continue;
1833 }
1834
1835 /* Search for term matches. */
1836 for (cp = r->xmbtab; cp; cp = cp->next)
1837 if (0 == strncmp(p, cp->key.p, cp->key.sz))
1838 break;
1839
1840 if (NULL != cp) {
1841 /*
1842 * A match has been found.
1843 * Append the match to the array and move
1844 * forward by its keysize.
1845 */
1846 res = mandoc_realloc
1847 (res, ssz + cp->val.sz + 1);
1848 memcpy(res + ssz, cp->val.p, cp->val.sz);
1849 ssz += cp->val.sz;
1850 p += (int)cp->key.sz;
1851 continue;
1852 }
1853
1854 /*
1855 * Handle escapes carefully: we need to copy
1856 * over just the escape itself, or else we might
1857 * do replacements within the escape itself.
1858 * Make sure to pass along the bogus string.
1859 */
1860 pp = p++;
1861 esc = mandoc_escape(&p, NULL, NULL);
1862 if (ESCAPE_ERROR == esc) {
1863 sz = strlen(pp);
1864 res = mandoc_realloc(res, ssz + sz + 1);
1865 memcpy(res + ssz, pp, sz);
1866 break;
1867 }
1868 /*
1869 * We bail out on bad escapes.
1870 * No need to warn: we already did so when
1871 * roff_res() was called.
1872 */
1873 sz = (int)(p - pp);
1874 res = mandoc_realloc(res, ssz + sz + 1);
1875 memcpy(res + ssz, pp, sz);
1876 ssz += sz;
1877 }
1878
1879 res[(int)ssz] = '\0';
1880 return(res);
1881 }
1882
1883 /*
1884 * Find out whether a line is a macro line or not.
1885 * If it is, adjust the current position and return one; if it isn't,
1886 * return zero and don't change the current position.
1887 * If the control character has been set with `.cc', then let that grain
1888 * precedence.
1889 * This is slighly contrary to groff, where using the non-breaking
1890 * control character when `cc' has been invoked will cause the
1891 * non-breaking macro contents to be printed verbatim.
1892 */
1893 int
1894 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
1895 {
1896 int pos;
1897
1898 pos = *ppos;
1899
1900 if (0 != r->control && cp[pos] == r->control)
1901 pos++;
1902 else if (0 != r->control)
1903 return(0);
1904 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
1905 pos += 2;
1906 else if ('.' == cp[pos] || '\'' == cp[pos])
1907 pos++;
1908 else
1909 return(0);
1910
1911 while (' ' == cp[pos] || '\t' == cp[pos])
1912 pos++;
1913
1914 *ppos = pos;
1915 return(1);
1916 }