]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
7c46e9a593a96f9613227097720500374f585a3a
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.200 2014/03/20 02:57:28 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "libroff.h"
30 #include "libmandoc.h"
31
/* Upper bound on the depth of the if-else rule stack (struct roff.rstack). */
#define	RSTACK_MAX	128

/*
 * Upper bound on the number of string expansions performed on one
 * input line; going beyond it is treated as an expansion loop.
 */
#define	EXPAND_LIMIT	1000
37
/*
 * Tokens for the requests and macros handled in this file.
 * The numeric value of a token is its index into roffs[], so the
 * order here must stay in step with that table.
 */
enum	rofft {
	/* roff requests */
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_as,
	ROFF_cc,
	ROFF_ce,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	/* macros intercepted here */
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	/* block close ("..") and user-defined macros */
	ROFF_cblock,
	ROFF_USERDEF,
	/* number of tokens; also used as the "not found" value */
	ROFF_MAX
};
78
/*
 * Minimal string buffer: a pointer together with its cached length.
 */
struct	roffstr {
	char		*p;	/* nil-terminated buffer */
	size_t		 sz;	/* saved strlen(p) */
};
86
87 /*
88 * A key-value roffstr pair as part of a singly-linked list.
89 */
90 struct roffkv {
91 struct roffstr key;
92 struct roffstr val;
93 struct roffkv *next; /* next in list */
94 };
95
96 /*
97 * A single number register as part of a singly-linked list.
98 */
99 struct roffreg {
100 struct roffstr key;
101 int val;
102 struct roffreg *next;
103 };
104
105 struct roff {
106 struct mparse *parse; /* parse point */
107 int options; /* parse options */
108 struct roffnode *last; /* leaf of stack */
109 int rstack[RSTACK_MAX]; /* stack of !`ie' rules */
110 char control; /* control character */
111 int rstackpos; /* position in rstack */
112 struct roffreg *regtab; /* number registers */
113 struct roffkv *strtab; /* user-defined strings & macros */
114 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
115 struct roffstr *xtab; /* single-byte trans table (`tr') */
116 const char *current_string; /* value of last called user macro */
117 struct tbl_node *first_tbl; /* first table parsed */
118 struct tbl_node *last_tbl; /* last table parsed */
119 struct tbl_node *tbl; /* current table being parsed */
120 struct eqn_node *last_eqn; /* last equation parsed */
121 struct eqn_node *first_eqn; /* first equation parsed */
122 struct eqn_node *eqn; /* current equation being parsed */
123 };
124
125 struct roffnode {
126 enum rofft tok; /* type of node */
127 struct roffnode *parent; /* up one in stack */
128 int line; /* parse line */
129 int col; /* parse col */
130 char *name; /* node name, e.g. macro name */
131 char *end; /* end-rules: custom token */
132 int endspan; /* end-rules: next-line or infty */
133 int rule; /* current evaluation rule */
134 };
135
/*
 * Parameter list shared by every request handler, and the matching
 * function pointer type stored in the roffs[] table.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
146
147 struct roffmac {
148 const char *name; /* macro name */
149 roffproc proc; /* process new macro */
150 roffproc text; /* process as child text of macro */
151 roffproc sub; /* process as child of macro */
152 int flags;
153 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
154 struct roffmac *next;
155 };
156
/*
 * A predefined string: the input name and the text substituted for it.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one struct predef initializer, trailing comma included. */
#define	PREDEF(in_name, out_str) \
	{ (in_name), (out_str) },
164
165 static enum rofft roffhash_find(const char *, size_t);
166 static void roffhash_init(void);
167 static void roffnode_cleanscope(struct roff *);
168 static void roffnode_pop(struct roff *);
169 static void roffnode_push(struct roff *, enum rofft,
170 const char *, int, int);
171 static enum rofferr roff_block(ROFF_ARGS);
172 static enum rofferr roff_block_text(ROFF_ARGS);
173 static enum rofferr roff_block_sub(ROFF_ARGS);
174 static enum rofferr roff_cblock(ROFF_ARGS);
175 static enum rofferr roff_cc(ROFF_ARGS);
176 static void roff_ccond(struct roff *, int, int);
177 static enum rofferr roff_cond(ROFF_ARGS);
178 static enum rofferr roff_cond_text(ROFF_ARGS);
179 static enum rofferr roff_cond_sub(ROFF_ARGS);
180 static enum rofferr roff_ds(ROFF_ARGS);
181 static int roff_evalcond(const char *, int *);
182 static int roff_evalstrcond(const char *, int *);
183 static void roff_free1(struct roff *);
184 static void roff_freereg(struct roffreg *);
185 static void roff_freestr(struct roffkv *);
186 static char *roff_getname(struct roff *, char **, int, int);
187 static int roff_getnum(const char *, int *, int *);
188 static int roff_getop(const char *, int *, char *);
189 static int roff_getregn(const struct roff *,
190 const char *, size_t);
191 static int roff_getregro(const char *name);
192 static const char *roff_getstrn(const struct roff *,
193 const char *, size_t);
194 static enum rofferr roff_it(ROFF_ARGS);
195 static enum rofferr roff_line_ignore(ROFF_ARGS);
196 static enum rofferr roff_nr(ROFF_ARGS);
197 static void roff_openeqn(struct roff *, const char *,
198 int, int, const char *);
199 static enum rofft roff_parse(struct roff *, const char *, int *);
200 static enum rofferr roff_parsetext(char **, size_t *, int, int *);
201 static enum rofferr roff_res(struct roff *,
202 char **, size_t *, int, int);
203 static enum rofferr roff_rm(ROFF_ARGS);
204 static void roff_setstr(struct roff *,
205 const char *, const char *, int);
206 static void roff_setstrn(struct roffkv **, const char *,
207 size_t, const char *, size_t, int);
208 static enum rofferr roff_so(ROFF_ARGS);
209 static enum rofferr roff_tr(ROFF_ARGS);
210 static enum rofferr roff_Dd(ROFF_ARGS);
211 static enum rofferr roff_TH(ROFF_ARGS);
212 static enum rofferr roff_TE(ROFF_ARGS);
213 static enum rofferr roff_TS(ROFF_ARGS);
214 static enum rofferr roff_EQ(ROFF_ARGS);
215 static enum rofferr roff_EN(ROFF_ARGS);
216 static enum rofferr roff_T_(ROFF_ARGS);
217 static enum rofferr roff_userdef(ROFF_ARGS);
218
/*
 * Geometry of the request-name hash; see roffhash_find().
 * There is one bucket for each character code from ASCII_LO
 * through ASCII_HI inclusive.
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; each is a chain linked through struct roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
226
227 static struct roffmac roffs[ROFF_MAX] = {
228 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
229 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
230 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
231 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
232 { "as", roff_ds, NULL, NULL, 0, NULL },
233 { "cc", roff_cc, NULL, NULL, 0, NULL },
234 { "ce", roff_line_ignore, NULL, NULL, 0, NULL },
235 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
236 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
237 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
238 { "ds", roff_ds, NULL, NULL, 0, NULL },
239 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
240 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
241 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
242 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
243 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
244 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
245 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
246 { "it", roff_it, NULL, NULL, 0, NULL },
247 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
248 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
249 { "nr", roff_nr, NULL, NULL, 0, NULL },
250 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
251 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
252 { "rm", roff_rm, NULL, NULL, 0, NULL },
253 { "so", roff_so, NULL, NULL, 0, NULL },
254 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
255 { "tr", roff_tr, NULL, NULL, 0, NULL },
256 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
257 { "TH", roff_TH, NULL, NULL, 0, NULL },
258 { "TS", roff_TS, NULL, NULL, 0, NULL },
259 { "TE", roff_TE, NULL, NULL, 0, NULL },
260 { "T&", roff_T_, NULL, NULL, 0, NULL },
261 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
262 { "EN", roff_EN, NULL, NULL, 0, NULL },
263 { ".", roff_cblock, NULL, NULL, 0, NULL },
264 { NULL, roff_userdef, NULL, NULL, 0, NULL },
265 };
266
/*
 * NULL-terminated list of mdoc macro names.
 * not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", "Brc", "Bro", "Brq",
	"Bsx", "Bt", "Bx",
	"Cd", "Cm",
	"Db", "Dc", "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "En", "Eo", "Er", "Es",
	"Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf",
	"Ic", "In", "It",
	"Lb", "Li", "Lk", "Lp",
	"Ms", "Mt",
	"Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
	"Ta", "Tn",
	"Ud", "Ux",
	"Va", "Vt",
	"Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
289
/*
 * NULL-terminated list of man macro names.
 * not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const	char *const __man_reserved[] = {
	"AT",
	"B", "BI", "BR",
	"DT",
	"EE", "EN", "EQ", "EX",
	"HP",
	"I", "IB", "IP", "IR",
	"LP",
	"OP",
	"P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS",
	"SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&",
	"UC", "UE", "UR",
	NULL
};
299
300 /* Array of injected predefined strings. */
301 #define PREDEFS_MAX 38
302 static const struct predef predefs[PREDEFS_MAX] = {
303 #include "predefs.in"
304 };
305
/*
 * Bucket index for a name: its first character relative to ASCII_LO.
 * See roffhash_find().  The argument is parenthesized so that any
 * pointer expression, not just a plain identifier, can be passed.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
308
/* State of the pending input line trap; consumed by roff_parsetext(). */
static	int	 roffit_lines;	/* number of lines to delay */
static	char	*roffit_macro;	/* nil-terminated macro line */
311
312 static void
313 roffhash_init(void)
314 {
315 struct roffmac *n;
316 int buc, i;
317
318 for (i = 0; i < (int)ROFF_USERDEF; i++) {
319 assert(roffs[i].name[0] >= ASCII_LO);
320 assert(roffs[i].name[0] <= ASCII_HI);
321
322 buc = ROFF_HASH(roffs[i].name);
323
324 if (NULL != (n = hash[buc])) {
325 for ( ; n->next; n = n->next)
326 /* Do nothing. */ ;
327 n->next = &roffs[i];
328 } else
329 hash[buc] = &roffs[i];
330 }
331 }
332
333 /*
334 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
335 * the nil-terminated string name could be found.
336 */
337 static enum rofft
338 roffhash_find(const char *p, size_t s)
339 {
340 int buc;
341 struct roffmac *n;
342
343 /*
344 * libroff has an extremely simple hashtable, for the time
345 * being, which simply keys on the first character, which must
346 * be printable, then walks a chain. It works well enough until
347 * optimised.
348 */
349
350 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
351 return(ROFF_MAX);
352
353 buc = ROFF_HASH(p);
354
355 if (NULL == (n = hash[buc]))
356 return(ROFF_MAX);
357 for ( ; n; n = n->next)
358 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
359 return((enum rofft)(n - roffs));
360
361 return(ROFF_MAX);
362 }
363
364
365 /*
366 * Pop the current node off of the stack of roff instructions currently
367 * pending.
368 */
369 static void
370 roffnode_pop(struct roff *r)
371 {
372 struct roffnode *p;
373
374 assert(r->last);
375 p = r->last;
376
377 r->last = r->last->parent;
378 free(p->name);
379 free(p->end);
380 free(p);
381 }
382
383
384 /*
385 * Push a roff node onto the instruction stack. This must later be
386 * removed with roffnode_pop().
387 */
388 static void
389 roffnode_push(struct roff *r, enum rofft tok, const char *name,
390 int line, int col)
391 {
392 struct roffnode *p;
393
394 p = mandoc_calloc(1, sizeof(struct roffnode));
395 p->tok = tok;
396 if (name)
397 p->name = mandoc_strdup(name);
398 p->parent = r->last;
399 p->line = line;
400 p->col = col;
401 p->rule = p->parent ? p->parent->rule : 0;
402
403 r->last = p;
404 }
405
406
407 static void
408 roff_free1(struct roff *r)
409 {
410 struct tbl_node *tbl;
411 struct eqn_node *e;
412 int i;
413
414 while (NULL != (tbl = r->first_tbl)) {
415 r->first_tbl = tbl->next;
416 tbl_free(tbl);
417 }
418
419 r->first_tbl = r->last_tbl = r->tbl = NULL;
420
421 while (NULL != (e = r->first_eqn)) {
422 r->first_eqn = e->next;
423 eqn_free(e);
424 }
425
426 r->first_eqn = r->last_eqn = r->eqn = NULL;
427
428 while (r->last)
429 roffnode_pop(r);
430
431 roff_freestr(r->strtab);
432 roff_freestr(r->xmbtab);
433
434 r->strtab = r->xmbtab = NULL;
435
436 roff_freereg(r->regtab);
437
438 r->regtab = NULL;
439
440 if (r->xtab)
441 for (i = 0; i < 128; i++)
442 free(r->xtab[i].p);
443
444 free(r->xtab);
445 r->xtab = NULL;
446 }
447
448 void
449 roff_reset(struct roff *r)
450 {
451
452 roff_free1(r);
453 r->control = 0;
454 }
455
456
/*
 * Destroy a parser: release everything it owns, then the parser
 * structure itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
464
465
466 struct roff *
467 roff_alloc(struct mparse *parse, int options)
468 {
469 struct roff *r;
470
471 r = mandoc_calloc(1, sizeof(struct roff));
472 r->parse = parse;
473 r->options = options;
474 r->rstackpos = -1;
475
476 roffhash_init();
477
478 return(r);
479 }
480
481 /*
482 * In the current line, expand user-defined strings ("\*")
483 * and references to number registers ("\n").
484 * Also check the syntax of other escape sequences.
485 */
486 static enum rofferr
487 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
488 {
489 char ubuf[12]; /* buffer to print the number */
490 const char *stesc; /* start of an escape sequence ('\\') */
491 const char *stnam; /* start of the name, after "[(*" */
492 const char *cp; /* end of the name, e.g. before ']' */
493 const char *res; /* the string to be substituted */
494 char *nbuf; /* new buffer to copy bufp to */
495 size_t nsz; /* size of the new buffer */
496 size_t maxl; /* expected length of the escape name */
497 size_t naml; /* actual length of the escape name */
498 int expand_count; /* to avoid infinite loops */
499
500 expand_count = 0;
501
502 again:
503 cp = *bufp + pos;
504 while (NULL != (cp = strchr(cp, '\\'))) {
505 stesc = cp++;
506
507 /*
508 * The second character must be an asterisk or an n.
509 * If it isn't, skip it anyway: It is escaped,
510 * so it can't start another escape sequence.
511 */
512
513 if ('\0' == *cp)
514 return(ROFF_CONT);
515
516 switch (*cp) {
517 case ('*'):
518 res = NULL;
519 break;
520 case ('n'):
521 res = ubuf;
522 break;
523 default:
524 if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
525 continue;
526 mandoc_msg
527 (MANDOCERR_BADESCAPE, r->parse,
528 ln, (int)(stesc - *bufp), NULL);
529 return(ROFF_CONT);
530 }
531
532 cp++;
533
534 /*
535 * The third character decides the length
536 * of the name of the string or register.
537 * Save a pointer to the name.
538 */
539
540 switch (*cp) {
541 case ('\0'):
542 return(ROFF_CONT);
543 case ('('):
544 cp++;
545 maxl = 2;
546 break;
547 case ('['):
548 cp++;
549 maxl = 0;
550 break;
551 default:
552 maxl = 1;
553 break;
554 }
555 stnam = cp;
556
557 /* Advance to the end of the name. */
558
559 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
560 if ('\0' == *cp) {
561 mandoc_msg
562 (MANDOCERR_BADESCAPE,
563 r->parse, ln,
564 (int)(stesc - *bufp), NULL);
565 return(ROFF_CONT);
566 }
567 if (0 == maxl && ']' == *cp)
568 break;
569 }
570
571 /*
572 * Retrieve the replacement string; if it is
573 * undefined, resume searching for escapes.
574 */
575
576 if (NULL == res)
577 res = roff_getstrn(r, stnam, naml);
578 else
579 snprintf(ubuf, sizeof(ubuf), "%d",
580 roff_getregn(r, stnam, naml));
581
582 if (NULL == res) {
583 mandoc_msg
584 (MANDOCERR_BADESCAPE, r->parse,
585 ln, (int)(stesc - *bufp), NULL);
586 res = "";
587 }
588
589 /* Replace the escape sequence by the string. */
590
591 pos = stesc - *bufp;
592
593 nsz = *szp + strlen(res) + 1;
594 nbuf = mandoc_malloc(nsz);
595
596 strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
597 strlcat(nbuf, res, nsz);
598 strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);
599
600 free(*bufp);
601
602 *bufp = nbuf;
603 *szp = nsz;
604
605 if (EXPAND_LIMIT >= ++expand_count)
606 goto again;
607
608 /* Just leave the string unexpanded. */
609 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
610 return(ROFF_IGN);
611 }
612 return(ROFF_CONT);
613 }
614
615 /*
616 * Process text streams:
617 * Convert all breakable hyphens into ASCII_HYPH.
618 * Decrement and spring input line trap.
619 */
620 static enum rofferr
621 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
622 {
623 size_t sz;
624 const char *start;
625 char *p;
626 int isz;
627 enum mandoc_esc esc;
628
629 start = p = *bufp + pos;
630
631 while ('\0' != *p) {
632 sz = strcspn(p, "-\\");
633 p += sz;
634
635 if ('\0' == *p)
636 break;
637
638 if ('\\' == *p) {
639 /* Skip over escapes. */
640 p++;
641 esc = mandoc_escape((const char **)&p, NULL, NULL);
642 if (ESCAPE_ERROR == esc)
643 break;
644 continue;
645 } else if (p == start) {
646 p++;
647 continue;
648 }
649
650 if (isalpha((unsigned char)p[-1]) &&
651 isalpha((unsigned char)p[1]))
652 *p = ASCII_HYPH;
653 p++;
654 }
655
656 /* Spring the input line trap. */
657 if (1 == roffit_lines) {
658 isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
659 if (-1 == isz) {
660 perror(NULL);
661 exit((int)MANDOCLEVEL_SYSERR);
662 }
663 free(*bufp);
664 *bufp = p;
665 *szp = isz + 1;
666 *offs = 0;
667 free(roffit_macro);
668 roffit_lines = 0;
669 return(ROFF_REPARSE);
670 } else if (1 < roffit_lines)
671 --roffit_lines;
672 return(ROFF_CONT);
673 }
674
675 enum rofferr
676 roff_parseln(struct roff *r, int ln, char **bufp,
677 size_t *szp, int pos, int *offs)
678 {
679 enum rofft t;
680 enum rofferr e;
681 int ppos, ctl;
682
683 /*
684 * Run the reserved-word filter only if we have some reserved
685 * words to fill in.
686 */
687
688 e = roff_res(r, bufp, szp, ln, pos);
689 if (ROFF_IGN == e)
690 return(e);
691 assert(ROFF_CONT == e);
692
693 ppos = pos;
694 ctl = roff_getcontrol(r, *bufp, &pos);
695
696 /*
697 * First, if a scope is open and we're not a macro, pass the
698 * text through the macro's filter. If a scope isn't open and
699 * we're not a macro, just let it through.
700 * Finally, if there's an equation scope open, divert it into it
701 * no matter our state.
702 */
703
704 if (r->last && ! ctl) {
705 t = r->last->tok;
706 assert(roffs[t].text);
707 e = (*roffs[t].text)
708 (r, t, bufp, szp, ln, pos, pos, offs);
709 assert(ROFF_IGN == e || ROFF_CONT == e);
710 if (ROFF_CONT != e)
711 return(e);
712 }
713 if (r->eqn)
714 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
715 if ( ! ctl) {
716 if (r->tbl)
717 return(tbl_read(r->tbl, ln, *bufp, pos));
718 return(roff_parsetext(bufp, szp, pos, offs));
719 }
720
721 /*
722 * If a scope is open, go to the child handler for that macro,
723 * as it may want to preprocess before doing anything with it.
724 * Don't do so if an equation is open.
725 */
726
727 if (r->last) {
728 t = r->last->tok;
729 assert(roffs[t].sub);
730 return((*roffs[t].sub)
731 (r, t, bufp, szp,
732 ln, ppos, pos, offs));
733 }
734
735 /*
736 * Lastly, as we've no scope open, try to look up and execute
737 * the new macro. If no macro is found, simply return and let
738 * the compilers handle it.
739 */
740
741 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
742 return(ROFF_CONT);
743
744 assert(roffs[t].proc);
745 return((*roffs[t].proc)
746 (r, t, bufp, szp,
747 ln, ppos, pos, offs));
748 }
749
750
751 void
752 roff_endparse(struct roff *r)
753 {
754
755 if (r->last)
756 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
757 r->last->line, r->last->col, NULL);
758
759 if (r->eqn) {
760 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
761 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
762 eqn_end(&r->eqn);
763 }
764
765 if (r->tbl) {
766 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
767 r->tbl->line, r->tbl->pos, NULL);
768 tbl_end(&r->tbl);
769 }
770 }
771
772 /*
773 * Parse a roff node's type from the input buffer. This must be in the
774 * form of ".foo xxx" in the usual way.
775 */
776 static enum rofft
777 roff_parse(struct roff *r, const char *buf, int *pos)
778 {
779 const char *mac;
780 size_t maclen;
781 enum rofft t;
782
783 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
784 '\t' == buf[*pos] || ' ' == buf[*pos])
785 return(ROFF_MAX);
786
787 /* We stop the macro parse at an escape, tab, space, or nil. */
788
789 mac = buf + *pos;
790 maclen = strcspn(mac, " \\\t\0");
791
792 t = (r->current_string = roff_getstrn(r, mac, maclen))
793 ? ROFF_USERDEF : roffhash_find(mac, maclen);
794
795 *pos += (int)maclen;
796
797 while (buf[*pos] && ' ' == buf[*pos])
798 (*pos)++;
799
800 return(t);
801 }
802
803 /* ARGSUSED */
804 static enum rofferr
805 roff_cblock(ROFF_ARGS)
806 {
807
808 /*
809 * A block-close `..' should only be invoked as a child of an
810 * ignore macro, otherwise raise a warning and just ignore it.
811 */
812
813 if (NULL == r->last) {
814 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
815 return(ROFF_IGN);
816 }
817
818 switch (r->last->tok) {
819 case (ROFF_am):
820 /* FALLTHROUGH */
821 case (ROFF_ami):
822 /* FALLTHROUGH */
823 case (ROFF_am1):
824 /* FALLTHROUGH */
825 case (ROFF_de):
826 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
827 /* FALLTHROUGH */
828 case (ROFF_dei):
829 /* FALLTHROUGH */
830 case (ROFF_ig):
831 break;
832 default:
833 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
834 return(ROFF_IGN);
835 }
836
837 if ((*bufp)[pos])
838 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
839
840 roffnode_pop(r);
841 roffnode_cleanscope(r);
842 return(ROFF_IGN);
843
844 }
845
846
847 static void
848 roffnode_cleanscope(struct roff *r)
849 {
850
851 while (r->last) {
852 if (--r->last->endspan != 0)
853 break;
854 roffnode_pop(r);
855 }
856 }
857
858
859 static void
860 roff_ccond(struct roff *r, int ln, int ppos)
861 {
862
863 if (NULL == r->last) {
864 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
865 return;
866 }
867
868 switch (r->last->tok) {
869 case (ROFF_el):
870 /* FALLTHROUGH */
871 case (ROFF_ie):
872 /* FALLTHROUGH */
873 case (ROFF_if):
874 break;
875 default:
876 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
877 return;
878 }
879
880 if (r->last->endspan > -1) {
881 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
882 return;
883 }
884
885 roffnode_pop(r);
886 roffnode_cleanscope(r);
887 return;
888 }
889
890
891 /* ARGSUSED */
892 static enum rofferr
893 roff_block(ROFF_ARGS)
894 {
895 int sv;
896 size_t sz;
897 char *name;
898
899 name = NULL;
900
901 if (ROFF_ig != tok) {
902 if ('\0' == (*bufp)[pos]) {
903 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
904 return(ROFF_IGN);
905 }
906
907 /*
908 * Re-write `de1', since we don't really care about
909 * groff's strange compatibility mode, into `de'.
910 */
911
912 if (ROFF_de1 == tok)
913 tok = ROFF_de;
914 if (ROFF_de == tok)
915 name = *bufp + pos;
916 else
917 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
918 roffs[tok].name);
919
920 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
921 pos++;
922
923 while (isspace((unsigned char)(*bufp)[pos]))
924 (*bufp)[pos++] = '\0';
925 }
926
927 roffnode_push(r, tok, name, ln, ppos);
928
929 /*
930 * At the beginning of a `de' macro, clear the existing string
931 * with the same name, if there is one. New content will be
932 * appended from roff_block_text() in multiline mode.
933 */
934
935 if (ROFF_de == tok)
936 roff_setstr(r, name, "", 0);
937
938 if ('\0' == (*bufp)[pos])
939 return(ROFF_IGN);
940
941 /* If present, process the custom end-of-line marker. */
942
943 sv = pos;
944 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
945 pos++;
946
947 /*
948 * Note: groff does NOT like escape characters in the input.
949 * Instead of detecting this, we're just going to let it fly and
950 * to hell with it.
951 */
952
953 assert(pos > sv);
954 sz = (size_t)(pos - sv);
955
956 if (1 == sz && '.' == (*bufp)[sv])
957 return(ROFF_IGN);
958
959 r->last->end = mandoc_malloc(sz + 1);
960
961 memcpy(r->last->end, *bufp + sv, sz);
962 r->last->end[(int)sz] = '\0';
963
964 if ((*bufp)[pos])
965 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
966
967 return(ROFF_IGN);
968 }
969
970
971 /* ARGSUSED */
972 static enum rofferr
973 roff_block_sub(ROFF_ARGS)
974 {
975 enum rofft t;
976 int i, j;
977
978 /*
979 * First check whether a custom macro exists at this level. If
980 * it does, then check against it. This is some of groff's
981 * stranger behaviours. If we encountered a custom end-scope
982 * tag and that tag also happens to be a "real" macro, then we
983 * need to try interpreting it again as a real macro. If it's
984 * not, then return ignore. Else continue.
985 */
986
987 if (r->last->end) {
988 for (i = pos, j = 0; r->last->end[j]; j++, i++)
989 if ((*bufp)[i] != r->last->end[j])
990 break;
991
992 if ('\0' == r->last->end[j] &&
993 ('\0' == (*bufp)[i] ||
994 ' ' == (*bufp)[i] ||
995 '\t' == (*bufp)[i])) {
996 roffnode_pop(r);
997 roffnode_cleanscope(r);
998
999 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1000 i++;
1001
1002 pos = i;
1003 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1004 return(ROFF_RERUN);
1005 return(ROFF_IGN);
1006 }
1007 }
1008
1009 /*
1010 * If we have no custom end-query or lookup failed, then try
1011 * pulling it out of the hashtable.
1012 */
1013
1014 t = roff_parse(r, *bufp, &pos);
1015
1016 /*
1017 * Macros other than block-end are only significant
1018 * in `de' blocks; elsewhere, simply throw them away.
1019 */
1020 if (ROFF_cblock != t) {
1021 if (ROFF_de == tok)
1022 roff_setstr(r, r->last->name, *bufp + ppos, 2);
1023 return(ROFF_IGN);
1024 }
1025
1026 assert(roffs[t].proc);
1027 return((*roffs[t].proc)(r, t, bufp, szp,
1028 ln, ppos, pos, offs));
1029 }
1030
1031
1032 /* ARGSUSED */
1033 static enum rofferr
1034 roff_block_text(ROFF_ARGS)
1035 {
1036
1037 if (ROFF_de == tok)
1038 roff_setstr(r, r->last->name, *bufp + pos, 2);
1039
1040 return(ROFF_IGN);
1041 }
1042
1043
1044 /* ARGSUSED */
1045 static enum rofferr
1046 roff_cond_sub(ROFF_ARGS)
1047 {
1048 enum rofft t;
1049 char *ep;
1050 int rr;
1051
1052 rr = r->last->rule;
1053 roffnode_cleanscope(r);
1054 t = roff_parse(r, *bufp, &pos);
1055
1056 /*
1057 * Fully handle known macros when they are structurally
1058 * required or when the conditional evaluated to true.
1059 */
1060
1061 if ((ROFF_MAX != t) &&
1062 (rr || ROFFMAC_STRUCT & roffs[t].flags)) {
1063 assert(roffs[t].proc);
1064 return((*roffs[t].proc)(r, t, bufp, szp,
1065 ln, ppos, pos, offs));
1066 }
1067
1068 /*
1069 * If `\}' occurs on a macro line without a preceding macro,
1070 * drop the line completely.
1071 */
1072
1073 ep = *bufp + pos;
1074 if ('\\' == ep[0] && '}' == ep[1])
1075 rr = 0;
1076
1077 /* Always check for the closing delimiter `\}'. */
1078
1079 while (NULL != (ep = strchr(ep, '\\'))) {
1080 if ('}' == *(++ep)) {
1081 *ep = '&';
1082 roff_ccond(r, ln, ep - *bufp - 1);
1083 }
1084 ++ep;
1085 }
1086 return(rr ? ROFF_CONT : ROFF_IGN);
1087 }
1088
1089 /* ARGSUSED */
1090 static enum rofferr
1091 roff_cond_text(ROFF_ARGS)
1092 {
1093 char *ep;
1094 int rr;
1095
1096 rr = r->last->rule;
1097 roffnode_cleanscope(r);
1098
1099 ep = *bufp + pos;
1100 while (NULL != (ep = strchr(ep, '\\'))) {
1101 if ('}' == *(++ep)) {
1102 *ep = '&';
1103 roff_ccond(r, ln, ep - *bufp - 1);
1104 }
1105 ++ep;
1106 }
1107 return(rr ? ROFF_CONT : ROFF_IGN);
1108 }
1109
/*
 * Parse an optionally negative decimal integer at v[*pos].
 *
 * On success, store the value in *res, move *pos past the last digit,
 * and return 1.  If no digit is found, return 0 and leave *pos alone;
 * *res has been set to 0 in that case.
 * There is no protection against overflow on very long digit strings.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int	 p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	/* Shift the value one decimal place, then add the new digit. */
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + (v[p] - '0');

	/* Nothing, or nothing but a sign, is not a number. */
	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1131
/*
 * Parse a comparison operator at v[*pos].
 *
 * Recognized are "=", "==", "<", "<=", ">" and ">=".  On success,
 * *pos is moved past the operator and the operator code is stored in
 * *res and returned: '=', '<' or '>' for the plain forms (with "=="
 * counting as '='), 'l' for "<=" and 'g' for ">=".
 * Otherwise 0 is returned and *pos is left alone; *res then holds the
 * character found at v[*pos].
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 e;

	*res = v[*pos];

	switch (*res) {
	case '=':
		/* FALLTHROUGH */
	case '>':
		/* FALLTHROUGH */
	case '<':
		break;
	default:
		return(0);
	}

	/*
	 * Only look at the second character once the first one is
	 * known to be an operator, and hence not the terminator;
	 * otherwise this would read beyond the end of the string.
	 */
	e = v[*pos + 1] == '=';

	if (e) {
		if ('>' == *res)
			*res = 'g';
		else if ('<' == *res)
			*res = 'l';
	}

	*pos += 1 + e;

	return(*res);
}
1159
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line;
 * it is never moved beyond the terminating nil.
 * An empty condition does not match and leaves the cursor alone.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* Without a delimiter, there is nothing to scan. */
	if ('\0' == *s1)
		return(0);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if ('\0' != *s3)	/* step over the final delimiter only */
		s3++;
	*pos = s3 - v;
	return(match);
}
1202
/*
 * Evaluate the condition of an `if' or `ie' request at v[*pos] and
 * advance *pos past the part that was consumed.  Returns non-zero if
 * the condition holds.
 *
 * A leading '!' negates.  The one-letter conditions `n' and `o' are
 * true, while `c', `d', `e', `r' and `t' are false.  A number, or a
 * comparison of two numbers with one of the operators understood by
 * roff_getop(), is evaluated numerically; a lone number is true if it
 * is positive.  Anything else is taken as a string comparison.
 * An empty condition and a comparison lacking its right-hand number
 * are false, negated or not.
 */
static int
roff_evalcond(const char *v, int *pos)
{
	int	 wanttrue, lh, rh;
	char	 op;

	if ('!' == v[*pos]) {
		wanttrue = 0;
		(*pos)++;
	} else
		wanttrue = 1;

	switch (v[*pos]) {
	case ('\0'):
		/*
		 * Nothing left to evaluate.  Falling through to the
		 * string comparison would treat the terminator as the
		 * delimiter.
		 */
		return(0);
	case ('n'):
		/* FALLTHROUGH */
	case ('o'):
		(*pos)++;
		return(wanttrue);
	case ('c'):
		/* FALLTHROUGH */
	case ('d'):
		/* FALLTHROUGH */
	case ('e'):
		/* FALLTHROUGH */
	case ('r'):
		/* FALLTHROUGH */
	case ('t'):
		(*pos)++;
		return(!wanttrue);
	default:
		break;
	}

	if (!roff_getnum(v, pos, &lh))
		return(roff_evalstrcond(v, pos) == wanttrue);
	if (!roff_getop(v, pos, &op))
		return((lh > 0) == wanttrue);
	if (!roff_getnum(v, pos, &rh))
		return(0);

	switch (op) {
	case 'g':
		return((lh >= rh) == wanttrue);
	case 'l':
		return((lh <= rh) == wanttrue);
	case '=':
		return((lh == rh) == wanttrue);
	case '>':
		return((lh > rh) == wanttrue);
	case '<':
		return((lh < rh) == wanttrue);
	default:
		return(0);
	}
}
1258
1259 /* ARGSUSED */
1260 static enum rofferr
1261 roff_line_ignore(ROFF_ARGS)
1262 {
1263
1264 return(ROFF_IGN);
1265 }
1266
1267 /* ARGSUSED */
1268 static enum rofferr
1269 roff_cond(ROFF_ARGS)
1270 {
1271
1272 roffnode_push(r, tok, NULL, ln, ppos);
1273
1274 /*
1275 * An `.el' has no conditional body: it will consume the value
1276 * of the current rstack entry set in prior `ie' calls or
1277 * defaults to DENY.
1278 *
1279 * If we're not an `el', however, then evaluate the conditional.
1280 */
1281
1282 r->last->rule = ROFF_el == tok ?
1283 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1284 roff_evalcond(*bufp, &pos);
1285
1286 /*
1287 * An if-else will put the NEGATION of the current evaluated
1288 * conditional into the stack of rules.
1289 */
1290
1291 if (ROFF_ie == tok) {
1292 if (r->rstackpos == RSTACK_MAX - 1) {
1293 mandoc_msg(MANDOCERR_MEM,
1294 r->parse, ln, ppos, NULL);
1295 return(ROFF_ERR);
1296 }
1297 r->rstack[++r->rstackpos] = !r->last->rule;
1298 }
1299
1300 /* If the parent has false as its rule, then so do we. */
1301
1302 if (r->last->parent && !r->last->parent->rule)
1303 r->last->rule = 0;
1304
1305 /*
1306 * Determine scope.
1307 * If there is nothing on the line after the conditional,
1308 * not even whitespace, use next-line scope.
1309 */
1310
1311 if ('\0' == (*bufp)[pos]) {
1312 r->last->endspan = 2;
1313 goto out;
1314 }
1315
1316 while (' ' == (*bufp)[pos])
1317 pos++;
1318
1319 /* An opening brace requests multiline scope. */
1320
1321 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1322 r->last->endspan = -1;
1323 pos += 2;
1324 goto out;
1325 }
1326
1327 /*
1328 * Anything else following the conditional causes
1329 * single-line scope. Warn if the scope contains
1330 * nothing but trailing whitespace.
1331 */
1332
1333 if ('\0' == (*bufp)[pos])
1334 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1335
1336 r->last->endspan = 1;
1337
1338 out:
1339 *offs = pos;
1340 return(ROFF_RERUN);
1341 }
1342
1343
1344 /* ARGSUSED */
1345 static enum rofferr
1346 roff_ds(ROFF_ARGS)
1347 {
1348 char *name, *string;
1349
1350 /*
1351 * A symbol is named by the first word following the macro
1352 * invocation up to a space. Its value is anything after the
1353 * name's trailing whitespace and optional double-quote. Thus,
1354 *
1355 * [.ds foo "bar " ]
1356 *
1357 * will have `bar " ' as its value.
1358 */
1359
1360 string = *bufp + pos;
1361 name = roff_getname(r, &string, ln, pos);
1362 if ('\0' == *name)
1363 return(ROFF_IGN);
1364
1365 /* Read past initial double-quote. */
1366 if ('"' == *string)
1367 string++;
1368
1369 /* The rest is the value. */
1370 roff_setstr(r, name, string, ROFF_as == tok);
1371 return(ROFF_IGN);
1372 }
1373
1374 void
1375 roff_setreg(struct roff *r, const char *name, int val, char sign)
1376 {
1377 struct roffreg *reg;
1378
1379 /* Search for an existing register with the same name. */
1380 reg = r->regtab;
1381
1382 while (reg && strcmp(name, reg->key.p))
1383 reg = reg->next;
1384
1385 if (NULL == reg) {
1386 /* Create a new register. */
1387 reg = mandoc_malloc(sizeof(struct roffreg));
1388 reg->key.p = mandoc_strdup(name);
1389 reg->key.sz = strlen(name);
1390 reg->val = 0;
1391 reg->next = r->regtab;
1392 r->regtab = reg;
1393 }
1394
1395 if ('+' == sign)
1396 reg->val += val;
1397 else if ('-' == sign)
1398 reg->val -= val;
1399 else
1400 reg->val = val;
1401 }
1402
/*
 * Look up a predefined read-only number register.
 * Only the first character of name is inspected; the caller is
 * expected to have stripped the leading dot already.
 * Returns the fixed value of the register, or -1 if no such
 * register is predefined.  Should a predefined register with the
 * value -1 ever be needed, a different marker has to be chosen.
 */
static int
roff_getregro(const char *name)
{
	int	 value;

	switch (name[0]) {
	case 'A':	/* ASCII approximation mode is always off. */
		value = 0;
		break;
	case 'g':	/* Groff compatibility mode is always on. */
		value = 1;
		break;
	case 'H':	/* Fixed horizontal resolution. */
		value = 24;
		break;
	case 'j':	/* Always adjust left margin only. */
		value = 0;
		break;
	case 'T':	/* Some output device is always defined. */
		value = 1;
		break;
	case 'V':	/* Fixed vertical resolution. */
		value = 40;
		break;
	default:	/* Not a predefined register. */
		value = -1;
		break;
	}

	return value;
}
1430
1431 int
1432 roff_getreg(const struct roff *r, const char *name)
1433 {
1434 struct roffreg *reg;
1435 int val;
1436
1437 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
1438 val = roff_getregro(name + 1);
1439 if (-1 != val)
1440 return (val);
1441 }
1442
1443 for (reg = r->regtab; reg; reg = reg->next)
1444 if (0 == strcmp(name, reg->key.p))
1445 return(reg->val);
1446
1447 return(0);
1448 }
1449
1450 static int
1451 roff_getregn(const struct roff *r, const char *name, size_t len)
1452 {
1453 struct roffreg *reg;
1454 int val;
1455
1456 if ('.' == name[0] && 2 == len) {
1457 val = roff_getregro(name + 1);
1458 if (-1 != val)
1459 return (val);
1460 }
1461
1462 for (reg = r->regtab; reg; reg = reg->next)
1463 if (len == reg->key.sz &&
1464 0 == strncmp(name, reg->key.p, len))
1465 return(reg->val);
1466
1467 return(0);
1468 }
1469
1470 static void
1471 roff_freereg(struct roffreg *reg)
1472 {
1473 struct roffreg *old_reg;
1474
1475 while (NULL != reg) {
1476 free(reg->key.p);
1477 old_reg = reg;
1478 reg = reg->next;
1479 free(old_reg);
1480 }
1481 }
1482
1483 /* ARGSUSED */
1484 static enum rofferr
1485 roff_nr(ROFF_ARGS)
1486 {
1487 const char *key;
1488 char *val;
1489 size_t sz;
1490 int iv;
1491 char sign;
1492
1493 val = *bufp + pos;
1494 key = roff_getname(r, &val, ln, pos);
1495
1496 sign = *val;
1497 if ('+' == sign || '-' == sign)
1498 val++;
1499
1500 sz = strspn(val, "0123456789");
1501 iv = sz ? mandoc_strntoi(val, sz, 10) : 0;
1502
1503 roff_setreg(r, key, iv, sign);
1504
1505 return(ROFF_IGN);
1506 }
1507
1508 /* ARGSUSED */
1509 static enum rofferr
1510 roff_rm(ROFF_ARGS)
1511 {
1512 const char *name;
1513 char *cp;
1514
1515 cp = *bufp + pos;
1516 while ('\0' != *cp) {
1517 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1518 if ('\0' != *name)
1519 roff_setstr(r, name, NULL, 0);
1520 }
1521 return(ROFF_IGN);
1522 }
1523
1524 /* ARGSUSED */
1525 static enum rofferr
1526 roff_it(ROFF_ARGS)
1527 {
1528 char *cp;
1529 size_t len;
1530 int iv;
1531
1532 /* Parse the number of lines. */
1533 cp = *bufp + pos;
1534 len = strcspn(cp, " \t");
1535 cp[len] = '\0';
1536 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1537 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1538 ln, ppos, *bufp + 1);
1539 return(ROFF_IGN);
1540 }
1541 cp += len + 1;
1542
1543 /* Arm the input line trap. */
1544 roffit_lines = iv;
1545 roffit_macro = mandoc_strdup(cp);
1546 return(ROFF_IGN);
1547 }
1548
1549 /* ARGSUSED */
1550 static enum rofferr
1551 roff_Dd(ROFF_ARGS)
1552 {
1553 const char *const *cp;
1554
1555 if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
1556 for (cp = __mdoc_reserved; *cp; cp++)
1557 roff_setstr(r, *cp, NULL, 0);
1558
1559 return(ROFF_CONT);
1560 }
1561
1562 /* ARGSUSED */
1563 static enum rofferr
1564 roff_TH(ROFF_ARGS)
1565 {
1566 const char *const *cp;
1567
1568 if (0 == (MPARSE_QUICK & r->options))
1569 for (cp = __man_reserved; *cp; cp++)
1570 roff_setstr(r, *cp, NULL, 0);
1571
1572 return(ROFF_CONT);
1573 }
1574
1575 /* ARGSUSED */
1576 static enum rofferr
1577 roff_TE(ROFF_ARGS)
1578 {
1579
1580 if (NULL == r->tbl)
1581 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1582 else
1583 tbl_end(&r->tbl);
1584
1585 return(ROFF_IGN);
1586 }
1587
1588 /* ARGSUSED */
1589 static enum rofferr
1590 roff_T_(ROFF_ARGS)
1591 {
1592
1593 if (NULL == r->tbl)
1594 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1595 else
1596 tbl_restart(ppos, ln, r->tbl);
1597
1598 return(ROFF_IGN);
1599 }
1600
#if 0
/*
 * Currently compiled out.
 * Close the open equation, if any.  Returns 1 if an equation was
 * open and eqn_end() reported ROFF_EQN for it, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{
	if (r->eqn == NULL)
		return 0;

	return eqn_end(&r->eqn) == ROFF_EQN;
}
#endif
1609
1610 static void
1611 roff_openeqn(struct roff *r, const char *name, int line,
1612 int offs, const char *buf)
1613 {
1614 struct eqn_node *e;
1615 int poff;
1616
1617 assert(NULL == r->eqn);
1618 e = eqn_alloc(name, offs, line, r->parse);
1619
1620 if (r->last_eqn)
1621 r->last_eqn->next = e;
1622 else
1623 r->first_eqn = r->last_eqn = e;
1624
1625 r->eqn = r->last_eqn = e;
1626
1627 if (buf) {
1628 poff = 0;
1629 eqn_read(&r->eqn, line, buf, offs, &poff);
1630 }
1631 }
1632
1633 /* ARGSUSED */
1634 static enum rofferr
1635 roff_EQ(ROFF_ARGS)
1636 {
1637
1638 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1639 return(ROFF_IGN);
1640 }
1641
1642 /* ARGSUSED */
1643 static enum rofferr
1644 roff_EN(ROFF_ARGS)
1645 {
1646
1647 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1648 return(ROFF_IGN);
1649 }
1650
1651 /* ARGSUSED */
1652 static enum rofferr
1653 roff_TS(ROFF_ARGS)
1654 {
1655 struct tbl_node *tbl;
1656
1657 if (r->tbl) {
1658 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1659 tbl_end(&r->tbl);
1660 }
1661
1662 tbl = tbl_alloc(ppos, ln, r->parse);
1663
1664 if (r->last_tbl)
1665 r->last_tbl->next = tbl;
1666 else
1667 r->first_tbl = r->last_tbl = tbl;
1668
1669 r->tbl = r->last_tbl = tbl;
1670 return(ROFF_IGN);
1671 }
1672
1673 /* ARGSUSED */
1674 static enum rofferr
1675 roff_cc(ROFF_ARGS)
1676 {
1677 const char *p;
1678
1679 p = *bufp + pos;
1680
1681 if ('\0' == *p || '.' == (r->control = *p++))
1682 r->control = 0;
1683
1684 if ('\0' != *p)
1685 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1686
1687 return(ROFF_IGN);
1688 }
1689
1690 /* ARGSUSED */
1691 static enum rofferr
1692 roff_tr(ROFF_ARGS)
1693 {
1694 const char *p, *first, *second;
1695 size_t fsz, ssz;
1696 enum mandoc_esc esc;
1697
1698 p = *bufp + pos;
1699
1700 if ('\0' == *p) {
1701 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1702 return(ROFF_IGN);
1703 }
1704
1705 while ('\0' != *p) {
1706 fsz = ssz = 1;
1707
1708 first = p++;
1709 if ('\\' == *first) {
1710 esc = mandoc_escape(&p, NULL, NULL);
1711 if (ESCAPE_ERROR == esc) {
1712 mandoc_msg
1713 (MANDOCERR_BADESCAPE, r->parse,
1714 ln, (int)(p - *bufp), NULL);
1715 return(ROFF_IGN);
1716 }
1717 fsz = (size_t)(p - first);
1718 }
1719
1720 second = p++;
1721 if ('\\' == *second) {
1722 esc = mandoc_escape(&p, NULL, NULL);
1723 if (ESCAPE_ERROR == esc) {
1724 mandoc_msg
1725 (MANDOCERR_BADESCAPE, r->parse,
1726 ln, (int)(p - *bufp), NULL);
1727 return(ROFF_IGN);
1728 }
1729 ssz = (size_t)(p - second);
1730 } else if ('\0' == *second) {
1731 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1732 ln, (int)(p - *bufp), NULL);
1733 second = " ";
1734 p--;
1735 }
1736
1737 if (fsz > 1) {
1738 roff_setstrn(&r->xmbtab, first,
1739 fsz, second, ssz, 0);
1740 continue;
1741 }
1742
1743 if (NULL == r->xtab)
1744 r->xtab = mandoc_calloc
1745 (128, sizeof(struct roffstr));
1746
1747 free(r->xtab[(int)*first].p);
1748 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1749 r->xtab[(int)*first].sz = ssz;
1750 }
1751
1752 return(ROFF_IGN);
1753 }
1754
1755 /* ARGSUSED */
1756 static enum rofferr
1757 roff_so(ROFF_ARGS)
1758 {
1759 char *name;
1760
1761 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1762
1763 /*
1764 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1765 * opening anything that's not in our cwd or anything beneath
1766 * it. Thus, explicitly disallow traversing up the file-system
1767 * or using absolute paths.
1768 */
1769
1770 name = *bufp + pos;
1771 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1772 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1773 return(ROFF_ERR);
1774 }
1775
1776 *offs = pos;
1777 return(ROFF_SO);
1778 }
1779
1780 /* ARGSUSED */
1781 static enum rofferr
1782 roff_userdef(ROFF_ARGS)
1783 {
1784 const char *arg[9];
1785 char *cp, *n1, *n2;
1786 int i;
1787
1788 /*
1789 * Collect pointers to macro argument strings
1790 * and NUL-terminate them.
1791 */
1792 cp = *bufp + pos;
1793 for (i = 0; i < 9; i++)
1794 arg[i] = '\0' == *cp ? "" :
1795 mandoc_getarg(r->parse, &cp, ln, &pos);
1796
1797 /*
1798 * Expand macro arguments.
1799 */
1800 *szp = 0;
1801 n1 = cp = mandoc_strdup(r->current_string);
1802 while (NULL != (cp = strstr(cp, "\\$"))) {
1803 i = cp[2] - '1';
1804 if (0 > i || 8 < i) {
1805 /* Not an argument invocation. */
1806 cp += 2;
1807 continue;
1808 }
1809
1810 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1811 n2 = mandoc_malloc(*szp);
1812
1813 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1814 strlcat(n2, arg[i], *szp);
1815 strlcat(n2, cp + 3, *szp);
1816
1817 cp = n2 + (cp - n1);
1818 free(n1);
1819 n1 = n2;
1820 }
1821
1822 /*
1823 * Replace the macro invocation
1824 * by the expanded macro.
1825 */
1826 free(*bufp);
1827 *bufp = n1;
1828 if (0 == *szp)
1829 *szp = strlen(*bufp) + 1;
1830
1831 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1832 ROFF_REPARSE : ROFF_APPEND);
1833 }
1834
1835 static char *
1836 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1837 {
1838 char *name, *cp;
1839
1840 name = *cpp;
1841 if ('\0' == *name)
1842 return(name);
1843
1844 /* Read until end of name. */
1845 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1846 if ('\\' != *cp)
1847 continue;
1848 cp++;
1849 if ('\\' == *cp)
1850 continue;
1851 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1852 *cp = '\0';
1853 name = cp;
1854 }
1855
1856 /* Nil-terminate name. */
1857 if ('\0' != *cp)
1858 *(cp++) = '\0';
1859
1860 /* Read past spaces. */
1861 while (' ' == *cp)
1862 cp++;
1863
1864 *cpp = cp;
1865 return(name);
1866 }
1867
1868 /*
1869 * Store *string into the user-defined string called *name.
1870 * To clear an existing entry, call with (*r, *name, NULL, 0).
1871 * append == 0: replace mode
1872 * append == 1: single-line append mode
1873 * append == 2: multiline append mode, append '\n' after each call
1874 */
1875 static void
1876 roff_setstr(struct roff *r, const char *name, const char *string,
1877 int append)
1878 {
1879
1880 roff_setstrn(&r->strtab, name, strlen(name), string,
1881 string ? strlen(string) : 0, append);
1882 }
1883
1884 static void
1885 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1886 const char *string, size_t stringsz, int append)
1887 {
1888 struct roffkv *n;
1889 char *c;
1890 int i;
1891 size_t oldch, newch;
1892
1893 /* Search for an existing string with the same name. */
1894 n = *r;
1895
1896 while (n && strcmp(name, n->key.p))
1897 n = n->next;
1898
1899 if (NULL == n) {
1900 /* Create a new string table entry. */
1901 n = mandoc_malloc(sizeof(struct roffkv));
1902 n->key.p = mandoc_strndup(name, namesz);
1903 n->key.sz = namesz;
1904 n->val.p = NULL;
1905 n->val.sz = 0;
1906 n->next = *r;
1907 *r = n;
1908 } else if (0 == append) {
1909 free(n->val.p);
1910 n->val.p = NULL;
1911 n->val.sz = 0;
1912 }
1913
1914 if (NULL == string)
1915 return;
1916
1917 /*
1918 * One additional byte for the '\n' in multiline mode,
1919 * and one for the terminating '\0'.
1920 */
1921 newch = stringsz + (1 < append ? 2u : 1u);
1922
1923 if (NULL == n->val.p) {
1924 n->val.p = mandoc_malloc(newch);
1925 *n->val.p = '\0';
1926 oldch = 0;
1927 } else {
1928 oldch = n->val.sz;
1929 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1930 }
1931
1932 /* Skip existing content in the destination buffer. */
1933 c = n->val.p + (int)oldch;
1934
1935 /* Append new content to the destination buffer. */
1936 i = 0;
1937 while (i < (int)stringsz) {
1938 /*
1939 * Rudimentary roff copy mode:
1940 * Handle escaped backslashes.
1941 */
1942 if ('\\' == string[i] && '\\' == string[i + 1])
1943 i++;
1944 *c++ = string[i++];
1945 }
1946
1947 /* Append terminating bytes. */
1948 if (1 < append)
1949 *c++ = '\n';
1950
1951 *c = '\0';
1952 n->val.sz = (int)(c - n->val.p);
1953 }
1954
1955 static const char *
1956 roff_getstrn(const struct roff *r, const char *name, size_t len)
1957 {
1958 const struct roffkv *n;
1959 int i;
1960
1961 for (n = r->strtab; n; n = n->next)
1962 if (0 == strncmp(name, n->key.p, len) &&
1963 '\0' == n->key.p[(int)len])
1964 return(n->val.p);
1965
1966 for (i = 0; i < PREDEFS_MAX; i++)
1967 if (0 == strncmp(name, predefs[i].name, len) &&
1968 '\0' == predefs[i].name[(int)len])
1969 return(predefs[i].str);
1970
1971 return(NULL);
1972 }
1973
1974 static void
1975 roff_freestr(struct roffkv *r)
1976 {
1977 struct roffkv *n, *nn;
1978
1979 for (n = r; n; n = nn) {
1980 free(n->key.p);
1981 free(n->val.p);
1982 nn = n->next;
1983 free(n);
1984 }
1985 }
1986
1987 const struct tbl_span *
1988 roff_span(const struct roff *r)
1989 {
1990
1991 return(r->tbl ? tbl_span(r->tbl) : NULL);
1992 }
1993
1994 const struct eqn *
1995 roff_eqn(const struct roff *r)
1996 {
1997
1998 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1999 }
2000
2001 /*
2002 * Duplicate an input string, making the appropriate character
2003 * conversations (as stipulated by `tr') along the way.
2004 * Returns a heap-allocated string with all the replacements made.
2005 */
2006 char *
2007 roff_strdup(const struct roff *r, const char *p)
2008 {
2009 const struct roffkv *cp;
2010 char *res;
2011 const char *pp;
2012 size_t ssz, sz;
2013 enum mandoc_esc esc;
2014
2015 if (NULL == r->xmbtab && NULL == r->xtab)
2016 return(mandoc_strdup(p));
2017 else if ('\0' == *p)
2018 return(mandoc_strdup(""));
2019
2020 /*
2021 * Step through each character looking for term matches
2022 * (remember that a `tr' can be invoked with an escape, which is
2023 * a glyph but the escape is multi-character).
2024 * We only do this if the character hash has been initialised
2025 * and the string is >0 length.
2026 */
2027
2028 res = NULL;
2029 ssz = 0;
2030
2031 while ('\0' != *p) {
2032 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2033 sz = r->xtab[(int)*p].sz;
2034 res = mandoc_realloc(res, ssz + sz + 1);
2035 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2036 ssz += sz;
2037 p++;
2038 continue;
2039 } else if ('\\' != *p) {
2040 res = mandoc_realloc(res, ssz + 2);
2041 res[ssz++] = *p++;
2042 continue;
2043 }
2044
2045 /* Search for term matches. */
2046 for (cp = r->xmbtab; cp; cp = cp->next)
2047 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2048 break;
2049
2050 if (NULL != cp) {
2051 /*
2052 * A match has been found.
2053 * Append the match to the array and move
2054 * forward by its keysize.
2055 */
2056 res = mandoc_realloc
2057 (res, ssz + cp->val.sz + 1);
2058 memcpy(res + ssz, cp->val.p, cp->val.sz);
2059 ssz += cp->val.sz;
2060 p += (int)cp->key.sz;
2061 continue;
2062 }
2063
2064 /*
2065 * Handle escapes carefully: we need to copy
2066 * over just the escape itself, or else we might
2067 * do replacements within the escape itself.
2068 * Make sure to pass along the bogus string.
2069 */
2070 pp = p++;
2071 esc = mandoc_escape(&p, NULL, NULL);
2072 if (ESCAPE_ERROR == esc) {
2073 sz = strlen(pp);
2074 res = mandoc_realloc(res, ssz + sz + 1);
2075 memcpy(res + ssz, pp, sz);
2076 break;
2077 }
2078 /*
2079 * We bail out on bad escapes.
2080 * No need to warn: we already did so when
2081 * roff_res() was called.
2082 */
2083 sz = (int)(p - pp);
2084 res = mandoc_realloc(res, ssz + sz + 1);
2085 memcpy(res + ssz, pp, sz);
2086 ssz += sz;
2087 }
2088
2089 res[(int)ssz] = '\0';
2090 return(res);
2091 }
2092
2093 /*
2094 * Find out whether a line is a macro line or not.
2095 * If it is, adjust the current position and return one; if it isn't,
2096 * return zero and don't change the current position.
2097 * If the control character has been set with `.cc', then let that grain
2098 * precedence.
2099 * This is slighly contrary to groff, where using the non-breaking
2100 * control character when `cc' has been invoked will cause the
2101 * non-breaking macro contents to be printed verbatim.
2102 */
2103 int
2104 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2105 {
2106 int pos;
2107
2108 pos = *ppos;
2109
2110 if (0 != r->control && cp[pos] == r->control)
2111 pos++;
2112 else if (0 != r->control)
2113 return(0);
2114 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2115 pos += 2;
2116 else if ('.' == cp[pos] || '\'' == cp[pos])
2117 pos++;
2118 else
2119 return(0);
2120
2121 while (' ' == cp[pos] || '\t' == cp[pos])
2122 pos++;
2123
2124 *ppos = pos;
2125 return(1);
2126 }