]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
To better support MLINKS, we will have to split the "docs" database
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.188 2013/12/25 00:50:05 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "libroff.h"
30 #include "libmandoc.h"
31
/* Upper bound on the nesting depth of if-else conditionals. */
#define	RSTACK_MAX	128

/* Cap on string expansions within one line; breaks infinite loops. */
#define	EXPAND_LIMIT	1000
37
/*
 * Tokens understood by the roff layer.  The order must match the
 * roffs[] table, which is indexed by these values.
 */
enum	rofft {
	/* Requests, listed in the same order as their roffs[] entries. */
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_cc,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_fam,
	ROFF_hw,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	/* Macros the roff layer intercepts by name. */
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,	/* spelled "T&" in the input */
	ROFF_EQ,
	ROFF_EN,
	/* Pseudo-tokens. */
	ROFF_cblock,	/* "." closing a block, i.e. ".." */
	ROFF_ccond,	/* "\}" closing a conditional */
	ROFF_USERDEF,	/* user-defined macro */
	ROFF_MAX	/* number of tokens; also means "not found" */
};
77
/* Outcome of evaluating a conditional. */
enum	roffrule {
	ROFFRULE_DENY,	/* condition is false */
	ROFFRULE_ALLOW	/* condition is true */
};
82
/*
 * Minimal string buffer: a pointer plus its cached length.
 */
struct	roffstr {
	char		*p;	/* nil-terminated buffer */
	size_t		 sz;	/* saved strlen(p) */
};
90
91 /*
92 * A key-value roffstr pair as part of a singly-linked list.
93 */
94 struct roffkv {
95 struct roffstr key;
96 struct roffstr val;
97 struct roffkv *next; /* next in list */
98 };
99
100 /*
101 * A single number register as part of a singly-linked list.
102 */
103 struct roffreg {
104 struct roffstr key;
105 int val;
106 struct roffreg *next;
107 };
108
109 struct roff {
110 enum mparset parsetype; /* requested parse type */
111 struct mparse *parse; /* parse point */
112 struct roffnode *last; /* leaf of stack */
113 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
114 char control; /* control character */
115 int rstackpos; /* position in rstack */
116 struct roffreg *regtab; /* number registers */
117 struct roffkv *strtab; /* user-defined strings & macros */
118 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
119 struct roffstr *xtab; /* single-byte trans table (`tr') */
120 const char *current_string; /* value of last called user macro */
121 struct tbl_node *first_tbl; /* first table parsed */
122 struct tbl_node *last_tbl; /* last table parsed */
123 struct tbl_node *tbl; /* current table being parsed */
124 struct eqn_node *last_eqn; /* last equation parsed */
125 struct eqn_node *first_eqn; /* first equation parsed */
126 struct eqn_node *eqn; /* current equation being parsed */
127 };
128
129 struct roffnode {
130 enum rofft tok; /* type of node */
131 struct roffnode *parent; /* up one in stack */
132 int line; /* parse line */
133 int col; /* parse col */
134 char *name; /* node name, e.g. macro name */
135 char *end; /* end-rules: custom token */
136 int endspan; /* end-rules: next-line or infty */
137 enum roffrule rule; /* current evaluation rule */
138 };
139
/*
 * Parameter list shared by all request handlers, and the matching
 * function pointer type.
 */
#define	ROFF_ARGS \
	struct roff *r,		/* parse ctx */ \
	enum rofft tok,		/* tok of macro */ \
	char **bufp,		/* input buffer */ \
	size_t *szp,		/* size of input buffer */ \
	int ln,			/* parse line */ \
	int ppos,		/* original pos in buffer */ \
	int pos,		/* current pos in buffer */ \
	int *offs		/* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
150
151 struct roffmac {
152 const char *name; /* macro name */
153 roffproc proc; /* process new macro */
154 roffproc text; /* process as child text of macro */
155 roffproc sub; /* process as child of macro */
156 int flags;
157 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
158 struct roffmac *next;
159 };
160
/* A predefined string: the input name and what it is replaced with. */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Produces one initializer, including the trailing comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
168
/* Forward declarations of the file-local functions. */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
220
/*
 * Bucket range of the lookup table: one bucket per character between
 * ASCII_LO and ASCII_HI inclusive.  See roffhash_find().
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
228
229 static struct roffmac roffs[ROFF_MAX] = {
230 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
231 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
232 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
233 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
234 { "cc", roff_cc, NULL, NULL, 0, NULL },
235 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
236 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
237 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
238 { "ds", roff_ds, NULL, NULL, 0, NULL },
239 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
240 { "fam", roff_line_ignore, NULL, NULL, 0, NULL },
241 { "hw", roff_line_ignore, NULL, NULL, 0, NULL },
242 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
243 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
244 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
245 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
246 { "it", roff_it, NULL, NULL, 0, NULL },
247 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
248 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
249 { "nr", roff_nr, NULL, NULL, 0, NULL },
250 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
251 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
252 { "rm", roff_rm, NULL, NULL, 0, NULL },
253 { "so", roff_so, NULL, NULL, 0, NULL },
254 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
255 { "tr", roff_tr, NULL, NULL, 0, NULL },
256 { "Dd", roff_Dd, NULL, NULL, 0, NULL },
257 { "TH", roff_TH, NULL, NULL, 0, NULL },
258 { "TS", roff_TS, NULL, NULL, 0, NULL },
259 { "TE", roff_TE, NULL, NULL, 0, NULL },
260 { "T&", roff_T_, NULL, NULL, 0, NULL },
261 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
262 { "EN", roff_EN, NULL, NULL, 0, NULL },
263 { ".", roff_cblock, NULL, NULL, 0, NULL },
264 { "\\}", roff_ccond, NULL, NULL, 0, NULL },
265 { NULL, roff_userdef, NULL, NULL, 0, NULL },
266 };
267
/* NULL-terminated list of names reserved for mdoc. */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", "Brc", "Bro", "Brq",
	"Bsx", "Bt", "Bx",
	"Cd", "Cm",
	"Db", "Dc", "Dd", "Dl", "Do", "Dq", "Ds", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "em", "En", "Eo", "Eq", "Er",
	"Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf",
	"Ic", "In", "It",
	"Lb", "Li", "Lk", "Lp", "LP",
	"Me", "Ms", "Mt",
	"Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Or",
	"Rd", "Re", "Rs", "Rv",
	"Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
	"Ta", "Tn",
	"Ud", "Ux",
	"Va", "Vt",
	"Xc", "Xo", "Xr",
	"%A", "%B", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
289
/* NULL-terminated list of names reserved for man. */
const	char *const __man_reserved[] = {
	"AT",
	"B", "BI", "BR", "BT",
	"DE", "DS", "DT",
	"EE", "EN", "EQ", "EX",
	"HF", "HP",
	"I", "IB", "IP", "IR",
	"LP",
	"ME", "MT",
	"OP",
	"P", "PD", "PP", "PT",
	"R", "RB", "RE", "RI", "RS",
	"SB", "SH", "SM", "SS", "SY",
	"TE", "TH", "TP", "TQ", "TS", "T&",
	"UC", "UE", "UR",
	"YS",
	NULL
};
298
299 /* Array of injected predefined strings. */
300 #define PREDEFS_MAX 38
301 static const struct predef predefs[PREDEFS_MAX] = {
302 #include "predefs.in"
303 };
304
/*
 * Bucket index for a name: its first byte relative to ASCII_LO.
 * The argument is parenthesized so that pointer expressions such as
 * `s + 1' expand correctly.  See roffhash_find().
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
307
/* State of the pending input line trap. */
static	int	 roffit_lines;	/* number of lines to delay */
static	char	*roffit_macro;	/* nil-terminated macro line */
310
311 static void
312 roffhash_init(void)
313 {
314 struct roffmac *n;
315 int buc, i;
316
317 for (i = 0; i < (int)ROFF_USERDEF; i++) {
318 assert(roffs[i].name[0] >= ASCII_LO);
319 assert(roffs[i].name[0] <= ASCII_HI);
320
321 buc = ROFF_HASH(roffs[i].name);
322
323 if (NULL != (n = hash[buc])) {
324 for ( ; n->next; n = n->next)
325 /* Do nothing. */ ;
326 n->next = &roffs[i];
327 } else
328 hash[buc] = &roffs[i];
329 }
330 }
331
332 /*
333 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
334 * the nil-terminated string name could be found.
335 */
336 static enum rofft
337 roffhash_find(const char *p, size_t s)
338 {
339 int buc;
340 struct roffmac *n;
341
342 /*
343 * libroff has an extremely simple hashtable, for the time
344 * being, which simply keys on the first character, which must
345 * be printable, then walks a chain. It works well enough until
346 * optimised.
347 */
348
349 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
350 return(ROFF_MAX);
351
352 buc = ROFF_HASH(p);
353
354 if (NULL == (n = hash[buc]))
355 return(ROFF_MAX);
356 for ( ; n; n = n->next)
357 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
358 return((enum rofft)(n - roffs));
359
360 return(ROFF_MAX);
361 }
362
363
364 /*
365 * Pop the current node off of the stack of roff instructions currently
366 * pending.
367 */
368 static void
369 roffnode_pop(struct roff *r)
370 {
371 struct roffnode *p;
372
373 assert(r->last);
374 p = r->last;
375
376 r->last = r->last->parent;
377 free(p->name);
378 free(p->end);
379 free(p);
380 }
381
382
383 /*
384 * Push a roff node onto the instruction stack. This must later be
385 * removed with roffnode_pop().
386 */
387 static void
388 roffnode_push(struct roff *r, enum rofft tok, const char *name,
389 int line, int col)
390 {
391 struct roffnode *p;
392
393 p = mandoc_calloc(1, sizeof(struct roffnode));
394 p->tok = tok;
395 if (name)
396 p->name = mandoc_strdup(name);
397 p->parent = r->last;
398 p->line = line;
399 p->col = col;
400 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
401
402 r->last = p;
403 }
404
405
406 static void
407 roff_free1(struct roff *r)
408 {
409 struct tbl_node *tbl;
410 struct eqn_node *e;
411 int i;
412
413 while (NULL != (tbl = r->first_tbl)) {
414 r->first_tbl = tbl->next;
415 tbl_free(tbl);
416 }
417
418 r->first_tbl = r->last_tbl = r->tbl = NULL;
419
420 while (NULL != (e = r->first_eqn)) {
421 r->first_eqn = e->next;
422 eqn_free(e);
423 }
424
425 r->first_eqn = r->last_eqn = r->eqn = NULL;
426
427 while (r->last)
428 roffnode_pop(r);
429
430 roff_freestr(r->strtab);
431 roff_freestr(r->xmbtab);
432
433 r->strtab = r->xmbtab = NULL;
434
435 roff_freereg(r->regtab);
436
437 r->regtab = NULL;
438
439 if (r->xtab)
440 for (i = 0; i < 128; i++)
441 free(r->xtab[i].p);
442
443 free(r->xtab);
444 r->xtab = NULL;
445 }
446
447 void
448 roff_reset(struct roff *r)
449 {
450 int i;
451
452 roff_free1(r);
453
454 r->control = 0;
455
456 for (i = 0; i < PREDEFS_MAX; i++)
457 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
458 }
459
460
/*
 * Destroy a parser: release what it owns, then the parser itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
468
469
470 struct roff *
471 roff_alloc(enum mparset type, struct mparse *parse)
472 {
473 struct roff *r;
474 int i;
475
476 r = mandoc_calloc(1, sizeof(struct roff));
477 r->parsetype = type;
478 r->parse = parse;
479 r->rstackpos = -1;
480
481 roffhash_init();
482
483 for (i = 0; i < PREDEFS_MAX; i++)
484 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
485
486 return(r);
487 }
488
489 /*
490 * In the current line, expand user-defined strings ("\*")
491 * and references to number registers ("\n").
492 * Also check the syntax of other escape sequences.
493 */
494 static enum rofferr
495 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
496 {
497 char ubuf[12]; /* buffer to print the number */
498 const char *stesc; /* start of an escape sequence ('\\') */
499 const char *stnam; /* start of the name, after "[(*" */
500 const char *cp; /* end of the name, e.g. before ']' */
501 const char *res; /* the string to be substituted */
502 char *nbuf; /* new buffer to copy bufp to */
503 size_t nsz; /* size of the new buffer */
504 size_t maxl; /* expected length of the escape name */
505 size_t naml; /* actual length of the escape name */
506 int expand_count; /* to avoid infinite loops */
507
508 expand_count = 0;
509
510 again:
511 cp = *bufp + pos;
512 while (NULL != (cp = strchr(cp, '\\'))) {
513 stesc = cp++;
514
515 /*
516 * The second character must be an asterisk or an n.
517 * If it isn't, skip it anyway: It is escaped,
518 * so it can't start another escape sequence.
519 */
520
521 if ('\0' == *cp)
522 return(ROFF_CONT);
523
524 switch (*cp) {
525 case ('*'):
526 res = NULL;
527 break;
528 case ('n'):
529 res = ubuf;
530 break;
531 default:
532 if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
533 continue;
534 mandoc_msg
535 (MANDOCERR_BADESCAPE, r->parse,
536 ln, (int)(stesc - *bufp), NULL);
537 return(ROFF_CONT);
538 }
539
540 cp++;
541
542 /*
543 * The third character decides the length
544 * of the name of the string or register.
545 * Save a pointer to the name.
546 */
547
548 switch (*cp) {
549 case ('\0'):
550 return(ROFF_CONT);
551 case ('('):
552 cp++;
553 maxl = 2;
554 break;
555 case ('['):
556 cp++;
557 maxl = 0;
558 break;
559 default:
560 maxl = 1;
561 break;
562 }
563 stnam = cp;
564
565 /* Advance to the end of the name. */
566
567 for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
568 if ('\0' == *cp) {
569 mandoc_msg
570 (MANDOCERR_BADESCAPE,
571 r->parse, ln,
572 (int)(stesc - *bufp), NULL);
573 return(ROFF_CONT);
574 }
575 if (0 == maxl && ']' == *cp)
576 break;
577 }
578
579 /*
580 * Retrieve the replacement string; if it is
581 * undefined, resume searching for escapes.
582 */
583
584 if (NULL == res)
585 res = roff_getstrn(r, stnam, naml);
586 else
587 snprintf(ubuf, sizeof(ubuf), "%d",
588 roff_getregn(r, stnam, naml));
589
590 if (NULL == res) {
591 mandoc_msg
592 (MANDOCERR_BADESCAPE, r->parse,
593 ln, (int)(stesc - *bufp), NULL);
594 res = "";
595 }
596
597 /* Replace the escape sequence by the string. */
598
599 pos = stesc - *bufp;
600
601 nsz = *szp + strlen(res) + 1;
602 nbuf = mandoc_malloc(nsz);
603
604 strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
605 strlcat(nbuf, res, nsz);
606 strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);
607
608 free(*bufp);
609
610 *bufp = nbuf;
611 *szp = nsz;
612
613 if (EXPAND_LIMIT >= ++expand_count)
614 goto again;
615
616 /* Just leave the string unexpanded. */
617 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
618 return(ROFF_IGN);
619 }
620 return(ROFF_CONT);
621 }
622
623 /*
624 * Process text streams:
625 * Convert all breakable hyphens into ASCII_HYPH.
626 * Decrement and spring input line trap.
627 */
628 static enum rofferr
629 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
630 {
631 size_t sz;
632 const char *start;
633 char *p;
634 int isz;
635 enum mandoc_esc esc;
636
637 start = p = *bufp + pos;
638
639 while ('\0' != *p) {
640 sz = strcspn(p, "-\\");
641 p += sz;
642
643 if ('\0' == *p)
644 break;
645
646 if ('\\' == *p) {
647 /* Skip over escapes. */
648 p++;
649 esc = mandoc_escape
650 ((const char const **)&p, NULL, NULL);
651 if (ESCAPE_ERROR == esc)
652 break;
653 continue;
654 } else if (p == start) {
655 p++;
656 continue;
657 }
658
659 if (isalpha((unsigned char)p[-1]) &&
660 isalpha((unsigned char)p[1]))
661 *p = ASCII_HYPH;
662 p++;
663 }
664
665 /* Spring the input line trap. */
666 if (1 == roffit_lines) {
667 isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
668 if (-1 == isz) {
669 perror(NULL);
670 exit((int)MANDOCLEVEL_SYSERR);
671 }
672 free(*bufp);
673 *bufp = p;
674 *szp = isz + 1;
675 *offs = 0;
676 free(roffit_macro);
677 roffit_lines = 0;
678 return(ROFF_REPARSE);
679 } else if (1 < roffit_lines)
680 --roffit_lines;
681 return(ROFF_CONT);
682 }
683
684 enum rofferr
685 roff_parseln(struct roff *r, int ln, char **bufp,
686 size_t *szp, int pos, int *offs)
687 {
688 enum rofft t;
689 enum rofferr e;
690 int ppos, ctl;
691
692 /*
693 * Run the reserved-word filter only if we have some reserved
694 * words to fill in.
695 */
696
697 e = roff_res(r, bufp, szp, ln, pos);
698 if (ROFF_IGN == e)
699 return(e);
700 assert(ROFF_CONT == e);
701
702 ppos = pos;
703 ctl = roff_getcontrol(r, *bufp, &pos);
704
705 /*
706 * First, if a scope is open and we're not a macro, pass the
707 * text through the macro's filter. If a scope isn't open and
708 * we're not a macro, just let it through.
709 * Finally, if there's an equation scope open, divert it into it
710 * no matter our state.
711 */
712
713 if (r->last && ! ctl) {
714 t = r->last->tok;
715 assert(roffs[t].text);
716 e = (*roffs[t].text)
717 (r, t, bufp, szp, ln, pos, pos, offs);
718 assert(ROFF_IGN == e || ROFF_CONT == e);
719 if (ROFF_CONT != e)
720 return(e);
721 }
722 if (r->eqn)
723 return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
724 if ( ! ctl) {
725 if (r->tbl)
726 return(tbl_read(r->tbl, ln, *bufp, pos));
727 return(roff_parsetext(bufp, szp, pos, offs));
728 }
729
730 /*
731 * If a scope is open, go to the child handler for that macro,
732 * as it may want to preprocess before doing anything with it.
733 * Don't do so if an equation is open.
734 */
735
736 if (r->last) {
737 t = r->last->tok;
738 assert(roffs[t].sub);
739 return((*roffs[t].sub)
740 (r, t, bufp, szp,
741 ln, ppos, pos, offs));
742 }
743
744 /*
745 * Lastly, as we've no scope open, try to look up and execute
746 * the new macro. If no macro is found, simply return and let
747 * the compilers handle it.
748 */
749
750 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
751 return(ROFF_CONT);
752
753 assert(roffs[t].proc);
754 return((*roffs[t].proc)
755 (r, t, bufp, szp,
756 ln, ppos, pos, offs));
757 }
758
759
760 void
761 roff_endparse(struct roff *r)
762 {
763
764 if (r->last)
765 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
766 r->last->line, r->last->col, NULL);
767
768 if (r->eqn) {
769 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
770 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
771 eqn_end(&r->eqn);
772 }
773
774 if (r->tbl) {
775 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
776 r->tbl->line, r->tbl->pos, NULL);
777 tbl_end(&r->tbl);
778 }
779 }
780
781 /*
782 * Parse a roff node's type from the input buffer. This must be in the
783 * form of ".foo xxx" in the usual way.
784 */
785 static enum rofft
786 roff_parse(struct roff *r, const char *buf, int *pos)
787 {
788 const char *mac;
789 size_t maclen;
790 enum rofft t;
791
792 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
793 '\t' == buf[*pos] || ' ' == buf[*pos])
794 return(ROFF_MAX);
795
796 /*
797 * We stop the macro parse at an escape, tab, space, or nil.
798 * However, `\}' is also a valid macro, so make sure we don't
799 * clobber it by seeing the `\' as the end of token.
800 */
801
802 mac = buf + *pos;
803 maclen = strcspn(mac + 1, " \\\t\0") + 1;
804
805 t = (r->current_string = roff_getstrn(r, mac, maclen))
806 ? ROFF_USERDEF : roffhash_find(mac, maclen);
807
808 *pos += (int)maclen;
809
810 while (buf[*pos] && ' ' == buf[*pos])
811 (*pos)++;
812
813 return(t);
814 }
815
816 /* ARGSUSED */
817 static enum rofferr
818 roff_cblock(ROFF_ARGS)
819 {
820
821 /*
822 * A block-close `..' should only be invoked as a child of an
823 * ignore macro, otherwise raise a warning and just ignore it.
824 */
825
826 if (NULL == r->last) {
827 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
828 return(ROFF_IGN);
829 }
830
831 switch (r->last->tok) {
832 case (ROFF_am):
833 /* FALLTHROUGH */
834 case (ROFF_ami):
835 /* FALLTHROUGH */
836 case (ROFF_am1):
837 /* FALLTHROUGH */
838 case (ROFF_de):
839 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
840 /* FALLTHROUGH */
841 case (ROFF_dei):
842 /* FALLTHROUGH */
843 case (ROFF_ig):
844 break;
845 default:
846 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
847 return(ROFF_IGN);
848 }
849
850 if ((*bufp)[pos])
851 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
852
853 roffnode_pop(r);
854 roffnode_cleanscope(r);
855 return(ROFF_IGN);
856
857 }
858
859
860 static void
861 roffnode_cleanscope(struct roff *r)
862 {
863
864 while (r->last) {
865 if (--r->last->endspan != 0)
866 break;
867 roffnode_pop(r);
868 }
869 }
870
871
872 /* ARGSUSED */
873 static enum rofferr
874 roff_ccond(ROFF_ARGS)
875 {
876
877 if (NULL == r->last) {
878 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
879 return(ROFF_IGN);
880 }
881
882 switch (r->last->tok) {
883 case (ROFF_el):
884 /* FALLTHROUGH */
885 case (ROFF_ie):
886 /* FALLTHROUGH */
887 case (ROFF_if):
888 break;
889 default:
890 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
891 return(ROFF_IGN);
892 }
893
894 if (r->last->endspan > -1) {
895 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
896 return(ROFF_IGN);
897 }
898
899 if ((*bufp)[pos])
900 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
901
902 roffnode_pop(r);
903 roffnode_cleanscope(r);
904 return(ROFF_IGN);
905 }
906
907
908 /* ARGSUSED */
909 static enum rofferr
910 roff_block(ROFF_ARGS)
911 {
912 int sv;
913 size_t sz;
914 char *name;
915
916 name = NULL;
917
918 if (ROFF_ig != tok) {
919 if ('\0' == (*bufp)[pos]) {
920 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
921 return(ROFF_IGN);
922 }
923
924 /*
925 * Re-write `de1', since we don't really care about
926 * groff's strange compatibility mode, into `de'.
927 */
928
929 if (ROFF_de1 == tok)
930 tok = ROFF_de;
931 if (ROFF_de == tok)
932 name = *bufp + pos;
933 else
934 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
935 roffs[tok].name);
936
937 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
938 pos++;
939
940 while (isspace((unsigned char)(*bufp)[pos]))
941 (*bufp)[pos++] = '\0';
942 }
943
944 roffnode_push(r, tok, name, ln, ppos);
945
946 /*
947 * At the beginning of a `de' macro, clear the existing string
948 * with the same name, if there is one. New content will be
949 * added from roff_block_text() in multiline mode.
950 */
951
952 if (ROFF_de == tok)
953 roff_setstr(r, name, "", 0);
954
955 if ('\0' == (*bufp)[pos])
956 return(ROFF_IGN);
957
958 /* If present, process the custom end-of-line marker. */
959
960 sv = pos;
961 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
962 pos++;
963
964 /*
965 * Note: groff does NOT like escape characters in the input.
966 * Instead of detecting this, we're just going to let it fly and
967 * to hell with it.
968 */
969
970 assert(pos > sv);
971 sz = (size_t)(pos - sv);
972
973 if (1 == sz && '.' == (*bufp)[sv])
974 return(ROFF_IGN);
975
976 r->last->end = mandoc_malloc(sz + 1);
977
978 memcpy(r->last->end, *bufp + sv, sz);
979 r->last->end[(int)sz] = '\0';
980
981 if ((*bufp)[pos])
982 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
983
984 return(ROFF_IGN);
985 }
986
987
988 /* ARGSUSED */
989 static enum rofferr
990 roff_block_sub(ROFF_ARGS)
991 {
992 enum rofft t;
993 int i, j;
994
995 /*
996 * First check whether a custom macro exists at this level. If
997 * it does, then check against it. This is some of groff's
998 * stranger behaviours. If we encountered a custom end-scope
999 * tag and that tag also happens to be a "real" macro, then we
1000 * need to try interpreting it again as a real macro. If it's
1001 * not, then return ignore. Else continue.
1002 */
1003
1004 if (r->last->end) {
1005 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1006 if ((*bufp)[i] != r->last->end[j])
1007 break;
1008
1009 if ('\0' == r->last->end[j] &&
1010 ('\0' == (*bufp)[i] ||
1011 ' ' == (*bufp)[i] ||
1012 '\t' == (*bufp)[i])) {
1013 roffnode_pop(r);
1014 roffnode_cleanscope(r);
1015
1016 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1017 i++;
1018
1019 pos = i;
1020 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1021 return(ROFF_RERUN);
1022 return(ROFF_IGN);
1023 }
1024 }
1025
1026 /*
1027 * If we have no custom end-query or lookup failed, then try
1028 * pulling it out of the hashtable.
1029 */
1030
1031 t = roff_parse(r, *bufp, &pos);
1032
1033 /*
1034 * Macros other than block-end are only significant
1035 * in `de' blocks; elsewhere, simply throw them away.
1036 */
1037 if (ROFF_cblock != t) {
1038 if (ROFF_de == tok)
1039 roff_setstr(r, r->last->name, *bufp + ppos, 1);
1040 return(ROFF_IGN);
1041 }
1042
1043 assert(roffs[t].proc);
1044 return((*roffs[t].proc)(r, t, bufp, szp,
1045 ln, ppos, pos, offs));
1046 }
1047
1048
1049 /* ARGSUSED */
1050 static enum rofferr
1051 roff_block_text(ROFF_ARGS)
1052 {
1053
1054 if (ROFF_de == tok)
1055 roff_setstr(r, r->last->name, *bufp + pos, 1);
1056
1057 return(ROFF_IGN);
1058 }
1059
1060
1061 /* ARGSUSED */
1062 static enum rofferr
1063 roff_cond_sub(ROFF_ARGS)
1064 {
1065 enum rofft t;
1066 enum roffrule rr;
1067 char *ep;
1068
1069 rr = r->last->rule;
1070 roffnode_cleanscope(r);
1071 t = roff_parse(r, *bufp, &pos);
1072
1073 /*
1074 * Fully handle known macros when they are structurally
1075 * required or when the conditional evaluated to true.
1076 */
1077
1078 if ((ROFF_MAX != t) &&
1079 (ROFF_ccond == t || ROFFRULE_ALLOW == rr ||
1080 ROFFMAC_STRUCT & roffs[t].flags)) {
1081 assert(roffs[t].proc);
1082 return((*roffs[t].proc)(r, t, bufp, szp,
1083 ln, ppos, pos, offs));
1084 }
1085
1086 /* Always check for the closing delimiter `\}'. */
1087
1088 ep = &(*bufp)[pos];
1089 while (NULL != (ep = strchr(ep, '\\'))) {
1090 if ('}' != *(++ep))
1091 continue;
1092
1093 /*
1094 * If we're at the end of line, then just chop
1095 * off the \} and resize the buffer.
1096 * If we aren't, then convert it to spaces.
1097 */
1098
1099 if ('\0' == *(ep + 1)) {
1100 *--ep = '\0';
1101 *szp -= 2;
1102 } else
1103 *(ep - 1) = *ep = ' ';
1104
1105 roff_ccond(r, ROFF_ccond, bufp, szp,
1106 ln, pos, pos + 2, offs);
1107 break;
1108 }
1109 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1110 }
1111
1112 /* ARGSUSED */
1113 static enum rofferr
1114 roff_cond_text(ROFF_ARGS)
1115 {
1116 char *ep;
1117 enum roffrule rr;
1118
1119 rr = r->last->rule;
1120 roffnode_cleanscope(r);
1121
1122 ep = &(*bufp)[pos];
1123 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1124 ep++;
1125 if ('}' != *ep)
1126 continue;
1127 *ep = '&';
1128 roff_ccond(r, ROFF_ccond, bufp, szp,
1129 ln, pos, pos + 2, offs);
1130 }
1131 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1132 }
1133
/*
 * Parse an optionally negative decimal integer at v[*pos].
 *
 * On success, store the value in *res, advance *pos past the number
 * and return 1.  If there are no digits, return 0 and leave *pos
 * alone; *res is then set to 0.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	/* Accumulate by assignment; adding to *res would count it twice. */
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';
	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1155
/*
 * Parse a comparison operator at v[*pos].
 *
 * Recognized are `=', `==', `<', `>', `<=' and `>='.  The operator is
 * stored in *res, with `<=' encoded as 'l' and `>=' as 'g', *pos is
 * advanced past it, and the stored character is returned.  If there
 * is no operator, 0 is returned and *pos is left alone; *res then
 * holds the character found at v[*pos].
 *
 * The character after the operator is only looked at once an
 * operator has been seen, so nothing is read beyond the end of the
 * string when v[*pos] is the terminating nil.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int e;

	*res = v[*pos];

	switch (*res) {
	case '=':
		/* FALLTHROUGH */
	case '>':
		/* FALLTHROUGH */
	case '<':
		break;
	default:
		return(0);
	}

	e = v[*pos + 1] == '=';

	if (e) {
		if ('>' == *res)
			*res = 'g';
		else if ('<' == *res)
			*res = 'l';
	}

	*pos += 1 + e;

	return(*res);
}
1183
1184 static enum roffrule
1185 roff_evalcond(const char *v, int *pos)
1186 {
1187 int not, lh, rh;
1188 char op;
1189
1190 switch (v[*pos]) {
1191 case ('n'):
1192 (*pos)++;
1193 return(ROFFRULE_ALLOW);
1194 case ('e'):
1195 /* FALLTHROUGH */
1196 case ('o'):
1197 /* FALLTHROUGH */
1198 case ('t'):
1199 (*pos)++;
1200 return(ROFFRULE_DENY);
1201 case ('!'):
1202 (*pos)++;
1203 not = 1;
1204 break;
1205 default:
1206 not = 0;
1207 break;
1208 }
1209
1210 if (!roff_getnum(v, pos, &lh))
1211 return ROFFRULE_DENY;
1212 if (!roff_getop(v, pos, &op)) {
1213 if (lh < 0)
1214 lh = 0;
1215 goto out;
1216 }
1217 if (!roff_getnum(v, pos, &rh))
1218 return ROFFRULE_DENY;
1219 switch (op) {
1220 case 'g':
1221 lh = lh >= rh;
1222 break;
1223 case 'l':
1224 lh = lh <= rh;
1225 break;
1226 case '=':
1227 lh = lh == rh;
1228 break;
1229 case '>':
1230 lh = lh > rh;
1231 break;
1232 case '<':
1233 lh = lh < rh;
1234 break;
1235 default:
1236 return ROFFRULE_DENY;
1237 }
1238 out:
1239 if (not)
1240 lh = !lh;
1241 return lh ? ROFFRULE_ALLOW : ROFFRULE_DENY;
1242 }
1243
1244 /* ARGSUSED */
1245 static enum rofferr
1246 roff_line_ignore(ROFF_ARGS)
1247 {
1248
1249 return(ROFF_IGN);
1250 }
1251
1252 /* ARGSUSED */
1253 static enum rofferr
1254 roff_cond(ROFF_ARGS)
1255 {
1256
1257 roffnode_push(r, tok, NULL, ln, ppos);
1258
1259 /*
1260 * An `.el' has no conditional body: it will consume the value
1261 * of the current rstack entry set in prior `ie' calls or
1262 * defaults to DENY.
1263 *
1264 * If we're not an `el', however, then evaluate the conditional.
1265 */
1266
1267 r->last->rule = ROFF_el == tok ?
1268 (r->rstackpos < 0 ?
1269 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1270 roff_evalcond(*bufp, &pos);
1271
1272 /*
1273 * An if-else will put the NEGATION of the current evaluated
1274 * conditional into the stack of rules.
1275 */
1276
1277 if (ROFF_ie == tok) {
1278 if (r->rstackpos == RSTACK_MAX - 1) {
1279 mandoc_msg(MANDOCERR_MEM,
1280 r->parse, ln, ppos, NULL);
1281 return(ROFF_ERR);
1282 }
1283 r->rstack[++r->rstackpos] =
1284 ROFFRULE_DENY == r->last->rule ?
1285 ROFFRULE_ALLOW : ROFFRULE_DENY;
1286 }
1287
1288 /* If the parent has false as its rule, then so do we. */
1289
1290 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1291 r->last->rule = ROFFRULE_DENY;
1292
1293 /*
1294 * Determine scope.
1295 * If there is nothing on the line after the conditional,
1296 * not even whitespace, use next-line scope.
1297 */
1298
1299 if ('\0' == (*bufp)[pos]) {
1300 r->last->endspan = 2;
1301 goto out;
1302 }
1303
1304 while (' ' == (*bufp)[pos])
1305 pos++;
1306
1307 /* An opening brace requests multiline scope. */
1308
1309 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1310 r->last->endspan = -1;
1311 pos += 2;
1312 goto out;
1313 }
1314
1315 /*
1316 * Anything else following the conditional causes
1317 * single-line scope. Warn if the scope contains
1318 * nothing but trailing whitespace.
1319 */
1320
1321 if ('\0' == (*bufp)[pos])
1322 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1323
1324 r->last->endspan = 1;
1325
1326 out:
1327 *offs = pos;
1328 return(ROFF_RERUN);
1329 }
1330
1331
1332 /* ARGSUSED */
1333 static enum rofferr
1334 roff_ds(ROFF_ARGS)
1335 {
1336 char *name, *string;
1337
1338 /*
1339 * A symbol is named by the first word following the macro
1340 * invocation up to a space. Its value is anything after the
1341 * name's trailing whitespace and optional double-quote. Thus,
1342 *
1343 * [.ds foo "bar " ]
1344 *
1345 * will have `bar " ' as its value.
1346 */
1347
1348 string = *bufp + pos;
1349 name = roff_getname(r, &string, ln, pos);
1350 if ('\0' == *name)
1351 return(ROFF_IGN);
1352
1353 /* Read past initial double-quote. */
1354 if ('"' == *string)
1355 string++;
1356
1357 /* The rest is the value. */
1358 roff_setstr(r, name, string, 0);
1359 return(ROFF_IGN);
1360 }
1361
1362 void
1363 roff_setreg(struct roff *r, const char *name, int val, char sign)
1364 {
1365 struct roffreg *reg;
1366
1367 /* Search for an existing register with the same name. */
1368 reg = r->regtab;
1369
1370 while (reg && strcmp(name, reg->key.p))
1371 reg = reg->next;
1372
1373 if (NULL == reg) {
1374 /* Create a new register. */
1375 reg = mandoc_malloc(sizeof(struct roffreg));
1376 reg->key.p = mandoc_strdup(name);
1377 reg->key.sz = strlen(name);
1378 reg->val = 0;
1379 reg->next = r->regtab;
1380 r->regtab = reg;
1381 }
1382
1383 if ('+' == sign)
1384 reg->val += val;
1385 else if ('-' == sign)
1386 reg->val -= val;
1387 else
1388 reg->val = val;
1389 }
1390
1391 int
1392 roff_getreg(const struct roff *r, const char *name)
1393 {
1394 struct roffreg *reg;
1395
1396 for (reg = r->regtab; reg; reg = reg->next)
1397 if (0 == strcmp(name, reg->key.p))
1398 return(reg->val);
1399
1400 return(0);
1401 }
1402
1403 static int
1404 roff_getregn(const struct roff *r, const char *name, size_t len)
1405 {
1406 struct roffreg *reg;
1407
1408 for (reg = r->regtab; reg; reg = reg->next)
1409 if (len == reg->key.sz &&
1410 0 == strncmp(name, reg->key.p, len))
1411 return(reg->val);
1412
1413 return(0);
1414 }
1415
1416 static void
1417 roff_freereg(struct roffreg *reg)
1418 {
1419 struct roffreg *old_reg;
1420
1421 while (NULL != reg) {
1422 free(reg->key.p);
1423 old_reg = reg;
1424 reg = reg->next;
1425 free(old_reg);
1426 }
1427 }
1428
1429 /* ARGSUSED */
1430 static enum rofferr
1431 roff_nr(ROFF_ARGS)
1432 {
1433 const char *key;
1434 char *val;
1435 size_t sz;
1436 int iv;
1437 char sign;
1438
1439 val = *bufp + pos;
1440 key = roff_getname(r, &val, ln, pos);
1441
1442 sign = *val;
1443 if ('+' == sign || '-' == sign)
1444 val++;
1445
1446 sz = strspn(val, "0123456789");
1447 iv = sz ? mandoc_strntoi(val, sz, 10) : 0;
1448
1449 roff_setreg(r, key, iv, sign);
1450
1451 return(ROFF_IGN);
1452 }
1453
1454 /* ARGSUSED */
1455 static enum rofferr
1456 roff_rm(ROFF_ARGS)
1457 {
1458 const char *name;
1459 char *cp;
1460
1461 cp = *bufp + pos;
1462 while ('\0' != *cp) {
1463 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1464 if ('\0' != *name)
1465 roff_setstr(r, name, NULL, 0);
1466 }
1467 return(ROFF_IGN);
1468 }
1469
1470 /* ARGSUSED */
1471 static enum rofferr
1472 roff_it(ROFF_ARGS)
1473 {
1474 char *cp;
1475 size_t len;
1476 int iv;
1477
1478 /* Parse the number of lines. */
1479 cp = *bufp + pos;
1480 len = strcspn(cp, " \t");
1481 cp[len] = '\0';
1482 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1483 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1484 ln, ppos, *bufp + 1);
1485 return(ROFF_IGN);
1486 }
1487 cp += len + 1;
1488
1489 /* Arm the input line trap. */
1490 roffit_lines = iv;
1491 roffit_macro = mandoc_strdup(cp);
1492 return(ROFF_IGN);
1493 }
1494
1495 /* ARGSUSED */
1496 static enum rofferr
1497 roff_Dd(ROFF_ARGS)
1498 {
1499 const char *const *cp;
1500
1501 if (MPARSE_MDOC != r->parsetype)
1502 for (cp = __mdoc_reserved; *cp; cp++)
1503 roff_setstr(r, *cp, NULL, 0);
1504
1505 return(ROFF_CONT);
1506 }
1507
1508 /* ARGSUSED */
1509 static enum rofferr
1510 roff_TH(ROFF_ARGS)
1511 {
1512 const char *const *cp;
1513
1514 if (MPARSE_MDOC != r->parsetype)
1515 for (cp = __man_reserved; *cp; cp++)
1516 roff_setstr(r, *cp, NULL, 0);
1517
1518 return(ROFF_CONT);
1519 }
1520
1521 /* ARGSUSED */
1522 static enum rofferr
1523 roff_TE(ROFF_ARGS)
1524 {
1525
1526 if (NULL == r->tbl)
1527 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1528 else
1529 tbl_end(&r->tbl);
1530
1531 return(ROFF_IGN);
1532 }
1533
1534 /* ARGSUSED */
1535 static enum rofferr
1536 roff_T_(ROFF_ARGS)
1537 {
1538
1539 if (NULL == r->tbl)
1540 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1541 else
1542 tbl_restart(ppos, ln, r->tbl);
1543
1544 return(ROFF_IGN);
1545 }
1546
#if 0
/*
 * Currently compiled out.
 * Returns 1 if an equation is open and eqn_end() on it yields
 * ROFF_EQN, and 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{
	if ( ! r->eqn)
		return 0;

	return (ROFF_EQN == eqn_end(&r->eqn)) ? 1 : 0;
}
#endif
1555
1556 static void
1557 roff_openeqn(struct roff *r, const char *name, int line,
1558 int offs, const char *buf)
1559 {
1560 struct eqn_node *e;
1561 int poff;
1562
1563 assert(NULL == r->eqn);
1564 e = eqn_alloc(name, offs, line, r->parse);
1565
1566 if (r->last_eqn)
1567 r->last_eqn->next = e;
1568 else
1569 r->first_eqn = r->last_eqn = e;
1570
1571 r->eqn = r->last_eqn = e;
1572
1573 if (buf) {
1574 poff = 0;
1575 eqn_read(&r->eqn, line, buf, offs, &poff);
1576 }
1577 }
1578
1579 /* ARGSUSED */
1580 static enum rofferr
1581 roff_EQ(ROFF_ARGS)
1582 {
1583
1584 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1585 return(ROFF_IGN);
1586 }
1587
1588 /* ARGSUSED */
1589 static enum rofferr
1590 roff_EN(ROFF_ARGS)
1591 {
1592
1593 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1594 return(ROFF_IGN);
1595 }
1596
1597 /* ARGSUSED */
1598 static enum rofferr
1599 roff_TS(ROFF_ARGS)
1600 {
1601 struct tbl_node *tbl;
1602
1603 if (r->tbl) {
1604 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1605 tbl_end(&r->tbl);
1606 }
1607
1608 tbl = tbl_alloc(ppos, ln, r->parse);
1609
1610 if (r->last_tbl)
1611 r->last_tbl->next = tbl;
1612 else
1613 r->first_tbl = r->last_tbl = tbl;
1614
1615 r->tbl = r->last_tbl = tbl;
1616 return(ROFF_IGN);
1617 }
1618
1619 /* ARGSUSED */
1620 static enum rofferr
1621 roff_cc(ROFF_ARGS)
1622 {
1623 const char *p;
1624
1625 p = *bufp + pos;
1626
1627 if ('\0' == *p || '.' == (r->control = *p++))
1628 r->control = 0;
1629
1630 if ('\0' != *p)
1631 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1632
1633 return(ROFF_IGN);
1634 }
1635
1636 /* ARGSUSED */
1637 static enum rofferr
1638 roff_tr(ROFF_ARGS)
1639 {
1640 const char *p, *first, *second;
1641 size_t fsz, ssz;
1642 enum mandoc_esc esc;
1643
1644 p = *bufp + pos;
1645
1646 if ('\0' == *p) {
1647 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1648 return(ROFF_IGN);
1649 }
1650
1651 while ('\0' != *p) {
1652 fsz = ssz = 1;
1653
1654 first = p++;
1655 if ('\\' == *first) {
1656 esc = mandoc_escape(&p, NULL, NULL);
1657 if (ESCAPE_ERROR == esc) {
1658 mandoc_msg
1659 (MANDOCERR_BADESCAPE, r->parse,
1660 ln, (int)(p - *bufp), NULL);
1661 return(ROFF_IGN);
1662 }
1663 fsz = (size_t)(p - first);
1664 }
1665
1666 second = p++;
1667 if ('\\' == *second) {
1668 esc = mandoc_escape(&p, NULL, NULL);
1669 if (ESCAPE_ERROR == esc) {
1670 mandoc_msg
1671 (MANDOCERR_BADESCAPE, r->parse,
1672 ln, (int)(p - *bufp), NULL);
1673 return(ROFF_IGN);
1674 }
1675 ssz = (size_t)(p - second);
1676 } else if ('\0' == *second) {
1677 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1678 ln, (int)(p - *bufp), NULL);
1679 second = " ";
1680 p--;
1681 }
1682
1683 if (fsz > 1) {
1684 roff_setstrn(&r->xmbtab, first,
1685 fsz, second, ssz, 0);
1686 continue;
1687 }
1688
1689 if (NULL == r->xtab)
1690 r->xtab = mandoc_calloc
1691 (128, sizeof(struct roffstr));
1692
1693 free(r->xtab[(int)*first].p);
1694 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1695 r->xtab[(int)*first].sz = ssz;
1696 }
1697
1698 return(ROFF_IGN);
1699 }
1700
1701 /* ARGSUSED */
1702 static enum rofferr
1703 roff_so(ROFF_ARGS)
1704 {
1705 char *name;
1706
1707 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1708
1709 /*
1710 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1711 * opening anything that's not in our cwd or anything beneath
1712 * it. Thus, explicitly disallow traversing up the file-system
1713 * or using absolute paths.
1714 */
1715
1716 name = *bufp + pos;
1717 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1718 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1719 return(ROFF_ERR);
1720 }
1721
1722 *offs = pos;
1723 return(ROFF_SO);
1724 }
1725
1726 /* ARGSUSED */
1727 static enum rofferr
1728 roff_userdef(ROFF_ARGS)
1729 {
1730 const char *arg[9];
1731 char *cp, *n1, *n2;
1732 int i;
1733
1734 /*
1735 * Collect pointers to macro argument strings
1736 * and NUL-terminate them.
1737 */
1738 cp = *bufp + pos;
1739 for (i = 0; i < 9; i++)
1740 arg[i] = '\0' == *cp ? "" :
1741 mandoc_getarg(r->parse, &cp, ln, &pos);
1742
1743 /*
1744 * Expand macro arguments.
1745 */
1746 *szp = 0;
1747 n1 = cp = mandoc_strdup(r->current_string);
1748 while (NULL != (cp = strstr(cp, "\\$"))) {
1749 i = cp[2] - '1';
1750 if (0 > i || 8 < i) {
1751 /* Not an argument invocation. */
1752 cp += 2;
1753 continue;
1754 }
1755
1756 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1757 n2 = mandoc_malloc(*szp);
1758
1759 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1760 strlcat(n2, arg[i], *szp);
1761 strlcat(n2, cp + 3, *szp);
1762
1763 cp = n2 + (cp - n1);
1764 free(n1);
1765 n1 = n2;
1766 }
1767
1768 /*
1769 * Replace the macro invocation
1770 * by the expanded macro.
1771 */
1772 free(*bufp);
1773 *bufp = n1;
1774 if (0 == *szp)
1775 *szp = strlen(*bufp) + 1;
1776
1777 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1778 ROFF_REPARSE : ROFF_APPEND);
1779 }
1780
1781 static char *
1782 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1783 {
1784 char *name, *cp;
1785
1786 name = *cpp;
1787 if ('\0' == *name)
1788 return(name);
1789
1790 /* Read until end of name. */
1791 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1792 if ('\\' != *cp)
1793 continue;
1794 cp++;
1795 if ('\\' == *cp)
1796 continue;
1797 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1798 *cp = '\0';
1799 name = cp;
1800 }
1801
1802 /* Nil-terminate name. */
1803 if ('\0' != *cp)
1804 *(cp++) = '\0';
1805
1806 /* Read past spaces. */
1807 while (' ' == *cp)
1808 cp++;
1809
1810 *cpp = cp;
1811 return(name);
1812 }
1813
1814 /*
1815 * Store *string into the user-defined string called *name.
1816 * In multiline mode, append to an existing entry and append '\n';
1817 * else replace the existing entry, if there is one.
1818 * To clear an existing entry, call with (*r, *name, NULL, 0).
1819 */
1820 static void
1821 roff_setstr(struct roff *r, const char *name, const char *string,
1822 int multiline)
1823 {
1824
1825 roff_setstrn(&r->strtab, name, strlen(name), string,
1826 string ? strlen(string) : 0, multiline);
1827 }
1828
1829 static void
1830 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1831 const char *string, size_t stringsz, int multiline)
1832 {
1833 struct roffkv *n;
1834 char *c;
1835 int i;
1836 size_t oldch, newch;
1837
1838 /* Search for an existing string with the same name. */
1839 n = *r;
1840
1841 while (n && strcmp(name, n->key.p))
1842 n = n->next;
1843
1844 if (NULL == n) {
1845 /* Create a new string table entry. */
1846 n = mandoc_malloc(sizeof(struct roffkv));
1847 n->key.p = mandoc_strndup(name, namesz);
1848 n->key.sz = namesz;
1849 n->val.p = NULL;
1850 n->val.sz = 0;
1851 n->next = *r;
1852 *r = n;
1853 } else if (0 == multiline) {
1854 /* In multiline mode, append; else replace. */
1855 free(n->val.p);
1856 n->val.p = NULL;
1857 n->val.sz = 0;
1858 }
1859
1860 if (NULL == string)
1861 return;
1862
1863 /*
1864 * One additional byte for the '\n' in multiline mode,
1865 * and one for the terminating '\0'.
1866 */
1867 newch = stringsz + (multiline ? 2u : 1u);
1868
1869 if (NULL == n->val.p) {
1870 n->val.p = mandoc_malloc(newch);
1871 *n->val.p = '\0';
1872 oldch = 0;
1873 } else {
1874 oldch = n->val.sz;
1875 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1876 }
1877
1878 /* Skip existing content in the destination buffer. */
1879 c = n->val.p + (int)oldch;
1880
1881 /* Append new content to the destination buffer. */
1882 i = 0;
1883 while (i < (int)stringsz) {
1884 /*
1885 * Rudimentary roff copy mode:
1886 * Handle escaped backslashes.
1887 */
1888 if ('\\' == string[i] && '\\' == string[i + 1])
1889 i++;
1890 *c++ = string[i++];
1891 }
1892
1893 /* Append terminating bytes. */
1894 if (multiline)
1895 *c++ = '\n';
1896
1897 *c = '\0';
1898 n->val.sz = (int)(c - n->val.p);
1899 }
1900
1901 static const char *
1902 roff_getstrn(const struct roff *r, const char *name, size_t len)
1903 {
1904 const struct roffkv *n;
1905
1906 for (n = r->strtab; n; n = n->next)
1907 if (0 == strncmp(name, n->key.p, len) &&
1908 '\0' == n->key.p[(int)len])
1909 return(n->val.p);
1910
1911 return(NULL);
1912 }
1913
1914 static void
1915 roff_freestr(struct roffkv *r)
1916 {
1917 struct roffkv *n, *nn;
1918
1919 for (n = r; n; n = nn) {
1920 free(n->key.p);
1921 free(n->val.p);
1922 nn = n->next;
1923 free(n);
1924 }
1925 }
1926
1927 const struct tbl_span *
1928 roff_span(const struct roff *r)
1929 {
1930
1931 return(r->tbl ? tbl_span(r->tbl) : NULL);
1932 }
1933
1934 const struct eqn *
1935 roff_eqn(const struct roff *r)
1936 {
1937
1938 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1939 }
1940
1941 /*
1942 * Duplicate an input string, making the appropriate character
1943 * conversations (as stipulated by `tr') along the way.
1944 * Returns a heap-allocated string with all the replacements made.
1945 */
1946 char *
1947 roff_strdup(const struct roff *r, const char *p)
1948 {
1949 const struct roffkv *cp;
1950 char *res;
1951 const char *pp;
1952 size_t ssz, sz;
1953 enum mandoc_esc esc;
1954
1955 if (NULL == r->xmbtab && NULL == r->xtab)
1956 return(mandoc_strdup(p));
1957 else if ('\0' == *p)
1958 return(mandoc_strdup(""));
1959
1960 /*
1961 * Step through each character looking for term matches
1962 * (remember that a `tr' can be invoked with an escape, which is
1963 * a glyph but the escape is multi-character).
1964 * We only do this if the character hash has been initialised
1965 * and the string is >0 length.
1966 */
1967
1968 res = NULL;
1969 ssz = 0;
1970
1971 while ('\0' != *p) {
1972 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1973 sz = r->xtab[(int)*p].sz;
1974 res = mandoc_realloc(res, ssz + sz + 1);
1975 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1976 ssz += sz;
1977 p++;
1978 continue;
1979 } else if ('\\' != *p) {
1980 res = mandoc_realloc(res, ssz + 2);
1981 res[ssz++] = *p++;
1982 continue;
1983 }
1984
1985 /* Search for term matches. */
1986 for (cp = r->xmbtab; cp; cp = cp->next)
1987 if (0 == strncmp(p, cp->key.p, cp->key.sz))
1988 break;
1989
1990 if (NULL != cp) {
1991 /*
1992 * A match has been found.
1993 * Append the match to the array and move
1994 * forward by its keysize.
1995 */
1996 res = mandoc_realloc
1997 (res, ssz + cp->val.sz + 1);
1998 memcpy(res + ssz, cp->val.p, cp->val.sz);
1999 ssz += cp->val.sz;
2000 p += (int)cp->key.sz;
2001 continue;
2002 }
2003
2004 /*
2005 * Handle escapes carefully: we need to copy
2006 * over just the escape itself, or else we might
2007 * do replacements within the escape itself.
2008 * Make sure to pass along the bogus string.
2009 */
2010 pp = p++;
2011 esc = mandoc_escape(&p, NULL, NULL);
2012 if (ESCAPE_ERROR == esc) {
2013 sz = strlen(pp);
2014 res = mandoc_realloc(res, ssz + sz + 1);
2015 memcpy(res + ssz, pp, sz);
2016 break;
2017 }
2018 /*
2019 * We bail out on bad escapes.
2020 * No need to warn: we already did so when
2021 * roff_res() was called.
2022 */
2023 sz = (int)(p - pp);
2024 res = mandoc_realloc(res, ssz + sz + 1);
2025 memcpy(res + ssz, pp, sz);
2026 ssz += sz;
2027 }
2028
2029 res[(int)ssz] = '\0';
2030 return(res);
2031 }
2032
2033 /*
2034 * Find out whether a line is a macro line or not.
2035 * If it is, adjust the current position and return one; if it isn't,
2036 * return zero and don't change the current position.
2037 * If the control character has been set with `.cc', then let that grain
2038 * precedence.
2039 * This is slighly contrary to groff, where using the non-breaking
2040 * control character when `cc' has been invoked will cause the
2041 * non-breaking macro contents to be printed verbatim.
2042 */
2043 int
2044 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2045 {
2046 int pos;
2047
2048 pos = *ppos;
2049
2050 if (0 != r->control && cp[pos] == r->control)
2051 pos++;
2052 else if (0 != r->control)
2053 return(0);
2054 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2055 pos += 2;
2056 else if ('.' == cp[pos] || '\'' == cp[pos])
2057 pos++;
2058 else
2059 return(0);
2060
2061 while (' ' == cp[pos] || '\t' == cp[pos])
2062 pos++;
2063
2064 *ppos = pos;
2065 return(1);
2066 }