]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
mention the three unreleased versions since 1.10.3
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.184 2013/10/05 22:25:12 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "mandoc.h"
29 #include "libroff.h"
30 #include "libmandoc.h"
31
32 /* Maximum number of nested if-else conditionals. */
33 #define RSTACK_MAX 128
34
35 /* Maximum number of string expansions per line, to break infinite loops. */
36 #define EXPAND_LIMIT 1000
37
/*
 * Tokens for the requests and macros handled in this file.
 * The values index the roffs[] table, so the order here has to
 * match the order of the initialisers of that table.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_cc,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* the block-closing `..' */
	ROFF_ccond,	/* the conditional-closing `\}' */
	ROFF_USERDEF,	/* user-defined macro; has no name in roffs[] */
	ROFF_MAX	/* number of tokens; also means "not found" */
};
75
/*
 * Result of evaluating a conditional request.
 */
enum	roffrule {
	ROFFRULE_DENY,	/* condition is false */
	ROFFRULE_ALLOW	/* condition is true */
};
80
/*
 * An incredibly-simple string buffer:
 * a pointer together with the cached length of the string.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
88
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Used for the string table and the multi-byte `tr' table
 * of struct roff.
 */
struct	roffkv {
	struct roffstr	 key; /* name to look up */
	struct roffstr	 val; /* value stored under that name */
	struct roffkv	*next; /* next in list */
};
97
/*
 * A single number register as part of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current register value */
	struct roffreg	*next; /* next in list */
};
106
/*
 * The roff parser context.
 * Allocated by roff_alloc(), cleared for reuse by roff_reset()
 * and destroyed by roff_free().
 */
struct	roff {
	enum mparset	 parsetype; /* requested parse type */
	struct mparse	*parse; /* parse point */
	struct roffnode	*last; /* leaf of stack */
	enum roffrule	 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	char		 control; /* control character */
	int		 rstackpos; /* position in rstack, -1 if empty */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
};
126
/*
 * One entry of the stack of open roff blocks and conditionals.
 * Entries are created by roffnode_push() and released by
 * roffnode_pop(); name and end are owned by the node.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	enum roffrule	 rule; /* current evaluation rule */
};
137
138 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
139 enum rofft tok, /* tok of macro */ \
140 char **bufp, /* input buffer */ \
141 size_t *szp, /* size of input buffer */ \
142 int ln, /* parse line */ \
143 int ppos, /* original pos in buffer */ \
144 int pos, /* current pos in buffer */ \
145 int *offs /* reset offset of buffer data */
146
147 typedef enum rofferr (*roffproc)(ROFF_ARGS);
148
/*
 * Description of one request: its name and the handlers to call
 * when it is invoked (proc), when a text line is read while it is
 * the innermost open scope (text), and when a control line is read
 * while it is the innermost open scope (sub).
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next entry in the same hash bucket */
};
158
/*
 * A predefined string: roff_alloc() and roff_reset() enter each
 * name/str pair into the string table of the parser.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};
163
164 #define PREDEF(__name, __str) \
165 { (__name), (__str) },
166
167 static enum rofft roffhash_find(const char *, size_t);
168 static void roffhash_init(void);
169 static void roffnode_cleanscope(struct roff *);
170 static void roffnode_pop(struct roff *);
171 static void roffnode_push(struct roff *, enum rofft,
172 const char *, int, int);
173 static enum rofferr roff_block(ROFF_ARGS);
174 static enum rofferr roff_block_text(ROFF_ARGS);
175 static enum rofferr roff_block_sub(ROFF_ARGS);
176 static enum rofferr roff_cblock(ROFF_ARGS);
177 static enum rofferr roff_cc(ROFF_ARGS);
178 static enum rofferr roff_ccond(ROFF_ARGS);
179 static enum rofferr roff_cond(ROFF_ARGS);
180 static enum rofferr roff_cond_text(ROFF_ARGS);
181 static enum rofferr roff_cond_sub(ROFF_ARGS);
182 static enum rofferr roff_ds(ROFF_ARGS);
183 static enum roffrule roff_evalcond(const char *, int *);
184 static void roff_free1(struct roff *);
185 static void roff_freereg(struct roffreg *);
186 static void roff_freestr(struct roffkv *);
187 static char *roff_getname(struct roff *, char **, int, int);
188 static int roff_getnum(const char *, int *, int *);
189 static int roff_getop(const char *, int *, char *);
190 static int roff_getregn(const struct roff *,
191 const char *, size_t);
192 static const char *roff_getstrn(const struct roff *,
193 const char *, size_t);
194 static enum rofferr roff_it(ROFF_ARGS);
195 static enum rofferr roff_line_ignore(ROFF_ARGS);
196 static enum rofferr roff_nr(ROFF_ARGS);
197 static void roff_openeqn(struct roff *, const char *,
198 int, int, const char *);
199 static enum rofft roff_parse(struct roff *, const char *, int *);
200 static enum rofferr roff_parsetext(char **, size_t *, int, int *);
201 static enum rofferr roff_res(struct roff *,
202 char **, size_t *, int, int);
203 static enum rofferr roff_rm(ROFF_ARGS);
204 static void roff_setstr(struct roff *,
205 const char *, const char *, int);
206 static void roff_setstrn(struct roffkv **, const char *,
207 size_t, const char *, size_t, int);
208 static enum rofferr roff_so(ROFF_ARGS);
209 static enum rofferr roff_tr(ROFF_ARGS);
210 static enum rofferr roff_Dd(ROFF_ARGS);
211 static enum rofferr roff_TH(ROFF_ARGS);
212 static enum rofferr roff_TE(ROFF_ARGS);
213 static enum rofferr roff_TS(ROFF_ARGS);
214 static enum rofferr roff_EQ(ROFF_ARGS);
215 static enum rofferr roff_EN(ROFF_ARGS);
216 static enum rofferr roff_T_(ROFF_ARGS);
217 static enum rofferr roff_userdef(ROFF_ARGS);
218
219 /* See roffhash_find() */
220
221 #define ASCII_HI 126
222 #define ASCII_LO 33
223 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
224
225 static struct roffmac *hash[HASHWIDTH];
226
/*
 * Handler table, indexed by enum rofft.
 * Columns: name, proc, text, sub, flags, next (hash chain link,
 * filled in by roffhash_init()).
 * The last entry stands for user-defined macros and has no name.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
263
/*
 * NULL-terminated list of macro names.
 * NOTE(review): judging by the name, these are the names reserved
 * for the mdoc language; the users of this array are not visible
 * here - confirm before relying on it.
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Ds", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
	"En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
	"Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
	"Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
285
/*
 * NULL-terminated list of macro names.
 * NOTE(review): judging by the name, these are the names reserved
 * for the man language; the users of this array are not visible
 * here - confirm before relying on it.
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
	"EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
	"LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
	"TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
	NULL
};
294
/*
 * Array of injected predefined strings.
 * The entries come from predefs.in, which is expected to consist of
 * PREDEF() invocations; PREDEFS_MAX has to be kept equal to the
 * number of entries in that file (TODO confirm against predefs.in).
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
300
301 /* See roffhash_find() */
302 #define ROFF_HASH(p) (p[0] - ASCII_LO)
303
304 static int roffit_lines; /* number of lines to delay */
305 static char *roffit_macro; /* nil-terminated macro line */
306
307 static void
308 roffhash_init(void)
309 {
310 struct roffmac *n;
311 int buc, i;
312
313 for (i = 0; i < (int)ROFF_USERDEF; i++) {
314 assert(roffs[i].name[0] >= ASCII_LO);
315 assert(roffs[i].name[0] <= ASCII_HI);
316
317 buc = ROFF_HASH(roffs[i].name);
318
319 if (NULL != (n = hash[buc])) {
320 for ( ; n->next; n = n->next)
321 /* Do nothing. */ ;
322 n->next = &roffs[i];
323 } else
324 hash[buc] = &roffs[i];
325 }
326 }
327
328 /*
329 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
330 * the nil-terminated string name could be found.
331 */
332 static enum rofft
333 roffhash_find(const char *p, size_t s)
334 {
335 int buc;
336 struct roffmac *n;
337
338 /*
339 * libroff has an extremely simple hashtable, for the time
340 * being, which simply keys on the first character, which must
341 * be printable, then walks a chain. It works well enough until
342 * optimised.
343 */
344
345 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
346 return(ROFF_MAX);
347
348 buc = ROFF_HASH(p);
349
350 if (NULL == (n = hash[buc]))
351 return(ROFF_MAX);
352 for ( ; n; n = n->next)
353 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
354 return((enum rofft)(n - roffs));
355
356 return(ROFF_MAX);
357 }
358
359
360 /*
361 * Pop the current node off of the stack of roff instructions currently
362 * pending.
363 */
364 static void
365 roffnode_pop(struct roff *r)
366 {
367 struct roffnode *p;
368
369 assert(r->last);
370 p = r->last;
371
372 r->last = r->last->parent;
373 free(p->name);
374 free(p->end);
375 free(p);
376 }
377
378
379 /*
380 * Push a roff node onto the instruction stack. This must later be
381 * removed with roffnode_pop().
382 */
383 static void
384 roffnode_push(struct roff *r, enum rofft tok, const char *name,
385 int line, int col)
386 {
387 struct roffnode *p;
388
389 p = mandoc_calloc(1, sizeof(struct roffnode));
390 p->tok = tok;
391 if (name)
392 p->name = mandoc_strdup(name);
393 p->parent = r->last;
394 p->line = line;
395 p->col = col;
396 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
397
398 r->last = p;
399 }
400
401
402 static void
403 roff_free1(struct roff *r)
404 {
405 struct tbl_node *tbl;
406 struct eqn_node *e;
407 int i;
408
409 while (NULL != (tbl = r->first_tbl)) {
410 r->first_tbl = tbl->next;
411 tbl_free(tbl);
412 }
413
414 r->first_tbl = r->last_tbl = r->tbl = NULL;
415
416 while (NULL != (e = r->first_eqn)) {
417 r->first_eqn = e->next;
418 eqn_free(e);
419 }
420
421 r->first_eqn = r->last_eqn = r->eqn = NULL;
422
423 while (r->last)
424 roffnode_pop(r);
425
426 roff_freestr(r->strtab);
427 roff_freestr(r->xmbtab);
428
429 r->strtab = r->xmbtab = NULL;
430
431 roff_freereg(r->regtab);
432
433 r->regtab = NULL;
434
435 if (r->xtab)
436 for (i = 0; i < 128; i++)
437 free(r->xtab[i].p);
438
439 free(r->xtab);
440 r->xtab = NULL;
441 }
442
443 void
444 roff_reset(struct roff *r)
445 {
446 int i;
447
448 roff_free1(r);
449
450 r->control = 0;
451
452 for (i = 0; i < PREDEFS_MAX; i++)
453 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
454 }
455
456
/*
 * Destroy a parser context: release everything it owns,
 * then the context itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
464
465
466 struct roff *
467 roff_alloc(enum mparset type, struct mparse *parse)
468 {
469 struct roff *r;
470 int i;
471
472 r = mandoc_calloc(1, sizeof(struct roff));
473 r->parsetype = type;
474 r->parse = parse;
475 r->rstackpos = -1;
476
477 roffhash_init();
478
479 for (i = 0; i < PREDEFS_MAX; i++)
480 roff_setstr(r, predefs[i].name, predefs[i].str, 0);
481
482 return(r);
483 }
484
/*
 * In the current line, expand user-defined strings ("\*")
 * and references to number registers ("\n").
 * Also check the syntax of other escape sequences.
 *
 * Each expansion replaces *bufp by a newly allocated buffer and
 * updates *szp; scanning then restarts at the place of the
 * replacement, so the inserted text is itself subject to expansion.
 *
 * Returns ROFF_CONT when the line is to be processed further, also
 * after a bad or incomplete escape sequence, which ends the scan.
 * Returns ROFF_IGN when more than EXPAND_LIMIT expansions were
 * needed for this line.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	char		 ubuf[12]; /* buffer to print the number */
	const char	*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy bufp to */
	size_t		 nsz;	/* size of the new buffer */
	size_t		 maxl;	/* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	int		 expand_count;	/* to avoid infinite loops */

	expand_count = 0;

again:
	cp = *bufp + pos;
	while (NULL != (cp = strchr(cp, '\\'))) {
		stesc = cp++;

		/*
		 * The second character must be an asterisk or an n.
		 * If it isn't, skip it anyway:  It is escaped,
		 * so it can't start another escape sequence.
		 */

		if ('\0' == *cp)
			return(ROFF_CONT);

		switch (*cp) {
		case ('*'):
			/* String: looked up once the name is known. */
			res = NULL;
			break;
		case ('n'):
			/* Register: the value is printed into ubuf. */
			res = ubuf;
			break;
		default:
			/* Let mandoc_escape() step over other escapes. */
			if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
				continue;
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			return(ROFF_CONT);
		}

		cp++;

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		switch (*cp) {
		case ('\0'):
			return(ROFF_CONT);
		case ('('):
			cp++;
			maxl = 2;
			break;
		case ('['):
			/* Zero means: the name extends up to ']'. */
			cp++;
			maxl = 0;
			break;
		default:
			maxl = 1;
			break;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg
					(MANDOCERR_BADESCAPE,
					 r->parse, ln,
					 (int)(stesc - *bufp), NULL);
				return(ROFF_CONT);
			}
			if (0 == maxl && ']' == *cp)
				break;
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		if (NULL == res)
			res = roff_getstrn(r, stnam, naml);
		else
			snprintf(ubuf, sizeof(ubuf), "%d",
			    roff_getregn(r, stnam, naml));

		/* An undefined string expands to nothing. */

		if (NULL == res) {
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		pos = stesc - *bufp;

		nsz = *szp + strlen(res) + 1;
		nbuf = mandoc_malloc(nsz);

		/*
		 * Copy the text before the escape, the replacement,
		 * and the text after the name; for the bracketed
		 * form, the closing ']' is skipped as well.
		 */

		strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
		strlcat(nbuf, res, nsz);
		strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);

		free(*bufp);

		*bufp = nbuf;
		*szp = nsz;

		if (EXPAND_LIMIT >= ++expand_count)
			goto again;

		/* Just leave the string unexpanded. */
		mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
		return(ROFF_IGN);
	}
	return(ROFF_CONT);
}
618
619 /*
620 * Process text streams:
621 * Convert all breakable hyphens into ASCII_HYPH.
622 * Decrement and spring input line trap.
623 */
624 static enum rofferr
625 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
626 {
627 size_t sz;
628 const char *start;
629 char *p;
630 int isz;
631 enum mandoc_esc esc;
632
633 start = p = *bufp + pos;
634
635 while ('\0' != *p) {
636 sz = strcspn(p, "-\\");
637 p += sz;
638
639 if ('\0' == *p)
640 break;
641
642 if ('\\' == *p) {
643 /* Skip over escapes. */
644 p++;
645 esc = mandoc_escape
646 ((const char const **)&p, NULL, NULL);
647 if (ESCAPE_ERROR == esc)
648 break;
649 continue;
650 } else if (p == start) {
651 p++;
652 continue;
653 }
654
655 if (isalpha((unsigned char)p[-1]) &&
656 isalpha((unsigned char)p[1]))
657 *p = ASCII_HYPH;
658 p++;
659 }
660
661 /* Spring the input line trap. */
662 if (1 == roffit_lines) {
663 isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
664 if (-1 == isz) {
665 perror(NULL);
666 exit((int)MANDOCLEVEL_SYSERR);
667 }
668 free(*bufp);
669 *bufp = p;
670 *szp = isz + 1;
671 *offs = 0;
672 free(roffit_macro);
673 roffit_lines = 0;
674 return(ROFF_REPARSE);
675 } else if (1 < roffit_lines)
676 --roffit_lines;
677 return(ROFF_CONT);
678 }
679
/*
 * Parse one input line.
 * The line is in *bufp with size *szp and is looked at starting
 * from pos; ln is the line number used in messages.  Handlers may
 * replace the buffer and set *offs.
 * Returns the result of whichever handler the line was given to,
 * or ROFF_CONT for a control line that names no roff request.
 */
enum rofferr
roff_parseln(struct roff *r, int ln, char **bufp,
		size_t *szp, int pos, int *offs)
{
	enum rofft	 t;
	enum rofferr	 e;
	int		 ppos, ctl;

	/*
	 * Expand strings and number registers and check the
	 * escape sequences before looking at the line at all.
	 */

	e = roff_res(r, bufp, szp, ln, pos);
	if (ROFF_IGN == e)
		return(e);
	assert(ROFF_CONT == e);

	/* Remember the start; pos moves behind the control character. */

	ppos = pos;
	ctl = roff_getcontrol(r, *bufp, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.  If a scope isn't open and
	 * we're not a macro, just let it through.
	 * Finally, if there's an equation scope open, divert it into it
	 * no matter our state.
	 */

	if (r->last && ! ctl) {
		t = r->last->tok;
		assert(roffs[t].text);
		e = (*roffs[t].text)
			(r, t, bufp, szp, ln, pos, pos, offs);
		assert(ROFF_IGN == e || ROFF_CONT == e);
		if (ROFF_CONT != e)
			return(e);
	}
	if (r->eqn)
		return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
	if ( ! ctl) {
		if (r->tbl)
			return(tbl_read(r->tbl, ln, *bufp, pos));
		return(roff_parsetext(bufp, szp, pos, offs));
	}

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		assert(roffs[t].sub);
		return((*roffs[t].sub)
				(r, t, bufp, szp,
				 ln, ppos, pos, offs));
	}

	/*
	 * Lastly, as we've no scope open, try to look up and execute
	 * the new macro.  If no macro is found, simply return and let
	 * the compilers handle it.
	 */

	if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
		return(ROFF_CONT);

	assert(roffs[t].proc);
	return((*roffs[t].proc)
			(r, t, bufp, szp,
			 ln, ppos, pos, offs));
}
754
755
756 void
757 roff_endparse(struct roff *r)
758 {
759
760 if (r->last)
761 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
762 r->last->line, r->last->col, NULL);
763
764 if (r->eqn) {
765 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
766 r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
767 eqn_end(&r->eqn);
768 }
769
770 if (r->tbl) {
771 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
772 r->tbl->line, r->tbl->pos, NULL);
773 tbl_end(&r->tbl);
774 }
775 }
776
777 /*
778 * Parse a roff node's type from the input buffer. This must be in the
779 * form of ".foo xxx" in the usual way.
780 */
781 static enum rofft
782 roff_parse(struct roff *r, const char *buf, int *pos)
783 {
784 const char *mac;
785 size_t maclen;
786 enum rofft t;
787
788 if ('\0' == buf[*pos] || '"' == buf[*pos] ||
789 '\t' == buf[*pos] || ' ' == buf[*pos])
790 return(ROFF_MAX);
791
792 /*
793 * We stop the macro parse at an escape, tab, space, or nil.
794 * However, `\}' is also a valid macro, so make sure we don't
795 * clobber it by seeing the `\' as the end of token.
796 */
797
798 mac = buf + *pos;
799 maclen = strcspn(mac + 1, " \\\t\0") + 1;
800
801 t = (r->current_string = roff_getstrn(r, mac, maclen))
802 ? ROFF_USERDEF : roffhash_find(mac, maclen);
803
804 *pos += (int)maclen;
805
806 while (buf[*pos] && ' ' == buf[*pos])
807 (*pos)++;
808
809 return(t);
810 }
811
812 /* ARGSUSED */
813 static enum rofferr
814 roff_cblock(ROFF_ARGS)
815 {
816
817 /*
818 * A block-close `..' should only be invoked as a child of an
819 * ignore macro, otherwise raise a warning and just ignore it.
820 */
821
822 if (NULL == r->last) {
823 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
824 return(ROFF_IGN);
825 }
826
827 switch (r->last->tok) {
828 case (ROFF_am):
829 /* FALLTHROUGH */
830 case (ROFF_ami):
831 /* FALLTHROUGH */
832 case (ROFF_am1):
833 /* FALLTHROUGH */
834 case (ROFF_de):
835 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
836 /* FALLTHROUGH */
837 case (ROFF_dei):
838 /* FALLTHROUGH */
839 case (ROFF_ig):
840 break;
841 default:
842 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
843 return(ROFF_IGN);
844 }
845
846 if ((*bufp)[pos])
847 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
848
849 roffnode_pop(r);
850 roffnode_cleanscope(r);
851 return(ROFF_IGN);
852
853 }
854
855
856 static void
857 roffnode_cleanscope(struct roff *r)
858 {
859
860 while (r->last) {
861 if (--r->last->endspan != 0)
862 break;
863 roffnode_pop(r);
864 }
865 }
866
867
868 /* ARGSUSED */
869 static enum rofferr
870 roff_ccond(ROFF_ARGS)
871 {
872
873 if (NULL == r->last) {
874 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
875 return(ROFF_IGN);
876 }
877
878 switch (r->last->tok) {
879 case (ROFF_el):
880 /* FALLTHROUGH */
881 case (ROFF_ie):
882 /* FALLTHROUGH */
883 case (ROFF_if):
884 break;
885 default:
886 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
887 return(ROFF_IGN);
888 }
889
890 if (r->last->endspan > -1) {
891 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
892 return(ROFF_IGN);
893 }
894
895 if ((*bufp)[pos])
896 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
897
898 roffnode_pop(r);
899 roffnode_cleanscope(r);
900 return(ROFF_IGN);
901 }
902
903
904 /* ARGSUSED */
905 static enum rofferr
906 roff_block(ROFF_ARGS)
907 {
908 int sv;
909 size_t sz;
910 char *name;
911
912 name = NULL;
913
914 if (ROFF_ig != tok) {
915 if ('\0' == (*bufp)[pos]) {
916 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
917 return(ROFF_IGN);
918 }
919
920 /*
921 * Re-write `de1', since we don't really care about
922 * groff's strange compatibility mode, into `de'.
923 */
924
925 if (ROFF_de1 == tok)
926 tok = ROFF_de;
927 if (ROFF_de == tok)
928 name = *bufp + pos;
929 else
930 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
931 roffs[tok].name);
932
933 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
934 pos++;
935
936 while (isspace((unsigned char)(*bufp)[pos]))
937 (*bufp)[pos++] = '\0';
938 }
939
940 roffnode_push(r, tok, name, ln, ppos);
941
942 /*
943 * At the beginning of a `de' macro, clear the existing string
944 * with the same name, if there is one. New content will be
945 * added from roff_block_text() in multiline mode.
946 */
947
948 if (ROFF_de == tok)
949 roff_setstr(r, name, "", 0);
950
951 if ('\0' == (*bufp)[pos])
952 return(ROFF_IGN);
953
954 /* If present, process the custom end-of-line marker. */
955
956 sv = pos;
957 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
958 pos++;
959
960 /*
961 * Note: groff does NOT like escape characters in the input.
962 * Instead of detecting this, we're just going to let it fly and
963 * to hell with it.
964 */
965
966 assert(pos > sv);
967 sz = (size_t)(pos - sv);
968
969 if (1 == sz && '.' == (*bufp)[sv])
970 return(ROFF_IGN);
971
972 r->last->end = mandoc_malloc(sz + 1);
973
974 memcpy(r->last->end, *bufp + sv, sz);
975 r->last->end[(int)sz] = '\0';
976
977 if ((*bufp)[pos])
978 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
979
980 return(ROFF_IGN);
981 }
982
983
984 /* ARGSUSED */
985 static enum rofferr
986 roff_block_sub(ROFF_ARGS)
987 {
988 enum rofft t;
989 int i, j;
990
991 /*
992 * First check whether a custom macro exists at this level. If
993 * it does, then check against it. This is some of groff's
994 * stranger behaviours. If we encountered a custom end-scope
995 * tag and that tag also happens to be a "real" macro, then we
996 * need to try interpreting it again as a real macro. If it's
997 * not, then return ignore. Else continue.
998 */
999
1000 if (r->last->end) {
1001 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1002 if ((*bufp)[i] != r->last->end[j])
1003 break;
1004
1005 if ('\0' == r->last->end[j] &&
1006 ('\0' == (*bufp)[i] ||
1007 ' ' == (*bufp)[i] ||
1008 '\t' == (*bufp)[i])) {
1009 roffnode_pop(r);
1010 roffnode_cleanscope(r);
1011
1012 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1013 i++;
1014
1015 pos = i;
1016 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1017 return(ROFF_RERUN);
1018 return(ROFF_IGN);
1019 }
1020 }
1021
1022 /*
1023 * If we have no custom end-query or lookup failed, then try
1024 * pulling it out of the hashtable.
1025 */
1026
1027 t = roff_parse(r, *bufp, &pos);
1028
1029 /*
1030 * Macros other than block-end are only significant
1031 * in `de' blocks; elsewhere, simply throw them away.
1032 */
1033 if (ROFF_cblock != t) {
1034 if (ROFF_de == tok)
1035 roff_setstr(r, r->last->name, *bufp + ppos, 1);
1036 return(ROFF_IGN);
1037 }
1038
1039 assert(roffs[t].proc);
1040 return((*roffs[t].proc)(r, t, bufp, szp,
1041 ln, ppos, pos, offs));
1042 }
1043
1044
1045 /* ARGSUSED */
1046 static enum rofferr
1047 roff_block_text(ROFF_ARGS)
1048 {
1049
1050 if (ROFF_de == tok)
1051 roff_setstr(r, r->last->name, *bufp + pos, 1);
1052
1053 return(ROFF_IGN);
1054 }
1055
1056
1057 /* ARGSUSED */
1058 static enum rofferr
1059 roff_cond_sub(ROFF_ARGS)
1060 {
1061 enum rofft t;
1062 enum roffrule rr;
1063 char *ep;
1064
1065 rr = r->last->rule;
1066 roffnode_cleanscope(r);
1067 t = roff_parse(r, *bufp, &pos);
1068
1069 /*
1070 * Fully handle known macros when they are structurally
1071 * required or when the conditional evaluated to true.
1072 */
1073
1074 if ((ROFF_MAX != t) &&
1075 (ROFF_ccond == t || ROFFRULE_ALLOW == rr ||
1076 ROFFMAC_STRUCT & roffs[t].flags)) {
1077 assert(roffs[t].proc);
1078 return((*roffs[t].proc)(r, t, bufp, szp,
1079 ln, ppos, pos, offs));
1080 }
1081
1082 /* Always check for the closing delimiter `\}'. */
1083
1084 ep = &(*bufp)[pos];
1085 while (NULL != (ep = strchr(ep, '\\'))) {
1086 if ('}' != *(++ep))
1087 continue;
1088
1089 /*
1090 * If we're at the end of line, then just chop
1091 * off the \} and resize the buffer.
1092 * If we aren't, then convert it to spaces.
1093 */
1094
1095 if ('\0' == *(ep + 1)) {
1096 *--ep = '\0';
1097 *szp -= 2;
1098 } else
1099 *(ep - 1) = *ep = ' ';
1100
1101 roff_ccond(r, ROFF_ccond, bufp, szp,
1102 ln, pos, pos + 2, offs);
1103 break;
1104 }
1105 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1106 }
1107
1108 /* ARGSUSED */
1109 static enum rofferr
1110 roff_cond_text(ROFF_ARGS)
1111 {
1112 char *ep;
1113 enum roffrule rr;
1114
1115 rr = r->last->rule;
1116 roffnode_cleanscope(r);
1117
1118 ep = &(*bufp)[pos];
1119 for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1120 ep++;
1121 if ('}' != *ep)
1122 continue;
1123 *ep = '&';
1124 roff_ccond(r, ROFF_ccond, bufp, szp,
1125 ln, pos, pos + 2, offs);
1126 }
1127 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1128 }
1129
/*
 * Parse an optionally negative decimal integer in v,
 * starting at *pos.
 * On success, store the number in *res, advance *pos behind the
 * last digit and return 1.
 * If there are no digits, return 0 and leave *pos alone; *res is
 * set to 0 in that case.
 * No overflow checking is done.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	/* Shift the digits seen so far and add the new one. */

	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';

	/* A sign alone is not a number. */

	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1151
/*
 * Parse a comparison operator in v, starting at *pos.
 * The operators are `=', `==', `<', `>', `<=' and `>='.
 * On success, store the operator in *res - using 'l' for `<=' and
 * 'g' for `>=' - advance *pos behind it and return that character.
 * If there is no operator, return 0 and leave *pos alone; *res
 * holds the character found at *pos in that case.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int e;

	*res = v[*pos];

	switch (*res) {
	case '=':
		/* FALLTHROUGH */
	case '>':
		/* FALLTHROUGH */
	case '<':
		break;
	default:
		return(0);
	}

	/*
	 * Only look at the next character now that the current
	 * one is known not to be the end of the string.
	 */

	e = v[*pos + 1] == '=';

	if (e && '>' == *res)
		*res = 'g';
	else if (e && '<' == *res)
		*res = 'l';

	*pos += 1 + e;

	return(*res);
}
1179
1180 static enum roffrule
1181 roff_evalcond(const char *v, int *pos)
1182 {
1183 int not, lh, rh;
1184 char op;
1185
1186 switch (v[*pos]) {
1187 case ('n'):
1188 (*pos)++;
1189 return(ROFFRULE_ALLOW);
1190 case ('e'):
1191 /* FALLTHROUGH */
1192 case ('o'):
1193 /* FALLTHROUGH */
1194 case ('t'):
1195 (*pos)++;
1196 return(ROFFRULE_DENY);
1197 case ('!'):
1198 (*pos)++;
1199 not = 1;
1200 break;
1201 default:
1202 not = 0;
1203 break;
1204 }
1205
1206 if (!roff_getnum(v, pos, &lh))
1207 return ROFFRULE_DENY;
1208 if (!roff_getop(v, pos, &op)) {
1209 if (lh < 0)
1210 lh = 0;
1211 goto out;
1212 }
1213 if (!roff_getnum(v, pos, &rh))
1214 return ROFFRULE_DENY;
1215 switch (op) {
1216 case 'g':
1217 lh = lh >= rh;
1218 break;
1219 case 'l':
1220 lh = lh <= rh;
1221 break;
1222 case '=':
1223 lh = lh == rh;
1224 break;
1225 case '>':
1226 lh = lh > rh;
1227 break;
1228 case '<':
1229 lh = lh < rh;
1230 break;
1231 default:
1232 return ROFFRULE_DENY;
1233 }
1234 out:
1235 if (not)
1236 lh = !lh;
1237 return lh ? ROFFRULE_ALLOW : ROFFRULE_DENY;
1238 }
1239
1240 /* ARGSUSED */
1241 static enum rofferr
1242 roff_line_ignore(ROFF_ARGS)
1243 {
1244
1245 return(ROFF_IGN);
1246 }
1247
1248 /* ARGSUSED */
1249 static enum rofferr
1250 roff_cond(ROFF_ARGS)
1251 {
1252
1253 roffnode_push(r, tok, NULL, ln, ppos);
1254
1255 /*
1256 * An `.el' has no conditional body: it will consume the value
1257 * of the current rstack entry set in prior `ie' calls or
1258 * defaults to DENY.
1259 *
1260 * If we're not an `el', however, then evaluate the conditional.
1261 */
1262
1263 r->last->rule = ROFF_el == tok ?
1264 (r->rstackpos < 0 ?
1265 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1266 roff_evalcond(*bufp, &pos);
1267
1268 /*
1269 * An if-else will put the NEGATION of the current evaluated
1270 * conditional into the stack of rules.
1271 */
1272
1273 if (ROFF_ie == tok) {
1274 if (r->rstackpos == RSTACK_MAX - 1) {
1275 mandoc_msg(MANDOCERR_MEM,
1276 r->parse, ln, ppos, NULL);
1277 return(ROFF_ERR);
1278 }
1279 r->rstack[++r->rstackpos] =
1280 ROFFRULE_DENY == r->last->rule ?
1281 ROFFRULE_ALLOW : ROFFRULE_DENY;
1282 }
1283
1284 /* If the parent has false as its rule, then so do we. */
1285
1286 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1287 r->last->rule = ROFFRULE_DENY;
1288
1289 /*
1290 * Determine scope.
1291 * If there is nothing on the line after the conditional,
1292 * not even whitespace, use next-line scope.
1293 */
1294
1295 if ('\0' == (*bufp)[pos]) {
1296 r->last->endspan = 2;
1297 goto out;
1298 }
1299
1300 while (' ' == (*bufp)[pos])
1301 pos++;
1302
1303 /* An opening brace requests multiline scope. */
1304
1305 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1306 r->last->endspan = -1;
1307 pos += 2;
1308 goto out;
1309 }
1310
1311 /*
1312 * Anything else following the conditional causes
1313 * single-line scope. Warn if the scope contains
1314 * nothing but trailing whitespace.
1315 */
1316
1317 if ('\0' == (*bufp)[pos])
1318 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1319
1320 r->last->endspan = 1;
1321
1322 out:
1323 *offs = pos;
1324 return(ROFF_RERUN);
1325 }
1326
1327
1328 /* ARGSUSED */
1329 static enum rofferr
1330 roff_ds(ROFF_ARGS)
1331 {
1332 char *name, *string;
1333
1334 /*
1335 * A symbol is named by the first word following the macro
1336 * invocation up to a space. Its value is anything after the
1337 * name's trailing whitespace and optional double-quote. Thus,
1338 *
1339 * [.ds foo "bar " ]
1340 *
1341 * will have `bar " ' as its value.
1342 */
1343
1344 string = *bufp + pos;
1345 name = roff_getname(r, &string, ln, pos);
1346 if ('\0' == *name)
1347 return(ROFF_IGN);
1348
1349 /* Read past initial double-quote. */
1350 if ('"' == *string)
1351 string++;
1352
1353 /* The rest is the value. */
1354 roff_setstr(r, name, string, 0);
1355 return(ROFF_IGN);
1356 }
1357
1358 void
1359 roff_setreg(struct roff *r, const char *name, int val)
1360 {
1361 struct roffreg *reg;
1362
1363 /* Search for an existing register with the same name. */
1364 reg = r->regtab;
1365
1366 while (reg && strcmp(name, reg->key.p))
1367 reg = reg->next;
1368
1369 if (NULL == reg) {
1370 /* Create a new register. */
1371 reg = mandoc_malloc(sizeof(struct roffreg));
1372 reg->key.p = mandoc_strdup(name);
1373 reg->key.sz = strlen(name);
1374 reg->next = r->regtab;
1375 r->regtab = reg;
1376 }
1377
1378 reg->val = val;
1379 }
1380
1381 int
1382 roff_getreg(const struct roff *r, const char *name)
1383 {
1384 struct roffreg *reg;
1385
1386 for (reg = r->regtab; reg; reg = reg->next)
1387 if (0 == strcmp(name, reg->key.p))
1388 return(reg->val);
1389
1390 return(0);
1391 }
1392
1393 static int
1394 roff_getregn(const struct roff *r, const char *name, size_t len)
1395 {
1396 struct roffreg *reg;
1397
1398 for (reg = r->regtab; reg; reg = reg->next)
1399 if (len == reg->key.sz &&
1400 0 == strncmp(name, reg->key.p, len))
1401 return(reg->val);
1402
1403 return(0);
1404 }
1405
1406 static void
1407 roff_freereg(struct roffreg *reg)
1408 {
1409 struct roffreg *old_reg;
1410
1411 while (NULL != reg) {
1412 free(reg->key.p);
1413 old_reg = reg;
1414 reg = reg->next;
1415 free(old_reg);
1416 }
1417 }
1418
1419 /* ARGSUSED */
1420 static enum rofferr
1421 roff_nr(ROFF_ARGS)
1422 {
1423 const char *key;
1424 char *val;
1425 int iv;
1426
1427 val = *bufp + pos;
1428 key = roff_getname(r, &val, ln, pos);
1429
1430 iv = mandoc_strntoi(val, strlen(val), 10);
1431
1432 roff_setreg(r, key, iv);
1433
1434 return(ROFF_IGN);
1435 }
1436
1437 /* ARGSUSED */
1438 static enum rofferr
1439 roff_rm(ROFF_ARGS)
1440 {
1441 const char *name;
1442 char *cp;
1443
1444 cp = *bufp + pos;
1445 while ('\0' != *cp) {
1446 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1447 if ('\0' != *name)
1448 roff_setstr(r, name, NULL, 0);
1449 }
1450 return(ROFF_IGN);
1451 }
1452
1453 /* ARGSUSED */
1454 static enum rofferr
1455 roff_it(ROFF_ARGS)
1456 {
1457 char *cp;
1458 size_t len;
1459 int iv;
1460
1461 /* Parse the number of lines. */
1462 cp = *bufp + pos;
1463 len = strcspn(cp, " \t");
1464 cp[len] = '\0';
1465 if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1466 mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1467 ln, ppos, *bufp + 1);
1468 return(ROFF_IGN);
1469 }
1470 cp += len + 1;
1471
1472 /* Arm the input line trap. */
1473 roffit_lines = iv;
1474 roffit_macro = mandoc_strdup(cp);
1475 return(ROFF_IGN);
1476 }
1477
1478 /* ARGSUSED */
1479 static enum rofferr
1480 roff_Dd(ROFF_ARGS)
1481 {
1482 const char *const *cp;
1483
1484 if (MPARSE_MDOC != r->parsetype)
1485 for (cp = __mdoc_reserved; *cp; cp++)
1486 roff_setstr(r, *cp, NULL, 0);
1487
1488 return(ROFF_CONT);
1489 }
1490
1491 /* ARGSUSED */
1492 static enum rofferr
1493 roff_TH(ROFF_ARGS)
1494 {
1495 const char *const *cp;
1496
1497 if (MPARSE_MDOC != r->parsetype)
1498 for (cp = __man_reserved; *cp; cp++)
1499 roff_setstr(r, *cp, NULL, 0);
1500
1501 return(ROFF_CONT);
1502 }
1503
1504 /* ARGSUSED */
1505 static enum rofferr
1506 roff_TE(ROFF_ARGS)
1507 {
1508
1509 if (NULL == r->tbl)
1510 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1511 else
1512 tbl_end(&r->tbl);
1513
1514 return(ROFF_IGN);
1515 }
1516
1517 /* ARGSUSED */
1518 static enum rofferr
1519 roff_T_(ROFF_ARGS)
1520 {
1521
1522 if (NULL == r->tbl)
1523 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1524 else
1525 tbl_restart(ppos, ln, r->tbl);
1526
1527 return(ROFF_IGN);
1528 }
1529
#if 0
/*
 * Currently compiled out.
 * End the open equation, if there is one.
 * Returns 1 if eqn_end() reported ROFF_EQN, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{
	if (r->eqn == NULL)
		return(0);

	return(eqn_end(&r->eqn) == ROFF_EQN);
}
#endif
1538
1539 static void
1540 roff_openeqn(struct roff *r, const char *name, int line,
1541 int offs, const char *buf)
1542 {
1543 struct eqn_node *e;
1544 int poff;
1545
1546 assert(NULL == r->eqn);
1547 e = eqn_alloc(name, offs, line, r->parse);
1548
1549 if (r->last_eqn)
1550 r->last_eqn->next = e;
1551 else
1552 r->first_eqn = r->last_eqn = e;
1553
1554 r->eqn = r->last_eqn = e;
1555
1556 if (buf) {
1557 poff = 0;
1558 eqn_read(&r->eqn, line, buf, offs, &poff);
1559 }
1560 }
1561
1562 /* ARGSUSED */
1563 static enum rofferr
1564 roff_EQ(ROFF_ARGS)
1565 {
1566
1567 roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1568 return(ROFF_IGN);
1569 }
1570
1571 /* ARGSUSED */
1572 static enum rofferr
1573 roff_EN(ROFF_ARGS)
1574 {
1575
1576 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1577 return(ROFF_IGN);
1578 }
1579
1580 /* ARGSUSED */
1581 static enum rofferr
1582 roff_TS(ROFF_ARGS)
1583 {
1584 struct tbl_node *tbl;
1585
1586 if (r->tbl) {
1587 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1588 tbl_end(&r->tbl);
1589 }
1590
1591 tbl = tbl_alloc(ppos, ln, r->parse);
1592
1593 if (r->last_tbl)
1594 r->last_tbl->next = tbl;
1595 else
1596 r->first_tbl = r->last_tbl = tbl;
1597
1598 r->tbl = r->last_tbl = tbl;
1599 return(ROFF_IGN);
1600 }
1601
1602 /* ARGSUSED */
1603 static enum rofferr
1604 roff_cc(ROFF_ARGS)
1605 {
1606 const char *p;
1607
1608 p = *bufp + pos;
1609
1610 if ('\0' == *p || '.' == (r->control = *p++))
1611 r->control = 0;
1612
1613 if ('\0' != *p)
1614 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1615
1616 return(ROFF_IGN);
1617 }
1618
1619 /* ARGSUSED */
1620 static enum rofferr
1621 roff_tr(ROFF_ARGS)
1622 {
1623 const char *p, *first, *second;
1624 size_t fsz, ssz;
1625 enum mandoc_esc esc;
1626
1627 p = *bufp + pos;
1628
1629 if ('\0' == *p) {
1630 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1631 return(ROFF_IGN);
1632 }
1633
1634 while ('\0' != *p) {
1635 fsz = ssz = 1;
1636
1637 first = p++;
1638 if ('\\' == *first) {
1639 esc = mandoc_escape(&p, NULL, NULL);
1640 if (ESCAPE_ERROR == esc) {
1641 mandoc_msg
1642 (MANDOCERR_BADESCAPE, r->parse,
1643 ln, (int)(p - *bufp), NULL);
1644 return(ROFF_IGN);
1645 }
1646 fsz = (size_t)(p - first);
1647 }
1648
1649 second = p++;
1650 if ('\\' == *second) {
1651 esc = mandoc_escape(&p, NULL, NULL);
1652 if (ESCAPE_ERROR == esc) {
1653 mandoc_msg
1654 (MANDOCERR_BADESCAPE, r->parse,
1655 ln, (int)(p - *bufp), NULL);
1656 return(ROFF_IGN);
1657 }
1658 ssz = (size_t)(p - second);
1659 } else if ('\0' == *second) {
1660 mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1661 ln, (int)(p - *bufp), NULL);
1662 second = " ";
1663 p--;
1664 }
1665
1666 if (fsz > 1) {
1667 roff_setstrn(&r->xmbtab, first,
1668 fsz, second, ssz, 0);
1669 continue;
1670 }
1671
1672 if (NULL == r->xtab)
1673 r->xtab = mandoc_calloc
1674 (128, sizeof(struct roffstr));
1675
1676 free(r->xtab[(int)*first].p);
1677 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1678 r->xtab[(int)*first].sz = ssz;
1679 }
1680
1681 return(ROFF_IGN);
1682 }
1683
1684 /* ARGSUSED */
1685 static enum rofferr
1686 roff_so(ROFF_ARGS)
1687 {
1688 char *name;
1689
1690 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1691
1692 /*
1693 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1694 * opening anything that's not in our cwd or anything beneath
1695 * it. Thus, explicitly disallow traversing up the file-system
1696 * or using absolute paths.
1697 */
1698
1699 name = *bufp + pos;
1700 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1701 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1702 return(ROFF_ERR);
1703 }
1704
1705 *offs = pos;
1706 return(ROFF_SO);
1707 }
1708
1709 /* ARGSUSED */
1710 static enum rofferr
1711 roff_userdef(ROFF_ARGS)
1712 {
1713 const char *arg[9];
1714 char *cp, *n1, *n2;
1715 int i;
1716
1717 /*
1718 * Collect pointers to macro argument strings
1719 * and null-terminate them.
1720 */
1721 cp = *bufp + pos;
1722 for (i = 0; i < 9; i++)
1723 arg[i] = '\0' == *cp ? "" :
1724 mandoc_getarg(r->parse, &cp, ln, &pos);
1725
1726 /*
1727 * Expand macro arguments.
1728 */
1729 *szp = 0;
1730 n1 = cp = mandoc_strdup(r->current_string);
1731 while (NULL != (cp = strstr(cp, "\\$"))) {
1732 i = cp[2] - '1';
1733 if (0 > i || 8 < i) {
1734 /* Not an argument invocation. */
1735 cp += 2;
1736 continue;
1737 }
1738
1739 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1740 n2 = mandoc_malloc(*szp);
1741
1742 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1743 strlcat(n2, arg[i], *szp);
1744 strlcat(n2, cp + 3, *szp);
1745
1746 cp = n2 + (cp - n1);
1747 free(n1);
1748 n1 = n2;
1749 }
1750
1751 /*
1752 * Replace the macro invocation
1753 * by the expanded macro.
1754 */
1755 free(*bufp);
1756 *bufp = n1;
1757 if (0 == *szp)
1758 *szp = strlen(*bufp) + 1;
1759
1760 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1761 ROFF_REPARSE : ROFF_APPEND);
1762 }
1763
1764 static char *
1765 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1766 {
1767 char *name, *cp;
1768
1769 name = *cpp;
1770 if ('\0' == *name)
1771 return(name);
1772
1773 /* Read until end of name. */
1774 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1775 if ('\\' != *cp)
1776 continue;
1777 cp++;
1778 if ('\\' == *cp)
1779 continue;
1780 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1781 *cp = '\0';
1782 name = cp;
1783 }
1784
1785 /* Nil-terminate name. */
1786 if ('\0' != *cp)
1787 *(cp++) = '\0';
1788
1789 /* Read past spaces. */
1790 while (' ' == *cp)
1791 cp++;
1792
1793 *cpp = cp;
1794 return(name);
1795 }
1796
1797 /*
1798 * Store *string into the user-defined string called *name.
1799 * In multiline mode, append to an existing entry and append '\n';
1800 * else replace the existing entry, if there is one.
1801 * To clear an existing entry, call with (*r, *name, NULL, 0).
1802 */
1803 static void
1804 roff_setstr(struct roff *r, const char *name, const char *string,
1805 int multiline)
1806 {
1807
1808 roff_setstrn(&r->strtab, name, strlen(name), string,
1809 string ? strlen(string) : 0, multiline);
1810 }
1811
1812 static void
1813 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1814 const char *string, size_t stringsz, int multiline)
1815 {
1816 struct roffkv *n;
1817 char *c;
1818 int i;
1819 size_t oldch, newch;
1820
1821 /* Search for an existing string with the same name. */
1822 n = *r;
1823
1824 while (n && strcmp(name, n->key.p))
1825 n = n->next;
1826
1827 if (NULL == n) {
1828 /* Create a new string table entry. */
1829 n = mandoc_malloc(sizeof(struct roffkv));
1830 n->key.p = mandoc_strndup(name, namesz);
1831 n->key.sz = namesz;
1832 n->val.p = NULL;
1833 n->val.sz = 0;
1834 n->next = *r;
1835 *r = n;
1836 } else if (0 == multiline) {
1837 /* In multiline mode, append; else replace. */
1838 free(n->val.p);
1839 n->val.p = NULL;
1840 n->val.sz = 0;
1841 }
1842
1843 if (NULL == string)
1844 return;
1845
1846 /*
1847 * One additional byte for the '\n' in multiline mode,
1848 * and one for the terminating '\0'.
1849 */
1850 newch = stringsz + (multiline ? 2u : 1u);
1851
1852 if (NULL == n->val.p) {
1853 n->val.p = mandoc_malloc(newch);
1854 *n->val.p = '\0';
1855 oldch = 0;
1856 } else {
1857 oldch = n->val.sz;
1858 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1859 }
1860
1861 /* Skip existing content in the destination buffer. */
1862 c = n->val.p + (int)oldch;
1863
1864 /* Append new content to the destination buffer. */
1865 i = 0;
1866 while (i < (int)stringsz) {
1867 /*
1868 * Rudimentary roff copy mode:
1869 * Handle escaped backslashes.
1870 */
1871 if ('\\' == string[i] && '\\' == string[i + 1])
1872 i++;
1873 *c++ = string[i++];
1874 }
1875
1876 /* Append terminating bytes. */
1877 if (multiline)
1878 *c++ = '\n';
1879
1880 *c = '\0';
1881 n->val.sz = (int)(c - n->val.p);
1882 }
1883
1884 static const char *
1885 roff_getstrn(const struct roff *r, const char *name, size_t len)
1886 {
1887 const struct roffkv *n;
1888
1889 for (n = r->strtab; n; n = n->next)
1890 if (0 == strncmp(name, n->key.p, len) &&
1891 '\0' == n->key.p[(int)len])
1892 return(n->val.p);
1893
1894 return(NULL);
1895 }
1896
1897 static void
1898 roff_freestr(struct roffkv *r)
1899 {
1900 struct roffkv *n, *nn;
1901
1902 for (n = r; n; n = nn) {
1903 free(n->key.p);
1904 free(n->val.p);
1905 nn = n->next;
1906 free(n);
1907 }
1908 }
1909
1910 const struct tbl_span *
1911 roff_span(const struct roff *r)
1912 {
1913
1914 return(r->tbl ? tbl_span(r->tbl) : NULL);
1915 }
1916
1917 const struct eqn *
1918 roff_eqn(const struct roff *r)
1919 {
1920
1921 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1922 }
1923
1924 /*
1925 * Duplicate an input string, making the appropriate character
1926 * conversations (as stipulated by `tr') along the way.
1927 * Returns a heap-allocated string with all the replacements made.
1928 */
1929 char *
1930 roff_strdup(const struct roff *r, const char *p)
1931 {
1932 const struct roffkv *cp;
1933 char *res;
1934 const char *pp;
1935 size_t ssz, sz;
1936 enum mandoc_esc esc;
1937
1938 if (NULL == r->xmbtab && NULL == r->xtab)
1939 return(mandoc_strdup(p));
1940 else if ('\0' == *p)
1941 return(mandoc_strdup(""));
1942
1943 /*
1944 * Step through each character looking for term matches
1945 * (remember that a `tr' can be invoked with an escape, which is
1946 * a glyph but the escape is multi-character).
1947 * We only do this if the character hash has been initialised
1948 * and the string is >0 length.
1949 */
1950
1951 res = NULL;
1952 ssz = 0;
1953
1954 while ('\0' != *p) {
1955 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1956 sz = r->xtab[(int)*p].sz;
1957 res = mandoc_realloc(res, ssz + sz + 1);
1958 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1959 ssz += sz;
1960 p++;
1961 continue;
1962 } else if ('\\' != *p) {
1963 res = mandoc_realloc(res, ssz + 2);
1964 res[ssz++] = *p++;
1965 continue;
1966 }
1967
1968 /* Search for term matches. */
1969 for (cp = r->xmbtab; cp; cp = cp->next)
1970 if (0 == strncmp(p, cp->key.p, cp->key.sz))
1971 break;
1972
1973 if (NULL != cp) {
1974 /*
1975 * A match has been found.
1976 * Append the match to the array and move
1977 * forward by its keysize.
1978 */
1979 res = mandoc_realloc
1980 (res, ssz + cp->val.sz + 1);
1981 memcpy(res + ssz, cp->val.p, cp->val.sz);
1982 ssz += cp->val.sz;
1983 p += (int)cp->key.sz;
1984 continue;
1985 }
1986
1987 /*
1988 * Handle escapes carefully: we need to copy
1989 * over just the escape itself, or else we might
1990 * do replacements within the escape itself.
1991 * Make sure to pass along the bogus string.
1992 */
1993 pp = p++;
1994 esc = mandoc_escape(&p, NULL, NULL);
1995 if (ESCAPE_ERROR == esc) {
1996 sz = strlen(pp);
1997 res = mandoc_realloc(res, ssz + sz + 1);
1998 memcpy(res + ssz, pp, sz);
1999 break;
2000 }
2001 /*
2002 * We bail out on bad escapes.
2003 * No need to warn: we already did so when
2004 * roff_res() was called.
2005 */
2006 sz = (int)(p - pp);
2007 res = mandoc_realloc(res, ssz + sz + 1);
2008 memcpy(res + ssz, pp, sz);
2009 ssz += sz;
2010 }
2011
2012 res[(int)ssz] = '\0';
2013 return(res);
2014 }
2015
2016 /*
2017 * Find out whether a line is a macro line or not.
2018 * If it is, adjust the current position and return one; if it isn't,
2019 * return zero and don't change the current position.
2020 * If the control character has been set with `.cc', then let that grain
2021 * precedence.
2022 * This is slighly contrary to groff, where using the non-breaking
2023 * control character when `cc' has been invoked will cause the
2024 * non-breaking macro contents to be printed verbatim.
2025 */
2026 int
2027 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2028 {
2029 int pos;
2030
2031 pos = *ppos;
2032
2033 if (0 != r->control && cp[pos] == r->control)
2034 pos++;
2035 else if (0 != r->control)
2036 return(0);
2037 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2038 pos += 2;
2039 else if ('.' == cp[pos] || '\'' == cp[pos])
2040 pos++;
2041 else
2042 return(0);
2043
2044 while (' ' == cp[pos] || '\t' == cp[pos])
2045 pos++;
2046
2047 *ppos = pos;
2048 return(1);
2049 }