]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Step 2: blow away roff.h, as all of its functions have been moved into
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.129 2011/03/22 09:50:11 kristaps Exp $ */
2 /*
3 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <ctype.h>
25 #include <limits.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <stdio.h>
29
30 #include "mandoc.h"
31 #include "libroff.h"
32 #include "libmandoc.h"
33
/* Number of slots in the `ie' rule stack kept in struct roff. */
#define	RSTACK_MAX	128

/* Non-zero if the character opens a control line (`.' or `''). */
#define	ROFF_CTL(c) \
	((c) == '.' || (c) == '\'')
38
/*
 * Request tokens.  The values double as indices into roffs[] (and
 * roff_hash_find() derives them from a position in that table), so
 * the order here must stay in sync with the order of roffs[].
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* the block-closing `..' */
	ROFF_ccond,	/* the conditional-closing `\}'; FIXME: remove this. */
	ROFF_USERDEF,	/* a user-defined macro; not entered in the hash */
	ROFF_MAX	/* table size; also the "not found" value */
};
73
/* Outcome of a conditional: whether its body is passed on or dropped. */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
78
/* One entry in the singly linked list of user-defined strings/macros. */
struct	roffstr {
	char		*name;		/* key of symbol */
	char		*string;	/* current value (may be NULL) */
	struct roffstr	*next;		/* next in list */
};
84
85 struct roff {
86 struct mparse *parse; /* parse point */
87 struct roffnode *last; /* leaf of stack */
88 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
89 int rstackpos; /* position in rstack */
90 struct regset *regs; /* read/writable registers */
91 struct roffstr *first_string; /* user-defined strings & macros */
92 const char *current_string; /* value of last called user macro */
93 struct tbl_node *first_tbl; /* first table parsed */
94 struct tbl_node *last_tbl; /* last table parsed */
95 struct tbl_node *tbl; /* current table being parsed */
96 struct eqn_node *last_eqn; /* last equation parsed */
97 struct eqn_node *first_eqn; /* first equation parsed */
98 struct eqn_node *eqn; /* current equation being parsed */
99 };
100
101 struct roffnode {
102 enum rofft tok; /* type of node */
103 struct roffnode *parent; /* up one in stack */
104 int line; /* parse line */
105 int col; /* parse col */
106 char *name; /* node name, e.g. macro name */
107 char *end; /* end-rules: custom token */
108 int endspan; /* end-rules: next-line or infty */
109 enum roffrule rule; /* current evaluation rule */
110 };
111
/* Parameter list common to every request handler. */
#define	ROFF_ARGS	struct roff *r,	/* parse ctx */ \
			enum rofft tok,	/* tok of macro */ \
			char **bufp,	/* input buffer */ \
			size_t *szp,	/* size of input buffer */ \
			int ln,		/* parse line */ \
			int ppos,	/* original pos in buffer */ \
			int pos,	/* current pos in buffer */ \
			int *offs	/* reset offset of buffer data */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
122
123 struct roffmac {
124 const char *name; /* macro name */
125 roffproc proc; /* process new macro */
126 roffproc text; /* process as child text of macro */
127 roffproc sub; /* process as child of macro */
128 int flags;
129 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
130 struct roffmac *next;
131 };
132
/* Request handlers and string-table helpers defined later in this file. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_freestr(struct roff *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	int		 roff_res(struct roff *,
				char **, size_t *, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
161
/*
 * Request names are hashed on their first character only, which has
 * to lie in the range ASCII_LO..ASCII_HI; see roff_hash_find().
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* One chain of requests per possible first character. */
static	struct roffmac	*hash[HASHWIDTH];
169
170 static struct roffmac roffs[ROFF_MAX] = {
171 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
172 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
173 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
174 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
175 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
176 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
177 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
178 { "ds", roff_ds, NULL, NULL, 0, NULL },
179 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
180 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
181 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
182 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
183 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
184 { "it", roff_line_ignore, NULL, NULL, 0, NULL },
185 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
186 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
187 { "nr", roff_nr, NULL, NULL, 0, NULL },
188 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
189 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
190 { "rm", roff_rm, NULL, NULL, 0, NULL },
191 { "so", roff_so, NULL, NULL, 0, NULL },
192 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
193 { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
194 { "TS", roff_TS, NULL, NULL, 0, NULL },
195 { "TE", roff_TE, NULL, NULL, 0, NULL },
196 { "T&", roff_T_, NULL, NULL, 0, NULL },
197 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
198 { "EN", roff_EN, NULL, NULL, 0, NULL },
199 { ".", roff_cblock, NULL, NULL, 0, NULL },
200 { "\\}", roff_ccond, NULL, NULL, 0, NULL },
201 { NULL, roff_userdef, NULL, NULL, 0, NULL },
202 };
203
/* Stack, hash and parsing helpers defined later in this file. */
static	void		 roff_free1(struct roff *);
static	enum rofft	 roff_hash_find(const char *, size_t);
static	void		 roff_hash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	void		 roffnode_pop(struct roff *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	int		 roff_parse_nat(const char *, unsigned int *);

/* Bucket index of a request name; see roff_hash_find(). */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)
216
217 static void
218 roff_hash_init(void)
219 {
220 struct roffmac *n;
221 int buc, i;
222
223 for (i = 0; i < (int)ROFF_USERDEF; i++) {
224 assert(roffs[i].name[0] >= ASCII_LO);
225 assert(roffs[i].name[0] <= ASCII_HI);
226
227 buc = ROFF_HASH(roffs[i].name);
228
229 if (NULL != (n = hash[buc])) {
230 for ( ; n->next; n = n->next)
231 /* Do nothing. */ ;
232 n->next = &roffs[i];
233 } else
234 hash[buc] = &roffs[i];
235 }
236 }
237
238
239 /*
240 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
241 * the nil-terminated string name could be found.
242 */
243 static enum rofft
244 roff_hash_find(const char *p, size_t s)
245 {
246 int buc;
247 struct roffmac *n;
248
249 /*
250 * libroff has an extremely simple hashtable, for the time
251 * being, which simply keys on the first character, which must
252 * be printable, then walks a chain. It works well enough until
253 * optimised.
254 */
255
256 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
257 return(ROFF_MAX);
258
259 buc = ROFF_HASH(p);
260
261 if (NULL == (n = hash[buc]))
262 return(ROFF_MAX);
263 for ( ; n; n = n->next)
264 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
265 return((enum rofft)(n - roffs));
266
267 return(ROFF_MAX);
268 }
269
270
271 /*
272 * Pop the current node off of the stack of roff instructions currently
273 * pending.
274 */
275 static void
276 roffnode_pop(struct roff *r)
277 {
278 struct roffnode *p;
279
280 assert(r->last);
281 p = r->last;
282
283 if (ROFF_el == p->tok)
284 if (r->rstackpos > -1)
285 r->rstackpos--;
286
287 r->last = r->last->parent;
288 free(p->name);
289 free(p->end);
290 free(p);
291 }
292
293
294 /*
295 * Push a roff node onto the instruction stack. This must later be
296 * removed with roffnode_pop().
297 */
298 static void
299 roffnode_push(struct roff *r, enum rofft tok, const char *name,
300 int line, int col)
301 {
302 struct roffnode *p;
303
304 p = mandoc_calloc(1, sizeof(struct roffnode));
305 p->tok = tok;
306 if (name)
307 p->name = mandoc_strdup(name);
308 p->parent = r->last;
309 p->line = line;
310 p->col = col;
311 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
312
313 r->last = p;
314 }
315
316
317 static void
318 roff_free1(struct roff *r)
319 {
320 struct tbl_node *t;
321 struct eqn_node *e;
322
323 while (NULL != (t = r->first_tbl)) {
324 r->first_tbl = t->next;
325 tbl_free(t);
326 }
327
328 r->first_tbl = r->last_tbl = r->tbl = NULL;
329
330 while (NULL != (e = r->first_eqn)) {
331 r->first_eqn = e->next;
332 eqn_free(e);
333 }
334
335 r->first_eqn = r->last_eqn = r->eqn = NULL;
336
337 while (r->last)
338 roffnode_pop(r);
339
340 roff_freestr(r);
341 }
342
343
344 void
345 roff_reset(struct roff *r)
346 {
347
348 roff_free1(r);
349 }
350
351
/*
 * Destroy a parser: release everything it owns, then the parser
 * structure itself.  The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
359
360
361 struct roff *
362 roff_alloc(struct regset *regs, struct mparse *parse)
363 {
364 struct roff *r;
365
366 r = mandoc_calloc(1, sizeof(struct roff));
367 r->regs = regs;
368 r->parse = parse;
369 r->rstackpos = -1;
370
371 roff_hash_init();
372 return(r);
373 }
374
375
/*
 * Expand the first reference to a defined user string (`\*x',
 * `\*(xx' or `\*[name]') found at or after pos.
 *
 * Returns 0 if a reference was expanded; *bufp then points to a newly
 * allocated line (the old one is freed) and *szp holds its size, and
 * the caller is expected to run the line through the parser again.
 * Returns 1 if the line was left untouched.
 */
static int
roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
{
	const char	*esc;	/* the backslash opening the sequence */
	const char	*key;	/* first byte of the string name */
	const char	*scan;	/* read cursor; ends up after the name */
	const char	*val;	/* replacement text */
	char		*out;	/* rebuilt line */
	size_t		 outsz;
	int		 len;	/* length of the name */
	int		 limit;	/* fixed name length, 0 for `[...]' */

	scan = *bufp + pos;

	for (;;) {
		/* Locate the next backslash and remember it. */

		scan = strchr(scan, '\\');
		if (NULL == scan)
			break;
		esc = scan++;

		/*
		 * Anything but an asterisk after the backslash is
		 * skipped as well: being escaped, it cannot open
		 * another escape sequence.
		 */

		if ('\0' == *scan)
			return(1);
		if ('*' != *scan++)
			continue;

		/* The next character selects the form of the name. */

		if ('\0' == *scan)
			return(1);

		if ('(' == *scan) {
			scan++;
			limit = 2;
		} else if ('[' == *scan) {
			scan++;
			limit = 0;
		} else
			limit = 1;

		key = scan;

		/* Walk to the end of the name. */

		len = 0;
		while (0 == limit || len < limit) {
			if ('\0' == *scan)
				return(1); /* Error. */
			if (0 == limit && ']' == *scan)
				break;
			len++;
			scan++;
		}

		/* Undefined strings are left alone; keep searching. */

		val = roff_getstrn(r, key, (size_t)len);
		if (NULL == val) {
			if (limit)
				scan--;
			continue;
		}

		/*
		 * Rebuild the line: text before the backslash, the
		 * string value, then the text after the name (also
		 * dropping the closing `]' in the bracketed form).
		 */

		outsz = *szp + strlen(val) + 1;
		out = mandoc_malloc(outsz);

		strlcpy(out, *bufp, (size_t)(esc - *bufp + 1));
		strlcat(out, val, outsz);
		strlcat(out, limit ? scan : scan + 1, outsz);

		free(*bufp);
		*bufp = out;
		*szp = outsz;
		return(0);
	}

	return(1);
}
471
472
473 enum rofferr
474 roff_parseln(struct roff *r, int ln, char **bufp,
475 size_t *szp, int pos, int *offs)
476 {
477 enum rofft t;
478 enum rofferr e;
479 int ppos;
480
481 /*
482 * Run the reserved-word filter only if we have some reserved
483 * words to fill in.
484 */
485
486 if (r->first_string && ! roff_res(r, bufp, szp, pos))
487 return(ROFF_REPARSE);
488
489 /*
490 * First, if a scope is open and we're not a macro, pass the
491 * text through the macro's filter. If a scope isn't open and
492 * we're not a macro, just let it through.
493 * Finally, if there's an equation scope open, divert it into it
494 * no matter our state.
495 */
496
497 if (r->last && ! ROFF_CTL((*bufp)[pos])) {
498 t = r->last->tok;
499 assert(roffs[t].text);
500 e = (*roffs[t].text)
501 (r, t, bufp, szp, ln, pos, pos, offs);
502 assert(ROFF_IGN == e || ROFF_CONT == e);
503 if (ROFF_CONT != e)
504 return(e);
505 if (r->eqn)
506 return(eqn_read(&r->eqn, ln, *bufp, *offs));
507 if (r->tbl)
508 return(tbl_read(r->tbl, ln, *bufp, *offs));
509 return(ROFF_CONT);
510 } else if ( ! ROFF_CTL((*bufp)[pos])) {
511 if (r->eqn)
512 return(eqn_read(&r->eqn, ln, *bufp, *offs));
513 if (r->tbl)
514 return(tbl_read(r->tbl, ln, *bufp, *offs));
515 return(ROFF_CONT);
516 } else if (r->eqn)
517 return(eqn_read(&r->eqn, ln, *bufp, *offs));
518
519 /*
520 * If a scope is open, go to the child handler for that macro,
521 * as it may want to preprocess before doing anything with it.
522 * Don't do so if an equation is open.
523 */
524
525 if (r->last) {
526 t = r->last->tok;
527 assert(roffs[t].sub);
528 return((*roffs[t].sub)
529 (r, t, bufp, szp,
530 ln, pos, pos, offs));
531 }
532
533 /*
534 * Lastly, as we've no scope open, try to look up and execute
535 * the new macro. If no macro is found, simply return and let
536 * the compilers handle it.
537 */
538
539 ppos = pos;
540 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
541 return(ROFF_CONT);
542
543 assert(roffs[t].proc);
544 return((*roffs[t].proc)
545 (r, t, bufp, szp,
546 ln, ppos, pos, offs));
547 }
548
549
550 void
551 roff_endparse(struct roff *r)
552 {
553
554 if (r->last)
555 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
556 r->last->line, r->last->col, NULL);
557
558 if (r->eqn) {
559 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
560 r->eqn->eqn.line, r->eqn->eqn.pos, NULL);
561 eqn_end(r->eqn);
562 r->eqn = NULL;
563 }
564
565 if (r->tbl) {
566 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
567 r->tbl->line, r->tbl->pos, NULL);
568 tbl_end(r->tbl);
569 r->tbl = NULL;
570 }
571 }
572
573
574 /*
575 * Parse a roff node's type from the input buffer. This must be in the
576 * form of ".foo xxx" in the usual way.
577 */
578 static enum rofft
579 roff_parse(struct roff *r, const char *buf, int *pos)
580 {
581 const char *mac;
582 size_t maclen;
583 enum rofft t;
584
585 assert(ROFF_CTL(buf[*pos]));
586 (*pos)++;
587
588 while (' ' == buf[*pos] || '\t' == buf[*pos])
589 (*pos)++;
590
591 if ('\0' == buf[*pos])
592 return(ROFF_MAX);
593
594 mac = buf + *pos;
595 maclen = strcspn(mac, " \\\t\0");
596
597 t = (r->current_string = roff_getstrn(r, mac, maclen))
598 ? ROFF_USERDEF : roff_hash_find(mac, maclen);
599
600 *pos += (int)maclen;
601 while (buf[*pos] && ' ' == buf[*pos])
602 (*pos)++;
603
604 return(t);
605 }
606
607
/*
 * Convert a decimal string to a natural number.  The whole string
 * has to be consumed and the value has to lie within 0..INT_MAX.
 * Returns 1 and stores the value in *res on success; returns 0 and
 * leaves *res alone otherwise.
 */
static int
roff_parse_nat(const char *buf, unsigned int *res)
{
	char	*end;
	long	 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Reject empty input and trailing garbage. */
	if ('\0' == buf[0] || '\0' != *end)
		return(0);

	/* Reject values strtol() had to clamp. */
	if (ERANGE == errno && (LONG_MAX == num || LONG_MIN == num))
		return(0);

	/* Reject values outside the supported range. */
	if (num < 0 || num > INT_MAX)
		return(0);

	*res = (unsigned int)num;
	return(1);
}
626
627
628 /* ARGSUSED */
629 static enum rofferr
630 roff_cblock(ROFF_ARGS)
631 {
632
633 /*
634 * A block-close `..' should only be invoked as a child of an
635 * ignore macro, otherwise raise a warning and just ignore it.
636 */
637
638 if (NULL == r->last) {
639 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
640 return(ROFF_IGN);
641 }
642
643 switch (r->last->tok) {
644 case (ROFF_am):
645 /* FALLTHROUGH */
646 case (ROFF_ami):
647 /* FALLTHROUGH */
648 case (ROFF_am1):
649 /* FALLTHROUGH */
650 case (ROFF_de):
651 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
652 /* FALLTHROUGH */
653 case (ROFF_dei):
654 /* FALLTHROUGH */
655 case (ROFF_ig):
656 break;
657 default:
658 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
659 return(ROFF_IGN);
660 }
661
662 if ((*bufp)[pos])
663 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
664
665 roffnode_pop(r);
666 roffnode_cleanscope(r);
667 return(ROFF_IGN);
668
669 }
670
671
672 static void
673 roffnode_cleanscope(struct roff *r)
674 {
675
676 while (r->last) {
677 if (--r->last->endspan < 0)
678 break;
679 roffnode_pop(r);
680 }
681 }
682
683
684 /* ARGSUSED */
685 static enum rofferr
686 roff_ccond(ROFF_ARGS)
687 {
688
689 if (NULL == r->last) {
690 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
691 return(ROFF_IGN);
692 }
693
694 switch (r->last->tok) {
695 case (ROFF_el):
696 /* FALLTHROUGH */
697 case (ROFF_ie):
698 /* FALLTHROUGH */
699 case (ROFF_if):
700 break;
701 default:
702 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
703 return(ROFF_IGN);
704 }
705
706 if (r->last->endspan > -1) {
707 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
708 return(ROFF_IGN);
709 }
710
711 if ((*bufp)[pos])
712 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
713
714 roffnode_pop(r);
715 roffnode_cleanscope(r);
716 return(ROFF_IGN);
717 }
718
719
720 /* ARGSUSED */
721 static enum rofferr
722 roff_block(ROFF_ARGS)
723 {
724 int sv;
725 size_t sz;
726 char *name;
727
728 name = NULL;
729
730 if (ROFF_ig != tok) {
731 if ('\0' == (*bufp)[pos]) {
732 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
733 return(ROFF_IGN);
734 }
735
736 /*
737 * Re-write `de1', since we don't really care about
738 * groff's strange compatibility mode, into `de'.
739 */
740
741 if (ROFF_de1 == tok)
742 tok = ROFF_de;
743 if (ROFF_de == tok)
744 name = *bufp + pos;
745 else
746 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
747 roffs[tok].name);
748
749 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
750 pos++;
751
752 while (' ' == (*bufp)[pos])
753 (*bufp)[pos++] = '\0';
754 }
755
756 roffnode_push(r, tok, name, ln, ppos);
757
758 /*
759 * At the beginning of a `de' macro, clear the existing string
760 * with the same name, if there is one. New content will be
761 * added from roff_block_text() in multiline mode.
762 */
763
764 if (ROFF_de == tok)
765 roff_setstr(r, name, "", 0);
766
767 if ('\0' == (*bufp)[pos])
768 return(ROFF_IGN);
769
770 /* If present, process the custom end-of-line marker. */
771
772 sv = pos;
773 while ((*bufp)[pos] &&
774 ' ' != (*bufp)[pos] &&
775 '\t' != (*bufp)[pos])
776 pos++;
777
778 /*
779 * Note: groff does NOT like escape characters in the input.
780 * Instead of detecting this, we're just going to let it fly and
781 * to hell with it.
782 */
783
784 assert(pos > sv);
785 sz = (size_t)(pos - sv);
786
787 if (1 == sz && '.' == (*bufp)[sv])
788 return(ROFF_IGN);
789
790 r->last->end = mandoc_malloc(sz + 1);
791
792 memcpy(r->last->end, *bufp + sv, sz);
793 r->last->end[(int)sz] = '\0';
794
795 if ((*bufp)[pos])
796 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
797
798 return(ROFF_IGN);
799 }
800
801
802 /* ARGSUSED */
803 static enum rofferr
804 roff_block_sub(ROFF_ARGS)
805 {
806 enum rofft t;
807 int i, j;
808
809 /*
810 * First check whether a custom macro exists at this level. If
811 * it does, then check against it. This is some of groff's
812 * stranger behaviours. If we encountered a custom end-scope
813 * tag and that tag also happens to be a "real" macro, then we
814 * need to try interpreting it again as a real macro. If it's
815 * not, then return ignore. Else continue.
816 */
817
818 if (r->last->end) {
819 i = pos + 1;
820 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
821 i++;
822
823 for (j = 0; r->last->end[j]; j++, i++)
824 if ((*bufp)[i] != r->last->end[j])
825 break;
826
827 if ('\0' == r->last->end[j] &&
828 ('\0' == (*bufp)[i] ||
829 ' ' == (*bufp)[i] ||
830 '\t' == (*bufp)[i])) {
831 roffnode_pop(r);
832 roffnode_cleanscope(r);
833
834 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
835 return(ROFF_RERUN);
836 return(ROFF_IGN);
837 }
838 }
839
840 /*
841 * If we have no custom end-query or lookup failed, then try
842 * pulling it out of the hashtable.
843 */
844
845 ppos = pos;
846 t = roff_parse(r, *bufp, &pos);
847
848 /*
849 * Macros other than block-end are only significant
850 * in `de' blocks; elsewhere, simply throw them away.
851 */
852 if (ROFF_cblock != t) {
853 if (ROFF_de == tok)
854 roff_setstr(r, r->last->name, *bufp + ppos, 1);
855 return(ROFF_IGN);
856 }
857
858 assert(roffs[t].proc);
859 return((*roffs[t].proc)(r, t, bufp, szp,
860 ln, ppos, pos, offs));
861 }
862
863
864 /* ARGSUSED */
865 static enum rofferr
866 roff_block_text(ROFF_ARGS)
867 {
868
869 if (ROFF_de == tok)
870 roff_setstr(r, r->last->name, *bufp + pos, 1);
871
872 return(ROFF_IGN);
873 }
874
875
876 /* ARGSUSED */
877 static enum rofferr
878 roff_cond_sub(ROFF_ARGS)
879 {
880 enum rofft t;
881 enum roffrule rr;
882
883 ppos = pos;
884 rr = r->last->rule;
885
886 /*
887 * Clean out scope. If we've closed ourselves, then don't
888 * continue.
889 */
890
891 roffnode_cleanscope(r);
892
893 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
894 if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
895 return(roff_ccond
896 (r, ROFF_ccond, bufp, szp,
897 ln, pos, pos + 2, offs));
898 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
899 }
900
901 /*
902 * A denied conditional must evaluate its children if and only
903 * if they're either structurally required (such as loops and
904 * conditionals) or a closing macro.
905 */
906 if (ROFFRULE_DENY == rr)
907 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
908 if (ROFF_ccond != t)
909 return(ROFF_IGN);
910
911 assert(roffs[t].proc);
912 return((*roffs[t].proc)(r, t, bufp, szp,
913 ln, ppos, pos, offs));
914 }
915
916
917 /* ARGSUSED */
918 static enum rofferr
919 roff_cond_text(ROFF_ARGS)
920 {
921 char *ep, *st;
922 enum roffrule rr;
923
924 rr = r->last->rule;
925
926 /*
927 * We display the value of the text if out current evaluation
928 * scope permits us to do so.
929 */
930
931 /* FIXME: use roff_ccond? */
932
933 st = &(*bufp)[pos];
934 if (NULL == (ep = strstr(st, "\\}"))) {
935 roffnode_cleanscope(r);
936 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
937 }
938
939 if (ep == st || (ep > st && '\\' != *(ep - 1)))
940 roffnode_pop(r);
941
942 roffnode_cleanscope(r);
943 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
944 }
945
946
947 static enum roffrule
948 roff_evalcond(const char *v, int *pos)
949 {
950
951 switch (v[*pos]) {
952 case ('n'):
953 (*pos)++;
954 return(ROFFRULE_ALLOW);
955 case ('e'):
956 /* FALLTHROUGH */
957 case ('o'):
958 /* FALLTHROUGH */
959 case ('t'):
960 (*pos)++;
961 return(ROFFRULE_DENY);
962 default:
963 break;
964 }
965
966 while (v[*pos] && ' ' != v[*pos])
967 (*pos)++;
968 return(ROFFRULE_DENY);
969 }
970
971 /* ARGSUSED */
972 static enum rofferr
973 roff_line_ignore(ROFF_ARGS)
974 {
975
976 if (ROFF_it == tok)
977 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it");
978
979 return(ROFF_IGN);
980 }
981
982 /* ARGSUSED */
983 static enum rofferr
984 roff_cond(ROFF_ARGS)
985 {
986 int sv;
987 enum roffrule rule;
988
989 /* Stack overflow! */
990
991 if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
992 mandoc_msg(MANDOCERR_MEM, r->parse, ln, ppos, NULL);
993 return(ROFF_ERR);
994 }
995
996 /* First, evaluate the conditional. */
997
998 if (ROFF_el == tok) {
999 /*
1000 * An `.el' will get the value of the current rstack
1001 * entry set in prior `ie' calls or defaults to DENY.
1002 */
1003 if (r->rstackpos < 0)
1004 rule = ROFFRULE_DENY;
1005 else
1006 rule = r->rstack[r->rstackpos];
1007 } else
1008 rule = roff_evalcond(*bufp, &pos);
1009
1010 sv = pos;
1011
1012 while (' ' == (*bufp)[pos])
1013 pos++;
1014
1015 /*
1016 * Roff is weird. If we have just white-space after the
1017 * conditional, it's considered the BODY and we exit without
1018 * really doing anything. Warn about this. It's probably
1019 * wrong.
1020 */
1021
1022 if ('\0' == (*bufp)[pos] && sv != pos) {
1023 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1024 return(ROFF_IGN);
1025 }
1026
1027 roffnode_push(r, tok, NULL, ln, ppos);
1028
1029 r->last->rule = rule;
1030
1031 if (ROFF_ie == tok) {
1032 /*
1033 * An if-else will put the NEGATION of the current
1034 * evaluated conditional into the stack.
1035 */
1036 r->rstackpos++;
1037 if (ROFFRULE_DENY == r->last->rule)
1038 r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
1039 else
1040 r->rstack[r->rstackpos] = ROFFRULE_DENY;
1041 }
1042
1043 /* If the parent has false as its rule, then so do we. */
1044
1045 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1046 r->last->rule = ROFFRULE_DENY;
1047
1048 /*
1049 * Determine scope. If we're invoked with "\{" trailing the
1050 * conditional, then we're in a multiline scope. Else our scope
1051 * expires on the next line.
1052 */
1053
1054 r->last->endspan = 1;
1055
1056 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1057 r->last->endspan = -1;
1058 pos += 2;
1059 }
1060
1061 /*
1062 * If there are no arguments on the line, the next-line scope is
1063 * assumed.
1064 */
1065
1066 if ('\0' == (*bufp)[pos])
1067 return(ROFF_IGN);
1068
1069 /* Otherwise re-run the roff parser after recalculating. */
1070
1071 *offs = pos;
1072 return(ROFF_RERUN);
1073 }
1074
1075
1076 /* ARGSUSED */
1077 static enum rofferr
1078 roff_ds(ROFF_ARGS)
1079 {
1080 char *name, *string;
1081
1082 /*
1083 * A symbol is named by the first word following the macro
1084 * invocation up to a space. Its value is anything after the
1085 * name's trailing whitespace and optional double-quote. Thus,
1086 *
1087 * [.ds foo "bar " ]
1088 *
1089 * will have `bar " ' as its value.
1090 */
1091
1092 string = *bufp + pos;
1093 name = roff_getname(r, &string, ln, pos);
1094 if ('\0' == *name)
1095 return(ROFF_IGN);
1096
1097 /* Read past initial double-quote. */
1098 if ('"' == *string)
1099 string++;
1100
1101 /* The rest is the value. */
1102 roff_setstr(r, name, string, 0);
1103 return(ROFF_IGN);
1104 }
1105
1106
1107 /* ARGSUSED */
1108 static enum rofferr
1109 roff_nr(ROFF_ARGS)
1110 {
1111 const char *key;
1112 char *val;
1113 struct reg *rg;
1114
1115 val = *bufp + pos;
1116 key = roff_getname(r, &val, ln, pos);
1117 rg = r->regs->regs;
1118
1119 if (0 == strcmp(key, "nS")) {
1120 rg[(int)REG_nS].set = 1;
1121 if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
1122 rg[(int)REG_nS].v.u = 0;
1123 }
1124
1125 return(ROFF_IGN);
1126 }
1127
1128 /* ARGSUSED */
1129 static enum rofferr
1130 roff_rm(ROFF_ARGS)
1131 {
1132 const char *name;
1133 char *cp;
1134
1135 cp = *bufp + pos;
1136 while ('\0' != *cp) {
1137 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1138 if ('\0' != *name)
1139 roff_setstr(r, name, NULL, 0);
1140 }
1141 return(ROFF_IGN);
1142 }
1143
1144 /* ARGSUSED */
1145 static enum rofferr
1146 roff_TE(ROFF_ARGS)
1147 {
1148
1149 if (NULL == r->tbl)
1150 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1151 else
1152 tbl_end(r->tbl);
1153
1154 r->tbl = NULL;
1155 return(ROFF_IGN);
1156 }
1157
1158 /* ARGSUSED */
1159 static enum rofferr
1160 roff_T_(ROFF_ARGS)
1161 {
1162
1163 if (NULL == r->tbl)
1164 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1165 else
1166 tbl_restart(ppos, ln, r->tbl);
1167
1168 return(ROFF_IGN);
1169 }
1170
1171 /* ARGSUSED */
1172 static enum rofferr
1173 roff_EQ(ROFF_ARGS)
1174 {
1175 struct eqn_node *e;
1176
1177 assert(NULL == r->eqn);
1178 e = eqn_alloc(ppos, ln);
1179
1180 if (r->last_eqn)
1181 r->last_eqn->next = e;
1182 else
1183 r->first_eqn = r->last_eqn = e;
1184
1185 r->eqn = r->last_eqn = e;
1186 return(ROFF_IGN);
1187 }
1188
1189 /* ARGSUSED */
1190 static enum rofferr
1191 roff_EN(ROFF_ARGS)
1192 {
1193
1194 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1195 return(ROFF_IGN);
1196 }
1197
1198 /* ARGSUSED */
1199 static enum rofferr
1200 roff_TS(ROFF_ARGS)
1201 {
1202 struct tbl_node *t;
1203
1204 if (r->tbl) {
1205 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1206 tbl_end(r->tbl);
1207 }
1208
1209 t = tbl_alloc(ppos, ln, r->parse);
1210
1211 if (r->last_tbl)
1212 r->last_tbl->next = t;
1213 else
1214 r->first_tbl = r->last_tbl = t;
1215
1216 r->tbl = r->last_tbl = t;
1217 return(ROFF_IGN);
1218 }
1219
1220 /* ARGSUSED */
1221 static enum rofferr
1222 roff_so(ROFF_ARGS)
1223 {
1224 char *name;
1225
1226 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1227
1228 /*
1229 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1230 * opening anything that's not in our cwd or anything beneath
1231 * it. Thus, explicitly disallow traversing up the file-system
1232 * or using absolute paths.
1233 */
1234
1235 name = *bufp + pos;
1236 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1237 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1238 return(ROFF_ERR);
1239 }
1240
1241 *offs = pos;
1242 return(ROFF_SO);
1243 }
1244
1245 /* ARGSUSED */
1246 static enum rofferr
1247 roff_userdef(ROFF_ARGS)
1248 {
1249 const char *arg[9];
1250 char *cp, *n1, *n2;
1251 int i;
1252
1253 /*
1254 * Collect pointers to macro argument strings
1255 * and null-terminate them.
1256 */
1257 cp = *bufp + pos;
1258 for (i = 0; i < 9; i++)
1259 arg[i] = '\0' == *cp ? "" :
1260 mandoc_getarg(r->parse, &cp, ln, &pos);
1261
1262 /*
1263 * Expand macro arguments.
1264 */
1265 *szp = 0;
1266 n1 = cp = mandoc_strdup(r->current_string);
1267 while (NULL != (cp = strstr(cp, "\\$"))) {
1268 i = cp[2] - '1';
1269 if (0 > i || 8 < i) {
1270 /* Not an argument invocation. */
1271 cp += 2;
1272 continue;
1273 }
1274
1275 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1276 n2 = mandoc_malloc(*szp);
1277
1278 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1279 strlcat(n2, arg[i], *szp);
1280 strlcat(n2, cp + 3, *szp);
1281
1282 cp = n2 + (cp - n1);
1283 free(n1);
1284 n1 = n2;
1285 }
1286
1287 /*
1288 * Replace the macro invocation
1289 * by the expanded macro.
1290 */
1291 free(*bufp);
1292 *bufp = n1;
1293 if (0 == *szp)
1294 *szp = strlen(*bufp) + 1;
1295
1296 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1297 ROFF_REPARSE : ROFF_APPEND);
1298 }
1299
1300 static char *
1301 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1302 {
1303 char *name, *cp;
1304
1305 name = *cpp;
1306 if ('\0' == *name)
1307 return(name);
1308
1309 /* Read until end of name. */
1310 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1311 if ('\\' != *cp)
1312 continue;
1313 cp++;
1314 if ('\\' == *cp)
1315 continue;
1316 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1317 *cp = '\0';
1318 name = cp;
1319 }
1320
1321 /* Nil-terminate name. */
1322 if ('\0' != *cp)
1323 *(cp++) = '\0';
1324
1325 /* Read past spaces. */
1326 while (' ' == *cp)
1327 cp++;
1328
1329 *cpp = cp;
1330 return(name);
1331 }
1332
1333 /*
1334 * Store *string into the user-defined string called *name.
1335 * In multiline mode, append to an existing entry and append '\n';
1336 * else replace the existing entry, if there is one.
1337 * To clear an existing entry, call with (*r, *name, NULL, 0).
1338 */
1339 static void
1340 roff_setstr(struct roff *r, const char *name, const char *string,
1341 int multiline)
1342 {
1343 struct roffstr *n;
1344 char *c;
1345 size_t oldch, newch;
1346
1347 /* Search for an existing string with the same name. */
1348 n = r->first_string;
1349 while (n && strcmp(name, n->name))
1350 n = n->next;
1351
1352 if (NULL == n) {
1353 /* Create a new string table entry. */
1354 n = mandoc_malloc(sizeof(struct roffstr));
1355 n->name = mandoc_strdup(name);
1356 n->string = NULL;
1357 n->next = r->first_string;
1358 r->first_string = n;
1359 } else if (0 == multiline) {
1360 /* In multiline mode, append; else replace. */
1361 free(n->string);
1362 n->string = NULL;
1363 }
1364
1365 if (NULL == string)
1366 return;
1367
1368 /*
1369 * One additional byte for the '\n' in multiline mode,
1370 * and one for the terminating '\0'.
1371 */
1372 newch = strlen(string) + (multiline ? 2u : 1u);
1373 if (NULL == n->string) {
1374 n->string = mandoc_malloc(newch);
1375 *n->string = '\0';
1376 oldch = 0;
1377 } else {
1378 oldch = strlen(n->string);
1379 n->string = mandoc_realloc(n->string, oldch + newch);
1380 }
1381
1382 /* Skip existing content in the destination buffer. */
1383 c = n->string + (int)oldch;
1384
1385 /* Append new content to the destination buffer. */
1386 while (*string) {
1387 /*
1388 * Rudimentary roff copy mode:
1389 * Handle escaped backslashes.
1390 */
1391 if ('\\' == *string && '\\' == *(string + 1))
1392 string++;
1393 *c++ = *string++;
1394 }
1395
1396 /* Append terminating bytes. */
1397 if (multiline)
1398 *c++ = '\n';
1399 *c = '\0';
1400 }
1401
1402 static const char *
1403 roff_getstrn(const struct roff *r, const char *name, size_t len)
1404 {
1405 const struct roffstr *n;
1406
1407 n = r->first_string;
1408 while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1409 n = n->next;
1410
1411 return(n ? n->string : NULL);
1412 }
1413
1414 static void
1415 roff_freestr(struct roff *r)
1416 {
1417 struct roffstr *n, *nn;
1418
1419 for (n = r->first_string; n; n = nn) {
1420 free(n->name);
1421 free(n->string);
1422 nn = n->next;
1423 free(n);
1424 }
1425
1426 r->first_string = NULL;
1427 }
1428
1429 const struct tbl_span *
1430 roff_span(const struct roff *r)
1431 {
1432
1433 return(r->tbl ? tbl_span(r->tbl) : NULL);
1434 }
1435
1436 const struct eqn *
1437 roff_eqn(const struct roff *r)
1438 {
1439
1440 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1441 }