]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Refactoring in preparation for .rm support:
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.121 2011/01/11 00:11:45 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <ctype.h>
25 #include <limits.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <stdio.h>
29
30 #include "mandoc.h"
31 #include "roff.h"
32 #include "libroff.h"
33 #include "libmandoc.h"
34
/* Number of slots in the rule stack (rstack) of struct roff. */
#define	RSTACK_MAX	128

/* True if character c is one of the two control characters, `.' or `''. */
#define	ROFF_CTL(c) \
	('.' == (c) || '\'' == (c))
39
/*
 * Tokens for the requests handled in this file.
 * The values are used as indices into the roffs[] table, so the order
 * of the enumerators must match the order of the table initialisers.
 * ROFF_MAX doubles as the "not found" value of the lookup functions.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_rm,
	ROFF_so,
	ROFF_tr,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_cblock,
	ROFF_ccond, /* FIXME: remove this. */
	ROFF_USERDEF,
	ROFF_MAX
};
68
/*
 * Evaluation rule of a conditional scope: with ROFFRULE_DENY the
 * conditional handlers return ROFF_IGN for the enclosed lines, with
 * ROFFRULE_ALLOW they return ROFF_CONT.
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
73
/*
 * One entry in the singly linked list of user-defined strings and
 * macros; the list head is first_string in struct roff.
 */
struct	roffstr {
	char		*name; /* key of symbol */
	char		*string; /* current value */
	struct roffstr	*next; /* next in list */
};
79
/*
 * State of one roff parser instance, created by roff_alloc() and
 * released by roff_free().
 */
struct	roff {
	struct roffnode	*last; /* leaf of stack */
	mandocmsg	 msg; /* err/warn/fatal messages */
	void		*data; /* privdata for messages */
	enum roffrule	 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	int		 rstackpos; /* position in rstack */
	struct regset	*regs; /* read/writable registers */
	struct roffstr	*first_string; /* user-defined strings & macros */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
};
93
/*
 * One open scope on the instruction stack.  Nodes are created by
 * roffnode_push() and destroyed by roffnode_pop(); `parent' links
 * each node to the scope that encloses it.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	enum roffrule	 rule; /* current evaluation rule */
};
104
/*
 * Common parameter list shared by all request handlers, and the
 * function pointer type built from it.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
115
/*
 * Description of one request: its name, the handlers invoked in the
 * different parse situations, and the link used to chain entries
 * that share a hash bucket.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next;
};
125
/* Forward declarations: request handlers and string-table helpers. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_freestr(struct roff *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_line_error(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	int		 roff_res(struct roff *,
				char **, size_t *, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
152
/*
 * See roff_hash_find().
 * The table has one bucket per character in the range
 * ASCII_LO..ASCII_HI; requests are filed under the first character
 * of their name.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
160
/*
 * Table of all requests, indexed by enum rofft.  The columns are
 * name, proc, text, sub, flags and the hash chain link.  The last
 * entry has no name: it carries the handler for user-defined macros
 * and is not entered into the hash table by roff_hash_init().
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "rm", roff_line_error, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "tr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
188
/* Forward declarations: stack, hash table and parsing helpers. */
static	void		 roff_free1(struct roff *);
static	enum rofft	 roff_hash_find(const char *, size_t);
static	void		 roff_hash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	void		 roffnode_pop(struct roff *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	int		 roff_parse_nat(const char *, unsigned int *);

/*
 * See roff_hash_find().
 * Bucket index of name p: its first character relative to ASCII_LO.
 */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)
201
202 static void
203 roff_hash_init(void)
204 {
205 struct roffmac *n;
206 int buc, i;
207
208 for (i = 0; i < (int)ROFF_USERDEF; i++) {
209 assert(roffs[i].name[0] >= ASCII_LO);
210 assert(roffs[i].name[0] <= ASCII_HI);
211
212 buc = ROFF_HASH(roffs[i].name);
213
214 if (NULL != (n = hash[buc])) {
215 for ( ; n->next; n = n->next)
216 /* Do nothing. */ ;
217 n->next = &roffs[i];
218 } else
219 hash[buc] = &roffs[i];
220 }
221 }
222
223
224 /*
225 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
226 * the nil-terminated string name could be found.
227 */
228 static enum rofft
229 roff_hash_find(const char *p, size_t s)
230 {
231 int buc;
232 struct roffmac *n;
233
234 /*
235 * libroff has an extremely simple hashtable, for the time
236 * being, which simply keys on the first character, which must
237 * be printable, then walks a chain. It works well enough until
238 * optimised.
239 */
240
241 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
242 return(ROFF_MAX);
243
244 buc = ROFF_HASH(p);
245
246 if (NULL == (n = hash[buc]))
247 return(ROFF_MAX);
248 for ( ; n; n = n->next)
249 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
250 return((enum rofft)(n - roffs));
251
252 return(ROFF_MAX);
253 }
254
255
256 /*
257 * Pop the current node off of the stack of roff instructions currently
258 * pending.
259 */
260 static void
261 roffnode_pop(struct roff *r)
262 {
263 struct roffnode *p;
264
265 assert(r->last);
266 p = r->last;
267
268 if (ROFF_el == p->tok)
269 if (r->rstackpos > -1)
270 r->rstackpos--;
271
272 r->last = r->last->parent;
273 free(p->name);
274 free(p->end);
275 free(p);
276 }
277
278
279 /*
280 * Push a roff node onto the instruction stack. This must later be
281 * removed with roffnode_pop().
282 */
283 static void
284 roffnode_push(struct roff *r, enum rofft tok, const char *name,
285 int line, int col)
286 {
287 struct roffnode *p;
288
289 p = mandoc_calloc(1, sizeof(struct roffnode));
290 p->tok = tok;
291 if (name)
292 p->name = mandoc_strdup(name);
293 p->parent = r->last;
294 p->line = line;
295 p->col = col;
296 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
297
298 r->last = p;
299 }
300
301
302 static void
303 roff_free1(struct roff *r)
304 {
305 struct tbl_node *t;
306
307 while (r->first_tbl) {
308 t = r->first_tbl;
309 r->first_tbl = t->next;
310 tbl_free(t);
311 }
312
313 r->first_tbl = r->last_tbl = r->tbl = NULL;
314
315 while (r->last)
316 roffnode_pop(r);
317
318 roff_freestr(r);
319 }
320
321
322 void
323 roff_reset(struct roff *r)
324 {
325
326 roff_free1(r);
327 }
328
329
/*
 * Destroy a parser: release all data it holds, then the parser
 * structure itself.  The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);

	free(r);
}
337
338
339 struct roff *
340 roff_alloc(struct regset *regs, void *data, const mandocmsg msg)
341 {
342 struct roff *r;
343
344 r = mandoc_calloc(1, sizeof(struct roff));
345 r->regs = regs;
346 r->msg = msg;
347 r->data = data;
348 r->rstackpos = -1;
349
350 roff_hash_init();
351 return(r);
352 }
353
354
/*
 * Expand the first reference to a defined user string (`\*x',
 * `\*(xx' or `\*[name]') found at or after pos.
 * On expansion, *bufp is replaced by a newly allocated buffer, *szp
 * is updated, and 0 is returned so that the caller can reparse the
 * line.  If nothing was expanded, the buffer is left alone and 1 is
 * returned.  References to undefined strings are skipped.
 */
static int
roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
{
	const char	*esc;	/* the backslash opening the escape */
	const char	*key;	/* first byte of the string name */
	const char	*scan;	/* current read position */
	const char	*val;	/* replacement text */
	char		*nbuf;
	size_t		 nbufsz;
	int		 len, maxl, bracket;

	scan = *bufp + pos;

	for (;;) {
		if (NULL == (scan = strchr(scan, '\\')))
			return(1);
		esc = scan;
		scan++;

		/*
		 * Only `\*' is of interest.  The byte after any
		 * other backslash is escaped and hence cannot open
		 * an escape sequence itself, so step over it, too.
		 */

		if ('\0' == *scan)
			return(1);
		if ('*' != *scan) {
			scan++;
			continue;
		}
		scan++;

		/* The next byte selects the form of the name. */

		if ('\0' == *scan)
			return(1);

		if ('(' == *scan) {
			maxl = 2;
			scan++;
		} else if ('[' == *scan) {
			maxl = 0;
			scan++;
		} else
			maxl = 1;

		bracket = (0 == maxl);
		key = scan;

		/*
		 * Find the end of the name: the closing bracket, or
		 * a fixed number of bytes.  Running into the end of
		 * the line first is an error.
		 */

		len = 0;
		if (bracket) {
			while (']' != *scan) {
				if ('\0' == *scan)
					return(1);
				scan++;
				len++;
			}
		} else {
			while (len < maxl) {
				if ('\0' == *scan)
					return(1);
				scan++;
				len++;
			}
		}

		/* Undefined strings are left in place. */

		val = roff_getstrn(r, key, (size_t)len);
		if (NULL == val) {
			if ( ! bracket)
				scan--;
			continue;
		}

		/* Assemble: text before, replacement, text after. */

		nbufsz = *szp + strlen(val) + 1;
		nbuf = mandoc_malloc(nbufsz);

		strlcpy(nbuf, *bufp, (size_t)(esc - *bufp + 1));
		strlcat(nbuf, val, nbufsz);
		strlcat(nbuf, bracket ? scan + 1 : scan, nbufsz);

		free(*bufp);
		*bufp = nbuf;
		*szp = nbufsz;
		return(0);
	}
}
450
451
452 enum rofferr
453 roff_parseln(struct roff *r, int ln, char **bufp,
454 size_t *szp, int pos, int *offs)
455 {
456 enum rofft t;
457 enum rofferr e;
458 int ppos;
459
460 /*
461 * Run the reserved-word filter only if we have some reserved
462 * words to fill in.
463 */
464
465 if (r->first_string && ! roff_res(r, bufp, szp, pos))
466 return(ROFF_REPARSE);
467
468 /*
469 * First, if a scope is open and we're not a macro, pass the
470 * text through the macro's filter. If a scope isn't open and
471 * we're not a macro, just let it through.
472 */
473
474 if (r->last && ! ROFF_CTL((*bufp)[pos])) {
475 t = r->last->tok;
476 assert(roffs[t].text);
477 e = (*roffs[t].text)
478 (r, t, bufp, szp, ln, pos, pos, offs);
479 assert(ROFF_IGN == e || ROFF_CONT == e);
480 if (ROFF_CONT == e && r->tbl)
481 return(tbl_read(r->tbl, ln, *bufp, *offs));
482 return(e);
483 } else if ( ! ROFF_CTL((*bufp)[pos])) {
484 if (r->tbl)
485 return(tbl_read(r->tbl, ln, *bufp, *offs));
486 return(ROFF_CONT);
487 }
488
489 /*
490 * If a scope is open, go to the child handler for that macro,
491 * as it may want to preprocess before doing anything with it.
492 */
493
494 if (r->last) {
495 t = r->last->tok;
496 assert(roffs[t].sub);
497 return((*roffs[t].sub)
498 (r, t, bufp, szp,
499 ln, pos, pos, offs));
500 }
501
502 /*
503 * Lastly, as we've no scope open, try to look up and execute
504 * the new macro. If no macro is found, simply return and let
505 * the compilers handle it.
506 */
507
508 ppos = pos;
509 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
510 return(ROFF_CONT);
511
512 assert(roffs[t].proc);
513 return((*roffs[t].proc)
514 (r, t, bufp, szp,
515 ln, ppos, pos, offs));
516 }
517
518
519 void
520 roff_endparse(struct roff *r)
521 {
522
523 if (r->last)
524 (*r->msg)(MANDOCERR_SCOPEEXIT, r->data,
525 r->last->line, r->last->col, NULL);
526
527 if (r->tbl) {
528 (*r->msg)(MANDOCERR_SCOPEEXIT, r->data,
529 r->tbl->line, r->tbl->pos, NULL);
530 tbl_end(r->tbl);
531 r->tbl = NULL;
532 }
533 }
534
535
536 /*
537 * Parse a roff node's type from the input buffer. This must be in the
538 * form of ".foo xxx" in the usual way.
539 */
540 static enum rofft
541 roff_parse(struct roff *r, const char *buf, int *pos)
542 {
543 const char *mac;
544 size_t maclen;
545 enum rofft t;
546
547 assert(ROFF_CTL(buf[*pos]));
548 (*pos)++;
549
550 while (' ' == buf[*pos] || '\t' == buf[*pos])
551 (*pos)++;
552
553 if ('\0' == buf[*pos])
554 return(ROFF_MAX);
555
556 mac = buf + *pos;
557 maclen = strcspn(mac, " \\\t\0");
558
559 t = (r->current_string = roff_getstrn(r, mac, maclen))
560 ? ROFF_USERDEF : roff_hash_find(mac, maclen);
561
562 *pos += maclen;
563 while (buf[*pos] && ' ' == buf[*pos])
564 (*pos)++;
565
566 return(t);
567 }
568
569
/*
 * Convert the decimal string buf into a natural number.
 * The whole string has to be consumed by the conversion and the
 * value has to lie in the range 0..INT_MAX.  Returns 1 and stores
 * the value in *res on success; returns 0 and leaves *res alone
 * otherwise.
 */
static int
roff_parse_nat(const char *buf, unsigned int *res)
{
	char	*end;
	long	 val;

	errno = 0;
	val = strtol(buf, &end, 10);

	/* Empty input or trailing garbage. */
	if ('\0' == buf[0] || '\0' != *end)
		return(0);

	/* Out of range for long. */
	if (ERANGE == errno && (LONG_MAX == val || LONG_MIN == val))
		return(0);

	/* Out of range for a natural number held in an int. */
	if (val < 0 || val > INT_MAX)
		return(0);

	*res = (unsigned int)val;
	return(1);
}
588
589
590 /* ARGSUSED */
591 static enum rofferr
592 roff_cblock(ROFF_ARGS)
593 {
594
595 /*
596 * A block-close `..' should only be invoked as a child of an
597 * ignore macro, otherwise raise a warning and just ignore it.
598 */
599
600 if (NULL == r->last) {
601 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
602 return(ROFF_IGN);
603 }
604
605 switch (r->last->tok) {
606 case (ROFF_am):
607 /* FALLTHROUGH */
608 case (ROFF_ami):
609 /* FALLTHROUGH */
610 case (ROFF_am1):
611 /* FALLTHROUGH */
612 case (ROFF_de):
613 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
614 /* FALLTHROUGH */
615 case (ROFF_dei):
616 /* FALLTHROUGH */
617 case (ROFF_ig):
618 break;
619 default:
620 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
621 return(ROFF_IGN);
622 }
623
624 if ((*bufp)[pos])
625 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
626
627 roffnode_pop(r);
628 roffnode_cleanscope(r);
629 return(ROFF_IGN);
630
631 }
632
633
634 static void
635 roffnode_cleanscope(struct roff *r)
636 {
637
638 while (r->last) {
639 if (--r->last->endspan < 0)
640 break;
641 roffnode_pop(r);
642 }
643 }
644
645
646 /* ARGSUSED */
647 static enum rofferr
648 roff_ccond(ROFF_ARGS)
649 {
650
651 if (NULL == r->last) {
652 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
653 return(ROFF_IGN);
654 }
655
656 switch (r->last->tok) {
657 case (ROFF_el):
658 /* FALLTHROUGH */
659 case (ROFF_ie):
660 /* FALLTHROUGH */
661 case (ROFF_if):
662 break;
663 default:
664 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
665 return(ROFF_IGN);
666 }
667
668 if (r->last->endspan > -1) {
669 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
670 return(ROFF_IGN);
671 }
672
673 if ((*bufp)[pos])
674 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
675
676 roffnode_pop(r);
677 roffnode_cleanscope(r);
678 return(ROFF_IGN);
679 }
680
681
682 /* ARGSUSED */
683 static enum rofferr
684 roff_block(ROFF_ARGS)
685 {
686 int sv;
687 size_t sz;
688 char *name;
689
690 name = NULL;
691
692 if (ROFF_ig != tok) {
693 if ('\0' == (*bufp)[pos]) {
694 (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
695 return(ROFF_IGN);
696 }
697
698 /*
699 * Re-write `de1', since we don't really care about
700 * groff's strange compatibility mode, into `de'.
701 */
702
703 if (ROFF_de1 == tok)
704 tok = ROFF_de;
705 if (ROFF_de == tok)
706 name = *bufp + pos;
707 else
708 (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos,
709 roffs[tok].name);
710
711 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
712 pos++;
713
714 while (' ' == (*bufp)[pos])
715 (*bufp)[pos++] = '\0';
716 }
717
718 roffnode_push(r, tok, name, ln, ppos);
719
720 /*
721 * At the beginning of a `de' macro, clear the existing string
722 * with the same name, if there is one. New content will be
723 * added from roff_block_text() in multiline mode.
724 */
725
726 if (ROFF_de == tok)
727 roff_setstr(r, name, "", 0);
728
729 if ('\0' == (*bufp)[pos])
730 return(ROFF_IGN);
731
732 /* If present, process the custom end-of-line marker. */
733
734 sv = pos;
735 while ((*bufp)[pos] &&
736 ' ' != (*bufp)[pos] &&
737 '\t' != (*bufp)[pos])
738 pos++;
739
740 /*
741 * Note: groff does NOT like escape characters in the input.
742 * Instead of detecting this, we're just going to let it fly and
743 * to hell with it.
744 */
745
746 assert(pos > sv);
747 sz = (size_t)(pos - sv);
748
749 if (1 == sz && '.' == (*bufp)[sv])
750 return(ROFF_IGN);
751
752 r->last->end = mandoc_malloc(sz + 1);
753
754 memcpy(r->last->end, *bufp + sv, sz);
755 r->last->end[(int)sz] = '\0';
756
757 if ((*bufp)[pos])
758 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
759
760 return(ROFF_IGN);
761 }
762
763
764 /* ARGSUSED */
765 static enum rofferr
766 roff_block_sub(ROFF_ARGS)
767 {
768 enum rofft t;
769 int i, j;
770
771 /*
772 * First check whether a custom macro exists at this level. If
773 * it does, then check against it. This is some of groff's
774 * stranger behaviours. If we encountered a custom end-scope
775 * tag and that tag also happens to be a "real" macro, then we
776 * need to try interpreting it again as a real macro. If it's
777 * not, then return ignore. Else continue.
778 */
779
780 if (r->last->end) {
781 i = pos + 1;
782 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
783 i++;
784
785 for (j = 0; r->last->end[j]; j++, i++)
786 if ((*bufp)[i] != r->last->end[j])
787 break;
788
789 if ('\0' == r->last->end[j] &&
790 ('\0' == (*bufp)[i] ||
791 ' ' == (*bufp)[i] ||
792 '\t' == (*bufp)[i])) {
793 roffnode_pop(r);
794 roffnode_cleanscope(r);
795
796 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
797 return(ROFF_RERUN);
798 return(ROFF_IGN);
799 }
800 }
801
802 /*
803 * If we have no custom end-query or lookup failed, then try
804 * pulling it out of the hashtable.
805 */
806
807 ppos = pos;
808 t = roff_parse(r, *bufp, &pos);
809
810 /*
811 * Macros other than block-end are only significant
812 * in `de' blocks; elsewhere, simply throw them away.
813 */
814 if (ROFF_cblock != t) {
815 if (ROFF_de == tok)
816 roff_setstr(r, r->last->name, *bufp + ppos, 1);
817 return(ROFF_IGN);
818 }
819
820 assert(roffs[t].proc);
821 return((*roffs[t].proc)(r, t, bufp, szp,
822 ln, ppos, pos, offs));
823 }
824
825
826 /* ARGSUSED */
827 static enum rofferr
828 roff_block_text(ROFF_ARGS)
829 {
830
831 if (ROFF_de == tok)
832 roff_setstr(r, r->last->name, *bufp + pos, 1);
833
834 return(ROFF_IGN);
835 }
836
837
838 /* ARGSUSED */
839 static enum rofferr
840 roff_cond_sub(ROFF_ARGS)
841 {
842 enum rofft t;
843 enum roffrule rr;
844
845 ppos = pos;
846 rr = r->last->rule;
847
848 /*
849 * Clean out scope. If we've closed ourselves, then don't
850 * continue.
851 */
852
853 roffnode_cleanscope(r);
854
855 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
856 if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
857 return(roff_ccond
858 (r, ROFF_ccond, bufp, szp,
859 ln, pos, pos + 2, offs));
860 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
861 }
862
863 /*
864 * A denied conditional must evaluate its children if and only
865 * if they're either structurally required (such as loops and
866 * conditionals) or a closing macro.
867 */
868 if (ROFFRULE_DENY == rr)
869 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
870 if (ROFF_ccond != t)
871 return(ROFF_IGN);
872
873 assert(roffs[t].proc);
874 return((*roffs[t].proc)(r, t, bufp, szp,
875 ln, ppos, pos, offs));
876 }
877
878
879 /* ARGSUSED */
880 static enum rofferr
881 roff_cond_text(ROFF_ARGS)
882 {
883 char *ep, *st;
884 enum roffrule rr;
885
886 rr = r->last->rule;
887
888 /*
889 * We display the value of the text if out current evaluation
890 * scope permits us to do so.
891 */
892
893 /* FIXME: use roff_ccond? */
894
895 st = &(*bufp)[pos];
896 if (NULL == (ep = strstr(st, "\\}"))) {
897 roffnode_cleanscope(r);
898 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
899 }
900
901 if (ep == st || (ep > st && '\\' != *(ep - 1)))
902 roffnode_pop(r);
903
904 roffnode_cleanscope(r);
905 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
906 }
907
908
909 static enum roffrule
910 roff_evalcond(const char *v, int *pos)
911 {
912
913 switch (v[*pos]) {
914 case ('n'):
915 (*pos)++;
916 return(ROFFRULE_ALLOW);
917 case ('e'):
918 /* FALLTHROUGH */
919 case ('o'):
920 /* FALLTHROUGH */
921 case ('t'):
922 (*pos)++;
923 return(ROFFRULE_DENY);
924 default:
925 break;
926 }
927
928 while (v[*pos] && ' ' != v[*pos])
929 (*pos)++;
930 return(ROFFRULE_DENY);
931 }
932
933 /* ARGSUSED */
934 static enum rofferr
935 roff_line_ignore(ROFF_ARGS)
936 {
937
938 return(ROFF_IGN);
939 }
940
941 /* ARGSUSED */
942 static enum rofferr
943 roff_line_error(ROFF_ARGS)
944 {
945
946 (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos, roffs[tok].name);
947 return(ROFF_IGN);
948 }
949
950 /* ARGSUSED */
951 static enum rofferr
952 roff_cond(ROFF_ARGS)
953 {
954 int sv;
955 enum roffrule rule;
956
957 /* Stack overflow! */
958
959 if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
960 (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL);
961 return(ROFF_ERR);
962 }
963
964 /* First, evaluate the conditional. */
965
966 if (ROFF_el == tok) {
967 /*
968 * An `.el' will get the value of the current rstack
969 * entry set in prior `ie' calls or defaults to DENY.
970 */
971 if (r->rstackpos < 0)
972 rule = ROFFRULE_DENY;
973 else
974 rule = r->rstack[r->rstackpos];
975 } else
976 rule = roff_evalcond(*bufp, &pos);
977
978 sv = pos;
979
980 while (' ' == (*bufp)[pos])
981 pos++;
982
983 /*
984 * Roff is weird. If we have just white-space after the
985 * conditional, it's considered the BODY and we exit without
986 * really doing anything. Warn about this. It's probably
987 * wrong.
988 */
989
990 if ('\0' == (*bufp)[pos] && sv != pos) {
991 (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
992 return(ROFF_IGN);
993 }
994
995 roffnode_push(r, tok, NULL, ln, ppos);
996
997 r->last->rule = rule;
998
999 if (ROFF_ie == tok) {
1000 /*
1001 * An if-else will put the NEGATION of the current
1002 * evaluated conditional into the stack.
1003 */
1004 r->rstackpos++;
1005 if (ROFFRULE_DENY == r->last->rule)
1006 r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
1007 else
1008 r->rstack[r->rstackpos] = ROFFRULE_DENY;
1009 }
1010
1011 /* If the parent has false as its rule, then so do we. */
1012
1013 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1014 r->last->rule = ROFFRULE_DENY;
1015
1016 /*
1017 * Determine scope. If we're invoked with "\{" trailing the
1018 * conditional, then we're in a multiline scope. Else our scope
1019 * expires on the next line.
1020 */
1021
1022 r->last->endspan = 1;
1023
1024 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1025 r->last->endspan = -1;
1026 pos += 2;
1027 }
1028
1029 /*
1030 * If there are no arguments on the line, the next-line scope is
1031 * assumed.
1032 */
1033
1034 if ('\0' == (*bufp)[pos])
1035 return(ROFF_IGN);
1036
1037 /* Otherwise re-run the roff parser after recalculating. */
1038
1039 *offs = pos;
1040 return(ROFF_RERUN);
1041 }
1042
1043
1044 /* ARGSUSED */
1045 static enum rofferr
1046 roff_ds(ROFF_ARGS)
1047 {
1048 char *name, *string;
1049
1050 /*
1051 * A symbol is named by the first word following the macro
1052 * invocation up to a space. Its value is anything after the
1053 * name's trailing whitespace and optional double-quote. Thus,
1054 *
1055 * [.ds foo "bar " ]
1056 *
1057 * will have `bar " ' as its value.
1058 */
1059
1060 string = *bufp + pos;
1061 name = roff_getname(r, &string, ln, pos);
1062 if ('\0' == *name)
1063 return(ROFF_IGN);
1064
1065 /* Read past initial double-quote. */
1066 if ('"' == *string)
1067 string++;
1068
1069 /* The rest is the value. */
1070 roff_setstr(r, name, string, 0);
1071 return(ROFF_IGN);
1072 }
1073
1074
1075 /* ARGSUSED */
1076 static enum rofferr
1077 roff_nr(ROFF_ARGS)
1078 {
1079 const char *key;
1080 char *val;
1081 struct reg *rg;
1082
1083 val = *bufp + pos;
1084 key = roff_getname(r, &val, ln, pos);
1085 rg = r->regs->regs;
1086
1087 if (0 == strcmp(key, "nS")) {
1088 rg[(int)REG_nS].set = 1;
1089 if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
1090 rg[(int)REG_nS].v.u = 0;
1091 }
1092
1093 return(ROFF_IGN);
1094 }
1095
1096 /* ARGSUSED */
1097 static enum rofferr
1098 roff_TE(ROFF_ARGS)
1099 {
1100
1101 if (NULL == r->tbl)
1102 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
1103 else
1104 tbl_end(r->tbl);
1105
1106 r->tbl = NULL;
1107 return(ROFF_IGN);
1108 }
1109
1110 /* ARGSUSED */
1111 static enum rofferr
1112 roff_T_(ROFF_ARGS)
1113 {
1114
1115 if (NULL == r->tbl)
1116 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
1117 else
1118 tbl_restart(ppos, ln, r->tbl);
1119
1120 return(ROFF_IGN);
1121 }
1122
1123 /* ARGSUSED */
1124 static enum rofferr
1125 roff_TS(ROFF_ARGS)
1126 {
1127 struct tbl_node *t;
1128
1129 if (r->tbl) {
1130 (*r->msg)(MANDOCERR_SCOPEBROKEN, r->data, ln, ppos, NULL);
1131 tbl_end(r->tbl);
1132 }
1133
1134 t = tbl_alloc(ppos, ln, r->data, r->msg);
1135
1136 if (r->last_tbl)
1137 r->last_tbl->next = t;
1138 else
1139 r->first_tbl = r->last_tbl = t;
1140
1141 r->tbl = r->last_tbl = t;
1142 return(ROFF_IGN);
1143 }
1144
1145 /* ARGSUSED */
1146 static enum rofferr
1147 roff_so(ROFF_ARGS)
1148 {
1149 char *name;
1150
1151 (*r->msg)(MANDOCERR_SO, r->data, ln, ppos, NULL);
1152
1153 /*
1154 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1155 * opening anything that's not in our cwd or anything beneath
1156 * it. Thus, explicitly disallow traversing up the file-system
1157 * or using absolute paths.
1158 */
1159
1160 name = *bufp + pos;
1161 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1162 (*r->msg)(MANDOCERR_SOPATH, r->data, ln, pos, NULL);
1163 return(ROFF_ERR);
1164 }
1165
1166 *offs = pos;
1167 return(ROFF_SO);
1168 }
1169
1170 /* ARGSUSED */
1171 static enum rofferr
1172 roff_userdef(ROFF_ARGS)
1173 {
1174 const char *arg[9];
1175 char *cp, *n1, *n2;
1176 int i;
1177
1178 /*
1179 * Collect pointers to macro argument strings
1180 * and null-terminate them.
1181 */
1182 cp = *bufp + pos;
1183 for (i = 0; i < 9; i++)
1184 arg[i] = '\0' == *cp ? "" :
1185 mandoc_getarg(&cp, r->msg, r->data, ln, &pos);
1186
1187 /*
1188 * Expand macro arguments.
1189 */
1190 *szp = 0;
1191 n1 = cp = mandoc_strdup(r->current_string);
1192 while (NULL != (cp = strstr(cp, "\\$"))) {
1193 i = cp[2] - '1';
1194 if (0 > i || 8 < i) {
1195 /* Not an argument invocation. */
1196 cp += 2;
1197 continue;
1198 }
1199
1200 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1201 n2 = mandoc_malloc(*szp);
1202
1203 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1204 strlcat(n2, arg[i], *szp);
1205 strlcat(n2, cp + 3, *szp);
1206
1207 cp = n2 + (cp - n1);
1208 free(n1);
1209 n1 = n2;
1210 }
1211
1212 /*
1213 * Replace the macro invocation
1214 * by the expanded macro.
1215 */
1216 free(*bufp);
1217 *bufp = n1;
1218 if (0 == *szp)
1219 *szp = strlen(*bufp) + 1;
1220
1221 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1222 ROFF_REPARSE : ROFF_APPEND);
1223 }
1224
1225
1226 static char *
1227 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1228 {
1229 char *name, *cp;
1230
1231 name = *cpp;
1232 if ('\0' == *name)
1233 return(name);
1234
1235 /* Read until end of name. */
1236 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1237 if ('\\' != *cp)
1238 continue;
1239 cp++;
1240 if ('\\' == *cp)
1241 continue;
1242 (*r->msg)(MANDOCERR_NAMESC, r->data, ln, pos, NULL);
1243 *cp = '\0';
1244 name = cp;
1245 }
1246
1247 /* Nil-terminate name. */
1248 if ('\0' != *cp)
1249 *(cp++) = '\0';
1250
1251 /* Read past spaces. */
1252 while (' ' == *cp)
1253 cp++;
1254
1255 *cpp = cp;
1256 return(name);
1257 }
1258
1259
1260 /*
1261 * Store *string into the user-defined string called *name.
1262 * In multiline mode, append to an existing entry and append '\n';
1263 * else replace the existing entry, if there is one.
1264 * To clear an existing entry, call with (*r, *name, NULL, 0).
1265 */
1266 static void
1267 roff_setstr(struct roff *r, const char *name, const char *string,
1268 int multiline)
1269 {
1270 struct roffstr *n;
1271 char *c;
1272 size_t oldch, newch;
1273
1274 /* Search for an existing string with the same name. */
1275 n = r->first_string;
1276 while (n && strcmp(name, n->name))
1277 n = n->next;
1278
1279 if (NULL == n) {
1280 /* Create a new string table entry. */
1281 n = mandoc_malloc(sizeof(struct roffstr));
1282 n->name = mandoc_strdup(name);
1283 n->string = NULL;
1284 n->next = r->first_string;
1285 r->first_string = n;
1286 } else if (0 == multiline) {
1287 /* In multiline mode, append; else replace. */
1288 free(n->string);
1289 n->string = NULL;
1290 }
1291
1292 if (NULL == string)
1293 return;
1294
1295 /*
1296 * One additional byte for the '\n' in multiline mode,
1297 * and one for the terminating '\0'.
1298 */
1299 newch = strlen(string) + (multiline ? 2 : 1);
1300 if (NULL == n->string) {
1301 n->string = mandoc_malloc(newch);
1302 *n->string = '\0';
1303 oldch = 0;
1304 } else {
1305 oldch = strlen(n->string);
1306 n->string = mandoc_realloc(n->string, oldch + newch);
1307 }
1308
1309 /* Skip existing content in the destination buffer. */
1310 c = n->string + oldch;
1311
1312 /* Append new content to the destination buffer. */
1313 while (*string) {
1314 /*
1315 * Rudimentary roff copy mode:
1316 * Handle escaped backslashes.
1317 */
1318 if ('\\' == *string && '\\' == *(string + 1))
1319 string++;
1320 *c++ = *string++;
1321 }
1322
1323 /* Append terminating bytes. */
1324 if (multiline)
1325 *c++ = '\n';
1326 *c = '\0';
1327 }
1328
1329
1330 static const char *
1331 roff_getstrn(const struct roff *r, const char *name, size_t len)
1332 {
1333 const struct roffstr *n;
1334
1335 n = r->first_string;
1336 while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1337 n = n->next;
1338
1339 return(n ? n->string : NULL);
1340 }
1341
1342
1343 static void
1344 roff_freestr(struct roff *r)
1345 {
1346 struct roffstr *n, *nn;
1347
1348 for (n = r->first_string; n; n = nn) {
1349 free(n->name);
1350 free(n->string);
1351 nn = n->next;
1352 free(n);
1353 }
1354
1355 r->first_string = NULL;
1356 }
1357
1358 const struct tbl_span *
1359 roff_span(const struct roff *r)
1360 {
1361
1362 return(r->tbl ? tbl_span(r->tbl) : NULL);
1363 }