]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Plan9 has a man(7) implementation that looks extremely archaic,
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.112 2010/12/29 14:53:31 kristaps Exp $ */
2 /*
3 * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <ctype.h>
25 #include <limits.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <stdio.h>
29
30 #include "mandoc.h"
31 #include "roff.h"
32 #include "libroff.h"
33 #include "libmandoc.h"
34
/* Capacity of the rule stack kept for `ie'/`el' pairs (see struct roff). */
#define	RSTACK_MAX	128

/* Non-zero if `c' is one of the two roff control characters. */
#define	ROFF_CTL(c) \
	((c) == '.' || (c) == '\'')
39
/*
 * Tokens for the requests handled by this file.
 * The order must match the roffs[] table, which is indexed by these
 * values; roff_hash_find() converts a table slot back into a token.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_rm,
	ROFF_so,
	ROFF_tr,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_cblock,	/* the block-closing `..' */
	ROFF_ccond,	/* the conditional-closing `\}'; FIXME: remove this. */
	ROFF_USERDEF,	/* a macro defined by the document itself */
	ROFF_MAX	/* number of tokens; also means "not found" */
};
68
/*
 * Evaluation rule attached to a scope: whether the lines inside it
 * are passed on (ALLOW) or thrown away (DENY).
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
73
/*
 * One entry in the singly-linked list of user-defined strings and
 * macros; see roff_setstr(), roff_getstrn() and roff_freestr().
 */
struct	roffstr {
	char		*name;		/* lookup key */
	char		*string;	/* value, or NULL if none is stored */
	struct roffstr	*next;		/* following entry, NULL at the end */
};
79
80 struct roff {
81 struct roffnode *last; /* leaf of stack */
82 mandocmsg msg; /* err/warn/fatal messages */
83 void *data; /* privdata for messages */
84 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
85 int rstackpos; /* position in rstack */
86 struct regset *regs; /* read/writable registers */
87 struct roffstr *first_string; /* user-defined strings & macros */
88 const char *current_string; /* value of last called user macro */
89 struct tbl *tbl;
90 };
91
92 struct roffnode {
93 enum rofft tok; /* type of node */
94 struct roffnode *parent; /* up one in stack */
95 int line; /* parse line */
96 int col; /* parse col */
97 char *name; /* node name, e.g. macro name */
98 char *end; /* end-rules: custom token */
99 int endspan; /* end-rules: next-line or infty */
100 enum roffrule rule; /* current evaluation rule */
101 };
102
/*
 * Parameter list shared by every request handler in this file.
 */
#define	ROFF_ARGS	 struct roff *r,  /* parser context */ \
			 enum rofft tok,  /* token of the request */ \
			 char **bufp,	  /* input buffer */ \
			 size_t *szp,	  /* size of input buffer */ \
			 int ln,	  /* input line number */ \
			 int ppos,	  /* original position in buffer */ \
			 int pos,	  /* current position in buffer */ \
			 int *offs	  /* reset offset of buffer data */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
113
114 struct roffmac {
115 const char *name; /* macro name */
116 roffproc proc; /* process new macro */
117 roffproc text; /* process as child text of macro */
118 roffproc sub; /* process as child of macro */
119 int flags;
120 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
121 struct roffmac *next;
122 };
123
/* Request handlers and the string-table helpers they rely on. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_freestr(struct roff *);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_line_error(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	int		 roff_res(struct roff *,
				char **, size_t *, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
149
/*
 * The request table is hashed on the first character of the request
 * name only; that character must lie within [ASCII_LO, ASCII_HI].
 * See roff_hash_find().
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* One chain of requests per possible first character. */
static	struct roffmac	*hash[HASHWIDTH];
157
158 static struct roffmac roffs[ROFF_MAX] = {
159 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
160 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
161 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
162 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
163 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
164 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
165 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
166 { "ds", roff_ds, NULL, NULL, 0, NULL },
167 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
168 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
169 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
170 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
171 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
172 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
173 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
174 { "nr", roff_nr, NULL, NULL, 0, NULL },
175 { "rm", roff_line_error, NULL, NULL, 0, NULL },
176 { "so", roff_so, NULL, NULL, 0, NULL },
177 { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
178 { "TS", roff_TS, NULL, NULL, 0, NULL },
179 { "TE", roff_TE, NULL, NULL, 0, NULL },
180 { "T&", roff_T_, NULL, NULL, 0, NULL },
181 { ".", roff_cblock, NULL, NULL, 0, NULL },
182 { "\\}", roff_ccond, NULL, NULL, 0, NULL },
183 { NULL, roff_userdef, NULL, NULL, 0, NULL },
184 };
185
/* Parser-internal helpers. */
static	void		 roff_free1(struct roff *);
static	enum rofft	 roff_hash_find(const char *, size_t);
static	void		 roff_hash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	void		 roffnode_pop(struct roff *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	int		 roff_parse_nat(const char *, unsigned int *);

/* Bucket index for the name `p'; see roff_hash_find(). */
#define	ROFF_HASH(p)	(p[0] - ASCII_LO)
198
199 static void
200 roff_hash_init(void)
201 {
202 struct roffmac *n;
203 int buc, i;
204
205 for (i = 0; i < (int)ROFF_USERDEF; i++) {
206 assert(roffs[i].name[0] >= ASCII_LO);
207 assert(roffs[i].name[0] <= ASCII_HI);
208
209 buc = ROFF_HASH(roffs[i].name);
210
211 if (NULL != (n = hash[buc])) {
212 for ( ; n->next; n = n->next)
213 /* Do nothing. */ ;
214 n->next = &roffs[i];
215 } else
216 hash[buc] = &roffs[i];
217 }
218 }
219
220
221 /*
222 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
223 * the nil-terminated string name could be found.
224 */
225 static enum rofft
226 roff_hash_find(const char *p, size_t s)
227 {
228 int buc;
229 struct roffmac *n;
230
231 /*
232 * libroff has an extremely simple hashtable, for the time
233 * being, which simply keys on the first character, which must
234 * be printable, then walks a chain. It works well enough until
235 * optimised.
236 */
237
238 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
239 return(ROFF_MAX);
240
241 buc = ROFF_HASH(p);
242
243 if (NULL == (n = hash[buc]))
244 return(ROFF_MAX);
245 for ( ; n; n = n->next)
246 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
247 return((enum rofft)(n - roffs));
248
249 return(ROFF_MAX);
250 }
251
252
253 /*
254 * Pop the current node off of the stack of roff instructions currently
255 * pending.
256 */
257 static void
258 roffnode_pop(struct roff *r)
259 {
260 struct roffnode *p;
261
262 assert(r->last);
263 p = r->last;
264
265 if (ROFF_el == p->tok)
266 if (r->rstackpos > -1)
267 r->rstackpos--;
268
269 r->last = r->last->parent;
270 free(p->name);
271 free(p->end);
272 free(p);
273 }
274
275
276 /*
277 * Push a roff node onto the instruction stack. This must later be
278 * removed with roffnode_pop().
279 */
280 static void
281 roffnode_push(struct roff *r, enum rofft tok, const char *name,
282 int line, int col)
283 {
284 struct roffnode *p;
285
286 p = mandoc_calloc(1, sizeof(struct roffnode));
287 p->tok = tok;
288 if (name)
289 p->name = mandoc_strdup(name);
290 p->parent = r->last;
291 p->line = line;
292 p->col = col;
293 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
294
295 r->last = p;
296 }
297
298
299 static void
300 roff_free1(struct roff *r)
301 {
302
303 if (r->tbl) {
304 tbl_free(r->tbl);
305 r->tbl = NULL;
306 }
307
308 while (r->last)
309 roffnode_pop(r);
310
311 roff_freestr(r);
312 }
313
314
/*
 * Discard all per-document parser state but keep the context itself
 * allocated.
 */
void
roff_reset(struct roff *r)
{

	roff_free1(r);
}
321
322
/*
 * Destroy a parser context: discard its per-document state, then free
 * the context structure.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
330
331
332 struct roff *
333 roff_alloc(struct regset *regs, void *data, const mandocmsg msg)
334 {
335 struct roff *r;
336
337 r = mandoc_calloc(1, sizeof(struct roff));
338 r->regs = regs;
339 r->msg = msg;
340 r->data = data;
341 r->rstackpos = -1;
342
343 roff_hash_init();
344 return(r);
345 }
346
347
/*
 * Expand the first reference to a defined user string on the line,
 * looking from `pos' onwards.  References are written `\*x', `\*(xx'
 * or `\*[name]'.
 *
 * Returns 0 if a reference was replaced; *bufp and *szp then describe
 * a newly allocated line and the old one has been freed.  Returns 1 if
 * nothing was replaced, which includes references to undefined
 * strings and references cut short by the end of the line.
 */
static int
roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
{
	const char	*esc;	/* the backslash starting the escape */
	const char	*name;	/* first byte of the string name */
	const char	*scan;	/* read position */
	const char	*subst;	/* value to substitute */
	char		*nbuf;
	size_t		 namelen, nbufsz;
	int		 maxl;	/* fixed name length; 0 means `[...]' */

	for (scan = *bufp + pos; ; ) {
		if (NULL == (scan = strchr(scan, '\\')))
			return(1);
		esc = scan++;

		/*
		 * Only `\*' is of interest.  Any other escaped
		 * character is stepped over, since being escaped it
		 * cannot itself start an escape sequence.
		 */

		if ('\0' == *scan)
			return(1);
		if ('*' != *scan++)
			continue;

		/* The next character selects the form of the name. */

		if ('\0' == *scan)
			return(1);

		if ('(' == *scan) {
			maxl = 2;
			scan++;
		} else if ('[' == *scan) {
			maxl = 0;
			scan++;
		} else
			maxl = 1;

		name = scan;
		namelen = 0;

		/* Walk to the end of the name; a truncated one is fatal. */

		if (maxl) {
			while (namelen < (size_t)maxl) {
				if ('\0' == *scan)
					return(1);
				namelen++;
				scan++;
			}
		} else {
			while (']' != *scan) {
				if ('\0' == *scan)
					return(1);
				namelen++;
				scan++;
			}
		}

		/* Undefined strings are left alone; keep searching. */

		subst = roff_getstrn(r, name, namelen);

		if (NULL == subst) {
			if (maxl)
				scan--;
			continue;
		}

		/*
		 * Assemble the new line: everything before the escape,
		 * the value, and everything after the name (skipping
		 * the closing bracket of the `[...]' form).
		 */

		nbufsz = *szp + strlen(subst) + 1;
		nbuf = mandoc_malloc(nbufsz);

		strlcpy(nbuf, *bufp, (size_t)(esc - *bufp + 1));
		strlcat(nbuf, subst, nbufsz);
		strlcat(nbuf, maxl ? scan : scan + 1, nbufsz);

		free(*bufp);

		*bufp = nbuf;
		*szp = nbufsz;
		return(0);
	}
}
443
444
445 enum rofferr
446 roff_parseln(struct roff *r, int ln, char **bufp,
447 size_t *szp, int pos, int *offs)
448 {
449 enum rofft t;
450 enum rofferr e;
451 int ppos;
452
453 /*
454 * Run the reserved-word filter only if we have some reserved
455 * words to fill in.
456 */
457
458 if (r->first_string && ! roff_res(r, bufp, szp, pos))
459 return(ROFF_REPARSE);
460
461 /*
462 * First, if a scope is open and we're not a macro, pass the
463 * text through the macro's filter. If a scope isn't open and
464 * we're not a macro, just let it through.
465 */
466
467 if (r->last && ! ROFF_CTL((*bufp)[pos])) {
468 t = r->last->tok;
469 assert(roffs[t].text);
470 e = (*roffs[t].text)
471 (r, t, bufp, szp, ln, pos, pos, offs);
472 assert(ROFF_IGN == e || ROFF_CONT == e);
473 if (ROFF_CONT == e && r->tbl)
474 return(tbl_read(r->tbl, ln, *bufp, *offs));
475 return(e);
476 } else if ( ! ROFF_CTL((*bufp)[pos])) {
477 if (r->tbl)
478 return(tbl_read(r->tbl, ln, *bufp, *offs));
479 return(ROFF_CONT);
480 }
481
482 /*
483 * If a scope is open, go to the child handler for that macro,
484 * as it may want to preprocess before doing anything with it.
485 */
486
487 if (r->last) {
488 t = r->last->tok;
489 assert(roffs[t].sub);
490 return((*roffs[t].sub)
491 (r, t, bufp, szp,
492 ln, pos, pos, offs));
493 }
494
495 /*
496 * Lastly, as we've no scope open, try to look up and execute
497 * the new macro. If no macro is found, simply return and let
498 * the compilers handle it.
499 */
500
501 ppos = pos;
502 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
503 return(ROFF_CONT);
504
505 assert(roffs[t].proc);
506 return((*roffs[t].proc)
507 (r, t, bufp, szp,
508 ln, ppos, pos, offs));
509 }
510
511
512 int
513 roff_endparse(struct roff *r)
514 {
515
516 /* FIXME: if r->tbl */
517 if (r->last)
518 (*r->msg)(MANDOCERR_SCOPEEXIT, r->data,
519 r->last->line, r->last->col, NULL);
520 return(1);
521 }
522
523
524 /*
525 * Parse a roff node's type from the input buffer. This must be in the
526 * form of ".foo xxx" in the usual way.
527 */
528 static enum rofft
529 roff_parse(struct roff *r, const char *buf, int *pos)
530 {
531 const char *mac;
532 size_t maclen;
533 enum rofft t;
534
535 assert(ROFF_CTL(buf[*pos]));
536 (*pos)++;
537
538 while (' ' == buf[*pos] || '\t' == buf[*pos])
539 (*pos)++;
540
541 if ('\0' == buf[*pos])
542 return(ROFF_MAX);
543
544 mac = buf + *pos;
545 maclen = strcspn(mac, " \\\t\0");
546
547 t = (r->current_string = roff_getstrn(r, mac, maclen))
548 ? ROFF_USERDEF : roff_hash_find(mac, maclen);
549
550 *pos += maclen;
551 while (buf[*pos] && ' ' == buf[*pos])
552 (*pos)++;
553
554 return(t);
555 }
556
557
/*
 * Convert the decimal number in `buf' to an unsigned integer.
 * Returns 1 and stores the value in *res on success.  Returns 0 and
 * leaves *res untouched if `buf' is empty, has anything following the
 * number, or holds a value outside the range [0, INT_MAX].
 */
static int
roff_parse_nat(const char *buf, unsigned int *res)
{
	char	*end;
	long	 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* The whole of a non-empty string must have been consumed. */

	if ('\0' == buf[0] || '\0' != *end)
		return(0);

	/* Values that did not even fit into a long. */

	if (ERANGE == errno && (LONG_MAX == num || LONG_MIN == num))
		return(0);

	/* Negative values and values too large for an int. */

	if (num < 0 || num > INT_MAX)
		return(0);

	*res = (unsigned int)num;
	return(1);
}
576
577
578 /* ARGSUSED */
579 static enum rofferr
580 roff_cblock(ROFF_ARGS)
581 {
582
583 /*
584 * A block-close `..' should only be invoked as a child of an
585 * ignore macro, otherwise raise a warning and just ignore it.
586 */
587
588 if (NULL == r->last) {
589 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
590 return(ROFF_IGN);
591 }
592
593 switch (r->last->tok) {
594 case (ROFF_am):
595 /* FALLTHROUGH */
596 case (ROFF_ami):
597 /* FALLTHROUGH */
598 case (ROFF_am1):
599 /* FALLTHROUGH */
600 case (ROFF_de):
601 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
602 /* FALLTHROUGH */
603 case (ROFF_dei):
604 /* FALLTHROUGH */
605 case (ROFF_ig):
606 break;
607 default:
608 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
609 return(ROFF_IGN);
610 }
611
612 if ((*bufp)[pos])
613 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
614
615 roffnode_pop(r);
616 roffnode_cleanscope(r);
617 return(ROFF_IGN);
618
619 }
620
621
622 static void
623 roffnode_cleanscope(struct roff *r)
624 {
625
626 while (r->last) {
627 if (--r->last->endspan < 0)
628 break;
629 roffnode_pop(r);
630 }
631 }
632
633
634 /* ARGSUSED */
635 static enum rofferr
636 roff_ccond(ROFF_ARGS)
637 {
638
639 if (NULL == r->last) {
640 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
641 return(ROFF_IGN);
642 }
643
644 switch (r->last->tok) {
645 case (ROFF_el):
646 /* FALLTHROUGH */
647 case (ROFF_ie):
648 /* FALLTHROUGH */
649 case (ROFF_if):
650 break;
651 default:
652 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
653 return(ROFF_IGN);
654 }
655
656 if (r->last->endspan > -1) {
657 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
658 return(ROFF_IGN);
659 }
660
661 if ((*bufp)[pos])
662 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
663
664 roffnode_pop(r);
665 roffnode_cleanscope(r);
666 return(ROFF_IGN);
667 }
668
669
670 /* ARGSUSED */
671 static enum rofferr
672 roff_block(ROFF_ARGS)
673 {
674 int sv;
675 size_t sz;
676 char *name;
677
678 name = NULL;
679
680 if (ROFF_ig != tok) {
681 if ('\0' == (*bufp)[pos]) {
682 (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
683 return(ROFF_IGN);
684 }
685
686 /*
687 * Re-write `de1', since we don't really care about
688 * groff's strange compatibility mode, into `de'.
689 */
690
691 if (ROFF_de1 == tok)
692 tok = ROFF_de;
693 if (ROFF_de == tok)
694 name = *bufp + pos;
695 else
696 (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos,
697 roffs[tok].name);
698
699 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
700 pos++;
701
702 while (' ' == (*bufp)[pos])
703 (*bufp)[pos++] = '\0';
704 }
705
706 roffnode_push(r, tok, name, ln, ppos);
707
708 /*
709 * At the beginning of a `de' macro, clear the existing string
710 * with the same name, if there is one. New content will be
711 * added from roff_block_text() in multiline mode.
712 */
713
714 if (ROFF_de == tok)
715 roff_setstr(r, name, "", 0);
716
717 if ('\0' == (*bufp)[pos])
718 return(ROFF_IGN);
719
720 /* If present, process the custom end-of-line marker. */
721
722 sv = pos;
723 while ((*bufp)[pos] &&
724 ' ' != (*bufp)[pos] &&
725 '\t' != (*bufp)[pos])
726 pos++;
727
728 /*
729 * Note: groff does NOT like escape characters in the input.
730 * Instead of detecting this, we're just going to let it fly and
731 * to hell with it.
732 */
733
734 assert(pos > sv);
735 sz = (size_t)(pos - sv);
736
737 if (1 == sz && '.' == (*bufp)[sv])
738 return(ROFF_IGN);
739
740 r->last->end = mandoc_malloc(sz + 1);
741
742 memcpy(r->last->end, *bufp + sv, sz);
743 r->last->end[(int)sz] = '\0';
744
745 if ((*bufp)[pos])
746 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
747
748 return(ROFF_IGN);
749 }
750
751
752 /* ARGSUSED */
753 static enum rofferr
754 roff_block_sub(ROFF_ARGS)
755 {
756 enum rofft t;
757 int i, j;
758
759 /*
760 * First check whether a custom macro exists at this level. If
761 * it does, then check against it. This is some of groff's
762 * stranger behaviours. If we encountered a custom end-scope
763 * tag and that tag also happens to be a "real" macro, then we
764 * need to try interpreting it again as a real macro. If it's
765 * not, then return ignore. Else continue.
766 */
767
768 if (r->last->end) {
769 i = pos + 1;
770 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
771 i++;
772
773 for (j = 0; r->last->end[j]; j++, i++)
774 if ((*bufp)[i] != r->last->end[j])
775 break;
776
777 if ('\0' == r->last->end[j] &&
778 ('\0' == (*bufp)[i] ||
779 ' ' == (*bufp)[i] ||
780 '\t' == (*bufp)[i])) {
781 roffnode_pop(r);
782 roffnode_cleanscope(r);
783
784 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
785 return(ROFF_RERUN);
786 return(ROFF_IGN);
787 }
788 }
789
790 /*
791 * If we have no custom end-query or lookup failed, then try
792 * pulling it out of the hashtable.
793 */
794
795 ppos = pos;
796 t = roff_parse(r, *bufp, &pos);
797
798 /*
799 * Macros other than block-end are only significant
800 * in `de' blocks; elsewhere, simply throw them away.
801 */
802 if (ROFF_cblock != t) {
803 if (ROFF_de == tok)
804 roff_setstr(r, r->last->name, *bufp + ppos, 1);
805 return(ROFF_IGN);
806 }
807
808 assert(roffs[t].proc);
809 return((*roffs[t].proc)(r, t, bufp, szp,
810 ln, ppos, pos, offs));
811 }
812
813
814 /* ARGSUSED */
815 static enum rofferr
816 roff_block_text(ROFF_ARGS)
817 {
818
819 if (ROFF_de == tok)
820 roff_setstr(r, r->last->name, *bufp + pos, 1);
821
822 return(ROFF_IGN);
823 }
824
825
826 /* ARGSUSED */
827 static enum rofferr
828 roff_cond_sub(ROFF_ARGS)
829 {
830 enum rofft t;
831 enum roffrule rr;
832
833 ppos = pos;
834 rr = r->last->rule;
835
836 /*
837 * Clean out scope. If we've closed ourselves, then don't
838 * continue.
839 */
840
841 roffnode_cleanscope(r);
842
843 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
844 if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
845 return(roff_ccond
846 (r, ROFF_ccond, bufp, szp,
847 ln, pos, pos + 2, offs));
848 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
849 }
850
851 /*
852 * A denied conditional must evaluate its children if and only
853 * if they're either structurally required (such as loops and
854 * conditionals) or a closing macro.
855 */
856 if (ROFFRULE_DENY == rr)
857 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
858 if (ROFF_ccond != t)
859 return(ROFF_IGN);
860
861 assert(roffs[t].proc);
862 return((*roffs[t].proc)(r, t, bufp, szp,
863 ln, ppos, pos, offs));
864 }
865
866
867 /* ARGSUSED */
868 static enum rofferr
869 roff_cond_text(ROFF_ARGS)
870 {
871 char *ep, *st;
872 enum roffrule rr;
873
874 rr = r->last->rule;
875
876 /*
877 * We display the value of the text if out current evaluation
878 * scope permits us to do so.
879 */
880
881 /* FIXME: use roff_ccond? */
882
883 st = &(*bufp)[pos];
884 if (NULL == (ep = strstr(st, "\\}"))) {
885 roffnode_cleanscope(r);
886 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
887 }
888
889 if (ep == st || (ep > st && '\\' != *(ep - 1)))
890 roffnode_pop(r);
891
892 roffnode_cleanscope(r);
893 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
894 }
895
896
897 static enum roffrule
898 roff_evalcond(const char *v, int *pos)
899 {
900
901 switch (v[*pos]) {
902 case ('n'):
903 (*pos)++;
904 return(ROFFRULE_ALLOW);
905 case ('e'):
906 /* FALLTHROUGH */
907 case ('o'):
908 /* FALLTHROUGH */
909 case ('t'):
910 (*pos)++;
911 return(ROFFRULE_DENY);
912 default:
913 break;
914 }
915
916 while (v[*pos] && ' ' != v[*pos])
917 (*pos)++;
918 return(ROFFRULE_DENY);
919 }
920
921 /* ARGSUSED */
922 static enum rofferr
923 roff_line_ignore(ROFF_ARGS)
924 {
925
926 return(ROFF_IGN);
927 }
928
929 /* ARGSUSED */
930 static enum rofferr
931 roff_line_error(ROFF_ARGS)
932 {
933
934 (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos, roffs[tok].name);
935 return(ROFF_IGN);
936 }
937
938 /* ARGSUSED */
939 static enum rofferr
940 roff_cond(ROFF_ARGS)
941 {
942 int sv;
943 enum roffrule rule;
944
945 /* Stack overflow! */
946
947 if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
948 (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL);
949 return(ROFF_ERR);
950 }
951
952 /* First, evaluate the conditional. */
953
954 if (ROFF_el == tok) {
955 /*
956 * An `.el' will get the value of the current rstack
957 * entry set in prior `ie' calls or defaults to DENY.
958 */
959 if (r->rstackpos < 0)
960 rule = ROFFRULE_DENY;
961 else
962 rule = r->rstack[r->rstackpos];
963 } else
964 rule = roff_evalcond(*bufp, &pos);
965
966 sv = pos;
967
968 while (' ' == (*bufp)[pos])
969 pos++;
970
971 /*
972 * Roff is weird. If we have just white-space after the
973 * conditional, it's considered the BODY and we exit without
974 * really doing anything. Warn about this. It's probably
975 * wrong.
976 */
977
978 if ('\0' == (*bufp)[pos] && sv != pos) {
979 (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
980 return(ROFF_IGN);
981 }
982
983 roffnode_push(r, tok, NULL, ln, ppos);
984
985 r->last->rule = rule;
986
987 if (ROFF_ie == tok) {
988 /*
989 * An if-else will put the NEGATION of the current
990 * evaluated conditional into the stack.
991 */
992 r->rstackpos++;
993 if (ROFFRULE_DENY == r->last->rule)
994 r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
995 else
996 r->rstack[r->rstackpos] = ROFFRULE_DENY;
997 }
998
999 /* If the parent has false as its rule, then so do we. */
1000
1001 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1002 r->last->rule = ROFFRULE_DENY;
1003
1004 /*
1005 * Determine scope. If we're invoked with "\{" trailing the
1006 * conditional, then we're in a multiline scope. Else our scope
1007 * expires on the next line.
1008 */
1009
1010 r->last->endspan = 1;
1011
1012 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1013 r->last->endspan = -1;
1014 pos += 2;
1015 }
1016
1017 /*
1018 * If there are no arguments on the line, the next-line scope is
1019 * assumed.
1020 */
1021
1022 if ('\0' == (*bufp)[pos])
1023 return(ROFF_IGN);
1024
1025 /* Otherwise re-run the roff parser after recalculating. */
1026
1027 *offs = pos;
1028 return(ROFF_RERUN);
1029 }
1030
1031
1032 /* ARGSUSED */
1033 static enum rofferr
1034 roff_ds(ROFF_ARGS)
1035 {
1036 char *name, *string;
1037
1038 /*
1039 * A symbol is named by the first word following the macro
1040 * invocation up to a space. Its value is anything after the
1041 * name's trailing whitespace and optional double-quote. Thus,
1042 *
1043 * [.ds foo "bar " ]
1044 *
1045 * will have `bar " ' as its value.
1046 */
1047
1048 name = *bufp + pos;
1049 if ('\0' == *name)
1050 return(ROFF_IGN);
1051
1052 string = name;
1053 /* Read until end of name. */
1054 while (*string && ' ' != *string)
1055 string++;
1056
1057 /* Nil-terminate name. */
1058 if (*string)
1059 *(string++) = '\0';
1060
1061 /* Read past spaces. */
1062 while (*string && ' ' == *string)
1063 string++;
1064
1065 /* Read passed initial double-quote. */
1066 if (*string && '"' == *string)
1067 string++;
1068
1069 /* The rest is the value. */
1070 roff_setstr(r, name, string, 0);
1071 return(ROFF_IGN);
1072 }
1073
1074
1075 /* ARGSUSED */
1076 static enum rofferr
1077 roff_nr(ROFF_ARGS)
1078 {
1079 const char *key, *val;
1080 struct reg *rg;
1081
1082 key = &(*bufp)[pos];
1083 rg = r->regs->regs;
1084
1085 /* Parse register request. */
1086 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
1087 pos++;
1088
1089 /*
1090 * Set our nil terminator. Because this line is going to be
1091 * ignored anyway, we can munge it as we please.
1092 */
1093 if ((*bufp)[pos])
1094 (*bufp)[pos++] = '\0';
1095
1096 /* Skip whitespace to register token. */
1097 while ((*bufp)[pos] && ' ' == (*bufp)[pos])
1098 pos++;
1099
1100 val = &(*bufp)[pos];
1101
1102 /* Process register token. */
1103
1104 if (0 == strcmp(key, "nS")) {
1105 rg[(int)REG_nS].set = 1;
1106 if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
1107 rg[(int)REG_nS].v.u = 0;
1108 }
1109
1110 return(ROFF_IGN);
1111 }
1112
1113 /* ARGSUSED */
1114 static enum rofferr
1115 roff_TE(ROFF_ARGS)
1116 {
1117
1118 if (NULL == r->tbl)
1119 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
1120 else
1121 tbl_free(r->tbl);
1122
1123 r->tbl = NULL;
1124 return(ROFF_IGN);
1125 }
1126
1127 /* ARGSUSED */
1128 static enum rofferr
1129 roff_T_(ROFF_ARGS)
1130 {
1131
1132 if (NULL == r->tbl)
1133 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
1134 else
1135 tbl_restart(r->tbl);
1136
1137 return(ROFF_IGN);
1138 }
1139
1140 /* ARGSUSED */
1141 static enum rofferr
1142 roff_TS(ROFF_ARGS)
1143 {
1144
1145 if (r->tbl) {
1146 (*r->msg)(MANDOCERR_SCOPEBROKEN, r->data, ln, ppos, NULL);
1147 tbl_reset(r->tbl);
1148 } else
1149 r->tbl = tbl_alloc(r->data, r->msg);
1150
1151 return(ROFF_IGN);
1152 }
1153
1154 /* ARGSUSED */
1155 static enum rofferr
1156 roff_so(ROFF_ARGS)
1157 {
1158 char *name;
1159
1160 (*r->msg)(MANDOCERR_SO, r->data, ln, ppos, NULL);
1161
1162 /*
1163 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1164 * opening anything that's not in our cwd or anything beneath
1165 * it. Thus, explicitly disallow traversing up the file-system
1166 * or using absolute paths.
1167 */
1168
1169 name = *bufp + pos;
1170 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1171 (*r->msg)(MANDOCERR_SOPATH, r->data, ln, pos, NULL);
1172 return(ROFF_ERR);
1173 }
1174
1175 *offs = pos;
1176 return(ROFF_SO);
1177 }
1178
1179 /* ARGSUSED */
1180 static enum rofferr
1181 roff_userdef(ROFF_ARGS)
1182 {
1183 const char *arg[9];
1184 char *cp, *n1, *n2;
1185 int i, quoted, pairs;
1186
1187 /*
1188 * Collect pointers to macro argument strings
1189 * and null-terminate them.
1190 */
1191 cp = *bufp + pos;
1192 for (i = 0; i < 9; i++) {
1193 /* Quoting can only start with a new word. */
1194 if ('"' == *cp) {
1195 quoted = 1;
1196 cp++;
1197 } else
1198 quoted = 0;
1199 arg[i] = cp;
1200 for (pairs = 0; '\0' != *cp; cp++) {
1201 /* Unquoted arguments end at blanks. */
1202 if (0 == quoted) {
1203 if (' ' == *cp)
1204 break;
1205 continue;
1206 }
1207 /* After pairs of quotes, move left. */
1208 if (pairs)
1209 cp[-pairs] = cp[0];
1210 /* Pairs of quotes do not end words, ... */
1211 if ('"' == cp[0] && '"' == cp[1]) {
1212 pairs++;
1213 cp++;
1214 continue;
1215 }
1216 /* ... but solitary quotes do. */
1217 if ('"' != *cp)
1218 continue;
1219 if (pairs)
1220 cp[-pairs] = '\0';
1221 *cp = ' ';
1222 break;
1223 }
1224 /* Last argument; the remaining ones are empty strings. */
1225 if ('\0' == *cp)
1226 continue;
1227 /* Null-terminate argument and move to the next one. */
1228 *cp++ = '\0';
1229 while (' ' == *cp)
1230 cp++;
1231 }
1232
1233 /*
1234 * Expand macro arguments.
1235 */
1236 *szp = 0;
1237 n1 = cp = mandoc_strdup(r->current_string);
1238 while (NULL != (cp = strstr(cp, "\\$"))) {
1239 i = cp[2] - '1';
1240 if (0 > i || 8 < i) {
1241 /* Not an argument invocation. */
1242 cp += 2;
1243 continue;
1244 }
1245
1246 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1247 n2 = mandoc_malloc(*szp);
1248
1249 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1250 strlcat(n2, arg[i], *szp);
1251 strlcat(n2, cp + 3, *szp);
1252
1253 cp = n2 + (cp - n1);
1254 free(n1);
1255 n1 = n2;
1256 }
1257
1258 /*
1259 * Replace the macro invocation
1260 * by the expanded macro.
1261 */
1262 free(*bufp);
1263 *bufp = n1;
1264 if (0 == *szp)
1265 *szp = strlen(*bufp) + 1;
1266
1267 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1268 ROFF_REPARSE : ROFF_APPEND);
1269 }
1270
1271 /*
1272 * Store *string into the user-defined string called *name.
1273 * In multiline mode, append to an existing entry and append '\n';
1274 * else replace the existing entry, if there is one.
1275 * To clear an existing entry, call with (*r, *name, NULL, 0).
1276 */
1277 static void
1278 roff_setstr(struct roff *r, const char *name, const char *string,
1279 int multiline)
1280 {
1281 struct roffstr *n;
1282 char *c;
1283 size_t oldch, newch;
1284
1285 /* Search for an existing string with the same name. */
1286 n = r->first_string;
1287 while (n && strcmp(name, n->name))
1288 n = n->next;
1289
1290 if (NULL == n) {
1291 /* Create a new string table entry. */
1292 n = mandoc_malloc(sizeof(struct roffstr));
1293 n->name = mandoc_strdup(name);
1294 n->string = NULL;
1295 n->next = r->first_string;
1296 r->first_string = n;
1297 } else if (0 == multiline) {
1298 /* In multiline mode, append; else replace. */
1299 free(n->string);
1300 n->string = NULL;
1301 }
1302
1303 if (NULL == string)
1304 return;
1305
1306 /*
1307 * One additional byte for the '\n' in multiline mode,
1308 * and one for the terminating '\0'.
1309 */
1310 newch = strlen(string) + (multiline ? 2 : 1);
1311 if (NULL == n->string) {
1312 n->string = mandoc_malloc(newch);
1313 *n->string = '\0';
1314 oldch = 0;
1315 } else {
1316 oldch = strlen(n->string);
1317 n->string = mandoc_realloc(n->string, oldch + newch);
1318 }
1319
1320 /* Skip existing content in the destination buffer. */
1321 c = n->string + oldch;
1322
1323 /* Append new content to the destination buffer. */
1324 while (*string) {
1325 /*
1326 * Rudimentary roff copy mode:
1327 * Handle escaped backslashes.
1328 */
1329 if ('\\' == *string && '\\' == *(string + 1))
1330 string++;
1331 *c++ = *string++;
1332 }
1333
1334 /* Append terminating bytes. */
1335 if (multiline)
1336 *c++ = '\n';
1337 *c = '\0';
1338 }
1339
1340
1341 static const char *
1342 roff_getstrn(const struct roff *r, const char *name, size_t len)
1343 {
1344 const struct roffstr *n;
1345
1346 n = r->first_string;
1347 while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1348 n = n->next;
1349
1350 return(n ? n->string : NULL);
1351 }
1352
1353
1354 static void
1355 roff_freestr(struct roff *r)
1356 {
1357 struct roffstr *n, *nn;
1358
1359 for (n = r->first_string; n; n = nn) {
1360 free(n->name);
1361 free(n->string);
1362 nn = n->next;
1363 free(n);
1364 }
1365
1366 r->first_string = NULL;
1367 }