]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Revert IGNPAR to a warning after clue-stick applied by schwarze@:
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.108 2010/12/21 01:46:44 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <ctype.h>
25 #include <limits.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <stdio.h>
29
30 #include "mandoc.h"
31 #include "roff.h"
32 #include "libmandoc.h"
33
/* Number of slots in the `ie' rule stack kept in struct roff. */
#define	RSTACK_MAX	128

/* True if c is one of the two roff control characters, `.' or `''. */
#define	ROFF_CTL(c) \
	((c) == '.' || (c) == '\'')

/*
 * Debug tracing.  The first branch compiles every ROFF_DEBUG() call
 * away; change the condition to 0 to print the messages on stderr.
 */
#if 1
#define	ROFF_DEBUG(fmt, args...) \
	do { /* Nothing. */ } while (/*CONSTCOND*/ 0)
#else
#define	ROFF_DEBUG(fmt, args...) \
	do { fprintf(stderr, fmt , ##args); } while (/*CONSTCOND*/ 0)
#endif
46
/*
 * Request tokens.  The numeric value of each token is its index in the
 * roffs[] table, so the order here must match the order of that table.
 */
enum	rofft {
	ROFF_ad,	/* "ad" */
	ROFF_am,	/* "am" */
	ROFF_ami,	/* "ami" */
	ROFF_am1,	/* "am1" */
	ROFF_de,	/* "de" */
	ROFF_dei,	/* "dei" */
	ROFF_de1,	/* "de1" */
	ROFF_ds,	/* "ds" */
	ROFF_el,	/* "el" */
	ROFF_hy,	/* "hy" */
	ROFF_ie,	/* "ie" */
	ROFF_if,	/* "if" */
	ROFF_ig,	/* "ig" */
	ROFF_ne,	/* "ne" */
	ROFF_nh,	/* "nh" */
	ROFF_nr,	/* "nr" */
	ROFF_rm,	/* "rm" */
	ROFF_so,	/* "so" */
	ROFF_tr,	/* "tr" */
	ROFF_cblock,	/* "." (block close, `..') */
	ROFF_ccond,	/* "\}" -- FIXME: remove this. */
	ROFF_USERDEF,	/* user-defined macro; has no name in roffs[] */
	ROFF_MAX	/* number of tokens; also the "not found" value */
};
72
/* Outcome of a conditional: whether the guarded input is let through. */
enum	roffrule {
	ROFFRULE_ALLOW,	/* pass the body on */
	ROFFRULE_DENY	/* swallow the body */
};
77
78
/* One entry in the singly linked list of user-defined strings/macros. */
struct	roffstr {
	char		*name;		/* key of symbol */
	char		*string;	/* current value */
	struct roffstr	*next;		/* next in list */
};
84
85 struct roff {
86 struct roffnode *last; /* leaf of stack */
87 mandocmsg msg; /* err/warn/fatal messages */
88 void *data; /* privdata for messages */
89 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
90 int rstackpos; /* position in rstack */
91 struct regset *regs; /* read/writable registers */
92 struct roffstr *first_string; /* user-defined strings & macros */
93 const char *current_string; /* value of last called user macro */
94 };
95
96 struct roffnode {
97 enum rofft tok; /* type of node */
98 struct roffnode *parent; /* up one in stack */
99 int line; /* parse line */
100 int col; /* parse col */
101 char *name; /* node name, e.g. macro name */
102 char *end; /* end-rules: custom token */
103 int endspan; /* end-rules: next-line or infty */
104 enum roffrule rule; /* current evaluation rule */
105 };
106
/* Parameter list shared by every request handler. */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
117
118 struct roffmac {
119 const char *name; /* macro name */
120 roffproc proc; /* process new macro */
121 roffproc text; /* process as child text of macro */
122 roffproc sub; /* process as child of macro */
123 int flags;
124 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
125 struct roffmac *next;
126 };
127
/* Request handlers; all share the ROFF_ARGS parameter list. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_line_error(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);

/* Conditional evaluation, string table and escape expansion helpers. */
static	enum roffrule	 roff_evalcond(const char *v, int *pos);
static	void		 roff_freestr(struct roff *r);
static	const char	*roff_getstrn(const struct roff *r,
				const char *name, size_t len);
static	int		 roff_res(struct roff *r,
				char **bufp, size_t *szp, int pos);
static	void		 roff_setstr(struct roff *r,
				const char *name, const char *string,
				int multiline);
150
/*
 * The request hash table has one bucket per printable ASCII character
 * from ASCII_LO to ASCII_HI inclusive.  See roff_hash_find().
 */
#define	ASCII_HI	126
#define	ASCII_LO	33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
158
159 static struct roffmac roffs[ROFF_MAX] = {
160 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
161 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
162 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
163 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
164 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
165 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
166 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
167 { "ds", roff_ds, NULL, NULL, 0, NULL },
168 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
169 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
170 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
171 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
172 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
173 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
174 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
175 { "nr", roff_nr, NULL, NULL, 0, NULL },
176 { "rm", roff_line_error, NULL, NULL, 0, NULL },
177 { "so", roff_so, NULL, NULL, 0, NULL },
178 { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
179 { ".", roff_cblock, NULL, NULL, 0, NULL },
180 { "\\}", roff_ccond, NULL, NULL, 0, NULL },
181 { NULL, roff_userdef, NULL, NULL, 0, NULL },
182 };
183
/* Parser life cycle, scope stack and request lookup helpers. */
static	void		 roff_free1(struct roff *r);
static	enum rofft	 roff_hash_find(const char *p, size_t s);
static	void		 roff_hash_init(void);
static	void		 roffnode_cleanscope(struct roff *r);
static	void		 roffnode_push(struct roff *r, enum rofft tok,
				const char *name, int line, int col);
static	void		 roffnode_pop(struct roff *r);
static	enum rofft	 roff_parse(struct roff *r,
				const char *buf, int *pos);
static	int		 roff_parse_nat(const char *buf, unsigned int *res);

/* Bucket index for name p: offset of its first byte from ASCII_LO. */
/* See roff_hash_find() */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
196
197 static void
198 roff_hash_init(void)
199 {
200 struct roffmac *n;
201 int buc, i;
202
203 for (i = 0; i < (int)ROFF_USERDEF; i++) {
204 assert(roffs[i].name[0] >= ASCII_LO);
205 assert(roffs[i].name[0] <= ASCII_HI);
206
207 buc = ROFF_HASH(roffs[i].name);
208
209 if (NULL != (n = hash[buc])) {
210 for ( ; n->next; n = n->next)
211 /* Do nothing. */ ;
212 n->next = &roffs[i];
213 } else
214 hash[buc] = &roffs[i];
215 }
216 }
217
218
219 /*
220 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
221 * the nil-terminated string name could be found.
222 */
223 static enum rofft
224 roff_hash_find(const char *p, size_t s)
225 {
226 int buc;
227 struct roffmac *n;
228
229 /*
230 * libroff has an extremely simple hashtable, for the time
231 * being, which simply keys on the first character, which must
232 * be printable, then walks a chain. It works well enough until
233 * optimised.
234 */
235
236 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
237 return(ROFF_MAX);
238
239 buc = ROFF_HASH(p);
240
241 if (NULL == (n = hash[buc]))
242 return(ROFF_MAX);
243 for ( ; n; n = n->next)
244 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
245 return((enum rofft)(n - roffs));
246
247 return(ROFF_MAX);
248 }
249
250
251 /*
252 * Pop the current node off of the stack of roff instructions currently
253 * pending.
254 */
255 static void
256 roffnode_pop(struct roff *r)
257 {
258 struct roffnode *p;
259
260 assert(r->last);
261 p = r->last;
262
263 if (ROFF_el == p->tok)
264 if (r->rstackpos > -1)
265 r->rstackpos--;
266
267 ROFF_DEBUG("roff: popping scope\n");
268 r->last = r->last->parent;
269 free(p->name);
270 free(p->end);
271 free(p);
272 }
273
274
275 /*
276 * Push a roff node onto the instruction stack. This must later be
277 * removed with roffnode_pop().
278 */
279 static void
280 roffnode_push(struct roff *r, enum rofft tok, const char *name,
281 int line, int col)
282 {
283 struct roffnode *p;
284
285 p = mandoc_calloc(1, sizeof(struct roffnode));
286 p->tok = tok;
287 if (name)
288 p->name = mandoc_strdup(name);
289 p->parent = r->last;
290 p->line = line;
291 p->col = col;
292 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
293
294 r->last = p;
295 }
296
297
298 static void
299 roff_free1(struct roff *r)
300 {
301
302 while (r->last)
303 roffnode_pop(r);
304 roff_freestr(r);
305 }
306
307
308 void
309 roff_reset(struct roff *r)
310 {
311
312 roff_free1(r);
313 }
314
315
/*
 * Destroy a parser: release everything it owns, then the context
 * itself.  The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
323
324
325 struct roff *
326 roff_alloc(struct regset *regs, void *data, const mandocmsg msg)
327 {
328 struct roff *r;
329
330 r = mandoc_calloc(1, sizeof(struct roff));
331 r->regs = regs;
332 r->msg = msg;
333 r->data = data;
334 r->rstackpos = -1;
335
336 roff_hash_init();
337 return(r);
338 }
339
340
/*
 * Pre-filter a line for references to user-defined strings, i.e.
 * escapes of the form `\*x', `\*(xx' or `\*[name]', starting the
 * search at offset pos.  These must be handled before the actual line
 * is processed.
 *
 * At most one reference is expanded per call: the first one whose
 * name is defined.  In that case *bufp is freed and replaced by a
 * newly allocated line with the value spliced in, *szp is updated to
 * the new allocation size, and 0 is returned so the caller can
 * reparse.  If nothing was expanded, the buffer is left alone and 1 is
 * returned.
 */
static int
roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
{
	const char	*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	int		 i, maxl;
	size_t		 nsz;
	char		*n;

	/* Search for a leading backslash and save a pointer to it. */

	cp = *bufp + pos;
	while (NULL != (cp = strchr(cp, '\\'))) {
		stesc = cp++;

		/*
		 * The second character must be an asterisk.
		 * If it isn't, skip it anyway: It is escaped,
		 * so it can't start another escape sequence.
		 */

		if ('\0' == *cp)
			return(1);
		if ('*' != *cp++)
			continue;

		/*
		 * The third character decides the length
		 * of the name of the string; maxl == 0 means
		 * "up to the closing bracket".
		 * Save a pointer to the name.
		 */

		switch (*cp) {
		case ('\0'):
			return(1);
		case ('('):
			cp++;
			maxl = 2;
			break;
		case ('['):
			cp++;
			maxl = 0;
			break;
		default:
			maxl = 1;
			break;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
			if ('\0' == *cp)
				return(1); /* Error. */
			if (0 == maxl && ']' == *cp)
				break;
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 * For fixed-length names, step back one byte so
		 * that the last name character is scanned again.
		 */

		res = roff_getstrn(r, stnam, (size_t)i);

		if (NULL == res) {
			cp -= maxl ? 1 : 0;
			continue;
		}

		/* Replace the escape sequence by the string. */

		ROFF_DEBUG("roff: splicing reserved: [%.*s]\n", i, stnam);

		nsz = *szp + strlen(res) + 1;
		n = mandoc_malloc(nsz);

		/* Text before the escape, the value, text after the name. */
		strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
		strlcat(n, res, nsz);
		strlcat(n, cp + (maxl ? 0 : 1), nsz);

		free(*bufp);

		*bufp = n;
		*szp = nsz;
		return(0);
	}

	return(1);
}
438
439
440 enum rofferr
441 roff_parseln(struct roff *r, int ln, char **bufp,
442 size_t *szp, int pos, int *offs)
443 {
444 enum rofft t;
445 int ppos;
446
447 /*
448 * Run the reserved-word filter only if we have some reserved
449 * words to fill in.
450 */
451
452 if (r->first_string && ! roff_res(r, bufp, szp, pos))
453 return(ROFF_REPARSE);
454
455 /*
456 * First, if a scope is open and we're not a macro, pass the
457 * text through the macro's filter. If a scope isn't open and
458 * we're not a macro, just let it through.
459 */
460
461 if (r->last && ! ROFF_CTL((*bufp)[pos])) {
462 t = r->last->tok;
463 assert(roffs[t].text);
464 ROFF_DEBUG("roff: intercept scoped text: %s, [%s]\n",
465 roffs[t].name, &(*bufp)[pos]);
466 return((*roffs[t].text)
467 (r, t, bufp, szp,
468 ln, pos, pos, offs));
469 } else if ( ! ROFF_CTL((*bufp)[pos]))
470 return(ROFF_CONT);
471
472 /*
473 * If a scope is open, go to the child handler for that macro,
474 * as it may want to preprocess before doing anything with it.
475 */
476
477 if (r->last) {
478 t = r->last->tok;
479 assert(roffs[t].sub);
480 ROFF_DEBUG("roff: intercept scoped context: %s, [%s]\n",
481 roffs[t].name, &(*bufp)[pos]);
482 return((*roffs[t].sub)
483 (r, t, bufp, szp,
484 ln, pos, pos, offs));
485 }
486
487 /*
488 * Lastly, as we've no scope open, try to look up and execute
489 * the new macro. If no macro is found, simply return and let
490 * the compilers handle it.
491 */
492
493 ppos = pos;
494 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
495 return(ROFF_CONT);
496
497 ROFF_DEBUG("roff: intercept new-scope: [%s], [%s]\n",
498 ROFF_USERDEF == t ? r->current_string : roffs[t].name,
499 &(*bufp)[pos]);
500
501 assert(roffs[t].proc);
502 return((*roffs[t].proc)
503 (r, t, bufp, szp,
504 ln, ppos, pos, offs));
505 }
506
507
508 int
509 roff_endparse(struct roff *r)
510 {
511
512 if (NULL == r->last)
513 return(1);
514 return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
515 r->last->col, NULL));
516 }
517
518
519 /*
520 * Parse a roff node's type from the input buffer. This must be in the
521 * form of ".foo xxx" in the usual way.
522 */
523 static enum rofft
524 roff_parse(struct roff *r, const char *buf, int *pos)
525 {
526 const char *mac;
527 size_t maclen;
528 enum rofft t;
529
530 assert(ROFF_CTL(buf[*pos]));
531 (*pos)++;
532
533 while (' ' == buf[*pos] || '\t' == buf[*pos])
534 (*pos)++;
535
536 if ('\0' == buf[*pos])
537 return(ROFF_MAX);
538
539 mac = buf + *pos;
540 maclen = strcspn(mac, " \\\t\0");
541
542 t = (r->current_string = roff_getstrn(r, mac, maclen))
543 ? ROFF_USERDEF : roff_hash_find(mac, maclen);
544
545 *pos += maclen;
546 while (buf[*pos] && ' ' == buf[*pos])
547 (*pos)++;
548
549 return(t);
550 }
551
552
/*
 * Convert the whole of buf, read as a decimal number by strtol(3),
 * into a non-negative value no larger than INT_MAX.  On success the
 * value is stored in *res and 1 is returned.  Returns 0, leaving *res
 * untouched, if buf is empty, has trailing garbage, or the value is
 * negative or too large.
 */
static int
roff_parse_nat(const char *buf, unsigned int *res)
{
	char	*end;
	long	 val;

	errno = 0;
	val = strtol(buf, &end, 10);

	/* Nothing to convert, or not everything was converted. */
	if ('\0' == buf[0] || '\0' != *end)
		return 0;

	/* Did not even fit into a long. */
	if (ERANGE == errno && (LONG_MAX == val || LONG_MIN == val))
		return 0;

	if (val < 0 || val > INT_MAX)
		return 0;

	*res = (unsigned int)val;
	return 1;
}
571
572
573 /* ARGSUSED */
574 static enum rofferr
575 roff_cblock(ROFF_ARGS)
576 {
577
578 /*
579 * A block-close `..' should only be invoked as a child of an
580 * ignore macro, otherwise raise a warning and just ignore it.
581 */
582
583 if (NULL == r->last) {
584 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
585 return(ROFF_ERR);
586 return(ROFF_IGN);
587 }
588
589 switch (r->last->tok) {
590 case (ROFF_am):
591 /* FALLTHROUGH */
592 case (ROFF_ami):
593 /* FALLTHROUGH */
594 case (ROFF_am1):
595 /* FALLTHROUGH */
596 case (ROFF_de):
597 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
598 /* FALLTHROUGH */
599 case (ROFF_dei):
600 /* FALLTHROUGH */
601 case (ROFF_ig):
602 break;
603 default:
604 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
605 return(ROFF_ERR);
606 return(ROFF_IGN);
607 }
608
609 if ((*bufp)[pos])
610 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
611 return(ROFF_ERR);
612
613 roffnode_pop(r);
614 roffnode_cleanscope(r);
615 return(ROFF_IGN);
616
617 }
618
619
620 static void
621 roffnode_cleanscope(struct roff *r)
622 {
623
624 while (r->last) {
625 if (--r->last->endspan < 0)
626 break;
627 roffnode_pop(r);
628 }
629 }
630
631
632 /* ARGSUSED */
633 static enum rofferr
634 roff_ccond(ROFF_ARGS)
635 {
636
637 if (NULL == r->last) {
638 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
639 return(ROFF_ERR);
640 return(ROFF_IGN);
641 }
642
643 switch (r->last->tok) {
644 case (ROFF_el):
645 /* FALLTHROUGH */
646 case (ROFF_ie):
647 /* FALLTHROUGH */
648 case (ROFF_if):
649 break;
650 default:
651 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
652 return(ROFF_ERR);
653 return(ROFF_IGN);
654 }
655
656 if (r->last->endspan > -1) {
657 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
658 return(ROFF_ERR);
659 return(ROFF_IGN);
660 }
661
662 if ((*bufp)[pos])
663 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
664 return(ROFF_ERR);
665
666 roffnode_pop(r);
667 roffnode_cleanscope(r);
668 return(ROFF_IGN);
669 }
670
671
672 /* ARGSUSED */
673 static enum rofferr
674 roff_block(ROFF_ARGS)
675 {
676 int sv;
677 size_t sz;
678 char *name;
679
680 name = NULL;
681
682 if (ROFF_ig != tok) {
683 if ('\0' == (*bufp)[pos]) {
684 (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
685 return(ROFF_IGN);
686 }
687
688 /*
689 * Re-write `de1', since we don't really care about
690 * groff's strange compatibility mode, into `de'.
691 */
692
693 if (ROFF_de1 == tok)
694 tok = ROFF_de;
695 if (ROFF_de == tok)
696 name = *bufp + pos;
697 else
698 (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos,
699 roffs[tok].name);
700
701 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
702 pos++;
703
704 while (' ' == (*bufp)[pos])
705 (*bufp)[pos++] = '\0';
706 }
707
708 roffnode_push(r, tok, name, ln, ppos);
709
710 /*
711 * At the beginning of a `de' macro, clear the existing string
712 * with the same name, if there is one. New content will be
713 * added from roff_block_text() in multiline mode.
714 */
715
716 if (ROFF_de == tok)
717 roff_setstr(r, name, "", 0);
718
719 if ('\0' == (*bufp)[pos])
720 return(ROFF_IGN);
721
722 /* If present, process the custom end-of-line marker. */
723
724 sv = pos;
725 while ((*bufp)[pos] &&
726 ' ' != (*bufp)[pos] &&
727 '\t' != (*bufp)[pos])
728 pos++;
729
730 /*
731 * Note: groff does NOT like escape characters in the input.
732 * Instead of detecting this, we're just going to let it fly and
733 * to hell with it.
734 */
735
736 assert(pos > sv);
737 sz = (size_t)(pos - sv);
738
739 if (1 == sz && '.' == (*bufp)[sv])
740 return(ROFF_IGN);
741
742 r->last->end = mandoc_malloc(sz + 1);
743
744 memcpy(r->last->end, *bufp + sv, sz);
745 r->last->end[(int)sz] = '\0';
746
747 if ((*bufp)[pos])
748 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
749
750 return(ROFF_IGN);
751 }
752
753
754 /* ARGSUSED */
755 static enum rofferr
756 roff_block_sub(ROFF_ARGS)
757 {
758 enum rofft t;
759 int i, j;
760
761 /*
762 * First check whether a custom macro exists at this level. If
763 * it does, then check against it. This is some of groff's
764 * stranger behaviours. If we encountered a custom end-scope
765 * tag and that tag also happens to be a "real" macro, then we
766 * need to try interpreting it again as a real macro. If it's
767 * not, then return ignore. Else continue.
768 */
769
770 if (r->last->end) {
771 i = pos + 1;
772 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
773 i++;
774
775 for (j = 0; r->last->end[j]; j++, i++)
776 if ((*bufp)[i] != r->last->end[j])
777 break;
778
779 if ('\0' == r->last->end[j] &&
780 ('\0' == (*bufp)[i] ||
781 ' ' == (*bufp)[i] ||
782 '\t' == (*bufp)[i])) {
783 roffnode_pop(r);
784 roffnode_cleanscope(r);
785
786 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
787 return(ROFF_RERUN);
788 return(ROFF_IGN);
789 }
790 }
791
792 /*
793 * If we have no custom end-query or lookup failed, then try
794 * pulling it out of the hashtable.
795 */
796
797 ppos = pos;
798 t = roff_parse(r, *bufp, &pos);
799
800 /*
801 * Macros other than block-end are only significant
802 * in `de' blocks; elsewhere, simply throw them away.
803 */
804 if (ROFF_cblock != t) {
805 if (ROFF_de == tok)
806 roff_setstr(r, r->last->name, *bufp + ppos, 1);
807 return(ROFF_IGN);
808 }
809
810 assert(roffs[t].proc);
811 return((*roffs[t].proc)(r, t, bufp, szp,
812 ln, ppos, pos, offs));
813 }
814
815
816 /* ARGSUSED */
817 static enum rofferr
818 roff_block_text(ROFF_ARGS)
819 {
820
821 if (ROFF_de == tok)
822 roff_setstr(r, r->last->name, *bufp + pos, 1);
823
824 return(ROFF_IGN);
825 }
826
827
828 /* ARGSUSED */
829 static enum rofferr
830 roff_cond_sub(ROFF_ARGS)
831 {
832 enum rofft t;
833 enum roffrule rr;
834
835 ppos = pos;
836 rr = r->last->rule;
837
838 /*
839 * Clean out scope. If we've closed ourselves, then don't
840 * continue.
841 */
842
843 roffnode_cleanscope(r);
844
845 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
846 if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
847 return(roff_ccond
848 (r, ROFF_ccond, bufp, szp,
849 ln, pos, pos + 2, offs));
850 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
851 }
852
853 /*
854 * A denied conditional must evaluate its children if and only
855 * if they're either structurally required (such as loops and
856 * conditionals) or a closing macro.
857 */
858 if (ROFFRULE_DENY == rr)
859 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
860 if (ROFF_ccond != t)
861 return(ROFF_IGN);
862
863 assert(roffs[t].proc);
864 return((*roffs[t].proc)(r, t, bufp, szp,
865 ln, ppos, pos, offs));
866 }
867
868
869 /* ARGSUSED */
870 static enum rofferr
871 roff_cond_text(ROFF_ARGS)
872 {
873 char *ep, *st;
874 enum roffrule rr;
875
876 rr = r->last->rule;
877
878 /*
879 * We display the value of the text if out current evaluation
880 * scope permits us to do so.
881 */
882
883 /* FIXME: use roff_ccond? */
884
885 st = &(*bufp)[pos];
886 if (NULL == (ep = strstr(st, "\\}"))) {
887 roffnode_cleanscope(r);
888 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
889 }
890
891 if (ep == st || (ep > st && '\\' != *(ep - 1)))
892 roffnode_pop(r);
893
894 roffnode_cleanscope(r);
895 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
896 }
897
898
899 static enum roffrule
900 roff_evalcond(const char *v, int *pos)
901 {
902
903 switch (v[*pos]) {
904 case ('n'):
905 (*pos)++;
906 return(ROFFRULE_ALLOW);
907 case ('e'):
908 /* FALLTHROUGH */
909 case ('o'):
910 /* FALLTHROUGH */
911 case ('t'):
912 (*pos)++;
913 return(ROFFRULE_DENY);
914 default:
915 break;
916 }
917
918 while (v[*pos] && ' ' != v[*pos])
919 (*pos)++;
920 return(ROFFRULE_DENY);
921 }
922
923 /* ARGSUSED */
924 static enum rofferr
925 roff_line_ignore(ROFF_ARGS)
926 {
927
928 return(ROFF_IGN);
929 }
930
931 /* ARGSUSED */
932 static enum rofferr
933 roff_line_error(ROFF_ARGS)
934 {
935
936 (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos, roffs[tok].name);
937 return(ROFF_IGN);
938 }
939
940 /* ARGSUSED */
941 static enum rofferr
942 roff_cond(ROFF_ARGS)
943 {
944 int sv;
945 enum roffrule rule;
946
947 /* Stack overflow! */
948
949 if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
950 (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL);
951 return(ROFF_ERR);
952 }
953
954 /* First, evaluate the conditional. */
955
956 if (ROFF_el == tok) {
957 /*
958 * An `.el' will get the value of the current rstack
959 * entry set in prior `ie' calls or defaults to DENY.
960 */
961 if (r->rstackpos < 0)
962 rule = ROFFRULE_DENY;
963 else
964 rule = r->rstack[r->rstackpos];
965 } else
966 rule = roff_evalcond(*bufp, &pos);
967
968 sv = pos;
969
970 while (' ' == (*bufp)[pos])
971 pos++;
972
973 /*
974 * Roff is weird. If we have just white-space after the
975 * conditional, it's considered the BODY and we exit without
976 * really doing anything. Warn about this. It's probably
977 * wrong.
978 */
979
980 if ('\0' == (*bufp)[pos] && sv != pos) {
981 (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
982 return(ROFF_IGN);
983 }
984
985 roffnode_push(r, tok, NULL, ln, ppos);
986
987 r->last->rule = rule;
988
989 ROFF_DEBUG("roff: cond: %s -> %s\n", roffs[tok].name,
990 ROFFRULE_ALLOW == rule ? "allow" : "deny");
991
992 if (ROFF_ie == tok) {
993 /*
994 * An if-else will put the NEGATION of the current
995 * evaluated conditional into the stack.
996 */
997 r->rstackpos++;
998 if (ROFFRULE_DENY == r->last->rule)
999 r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
1000 else
1001 r->rstack[r->rstackpos] = ROFFRULE_DENY;
1002 }
1003
1004 /* If the parent has false as its rule, then so do we. */
1005
1006 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule) {
1007 r->last->rule = ROFFRULE_DENY;
1008 ROFF_DEBUG("roff: cond override: %s -> deny\n",
1009 roffs[tok].name);
1010 }
1011
1012 /*
1013 * Determine scope. If we're invoked with "\{" trailing the
1014 * conditional, then we're in a multiline scope. Else our scope
1015 * expires on the next line.
1016 */
1017
1018 r->last->endspan = 1;
1019
1020 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1021 r->last->endspan = -1;
1022 pos += 2;
1023 ROFF_DEBUG("roff: cond-scope: %s, multi-line\n",
1024 roffs[tok].name);
1025 } else
1026 ROFF_DEBUG("roff: cond-scope: %s, one-line\n",
1027 roffs[tok].name);
1028
1029 /*
1030 * If there are no arguments on the line, the next-line scope is
1031 * assumed.
1032 */
1033
1034 if ('\0' == (*bufp)[pos])
1035 return(ROFF_IGN);
1036
1037 /* Otherwise re-run the roff parser after recalculating. */
1038
1039 *offs = pos;
1040 return(ROFF_RERUN);
1041 }
1042
1043
1044 /* ARGSUSED */
1045 static enum rofferr
1046 roff_ds(ROFF_ARGS)
1047 {
1048 char *name, *string;
1049
1050 /*
1051 * A symbol is named by the first word following the macro
1052 * invocation up to a space. Its value is anything after the
1053 * name's trailing whitespace and optional double-quote. Thus,
1054 *
1055 * [.ds foo "bar " ]
1056 *
1057 * will have `bar " ' as its value.
1058 */
1059
1060 name = *bufp + pos;
1061 if ('\0' == *name)
1062 return(ROFF_IGN);
1063
1064 string = name;
1065 /* Read until end of name. */
1066 while (*string && ' ' != *string)
1067 string++;
1068
1069 /* Nil-terminate name. */
1070 if (*string)
1071 *(string++) = '\0';
1072
1073 /* Read past spaces. */
1074 while (*string && ' ' == *string)
1075 string++;
1076
1077 /* Read passed initial double-quote. */
1078 if (*string && '"' == *string)
1079 string++;
1080
1081 /* The rest is the value. */
1082 roff_setstr(r, name, string, 0);
1083 return(ROFF_IGN);
1084 }
1085
1086
1087 /* ARGSUSED */
1088 static enum rofferr
1089 roff_nr(ROFF_ARGS)
1090 {
1091 const char *key, *val;
1092 struct reg *rg;
1093
1094 key = &(*bufp)[pos];
1095 rg = r->regs->regs;
1096
1097 /* Parse register request. */
1098 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
1099 pos++;
1100
1101 /*
1102 * Set our nil terminator. Because this line is going to be
1103 * ignored anyway, we can munge it as we please.
1104 */
1105 if ((*bufp)[pos])
1106 (*bufp)[pos++] = '\0';
1107
1108 /* Skip whitespace to register token. */
1109 while ((*bufp)[pos] && ' ' == (*bufp)[pos])
1110 pos++;
1111
1112 val = &(*bufp)[pos];
1113
1114 /* Process register token. */
1115
1116 if (0 == strcmp(key, "nS")) {
1117 rg[(int)REG_nS].set = 1;
1118 if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
1119 rg[(int)REG_nS].v.u = 0;
1120
1121 ROFF_DEBUG("roff: register nS: %u\n",
1122 rg[(int)REG_nS].v.u);
1123 } else
1124 ROFF_DEBUG("roff: ignoring register: %s\n", key);
1125
1126 return(ROFF_IGN);
1127 }
1128
1129 /* ARGSUSED */
1130 static enum rofferr
1131 roff_so(ROFF_ARGS)
1132 {
1133 char *name;
1134
1135 (*r->msg)(MANDOCERR_SO, r->data, ln, ppos, NULL);
1136
1137 /*
1138 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1139 * opening anything that's not in our cwd or anything beneath
1140 * it. Thus, explicitly disallow traversing up the file-system
1141 * or using absolute paths.
1142 */
1143
1144 name = *bufp + pos;
1145 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1146 (*r->msg)(MANDOCERR_SOPATH, r->data, ln, pos, NULL);
1147 return(ROFF_ERR);
1148 }
1149
1150 *offs = pos;
1151 return(ROFF_SO);
1152 }
1153
1154 /* ARGSUSED */
1155 static enum rofferr
1156 roff_userdef(ROFF_ARGS)
1157 {
1158 const char *arg[9];
1159 char *cp, *n1, *n2;
1160 int i, quoted, pairs;
1161
1162 /*
1163 * Collect pointers to macro argument strings
1164 * and null-terminate them.
1165 */
1166 cp = *bufp + pos;
1167 for (i = 0; i < 9; i++) {
1168 /* Quoting can only start with a new word. */
1169 if ('"' == *cp) {
1170 quoted = 1;
1171 cp++;
1172 } else
1173 quoted = 0;
1174 arg[i] = cp;
1175 for (pairs = 0; '\0' != *cp; cp++) {
1176 /* Unquoted arguments end at blanks. */
1177 if (0 == quoted) {
1178 if (' ' == *cp)
1179 break;
1180 continue;
1181 }
1182 /* After pairs of quotes, move left. */
1183 if (pairs)
1184 cp[-pairs] = cp[0];
1185 /* Pairs of quotes do not end words, ... */
1186 if ('"' == cp[0] && '"' == cp[1]) {
1187 pairs++;
1188 cp++;
1189 continue;
1190 }
1191 /* ... but solitary quotes do. */
1192 if ('"' != *cp)
1193 continue;
1194 if (pairs)
1195 cp[-pairs] = '\0';
1196 *cp = ' ';
1197 break;
1198 }
1199 /* Last argument; the remaining ones are empty strings. */
1200 if ('\0' == *cp)
1201 continue;
1202 /* Null-terminate argument and move to the next one. */
1203 *cp++ = '\0';
1204 while (' ' == *cp)
1205 cp++;
1206 }
1207
1208 /*
1209 * Expand macro arguments.
1210 */
1211 *szp = 0;
1212 n1 = cp = mandoc_strdup(r->current_string);
1213 while (NULL != (cp = strstr(cp, "\\$"))) {
1214 i = cp[2] - '1';
1215 if (0 > i || 8 < i) {
1216 /* Not an argument invocation. */
1217 cp += 2;
1218 continue;
1219 }
1220
1221 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1222 n2 = mandoc_malloc(*szp);
1223
1224 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1225 strlcat(n2, arg[i], *szp);
1226 strlcat(n2, cp + 3, *szp);
1227
1228 cp = n2 + (cp - n1);
1229 free(n1);
1230 n1 = n2;
1231 }
1232
1233 /*
1234 * Replace the macro invocation
1235 * by the expanded macro.
1236 */
1237 free(*bufp);
1238 *bufp = n1;
1239 if (0 == *szp)
1240 *szp = strlen(*bufp) + 1;
1241
1242 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1243 ROFF_REPARSE : ROFF_APPEND);
1244 }
1245
1246 /*
1247 * Store *string into the user-defined string called *name.
1248 * In multiline mode, append to an existing entry and append '\n';
1249 * else replace the existing entry, if there is one.
1250 * To clear an existing entry, call with (*r, *name, NULL, 0).
1251 */
1252 static void
1253 roff_setstr(struct roff *r, const char *name, const char *string,
1254 int multiline)
1255 {
1256 struct roffstr *n;
1257 char *c;
1258 size_t oldch, newch;
1259
1260 /* Search for an existing string with the same name. */
1261 n = r->first_string;
1262 while (n && strcmp(name, n->name))
1263 n = n->next;
1264
1265 if (NULL == n) {
1266 /* Create a new string table entry. */
1267 n = mandoc_malloc(sizeof(struct roffstr));
1268 n->name = mandoc_strdup(name);
1269 n->string = NULL;
1270 n->next = r->first_string;
1271 r->first_string = n;
1272 } else if (0 == multiline) {
1273 /* In multiline mode, append; else replace. */
1274 free(n->string);
1275 n->string = NULL;
1276 }
1277
1278 if (NULL == string)
1279 return;
1280
1281 /*
1282 * One additional byte for the '\n' in multiline mode,
1283 * and one for the terminating '\0'.
1284 */
1285 newch = strlen(string) + (multiline ? 2 : 1);
1286 if (NULL == n->string) {
1287 n->string = mandoc_malloc(newch);
1288 *n->string = '\0';
1289 oldch = 0;
1290 } else {
1291 oldch = strlen(n->string);
1292 n->string = mandoc_realloc(n->string, oldch + newch);
1293 }
1294
1295 /* Skip existing content in the destination buffer. */
1296 c = n->string + oldch;
1297
1298 /* Append new content to the destination buffer. */
1299 while (*string) {
1300 /*
1301 * Rudimentary roff copy mode:
1302 * Handle escaped backslashes.
1303 */
1304 if ('\\' == *string && '\\' == *(string + 1))
1305 string++;
1306 *c++ = *string++;
1307 }
1308
1309 /* Append terminating bytes. */
1310 if (multiline)
1311 *c++ = '\n';
1312 *c = '\0';
1313 }
1314
1315
1316 static const char *
1317 roff_getstrn(const struct roff *r, const char *name, size_t len)
1318 {
1319 const struct roffstr *n;
1320
1321 n = r->first_string;
1322 while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1323 n = n->next;
1324
1325 return(n ? n->string : NULL);
1326 }
1327
1328
1329 static void
1330 roff_freestr(struct roff *r)
1331 {
1332 struct roffstr *n, *nn;
1333
1334 for (n = r->first_string; n; n = nn) {
1335 free(n->name);
1336 free(n->string);
1337 nn = n->next;
1338 free(n);
1339 }
1340
1341 r->first_string = NULL;
1342 }