]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Add initial EQN support to mandoc. This parses, then throws away, data
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.125 2011/02/06 20:36:36 kristaps Exp $ */
2 /*
3 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <ctype.h>
25 #include <limits.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <stdio.h>
29
30 #include "mandoc.h"
31 #include "roff.h"
32 #include "libroff.h"
33 #include "libmandoc.h"
34
/* Number of slots in the rule stack (struct roff, rstack[]). */
#define	RSTACK_MAX	128

/* Non-zero if character c is one of the two control characters. */
#define	ROFF_CTL(c) \
	((c) == '.' || (c) == '\'')
39
/*
 * Request tokens.  The numeric value of each token is its index into
 * the roffs[] table, so the order here must match that table.
 */
enum	rofft {
	/* Ordinary requests. */
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,

	/* Table and equation delimiters. */
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,

	/* Scope closers. */
	ROFF_cblock,
	ROFF_ccond,	/* FIXME: remove this. */

	/* Invocation of a user-defined macro; not kept in the hash. */
	ROFF_USERDEF,

	/* Table size; also the "no such request" return value. */
	ROFF_MAX
};
74
/* Outcome of evaluating a conditional: let the body through or drop it. */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
79
/* One entry in the singly linked list of user-defined strings. */
struct	roffstr {
	char		*name;		/* lookup key */
	char		*string;	/* current value, may be NULL */
	struct roffstr	*next;		/* following entry or NULL */
};
85
86 struct roff {
87 struct roffnode *last; /* leaf of stack */
88 mandocmsg msg; /* err/warn/fatal messages */
89 void *data; /* privdata for messages */
90 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
91 int rstackpos; /* position in rstack */
92 struct regset *regs; /* read/writable registers */
93 struct roffstr *first_string; /* user-defined strings & macros */
94 const char *current_string; /* value of last called user macro */
95 struct tbl_node *first_tbl; /* first table parsed */
96 struct tbl_node *last_tbl; /* last table parsed */
97 struct tbl_node *tbl; /* current table being parsed */
98 struct eqn_node *last_eqn; /* last equation parsed */
99 struct eqn_node *first_eqn; /* first equation parsed */
100 struct eqn_node *eqn; /* current equation being parsed */
101 };
102
103 struct roffnode {
104 enum rofft tok; /* type of node */
105 struct roffnode *parent; /* up one in stack */
106 int line; /* parse line */
107 int col; /* parse col */
108 char *name; /* node name, e.g. macro name */
109 char *end; /* end-rules: custom token */
110 int endspan; /* end-rules: next-line or infty */
111 enum roffrule rule; /* current evaluation rule */
112 };
113
/* Common parameter list shared by every request handler. */
#define	ROFF_ARGS \
	struct roff *r,		/* parse ctx */ \
	enum rofft tok,		/* tok of macro */ \
	char **bufp,		/* input buffer */ \
	size_t *szp,		/* size of input buffer */ \
	int ln,			/* parse line */ \
	int ppos,		/* original pos in buffer */ \
	int pos,		/* current pos in buffer */ \
	int *offs		/* reset offset of buffer data */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
124
125 struct roffmac {
126 const char *name; /* macro name */
127 roffproc proc; /* process new macro */
128 roffproc text; /* process as child text of macro */
129 roffproc sub; /* process as child of macro */
130 int flags;
131 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
132 struct roffmac *next;
133 };
134
/* Block scopes (`de', `am', `ig', ...) and their closer. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);

/* Conditional scopes (`if', `ie', `el') and their closer. */
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *v, int *pos);

/* User-defined strings and macros. */
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	void		 roff_freestr(struct roff *r);
static	char		*roff_getname(struct roff *r, char **cpp,
				int ln, int pos);
static	const char	*roff_getstrn(const struct roff *r,
				const char *name, size_t len);
static	int		 roff_res(struct roff *r,
				char **bufp, size_t *szp, int pos);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	void		 roff_setstr(struct roff *r,
				const char *name, const char *string,
				int multiline);
static	enum rofferr	 roff_userdef(ROFF_ARGS);

/* Single-line requests. */
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_so(ROFF_ARGS);

/* Table and equation delimiters. */
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
163
/*
 * Range of first characters a request name may have, and the resulting
 * number of hash buckets.  See roff_hash_find().
 */
#define	ASCII_LO	 33
#define	ASCII_HI	 126
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* One chain of builtin requests per possible first character. */
static	struct roffmac	*hash[HASHWIDTH];
171
172 static struct roffmac roffs[ROFF_MAX] = {
173 { "ad", roff_line_ignore, NULL, NULL, 0, NULL },
174 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
175 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
176 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
177 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
178 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
179 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
180 { "ds", roff_ds, NULL, NULL, 0, NULL },
181 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
182 { "hy", roff_line_ignore, NULL, NULL, 0, NULL },
183 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
184 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
185 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
186 { "it", roff_line_ignore, NULL, NULL, 0, NULL },
187 { "ne", roff_line_ignore, NULL, NULL, 0, NULL },
188 { "nh", roff_line_ignore, NULL, NULL, 0, NULL },
189 { "nr", roff_nr, NULL, NULL, 0, NULL },
190 { "ns", roff_line_ignore, NULL, NULL, 0, NULL },
191 { "ps", roff_line_ignore, NULL, NULL, 0, NULL },
192 { "rm", roff_rm, NULL, NULL, 0, NULL },
193 { "so", roff_so, NULL, NULL, 0, NULL },
194 { "ta", roff_line_ignore, NULL, NULL, 0, NULL },
195 { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
196 { "TS", roff_TS, NULL, NULL, 0, NULL },
197 { "TE", roff_TE, NULL, NULL, 0, NULL },
198 { "T&", roff_T_, NULL, NULL, 0, NULL },
199 { "EQ", roff_EQ, NULL, NULL, 0, NULL },
200 { "EN", roff_EN, NULL, NULL, 0, NULL },
201 { ".", roff_cblock, NULL, NULL, 0, NULL },
202 { "\\}", roff_ccond, NULL, NULL, 0, NULL },
203 { NULL, roff_userdef, NULL, NULL, 0, NULL },
204 };
205
/* Parser bookkeeping helpers. */
static	void		 roff_free1(struct roff *r);
static	enum rofft	 roff_hash_find(const char *p, size_t s);
static	void		 roff_hash_init(void);
static	void		 roffnode_cleanscope(struct roff *r);
static	void		 roffnode_push(struct roff *r, enum rofft tok,
				const char *name, int line, int col);
static	void		 roffnode_pop(struct roff *r);
static	enum rofft	 roff_parse(struct roff *r, const char *buf,
				int *pos);
static	int		 roff_parse_nat(const char *buf, unsigned int *res);

/* Bucket index for name p: its first character, rebased to zero. */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
218
219 static void
220 roff_hash_init(void)
221 {
222 struct roffmac *n;
223 int buc, i;
224
225 for (i = 0; i < (int)ROFF_USERDEF; i++) {
226 assert(roffs[i].name[0] >= ASCII_LO);
227 assert(roffs[i].name[0] <= ASCII_HI);
228
229 buc = ROFF_HASH(roffs[i].name);
230
231 if (NULL != (n = hash[buc])) {
232 for ( ; n->next; n = n->next)
233 /* Do nothing. */ ;
234 n->next = &roffs[i];
235 } else
236 hash[buc] = &roffs[i];
237 }
238 }
239
240
241 /*
242 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
243 * the nil-terminated string name could be found.
244 */
245 static enum rofft
246 roff_hash_find(const char *p, size_t s)
247 {
248 int buc;
249 struct roffmac *n;
250
251 /*
252 * libroff has an extremely simple hashtable, for the time
253 * being, which simply keys on the first character, which must
254 * be printable, then walks a chain. It works well enough until
255 * optimised.
256 */
257
258 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
259 return(ROFF_MAX);
260
261 buc = ROFF_HASH(p);
262
263 if (NULL == (n = hash[buc]))
264 return(ROFF_MAX);
265 for ( ; n; n = n->next)
266 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
267 return((enum rofft)(n - roffs));
268
269 return(ROFF_MAX);
270 }
271
272
273 /*
274 * Pop the current node off of the stack of roff instructions currently
275 * pending.
276 */
277 static void
278 roffnode_pop(struct roff *r)
279 {
280 struct roffnode *p;
281
282 assert(r->last);
283 p = r->last;
284
285 if (ROFF_el == p->tok)
286 if (r->rstackpos > -1)
287 r->rstackpos--;
288
289 r->last = r->last->parent;
290 free(p->name);
291 free(p->end);
292 free(p);
293 }
294
295
296 /*
297 * Push a roff node onto the instruction stack. This must later be
298 * removed with roffnode_pop().
299 */
300 static void
301 roffnode_push(struct roff *r, enum rofft tok, const char *name,
302 int line, int col)
303 {
304 struct roffnode *p;
305
306 p = mandoc_calloc(1, sizeof(struct roffnode));
307 p->tok = tok;
308 if (name)
309 p->name = mandoc_strdup(name);
310 p->parent = r->last;
311 p->line = line;
312 p->col = col;
313 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
314
315 r->last = p;
316 }
317
318
319 static void
320 roff_free1(struct roff *r)
321 {
322 struct tbl_node *t;
323 struct eqn_node *e;
324
325 while (NULL != (t = r->first_tbl)) {
326 r->first_tbl = t->next;
327 tbl_free(t);
328 }
329
330 r->first_tbl = r->last_tbl = r->tbl = NULL;
331
332 while (NULL != (e = r->first_eqn)) {
333 r->first_eqn = e->next;
334 eqn_free(e);
335 }
336
337 r->first_eqn = r->last_eqn = r->eqn = NULL;
338
339 while (r->last)
340 roffnode_pop(r);
341
342 roff_freestr(r);
343 }
344
345
346 void
347 roff_reset(struct roff *r)
348 {
349
350 roff_free1(r);
351 }
352
353
/* Destroy a parser context: release all it owns, then the context. */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
361
362
363 struct roff *
364 roff_alloc(struct regset *regs, void *data, const mandocmsg msg)
365 {
366 struct roff *r;
367
368 r = mandoc_calloc(1, sizeof(struct roff));
369 r->regs = regs;
370 r->msg = msg;
371 r->data = data;
372 r->rstackpos = -1;
373
374 roff_hash_init();
375 return(r);
376 }
377
378
/*
 * Scan the line, starting at pos, for a reference to a user-defined
 * string: `\*x', `\*(xx' or `\*[name]'.  The first reference whose name
 * is defined is replaced by the string's value in a newly allocated
 * buffer, which takes the place of *bufp (the old one is freed) and
 * whose size is stored in *szp.
 *
 * Returns 0 if a replacement was made and 1 if the line is unchanged.
 */
static int
roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
{
	const char	*esc;	/* the backslash opening the escape */
	const char	*nm;	/* first character of the name */
	const char	*cp;	/* scan position; ends up after the name */
	const char	*val;	/* value to substitute */
	char		*nbuf;
	size_t		 nbufsz;
	int		 namelen, fixed;

	for (cp = strchr(*bufp + pos, '\\'); cp != NULL;
	    cp = strchr(cp, '\\')) {
		esc = cp;
		cp++;

		/*
		 * Only `\*' is of interest.  Any other escaped
		 * character is stepped over, since being escaped it
		 * cannot start an escape sequence of its own.
		 */

		if (*cp == '\0')
			return 1;
		if (*cp != '*') {
			cp++;
			continue;
		}
		cp++;

		/*
		 * What follows the asterisk selects the name's length:
		 * two characters after `(', anything up to `]' after
		 * `[' (fixed == 0), else a single character.
		 */

		if (*cp == '\0')
			return 1;
		if (*cp == '(') {
			fixed = 2;
			cp++;
		} else if (*cp == '[') {
			fixed = 0;
			cp++;
		} else
			fixed = 1;

		nm = cp;

		/* Find the end of the name; running off the line fails. */

		namelen = 0;
		while (fixed == 0 || namelen < fixed) {
			if (*cp == '\0')
				return 1;
			if (fixed == 0 && *cp == ']')
				break;
			namelen++;
			cp++;
		}

		val = roff_getstrn(r, nm, (size_t)namelen);

		if (val == NULL) {
			/* Undefined: keep looking further along the line. */
			if (fixed)
				cp--;
			continue;
		}

		/* Assemble: text before the escape, value, text after. */

		nbufsz = *szp + strlen(val) + 1;
		nbuf = mandoc_malloc(nbufsz);

		strlcpy(nbuf, *bufp, (size_t)(esc - *bufp + 1));
		strlcat(nbuf, val, nbufsz);
		/* In the bracketed form, cp still rests on the `]'. */
		strlcat(nbuf, fixed ? cp : cp + 1, nbufsz);

		free(*bufp);

		*bufp = nbuf;
		*szp = nbufsz;
		return 0;
	}

	return 1;
}
474
475
476 enum rofferr
477 roff_parseln(struct roff *r, int ln, char **bufp,
478 size_t *szp, int pos, int *offs)
479 {
480 enum rofft t;
481 enum rofferr e;
482 int ppos;
483
484 /*
485 * Run the reserved-word filter only if we have some reserved
486 * words to fill in.
487 */
488
489 if (r->first_string && ! roff_res(r, bufp, szp, pos))
490 return(ROFF_REPARSE);
491
492 /*
493 * First, if a scope is open and we're not a macro, pass the
494 * text through the macro's filter. If a scope isn't open and
495 * we're not a macro, just let it through.
496 * Finally, if there's an equation scope open, divert it into it
497 * no matter our state.
498 */
499
500 if (r->last && ! ROFF_CTL((*bufp)[pos])) {
501 t = r->last->tok;
502 assert(roffs[t].text);
503 e = (*roffs[t].text)
504 (r, t, bufp, szp, ln, pos, pos, offs);
505 assert(ROFF_IGN == e || ROFF_CONT == e);
506 if (ROFF_CONT != e)
507 return(e);
508 if (r->eqn)
509 return(eqn_read(&r->eqn, ln, *bufp, *offs));
510 if (r->tbl)
511 return(tbl_read(r->tbl, ln, *bufp, *offs));
512 return(ROFF_CONT);
513 } else if ( ! ROFF_CTL((*bufp)[pos])) {
514 if (r->eqn)
515 return(eqn_read(&r->eqn, ln, *bufp, *offs));
516 if (r->tbl)
517 return(tbl_read(r->tbl, ln, *bufp, *offs));
518 return(ROFF_CONT);
519 } else if (r->eqn)
520 return(eqn_read(&r->eqn, ln, *bufp, *offs));
521
522 /*
523 * If a scope is open, go to the child handler for that macro,
524 * as it may want to preprocess before doing anything with it.
525 * Don't do so if an equation is open.
526 */
527
528 if (r->last) {
529 t = r->last->tok;
530 assert(roffs[t].sub);
531 return((*roffs[t].sub)
532 (r, t, bufp, szp,
533 ln, pos, pos, offs));
534 }
535
536 /*
537 * Lastly, as we've no scope open, try to look up and execute
538 * the new macro. If no macro is found, simply return and let
539 * the compilers handle it.
540 */
541
542 ppos = pos;
543 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
544 return(ROFF_CONT);
545
546 assert(roffs[t].proc);
547 return((*roffs[t].proc)
548 (r, t, bufp, szp,
549 ln, ppos, pos, offs));
550 }
551
552
553 void
554 roff_endparse(struct roff *r)
555 {
556
557 if (r->last)
558 (*r->msg)(MANDOCERR_SCOPEEXIT, r->data,
559 r->last->line, r->last->col, NULL);
560
561 if (r->eqn) {
562 (*r->msg)(MANDOCERR_SCOPEEXIT, r->data,
563 r->eqn->line, r->eqn->pos, NULL);
564 eqn_end(r->eqn);
565 r->eqn = NULL;
566 }
567
568 if (r->tbl) {
569 (*r->msg)(MANDOCERR_SCOPEEXIT, r->data,
570 r->tbl->line, r->tbl->pos, NULL);
571 tbl_end(r->tbl);
572 r->tbl = NULL;
573 }
574 }
575
576
577 /*
578 * Parse a roff node's type from the input buffer. This must be in the
579 * form of ".foo xxx" in the usual way.
580 */
581 static enum rofft
582 roff_parse(struct roff *r, const char *buf, int *pos)
583 {
584 const char *mac;
585 size_t maclen;
586 enum rofft t;
587
588 assert(ROFF_CTL(buf[*pos]));
589 (*pos)++;
590
591 while (' ' == buf[*pos] || '\t' == buf[*pos])
592 (*pos)++;
593
594 if ('\0' == buf[*pos])
595 return(ROFF_MAX);
596
597 mac = buf + *pos;
598 maclen = strcspn(mac, " \\\t\0");
599
600 t = (r->current_string = roff_getstrn(r, mac, maclen))
601 ? ROFF_USERDEF : roff_hash_find(mac, maclen);
602
603 *pos += maclen;
604 while (buf[*pos] && ' ' == buf[*pos])
605 (*pos)++;
606
607 return(t);
608 }
609
610
/*
 * Convert the whole of buf to a non-negative decimal number no larger
 * than INT_MAX.  Returns 1 and stores the number in *res on success;
 * returns 0 and leaves *res alone if buf is empty, has trailing
 * garbage, or holds a value outside that range.
 */
static int
roff_parse_nat(const char *buf, unsigned int *res)
{
	char	*end;
	long	 val;

	errno = 0;
	val = strtol(buf, &end, 10);

	/* Nothing to convert, or something left unconverted. */
	if (*buf == '\0')
		return 0;
	if (*end != '\0')
		return 0;

	/* Too large or too small for a long. */
	if (errno == ERANGE && (val == LONG_MAX || val == LONG_MIN))
		return 0;

	if (val < 0 || val > INT_MAX)
		return 0;

	*res = (unsigned int)val;
	return 1;
}
629
630
631 /* ARGSUSED */
632 static enum rofferr
633 roff_cblock(ROFF_ARGS)
634 {
635
636 /*
637 * A block-close `..' should only be invoked as a child of an
638 * ignore macro, otherwise raise a warning and just ignore it.
639 */
640
641 if (NULL == r->last) {
642 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
643 return(ROFF_IGN);
644 }
645
646 switch (r->last->tok) {
647 case (ROFF_am):
648 /* FALLTHROUGH */
649 case (ROFF_ami):
650 /* FALLTHROUGH */
651 case (ROFF_am1):
652 /* FALLTHROUGH */
653 case (ROFF_de):
654 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
655 /* FALLTHROUGH */
656 case (ROFF_dei):
657 /* FALLTHROUGH */
658 case (ROFF_ig):
659 break;
660 default:
661 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
662 return(ROFF_IGN);
663 }
664
665 if ((*bufp)[pos])
666 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
667
668 roffnode_pop(r);
669 roffnode_cleanscope(r);
670 return(ROFF_IGN);
671
672 }
673
674
675 static void
676 roffnode_cleanscope(struct roff *r)
677 {
678
679 while (r->last) {
680 if (--r->last->endspan < 0)
681 break;
682 roffnode_pop(r);
683 }
684 }
685
686
687 /* ARGSUSED */
688 static enum rofferr
689 roff_ccond(ROFF_ARGS)
690 {
691
692 if (NULL == r->last) {
693 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
694 return(ROFF_IGN);
695 }
696
697 switch (r->last->tok) {
698 case (ROFF_el):
699 /* FALLTHROUGH */
700 case (ROFF_ie):
701 /* FALLTHROUGH */
702 case (ROFF_if):
703 break;
704 default:
705 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
706 return(ROFF_IGN);
707 }
708
709 if (r->last->endspan > -1) {
710 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
711 return(ROFF_IGN);
712 }
713
714 if ((*bufp)[pos])
715 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
716
717 roffnode_pop(r);
718 roffnode_cleanscope(r);
719 return(ROFF_IGN);
720 }
721
722
723 /* ARGSUSED */
724 static enum rofferr
725 roff_block(ROFF_ARGS)
726 {
727 int sv;
728 size_t sz;
729 char *name;
730
731 name = NULL;
732
733 if (ROFF_ig != tok) {
734 if ('\0' == (*bufp)[pos]) {
735 (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
736 return(ROFF_IGN);
737 }
738
739 /*
740 * Re-write `de1', since we don't really care about
741 * groff's strange compatibility mode, into `de'.
742 */
743
744 if (ROFF_de1 == tok)
745 tok = ROFF_de;
746 if (ROFF_de == tok)
747 name = *bufp + pos;
748 else
749 (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos,
750 roffs[tok].name);
751
752 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
753 pos++;
754
755 while (' ' == (*bufp)[pos])
756 (*bufp)[pos++] = '\0';
757 }
758
759 roffnode_push(r, tok, name, ln, ppos);
760
761 /*
762 * At the beginning of a `de' macro, clear the existing string
763 * with the same name, if there is one. New content will be
764 * added from roff_block_text() in multiline mode.
765 */
766
767 if (ROFF_de == tok)
768 roff_setstr(r, name, "", 0);
769
770 if ('\0' == (*bufp)[pos])
771 return(ROFF_IGN);
772
773 /* If present, process the custom end-of-line marker. */
774
775 sv = pos;
776 while ((*bufp)[pos] &&
777 ' ' != (*bufp)[pos] &&
778 '\t' != (*bufp)[pos])
779 pos++;
780
781 /*
782 * Note: groff does NOT like escape characters in the input.
783 * Instead of detecting this, we're just going to let it fly and
784 * to hell with it.
785 */
786
787 assert(pos > sv);
788 sz = (size_t)(pos - sv);
789
790 if (1 == sz && '.' == (*bufp)[sv])
791 return(ROFF_IGN);
792
793 r->last->end = mandoc_malloc(sz + 1);
794
795 memcpy(r->last->end, *bufp + sv, sz);
796 r->last->end[(int)sz] = '\0';
797
798 if ((*bufp)[pos])
799 (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL);
800
801 return(ROFF_IGN);
802 }
803
804
805 /* ARGSUSED */
806 static enum rofferr
807 roff_block_sub(ROFF_ARGS)
808 {
809 enum rofft t;
810 int i, j;
811
812 /*
813 * First check whether a custom macro exists at this level. If
814 * it does, then check against it. This is some of groff's
815 * stranger behaviours. If we encountered a custom end-scope
816 * tag and that tag also happens to be a "real" macro, then we
817 * need to try interpreting it again as a real macro. If it's
818 * not, then return ignore. Else continue.
819 */
820
821 if (r->last->end) {
822 i = pos + 1;
823 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
824 i++;
825
826 for (j = 0; r->last->end[j]; j++, i++)
827 if ((*bufp)[i] != r->last->end[j])
828 break;
829
830 if ('\0' == r->last->end[j] &&
831 ('\0' == (*bufp)[i] ||
832 ' ' == (*bufp)[i] ||
833 '\t' == (*bufp)[i])) {
834 roffnode_pop(r);
835 roffnode_cleanscope(r);
836
837 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
838 return(ROFF_RERUN);
839 return(ROFF_IGN);
840 }
841 }
842
843 /*
844 * If we have no custom end-query or lookup failed, then try
845 * pulling it out of the hashtable.
846 */
847
848 ppos = pos;
849 t = roff_parse(r, *bufp, &pos);
850
851 /*
852 * Macros other than block-end are only significant
853 * in `de' blocks; elsewhere, simply throw them away.
854 */
855 if (ROFF_cblock != t) {
856 if (ROFF_de == tok)
857 roff_setstr(r, r->last->name, *bufp + ppos, 1);
858 return(ROFF_IGN);
859 }
860
861 assert(roffs[t].proc);
862 return((*roffs[t].proc)(r, t, bufp, szp,
863 ln, ppos, pos, offs));
864 }
865
866
867 /* ARGSUSED */
868 static enum rofferr
869 roff_block_text(ROFF_ARGS)
870 {
871
872 if (ROFF_de == tok)
873 roff_setstr(r, r->last->name, *bufp + pos, 1);
874
875 return(ROFF_IGN);
876 }
877
878
879 /* ARGSUSED */
880 static enum rofferr
881 roff_cond_sub(ROFF_ARGS)
882 {
883 enum rofft t;
884 enum roffrule rr;
885
886 ppos = pos;
887 rr = r->last->rule;
888
889 /*
890 * Clean out scope. If we've closed ourselves, then don't
891 * continue.
892 */
893
894 roffnode_cleanscope(r);
895
896 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
897 if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
898 return(roff_ccond
899 (r, ROFF_ccond, bufp, szp,
900 ln, pos, pos + 2, offs));
901 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
902 }
903
904 /*
905 * A denied conditional must evaluate its children if and only
906 * if they're either structurally required (such as loops and
907 * conditionals) or a closing macro.
908 */
909 if (ROFFRULE_DENY == rr)
910 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
911 if (ROFF_ccond != t)
912 return(ROFF_IGN);
913
914 assert(roffs[t].proc);
915 return((*roffs[t].proc)(r, t, bufp, szp,
916 ln, ppos, pos, offs));
917 }
918
919
920 /* ARGSUSED */
921 static enum rofferr
922 roff_cond_text(ROFF_ARGS)
923 {
924 char *ep, *st;
925 enum roffrule rr;
926
927 rr = r->last->rule;
928
929 /*
930 * We display the value of the text if out current evaluation
931 * scope permits us to do so.
932 */
933
934 /* FIXME: use roff_ccond? */
935
936 st = &(*bufp)[pos];
937 if (NULL == (ep = strstr(st, "\\}"))) {
938 roffnode_cleanscope(r);
939 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
940 }
941
942 if (ep == st || (ep > st && '\\' != *(ep - 1)))
943 roffnode_pop(r);
944
945 roffnode_cleanscope(r);
946 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
947 }
948
949
950 static enum roffrule
951 roff_evalcond(const char *v, int *pos)
952 {
953
954 switch (v[*pos]) {
955 case ('n'):
956 (*pos)++;
957 return(ROFFRULE_ALLOW);
958 case ('e'):
959 /* FALLTHROUGH */
960 case ('o'):
961 /* FALLTHROUGH */
962 case ('t'):
963 (*pos)++;
964 return(ROFFRULE_DENY);
965 default:
966 break;
967 }
968
969 while (v[*pos] && ' ' != v[*pos])
970 (*pos)++;
971 return(ROFFRULE_DENY);
972 }
973
974 /* ARGSUSED */
975 static enum rofferr
976 roff_line_ignore(ROFF_ARGS)
977 {
978
979 if (ROFF_it == tok)
980 (*r->msg)(MANDOCERR_REQUEST, r->data, ln, ppos, "it");
981
982 return(ROFF_IGN);
983 }
984
985 /* ARGSUSED */
986 static enum rofferr
987 roff_cond(ROFF_ARGS)
988 {
989 int sv;
990 enum roffrule rule;
991
992 /* Stack overflow! */
993
994 if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
995 (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL);
996 return(ROFF_ERR);
997 }
998
999 /* First, evaluate the conditional. */
1000
1001 if (ROFF_el == tok) {
1002 /*
1003 * An `.el' will get the value of the current rstack
1004 * entry set in prior `ie' calls or defaults to DENY.
1005 */
1006 if (r->rstackpos < 0)
1007 rule = ROFFRULE_DENY;
1008 else
1009 rule = r->rstack[r->rstackpos];
1010 } else
1011 rule = roff_evalcond(*bufp, &pos);
1012
1013 sv = pos;
1014
1015 while (' ' == (*bufp)[pos])
1016 pos++;
1017
1018 /*
1019 * Roff is weird. If we have just white-space after the
1020 * conditional, it's considered the BODY and we exit without
1021 * really doing anything. Warn about this. It's probably
1022 * wrong.
1023 */
1024
1025 if ('\0' == (*bufp)[pos] && sv != pos) {
1026 (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL);
1027 return(ROFF_IGN);
1028 }
1029
1030 roffnode_push(r, tok, NULL, ln, ppos);
1031
1032 r->last->rule = rule;
1033
1034 if (ROFF_ie == tok) {
1035 /*
1036 * An if-else will put the NEGATION of the current
1037 * evaluated conditional into the stack.
1038 */
1039 r->rstackpos++;
1040 if (ROFFRULE_DENY == r->last->rule)
1041 r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
1042 else
1043 r->rstack[r->rstackpos] = ROFFRULE_DENY;
1044 }
1045
1046 /* If the parent has false as its rule, then so do we. */
1047
1048 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1049 r->last->rule = ROFFRULE_DENY;
1050
1051 /*
1052 * Determine scope. If we're invoked with "\{" trailing the
1053 * conditional, then we're in a multiline scope. Else our scope
1054 * expires on the next line.
1055 */
1056
1057 r->last->endspan = 1;
1058
1059 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1060 r->last->endspan = -1;
1061 pos += 2;
1062 }
1063
1064 /*
1065 * If there are no arguments on the line, the next-line scope is
1066 * assumed.
1067 */
1068
1069 if ('\0' == (*bufp)[pos])
1070 return(ROFF_IGN);
1071
1072 /* Otherwise re-run the roff parser after recalculating. */
1073
1074 *offs = pos;
1075 return(ROFF_RERUN);
1076 }
1077
1078
1079 /* ARGSUSED */
1080 static enum rofferr
1081 roff_ds(ROFF_ARGS)
1082 {
1083 char *name, *string;
1084
1085 /*
1086 * A symbol is named by the first word following the macro
1087 * invocation up to a space. Its value is anything after the
1088 * name's trailing whitespace and optional double-quote. Thus,
1089 *
1090 * [.ds foo "bar " ]
1091 *
1092 * will have `bar " ' as its value.
1093 */
1094
1095 string = *bufp + pos;
1096 name = roff_getname(r, &string, ln, pos);
1097 if ('\0' == *name)
1098 return(ROFF_IGN);
1099
1100 /* Read past initial double-quote. */
1101 if ('"' == *string)
1102 string++;
1103
1104 /* The rest is the value. */
1105 roff_setstr(r, name, string, 0);
1106 return(ROFF_IGN);
1107 }
1108
1109
1110 /* ARGSUSED */
1111 static enum rofferr
1112 roff_nr(ROFF_ARGS)
1113 {
1114 const char *key;
1115 char *val;
1116 struct reg *rg;
1117
1118 val = *bufp + pos;
1119 key = roff_getname(r, &val, ln, pos);
1120 rg = r->regs->regs;
1121
1122 if (0 == strcmp(key, "nS")) {
1123 rg[(int)REG_nS].set = 1;
1124 if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
1125 rg[(int)REG_nS].v.u = 0;
1126 }
1127
1128 return(ROFF_IGN);
1129 }
1130
1131 /* ARGSUSED */
1132 static enum rofferr
1133 roff_rm(ROFF_ARGS)
1134 {
1135 const char *name;
1136 char *cp;
1137
1138 cp = *bufp + pos;
1139 while ('\0' != *cp) {
1140 name = roff_getname(r, &cp, ln, cp - *bufp);
1141 if ('\0' != *name)
1142 roff_setstr(r, name, NULL, 0);
1143 }
1144 return(ROFF_IGN);
1145 }
1146
1147 /* ARGSUSED */
1148 static enum rofferr
1149 roff_TE(ROFF_ARGS)
1150 {
1151
1152 if (NULL == r->tbl)
1153 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
1154 else
1155 tbl_end(r->tbl);
1156
1157 r->tbl = NULL;
1158 return(ROFF_IGN);
1159 }
1160
1161 /* ARGSUSED */
1162 static enum rofferr
1163 roff_T_(ROFF_ARGS)
1164 {
1165
1166 if (NULL == r->tbl)
1167 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
1168 else
1169 tbl_restart(ppos, ln, r->tbl);
1170
1171 return(ROFF_IGN);
1172 }
1173
1174 /* ARGSUSED */
1175 static enum rofferr
1176 roff_EQ(ROFF_ARGS)
1177 {
1178 struct eqn_node *e;
1179
1180 assert(NULL == r->eqn);
1181 e = eqn_alloc(ppos, ln);
1182
1183 if (r->last_eqn)
1184 r->last_eqn->next = e;
1185 else
1186 r->first_eqn = r->last_eqn = e;
1187
1188 r->eqn = r->last_eqn = e;
1189 return(ROFF_IGN);
1190 }
1191
1192 /* ARGSUSED */
1193 static enum rofferr
1194 roff_EN(ROFF_ARGS)
1195 {
1196
1197 (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL);
1198 return(ROFF_IGN);
1199 }
1200
1201 /* ARGSUSED */
1202 static enum rofferr
1203 roff_TS(ROFF_ARGS)
1204 {
1205 struct tbl_node *t;
1206
1207 if (r->tbl) {
1208 (*r->msg)(MANDOCERR_SCOPEBROKEN, r->data, ln, ppos, NULL);
1209 tbl_end(r->tbl);
1210 }
1211
1212 t = tbl_alloc(ppos, ln, r->data, r->msg);
1213
1214 if (r->last_tbl)
1215 r->last_tbl->next = t;
1216 else
1217 r->first_tbl = r->last_tbl = t;
1218
1219 r->tbl = r->last_tbl = t;
1220 return(ROFF_IGN);
1221 }
1222
1223 /* ARGSUSED */
1224 static enum rofferr
1225 roff_so(ROFF_ARGS)
1226 {
1227 char *name;
1228
1229 (*r->msg)(MANDOCERR_SO, r->data, ln, ppos, NULL);
1230
1231 /*
1232 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1233 * opening anything that's not in our cwd or anything beneath
1234 * it. Thus, explicitly disallow traversing up the file-system
1235 * or using absolute paths.
1236 */
1237
1238 name = *bufp + pos;
1239 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1240 (*r->msg)(MANDOCERR_SOPATH, r->data, ln, pos, NULL);
1241 return(ROFF_ERR);
1242 }
1243
1244 *offs = pos;
1245 return(ROFF_SO);
1246 }
1247
1248 /* ARGSUSED */
1249 static enum rofferr
1250 roff_userdef(ROFF_ARGS)
1251 {
1252 const char *arg[9];
1253 char *cp, *n1, *n2;
1254 int i;
1255
1256 /*
1257 * Collect pointers to macro argument strings
1258 * and null-terminate them.
1259 */
1260 cp = *bufp + pos;
1261 for (i = 0; i < 9; i++)
1262 arg[i] = '\0' == *cp ? "" :
1263 mandoc_getarg(&cp, r->msg, r->data, ln, &pos);
1264
1265 /*
1266 * Expand macro arguments.
1267 */
1268 *szp = 0;
1269 n1 = cp = mandoc_strdup(r->current_string);
1270 while (NULL != (cp = strstr(cp, "\\$"))) {
1271 i = cp[2] - '1';
1272 if (0 > i || 8 < i) {
1273 /* Not an argument invocation. */
1274 cp += 2;
1275 continue;
1276 }
1277
1278 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1279 n2 = mandoc_malloc(*szp);
1280
1281 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1282 strlcat(n2, arg[i], *szp);
1283 strlcat(n2, cp + 3, *szp);
1284
1285 cp = n2 + (cp - n1);
1286 free(n1);
1287 n1 = n2;
1288 }
1289
1290 /*
1291 * Replace the macro invocation
1292 * by the expanded macro.
1293 */
1294 free(*bufp);
1295 *bufp = n1;
1296 if (0 == *szp)
1297 *szp = strlen(*bufp) + 1;
1298
1299 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1300 ROFF_REPARSE : ROFF_APPEND);
1301 }
1302
1303 static char *
1304 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1305 {
1306 char *name, *cp;
1307
1308 name = *cpp;
1309 if ('\0' == *name)
1310 return(name);
1311
1312 /* Read until end of name. */
1313 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1314 if ('\\' != *cp)
1315 continue;
1316 cp++;
1317 if ('\\' == *cp)
1318 continue;
1319 (*r->msg)(MANDOCERR_NAMESC, r->data, ln, pos, NULL);
1320 *cp = '\0';
1321 name = cp;
1322 }
1323
1324 /* Nil-terminate name. */
1325 if ('\0' != *cp)
1326 *(cp++) = '\0';
1327
1328 /* Read past spaces. */
1329 while (' ' == *cp)
1330 cp++;
1331
1332 *cpp = cp;
1333 return(name);
1334 }
1335
1336 /*
1337 * Store *string into the user-defined string called *name.
1338 * In multiline mode, append to an existing entry and append '\n';
1339 * else replace the existing entry, if there is one.
1340 * To clear an existing entry, call with (*r, *name, NULL, 0).
1341 */
1342 static void
1343 roff_setstr(struct roff *r, const char *name, const char *string,
1344 int multiline)
1345 {
1346 struct roffstr *n;
1347 char *c;
1348 size_t oldch, newch;
1349
1350 /* Search for an existing string with the same name. */
1351 n = r->first_string;
1352 while (n && strcmp(name, n->name))
1353 n = n->next;
1354
1355 if (NULL == n) {
1356 /* Create a new string table entry. */
1357 n = mandoc_malloc(sizeof(struct roffstr));
1358 n->name = mandoc_strdup(name);
1359 n->string = NULL;
1360 n->next = r->first_string;
1361 r->first_string = n;
1362 } else if (0 == multiline) {
1363 /* In multiline mode, append; else replace. */
1364 free(n->string);
1365 n->string = NULL;
1366 }
1367
1368 if (NULL == string)
1369 return;
1370
1371 /*
1372 * One additional byte for the '\n' in multiline mode,
1373 * and one for the terminating '\0'.
1374 */
1375 newch = strlen(string) + (multiline ? 2 : 1);
1376 if (NULL == n->string) {
1377 n->string = mandoc_malloc(newch);
1378 *n->string = '\0';
1379 oldch = 0;
1380 } else {
1381 oldch = strlen(n->string);
1382 n->string = mandoc_realloc(n->string, oldch + newch);
1383 }
1384
1385 /* Skip existing content in the destination buffer. */
1386 c = n->string + oldch;
1387
1388 /* Append new content to the destination buffer. */
1389 while (*string) {
1390 /*
1391 * Rudimentary roff copy mode:
1392 * Handle escaped backslashes.
1393 */
1394 if ('\\' == *string && '\\' == *(string + 1))
1395 string++;
1396 *c++ = *string++;
1397 }
1398
1399 /* Append terminating bytes. */
1400 if (multiline)
1401 *c++ = '\n';
1402 *c = '\0';
1403 }
1404
1405 static const char *
1406 roff_getstrn(const struct roff *r, const char *name, size_t len)
1407 {
1408 const struct roffstr *n;
1409
1410 n = r->first_string;
1411 while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1412 n = n->next;
1413
1414 return(n ? n->string : NULL);
1415 }
1416
1417 static void
1418 roff_freestr(struct roff *r)
1419 {
1420 struct roffstr *n, *nn;
1421
1422 for (n = r->first_string; n; n = nn) {
1423 free(n->name);
1424 free(n->string);
1425 nn = n->next;
1426 free(n);
1427 }
1428
1429 r->first_string = NULL;
1430 }
1431
1432 const struct tbl_span *
1433 roff_span(const struct roff *r)
1434 {
1435
1436 return(r->tbl ? tbl_span(r->tbl) : NULL);
1437 }
1438
1439 const struct eqn *
1440 roff_eqn(const struct roff *r)
1441 {
1442
1443 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1444 }