]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Use a default prefix of /usr/local. Honour DESTDIR for install targets.
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.82 2010/05/17 02:01:05 kristaps Exp $ */
2 /*
3 * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <assert.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <stdio.h>
25
26 #include "mandoc.h"
27 #include "roff.h"
28
/* Capacity of the roff.rstack array of pending `ie' rules. */
#define	RSTACK_MAX	128

/* True if character c is one of the two roff control characters. */
#define	ROFF_CTL(c) \
	((c) == '.' || (c) == '\'')
33
/*
 * Tokens for the roff requests handled in this file.  The values
 * double as indices into the roffs[] table, so the two must be kept
 * in the same order.
 */
enum	rofft {
	ROFF_am,	/* "am" */
	ROFF_ami,	/* "ami" */
	ROFF_am1,	/* "am1" */
	ROFF_de,	/* "de" */
	ROFF_dei,	/* "dei" */
	ROFF_de1,	/* "de1" */
	ROFF_el,	/* "el" */
	ROFF_ie,	/* "ie" */
	ROFF_if,	/* "if" */
	ROFF_ig,	/* "ig" */
	ROFF_cblock,	/* ".", i.e. the `..' block close */
	ROFF_ccond,	/* "\}", the conditional close */
	ROFF_MAX	/* number of tokens; also "no such token" */
};
49
/*
 * Evaluation rule attached to a conditional scope: whether its body
 * is passed on (ALLOW) or thrown away (DENY).
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
54
55 struct roff {
56 struct roffnode *last; /* leaf of stack */
57 mandocmsg msg; /* err/warn/fatal messages */
58 void *data; /* privdata for messages */
59 enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
60 int rstackpos; /* position in rstack */
61 };
62
63 struct roffnode {
64 enum rofft tok; /* type of node */
65 struct roffnode *parent; /* up one in stack */
66 int line; /* parse line */
67 int col; /* parse col */
68 char *end; /* end-rules: custom token */
69 int endspan; /* end-rules: next-line or infty */
70 enum roffrule rule; /* current evaluation rule */
71 };
72
/* Parameter list shared by every request handler in this file. */
#define	ROFF_ARGS	struct roff *r,	/* parser context */ \
			enum rofft tok,	/* token of the request */ \
			char **bufp,	/* input buffer */ \
			size_t *szp,	/* size of the input buffer */ \
			int ln,		/* line being parsed */ \
			int ppos,	/* original position in buffer */ \
			int pos,	/* current position in buffer */ \
			int *offs	/* out: offset to restart parsing at */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
83
84 struct roffmac {
85 const char *name; /* macro name */
86 roffproc proc; /* process new macro */
87 roffproc text; /* process as child text of macro */
88 roffproc sub; /* process as child of macro */
89 int flags;
90 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
91 };
92
/* Request handlers referenced from the roffs[] dispatch table. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
101
102 const struct roffmac roffs[ROFF_MAX] = {
103 { "am", roff_block, roff_block_text, roff_block_sub, 0 },
104 { "ami", roff_block, roff_block_text, roff_block_sub, 0 },
105 { "am1", roff_block, roff_block_text, roff_block_sub, 0 },
106 { "de", roff_block, roff_block_text, roff_block_sub, 0 },
107 { "dei", roff_block, roff_block_text, roff_block_sub, 0 },
108 { "de1", roff_block, roff_block_text, roff_block_sub, 0 },
109 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },
110 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },
111 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },
112 { "ig", roff_block, roff_block_text, roff_block_sub, 0 },
113 { ".", roff_cblock, NULL, NULL, 0 },
114 { "\\}", roff_ccond, NULL, NULL, 0 },
115 };
116
/* File-local helpers: scope stack management and token lookup. */
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	int		 roffnode_push(struct roff *,
				enum rofft, int, int);
static	void		 roff_free1(struct roff *);
static	enum rofft	 roff_hash_find(const char *);
static	enum rofft	 roff_parse(const char *, int *);
124
125
126 /*
127 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
128 * the nil-terminated string name could be found.
129 */
130 static enum rofft
131 roff_hash_find(const char *p)
132 {
133 int i;
134
135 /* FIXME: make this be fast and efficient. */
136
137 for (i = 0; i < (int)ROFF_MAX; i++)
138 if (0 == strcmp(roffs[i].name, p))
139 return((enum rofft)i);
140
141 return(ROFF_MAX);
142 }
143
144
145 /*
146 * Pop the current node off of the stack of roff instructions currently
147 * pending.
148 */
149 static void
150 roffnode_pop(struct roff *r)
151 {
152 struct roffnode *p;
153
154 assert(r->last);
155 p = r->last;
156
157 if (ROFF_el == p->tok)
158 if (r->rstackpos > -1)
159 r->rstackpos--;
160
161 r->last = r->last->parent;
162 if (p->end)
163 free(p->end);
164 free(p);
165 }
166
167
168 /*
169 * Push a roff node onto the instruction stack. This must later be
170 * removed with roffnode_pop().
171 */
172 static int
173 roffnode_push(struct roff *r, enum rofft tok, int line, int col)
174 {
175 struct roffnode *p;
176
177 if (NULL == (p = calloc(1, sizeof(struct roffnode)))) {
178 (*r->msg)(MANDOCERR_MEM, r->data, line, col, NULL);
179 return(0);
180 }
181
182 p->tok = tok;
183 p->parent = r->last;
184 p->line = line;
185 p->col = col;
186 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
187
188 r->last = p;
189 return(1);
190 }
191
192
193 static void
194 roff_free1(struct roff *r)
195 {
196
197 while (r->last)
198 roffnode_pop(r);
199 }
200
201
202 void
203 roff_reset(struct roff *r)
204 {
205
206 roff_free1(r);
207 }
208
209
/*
 * Destroy a parser context obtained from roff_alloc(): release all
 * open scopes, then the context itself.  r must not be used
 * afterwards.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);	/* pending scopes first */
	free(r);
}
217
218
219 struct roff *
220 roff_alloc(const mandocmsg msg, void *data)
221 {
222 struct roff *r;
223
224 if (NULL == (r = calloc(1, sizeof(struct roff)))) {
225 (*msg)(MANDOCERR_MEM, data, 0, 0, NULL);
226 return(0);
227 }
228
229 r->msg = msg;
230 r->data = data;
231 r->rstackpos = -1;
232 return(r);
233 }
234
235
236 enum rofferr
237 roff_parseln(struct roff *r, int ln,
238 char **bufp, size_t *szp, int pos, int *offs)
239 {
240 enum rofft t;
241 int ppos;
242
243 /*
244 * First, if a scope is open and we're not a macro, pass the
245 * text through the macro's filter. If a scope isn't open and
246 * we're not a macro, just let it through.
247 */
248
249 if (r->last && ! ROFF_CTL((*bufp)[pos])) {
250 t = r->last->tok;
251 assert(roffs[t].text);
252 return((*roffs[t].text)
253 (r, t, bufp, szp, ln, pos, pos, offs));
254 } else if ( ! ROFF_CTL((*bufp)[pos]))
255 return(ROFF_CONT);
256
257 /*
258 * If a scope is open, go to the child handler for that macro,
259 * as it may want to preprocess before doing anything with it.
260 */
261
262 if (r->last) {
263 t = r->last->tok;
264 assert(roffs[t].sub);
265 return((*roffs[t].sub)
266 (r, t, bufp, szp, ln, pos, pos, offs));
267 }
268
269 /*
270 * Lastly, as we've no scope open, try to look up and execute
271 * the new macro. If no macro is found, simply return and let
272 * the compilers handle it.
273 */
274
275 ppos = pos;
276 if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
277 return(ROFF_CONT);
278
279 assert(roffs[t].proc);
280 return((*roffs[t].proc)
281 (r, t, bufp, szp, ln, ppos, pos, offs));
282 }
283
284
285 int
286 roff_endparse(struct roff *r)
287 {
288
289 if (NULL == r->last)
290 return(1);
291 return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
292 r->last->col, NULL));
293 }
294
295
296 /*
297 * Parse a roff node's type from the input buffer. This must be in the
298 * form of ".foo xxx" in the usual way.
299 */
300 static enum rofft
301 roff_parse(const char *buf, int *pos)
302 {
303 int j;
304 char mac[5];
305 enum rofft t;
306
307 assert(ROFF_CTL(buf[*pos]));
308 (*pos)++;
309
310 while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos]))
311 (*pos)++;
312
313 if ('\0' == buf[*pos])
314 return(ROFF_MAX);
315
316 for (j = 0; j < 4; j++, (*pos)++)
317 if ('\0' == (mac[j] = buf[*pos]))
318 break;
319 else if (' ' == buf[*pos] || (j && '\\' == buf[*pos]))
320 break;
321
322 if (j == 4 || j < 1)
323 return(ROFF_MAX);
324
325 mac[j] = '\0';
326
327 if (ROFF_MAX == (t = roff_hash_find(mac)))
328 return(t);
329
330 while (buf[*pos] && ' ' == buf[*pos])
331 (*pos)++;
332
333 return(t);
334 }
335
336
337 /* ARGSUSED */
338 static enum rofferr
339 roff_cblock(ROFF_ARGS)
340 {
341
342 /*
343 * A block-close `..' should only be invoked as a child of an
344 * ignore macro, otherwise raise a warning and just ignore it.
345 */
346
347 if (NULL == r->last) {
348 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
349 return(ROFF_ERR);
350 return(ROFF_IGN);
351 }
352
353 switch (r->last->tok) {
354 case (ROFF_am):
355 /* FALLTHROUGH */
356 case (ROFF_ami):
357 /* FALLTHROUGH */
358 case (ROFF_am1):
359 /* FALLTHROUGH */
360 case (ROFF_de):
361 /* FALLTHROUGH */
362 case (ROFF_dei):
363 /* FALLTHROUGH */
364 case (ROFF_de1):
365 /* FALLTHROUGH */
366 case (ROFF_ig):
367 break;
368 default:
369 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
370 return(ROFF_ERR);
371 return(ROFF_IGN);
372 }
373
374 if ((*bufp)[pos])
375 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
376 return(ROFF_ERR);
377
378 roffnode_pop(r);
379 roffnode_cleanscope(r);
380 return(ROFF_IGN);
381
382 }
383
384
385 static void
386 roffnode_cleanscope(struct roff *r)
387 {
388
389 while (r->last) {
390 if (--r->last->endspan < 0)
391 break;
392 roffnode_pop(r);
393 }
394 }
395
396
397 /* ARGSUSED */
398 static enum rofferr
399 roff_ccond(ROFF_ARGS)
400 {
401
402 if (NULL == r->last) {
403 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
404 return(ROFF_ERR);
405 return(ROFF_IGN);
406 }
407
408 switch (r->last->tok) {
409 case (ROFF_el):
410 /* FALLTHROUGH */
411 case (ROFF_ie):
412 /* FALLTHROUGH */
413 case (ROFF_if):
414 break;
415 default:
416 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
417 return(ROFF_ERR);
418 return(ROFF_IGN);
419 }
420
421 if (r->last->endspan > -1) {
422 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
423 return(ROFF_ERR);
424 return(ROFF_IGN);
425 }
426
427 if ((*bufp)[pos])
428 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
429 return(ROFF_ERR);
430
431 roffnode_pop(r);
432 roffnode_cleanscope(r);
433 return(ROFF_IGN);
434 }
435
436
437 /* ARGSUSED */
438 static enum rofferr
439 roff_block(ROFF_ARGS)
440 {
441 int sv;
442 size_t sz;
443
444 if (ROFF_ig != tok && '\0' == (*bufp)[pos]) {
445 if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
446 return(ROFF_ERR);
447 return(ROFF_IGN);
448 } else if (ROFF_ig != tok) {
449 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
450 pos++;
451 while (' ' == (*bufp)[pos])
452 pos++;
453 }
454
455 if ( ! roffnode_push(r, tok, ln, ppos))
456 return(ROFF_ERR);
457
458 if ('\0' == (*bufp)[pos])
459 return(ROFF_IGN);
460
461 sv = pos;
462 while ((*bufp)[pos] && ' ' != (*bufp)[pos] &&
463 '\t' != (*bufp)[pos])
464 pos++;
465
466 /*
467 * Note: groff does NOT like escape characters in the input.
468 * Instead of detecting this, we're just going to let it fly and
469 * to hell with it.
470 */
471
472 assert(pos > sv);
473 sz = (size_t)(pos - sv);
474
475 if (1 == sz && '.' == (*bufp)[sv])
476 return(ROFF_IGN);
477
478 r->last->end = malloc(sz + 1);
479
480 if (NULL == r->last->end) {
481 (*r->msg)(MANDOCERR_MEM, r->data, ln, pos, NULL);
482 return(ROFF_ERR);
483 }
484
485 memcpy(r->last->end, *bufp + sv, sz);
486 r->last->end[(int)sz] = '\0';
487
488 if ((*bufp)[pos])
489 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
490 return(ROFF_ERR);
491
492 return(ROFF_IGN);
493 }
494
495
496 /* ARGSUSED */
497 static enum rofferr
498 roff_block_sub(ROFF_ARGS)
499 {
500 enum rofft t;
501 int i, j;
502
503 /*
504 * First check whether a custom macro exists at this level. If
505 * it does, then check against it. This is some of groff's
506 * stranger behaviours. If we encountered a custom end-scope
507 * tag and that tag also happens to be a "real" macro, then we
508 * need to try interpreting it again as a real macro. If it's
509 * not, then return ignore. Else continue.
510 */
511
512 if (r->last->end) {
513 i = pos + 1;
514 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
515 i++;
516
517 for (j = 0; r->last->end[j]; j++, i++)
518 if ((*bufp)[i] != r->last->end[j])
519 break;
520
521 if ('\0' == r->last->end[j] &&
522 ('\0' == (*bufp)[i] ||
523 ' ' == (*bufp)[i] ||
524 '\t' == (*bufp)[i])) {
525 roffnode_pop(r);
526 roffnode_cleanscope(r);
527
528 if (ROFF_MAX != roff_parse(*bufp, &pos))
529 return(ROFF_RERUN);
530 return(ROFF_IGN);
531 }
532 }
533
534 /*
535 * If we have no custom end-query or lookup failed, then try
536 * pulling it out of the hashtable.
537 */
538
539 ppos = pos;
540 t = roff_parse(*bufp, &pos);
541
542 /* If we're not a comment-end, then throw it away. */
543 if (ROFF_cblock != t)
544 return(ROFF_IGN);
545
546 assert(roffs[t].proc);
547 return((*roffs[t].proc)(r, t, bufp,
548 szp, ln, ppos, pos, offs));
549 }
550
551
552 /* ARGSUSED */
553 static enum rofferr
554 roff_block_text(ROFF_ARGS)
555 {
556
557 return(ROFF_IGN);
558 }
559
560
561 /* ARGSUSED */
562 static enum rofferr
563 roff_cond_sub(ROFF_ARGS)
564 {
565 enum rofft t;
566 enum roffrule rr;
567
568 ppos = pos;
569 rr = r->last->rule;
570
571 roffnode_cleanscope(r);
572
573 if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
574 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
575
576 /*
577 * A denied conditional must evaluate its children if and only
578 * if they're either structurally required (such as loops and
579 * conditionals) or a closing macro.
580 */
581 if (ROFFRULE_DENY == rr)
582 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
583 if (ROFF_ccond != t)
584 return(ROFF_IGN);
585
586 assert(roffs[t].proc);
587 return((*roffs[t].proc)
588 (r, t, bufp, szp, ln, ppos, pos, offs));
589 }
590
591
592 /* ARGSUSED */
593 static enum rofferr
594 roff_cond_text(ROFF_ARGS)
595 {
596 char *ep, *st;
597 enum roffrule rr;
598
599 rr = r->last->rule;
600
601 /*
602 * We display the value of the text if out current evaluation
603 * scope permits us to do so.
604 */
605
606 st = &(*bufp)[pos];
607 if (NULL == (ep = strstr(st, "\\}"))) {
608 roffnode_cleanscope(r);
609 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
610 }
611
612 if (ep > st && '\\' != *(ep - 1))
613 roffnode_pop(r);
614
615 roffnode_cleanscope(r);
616 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
617 }
618
619
620 /* ARGSUSED */
621 static enum rofferr
622 roff_cond(ROFF_ARGS)
623 {
624 int sv;
625
626 /* Stack overflow! */
627
628 if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
629 (*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL);
630 return(ROFF_ERR);
631 }
632
633 if (ROFF_if == tok || ROFF_ie == tok) {
634 /*
635 * Read ahead past the conditional. FIXME: this does
636 * not work, as conditionals don't end on whitespace,
637 * but are parsed according to a formal grammar. It's
638 * good enough for now, however.
639 */
640 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
641 pos++;
642 }
643
644 sv = pos;
645 while (' ' == (*bufp)[pos])
646 pos++;
647
648 /*
649 * Roff is weird. If we have just white-space after the
650 * conditional, it's considered the BODY and we exit without
651 * really doing anything. Warn about this. It's probably
652 * wrong.
653 */
654 if ('\0' == (*bufp)[pos] && sv != pos) {
655 if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
656 return(ROFF_ERR);
657 return(ROFF_IGN);
658 }
659
660 if ( ! roffnode_push(r, tok, ln, ppos))
661 return(ROFF_ERR);
662
663 /* TODO: here we would evaluate the conditional. */
664
665 if (ROFF_el == tok) {
666 /*
667 * An `.el' will get the value of the current rstack
668 * entry set in prior `ie' calls or defaults to DENY.
669 */
670 if (r->rstackpos < 0)
671 r->last->rule = ROFFRULE_DENY;
672 else
673 r->last->rule = r->rstack[r->rstackpos];
674 } else if (ROFF_ie == tok) {
675 /*
676 * An if-else will put the NEGATION of the current
677 * evaluated conditional into the stack.
678 */
679 r->rstackpos++;
680 if (ROFFRULE_DENY == r->last->rule)
681 r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
682 else
683 r->rstack[r->rstackpos] = ROFFRULE_DENY;
684 }
685
686 r->last->endspan = 1;
687
688 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
689 r->last->endspan = -1;
690 pos += 2;
691 }
692
693 /*
694 * If there are no arguments on the line, the next-line scope is
695 * assumed.
696 */
697
698 if ('\0' == (*bufp)[pos])
699 return(ROFF_IGN);
700
701 /* Otherwise re-run the roff parser after recalculating. */
702
703 *offs = pos;
704 return(ROFF_RERUN);
705 }