]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Fix man.7 to include AT and UC in its syntax table.
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.84 2010/05/24 23:54:18 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <assert.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <stdio.h>
25
26 #include "mandoc.h"
27 #include "roff.h"
28
/* Number of entries in the `ie' rule stack (struct roff, rstack[]). */
#define	RSTACK_MAX	128

/*
 * Evaluates to non-zero if the character c is one of the two roff
 * control characters, `.' or `''.  The argument is evaluated twice.
 */
#define	ROFF_CTL(c) \
	('.' == (c) || '\'' == (c))
33
/*
 * Tokens for the roff requests handled in this file.  The values are
 * used as indices into the roffs[] table, so the order of the
 * enumerators must match the order of the table entries.
 */
enum	rofft {
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_rm,
	ROFF_tr,
	ROFF_cblock,	/* block close: named "." in roffs[] */
	ROFF_ccond,	/* conditional close: named "\}" in roffs[] */
	ROFF_MAX	/* table size; also the "not found" result */
};
52
/*
 * Evaluation rule of a conditional scope.  The conditional handlers
 * return ROFF_IGN for lines in a DENY scope and ROFF_CONT for lines in
 * an ALLOW scope.
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
57
/*
 * Parser context: the stack of open scopes, the message callback with
 * its private data, and the fixed-size stack of rules that `ie'
 * requests leave behind for a later `el'.
 */
struct	roff {
	struct roffnode	*last; /* leaf of stack */
	mandocmsg	 msg; /* err/warn/fatal messages */
	void		*data; /* privdata for messages */
	enum roffrule	 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	int		 rstackpos; /* position in rstack */
};
65
/*
 * One open scope on the parser stack.  Nodes are linked leaf-to-root
 * through `parent'; the leaf is struct roff's `last'.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	enum roffrule	 rule; /* current evaluation rule */
};
75
/*
 * Common parameter list shared by every request handler, followed by
 * the function-pointer type for such handlers.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
86
/*
 * Description of one request: its name and the handlers invoked when
 * the request itself is seen (proc), when a text line is seen inside
 * its scope (text), and when a control line is seen inside its scope
 * (sub).
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
95
/* Request handlers referenced from the roffs[] table. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_line(ROFF_ARGS);
105
/*
 * Request table, indexed by enum rofft; entries must stay in the same
 * order as the enumerators.  Requests with NULL text/sub handlers never
 * open a scope in this file (their proc handlers do not push a node).
 */
const	struct roffmac	 roffs[ROFF_MAX] = {
	{ "am", roff_block, roff_block_text, roff_block_sub, 0 },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0 },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0 },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0 },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0 },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0 },
	{ "ds", roff_line, NULL, NULL, 0 },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0 },
	{ "rm", roff_line, NULL, NULL, 0 },
	{ "tr", roff_line, NULL, NULL, 0 },
	{ ".", roff_cblock, NULL, NULL, 0 },
	{ "\\}", roff_ccond, NULL, NULL, 0 },
};
123
/* Internal helpers: scope-stack management and request-name parsing. */
static	void		 roff_free1(struct roff *);
static	enum rofft	 roff_hash_find(const char *);
static	void		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_push(struct roff *,
				enum rofft, int, int);
static	void		 roffnode_pop(struct roff *);
static	enum rofft	 roff_parse(const char *, int *);
131
132
133 /*
134 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
135 * the nil-terminated string name could be found.
136 */
137 static enum rofft
138 roff_hash_find(const char *p)
139 {
140 int i;
141
142 /* FIXME: make this be fast and efficient. */
143
144 for (i = 0; i < (int)ROFF_MAX; i++)
145 if (0 == strcmp(roffs[i].name, p))
146 return((enum rofft)i);
147
148 return(ROFF_MAX);
149 }
150
151
152 /*
153 * Pop the current node off of the stack of roff instructions currently
154 * pending.
155 */
156 static void
157 roffnode_pop(struct roff *r)
158 {
159 struct roffnode *p;
160
161 assert(r->last);
162 p = r->last;
163
164 if (ROFF_el == p->tok)
165 if (r->rstackpos > -1)
166 r->rstackpos--;
167
168 r->last = r->last->parent;
169 if (p->end)
170 free(p->end);
171 free(p);
172 }
173
174
175 /*
176 * Push a roff node onto the instruction stack. This must later be
177 * removed with roffnode_pop().
178 */
179 static int
180 roffnode_push(struct roff *r, enum rofft tok, int line, int col)
181 {
182 struct roffnode *p;
183
184 if (NULL == (p = calloc(1, sizeof(struct roffnode)))) {
185 (*r->msg)(MANDOCERR_MEM, r->data, line, col, NULL);
186 return(0);
187 }
188
189 p->tok = tok;
190 p->parent = r->last;
191 p->line = line;
192 p->col = col;
193 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
194
195 r->last = p;
196 return(1);
197 }
198
199
200 static void
201 roff_free1(struct roff *r)
202 {
203
204 while (r->last)
205 roffnode_pop(r);
206 }
207
208
209 void
210 roff_reset(struct roff *r)
211 {
212
213 roff_free1(r);
214 }
215
216
217 void
218 roff_free(struct roff *r)
219 {
220
221 roff_free1(r);
222 free(r);
223 }
224
225
226 struct roff *
227 roff_alloc(const mandocmsg msg, void *data)
228 {
229 struct roff *r;
230
231 if (NULL == (r = calloc(1, sizeof(struct roff)))) {
232 (*msg)(MANDOCERR_MEM, data, 0, 0, NULL);
233 return(0);
234 }
235
236 r->msg = msg;
237 r->data = data;
238 r->rstackpos = -1;
239 return(r);
240 }
241
242
243 enum rofferr
244 roff_parseln(struct roff *r, int ln,
245 char **bufp, size_t *szp, int pos, int *offs)
246 {
247 enum rofft t;
248 int ppos;
249
250 /*
251 * First, if a scope is open and we're not a macro, pass the
252 * text through the macro's filter. If a scope isn't open and
253 * we're not a macro, just let it through.
254 */
255
256 if (r->last && ! ROFF_CTL((*bufp)[pos])) {
257 t = r->last->tok;
258 assert(roffs[t].text);
259 return((*roffs[t].text)
260 (r, t, bufp, szp, ln, pos, pos, offs));
261 } else if ( ! ROFF_CTL((*bufp)[pos]))
262 return(ROFF_CONT);
263
264 /*
265 * If a scope is open, go to the child handler for that macro,
266 * as it may want to preprocess before doing anything with it.
267 */
268
269 if (r->last) {
270 t = r->last->tok;
271 assert(roffs[t].sub);
272 return((*roffs[t].sub)
273 (r, t, bufp, szp, ln, pos, pos, offs));
274 }
275
276 /*
277 * Lastly, as we've no scope open, try to look up and execute
278 * the new macro. If no macro is found, simply return and let
279 * the compilers handle it.
280 */
281
282 ppos = pos;
283 if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
284 return(ROFF_CONT);
285
286 assert(roffs[t].proc);
287 return((*roffs[t].proc)
288 (r, t, bufp, szp, ln, ppos, pos, offs));
289 }
290
291
292 int
293 roff_endparse(struct roff *r)
294 {
295
296 if (NULL == r->last)
297 return(1);
298 return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
299 r->last->col, NULL));
300 }
301
302
303 /*
304 * Parse a roff node's type from the input buffer. This must be in the
305 * form of ".foo xxx" in the usual way.
306 */
307 static enum rofft
308 roff_parse(const char *buf, int *pos)
309 {
310 int j;
311 char mac[5];
312 enum rofft t;
313
314 assert(ROFF_CTL(buf[*pos]));
315 (*pos)++;
316
317 while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos]))
318 (*pos)++;
319
320 if ('\0' == buf[*pos])
321 return(ROFF_MAX);
322
323 for (j = 0; j < 4; j++, (*pos)++)
324 if ('\0' == (mac[j] = buf[*pos]))
325 break;
326 else if (' ' == buf[*pos] || (j && '\\' == buf[*pos]))
327 break;
328
329 if (j == 4 || j < 1)
330 return(ROFF_MAX);
331
332 mac[j] = '\0';
333
334 if (ROFF_MAX == (t = roff_hash_find(mac)))
335 return(t);
336
337 while (buf[*pos] && ' ' == buf[*pos])
338 (*pos)++;
339
340 return(t);
341 }
342
343
344 /* ARGSUSED */
345 static enum rofferr
346 roff_cblock(ROFF_ARGS)
347 {
348
349 /*
350 * A block-close `..' should only be invoked as a child of an
351 * ignore macro, otherwise raise a warning and just ignore it.
352 */
353
354 if (NULL == r->last) {
355 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
356 return(ROFF_ERR);
357 return(ROFF_IGN);
358 }
359
360 switch (r->last->tok) {
361 case (ROFF_am):
362 /* FALLTHROUGH */
363 case (ROFF_ami):
364 /* FALLTHROUGH */
365 case (ROFF_am1):
366 /* FALLTHROUGH */
367 case (ROFF_de):
368 /* FALLTHROUGH */
369 case (ROFF_dei):
370 /* FALLTHROUGH */
371 case (ROFF_de1):
372 /* FALLTHROUGH */
373 case (ROFF_ig):
374 break;
375 default:
376 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
377 return(ROFF_ERR);
378 return(ROFF_IGN);
379 }
380
381 if ((*bufp)[pos])
382 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
383 return(ROFF_ERR);
384
385 roffnode_pop(r);
386 roffnode_cleanscope(r);
387 return(ROFF_IGN);
388
389 }
390
391
392 static void
393 roffnode_cleanscope(struct roff *r)
394 {
395
396 while (r->last) {
397 if (--r->last->endspan < 0)
398 break;
399 roffnode_pop(r);
400 }
401 }
402
403
404 /* ARGSUSED */
405 static enum rofferr
406 roff_ccond(ROFF_ARGS)
407 {
408
409 if (NULL == r->last) {
410 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
411 return(ROFF_ERR);
412 return(ROFF_IGN);
413 }
414
415 switch (r->last->tok) {
416 case (ROFF_el):
417 /* FALLTHROUGH */
418 case (ROFF_ie):
419 /* FALLTHROUGH */
420 case (ROFF_if):
421 break;
422 default:
423 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
424 return(ROFF_ERR);
425 return(ROFF_IGN);
426 }
427
428 if (r->last->endspan > -1) {
429 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
430 return(ROFF_ERR);
431 return(ROFF_IGN);
432 }
433
434 if ((*bufp)[pos])
435 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
436 return(ROFF_ERR);
437
438 roffnode_pop(r);
439 roffnode_cleanscope(r);
440 return(ROFF_IGN);
441 }
442
443
444 /* ARGSUSED */
445 static enum rofferr
446 roff_block(ROFF_ARGS)
447 {
448 int sv;
449 size_t sz;
450
451 if (ROFF_ig != tok && '\0' == (*bufp)[pos]) {
452 if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
453 return(ROFF_ERR);
454 return(ROFF_IGN);
455 } else if (ROFF_ig != tok) {
456 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
457 pos++;
458 while (' ' == (*bufp)[pos])
459 pos++;
460 }
461
462 if ( ! roffnode_push(r, tok, ln, ppos))
463 return(ROFF_ERR);
464
465 if ('\0' == (*bufp)[pos])
466 return(ROFF_IGN);
467
468 sv = pos;
469 while ((*bufp)[pos] && ' ' != (*bufp)[pos] &&
470 '\t' != (*bufp)[pos])
471 pos++;
472
473 /*
474 * Note: groff does NOT like escape characters in the input.
475 * Instead of detecting this, we're just going to let it fly and
476 * to hell with it.
477 */
478
479 assert(pos > sv);
480 sz = (size_t)(pos - sv);
481
482 if (1 == sz && '.' == (*bufp)[sv])
483 return(ROFF_IGN);
484
485 r->last->end = malloc(sz + 1);
486
487 if (NULL == r->last->end) {
488 (*r->msg)(MANDOCERR_MEM, r->data, ln, pos, NULL);
489 return(ROFF_ERR);
490 }
491
492 memcpy(r->last->end, *bufp + sv, sz);
493 r->last->end[(int)sz] = '\0';
494
495 if ((*bufp)[pos])
496 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
497 return(ROFF_ERR);
498
499 return(ROFF_IGN);
500 }
501
502
503 /* ARGSUSED */
504 static enum rofferr
505 roff_block_sub(ROFF_ARGS)
506 {
507 enum rofft t;
508 int i, j;
509
510 /*
511 * First check whether a custom macro exists at this level. If
512 * it does, then check against it. This is some of groff's
513 * stranger behaviours. If we encountered a custom end-scope
514 * tag and that tag also happens to be a "real" macro, then we
515 * need to try interpreting it again as a real macro. If it's
516 * not, then return ignore. Else continue.
517 */
518
519 if (r->last->end) {
520 i = pos + 1;
521 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
522 i++;
523
524 for (j = 0; r->last->end[j]; j++, i++)
525 if ((*bufp)[i] != r->last->end[j])
526 break;
527
528 if ('\0' == r->last->end[j] &&
529 ('\0' == (*bufp)[i] ||
530 ' ' == (*bufp)[i] ||
531 '\t' == (*bufp)[i])) {
532 roffnode_pop(r);
533 roffnode_cleanscope(r);
534
535 if (ROFF_MAX != roff_parse(*bufp, &pos))
536 return(ROFF_RERUN);
537 return(ROFF_IGN);
538 }
539 }
540
541 /*
542 * If we have no custom end-query or lookup failed, then try
543 * pulling it out of the hashtable.
544 */
545
546 ppos = pos;
547 t = roff_parse(*bufp, &pos);
548
549 /* If we're not a comment-end, then throw it away. */
550 if (ROFF_cblock != t)
551 return(ROFF_IGN);
552
553 assert(roffs[t].proc);
554 return((*roffs[t].proc)(r, t, bufp,
555 szp, ln, ppos, pos, offs));
556 }
557
558
559 /* ARGSUSED */
560 static enum rofferr
561 roff_block_text(ROFF_ARGS)
562 {
563
564 return(ROFF_IGN);
565 }
566
567
568 /* ARGSUSED */
569 static enum rofferr
570 roff_cond_sub(ROFF_ARGS)
571 {
572 enum rofft t;
573 enum roffrule rr;
574
575 ppos = pos;
576 rr = r->last->rule;
577
578 roff_cond_text(r, tok, bufp, szp, ln, ppos, pos, offs);
579
580 if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
581 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
582
583 /*
584 * A denied conditional must evaluate its children if and only
585 * if they're either structurally required (such as loops and
586 * conditionals) or a closing macro.
587 */
588 if (ROFFRULE_DENY == rr)
589 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
590 if (ROFF_ccond != t)
591 return(ROFF_IGN);
592
593 assert(roffs[t].proc);
594 return((*roffs[t].proc)
595 (r, t, bufp, szp, ln, ppos, pos, offs));
596 }
597
598
599 /* ARGSUSED */
600 static enum rofferr
601 roff_cond_text(ROFF_ARGS)
602 {
603 char *ep, *st;
604 enum roffrule rr;
605
606 rr = r->last->rule;
607
608 /*
609 * We display the value of the text if out current evaluation
610 * scope permits us to do so.
611 */
612
613 st = &(*bufp)[pos];
614 if (NULL == (ep = strstr(st, "\\}"))) {
615 roffnode_cleanscope(r);
616 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
617 }
618
619 if (ep > st && '\\' != *(ep - 1)) {
620 ep = '\0';
621 roffnode_pop(r);
622 }
623
624 roffnode_cleanscope(r);
625 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
626 }
627
628
/*
 * Open a conditional scope for `if', `ie' or `el'.
 *
 * For `if' and `ie' the condition is the text up to the next blank;
 * it evaluates to ALLOW only if it begins with the character `n'.
 * An `ie' additionally pushes the negation of its rule onto the rule
 * stack, and an `el' takes its rule from the top of that stack (DENY
 * if the stack is empty).  Whatever the outcome, a scope nested in a
 * DENY parent is itself DENY.
 *
 * The scope covers one line unless the body begins with `\{', in which
 * case it stays open until explicitly closed.
 *
 * Returns ROFF_ERR on failure, ROFF_IGN if nothing follows on the
 * line, and ROFF_RERUN with *offs set to the start of the body
 * otherwise.
 */
/* ARGSUSED */
static enum rofferr
roff_cond(ROFF_ARGS)
{
	int		 cpos; /* position of the condition */
	int		 sv;

	/* Stack overflow! */

	/* Only `ie' pushes onto rstack, so only `ie' can overflow it. */
	if (ROFF_ie == tok && r->rstackpos == RSTACK_MAX - 1) {
		(*r->msg)(MANDOCERR_MEM, r->data, ln, ppos, NULL);
		return(ROFF_ERR);
	}

	cpos = pos;

	if (ROFF_if == tok || ROFF_ie == tok) {
		/*
		 * Read ahead past the conditional. FIXME: this does
		 * not work, as conditionals don't end on whitespace,
		 * but are parsed according to a formal grammar. It's
		 * good enough for now, however.
		 */
		while ((*bufp)[pos] && ' ' != (*bufp)[pos])
			pos++;
	}

	sv = pos;
	while (' ' == (*bufp)[pos])
		pos++;

	/*
	 * Roff is weird. If we have just white-space after the
	 * conditional, it's considered the BODY and we exit without
	 * really doing anything. Warn about this. It's probably
	 * wrong.
	 */
	if ('\0' == (*bufp)[pos] && sv != pos) {
		if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
			return(ROFF_ERR);
		return(ROFF_IGN);
	}

	if ( ! roffnode_push(r, tok, ln, ppos))
		return(ROFF_ERR);

	/* XXX: Implement more conditionals. */

	/* The only condition recognised so far is a leading `n'. */
	if (ROFF_if == tok || ROFF_ie == tok)
		r->last->rule = 'n' == (*bufp)[cpos] ?
			ROFFRULE_ALLOW : ROFFRULE_DENY;
	else if (ROFF_el == tok) {
		/*
		 * An `.el' will get the value of the current rstack
		 * entry set in prior `ie' calls or defaults to DENY.
		 */
		if (r->rstackpos < 0)
			r->last->rule = ROFFRULE_DENY;
		else
			r->last->rule = r->rstack[r->rstackpos];
	}
	if (ROFF_ie == tok) {
		/*
		 * An if-else will put the NEGATION of the current
		 * evaluated conditional into the stack.
		 */
		r->rstackpos++;
		if (ROFFRULE_DENY == r->last->rule)
			r->rstack[r->rstackpos] = ROFFRULE_ALLOW;
		else
			r->rstack[r->rstackpos] = ROFFRULE_DENY;
	}
	/* A denied parent overrides whatever was computed above. */
	if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
		r->last->rule = ROFFRULE_DENY;

	/* Default to a next-line scope; `\{' makes it open-ended. */
	r->last->endspan = 1;

	if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
		r->last->endspan = -1;
		pos += 2;
	}

	/*
	 * If there are no arguments on the line, the next-line scope is
	 * assumed.
	 */

	if ('\0' == (*bufp)[pos])
		return(ROFF_IGN);

	/* Otherwise re-run the roff parser after recalculating. */

	*offs = pos;
	return(ROFF_RERUN);
}
724
725
726 /* ARGSUSED */
727 static enum rofferr
728 roff_line(ROFF_ARGS)
729 {
730
731 return(ROFF_IGN);
732 }