]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Oops: forgot to make roff_cblock() understand the new macros.
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.81 2010/05/17 00:46:35 kristaps Exp $ */
2 /*
3 * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <assert.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <stdio.h>
25
26 #include "mandoc.h"
27 #include "roff.h"
28
/* Non-zero when `c' is one of the two control characters, `.' or `''. */
#define	ROFF_CTL(c) \
	((c) == '.' || (c) == '\'')
31
/*
 * Tokens for the requests handled in this file.  The numeric value of
 * each token is used as an index into the roffs[] table, so the order
 * here and the order of that table must agree.
 */
enum	rofft {
	ROFF_am,	/* "am" */
	ROFF_ami,	/* "ami" */
	ROFF_am1,	/* "am1" */
	ROFF_de,	/* "de" */
	ROFF_dei,	/* "dei" */
	ROFF_de1,	/* "de1" */
	ROFF_if,	/* "if" */
	ROFF_ig,	/* "ig" */
	ROFF_cblock,	/* "." (i.e., a `..' line): close a block */
	ROFF_ccond,	/* "\}": close a conditional */
#if 0
	ROFF_ie,
	ROFF_el,
#endif
	ROFF_MAX	/* number of tokens; also the "not found" value */
};
49
50 struct roff {
51 struct roffnode *last; /* leaf of stack */
52 mandocmsg msg; /* err/warn/fatal messages */
53 void *data; /* privdata for messages */
54 };
55
/*
 * Evaluation rule attached to every scope node.  A node without a
 * parent starts out as ROFFRULE_DENY; other nodes inherit the rule of
 * their parent.
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
60
61 struct roffnode {
62 enum rofft tok; /* type of node */
63 struct roffnode *parent; /* up one in stack */
64 int line; /* parse line */
65 int col; /* parse col */
66 char *end; /* end-rules: custom token */
67 int endspan; /* end-rules: next-line or infty */
68 enum roffrule rule;
69 };
70
/*
 * Common parameter list shared by every request handler:
 *
 *   r     parse context
 *   tok   token of the macro being handled
 *   bufp  input buffer
 *   szp   size of the input buffer
 *   ln    parse line
 *   ppos  original position in the buffer
 *   pos   current position in the buffer
 *   offs  reset offset of the buffer data
 */
#define	ROFF_ARGS	 struct roff *r, \
			 enum rofft tok, \
			 char **bufp, \
			 size_t *szp, \
			 int ln, \
			 int ppos, \
			 int pos, \
			 int *offs

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
81
82 struct roffmac {
83 const char *name; /* macro name */
84 roffproc proc; /* process new macro */
85 roffproc text; /* process as child text of macro */
86 roffproc sub; /* process as child of macro */
87 int flags;
88 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
89 };
90
/* Handlers for block requests (am, ami, am1, de, dei, de1, ig). */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);

/* Handlers for the two closing requests. */
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);

/* Handlers for the `if' conditional. */
static	enum rofferr	 roff_if(ROFF_ARGS);
static	enum rofferr	 roff_if_text(ROFF_ARGS);
static	enum rofferr	 roff_if_sub(ROFF_ARGS);
99
100 const struct roffmac roffs[ROFF_MAX] = {
101 { "am", roff_block, roff_block_text, roff_block_sub, 0 },
102 { "ami", roff_block, roff_block_text, roff_block_sub, 0 },
103 { "am1", roff_block, roff_block_text, roff_block_sub, 0 },
104 { "de", roff_block, roff_block_text, roff_block_sub, 0 },
105 { "dei", roff_block, roff_block_text, roff_block_sub, 0 },
106 { "de1", roff_block, roff_block_text, roff_block_sub, 0 },
107 { "if", roff_if, roff_if_text, roff_if_sub, ROFFMAC_STRUCT },
108 { "ig", roff_block, roff_block_text, roff_block_sub, 0 },
109 { ".", roff_cblock, NULL, NULL, 0 },
110 { "\\}", roff_ccond, NULL, NULL, 0 },
111 };
112
/* Scope-stack management. */
static	int		 roffnode_push(struct roff *r,
				enum rofft tok, int line, int col);
static	void		 roffnode_pop(struct roff *r);
static	void		 roffnode_cleanscope(struct roff *r);
static	void		 roff_free1(struct roff *r);

/* Request-name lookup and parsing. */
static	enum rofft	 roff_hash_find(const char *p);
static	enum rofft	 roff_parse(const char *buf, int *pos);
120
121
122 /*
123 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
124 * the nil-terminated string name could be found.
125 */
126 static enum rofft
127 roff_hash_find(const char *p)
128 {
129 int i;
130
131 /* FIXME: make this be fast and efficient. */
132
133 for (i = 0; i < (int)ROFF_MAX; i++)
134 if (0 == strcmp(roffs[i].name, p))
135 return((enum rofft)i);
136
137 return(ROFF_MAX);
138 }
139
140
141 /*
142 * Pop the current node off of the stack of roff instructions currently
143 * pending.
144 */
145 static void
146 roffnode_pop(struct roff *r)
147 {
148 struct roffnode *p;
149
150 assert(r->last);
151 p = r->last;
152 r->last = r->last->parent;
153 if (p->end)
154 free(p->end);
155 free(p);
156 }
157
158
159 /*
160 * Push a roff node onto the instruction stack. This must later be
161 * removed with roffnode_pop().
162 */
163 static int
164 roffnode_push(struct roff *r, enum rofft tok, int line, int col)
165 {
166 struct roffnode *p;
167
168 if (NULL == (p = calloc(1, sizeof(struct roffnode)))) {
169 (*r->msg)(MANDOCERR_MEM, r->data, line, col, NULL);
170 return(0);
171 }
172
173 p->tok = tok;
174 p->parent = r->last;
175 p->line = line;
176 p->col = col;
177 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
178
179 r->last = p;
180 return(1);
181 }
182
183
184 static void
185 roff_free1(struct roff *r)
186 {
187
188 while (r->last)
189 roffnode_pop(r);
190 }
191
192
193 void
194 roff_reset(struct roff *r)
195 {
196
197 roff_free1(r);
198 }
199
200
201 void
202 roff_free(struct roff *r)
203 {
204
205 roff_free1(r);
206 free(r);
207 }
208
209
210 struct roff *
211 roff_alloc(const mandocmsg msg, void *data)
212 {
213 struct roff *r;
214
215 if (NULL == (r = calloc(1, sizeof(struct roff)))) {
216 (*msg)(MANDOCERR_MEM, data, 0, 0, NULL);
217 return(0);
218 }
219
220 r->msg = msg;
221 r->data = data;
222 return(r);
223 }
224
225
226 enum rofferr
227 roff_parseln(struct roff *r, int ln,
228 char **bufp, size_t *szp, int pos, int *offs)
229 {
230 enum rofft t;
231 int ppos;
232
233 /*
234 * First, if a scope is open and we're not a macro, pass the
235 * text through the macro's filter. If a scope isn't open and
236 * we're not a macro, just let it through.
237 */
238
239 if (r->last && ! ROFF_CTL((*bufp)[pos])) {
240 t = r->last->tok;
241 assert(roffs[t].text);
242 return((*roffs[t].text)
243 (r, t, bufp, szp, ln, pos, pos, offs));
244 } else if ( ! ROFF_CTL((*bufp)[pos]))
245 return(ROFF_CONT);
246
247 /*
248 * If a scope is open, go to the child handler for that macro,
249 * as it may want to preprocess before doing anything with it.
250 */
251
252 if (r->last) {
253 t = r->last->tok;
254 assert(roffs[t].sub);
255 return((*roffs[t].sub)
256 (r, t, bufp, szp, ln, pos, pos, offs));
257 }
258
259 /*
260 * Lastly, as we've no scope open, try to look up and execute
261 * the new macro. If no macro is found, simply return and let
262 * the compilers handle it.
263 */
264
265 ppos = pos;
266 if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
267 return(ROFF_CONT);
268
269 assert(roffs[t].proc);
270 return((*roffs[t].proc)
271 (r, t, bufp, szp, ln, ppos, pos, offs));
272 }
273
274
275 int
276 roff_endparse(struct roff *r)
277 {
278
279 if (NULL == r->last)
280 return(1);
281 return((*r->msg)(MANDOCERR_SCOPEEXIT, r->data, r->last->line,
282 r->last->col, NULL));
283 }
284
285
286 /*
287 * Parse a roff node's type from the input buffer. This must be in the
288 * form of ".foo xxx" in the usual way.
289 */
290 static enum rofft
291 roff_parse(const char *buf, int *pos)
292 {
293 int j;
294 char mac[5];
295 enum rofft t;
296
297 assert(ROFF_CTL(buf[*pos]));
298 (*pos)++;
299
300 while (buf[*pos] && (' ' == buf[*pos] || '\t' == buf[*pos]))
301 (*pos)++;
302
303 if ('\0' == buf[*pos])
304 return(ROFF_MAX);
305
306 for (j = 0; j < 4; j++, (*pos)++)
307 if ('\0' == (mac[j] = buf[*pos]))
308 break;
309 else if (' ' == buf[*pos])
310 break;
311
312 if (j == 4 || j < 1)
313 return(ROFF_MAX);
314
315 mac[j] = '\0';
316
317 if (ROFF_MAX == (t = roff_hash_find(mac)))
318 return(t);
319
320 while (buf[*pos] && ' ' == buf[*pos])
321 (*pos)++;
322
323 return(t);
324 }
325
326
327 /* ARGSUSED */
328 static enum rofferr
329 roff_cblock(ROFF_ARGS)
330 {
331
332 /*
333 * A block-close `..' should only be invoked as a child of an
334 * ignore macro, otherwise raise a warning and just ignore it.
335 */
336
337 if (NULL == r->last) {
338 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
339 return(ROFF_ERR);
340 return(ROFF_IGN);
341 }
342
343 switch (r->last->tok) {
344 case (ROFF_am):
345 /* FALLTHROUGH */
346 case (ROFF_ami):
347 /* FALLTHROUGH */
348 case (ROFF_am1):
349 /* FALLTHROUGH */
350 case (ROFF_de):
351 /* FALLTHROUGH */
352 case (ROFF_dei):
353 /* FALLTHROUGH */
354 case (ROFF_de1):
355 /* FALLTHROUGH */
356 case (ROFF_ig):
357 break;
358 default:
359 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
360 return(ROFF_ERR);
361 return(ROFF_IGN);
362 }
363
364 if ((*bufp)[pos])
365 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
366 return(ROFF_ERR);
367
368 roffnode_pop(r);
369 roffnode_cleanscope(r);
370 return(ROFF_IGN);
371
372 }
373
374
375 static void
376 roffnode_cleanscope(struct roff *r)
377 {
378
379 while (r->last) {
380 if (--r->last->endspan < 0)
381 break;
382 roffnode_pop(r);
383 }
384 }
385
386
387 /* ARGSUSED */
388 static enum rofferr
389 roff_ccond(ROFF_ARGS)
390 {
391
392 if (NULL == r->last) {
393 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
394 return(ROFF_ERR);
395 return(ROFF_IGN);
396 }
397
398 if (ROFF_if != r->last->tok) {
399 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
400 return(ROFF_ERR);
401 return(ROFF_IGN);
402 }
403
404 if (r->last->endspan > -1) {
405 if ( ! (*r->msg)(MANDOCERR_NOSCOPE, r->data, ln, ppos, NULL))
406 return(ROFF_ERR);
407 return(ROFF_IGN);
408 }
409
410 if ((*bufp)[pos])
411 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
412 return(ROFF_ERR);
413
414 roffnode_pop(r);
415 roffnode_cleanscope(r);
416 return(ROFF_IGN);
417 }
418
419
420 /* ARGSUSED */
421 static enum rofferr
422 roff_block(ROFF_ARGS)
423 {
424 int sv;
425 size_t sz;
426
427 if (ROFF_ig != tok && '\0' == (*bufp)[pos]) {
428 if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
429 return(ROFF_ERR);
430 return(ROFF_IGN);
431 } else if (ROFF_ig != tok) {
432 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
433 pos++;
434 while (' ' == (*bufp)[pos])
435 pos++;
436 }
437
438 if ( ! roffnode_push(r, tok, ln, ppos))
439 return(ROFF_ERR);
440
441 if ('\0' == (*bufp)[pos])
442 return(ROFF_IGN);
443
444 sv = pos;
445 while ((*bufp)[pos] && ' ' != (*bufp)[pos] &&
446 '\t' != (*bufp)[pos])
447 pos++;
448
449 /*
450 * Note: groff does NOT like escape characters in the input.
451 * Instead of detecting this, we're just going to let it fly and
452 * to hell with it.
453 */
454
455 assert(pos > sv);
456 sz = (size_t)(pos - sv);
457
458 if (1 == sz && '.' == (*bufp)[sv])
459 return(ROFF_IGN);
460
461 r->last->end = malloc(sz + 1);
462
463 if (NULL == r->last->end) {
464 (*r->msg)(MANDOCERR_MEM, r->data, ln, pos, NULL);
465 return(ROFF_ERR);
466 }
467
468 memcpy(r->last->end, *bufp + sv, sz);
469 r->last->end[(int)sz] = '\0';
470
471 if ((*bufp)[pos])
472 if ( ! (*r->msg)(MANDOCERR_ARGSLOST, r->data, ln, pos, NULL))
473 return(ROFF_ERR);
474
475 return(ROFF_IGN);
476 }
477
478
479 /* ARGSUSED */
480 static enum rofferr
481 roff_if_sub(ROFF_ARGS)
482 {
483 enum rofft t;
484 enum roffrule rr;
485
486 ppos = pos;
487 rr = r->last->rule;
488 roffnode_cleanscope(r);
489
490 if (ROFF_MAX == (t = roff_parse(*bufp, &pos)))
491 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
492
493 /*
494 * A denied conditional must evaluate its children if and only
495 * if they're either structurally required (such as loops and
496 * conditionals) or a closing macro.
497 */
498 if (ROFFRULE_DENY == rr)
499 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
500 if (ROFF_ccond != t)
501 return(ROFF_IGN);
502
503 assert(roffs[t].proc);
504 return((*roffs[t].proc)
505 (r, t, bufp, szp, ln, ppos, pos, offs));
506 }
507
508
509 /* ARGSUSED */
510 static enum rofferr
511 roff_block_sub(ROFF_ARGS)
512 {
513 enum rofft t;
514 int i, j;
515
516 /*
517 * First check whether a custom macro exists at this level. If
518 * it does, then check against it. This is some of groff's
519 * stranger behaviours. If we encountered a custom end-scope
520 * tag and that tag also happens to be a "real" macro, then we
521 * need to try interpreting it again as a real macro. If it's
522 * not, then return ignore. Else continue.
523 */
524
525 if (r->last->end) {
526 i = pos + 1;
527 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
528 i++;
529
530 for (j = 0; r->last->end[j]; j++, i++)
531 if ((*bufp)[i] != r->last->end[j])
532 break;
533
534 if ('\0' == r->last->end[j] &&
535 ('\0' == (*bufp)[i] ||
536 ' ' == (*bufp)[i] ||
537 '\t' == (*bufp)[i])) {
538 roffnode_pop(r);
539 roffnode_cleanscope(r);
540
541 if (ROFF_MAX != roff_parse(*bufp, &pos))
542 return(ROFF_RERUN);
543 return(ROFF_IGN);
544 }
545 }
546
547 /*
548 * If we have no custom end-query or lookup failed, then try
549 * pulling it out of the hashtable.
550 */
551
552 ppos = pos;
553 t = roff_parse(*bufp, &pos);
554
555 /* If we're not a comment-end, then throw it away. */
556 if (ROFF_cblock != t)
557 return(ROFF_IGN);
558
559 assert(roffs[t].proc);
560 return((*roffs[t].proc)(r, t, bufp,
561 szp, ln, ppos, pos, offs));
562 }
563
564
565 /* ARGSUSED */
566 static enum rofferr
567 roff_block_text(ROFF_ARGS)
568 {
569
570 return(ROFF_IGN);
571 }
572
573
574 /* ARGSUSED */
575 static enum rofferr
576 roff_if_text(ROFF_ARGS)
577 {
578 char *ep, *st;
579
580 st = &(*bufp)[pos];
581 if (NULL == (ep = strstr(st, "\\}"))) {
582 roffnode_cleanscope(r);
583 return(ROFF_IGN);
584 }
585
586 if (ep > st && '\\' != *(ep - 1))
587 roffnode_pop(r);
588
589 roffnode_cleanscope(r);
590 return(ROFF_IGN);
591 }
592
593
594 /* ARGSUSED */
595 static enum rofferr
596 roff_if(ROFF_ARGS)
597 {
598 int sv;
599
600 /*
601 * Read ahead past the conditional.
602 * FIXME: this does not work, as conditionals don't end on
603 * whitespace, but are parsed according to a formal grammar.
604 * It's good enough for now, however.
605 */
606
607 while ((*bufp)[pos] && ' ' != (*bufp)[pos])
608 pos++;
609
610 sv = pos;
611 while (' ' == (*bufp)[pos])
612 pos++;
613
614 /*
615 * Roff is weird. If we have just white-space after the
616 * conditional, it's considered the BODY and we exit without
617 * really doing anything. Warn about this. It's probably
618 * wrong.
619 */
620
621 if ('\0' == (*bufp)[pos] && sv != pos) {
622 if ( ! (*r->msg)(MANDOCERR_NOARGS, r->data, ln, ppos, NULL))
623 return(ROFF_ERR);
624 return(ROFF_IGN);
625 }
626
627 if ( ! roffnode_push(r, tok, ln, ppos))
628 return(ROFF_ERR);
629
630 /* Don't evaluate: just assume NO. */
631
632 r->last->endspan = 1;
633
634 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
635 r->last->endspan = -1;
636 pos += 2;
637 }
638
639 /*
640 * If there are no arguments on the line, the next-line scope is
641 * assumed.
642 */
643
644 if ('\0' == (*bufp)[pos])
645 return(ROFF_IGN);
646
647 /* Otherwise re-run the roff parser after recalculating. */
648
649 *offs = pos;
650 return(ROFF_RERUN);
651 }