]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
Single call-back for filters.
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.4 2008/11/25 12:14:02 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <string.h>
25 #include <time.h>
26
27 #include "libmdocml.h"
28 #include "private.h"
29
30 #define ROFF_MAXARG 10
31
/*
 * Direction of a token callback invocation: the parser passes
 * ROFF_ENTER when a macro line is processed and ROFF_EXIT when an open
 * scope is being closed.
 */
enum roffd {
	ROFF_ENTER = 0,
	ROFF_EXIT = 1
};
36
37 /* FIXME: prolog roffs can be text roffs, too. */
38
/*
 * Classification of a token.  Comment tokens are skipped by the line
 * parser; layout tokens open a scope node; text tokens do not.
 */
enum rofftype {
	ROFF_COMMENT = 0,
	ROFF_TEXT = 1,
	ROFF_LAYOUT = 2
};
44
struct rofftree;

/*
 * Parameter list shared by every token callback: the token id, the
 * parse tree, the NULL-terminated words following the macro name (NULL
 * when a scope is being exited) and the entry/exit direction.
 */
#define	ROFFCALL_ARGS \
	int tok, \
	struct rofftree *tree, \
	const char *argv[], \
	enum roffd type
50
51 struct rofftok {
52 int (*cb)(ROFFCALL_ARGS);
53 enum rofftype type;
54 int flags;
55 #define ROFF_NESTED (1 << 0)
56 #define ROFF_PARSED (1 << 1)
57 #define ROFF_CALLABLE (1 << 2)
58 #define ROFF_QUOTES (1 << 3)
59 };
60
/* Bit for roffarg.flags: the option is followed by a value word. */
#define	ROFF_VALUE	(1 << 0)

/* Static description of one macro option (a `-name' argument). */
struct roffarg {
	int	tok;	/* token id of the macro the option is listed under */
	int	flags;	/* ROFF_VALUE or 0 */
};
66
/*
 * One open scope.  Nodes form a singly linked stack through `parent';
 * the innermost scope is rofftree.last.
 */
struct roffnode {
	int		 tok;		/* token id that opened the scope */
	struct roffnode	*parent;	/* enclosing scope, or NULL */
	size_t		 line;		/* input line where it was opened */
};
72
73 struct rofftree {
74 struct roffnode *last;
75 time_t date;
76 char os[64];
77 char title[64];
78 char section[64];
79 char volume[64];
80 int state;
81 #define ROFF_PRELUDE (1 << 1)
82 #define ROFF_PRELUDE_Os (1 << 2)
83 #define ROFF_PRELUDE_Dt (1 << 3)
84 #define ROFF_PRELUDE_Dd (1 << 4)
85 #define ROFF_BODY (1 << 5)
86
87 roffin roffin;
88 roffblkin roffblkin;
89 roffout roffout;
90 roffblkout roffblkout;
91
92 struct md_mbuf *mbuf; /* NULL if !flush. */
93 const struct md_args *args;
94 const struct md_rbuf *rbuf;
95 };
96
/* Token callbacks. */
static	int		 roff_Dd(ROFFCALL_ARGS);
static	int		 roff_Dt(ROFFCALL_ARGS);
static	int		 roff_Os(ROFFCALL_ARGS);
static	int		 roff_layout(ROFFCALL_ARGS);
static	int		 roff_text(ROFFCALL_ARGS);

/* Scope stack maintenance. */
static	struct roffnode	*roffnode_new(int tokid, struct rofftree *tree);
static	void		 roffnode_free(int tokid, struct rofftree *tree);

/* Name lookup and line parsing. */
static	int		 rofffindtok(const char *name);
static	int		 rofffindarg(const char *name);
static	int		 rofffindcallable(const char *name);
static	int		 roffargs(int tok, char *buf, char **argv);
static	int		 roffparse(struct rofftree *tree, char *buf,
				size_t sz);
static	int		 textparse(const struct rofftree *tree,
				const char *buf, size_t sz);
114
115
/*
 * Per-token dispatch table, indexed by token id (the same index used
 * for toknames[]).  Each entry gives the callback invoked on scope
 * entry/exit, the token class, and ROFF_NESTED/ROFF_PARSED/
 * ROFF_CALLABLE/ROFF_QUOTES flag bits.  The order of the entries must
 * match the order of the token ids.
 */
static const struct rofftok tokens[ROFF_MAX] = {
	{ NULL, ROFF_COMMENT, 0 }, /* comment-class token: no callback */
	{ roff_Dd, ROFF_TEXT, 0 }, /* Dd */
	{ roff_Dt, ROFF_TEXT, 0 }, /* Dt */
	{ roff_Os, ROFF_TEXT, 0 }, /* Os */
	{ roff_layout, ROFF_LAYOUT, ROFF_PARSED }, /* Sh */
	{ roff_layout, ROFF_LAYOUT, ROFF_PARSED }, /* Ss XXX */
	{ roff_layout, ROFF_LAYOUT, 0 }, /* Pp */
	{ roff_layout, ROFF_LAYOUT, 0 }, /* D1 */
	{ roff_layout, ROFF_LAYOUT, 0 }, /* Dl */
	{ roff_layout, ROFF_LAYOUT, 0 }, /* Bd */
	{ roff_layout, ROFF_LAYOUT, 0 }, /* Ed */
	{ roff_layout, ROFF_LAYOUT, 0 }, /* Bl */
	{ roff_layout, ROFF_LAYOUT, 0 }, /* El */
	{ roff_layout, ROFF_LAYOUT, 0 }, /* It */
	{ roff_text, ROFF_TEXT, ROFF_PARSED }, /* An */
	{ roff_text, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE }, /* Li */
};
134
135 /* FIXME: multiple owners? */
136
/*
 * Per-option table, indexed by the option id that rofffindarg()
 * returns (the same index used for tokargnames[]).  `tok' names the
 * macro the option is listed under; ROFF_VALUE marks options that are
 * followed by a value word (see roffnextopt()).
 * NOTE(review): the `tok' member is not consulted by the code visible
 * in this file, so options are not yet validated against their macro.
 */
static const struct roffarg tokenargs[ROFF_ARGMAX] = {
	{ ROFF_An, 0 }, /* split */
	{ ROFF_An, 0 }, /* nosplit */
	{ ROFF_Bd, 0 }, /* ragged */
	{ ROFF_Bd, 0 }, /* unfilled */
	{ ROFF_Bd, 0 }, /* literal */
	{ ROFF_Bd, ROFF_VALUE }, /* file */
	{ ROFF_Bd, ROFF_VALUE }, /* offset */
	{ ROFF_Bl, 0 }, /* bullet */
	{ ROFF_Bl, 0 }, /* dash */
	{ ROFF_Bl, 0 }, /* hyphen */
	{ ROFF_Bl, 0 }, /* item */
	{ ROFF_Bl, 0 }, /* enum */
	{ ROFF_Bl, 0 }, /* tag */
	{ ROFF_Bl, 0 }, /* diag */
	{ ROFF_Bl, 0 }, /* hang */
	{ ROFF_Bl, 0 }, /* ohang */
	{ ROFF_Bl, 0 }, /* inset */
	{ ROFF_Bl, 0 }, /* column */
};
157
/*
 * Name tables searched by rofffindtok() and rofffindarg(); the index
 * of a name is the token/option id used with tokens[] and tokenargs[].
 * The initialisers ROFF_NAMES and ROFF_ARGNAMES are defined outside
 * this file.
 */
static const char *const toknames[ROFF_MAX] = ROFF_NAMES;
static const char *const tokargnames[ROFF_ARGMAX] = ROFF_ARGNAMES;
160
161
162 int
163 roff_free(struct rofftree *tree, int flush)
164 {
165 int error;
166
167 assert(tree->mbuf);
168 if ( ! flush)
169 tree->mbuf = NULL;
170
171 /* LINTED */
172 while (tree->last)
173 if ( ! (*tokens[tree->last->tok].cb)
174 (tree->last->tok, tree, NULL, ROFF_EXIT))
175 /* Disallow flushing. */
176 tree->mbuf = NULL;
177
178 error = tree->mbuf ? 0 : 1;
179
180 if (tree->mbuf && (ROFF_PRELUDE & tree->state)) {
181 warnx("%s: prelude never finished",
182 tree->rbuf->name);
183 error = 1;
184 }
185
186 free(tree);
187 return(error ? 0 : 1);
188 }
189
190
191 struct rofftree *
192 roff_alloc(const struct md_args *args, struct md_mbuf *out,
193 const struct md_rbuf *in, roffin textin,
194 roffout textout, roffblkin blkin, roffblkout blkout)
195 {
196 struct rofftree *tree;
197
198 if (NULL == (tree = calloc(1, sizeof(struct rofftree)))) {
199 warn("malloc");
200 return(NULL);
201 }
202
203 tree->state = ROFF_PRELUDE;
204 tree->args = args;
205 tree->mbuf = out;
206 tree->rbuf = in;
207 tree->roffin = textin;
208 tree->roffout = textout;
209 tree->roffblkin = blkin;
210 tree->roffblkout = blkout;
211
212 return(tree);
213 }
214
215
216 int
217 roff_engine(struct rofftree *tree, char *buf, size_t sz)
218 {
219
220 if (0 == sz) {
221 warnx("%s: blank line (line %zu)",
222 tree->rbuf->name,
223 tree->rbuf->line);
224 return(0);
225 } else if ('.' != *buf)
226 return(textparse(tree, buf, sz));
227
228 return(roffparse(tree, buf, sz));
229 }
230
231
232 static int
233 textparse(const struct rofftree *tree, const char *buf, size_t sz)
234 {
235
236 if (NULL == tree->last) {
237 warnx("%s: unexpected text (line %zu)",
238 tree->rbuf->name,
239 tree->rbuf->line);
240 return(0);
241 } else if (NULL == tree->last->parent) {
242 warnx("%s: disallowed text (line %zu)",
243 tree->rbuf->name,
244 tree->rbuf->line);
245 return(0);
246 }
247
248 /* Print text. */
249
250 return(1);
251 }
252
253
254 static int
255 roffargs(int tok, char *buf, char **argv)
256 {
257 int i;
258
259 (void)tok;/* FIXME: quotable strings? */
260
261 assert(tok >= 0 && tok < ROFF_MAX);
262 assert('.' == *buf);
263
264 /* LINTED */
265 for (i = 0; *buf && i < ROFF_MAXARG; i++) {
266 argv[i] = buf++;
267 while (*buf && ! isspace(*buf))
268 buf++;
269 if (0 == *buf) {
270 continue;
271 }
272 *buf++ = 0;
273 while (*buf && isspace(*buf))
274 buf++;
275 }
276
277 assert(i > 0);
278 if (i < ROFF_MAXARG)
279 argv[i] = NULL;
280
281 return(ROFF_MAXARG > i);
282 }
283
284
285 static int
286 roffparse(struct rofftree *tree, char *buf, size_t sz)
287 {
288 int tok, t;
289 struct roffnode *node;
290 char *argv[ROFF_MAXARG];
291 const char **argvp;
292
293 assert(sz > 0);
294
295 /*
296 * Extract the token identifier from the buffer. If there's no
297 * callback for the token (comment, etc.) then exit immediately.
298 * We don't do any error handling (yet), so if the token doesn't
299 * exist, die.
300 */
301
302 if (3 > sz) {
303 warnx("%s: malformed line (line %zu)",
304 tree->rbuf->name,
305 tree->rbuf->line);
306 return(0);
307 } else if (ROFF_MAX == (tok = rofffindtok(buf + 1))) {
308 warnx("%s: unknown line token `%c%c' (line %zu)",
309 tree->rbuf->name,
310 *(buf + 1), *(buf + 2),
311 tree->rbuf->line);
312 return(0);
313 } else if (ROFF_COMMENT == tokens[tok].type)
314 /* Ignore comment tokens. */
315 return(1);
316
317 if ( ! roffargs(tok, buf, argv)) {
318 warnx("%s: too many arguments to `%s' (line %zu)",
319 tree->rbuf->name, toknames[tok],
320 tree->rbuf->line);
321 return(0);
322 }
323
324 /*
325 * If this is a non-nestable layout token and we're below a
326 * token of the same type, then recurse upward to the token,
327 * closing out the interim scopes.
328 *
329 * If there's a nested token on the chain, then raise an error
330 * as nested tokens have corresponding "ending" tokens and we're
331 * breaking their scope.
332 */
333
334 node = NULL;
335
336 if (ROFF_LAYOUT == tokens[tok].type &&
337 ! (ROFF_NESTED & tokens[tok].flags)) {
338 for (node = tree->last; node; node = node->parent) {
339 if (node->tok == tok)
340 break;
341
342 /* Don't break nested scope. */
343
344 if ( ! (ROFF_NESTED & tokens[node->tok].flags))
345 continue;
346 warnx("%s: scope of %s (line %zu) broken by "
347 "%s (line %zu)",
348 tree->rbuf->name,
349 toknames[tok], node->line,
350 toknames[node->tok],
351 tree->rbuf->line);
352 return(0);
353 }
354 }
355
356 if (node) {
357 assert(ROFF_LAYOUT == tokens[tok].type);
358 assert( ! (ROFF_NESTED & tokens[tok].flags));
359 assert(node->tok == tok);
360
361 /* Clear up to last scoped token. */
362
363 /* LINTED */
364 do {
365 t = tree->last->tok;
366 if ( ! (*tokens[tree->last->tok].cb)
367 (tree->last->tok, tree, NULL, ROFF_EXIT))
368 return(0);
369 } while (t != tok);
370 }
371
372 /* Proceed with actual token processing. */
373
374 argvp = (const char **)&argv[1];
375 return((*tokens[tok].cb)(tok, tree, argvp, ROFF_ENTER));
376 }
377
378
379 static int
380 rofffindarg(const char *name)
381 {
382 size_t i;
383
384 /* FIXME: use a table, this is slow but ok for now. */
385
386 /* LINTED */
387 for (i = 0; i < ROFF_ARGMAX; i++)
388 /* LINTED */
389 if (0 == strcmp(name, tokargnames[i]))
390 return((int)i);
391
392 return(ROFF_ARGMAX);
393 }
394
395
396 static int
397 rofffindtok(const char *name)
398 {
399 size_t i;
400
401 /* FIXME: use a table, this is slow but ok for now. */
402
403 /* LINTED */
404 for (i = 0; i < ROFF_MAX; i++)
405 /* LINTED */
406 if (0 == strncmp(name, toknames[i], 2))
407 return((int)i);
408
409 return(ROFF_MAX);
410 }
411
412
413 static int
414 rofffindcallable(const char *name)
415 {
416 int c;
417
418 if (ROFF_MAX == (c = rofffindtok(name)))
419 return(ROFF_MAX);
420 return(ROFF_CALLABLE & tokens[c].flags ? c : ROFF_MAX);
421 }
422
423
424 static struct roffnode *
425 roffnode_new(int tokid, struct rofftree *tree)
426 {
427 struct roffnode *p;
428
429 if (NULL == (p = malloc(sizeof(struct roffnode)))) {
430 warn("malloc");
431 return(NULL);
432 }
433
434 p->line = tree->rbuf->line;
435 p->tok = tokid;
436 p->parent = tree->last;
437 tree->last = p;
438 return(p);
439 }
440
441
442 static void
443 roffnode_free(int tokid, struct rofftree *tree)
444 {
445 struct roffnode *p;
446
447 assert(tree->last);
448 assert(tree->last->tok == tokid);
449
450 p = tree->last;
451 tree->last = tree->last->parent;
452 free(p);
453 }
454
455
456 /* ARGSUSED */
457 static int
458 roff_Dd(ROFFCALL_ARGS)
459 {
460
461 if (ROFF_BODY & tree->state) {
462 assert( ! (ROFF_PRELUDE & tree->state));
463 assert(ROFF_PRELUDE_Dd & tree->state);
464 return(roff_text(tok, tree, argv, type));
465 }
466
467 assert(ROFF_PRELUDE & tree->state);
468 assert( ! (ROFF_BODY & tree->state));
469
470 if (ROFF_PRELUDE_Dd & tree->state ||
471 ROFF_PRELUDE_Dt & tree->state) {
472 warnx("%s: prelude `Dd' out-of-order (line %zu)",
473 tree->rbuf->name, tree->rbuf->line);
474 return(0);
475 }
476
477 /* TODO: parse date. */
478
479 assert(NULL == tree->last);
480 tree->state |= ROFF_PRELUDE_Dd;
481
482 return(1);
483 }
484
485
486 /* ARGSUSED */
487 static int
488 roff_Dt(ROFFCALL_ARGS)
489 {
490
491 if (ROFF_BODY & tree->state) {
492 assert( ! (ROFF_PRELUDE & tree->state));
493 assert(ROFF_PRELUDE_Dt & tree->state);
494 return(roff_text(tok, tree, argv, type));
495 }
496
497 assert(ROFF_PRELUDE & tree->state);
498 assert( ! (ROFF_BODY & tree->state));
499
500 if ( ! (ROFF_PRELUDE_Dd & tree->state) ||
501 (ROFF_PRELUDE_Dt & tree->state)) {
502 warnx("%s: prelude `Dt' out-of-order (line %zu)",
503 tree->rbuf->name, tree->rbuf->line);
504 return(0);
505 }
506
507 /* TODO: parse date. */
508
509 assert(NULL == tree->last);
510 tree->state |= ROFF_PRELUDE_Dt;
511
512 return(1);
513 }
514
515
516 /* ARGSUSED */
517 static int
518 roff_Os(ROFFCALL_ARGS)
519 {
520
521 if (ROFF_EXIT == type) {
522 assert(ROFF_PRELUDE_Os & tree->state);
523 return(roff_layout(tok, tree, argv, type));
524 } else if (ROFF_BODY & tree->state) {
525 assert( ! (ROFF_PRELUDE & tree->state));
526 assert(ROFF_PRELUDE_Os & tree->state);
527 return(roff_text(tok, tree, argv, type));
528 }
529
530 assert(ROFF_PRELUDE & tree->state);
531 if ( ! (ROFF_PRELUDE_Dt & tree->state) ||
532 ! (ROFF_PRELUDE_Dd & tree->state)) {
533 warnx("%s: prelude `Os' out-of-order (line %zu)",
534 tree->rbuf->name, tree->rbuf->line);
535 return(0);
536 }
537
538 /* TODO: extract OS. */
539
540 tree->state |= ROFF_PRELUDE_Os;
541 tree->state &= ~ROFF_PRELUDE;
542 tree->state |= ROFF_BODY;
543
544 assert(NULL == tree->last);
545
546 return(roff_layout(tok, tree, argv, type));
547 }
548
549
550 /* ARGUSED */
551 static int
552 roffnextopt(const char ***in, char **val)
553 {
554 const char *arg, **argv;
555 int v;
556
557 *val = NULL;
558 argv = *in;
559 assert(argv);
560
561 if (NULL == (arg = *argv))
562 return(-1);
563 if ('-' != *arg)
564 return(-1);
565 if (ROFF_ARGMAX == (v = rofffindarg(&arg[1])))
566 return(-1);
567 if ( ! (ROFF_VALUE & tokenargs[v].flags))
568 return(v);
569
570 *in = ++argv;
571
572 /* FIXME: what if this looks like a roff token or argument? */
573
574 return(*argv ? v : ROFF_ARGMAX);
575 }
576
577
578 /* ARGSUSED */
579 static int
580 roff_layout(ROFFCALL_ARGS)
581 {
582 int i, c, argcp[ROFF_MAXARG];
583 char *v, *argvp[ROFF_MAXARG];
584
585 if (ROFF_PRELUDE & tree->state) {
586 warnx("%s: macro `%s' called in prelude (line %zu)",
587 tree->rbuf->name, toknames[tok],
588 tree->rbuf->line);
589 return(0);
590 }
591
592 if (ROFF_EXIT == type) {
593 roffnode_free(tok, tree);
594 return((*tree->roffblkout)(tok));
595 }
596
597 i = 0;
598 while (-1 != (c = roffnextopt(&argv, &v))) {
599 if (ROFF_ARGMAX == c) {
600 warnx("%s: error parsing `%s' args (line %zu)",
601 tree->rbuf->name,
602 toknames[tok],
603 tree->rbuf->line);
604 return(0);
605 }
606 argcp[i] = c;
607 argvp[i] = v;
608 argv++;
609 }
610
611 if (NULL == roffnode_new(tok, tree))
612 return(0);
613
614 if ( ! (*tree->roffin)(tok, argcp, argvp))
615 return(0);
616
617 if ( ! (ROFF_PARSED & tokens[tok].flags)) {
618 /* TODO: print all tokens. */
619
620 if ( ! ((*tree->roffout)(tok)))
621 return(0);
622 return((*tree->roffblkin)(tok));
623 }
624
625 while (*argv) {
626 if (2 >= strlen(*argv) && ROFF_MAX !=
627 (c = rofffindcallable(*argv)))
628 if ( ! (*tokens[c].cb)(c, tree,
629 argv + 1, ROFF_ENTER))
630 return(0);
631
632 /* TODO: print token. */
633 argv++;
634 }
635
636 if ( ! ((*tree->roffout)(tok)))
637 return(0);
638
639 return((*tree->roffblkin)(tok));
640 }
641
642
643 /* ARGSUSED */
644 static int
645 roff_text(ROFFCALL_ARGS)
646 {
647 int i, c, argcp[ROFF_MAXARG];
648 char *v, *argvp[ROFF_MAXARG];
649
650 if (ROFF_PRELUDE & tree->state) {
651 warnx("%s: macro `%s' called in prelude (line %zu)",
652 tree->rbuf->name, toknames[tok],
653 tree->rbuf->line);
654 return(0);
655 }
656
657 i = 0;
658 while (-1 != (c = roffnextopt(&argv, &v))) {
659 if (ROFF_ARGMAX == c) {
660 warnx("%s: error parsing `%s' args (line %zu)",
661 tree->rbuf->name,
662 toknames[tok],
663 tree->rbuf->line);
664 return(0);
665 }
666 argcp[i] = c;
667 argvp[i] = v;
668 argv++;
669 }
670
671 if ( ! (*tree->roffin)(tok, argcp, argvp))
672 return(0);
673
674 if ( ! (ROFF_PARSED & tokens[tok].flags)) {
675 /* TODO: print all tokens. */
676 return((*tree->roffout)(tok));
677 }
678
679 while (*argv) {
680 if (2 >= strlen(*argv) && ROFF_MAX !=
681 (c = rofffindcallable(*argv)))
682 if ( ! (*tokens[c].cb)(c, tree,
683 argv + 1, ROFF_ENTER))
684 return(0);
685
686 /* TODO: print token. */
687 argv++;
688 }
689
690 return((*tree->roffout)(tok));
691 }