]> git.cameronkatri.com Git - mandoc.git/blob - roff.c
3e2b765675e29cfb4a47fd2fd4e6ba98b475a297
[mandoc.git] / roff.c
1 /* $Id: roff.c,v 1.1 2008/11/24 14:24:55 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <assert.h>
20 #include <ctype.h>
21 #include <err.h>
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <string.h>
25 #include <time.h>
26
27 #include "libmdocml.h"
28 #include "private.h"
29
30 #define ROFF_MAXARG 10
31
/*
 * Direction flag handed to every token callback: ROFF_ENTER when the
 * token is being processed, ROFF_EXIT when its scope is being closed.
 */
enum	roffd {
	ROFF_ENTER = 0,
	ROFF_EXIT = 1
};
36
/*
 * Class of a token.  The parser uses it to decide where a token may
 * appear: title tokens only in the prelude, layout and text tokens only
 * in the body; comment tokens are ignored wherever they occur.
 */
enum	rofftype {
	ROFF_TITLE = 0,
	ROFF_COMMENT = 1,
	ROFF_TEXT = 2,
	ROFF_LAYOUT = 3
};
43
struct	rofftree;

/*
 * Parameter list shared by all token callbacks: the parse tree, the
 * NULL-terminated argument words following the token (NULL when the
 * callback is invoked with ROFF_EXIT), and the call direction.
 */
#define	ROFFCALL_ARGS \
	struct rofftree *tree, const char *argv[], enum roffd type
48
49 struct rofftok {
50 char *name;
51 int (*cb)(ROFFCALL_ARGS);
52 enum rofftype type;
53 int flags;
54 #define ROFF_NESTED (1 << 0)
55 #define ROFF_PARSED (1 << 1)
56 #define ROFF_CALLABLE (1 << 2)
57 #define ROFF_QUOTES (1 << 3)
58 };
59
/*
 * Bit for roffarg.flags: the option must be followed by a value word
 * (see roffnextopt()).
 */
#define	ROFF_VALUE	 (1 << 0)

/* Static description of one `-option' accepted by a token. */
struct	roffarg {
	char		 *name;		/* option text without the leading `-' */
	int		  flags;	/* ROFF_VALUE or 0 */
};
65
/*
 * One open scope.  Nodes form a singly-linked stack through `parent';
 * the innermost scope is rofftree.last.
 */
struct	roffnode {
	int		  tok;		/* index into tokens[] */
	struct roffnode	 *parent;	/* enclosing scope, or NULL */
	size_t		  line;		/* input line that opened the scope */
};
71
/*
 * Bits for rofftree.state.  ROFF_PRELUDE is set by roff_alloc() and
 * replaced by ROFF_BODY once `Os' has been accepted; the ROFF_PRELUDE_*
 * bits record which prelude tokens have been seen so far.
 */
#define	ROFF_PRELUDE	 (1 << 1)
#define	ROFF_PRELUDE_Os	 (1 << 2)
#define	ROFF_PRELUDE_Dt	 (1 << 3)
#define	ROFF_PRELUDE_Dd	 (1 << 4)
#define	ROFF_BODY	 (1 << 5)

/* Parser state for one input document. */
struct	rofftree {
	struct roffnode	 *last;		/* innermost open scope, or NULL */
	time_t		  date;
	char		  title[256];
	char		  section[256];
	char		  volume[256];
	int		  state;	/* ROFF_* bits defined above */
	struct md_mbuf	 *mbuf;		/* NULL if ROFF_EXIT and error. */

	const struct md_args	*args;	/* caller's options (debug flags) */
	const struct md_rbuf	*rbuf;	/* input; supplies name and line */
};
89
/*
 * Token identifiers.  Each value is the index of the matching entry in
 * tokens[]; ROFF_MAX is the table size and doubles as the "not found"
 * result of rofffindtok().
 */
enum {
	ROFF___ = 0,
	ROFF_Dd = 1,
	ROFF_Dt = 2,
	ROFF_Os = 3,
	ROFF_Sh = 4,
	ROFF_An = 5,
	ROFF_Li = 6,
	ROFF_MAX = 7
};
98
/* Token callbacks, one per non-comment entry in tokens[]. */
static	int		  roff_Dd(ROFFCALL_ARGS);
static	int		  roff_Dt(ROFFCALL_ARGS);
static	int		  roff_Os(ROFFCALL_ARGS);
static	int		  roff_Sh(ROFFCALL_ARGS);
static	int		  roff_An(ROFFCALL_ARGS);
static	int		  roff_Li(ROFFCALL_ARGS);

/* Scope stack maintenance. */
static	struct roffnode	 *roffnode_new(int tokid, size_t line,
				struct rofftree *tree);
static	void		  roffnode_free(int tokid, struct rofftree *tree);

/* Table lookups and line parsing. */
static	int		  rofffindtok(const char *name);
static	int		  rofffindarg(const char *name);
static	int		  roffargs(int tok, char *buf, char **argv);
static	int		  roffparse(struct rofftree *tree, char *buf,
				size_t sz);
static	int		  textparse(const struct rofftree *tree,
				const char *buf, size_t sz);

/* Debug tracing of the token tree. */
static	void		  dbg_enter(const struct md_args *args, int tokid);
static	void		  dbg_leave(const struct md_args *args, int tokid);
119
120
121 static const struct rofftok tokens[ROFF_MAX] = {
122 { "\\\"", NULL, ROFF_COMMENT, 0 },
123 { "Dd", roff_Dd, ROFF_TITLE, 0 },
124 { "Dt", roff_Dt, ROFF_TITLE, 0 },
125 { "Os", roff_Os, ROFF_TITLE, 0 },
126 { "Sh", roff_Sh, ROFF_LAYOUT, 0 },
127 { "An", roff_An, ROFF_TEXT, ROFF_PARSED },
128 { "Li", roff_Li, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE },
129 };
130
131 #define ROFF_Split 0
132 #define ROFF_Nosplit 1
133 #define ROFF_ARGMAX 2
134
135 static const struct roffarg tokenargs[ROFF_ARGMAX] = {
136 { "split", 0 },
137 { "nosplit", 0 },
138 };
139
140
141 int
142 roff_free(struct rofftree *tree, int flush)
143 {
144 int error;
145
146 assert(tree->mbuf);
147 if ( ! flush)
148 tree->mbuf = NULL;
149
150 /* LINTED */
151 while (tree->last)
152 if ( ! (*tokens[tree->last->tok].cb)
153 (tree, NULL, ROFF_EXIT))
154 /* Disallow flushing. */
155 tree->mbuf = NULL;
156
157 error = tree->mbuf ? 0 : 1;
158
159 if (tree->mbuf && (ROFF_PRELUDE & tree->state)) {
160 warnx("%s: prelude never finished",
161 tree->rbuf->name);
162 error = 1;
163 }
164
165 free(tree);
166 return(error ? 0 : 1);
167 }
168
169
170 struct rofftree *
171 roff_alloc(const struct md_args *args, struct md_mbuf *out,
172 const struct md_rbuf *in)
173 {
174 struct rofftree *tree;
175
176 if (NULL == (tree = calloc(1, sizeof(struct rofftree)))) {
177 warn("malloc");
178 return(NULL);
179 }
180
181 tree->state = ROFF_PRELUDE;
182 tree->args = args;
183 tree->mbuf = out;
184 tree->rbuf = in;
185
186 return(tree);
187 }
188
189
190 int
191 roff_engine(struct rofftree *tree, char *buf, size_t sz)
192 {
193
194 if (0 == sz) {
195 warnx("%s: blank line (line %zu)",
196 tree->rbuf->name,
197 tree->rbuf->line);
198 return(0);
199 } else if ('.' != *buf)
200 return(textparse(tree, buf, sz));
201
202 return(roffparse(tree, buf, sz));
203 }
204
205
206 static int
207 textparse(const struct rofftree *tree, const char *buf, size_t sz)
208 {
209
210 if (NULL == tree->last) {
211 warnx("%s: unexpected text (line %zu)",
212 tree->rbuf->name,
213 tree->rbuf->line);
214 return(0);
215 } else if (NULL == tree->last->parent) {
216 warnx("%s: disallowed text (line %zu)",
217 tree->rbuf->name,
218 tree->rbuf->line);
219 return(0);
220 }
221
222 /* Print text. */
223
224 return(1);
225 }
226
227
228 static int
229 roffargs(int tok, char *buf, char **argv)
230 {
231 int i;
232
233 (void)tok;/* FIXME: quotable strings? */
234
235 assert(tok >= 0 && tok < ROFF_MAX);
236 assert('.' == *buf);
237
238 /* LINTED */
239 for (i = 0; *buf && i < ROFF_MAXARG; i++) {
240 argv[i] = buf++;
241 while (*buf && ! isspace(*buf))
242 buf++;
243 if (0 == *buf) {
244 continue;
245 }
246 *buf++ = 0;
247 while (*buf && isspace(*buf))
248 buf++;
249 }
250
251 assert(i > 0);
252 if (i < ROFF_MAXARG)
253 argv[i] = NULL;
254
255 return(ROFF_MAXARG > i);
256 }
257
258
259 static int
260 roffparse(struct rofftree *tree, char *buf, size_t sz)
261 {
262 int tok, t;
263 struct roffnode *node;
264 char *argv[ROFF_MAXARG];
265 const char **argvp;
266
267 assert(sz > 0);
268
269 /*
270 * Extract the token identifier from the buffer. If there's no
271 * callback for the token (comment, etc.) then exit immediately.
272 * We don't do any error handling (yet), so if the token doesn't
273 * exist, die.
274 */
275
276 if (3 > sz) {
277 warnx("%s: malformed line (line %zu)",
278 tree->rbuf->name,
279 tree->rbuf->line);
280 return(0);
281 } else if (ROFF_MAX == (tok = rofffindtok(buf + 1))) {
282 warnx("%s: unknown line token `%c%c' (line %zu)",
283 tree->rbuf->name,
284 *(buf + 1), *(buf + 2),
285 tree->rbuf->line);
286 return(0);
287 } else if (ROFF_COMMENT == tokens[tok].type)
288 /* Ignore comment tokens. */
289 return(1);
290
291 if ( ! roffargs(tok, buf, argv)) {
292 warnx("%s: too many arguments to `%s' (line %zu)",
293 tree->rbuf->name, tokens[tok].name,
294 tree->rbuf->line);
295 return(0);
296 }
297
298 /* Domain cross-contamination (and sanity) checks. */
299
300 switch (tokens[tok].type) {
301 case (ROFF_TITLE):
302 if (ROFF_PRELUDE & tree->state) {
303 assert( ! (ROFF_BODY & tree->state));
304 break;
305 }
306 assert(ROFF_BODY & tree->state);
307 warnx("%s: prelude token `%s' in body (line %zu)",
308 tree->rbuf->name, tokens[tok].name,
309 tree->rbuf->line);
310 return(0);
311 case (ROFF_LAYOUT):
312 /* FALLTHROUGH */
313 case (ROFF_TEXT):
314 if (ROFF_BODY & tree->state) {
315 assert( ! (ROFF_PRELUDE & tree->state));
316 break;
317 }
318 assert(ROFF_PRELUDE & tree->state);
319 warnx("%s: body token `%s' in prelude (line %zu)",
320 tree->rbuf->name, tokens[tok].name,
321 tree->rbuf->line);
322 return(0);
323 case (ROFF_COMMENT):
324 return(1);
325 default:
326 abort();
327 }
328
329 /*
330 * If this is a non-nestable layout token and we're below a
331 * token of the same type, then recurse upward to the token,
332 * closing out the interim scopes.
333 *
334 * If there's a nested token on the chain, then raise an error
335 * as nested tokens have corresponding "ending" tokens and we're
336 * breaking their scope.
337 */
338
339 node = NULL;
340
341 if (ROFF_LAYOUT == tokens[tok].type &&
342 ! (ROFF_NESTED & tokens[tok].flags)) {
343 for (node = tree->last; node; node = node->parent) {
344 if (node->tok == tok)
345 break;
346
347 /* Don't break nested scope. */
348
349 if ( ! (ROFF_NESTED & tokens[node->tok].flags))
350 continue;
351 warnx("%s: scope of %s (line %zu) broken by "
352 "%s (line %zu)",
353 tree->rbuf->name,
354 tokens[tok].name,
355 node->line,
356 tokens[node->tok].name,
357 tree->rbuf->line);
358 return(0);
359 }
360 }
361
362 if (node) {
363 assert(ROFF_LAYOUT == tokens[tok].type);
364 assert( ! (ROFF_NESTED & tokens[tok].flags));
365 assert(node->tok == tok);
366
367 /* Clear up to last scoped token. */
368
369 /* LINTED */
370 do {
371 t = tree->last->tok;
372 if ( ! (*tokens[tree->last->tok].cb)
373 (tree, NULL, ROFF_EXIT))
374 return(0);
375 } while (t != tok);
376 }
377
378 /* Proceed with actual token processing. */
379
380 argvp = (const char **)&argv[1];
381 return((*tokens[tok].cb)(tree, argvp, ROFF_ENTER));
382 }
383
384
385 static int
386 rofffindarg(const char *name)
387 {
388 size_t i;
389
390 /* FIXME: use a table, this is slow but ok for now. */
391
392 /* LINTED */
393 for (i = 0; i < ROFF_ARGMAX; i++)
394 /* LINTED */
395 if (0 == strcmp(name, tokenargs[i].name))
396 return((int)i);
397
398 return(ROFF_ARGMAX);
399 }
400
401
402 static int
403 rofffindtok(const char *name)
404 {
405 size_t i;
406
407 /* FIXME: use a table, this is slow but ok for now. */
408
409 /* LINTED */
410 for (i = 0; i < ROFF_MAX; i++)
411 /* LINTED */
412 if (0 == strncmp(name, tokens[i].name, 2))
413 return((int)i);
414
415 return(ROFF_MAX);
416 }
417
418
419 /* FIXME: accept only struct rofftree *. */
420 static struct roffnode *
421 roffnode_new(int tokid, size_t line, struct rofftree *tree)
422 {
423 struct roffnode *p;
424
425 if (NULL == (p = malloc(sizeof(struct roffnode)))) {
426 warn("malloc");
427 return(NULL);
428 }
429
430 p->line = line;
431 p->tok = tokid;
432 p->parent = tree->last;
433 tree->last = p;
434 return(p);
435 }
436
437
438 static void
439 roffnode_free(int tokid, struct rofftree *tree)
440 {
441 struct roffnode *p;
442
443 assert(tree->last);
444 assert(tree->last->tok == tokid);
445
446 p = tree->last;
447 tree->last = tree->last->parent;
448 free(p);
449 }
450
451
/* Current nesting depth of the debug trace; see dbg_enter()/dbg_leave(). */
static	int	 dbg_lvl;
453
454
455 static void
456 dbg_enter(const struct md_args *args, int tokid)
457 {
458 int i;
459 static char buf[72];
460
461 assert(args);
462 if ( ! (args->dbg & MD_DBG_TREE))
463 return;
464 assert(tokid >= 0 && tokid <= ROFF_MAX);
465
466 buf[0] = buf[71] = 0;
467
468 switch (tokens[tokid].type) {
469 case (ROFF_LAYOUT):
470 (void)strncat(buf, "[body-layout] ", sizeof(buf) - 1);
471 break;
472 case (ROFF_TEXT):
473 (void)strncat(buf, "[ body-text] ", sizeof(buf) - 1);
474 break;
475 case (ROFF_TITLE):
476 (void)strncat(buf, "[ prelude] ", sizeof(buf) - 1);
477 break;
478 default:
479 abort();
480 }
481
482 /* LINTED */
483 for (i = 0; i < dbg_lvl; i++)
484 (void)strncat(buf, " ", sizeof(buf) - 1);
485
486 (void)strncat(buf, tokens[tokid].name, sizeof(buf) - 1);
487
488 (void)printf("%s\n", buf);
489
490 dbg_lvl++;
491 }
492
493
494 /* FIXME: accept only struct rofftree *. */
495 static void
496 dbg_leave(const struct md_args *args, int tokid)
497 {
498 assert(args);
499 if ( ! (args->dbg & MD_DBG_TREE))
500 return;
501
502 assert(tokid >= 0 && tokid <= ROFF_MAX);
503 assert(dbg_lvl > 0);
504 dbg_lvl--;
505 }
506
507
508 /* FIXME: accept only struct rofftree *. */
509 /* ARGSUSED */
510 static int
511 roff_Dd(ROFFCALL_ARGS)
512 {
513
514 dbg_enter(tree->args, ROFF_Dd);
515
516 assert(ROFF_PRELUDE & tree->state);
517 if (ROFF_PRELUDE_Dt & tree->state ||
518 ROFF_PRELUDE_Dd & tree->state) {
519 warnx("%s: prelude `Dd' out-of-order (line %zu)",
520 tree->rbuf->name, tree->rbuf->line);
521 return(0);
522 }
523
524 assert(NULL == tree->last);
525 tree->state |= ROFF_PRELUDE_Dd;
526
527 dbg_leave(tree->args, ROFF_Dd);
528
529 return(1);
530 }
531
532
533 /* ARGSUSED */
534 static int
535 roff_Dt(ROFFCALL_ARGS)
536 {
537
538 dbg_enter(tree->args, ROFF_Dt);
539
540 assert(ROFF_PRELUDE & tree->state);
541 if ( ! (ROFF_PRELUDE_Dd & tree->state) ||
542 (ROFF_PRELUDE_Dt & tree->state)) {
543 warnx("%s: prelude `Dt' out-of-order (line %zu)",
544 tree->rbuf->name, tree->rbuf->line);
545 return(0);
546 }
547
548 assert(NULL == tree->last);
549 tree->state |= ROFF_PRELUDE_Dt;
550
551 dbg_leave(tree->args, ROFF_Dt);
552
553 return(1);
554 }
555
556
557 /* ARGSUSED */
558 static int
559 roff_Os(ROFFCALL_ARGS)
560 {
561
562 if (ROFF_EXIT == type) {
563 roffnode_free(ROFF_Os, tree);
564 dbg_leave(tree->args, ROFF_Os);
565 return(1);
566 }
567
568 dbg_enter(tree->args, ROFF_Os);
569
570 assert(ROFF_PRELUDE & tree->state);
571 if ( ! (ROFF_PRELUDE_Dt & tree->state) ||
572 ! (ROFF_PRELUDE_Dd & tree->state)) {
573 warnx("%s: prelude `Os' out-of-order (line %zu)",
574 tree->rbuf->name, tree->rbuf->line);
575 return(0);
576 }
577
578 assert(NULL == tree->last);
579 if (NULL == roffnode_new(ROFF_Os, tree->rbuf->line, tree))
580 return(0);
581
582 tree->state |= ROFF_PRELUDE_Os;
583 tree->state &= ~ROFF_PRELUDE;
584 tree->state |= ROFF_BODY;
585
586 return(1);
587 }
588
589
590 /* ARGSUSED */
591 static int
592 roff_Sh(ROFFCALL_ARGS)
593 {
594
595 if (ROFF_EXIT == type) {
596 roffnode_free(ROFF_Sh, tree);
597 dbg_leave(tree->args, ROFF_Sh);
598 return(1);
599 }
600
601 dbg_enter(tree->args, ROFF_Sh);
602
603 if (NULL == roffnode_new(ROFF_Sh, tree->rbuf->line, tree))
604 return(0);
605
606 return(1);
607 }
608
609
610 /* ARGSUSED */
611 static int
612 roff_Li(ROFFCALL_ARGS)
613 {
614
615 dbg_enter(tree->args, ROFF_Li);
616 dbg_leave(tree->args, ROFF_Li);
617
618 return(1);
619 }
620
621
622 static int
623 roffnextopt(const char ***in, char **val)
624 {
625 const char *arg, **argv;
626 int v;
627
628 *val = NULL;
629 argv = *in;
630 assert(argv);
631
632 if (NULL == (arg = *argv))
633 return(-1);
634 if ('-' != *arg)
635 return(-1);
636 if (ROFF_ARGMAX == (v = rofffindarg(&arg[1])))
637 return(-1);
638 if ( ! (ROFF_VALUE & tokenargs[v].flags))
639 return(v);
640
641 *in = ++argv;
642
643 /* FIXME: what if this looks like a roff token or argument? */
644
645 return(*argv ? v : ROFF_ARGMAX);
646 }
647
648
649 /* ARGSUSED */
650 static int
651 roff_An(ROFFCALL_ARGS)
652 {
653 int c;
654 char *val;
655
656 dbg_enter(tree->args, ROFF_An);
657
658 while (-1 != (c = roffnextopt(&argv, &val))) {
659 switch (c) {
660 case (ROFF_Split):
661 /* Process argument. */
662 break;
663 case (ROFF_Nosplit):
664 /* Process argument. */
665 break;
666 default:
667 warnx("%s: error parsing `An' args (line %zu)",
668 tree->rbuf->name,
669 tree->rbuf->line);
670 return(0);
671 }
672 argv++;
673 }
674
675 /* Print header. */
676
677 while (*argv) {
678 if (/* is_parsable && */ 2 >= strlen(*argv)) {
679 if (ROFF_MAX != (c = rofffindtok(*argv))) {
680 if (ROFF_CALLABLE & tokens[c].flags) {
681 /* Call to token. */
682 if ( ! (*tokens[c].cb)(tree, (const char **)argv + 1, ROFF_ENTER))
683 return(0);
684 }
685 /* Print token. */
686 } else {
687 /* Print token. */
688 }
689 } else {
690 /* Print token. */
691 }
692 argv++;
693 }
694
695 /* Print footer. */
696
697 dbg_leave(tree->args, ROFF_An);
698
699 return(1);
700 }
701