]> git.cameronkatri.com Git - mandoc.git/blob - main.c
Add a test case for nesting of .RS/.RE
[mandoc.git] / main.c
1 /* $Id: main.c,v 1.78 2010/05/16 22:28:33 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/mman.h>
22 #include <sys/stat.h>
23
24 #include <assert.h>
25 #include <fcntl.h>
26 #include <stdio.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31
32 #include "mandoc.h"
33 #include "mdoc.h"
34 #include "man.h"
35 #include "roff.h"
36 #include "main.h"
37
/*
 * Drop the const qualifier from a pointer by routing it through
 * uintptr_t instead of using a direct const-discarding cast.
 * Used below to hand const token tables to getsubopt().
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))

/* FIXME: Intel's compiler? LLVM? pcc? */

/*
 * When the compiler is not GCC 2 or later (and we are not running
 * under lint), make __attribute__() expand to nothing so that the
 * annotated prototypes in this file still compile.
 */
#if !defined(__GNUC__) || (__GNUC__ < 2)
# if !defined(lint)
# define __attribute__(x)
# endif
#endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
47
/*
 * Output-device callbacks stored in struct curparse.  The first
 * argument of each is the device's opaque state (curparse.outdata).
 */
typedef	void		(*out_mdoc)(void *, const struct mdoc *); /* emit a parsed mdoc document */
typedef	void		(*out_man)(void *, const struct man *); /* emit a parsed man document */
typedef	void		(*out_free)(void *); /* release the device state */
51
/*
 * A byte buffer and its size.  Used both for the whole input file
 * (malloc'd or memory mapped) and for the growable line buffer.
 */
struct	buf {
	char		 *buf; /* start of the data */
	size_t		  sz; /* size of buf in bytes */
};
56
/* Input parser selection, chosen with the -m option (see moptions()). */
enum	intt {
	INTT_AUTO, /* -mandoc: guess from the input (see pset()) */
	INTT_MDOC, /* -mdoc: always use the mdoc parser */
	INTT_MAN /* -man: always use the man parser */
};
62
/* Output device selection, chosen with the -T option (see toptions()). */
enum	outt {
	OUTT_ASCII = 0, /* -Tascii: terminal output (the default) */
	OUTT_TREE, /* -Ttree: parse tree dump */
	OUTT_HTML, /* -Thtml */
	OUTT_XHTML, /* -Txhtml */
	OUTT_LINT /* -Tlint: parse only, no output device */
};
70
71 struct curparse {
72 const char *file; /* Current parse. */
73 int fd; /* Current parse. */
74 int wflags;
75 /* FIXME: set by max error */
76 #define WARN_WALL (1 << 0) /* All-warnings mask. */
77 #define WARN_WERR (1 << 2) /* Warnings->errors. */
78 int fflags;
79 #define FL_IGN_SCOPE (1 << 0) /* Ignore scope errors. */
80 #define FL_NIGN_ESCAPE (1 << 1) /* Don't ignore bad escapes. */
81 #define FL_NIGN_MACRO (1 << 2) /* Don't ignore bad macros. */
82 #define FL_IGN_ERRORS (1 << 4) /* Ignore failed parse. */
83 #define FL_STRICT FL_NIGN_ESCAPE | \
84 FL_NIGN_MACRO
85 enum intt inttype; /* Input parsers... */
86 struct man *man;
87 struct mdoc *mdoc;
88 struct roff *roff;
89 enum outt outtype; /* Output devices... */
90 out_mdoc outmdoc;
91 out_man outman;
92 out_free outfree;
93 void *outdata;
94 char outopts[BUFSIZ];
95 };
96
/*
 * Forward declarations for the file-local functions defined below.
 * usage() and version() terminate the process and are marked noreturn.
 */
static	void		  fdesc(struct curparse *);
static	void		  ffile(const char *, struct curparse *);
static	int		  foptions(int *, char *);
static	struct man	 *man_init(struct curparse *);
static	struct mdoc	 *mdoc_init(struct curparse *);
static	struct roff	 *roff_init(struct curparse *);
static	int		  merr(void *, int, int, const char *); /* DEPRECATED */
static	int		  moptions(enum intt *, char *);
static	int		  mwarn(void *, int, int, const char *); /* DEPRECATED */
static	int		  mmsg(enum mandocerr, void *,
				int, int, const char *);
static	int		  pset(const char *, int, struct curparse *,
				struct man **, struct mdoc **);
static	int		  toptions(struct curparse *, char *);
static	void		  usage(void) __attribute__((noreturn));
static	void		  version(void) __attribute__((noreturn));
static	int		  woptions(int *, char *);
114
/* Basename of argv[0]; set once at the top of main(). */
static	const char	 *progname;
/* Non-zero once any error has been recorded; makes main() exit with failure. */
static	int		  with_error;
/* Non-zero once any warning has been printed; also makes main() exit with failure. */
static	int		  with_warning;
118
119 int
120 main(int argc, char *argv[])
121 {
122 int c;
123 struct curparse curp;
124
125 progname = strrchr(argv[0], '/');
126 if (progname == NULL)
127 progname = argv[0];
128 else
129 ++progname;
130
131 memset(&curp, 0, sizeof(struct curparse));
132
133 curp.inttype = INTT_AUTO;
134 curp.outtype = OUTT_ASCII;
135
136 /* LINTED */
137 while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:")))
138 switch (c) {
139 case ('f'):
140 if ( ! foptions(&curp.fflags, optarg))
141 return(EXIT_FAILURE);
142 break;
143 case ('m'):
144 if ( ! moptions(&curp.inttype, optarg))
145 return(EXIT_FAILURE);
146 break;
147 case ('O'):
148 (void)strlcat(curp.outopts, optarg, BUFSIZ);
149 (void)strlcat(curp.outopts, ",", BUFSIZ);
150 break;
151 case ('T'):
152 if ( ! toptions(&curp, optarg))
153 return(EXIT_FAILURE);
154 break;
155 case ('W'):
156 if ( ! woptions(&curp.wflags, optarg))
157 return(EXIT_FAILURE);
158 break;
159 case ('V'):
160 version();
161 /* NOTREACHED */
162 default:
163 usage();
164 /* NOTREACHED */
165 }
166
167 argc -= optind;
168 argv += optind;
169
170 if (NULL == *argv) {
171 curp.file = "<stdin>";
172 curp.fd = STDIN_FILENO;
173
174 fdesc(&curp);
175 }
176
177 while (*argv) {
178 ffile(*argv, &curp);
179
180 if (with_error && !(curp.fflags & FL_IGN_ERRORS))
181 break;
182 ++argv;
183 }
184
185 if (curp.outfree)
186 (*curp.outfree)(curp.outdata);
187 if (curp.mdoc)
188 mdoc_free(curp.mdoc);
189 if (curp.man)
190 man_free(curp.man);
191 if (curp.roff)
192 roff_free(curp.roff);
193
194 return((with_warning || with_error) ?
195 EXIT_FAILURE : EXIT_SUCCESS);
196 }
197
198
199 static void
200 version(void)
201 {
202
203 (void)printf("%s %s\n", progname, VERSION);
204 exit(EXIT_SUCCESS);
205 }
206
207
208 static void
209 usage(void)
210 {
211
212 (void)fprintf(stderr, "usage: %s [-V] [-foption] "
213 "[-mformat] [-Ooption] [-Toutput] "
214 "[-Werr] [file...]\n", progname);
215 exit(EXIT_FAILURE);
216 }
217
218
219 static struct man *
220 man_init(struct curparse *curp)
221 {
222 int pflags;
223 struct man_cb mancb;
224
225 mancb.man_err = merr;
226 mancb.man_warn = mwarn;
227
228 /* Defaults from mandoc.1. */
229
230 pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE;
231
232 if (curp->fflags & FL_NIGN_MACRO)
233 pflags &= ~MAN_IGN_MACRO;
234 if (curp->fflags & FL_NIGN_ESCAPE)
235 pflags &= ~MAN_IGN_ESCAPE;
236
237 return(man_alloc(curp, pflags, &mancb));
238 }
239
240
241 static struct roff *
242 roff_init(struct curparse *curp)
243 {
244
245 return(roff_alloc(mmsg, curp));
246 }
247
248
249 static struct mdoc *
250 mdoc_init(struct curparse *curp)
251 {
252 int pflags;
253 struct mdoc_cb mdoccb;
254
255 mdoccb.mdoc_err = merr;
256 mdoccb.mdoc_warn = mwarn;
257
258 /* Defaults from mandoc.1. */
259
260 pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE;
261
262 if (curp->fflags & FL_IGN_SCOPE)
263 pflags |= MDOC_IGN_SCOPE;
264 if (curp->fflags & FL_NIGN_ESCAPE)
265 pflags &= ~MDOC_IGN_ESCAPE;
266 if (curp->fflags & FL_NIGN_MACRO)
267 pflags &= ~MDOC_IGN_MACRO;
268
269 return(mdoc_alloc(curp, pflags, &mdoccb));
270 }
271
272
273 static void
274 ffile(const char *file, struct curparse *curp)
275 {
276
277 curp->file = file;
278 if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
279 perror(curp->file);
280 with_error = 1;
281 return;
282 }
283
284 fdesc(curp);
285
286 if (-1 == close(curp->fd))
287 perror(curp->file);
288 }
289
290
291 static int
292 resize_buf(struct buf *buf, size_t initial)
293 {
294 void *tmp;
295 size_t sz;
296
297 if (buf->sz == 0)
298 sz = initial;
299 else
300 sz = 2 * buf->sz;
301 tmp = realloc(buf->buf, sz);
302 if (NULL == tmp) {
303 perror(NULL);
304 return(0);
305 }
306 buf->buf = tmp;
307 buf->sz = sz;
308 return(1);
309 }
310
311
312 static int
313 read_whole_file(struct curparse *curp, struct buf *fb, int *with_mmap)
314 {
315 struct stat st;
316 size_t off;
317 ssize_t ssz;
318
319 if (-1 == fstat(curp->fd, &st)) {
320 perror(curp->file);
321 with_error = 1;
322 return(0);
323 }
324
325 /*
326 * If we're a regular file, try just reading in the whole entry
327 * via mmap(). This is faster than reading it into blocks, and
328 * since each file is only a few bytes to begin with, I'm not
329 * concerned that this is going to tank any machines.
330 */
331
332 if (S_ISREG(st.st_mode)) {
333 if (st.st_size >= (1U << 31)) {
334 fprintf(stderr, "%s: input too large\n",
335 curp->file);
336 with_error = 1;
337 return(0);
338 }
339 *with_mmap = 1;
340 fb->sz = (size_t)st.st_size;
341 fb->buf = mmap(NULL, fb->sz, PROT_READ,
342 MAP_FILE, curp->fd, 0);
343 if (fb->buf != MAP_FAILED)
344 return(1);
345 }
346
347 /*
348 * If this isn't a regular file (like, say, stdin), then we must
349 * go the old way and just read things in bit by bit.
350 */
351
352 *with_mmap = 0;
353 off = 0;
354 fb->sz = 0;
355 fb->buf = NULL;
356 for (;;) {
357 if (off == fb->sz) {
358 if (fb->sz == (1U << 31)) {
359 fprintf(stderr, "%s: input too large\n",
360 curp->file);
361 break;
362 }
363 if (! resize_buf(fb, 65536))
364 break;
365 }
366 ssz = read(curp->fd, fb->buf + (int)off, fb->sz - off);
367 if (ssz == 0) {
368 fb->sz = off;
369 return(1);
370 }
371 if (ssz == -1) {
372 perror(curp->file);
373 break;
374 }
375 off += (size_t)ssz;
376 }
377
378 free(fb->buf);
379 fb->buf = NULL;
380 with_error = 1;
381 return(0);
382 }
383
384
/*
 * Parse the open descriptor curp->fd (named curp->file) as one manual
 * and, if the parse succeeds, run it through the output device.
 *
 * The input is loaded in full, split into logical lines (joining
 * escaped newlines and stripping \" comments), passed through the
 * roff preprocessor, and then fed to the man or mdoc parser selected
 * by pset().  The output device is allocated on first use according
 * to curp->outtype.
 *
 * Any failure sets with_error.  The parsers are reset rather than
 * freed on the way out, so they stay in curp for the next file.
 */
static void
fdesc(struct curparse *curp)
{
	struct buf	 ln, blk;
	int		 i, pos, lnn, lnn_start, with_mmap, of;
	enum rofferr	 re;
	struct man	*man;
	struct mdoc	*mdoc;
	struct roff	*roff;

	man = NULL;
	mdoc = NULL;
	roff = NULL;
	memset(&ln, 0, sizeof(struct buf));

	/*
	 * Two buffers: ln and blk.  blk is the input file and may be
	 * memory mapped.  ln is a line buffer and grows on-demand.
	 */

	if ( ! read_whole_file(curp, &blk, &with_mmap))
		return;

	if (NULL == curp->roff)
		curp->roff = roff_init(curp);
	if (NULL == (roff = curp->roff))
		goto bailout;

	/*
	 * i indexes the input, pos the line buffer.  lnn counts input
	 * lines; lnn_start is the input line on which the current
	 * logical line began.
	 */
	for (i = 0, lnn = 1; i < (int)blk.sz;) {
		pos = 0;
		lnn_start = lnn;
		while (i < (int)blk.sz) {
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}
			/* Trailing backslash is like a plain character. */
			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
				if (pos >= (int)ln.sz)
					if (! resize_buf(&ln, 256))
						goto bailout;
				ln.buf[pos++] = blk.buf[i++];
				continue;
			}
			/* Found an escape and at least one other character. */
			if ('\n' == blk.buf[i + 1]) {
				/* Escaped newlines are skipped over */
				i += 2;
				++lnn;
				continue;
			}
			if ('"' == blk.buf[i + 1]) {
				i += 2;
				/* Comment, skip to end of line */
				for (; i < (int)blk.sz; ++i) {
					if ('\n' == blk.buf[i]) {
						++i;
						++lnn;
						break;
					}
				}
				/*
				 * Backout trailing whitespaces, stopping
				 * at a space preceded by a backslash.
				 */
				for (; pos > 0; --pos) {
					if (ln.buf[pos - 1] != ' ')
						break;
					if (pos > 2 && ln.buf[pos - 2] == '\\')
						break;
				}
				break;
			}
			/* Some other escape sequence, copy and continue. */
			/* Two bytes are stored below, so ensure room for both. */
			if (pos + 1 >= (int)ln.sz)
				if (! resize_buf(&ln, 256))
					goto bailout;

			ln.buf[pos++] = blk.buf[i++];
			ln.buf[pos++] = blk.buf[i++];
		}

		/* Make room for, and store, the terminating NUL. */
		if (pos >= (int)ln.sz)
			if (! resize_buf(&ln, 256))
				goto bailout;
		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		/* of is the offset into ln.buf at which parsing starts. */
		of = 0;
		do {
			re = roff_parseln(roff, lnn_start,
					&ln.buf, &ln.sz, of, &of);
		} while (ROFF_RERUN == re);

		if (ROFF_IGN == re)
			continue;
		else if (ROFF_ERR == re)
			goto bailout;

		/*
		 * If input parsers have not been allocated, do so now.
		 * We keep these instanced between parsers, but set them
		 * locally per parse routine since we can use different
		 * parsers with each one.
		 */

		if ( ! (man || mdoc))
			if ( ! pset(ln.buf + of, pos - of, curp, &man, &mdoc))
				goto bailout;

		/* Lastly, push down into the parsers themselves. */

		if (man && ! man_parseln(man, lnn_start, ln.buf, of))
			goto bailout;
		if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf, of))
			goto bailout;
	}

	/* NOTE a parser may not have been assigned, yet. */

	if ( ! (man || mdoc)) {
		fprintf(stderr, "%s: Not a manual\n", curp->file);
		goto bailout;
	}

	/* Clean up the parse routine ASTs. */

	if (mdoc && ! mdoc_endparse(mdoc))
		goto bailout;
	if (man && ! man_endparse(man))
		goto bailout;
	if (roff && ! roff_endparse(roff))
		goto bailout;

	/* If unset, allocate output dev now (if applicable). */

	if ( ! (curp->outman && curp->outmdoc)) {
		switch (curp->outtype) {
		case (OUTT_XHTML):
			curp->outdata = xhtml_alloc(curp->outopts);
			curp->outman = html_man;
			curp->outmdoc = html_mdoc;
			curp->outfree = html_free;
			break;
		case (OUTT_HTML):
			curp->outdata = html_alloc(curp->outopts);
			curp->outman = html_man;
			curp->outmdoc = html_mdoc;
			curp->outfree = html_free;
			break;
		case (OUTT_TREE):
			/* The tree dumper sets no outdata and no outfree. */
			curp->outman = tree_man;
			curp->outmdoc = tree_mdoc;
			break;
		case (OUTT_LINT):
			/* Lint mode: parse only, no output callbacks. */
			break;
		default:
			curp->outdata = ascii_alloc(80);
			curp->outman = terminal_man;
			curp->outmdoc = terminal_mdoc;
			curp->outfree = terminal_free;
			break;
		}
	}

	/* Execute the out device, if it exists. */

	if (man && curp->outman)
		(*curp->outman)(curp->outdata, man);
	if (mdoc && curp->outmdoc)
		(*curp->outmdoc)(curp->outdata, mdoc);

	/* Shared exit path: reset the parsers and release both buffers. */
 cleanup:
	if (mdoc)
		mdoc_reset(mdoc);
	if (man)
		man_reset(man);
	if (roff)
		roff_reset(roff);
	if (ln.buf)
		free(ln.buf);
	/* Release blk the same way read_whole_file() obtained it. */
	if (with_mmap)
		munmap(blk.buf, blk.sz);
	else
		free(blk.buf);

	return;

	/* Failure path: record the error, then clean up as usual. */
 bailout:
	with_error = 1;
	goto cleanup;
}
583
584
585 static int
586 pset(const char *buf, int pos, struct curparse *curp,
587 struct man **man, struct mdoc **mdoc)
588 {
589 int i;
590
591 /*
592 * Try to intuit which kind of manual parser should be used. If
593 * passed in by command-line (-man, -mdoc), then use that
594 * explicitly. If passed as -mandoc, then try to guess from the
595 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
596 * default to -man, which is more lenient.
597 */
598
599 if ('.' == buf[0] || '\'' == buf[0]) {
600 for (i = 1; buf[i]; i++)
601 if (' ' != buf[i] && '\t' != buf[i])
602 break;
603 if (0 == buf[i])
604 return(1);
605 }
606
607 switch (curp->inttype) {
608 case (INTT_MDOC):
609 if (NULL == curp->mdoc)
610 curp->mdoc = mdoc_init(curp);
611 if (NULL == (*mdoc = curp->mdoc))
612 return(0);
613 return(1);
614 case (INTT_MAN):
615 if (NULL == curp->man)
616 curp->man = man_init(curp);
617 if (NULL == (*man = curp->man))
618 return(0);
619 return(1);
620 default:
621 break;
622 }
623
624 if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
625 if (NULL == curp->mdoc)
626 curp->mdoc = mdoc_init(curp);
627 if (NULL == (*mdoc = curp->mdoc))
628 return(0);
629 return(1);
630 }
631
632 if (NULL == curp->man)
633 curp->man = man_init(curp);
634 if (NULL == (*man = curp->man))
635 return(0);
636 return(1);
637 }
638
639
640 static int
641 moptions(enum intt *tflags, char *arg)
642 {
643
644 if (0 == strcmp(arg, "doc"))
645 *tflags = INTT_MDOC;
646 else if (0 == strcmp(arg, "andoc"))
647 *tflags = INTT_AUTO;
648 else if (0 == strcmp(arg, "an"))
649 *tflags = INTT_MAN;
650 else {
651 fprintf(stderr, "%s: Bad argument\n", arg);
652 return(0);
653 }
654
655 return(1);
656 }
657
658
659 static int
660 toptions(struct curparse *curp, char *arg)
661 {
662
663 if (0 == strcmp(arg, "ascii"))
664 curp->outtype = OUTT_ASCII;
665 else if (0 == strcmp(arg, "lint")) {
666 curp->outtype = OUTT_LINT;
667 curp->wflags |= WARN_WALL;
668 curp->fflags |= FL_STRICT;
669 }
670 else if (0 == strcmp(arg, "tree"))
671 curp->outtype = OUTT_TREE;
672 else if (0 == strcmp(arg, "html"))
673 curp->outtype = OUTT_HTML;
674 else if (0 == strcmp(arg, "xhtml"))
675 curp->outtype = OUTT_XHTML;
676 else {
677 fprintf(stderr, "%s: Bad argument\n", arg);
678 return(0);
679 }
680
681 return(1);
682 }
683
684
685 static int
686 foptions(int *fflags, char *arg)
687 {
688 char *v, *o;
689 const char *toks[8];
690
691 toks[0] = "ign-scope";
692 toks[1] = "no-ign-escape";
693 toks[2] = "no-ign-macro";
694 toks[3] = "ign-errors";
695 toks[4] = "strict";
696 toks[5] = "ign-escape";
697 toks[6] = NULL;
698
699 while (*arg) {
700 o = arg;
701 switch (getsubopt(&arg, UNCONST(toks), &v)) {
702 case (0):
703 *fflags |= FL_IGN_SCOPE;
704 break;
705 case (1):
706 *fflags |= FL_NIGN_ESCAPE;
707 break;
708 case (2):
709 *fflags |= FL_NIGN_MACRO;
710 break;
711 case (3):
712 *fflags |= FL_IGN_ERRORS;
713 break;
714 case (4):
715 *fflags |= FL_STRICT;
716 break;
717 case (5):
718 *fflags &= ~FL_NIGN_ESCAPE;
719 break;
720 default:
721 fprintf(stderr, "%s: Bad argument\n", o);
722 return(0);
723 }
724 }
725
726 return(1);
727 }
728
729
730 static int
731 woptions(int *wflags, char *arg)
732 {
733 char *v, *o;
734 const char *toks[3];
735
736 toks[0] = "all";
737 toks[1] = "error";
738 toks[2] = NULL;
739
740 while (*arg) {
741 o = arg;
742 switch (getsubopt(&arg, UNCONST(toks), &v)) {
743 case (0):
744 *wflags |= WARN_WALL;
745 break;
746 case (1):
747 *wflags |= WARN_WERR;
748 break;
749 default:
750 fprintf(stderr, "%s: Bad argument\n", o);
751 return(0);
752 }
753 }
754
755 return(1);
756 }
757
758
759 /* ARGSUSED */
760 static int
761 merr(void *arg, int line, int col, const char *msg)
762 {
763 struct curparse *curp;
764
765 curp = (struct curparse *)arg;
766
767 (void)fprintf(stderr, "%s:%d:%d: error: %s\n",
768 curp->file, line, col + 1, msg);
769
770 with_error = 1;
771
772 return(0);
773 }
774
775
776 static int
777 mwarn(void *arg, int line, int col, const char *msg)
778 {
779 struct curparse *curp;
780
781 curp = (struct curparse *)arg;
782
783 if ( ! (curp->wflags & WARN_WALL))
784 return(1);
785
786 (void)fprintf(stderr, "%s:%d:%d: warning: %s\n",
787 curp->file, line, col + 1, msg);
788
789 with_warning = 1;
790 if (curp->wflags & WARN_WERR) {
791 with_error = 1;
792 return(0);
793 }
794
795 return(1);
796 }
797
/*
 * Message text for each enum mandocerr value, indexed by that value.
 * Only referenced from the disabled code in mmsg().
 * NOTE(review): the entries presumably follow the declaration order
 * of enum mandocerr in mandoc.h -- confirm when either one changes.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",
	"multi-line scope open on exit",
	"request for scope closure when no matching scope is open: ignored",
	"macro requires line argument(s): ignored",
	"line arguments will be lost",
	"memory exhausted"
};
806
807 /*
808 * XXX: this is experimental code that will eventually become the
809 * generic means of covering all warnings and errors!
810 */
811 /* ARGSUSED */
812 static int
813 mmsg(enum mandocerr t, void *arg, int ln, int col, const char *msg)
814 {
815 #if 0
816 struct curparse *cp;
817
818 cp = (struct curparse *)arg;
819
820 fprintf(stderr, "%s:%d:%d: %s", cp->file,
821 ln, col + 1, mandocerrs[t]);
822
823 if (msg)
824 fprintf(stderr, ": %s", msg);
825
826 fputc('\n', stderr);
827 #endif
828 return(1);
829 }