]> git.cameronkatri.com Git - mandoc.git/blob - main.c
Regression tests in place for `.if' in libroff.
[mandoc.git] / main.c
1 /* $Id: main.c,v 1.77 2010/05/16 19:08:11 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/mman.h>
22 #include <sys/stat.h>
23
24 #include <assert.h>
25 #include <fcntl.h>
26 #include <stdio.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31
32 #include "mandoc.h"
33 #include "mdoc.h"
34 #include "man.h"
35 #include "roff.h"
36 #include "main.h"
37
/*
 * Yield a plain `void *' for a (possibly const-qualified) pointer by
 * casting it through `const void *' and uintptr_t.  Used below to hand
 * arrays of constant strings to getsubopt().
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))
39
/* FIXME: Intel's compiler? LLVM? pcc? */

/*
 * Anything that is not GCC 2 or newer gets __attribute__() expanded to
 * nothing, unless the file is being processed by lint.
 */
#if (!defined(__GNUC__) || (__GNUC__ < 2)) && !defined(lint)
# define __attribute__(x)
#endif
47
/*
 * Output device callbacks.  The first argument of each is the opaque
 * device state kept in curparse.outdata.
 */
typedef	void	(*out_mdoc)(void *, const struct mdoc *); /* render mdoc */
typedef	void	(*out_man)(void *, const struct man *);   /* render man */
typedef	void	(*out_free)(void *);			  /* free state */
51
/*
 * A sized memory region: either the whole input (which may be a
 * memory mapping) or the growable line buffer.
 */
struct	buf {
	char	 *buf;	/* start of the region */
	size_t	  sz;	/* size of the region in bytes */
};
56
/* Input parser selection (the -m option). */
enum	intt {
	INTT_AUTO = 0,	/* -mandoc: guess from the input */
	INTT_MDOC = 1,	/* -mdoc */
	INTT_MAN = 2	/* -man */
};
62
/* Output device selection (the -T option). */
enum	outt {
	OUTT_ASCII = 0,	/* -Tascii (the default) */
	OUTT_TREE = 1,	/* -Ttree */
	OUTT_HTML = 2,	/* -Thtml */
	OUTT_XHTML = 3,	/* -Txhtml */
	OUTT_LINT = 4	/* -Tlint: parse only, no output device */
};
70
71 struct curparse {
72 const char *file; /* Current parse. */
73 int fd; /* Current parse. */
74 int wflags;
75 /* FIXME: set by max error */
76 #define WARN_WALL (1 << 0) /* All-warnings mask. */
77 #define WARN_WERR (1 << 2) /* Warnings->errors. */
78 int fflags;
79 #define FL_IGN_SCOPE (1 << 0) /* Ignore scope errors. */
80 #define FL_NIGN_ESCAPE (1 << 1) /* Don't ignore bad escapes. */
81 #define FL_NIGN_MACRO (1 << 2) /* Don't ignore bad macros. */
82 #define FL_IGN_ERRORS (1 << 4) /* Ignore failed parse. */
83 #define FL_STRICT FL_NIGN_ESCAPE | \
84 FL_NIGN_MACRO
85 enum intt inttype; /* Input parsers... */
86 struct man *man;
87 struct mdoc *mdoc;
88 struct roff *roff;
89 enum outt outtype; /* Output devices... */
90 out_mdoc outmdoc;
91 out_man outman;
92 out_free outfree;
93 void *outdata;
94 char outopts[BUFSIZ];
95 };
96
/* Forward declarations of the file-local routines used before definition. */
static	void		  fdesc(struct curparse *);
static	void		  ffile(const char *, struct curparse *);
static	int		  foptions(int *, char *);
static	struct man	 *man_init(struct curparse *);
static	struct mdoc	 *mdoc_init(struct curparse *);
static	struct roff	 *roff_init(struct curparse *);
/* DEPRECATED */
static	int		  merr(void *, int, int, const char *);
static	int		  moptions(enum intt *, char *);
/* DEPRECATED */
static	int		  mwarn(void *, int, int, const char *);
static	int		  mmsg(enum mandocerr, void *,
				int, int, const char *);
static	int		  pset(const char *, int, struct curparse *,
				struct man **, struct mdoc **);
static	int		  toptions(struct curparse *, char *);
static	void		  usage(void) __attribute__((noreturn));
static	void		  version(void) __attribute__((noreturn));
static	int		  woptions(int *, char *);
114
/* Basename of argv[0]; set at the top of main(). */
static	const char	 *progname = NULL;
/* Set to 1 as soon as any error has been recorded. */
static	int		  with_error = 0;
/* Set to 1 as soon as any warning has been printed. */
static	int		  with_warning = 0;
118
119 int
120 main(int argc, char *argv[])
121 {
122 int c;
123 struct curparse curp;
124
125 progname = strrchr(argv[0], '/');
126 if (progname == NULL)
127 progname = argv[0];
128 else
129 ++progname;
130
131 memset(&curp, 0, sizeof(struct curparse));
132
133 curp.inttype = INTT_AUTO;
134 curp.outtype = OUTT_ASCII;
135
136 /* LINTED */
137 while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:")))
138 switch (c) {
139 case ('f'):
140 if ( ! foptions(&curp.fflags, optarg))
141 return(EXIT_FAILURE);
142 break;
143 case ('m'):
144 if ( ! moptions(&curp.inttype, optarg))
145 return(EXIT_FAILURE);
146 break;
147 case ('O'):
148 (void)strlcat(curp.outopts, optarg, BUFSIZ);
149 (void)strlcat(curp.outopts, ",", BUFSIZ);
150 break;
151 case ('T'):
152 if ( ! toptions(&curp, optarg))
153 return(EXIT_FAILURE);
154 break;
155 case ('W'):
156 if ( ! woptions(&curp.wflags, optarg))
157 return(EXIT_FAILURE);
158 break;
159 case ('V'):
160 version();
161 /* NOTREACHED */
162 default:
163 usage();
164 /* NOTREACHED */
165 }
166
167 argc -= optind;
168 argv += optind;
169
170 if (NULL == *argv) {
171 curp.file = "<stdin>";
172 curp.fd = STDIN_FILENO;
173
174 fdesc(&curp);
175 }
176
177 while (*argv) {
178 ffile(*argv, &curp);
179
180 if (with_error && !(curp.fflags & FL_IGN_ERRORS))
181 break;
182 ++argv;
183 }
184
185 if (curp.outfree)
186 (*curp.outfree)(curp.outdata);
187 if (curp.mdoc)
188 mdoc_free(curp.mdoc);
189 if (curp.man)
190 man_free(curp.man);
191 if (curp.roff)
192 roff_free(curp.roff);
193
194 return((with_warning || with_error) ?
195 EXIT_FAILURE : EXIT_SUCCESS);
196 }
197
198
199 static void
200 version(void)
201 {
202
203 (void)printf("%s %s\n", progname, VERSION);
204 exit(EXIT_SUCCESS);
205 }
206
207
208 static void
209 usage(void)
210 {
211
212 (void)fprintf(stderr, "usage: %s [-V] [-foption] "
213 "[-mformat] [-Ooption] [-Toutput] "
214 "[-Werr] [file...]\n", progname);
215 exit(EXIT_FAILURE);
216 }
217
218
219 static struct man *
220 man_init(struct curparse *curp)
221 {
222 int pflags;
223 struct man_cb mancb;
224
225 mancb.man_err = merr;
226 mancb.man_warn = mwarn;
227
228 /* Defaults from mandoc.1. */
229
230 pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE;
231
232 if (curp->fflags & FL_NIGN_MACRO)
233 pflags &= ~MAN_IGN_MACRO;
234 if (curp->fflags & FL_NIGN_ESCAPE)
235 pflags &= ~MAN_IGN_ESCAPE;
236
237 return(man_alloc(curp, pflags, &mancb));
238 }
239
240
241 static struct roff *
242 roff_init(struct curparse *curp)
243 {
244
245 return(roff_alloc(mmsg, curp));
246 }
247
248
249 static struct mdoc *
250 mdoc_init(struct curparse *curp)
251 {
252 int pflags;
253 struct mdoc_cb mdoccb;
254
255 mdoccb.mdoc_err = merr;
256 mdoccb.mdoc_warn = mwarn;
257
258 /* Defaults from mandoc.1. */
259
260 pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE;
261
262 if (curp->fflags & FL_IGN_SCOPE)
263 pflags |= MDOC_IGN_SCOPE;
264 if (curp->fflags & FL_NIGN_ESCAPE)
265 pflags &= ~MDOC_IGN_ESCAPE;
266 if (curp->fflags & FL_NIGN_MACRO)
267 pflags &= ~MDOC_IGN_MACRO;
268
269 return(mdoc_alloc(curp, pflags, &mdoccb));
270 }
271
272
273 static void
274 ffile(const char *file, struct curparse *curp)
275 {
276
277 curp->file = file;
278 if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
279 perror(curp->file);
280 with_error = 1;
281 return;
282 }
283
284 fdesc(curp);
285
286 if (-1 == close(curp->fd))
287 perror(curp->file);
288 }
289
290
291 static int
292 resize_buf(struct buf *buf, size_t initial)
293 {
294 void *tmp;
295 size_t sz;
296
297 if (buf->sz == 0)
298 sz = initial;
299 else
300 sz = 2 * buf->sz;
301 tmp = realloc(buf->buf, sz);
302 if (NULL == tmp) {
303 perror(NULL);
304 return(0);
305 }
306 buf->buf = tmp;
307 buf->sz = sz;
308 return(1);
309 }
310
311
312 static int
313 read_whole_file(struct curparse *curp, struct buf *fb, int *with_mmap)
314 {
315 struct stat st;
316 size_t off;
317 ssize_t ssz;
318
319 if (-1 == fstat(curp->fd, &st)) {
320 perror(curp->file);
321 with_error = 1;
322 return(0);
323 }
324
325 /*
326 * If we're a regular file, try just reading in the whole entry
327 * via mmap(). This is faster than reading it into blocks, and
328 * since each file is only a few bytes to begin with, I'm not
329 * concerned that this is going to tank any machines.
330 */
331
332 if (S_ISREG(st.st_mode)) {
333 if (st.st_size >= (1U << 31)) {
334 fprintf(stderr, "%s: input too large\n",
335 curp->file);
336 with_error = 1;
337 return(0);
338 }
339 *with_mmap = 1;
340 fb->sz = (size_t)st.st_size;
341 fb->buf = mmap(NULL, fb->sz, PROT_READ,
342 MAP_FILE, curp->fd, 0);
343 if (fb->buf != MAP_FAILED)
344 return(1);
345 }
346
347 /*
348 * If this isn't a regular file (like, say, stdin), then we must
349 * go the old way and just read things in bit by bit.
350 */
351
352 *with_mmap = 0;
353 off = 0;
354 fb->sz = 0;
355 fb->buf = NULL;
356 for (;;) {
357 if (off == fb->sz) {
358 if (fb->sz == (1U << 31)) {
359 fprintf(stderr, "%s: input too large\n",
360 curp->file);
361 break;
362 }
363 if (! resize_buf(fb, 65536))
364 break;
365 }
366 ssz = read(curp->fd, fb->buf + (int)off, fb->sz - off);
367 if (ssz == 0) {
368 fb->sz = off;
369 return(1);
370 }
371 if (ssz == -1) {
372 perror(curp->file);
373 break;
374 }
375 off += (size_t)ssz;
376 }
377
378 free(fb->buf);
379 fb->buf = NULL;
380 with_error = 1;
381 return(0);
382 }
383
384
385 static void
386 fdesc(struct curparse *curp)
387 {
388 struct buf ln, blk;
389 int i, pos, lnn, lnn_start, with_mmap, of;
390 enum rofferr re;
391 struct man *man;
392 struct mdoc *mdoc;
393 struct roff *roff;
394
395 man = NULL;
396 mdoc = NULL;
397 roff = NULL;
398 memset(&ln, 0, sizeof(struct buf));
399
400 /*
401 * Two buffers: ln and buf. buf is the input file and may be
402 * memory mapped. ln is a line buffer and grows on-demand.
403 */
404
405 if ( ! read_whole_file(curp, &blk, &with_mmap))
406 return;
407
408 if (NULL == curp->roff)
409 curp->roff = roff_init(curp);
410 if (NULL == (roff = curp->roff))
411 goto bailout;
412
413 for (i = 0, lnn = 1; i < (int)blk.sz;) {
414 pos = 0;
415 lnn_start = lnn;
416 while (i < (int)blk.sz) {
417 if ('\n' == blk.buf[i]) {
418 ++i;
419 ++lnn;
420 break;
421 }
422 /* Trailing backslash is like a plain character. */
423 if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
424 if (pos >= (int)ln.sz)
425 if (! resize_buf(&ln, 256))
426 goto bailout;
427 ln.buf[pos++] = blk.buf[i++];
428 continue;
429 }
430 /* Found an escape and at least one other character. */
431 if ('\n' == blk.buf[i + 1]) {
432 /* Escaped newlines are skipped over */
433 i += 2;
434 ++lnn;
435 continue;
436 }
437 if ('"' == blk.buf[i + 1]) {
438 i += 2;
439 /* Comment, skip to end of line */
440 for (; i < (int)blk.sz; ++i) {
441 if ('\n' == blk.buf[i]) {
442 ++i;
443 ++lnn;
444 break;
445 }
446 }
447 /* Backout trailing whitespaces */
448 for (; pos > 0; --pos) {
449 if (ln.buf[pos - 1] != ' ')
450 break;
451 if (pos > 2 && ln.buf[pos - 2] == '\\')
452 break;
453 }
454 break;
455 }
456 /* Some other escape sequence, copy and continue. */
457 if (pos + 1 >= (int)ln.sz)
458 if (! resize_buf(&ln, 256))
459 goto bailout;
460
461 ln.buf[pos++] = blk.buf[i++];
462 ln.buf[pos++] = blk.buf[i++];
463 }
464
465 if (pos >= (int)ln.sz)
466 if (! resize_buf(&ln, 256))
467 goto bailout;
468 ln.buf[pos] = '\0';
469
470 /*
471 * A significant amount of complexity is contained by
472 * the roff preprocessor. It's line-oriented but can be
473 * expressed on one line, so we need at times to
474 * readjust our starting point and re-run it. The roff
475 * preprocessor can also readjust the buffers with new
476 * data, so we pass them in wholesale.
477 */
478
479 of = 0;
480 do {
481 re = roff_parseln(roff, lnn_start,
482 &ln.buf, &ln.sz, of, &of);
483 } while (ROFF_RERUN == re);
484
485 if (ROFF_IGN == re)
486 continue;
487 else if (ROFF_ERR == re)
488 goto bailout;
489
490 /*
491 * If input parsers have not been allocated, do so now.
492 * We keep these instanced betwen parsers, but set them
493 * locally per parse routine since we can use different
494 * parsers with each one.
495 */
496
497 if ( ! (man || mdoc))
498 if ( ! pset(ln.buf + of, pos - of, curp, &man, &mdoc))
499 goto bailout;
500
501 /* Lastly, push down into the parsers themselves. */
502
503 if (man && ! man_parseln(man, lnn_start, ln.buf, of))
504 goto bailout;
505 if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf, of))
506 goto bailout;
507 }
508
509 /* NOTE a parser may not have been assigned, yet. */
510
511 if ( ! (man || mdoc)) {
512 fprintf(stderr, "%s: Not a manual\n", curp->file);
513 goto bailout;
514 }
515
516 /* Clean up the parse routine ASTs. */
517
518 if (mdoc && ! mdoc_endparse(mdoc))
519 goto bailout;
520 if (man && ! man_endparse(man))
521 goto bailout;
522 if (roff && ! roff_endparse(roff))
523 goto bailout;
524
525 /* If unset, allocate output dev now (if applicable). */
526
527 if ( ! (curp->outman && curp->outmdoc)) {
528 switch (curp->outtype) {
529 case (OUTT_XHTML):
530 curp->outdata = xhtml_alloc(curp->outopts);
531 curp->outman = html_man;
532 curp->outmdoc = html_mdoc;
533 curp->outfree = html_free;
534 break;
535 case (OUTT_HTML):
536 curp->outdata = html_alloc(curp->outopts);
537 curp->outman = html_man;
538 curp->outmdoc = html_mdoc;
539 curp->outfree = html_free;
540 break;
541 case (OUTT_TREE):
542 curp->outman = tree_man;
543 curp->outmdoc = tree_mdoc;
544 break;
545 case (OUTT_LINT):
546 break;
547 default:
548 curp->outdata = ascii_alloc(80);
549 curp->outman = terminal_man;
550 curp->outmdoc = terminal_mdoc;
551 curp->outfree = terminal_free;
552 break;
553 }
554 }
555
556 /* Execute the out device, if it exists. */
557
558 if (man && curp->outman)
559 (*curp->outman)(curp->outdata, man);
560 if (mdoc && curp->outmdoc)
561 (*curp->outmdoc)(curp->outdata, mdoc);
562
563 cleanup:
564 if (mdoc)
565 mdoc_reset(mdoc);
566 if (man)
567 man_reset(man);
568 if (roff)
569 roff_reset(roff);
570 if (ln.buf)
571 free(ln.buf);
572 if (with_mmap)
573 munmap(blk.buf, blk.sz);
574 else
575 free(blk.buf);
576
577 return;
578
579 bailout:
580 with_error = 1;
581 goto cleanup;
582 }
583
584
585 static int
586 pset(const char *buf, int pos, struct curparse *curp,
587 struct man **man, struct mdoc **mdoc)
588 {
589 int i;
590
591 /*
592 * Try to intuit which kind of manual parser should be used. If
593 * passed in by command-line (-man, -mdoc), then use that
594 * explicitly. If passed as -mandoc, then try to guess from the
595 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
596 * default to -man, which is more lenient.
597 */
598
599 if ('.' == buf[0] || '\'' == buf[0]) {
600 for (i = 1; buf[i]; i++)
601 if (' ' != buf[i] && '\t' != buf[i])
602 break;
603 if (0 == buf[i])
604 return(1);
605 }
606
607 switch (curp->inttype) {
608 case (INTT_MDOC):
609 if (NULL == curp->mdoc)
610 curp->mdoc = mdoc_init(curp);
611 if (NULL == (*mdoc = curp->mdoc))
612 return(0);
613 return(1);
614 case (INTT_MAN):
615 if (NULL == curp->man)
616 curp->man = man_init(curp);
617 if (NULL == (*man = curp->man))
618 return(0);
619 return(1);
620 default:
621 break;
622 }
623
624 if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
625 if (NULL == curp->mdoc)
626 curp->mdoc = mdoc_init(curp);
627 if (NULL == (*mdoc = curp->mdoc))
628 return(0);
629 return(1);
630 }
631
632 if (NULL == curp->man)
633 curp->man = man_init(curp);
634 if (NULL == (*man = curp->man))
635 return(0);
636 return(1);
637 }
638
639
640 static int
641 moptions(enum intt *tflags, char *arg)
642 {
643
644 if (0 == strcmp(arg, "doc"))
645 *tflags = INTT_MDOC;
646 else if (0 == strcmp(arg, "andoc"))
647 *tflags = INTT_AUTO;
648 else if (0 == strcmp(arg, "an"))
649 *tflags = INTT_MAN;
650 else {
651 fprintf(stderr, "%s: Bad argument\n", arg);
652 return(0);
653 }
654
655 return(1);
656 }
657
658
659 static int
660 toptions(struct curparse *curp, char *arg)
661 {
662
663 if (0 == strcmp(arg, "ascii"))
664 curp->outtype = OUTT_ASCII;
665 else if (0 == strcmp(arg, "lint")) {
666 curp->outtype = OUTT_LINT;
667 curp->wflags |= WARN_WALL;
668 curp->fflags |= FL_STRICT;
669 }
670 else if (0 == strcmp(arg, "tree"))
671 curp->outtype = OUTT_TREE;
672 else if (0 == strcmp(arg, "html"))
673 curp->outtype = OUTT_HTML;
674 else if (0 == strcmp(arg, "xhtml"))
675 curp->outtype = OUTT_XHTML;
676 else {
677 fprintf(stderr, "%s: Bad argument\n", arg);
678 return(0);
679 }
680
681 return(1);
682 }
683
684
685 static int
686 foptions(int *fflags, char *arg)
687 {
688 char *v, *o;
689 const char *toks[8];
690
691 toks[0] = "ign-scope";
692 toks[1] = "no-ign-escape";
693 toks[2] = "no-ign-macro";
694 toks[3] = "ign-errors";
695 toks[4] = "strict";
696 toks[5] = "ign-escape";
697 toks[6] = NULL;
698
699 while (*arg) {
700 o = arg;
701 switch (getsubopt(&arg, UNCONST(toks), &v)) {
702 case (0):
703 *fflags |= FL_IGN_SCOPE;
704 break;
705 case (1):
706 *fflags |= FL_NIGN_ESCAPE;
707 break;
708 case (2):
709 *fflags |= FL_NIGN_MACRO;
710 break;
711 case (3):
712 *fflags |= FL_IGN_ERRORS;
713 break;
714 case (4):
715 *fflags |= FL_STRICT;
716 break;
717 case (5):
718 *fflags &= ~FL_NIGN_ESCAPE;
719 break;
720 default:
721 fprintf(stderr, "%s: Bad argument\n", o);
722 return(0);
723 }
724 }
725
726 return(1);
727 }
728
729
730 static int
731 woptions(int *wflags, char *arg)
732 {
733 char *v, *o;
734 const char *toks[3];
735
736 toks[0] = "all";
737 toks[1] = "error";
738 toks[2] = NULL;
739
740 while (*arg) {
741 o = arg;
742 switch (getsubopt(&arg, UNCONST(toks), &v)) {
743 case (0):
744 *wflags |= WARN_WALL;
745 break;
746 case (1):
747 *wflags |= WARN_WERR;
748 break;
749 default:
750 fprintf(stderr, "%s: Bad argument\n", o);
751 return(0);
752 }
753 }
754
755 return(1);
756 }
757
758
759 /* ARGSUSED */
760 static int
761 merr(void *arg, int line, int col, const char *msg)
762 {
763 struct curparse *curp;
764
765 curp = (struct curparse *)arg;
766
767 (void)fprintf(stderr, "%s:%d:%d: error: %s\n",
768 curp->file, line, col + 1, msg);
769
770 with_error = 1;
771
772 return(0);
773 }
774
775
776 static int
777 mwarn(void *arg, int line, int col, const char *msg)
778 {
779 struct curparse *curp;
780
781 curp = (struct curparse *)arg;
782
783 if ( ! (curp->wflags & WARN_WALL))
784 return(1);
785
786 (void)fprintf(stderr, "%s:%d:%d: warning: %s\n",
787 curp->file, line, col + 1, msg);
788
789 with_warning = 1;
790 if (curp->wflags & WARN_WERR) {
791 with_error = 1;
792 return(0);
793 }
794
795 return(1);
796 }
797
798 static const char * const mandocerrs[MANDOCERR_MAX] = {
799 "ok",
800 "multi-line scope open on exit",
801 "request for scope closure when no matching scope is open",
802 "macro requires line argument(s)",
803 "line arguments will be lost",
804 "memory exhausted"
805 };
806
807 /*
808 * XXX: this is experimental code that will eventually become the
809 * generic means of covering all warnings and errors!
810 */
811 /* ARGSUSED */
812 static int
813 mmsg(enum mandocerr t, void *arg, int ln, int col, const char *msg)
814 {
815 #if 0
816 struct curparse *cp;
817
818 cp = (struct curparse *)arg;
819
820 fprintf(stderr, "%s:%d:%d: %s", cp->file,
821 ln, col + 1, mandocerrs[t]);
822
823 if (msg)
824 fprintf(stderr, ": %s", msg);
825
826 fputc('\n', stderr);
827 #endif
828 return(1);
829 }