]> git.cameronkatri.com Git - mandoc.git/blob - main.c
Fix allowing silly '\'' control character.
[mandoc.git] / main.c
1 /* $Id: main.c,v 1.75 2010/05/16 00:04:46 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/mman.h>
22 #include <sys/stat.h>
23
24 #include <assert.h>
25 #include <fcntl.h>
26 #include <stdio.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31
32 #include "mandoc.h"
33 #include "mdoc.h"
34 #include "man.h"
35 #include "roff.h"
36 #include "main.h"
37
/*
 * Drop the const qualifier from a pointer by round-tripping it through
 * uintptr_t; the result is a plain void pointer.  Used below to hand
 * tables of string literals to getsubopt().
 */
#define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
39
/* FIXME: Intel's compiler? LLVM? pcc? */

/*
 * When not building with GCC 2 or later (and not running under lint),
 * define __attribute__() away so that prototypes annotated with it
 * still compile.
 */
#if !defined(__GNUC__) || (__GNUC__ < 2)
# if !defined(lint)
# define __attribute__(x)
# endif
#endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
47
/*
 * Output device callbacks.  The first argument of each is the device's
 * private data (curparse.outdata): out_mdoc and out_man emit a parsed
 * document, out_free is invoked once from main() before exit.
 */
typedef void (*out_mdoc)(void *, const struct mdoc *);
typedef void (*out_man)(void *, const struct man *);
typedef void (*out_free)(void *);
51
/*
 * A byte buffer.  While a buffer is being grown by resize_buf(), sz is
 * the allocated size; for a fully read input file it is the number of
 * bytes of input.
 */
struct buf {
	char *buf; /* Storage (heap-allocated or memory-mapped). */
	size_t sz; /* Size of the storage in bytes. */
};
56
/* Input parser selection, set by -m (see moptions()). */
enum intt {
	INTT_AUTO, /* -mandoc: pick the parser from the input (see pset()). */
	INTT_MDOC, /* -mdoc */
	INTT_MAN /* -man */
};
62
/* Output device selection, set by -T (see toptions()). */
enum outt {
	OUTT_ASCII = 0, /* -Tascii; also the default. */
	OUTT_TREE, /* -Ttree */
	OUTT_HTML, /* -Thtml */
	OUTT_XHTML, /* -Txhtml */
	OUTT_LINT /* -Tlint: parse only, no output device. */
};
70
71 struct curparse {
72 const char *file; /* Current parse. */
73 int fd; /* Current parse. */
74 int wflags;
75 /* FIXME: set by max error */
76 #define WARN_WALL (1 << 0) /* All-warnings mask. */
77 #define WARN_WERR (1 << 2) /* Warnings->errors. */
78 int fflags;
79 #define FL_IGN_SCOPE (1 << 0) /* Ignore scope errors. */
80 #define FL_NIGN_ESCAPE (1 << 1) /* Don't ignore bad escapes. */
81 #define FL_NIGN_MACRO (1 << 2) /* Don't ignore bad macros. */
82 #define FL_IGN_ERRORS (1 << 4) /* Ignore failed parse. */
83 #define FL_STRICT FL_NIGN_ESCAPE | \
84 FL_NIGN_MACRO
85 enum intt inttype; /* Input parsers... */
86 struct man *man;
87 struct mdoc *mdoc;
88 struct roff *roff;
89 enum outt outtype; /* Output devices... */
90 out_mdoc outmdoc;
91 out_man outman;
92 out_free outfree;
93 void *outdata;
94 char outopts[BUFSIZ];
95 };
96
/*
 * Forward declarations for the file-local functions defined below.
 * usage() and version() never return: both call exit().
 */
static void fdesc(struct curparse *);
static void ffile(const char *, struct curparse *);
static int foptions(int *, char *);
static struct man *man_init(struct curparse *);
static struct mdoc *mdoc_init(struct curparse *);
static struct roff *roff_init(struct curparse *);
static int merr(void *, int, int, const char *); /* DEPRECATED */
static int moptions(enum intt *, char *);
static int mwarn(void *, int, int, const char *); /* DEPRECATED */
static int mmsg(enum mandocerr, void *,
		int, int, const char *);
static int pset(const char *, int, struct curparse *,
		struct man **, struct mdoc **);
static int toptions(struct curparse *, char *);
static void usage(void) __attribute__((noreturn));
static void version(void) __attribute__((noreturn));
static int woptions(int *, char *);
114
/* Basename of argv[0], used in the usage and version messages. */
static const char *progname;
/* Set non-zero once any error has been recorded; checked by main(). */
static int with_error;
/* Set non-zero once any warning has been printed; checked by main(). */
static int with_warning;
118
119 int
120 main(int argc, char *argv[])
121 {
122 int c;
123 struct curparse curp;
124
125 progname = strrchr(argv[0], '/');
126 if (progname == NULL)
127 progname = argv[0];
128 else
129 ++progname;
130
131 memset(&curp, 0, sizeof(struct curparse));
132
133 curp.inttype = INTT_AUTO;
134 curp.outtype = OUTT_ASCII;
135
136 /* LINTED */
137 while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:")))
138 switch (c) {
139 case ('f'):
140 if ( ! foptions(&curp.fflags, optarg))
141 return(EXIT_FAILURE);
142 break;
143 case ('m'):
144 if ( ! moptions(&curp.inttype, optarg))
145 return(EXIT_FAILURE);
146 break;
147 case ('O'):
148 (void)strlcat(curp.outopts, optarg, BUFSIZ);
149 (void)strlcat(curp.outopts, ",", BUFSIZ);
150 break;
151 case ('T'):
152 if ( ! toptions(&curp, optarg))
153 return(EXIT_FAILURE);
154 break;
155 case ('W'):
156 if ( ! woptions(&curp.wflags, optarg))
157 return(EXIT_FAILURE);
158 break;
159 case ('V'):
160 version();
161 /* NOTREACHED */
162 default:
163 usage();
164 /* NOTREACHED */
165 }
166
167 argc -= optind;
168 argv += optind;
169
170 if (NULL == *argv) {
171 curp.file = "<stdin>";
172 curp.fd = STDIN_FILENO;
173
174 fdesc(&curp);
175 }
176
177 while (*argv) {
178 ffile(*argv, &curp);
179
180 if (with_error && !(curp.fflags & FL_IGN_ERRORS))
181 break;
182 ++argv;
183 }
184
185 if (curp.outfree)
186 (*curp.outfree)(curp.outdata);
187 if (curp.mdoc)
188 mdoc_free(curp.mdoc);
189 if (curp.man)
190 man_free(curp.man);
191 if (curp.roff)
192 roff_free(curp.roff);
193
194 return((with_warning || with_error) ?
195 EXIT_FAILURE : EXIT_SUCCESS);
196 }
197
198
199 static void
200 version(void)
201 {
202
203 (void)printf("%s %s\n", progname, VERSION);
204 exit(EXIT_SUCCESS);
205 }
206
207
208 static void
209 usage(void)
210 {
211
212 (void)fprintf(stderr, "usage: %s [-V] [-foption] "
213 "[-mformat] [-Ooption] [-Toutput] "
214 "[-Werr] [file...]\n", progname);
215 exit(EXIT_FAILURE);
216 }
217
218
219 static struct man *
220 man_init(struct curparse *curp)
221 {
222 int pflags;
223 struct man_cb mancb;
224
225 mancb.man_err = merr;
226 mancb.man_warn = mwarn;
227
228 /* Defaults from mandoc.1. */
229
230 pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE;
231
232 if (curp->fflags & FL_NIGN_MACRO)
233 pflags &= ~MAN_IGN_MACRO;
234 if (curp->fflags & FL_NIGN_ESCAPE)
235 pflags &= ~MAN_IGN_ESCAPE;
236
237 return(man_alloc(curp, pflags, &mancb));
238 }
239
240
241 static struct roff *
242 roff_init(struct curparse *curp)
243 {
244
245 return(roff_alloc(mmsg, curp));
246 }
247
248
249 static struct mdoc *
250 mdoc_init(struct curparse *curp)
251 {
252 int pflags;
253 struct mdoc_cb mdoccb;
254
255 mdoccb.mdoc_err = merr;
256 mdoccb.mdoc_warn = mwarn;
257
258 /* Defaults from mandoc.1. */
259
260 pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE;
261
262 if (curp->fflags & FL_IGN_SCOPE)
263 pflags |= MDOC_IGN_SCOPE;
264 if (curp->fflags & FL_NIGN_ESCAPE)
265 pflags &= ~MDOC_IGN_ESCAPE;
266 if (curp->fflags & FL_NIGN_MACRO)
267 pflags &= ~MDOC_IGN_MACRO;
268
269 return(mdoc_alloc(curp, pflags, &mdoccb));
270 }
271
272
273 static void
274 ffile(const char *file, struct curparse *curp)
275 {
276
277 curp->file = file;
278 if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
279 perror(curp->file);
280 with_error = 1;
281 return;
282 }
283
284 fdesc(curp);
285
286 if (-1 == close(curp->fd))
287 perror(curp->file);
288 }
289
290
291 static int
292 resize_buf(struct buf *buf, size_t initial)
293 {
294 void *tmp;
295 size_t sz;
296
297 if (buf->sz == 0)
298 sz = initial;
299 else
300 sz = 2 * buf->sz;
301 tmp = realloc(buf->buf, sz);
302 if (NULL == tmp) {
303 perror(NULL);
304 return(0);
305 }
306 buf->buf = tmp;
307 buf->sz = sz;
308 return(1);
309 }
310
311
312 static int
313 read_whole_file(struct curparse *curp, struct buf *fb, int *with_mmap)
314 {
315 struct stat st;
316 size_t off;
317 ssize_t ssz;
318
319 if (-1 == fstat(curp->fd, &st)) {
320 perror(curp->file);
321 with_error = 1;
322 return(0);
323 }
324
325 /*
326 * If we're a regular file, try just reading in the whole entry
327 * via mmap(). This is faster than reading it into blocks, and
328 * since each file is only a few bytes to begin with, I'm not
329 * concerned that this is going to tank any machines.
330 */
331
332 if (S_ISREG(st.st_mode)) {
333 if (st.st_size >= (1U << 31)) {
334 fprintf(stderr, "%s: input too large\n",
335 curp->file);
336 with_error = 1;
337 return(0);
338 }
339 *with_mmap = 1;
340 fb->sz = (size_t)st.st_size;
341 fb->buf = mmap(NULL, fb->sz, PROT_READ,
342 MAP_FILE, curp->fd, 0);
343 if (fb->buf != MAP_FAILED)
344 return(1);
345 }
346
347 /*
348 * If this isn't a regular file (like, say, stdin), then we must
349 * go the old way and just read things in bit by bit.
350 */
351
352 *with_mmap = 0;
353 off = 0;
354 fb->sz = 0;
355 fb->buf = NULL;
356 for (;;) {
357 if (off == fb->sz) {
358 if (fb->sz == (1U << 31)) {
359 fprintf(stderr, "%s: input too large\n",
360 curp->file);
361 break;
362 }
363 if (! resize_buf(fb, 65536))
364 break;
365 }
366 ssz = read(curp->fd, fb->buf + (int)off, fb->sz - off);
367 if (ssz == 0) {
368 fb->sz = off;
369 return(1);
370 }
371 if (ssz == -1) {
372 perror(curp->file);
373 break;
374 }
375 off += (size_t)ssz;
376 }
377
378 free(fb->buf);
379 fb->buf = NULL;
380 with_error = 1;
381 return(0);
382 }
383
384
385 static void
386 fdesc(struct curparse *curp)
387 {
388 struct buf ln, blk;
389 int i, pos, lnn, lnn_start, with_mmap;
390 enum rofferr re;
391 struct man *man;
392 struct mdoc *mdoc;
393 struct roff *roff;
394
395 man = NULL;
396 mdoc = NULL;
397 roff = NULL;
398 memset(&ln, 0, sizeof(struct buf));
399
400 /*
401 * Two buffers: ln and buf. buf is the input file and may be
402 * memory mapped. ln is a line buffer and grows on-demand.
403 */
404
405 if ( ! read_whole_file(curp, &blk, &with_mmap))
406 return;
407
408 if (NULL == curp->roff)
409 curp->roff = roff_init(curp);
410 if (NULL == (roff = curp->roff))
411 goto bailout;
412
413 for (i = 0, lnn = 1; i < (int)blk.sz;) {
414 pos = 0;
415 lnn_start = lnn;
416 while (i < (int)blk.sz) {
417 if ('\n' == blk.buf[i]) {
418 ++i;
419 ++lnn;
420 break;
421 }
422 /* Trailing backslash is like a plain character. */
423 if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
424 if (pos >= (int)ln.sz)
425 if (! resize_buf(&ln, 256))
426 goto bailout;
427 ln.buf[pos++] = blk.buf[i++];
428 continue;
429 }
430 /* Found an escape and at least one other character. */
431 if ('\n' == blk.buf[i + 1]) {
432 /* Escaped newlines are skipped over */
433 i += 2;
434 ++lnn;
435 continue;
436 }
437 if ('"' == blk.buf[i + 1]) {
438 i += 2;
439 /* Comment, skip to end of line */
440 for (; i < (int)blk.sz; ++i) {
441 if ('\n' == blk.buf[i]) {
442 ++i;
443 ++lnn;
444 break;
445 }
446 }
447 /* Backout trailing whitespaces */
448 for (; pos > 0; --pos) {
449 if (ln.buf[pos - 1] != ' ')
450 break;
451 if (pos > 2 && ln.buf[pos - 2] == '\\')
452 break;
453 }
454 break;
455 }
456 /* Some other escape sequence, copy and continue. */
457 if (pos + 1 >= (int)ln.sz)
458 if (! resize_buf(&ln, 256))
459 goto bailout;
460
461 ln.buf[pos++] = blk.buf[i++];
462 ln.buf[pos++] = blk.buf[i++];
463 }
464
465 if (pos >= (int)ln.sz)
466 if (! resize_buf(&ln, 256))
467 goto bailout;
468 ln.buf[pos] = '\0';
469
470 re = roff_parseln(roff, lnn_start, &ln.buf, &ln.sz);
471 if (ROFF_IGN == re)
472 continue;
473 else if (ROFF_ERR == re)
474 goto bailout;
475
476 /* If unset, assign parser in pset(). */
477
478 if ( ! (man || mdoc) && ! pset(ln.buf, pos, curp, &man, &mdoc))
479 goto bailout;
480
481 /* Pass down into parsers. */
482
483 if (man && ! man_parseln(man, lnn_start, ln.buf))
484 goto bailout;
485 if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf))
486 goto bailout;
487 }
488
489 /* NOTE a parser may not have been assigned, yet. */
490
491 if ( ! (man || mdoc)) {
492 fprintf(stderr, "%s: Not a manual\n", curp->file);
493 goto bailout;
494 }
495
496 if (mdoc && ! mdoc_endparse(mdoc))
497 goto bailout;
498 if (man && ! man_endparse(man))
499 goto bailout;
500 if (roff && ! roff_endparse(roff))
501 goto bailout;
502
503 /* If unset, allocate output dev now (if applicable). */
504
505 if ( ! (curp->outman && curp->outmdoc)) {
506 switch (curp->outtype) {
507 case (OUTT_XHTML):
508 curp->outdata = xhtml_alloc(curp->outopts);
509 curp->outman = html_man;
510 curp->outmdoc = html_mdoc;
511 curp->outfree = html_free;
512 break;
513 case (OUTT_HTML):
514 curp->outdata = html_alloc(curp->outopts);
515 curp->outman = html_man;
516 curp->outmdoc = html_mdoc;
517 curp->outfree = html_free;
518 break;
519 case (OUTT_TREE):
520 curp->outman = tree_man;
521 curp->outmdoc = tree_mdoc;
522 break;
523 case (OUTT_LINT):
524 break;
525 default:
526 curp->outdata = ascii_alloc(80);
527 curp->outman = terminal_man;
528 curp->outmdoc = terminal_mdoc;
529 curp->outfree = terminal_free;
530 break;
531 }
532 }
533
534 /* Execute the out device, if it exists. */
535
536 if (man && curp->outman)
537 (*curp->outman)(curp->outdata, man);
538 if (mdoc && curp->outmdoc)
539 (*curp->outmdoc)(curp->outdata, mdoc);
540
541 cleanup:
542 if (mdoc)
543 mdoc_reset(mdoc);
544 if (man)
545 man_reset(man);
546 if (roff)
547 roff_reset(roff);
548 if (ln.buf)
549 free(ln.buf);
550 if (with_mmap)
551 munmap(blk.buf, blk.sz);
552 else
553 free(blk.buf);
554
555 return;
556
557 bailout:
558 with_error = 1;
559 goto cleanup;
560 }
561
562
563 static int
564 pset(const char *buf, int pos, struct curparse *curp,
565 struct man **man, struct mdoc **mdoc)
566 {
567 int i;
568
569 /*
570 * Try to intuit which kind of manual parser should be used. If
571 * passed in by command-line (-man, -mdoc), then use that
572 * explicitly. If passed as -mandoc, then try to guess from the
573 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
574 * default to -man, which is more lenient.
575 */
576
577 if ('.' == buf[0] || '\'' == buf[0]) {
578 for (i = 1; buf[i]; i++)
579 if (' ' != buf[i] && '\t' != buf[i])
580 break;
581 if (0 == buf[i])
582 return(1);
583 }
584
585 switch (curp->inttype) {
586 case (INTT_MDOC):
587 if (NULL == curp->mdoc)
588 curp->mdoc = mdoc_init(curp);
589 if (NULL == (*mdoc = curp->mdoc))
590 return(0);
591 return(1);
592 case (INTT_MAN):
593 if (NULL == curp->man)
594 curp->man = man_init(curp);
595 if (NULL == (*man = curp->man))
596 return(0);
597 return(1);
598 default:
599 break;
600 }
601
602 if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
603 if (NULL == curp->mdoc)
604 curp->mdoc = mdoc_init(curp);
605 if (NULL == (*mdoc = curp->mdoc))
606 return(0);
607 return(1);
608 }
609
610 if (NULL == curp->man)
611 curp->man = man_init(curp);
612 if (NULL == (*man = curp->man))
613 return(0);
614 return(1);
615 }
616
617
618 static int
619 moptions(enum intt *tflags, char *arg)
620 {
621
622 if (0 == strcmp(arg, "doc"))
623 *tflags = INTT_MDOC;
624 else if (0 == strcmp(arg, "andoc"))
625 *tflags = INTT_AUTO;
626 else if (0 == strcmp(arg, "an"))
627 *tflags = INTT_MAN;
628 else {
629 fprintf(stderr, "%s: Bad argument\n", arg);
630 return(0);
631 }
632
633 return(1);
634 }
635
636
637 static int
638 toptions(struct curparse *curp, char *arg)
639 {
640
641 if (0 == strcmp(arg, "ascii"))
642 curp->outtype = OUTT_ASCII;
643 else if (0 == strcmp(arg, "lint")) {
644 curp->outtype = OUTT_LINT;
645 curp->wflags |= WARN_WALL;
646 curp->fflags |= FL_STRICT;
647 }
648 else if (0 == strcmp(arg, "tree"))
649 curp->outtype = OUTT_TREE;
650 else if (0 == strcmp(arg, "html"))
651 curp->outtype = OUTT_HTML;
652 else if (0 == strcmp(arg, "xhtml"))
653 curp->outtype = OUTT_XHTML;
654 else {
655 fprintf(stderr, "%s: Bad argument\n", arg);
656 return(0);
657 }
658
659 return(1);
660 }
661
662
663 static int
664 foptions(int *fflags, char *arg)
665 {
666 char *v, *o;
667 const char *toks[8];
668
669 toks[0] = "ign-scope";
670 toks[1] = "no-ign-escape";
671 toks[2] = "no-ign-macro";
672 toks[3] = "ign-errors";
673 toks[4] = "strict";
674 toks[5] = "ign-escape";
675 toks[6] = NULL;
676
677 while (*arg) {
678 o = arg;
679 switch (getsubopt(&arg, UNCONST(toks), &v)) {
680 case (0):
681 *fflags |= FL_IGN_SCOPE;
682 break;
683 case (1):
684 *fflags |= FL_NIGN_ESCAPE;
685 break;
686 case (2):
687 *fflags |= FL_NIGN_MACRO;
688 break;
689 case (3):
690 *fflags |= FL_IGN_ERRORS;
691 break;
692 case (4):
693 *fflags |= FL_STRICT;
694 break;
695 case (5):
696 *fflags &= ~FL_NIGN_ESCAPE;
697 break;
698 default:
699 fprintf(stderr, "%s: Bad argument\n", o);
700 return(0);
701 }
702 }
703
704 return(1);
705 }
706
707
708 static int
709 woptions(int *wflags, char *arg)
710 {
711 char *v, *o;
712 const char *toks[3];
713
714 toks[0] = "all";
715 toks[1] = "error";
716 toks[2] = NULL;
717
718 while (*arg) {
719 o = arg;
720 switch (getsubopt(&arg, UNCONST(toks), &v)) {
721 case (0):
722 *wflags |= WARN_WALL;
723 break;
724 case (1):
725 *wflags |= WARN_WERR;
726 break;
727 default:
728 fprintf(stderr, "%s: Bad argument\n", o);
729 return(0);
730 }
731 }
732
733 return(1);
734 }
735
736
737 /* ARGSUSED */
738 static int
739 merr(void *arg, int line, int col, const char *msg)
740 {
741 struct curparse *curp;
742
743 curp = (struct curparse *)arg;
744
745 (void)fprintf(stderr, "%s:%d:%d: error: %s\n",
746 curp->file, line, col + 1, msg);
747
748 with_error = 1;
749
750 return(0);
751 }
752
753
754 static int
755 mwarn(void *arg, int line, int col, const char *msg)
756 {
757 struct curparse *curp;
758
759 curp = (struct curparse *)arg;
760
761 if ( ! (curp->wflags & WARN_WALL))
762 return(1);
763
764 (void)fprintf(stderr, "%s:%d:%d: warning: %s\n",
765 curp->file, line, col + 1, msg);
766
767 with_warning = 1;
768 if (curp->wflags & WARN_WERR) {
769 with_error = 1;
770 return(0);
771 }
772
773 return(1);
774 }
775
/*
 * Message text for each enum mandocerr value, indexed by that value.
 * Only referenced from the compiled-out body of mmsg() at present.
 * NOTE(review): the order presumably mirrors the enum declaration in
 * mandoc.h -- confirm when adding entries.
 */
static const char * const mandocerrs[MANDOCERR_MAX] = {
	"ok",
	"multi-line scope open on exit",
	"request for scope closure when no matching scope is open",
	"line arguments will be lost",
	"memory exhausted"
};
783
/*
 * XXX: this is experimental code that will eventually become the
 * generic means of covering all warnings and errors!
 *
 * Message callback handed to roff_alloc().  The reporting body is
 * compiled out with #if 0, so the function currently ignores all of
 * its arguments, prints nothing, and always returns 1.
 */
/* ARGSUSED */
static int
mmsg(enum mandocerr t, void *arg, int ln, int col, const char *msg)
{
#if 0
	/* Disabled: would print "file:line:col: <mandocerrs[t]>[: msg]". */
	struct curparse *cp;

	cp = (struct curparse *)arg;

	fprintf(stderr, "%s:%d:%d: %s", cp->file,
			ln, col + 1, mandocerrs[t]);

	if (msg)
		fprintf(stderr, ": %s", msg);

	fputc('\n', stderr);
#endif
	return(1);
}
806 }