]> git.cameronkatri.com Git - mandoc.git/blob - main.c
no more info on bugs FIXED in bsd.lv;
[mandoc.git] / main.c
1 /* $Id: main.c,v 1.67 2010/05/15 05:50:19 joerg Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/mman.h>
22 #include <sys/stat.h>
23
24 #include <assert.h>
25 #include <fcntl.h>
26 #include <stdio.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31
32 #include "mdoc.h"
33 #include "man.h"
34 #include "main.h"
35
/*
 * Strip the const qualifier from pointer `a' and yield it as a plain
 * void pointer.  The value is routed through uintptr_t so that the
 * qualifier is dropped via an integer conversion rather than a direct
 * pointer cast.
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))
37
/* FIXME: Intel's compiler? LLVM? pcc? */

/*
 * Unless the compiler is GCC 2 or newer, make __attribute__(x) expand
 * to nothing so that annotated declarations still compile.  The shim
 * is not installed when `lint' is defined.
 */
#if !(defined(__GNUC__) && (__GNUC__ >= 2))
# ifndef lint
#  define __attribute__(x)
# endif
#endif /* !(defined(__GNUC__) && (__GNUC__ >= 2)) */
45
/*
 * Output-device callbacks.  The first argument of each is the opaque
 * device state kept in struct curparse's `outdata' member.
 */
typedef	void	(*out_mdoc)(void *arg, const struct mdoc *mdoc);
typedef	void	(*out_man)(void *arg, const struct man *man);
typedef	void	(*out_free)(void *arg);
49
/* A pointer to character data together with its size in bytes. */
struct	buf {
	char	 *buf;	/* Start of the data. */
	size_t	  sz;	/* Number of bytes at `buf'. */
};
54
/* Input language, as selected with -m. */
enum	intt {
	INTT_AUTO = 0,	/* Guess from the input (see pset()). */
	INTT_MDOC,	/* Always use the mdoc parser. */
	INTT_MAN	/* Always use the man parser. */
};
60
/* Output device, as selected with -T. */
enum	outt {
	OUTT_ASCII = 0,	/* Terminal output (the default). */
	OUTT_TREE,	/* Parse-tree dump. */
	OUTT_HTML,	/* HTML. */
	OUTT_XHTML,	/* XHTML. */
	OUTT_LINT	/* No output device; diagnostics only. */
};
68
69 struct curparse {
70 const char *file; /* Current parse. */
71 int fd; /* Current parse. */
72 int wflags;
73 #define WARN_WALL (1 << 0) /* All-warnings mask. */
74 #define WARN_WERR (1 << 2) /* Warnings->errors. */
75 int fflags;
76 #define FL_IGN_SCOPE (1 << 0) /* Ignore scope errors. */
77 #define FL_NIGN_ESCAPE (1 << 1) /* Don't ignore bad escapes. */
78 #define FL_NIGN_MACRO (1 << 2) /* Don't ignore bad macros. */
79 #define FL_IGN_ERRORS (1 << 4) /* Ignore failed parse. */
80 #define FL_STRICT FL_NIGN_ESCAPE | \
81 FL_NIGN_MACRO
82 enum intt inttype; /* Input parsers... */
83 struct man *man;
84 struct mdoc *mdoc;
85 enum outt outtype; /* Output devices... */
86 out_mdoc outmdoc;
87 out_man outman;
88 out_free outfree;
89 void *outdata;
90 char outopts[BUFSIZ];
91 };
92
/* Forward declarations of the file-local functions. */
static	void		  fdesc(struct curparse *curp);
static	void		  ffile(const char *file, struct curparse *curp);
static	int		  foptions(int *fflags, char *arg);
static	struct man	 *man_init(struct curparse *curp);
static	struct mdoc	 *mdoc_init(struct curparse *curp);
static	int		  merr(void *arg, int line, int col,
				const char *msg);
static	int		  moptions(enum intt *tflags, char *arg);
static	int		  mwarn(void *arg, int line, int col,
				const char *msg);
static	int		  pset(const char *buf, int pos,
				struct curparse *curp,
				struct man **man, struct mdoc **mdoc);
static	int		  toptions(struct curparse *curp, char *arg);
static	void		  usage(void) __attribute__((noreturn));
static	void		  version(void) __attribute__((noreturn));
static	int		  woptions(int *wflags, char *arg);
107
/* Basename of argv[0]; set first thing in main(). */
static	const char	 *progname = NULL;
/* Set to 1 once any error has been recorded. */
static	int		  with_error = 0;
/* Set to 1 once any warning has been reported by mwarn(). */
static	int		  with_warning = 0;
111
112 int
113 main(int argc, char *argv[])
114 {
115 int c;
116 struct curparse curp;
117
118 progname = strrchr(argv[0], '/');
119 if (progname == NULL)
120 progname = argv[0];
121 else
122 ++progname;
123
124 memset(&curp, 0, sizeof(struct curparse));
125
126 curp.inttype = INTT_AUTO;
127 curp.outtype = OUTT_ASCII;
128
129 /* LINTED */
130 while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:")))
131 switch (c) {
132 case ('f'):
133 if ( ! foptions(&curp.fflags, optarg))
134 return(EXIT_FAILURE);
135 break;
136 case ('m'):
137 if ( ! moptions(&curp.inttype, optarg))
138 return(EXIT_FAILURE);
139 break;
140 case ('O'):
141 (void)strlcat(curp.outopts, optarg, BUFSIZ);
142 (void)strlcat(curp.outopts, ",", BUFSIZ);
143 break;
144 case ('T'):
145 if ( ! toptions(&curp, optarg))
146 return(EXIT_FAILURE);
147 break;
148 case ('W'):
149 if ( ! woptions(&curp.wflags, optarg))
150 return(EXIT_FAILURE);
151 break;
152 case ('V'):
153 version();
154 /* NOTREACHED */
155 default:
156 usage();
157 /* NOTREACHED */
158 }
159
160 argc -= optind;
161 argv += optind;
162
163 if (NULL == *argv) {
164 curp.file = "<stdin>";
165 curp.fd = STDIN_FILENO;
166
167 fdesc(&curp);
168 }
169
170 while (*argv) {
171 ffile(*argv, &curp);
172
173 if (with_error && !(curp.fflags & FL_IGN_ERRORS))
174 break;
175 ++argv;
176 }
177
178 if (curp.outfree)
179 (*curp.outfree)(curp.outdata);
180
181 return((with_warning || with_error) ?
182 EXIT_FAILURE : EXIT_SUCCESS);
183 }
184
185
186 static void
187 version(void)
188 {
189
190 (void)printf("%s %s\n", progname, VERSION);
191 exit(EXIT_SUCCESS);
192 }
193
194
195 static void
196 usage(void)
197 {
198
199 (void)fprintf(stderr, "usage: %s [-V] [-foption] "
200 "[-mformat] [-Ooption] [-Toutput] "
201 "[-Werr] [file...]\n", progname);
202 exit(EXIT_FAILURE);
203 }
204
205
206 static struct man *
207 man_init(struct curparse *curp)
208 {
209 int pflags;
210 struct man_cb mancb;
211
212 mancb.man_err = merr;
213 mancb.man_warn = mwarn;
214
215 /* Defaults from mandoc.1. */
216
217 pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE;
218
219 if (curp->fflags & FL_NIGN_MACRO)
220 pflags &= ~MAN_IGN_MACRO;
221 if (curp->fflags & FL_NIGN_ESCAPE)
222 pflags &= ~MAN_IGN_ESCAPE;
223
224 return(man_alloc(curp, pflags, &mancb));
225 }
226
227
228 static struct mdoc *
229 mdoc_init(struct curparse *curp)
230 {
231 int pflags;
232 struct mdoc_cb mdoccb;
233
234 mdoccb.mdoc_err = merr;
235 mdoccb.mdoc_warn = mwarn;
236
237 /* Defaults from mandoc.1. */
238
239 pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE;
240
241 if (curp->fflags & FL_IGN_SCOPE)
242 pflags |= MDOC_IGN_SCOPE;
243 if (curp->fflags & FL_NIGN_ESCAPE)
244 pflags &= ~MDOC_IGN_ESCAPE;
245 if (curp->fflags & FL_NIGN_MACRO)
246 pflags &= ~MDOC_IGN_MACRO;
247
248 return(mdoc_alloc(curp, pflags, &mdoccb));
249 }
250
251
252 static void
253 ffile(const char *file, struct curparse *curp)
254 {
255
256 curp->file = file;
257 if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
258 perror(curp->file);
259 with_error = 1;
260 return;
261 }
262
263 fdesc(curp);
264
265 if (-1 == close(curp->fd))
266 perror(curp->file);
267 }
268
269
270 static int
271 read_whole_file(struct curparse *curp, struct buf *fb, int *with_mmap)
272 {
273 struct stat st;
274 char *buf;
275 size_t sz, off;
276 ssize_t ssz;
277
278 if (-1 == fstat(curp->fd, &st)) {
279 perror(curp->file);
280 with_error = 1;
281 return(0);
282 }
283
284 /*
285 * If we're a regular file, try just reading in the whole entry
286 * via mmap(). This is faster than reading it into blocks, and
287 * since each file is only a few bytes to begin with, I'm not
288 * concerned that this is going to tank any machines.
289 */
290
291 if (S_ISREG(st.st_mode)) {
292 if (st.st_size >= (1U << 31)) {
293 fprintf(stderr, "%s: input too large\n",
294 curp->file);
295 with_error = 1;
296 return(0);
297 }
298 *with_mmap = 1;
299 fb->sz = st.st_size;
300 fb->buf = mmap(NULL, fb->sz, PROT_READ,
301 MAP_FILE, curp->fd, 0);
302 if (fb->buf != MAP_FAILED)
303 return(1);
304 }
305
306 /*
307 * If this isn't a regular file (like, say, stdin), then we must
308 * go the old way and just read things in bit by bit.
309 */
310
311 *with_mmap = 0;
312 off = 0;
313 fb->sz = 0;
314 fb->buf = NULL;
315 for (;;) {
316 if (off == fb->sz) {
317 if (fb->sz == (1U << 31)) {
318 fprintf(stderr, "%s: input too large\n",
319 curp->file);
320 break;
321 }
322 if (fb->sz == 0)
323 sz = 65536;
324 else
325 sz = 2 * fb->sz;
326 buf = realloc(fb->buf, sz);
327 if (NULL == buf) {
328 perror(NULL);
329 break;
330 }
331 fb->buf = buf;
332 fb->sz = sz;
333 }
334 ssz = read(curp->fd, fb->buf + off, fb->sz - off);
335 if (ssz == 0) {
336 fb->sz = off;
337 return(1);
338 }
339 if (ssz == -1) {
340 perror(curp->file);
341 break;
342 }
343 off += ssz;
344 }
345
346 free(fb->buf);
347 fb->buf = NULL;
348 with_error = 1;
349 return(0);
350 }
351
352
353 static void
354 fdesc(struct curparse *curp)
355 {
356 size_t sz;
357 struct buf ln, blk;
358 int j, i, pos, lnn, comment, with_mmap;
359 struct man *man;
360 struct mdoc *mdoc;
361
362 sz = BUFSIZ;
363 man = NULL;
364 mdoc = NULL;
365 memset(&ln, 0, sizeof(struct buf));
366
367 /*
368 * Two buffers: ln and buf. buf is the input buffer optimised
369 * here for each file's block size. ln is a line buffer. Both
370 * growable, hence passed in by ptr-ptr.
371 */
372
373 if (!read_whole_file(curp, &blk, &with_mmap))
374 return;
375
376 /* Fill buf with file blocksize. */
377
378 for (i = lnn = pos = comment = 0; i < (int)blk.sz; ++i) {
379 if (pos >= (int)ln.sz) {
380 ln.sz += 256; /* Step-size. */
381 ln.buf = realloc(ln.buf, ln.sz);
382 if (NULL == ln.buf) {
383 perror(NULL);
384 goto bailout;
385 }
386 }
387
388 if ('\n' != blk.buf[i]) {
389 if (comment)
390 continue;
391 ln.buf[pos++] = blk.buf[i];
392
393 /* Handle in-line `\"' comments. */
394
395 if (1 == pos || '\"' != ln.buf[pos - 1])
396 continue;
397
398 for (j = pos - 2; j >= 0; j--)
399 if ('\\' != ln.buf[j])
400 break;
401
402 if ( ! ((pos - 2 - j) % 2))
403 continue;
404
405 comment = 1;
406 pos -= 2;
407 for (; pos > 0; --pos) {
408 if (ln.buf[pos - 1] != ' ')
409 break;
410 if (pos > 2 && ln.buf[pos - 2] == '\\')
411 break;
412 }
413 continue;
414 }
415
416 /* Handle escaped `\\n' newlines. */
417
418 if (pos > 0 && 0 == comment && '\\' == ln.buf[pos - 1]) {
419 for (j = pos - 1; j >= 0; j--)
420 if ('\\' != ln.buf[j])
421 break;
422 if ( ! ((pos - j) % 2)) {
423 pos--;
424 lnn++;
425 continue;
426 }
427 }
428
429 ln.buf[pos] = 0;
430 lnn++;
431
432 /* If unset, assign parser in pset(). */
433
434 if ( ! (man || mdoc) && ! pset(ln.buf, pos, curp, &man, &mdoc))
435 goto bailout;
436
437 pos = comment = 0;
438
439 /* Pass down into parsers. */
440
441 if (man && ! man_parseln(man, lnn, ln.buf))
442 goto bailout;
443 if (mdoc && ! mdoc_parseln(mdoc, lnn, ln.buf))
444 goto bailout;
445 }
446
447 /* NOTE a parser may not have been assigned, yet. */
448
449 if ( ! (man || mdoc)) {
450 fprintf(stderr, "%s: Not a manual\n", curp->file);
451 goto bailout;
452 }
453
454 if (mdoc && ! mdoc_endparse(mdoc))
455 goto bailout;
456 if (man && ! man_endparse(man))
457 goto bailout;
458
459 /* If unset, allocate output dev now (if applicable). */
460
461 if ( ! (curp->outman && curp->outmdoc)) {
462 switch (curp->outtype) {
463 case (OUTT_XHTML):
464 curp->outdata = xhtml_alloc(curp->outopts);
465 curp->outman = html_man;
466 curp->outmdoc = html_mdoc;
467 curp->outfree = html_free;
468 break;
469 case (OUTT_HTML):
470 curp->outdata = html_alloc(curp->outopts);
471 curp->outman = html_man;
472 curp->outmdoc = html_mdoc;
473 curp->outfree = html_free;
474 break;
475 case (OUTT_TREE):
476 curp->outman = tree_man;
477 curp->outmdoc = tree_mdoc;
478 break;
479 case (OUTT_LINT):
480 break;
481 default:
482 curp->outdata = ascii_alloc();
483 curp->outman = terminal_man;
484 curp->outmdoc = terminal_mdoc;
485 curp->outfree = terminal_free;
486 break;
487 }
488 }
489
490 /* Execute the out device, if it exists. */
491
492 if (man && curp->outman)
493 (*curp->outman)(curp->outdata, man);
494 if (mdoc && curp->outmdoc)
495 (*curp->outmdoc)(curp->outdata, mdoc);
496
497 cleanup:
498 if (curp->mdoc) {
499 mdoc_free(curp->mdoc);
500 curp->mdoc = NULL;
501 }
502 if (curp->man) {
503 man_free(curp->man);
504 curp->man = NULL;
505 }
506 if (ln.buf)
507 free(ln.buf);
508 if (with_mmap)
509 munmap(blk.buf, blk.sz);
510 else
511 free(blk.buf);
512 return;
513
514 bailout:
515 with_error = 1;
516 goto cleanup;
517 }
518
519
520 static int
521 pset(const char *buf, int pos, struct curparse *curp,
522 struct man **man, struct mdoc **mdoc)
523 {
524 int i;
525
526 /*
527 * Try to intuit which kind of manual parser should be used. If
528 * passed in by command-line (-man, -mdoc), then use that
529 * explicitly. If passed as -mandoc, then try to guess from the
530 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
531 * default to -man, which is more lenient.
532 */
533
534 if (buf[0] == '.') {
535 for (i = 1; buf[i]; i++)
536 if (' ' != buf[i] && '\t' != buf[i])
537 break;
538 if (0 == buf[i])
539 return(1);
540 }
541
542 switch (curp->inttype) {
543 case (INTT_MDOC):
544 if (NULL == curp->mdoc)
545 curp->mdoc = mdoc_init(curp);
546 if (NULL == (*mdoc = curp->mdoc))
547 return(0);
548 return(1);
549 case (INTT_MAN):
550 if (NULL == curp->man)
551 curp->man = man_init(curp);
552 if (NULL == (*man = curp->man))
553 return(0);
554 return(1);
555 default:
556 break;
557 }
558
559 if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
560 if (NULL == curp->mdoc)
561 curp->mdoc = mdoc_init(curp);
562 if (NULL == (*mdoc = curp->mdoc))
563 return(0);
564 return(1);
565 }
566
567 if (NULL == curp->man)
568 curp->man = man_init(curp);
569 if (NULL == (*man = curp->man))
570 return(0);
571 return(1);
572 }
573
574
575 static int
576 moptions(enum intt *tflags, char *arg)
577 {
578
579 if (0 == strcmp(arg, "doc"))
580 *tflags = INTT_MDOC;
581 else if (0 == strcmp(arg, "andoc"))
582 *tflags = INTT_AUTO;
583 else if (0 == strcmp(arg, "an"))
584 *tflags = INTT_MAN;
585 else {
586 fprintf(stderr, "%s: Bad argument\n", arg);
587 return(0);
588 }
589
590 return(1);
591 }
592
593
594 static int
595 toptions(struct curparse *curp, char *arg)
596 {
597
598 if (0 == strcmp(arg, "ascii"))
599 curp->outtype = OUTT_ASCII;
600 else if (0 == strcmp(arg, "lint")) {
601 curp->outtype = OUTT_LINT;
602 curp->wflags |= WARN_WALL;
603 curp->fflags |= FL_STRICT;
604 }
605 else if (0 == strcmp(arg, "tree"))
606 curp->outtype = OUTT_TREE;
607 else if (0 == strcmp(arg, "html"))
608 curp->outtype = OUTT_HTML;
609 else if (0 == strcmp(arg, "xhtml"))
610 curp->outtype = OUTT_XHTML;
611 else {
612 fprintf(stderr, "%s: Bad argument\n", arg);
613 return(0);
614 }
615
616 return(1);
617 }
618
619
620 static int
621 foptions(int *fflags, char *arg)
622 {
623 char *v, *o;
624 const char *toks[8];
625
626 toks[0] = "ign-scope";
627 toks[1] = "no-ign-escape";
628 toks[2] = "no-ign-macro";
629 toks[3] = "ign-errors";
630 toks[4] = "strict";
631 toks[5] = "ign-escape";
632 toks[6] = NULL;
633
634 while (*arg) {
635 o = arg;
636 switch (getsubopt(&arg, UNCONST(toks), &v)) {
637 case (0):
638 *fflags |= FL_IGN_SCOPE;
639 break;
640 case (1):
641 *fflags |= FL_NIGN_ESCAPE;
642 break;
643 case (2):
644 *fflags |= FL_NIGN_MACRO;
645 break;
646 case (3):
647 *fflags |= FL_IGN_ERRORS;
648 break;
649 case (4):
650 *fflags |= FL_STRICT;
651 break;
652 case (5):
653 *fflags &= ~FL_NIGN_ESCAPE;
654 break;
655 default:
656 fprintf(stderr, "%s: Bad argument\n", o);
657 return(0);
658 }
659 }
660
661 return(1);
662 }
663
664
665 static int
666 woptions(int *wflags, char *arg)
667 {
668 char *v, *o;
669 const char *toks[3];
670
671 toks[0] = "all";
672 toks[1] = "error";
673 toks[2] = NULL;
674
675 while (*arg) {
676 o = arg;
677 switch (getsubopt(&arg, UNCONST(toks), &v)) {
678 case (0):
679 *wflags |= WARN_WALL;
680 break;
681 case (1):
682 *wflags |= WARN_WERR;
683 break;
684 default:
685 fprintf(stderr, "%s: Bad argument\n", o);
686 return(0);
687 }
688 }
689
690 return(1);
691 }
692
693
694 /* ARGSUSED */
695 static int
696 merr(void *arg, int line, int col, const char *msg)
697 {
698 struct curparse *curp;
699
700 curp = (struct curparse *)arg;
701
702 (void)fprintf(stderr, "%s:%d:%d: error: %s\n",
703 curp->file, line, col + 1, msg);
704
705 with_error = 1;
706
707 return(0);
708 }
709
710
711 static int
712 mwarn(void *arg, int line, int col, const char *msg)
713 {
714 struct curparse *curp;
715
716 curp = (struct curparse *)arg;
717
718 if ( ! (curp->wflags & WARN_WALL))
719 return(1);
720
721 (void)fprintf(stderr, "%s:%d:%d: warning: %s\n",
722 curp->file, line, col + 1, msg);
723
724 with_warning = 1;
725 if (curp->wflags & WARN_WERR) {
726 with_error = 1;
727 return(0);
728 }
729
730 return(1);
731 }
732