]> git.cameronkatri.com Git - mandoc.git/blob - main.c
Push buffer resizing into its own function. Keep initial allocations the
[mandoc.git] / main.c
1 /* $Id: main.c,v 1.68 2010/05/15 09:46:31 joerg Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/mman.h>
22 #include <sys/stat.h>
23
24 #include <assert.h>
25 #include <fcntl.h>
26 #include <stdio.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31
32 #include "mdoc.h"
33 #include "man.h"
34 #include "main.h"
35
/*
 * Strip the const qualifier from a pointer by round-tripping it through
 * uintptr_t, yielding a plain `void *' for interfaces that are not
 * const-correct.
 */
#define	UNCONST(ptr) \
	((void *)(uintptr_t)(const void *)(ptr))
37
/*
 * FIXME: Intel's compiler?  LLVM?  pcc?
 *
 * Compilers that are not GCC (version 2 or later) get __attribute__()
 * defined away, except when running under lint.
 */
#if (!defined(__GNUC__) || (__GNUC__ < 2)) && !defined(lint)
# define __attribute__(x)
#endif
45
/*
 * Output back-end callbacks.  Each receives the back-end's opaque state
 * pointer; the first two also receive the finished parse to render, and
 * the third releases that state.
 */
typedef	void	(*out_mdoc)(void *arg, const struct mdoc *mdoc);
typedef	void	(*out_man)(void *arg, const struct man *man);
typedef	void	(*out_free)(void *arg);
49
/* A heap (or mmap-backed) byte buffer together with its size. */
struct	buf {
	char	*buf;	/* start of the storage, or NULL */
	size_t	 sz;	/* number of bytes available at buf */
};
54
/* Input parser selection. */
enum	intt {
	INTT_AUTO = 0,	/* guess from the input */
	INTT_MDOC,	/* always use the mdoc parser */
	INTT_MAN	/* always use the man parser */
};
60
/* Output device selection; OUTT_ASCII is the zero-valued default. */
enum	outt {
	OUTT_ASCII = 0,	/* terminal output */
	OUTT_TREE,	/* parse tree dump */
	OUTT_HTML,	/* HTML */
	OUTT_XHTML,	/* XHTML */
	OUTT_LINT	/* no output device, diagnostics only */
};
68
69 struct curparse {
70 const char *file; /* Current parse. */
71 int fd; /* Current parse. */
72 int wflags;
73 #define WARN_WALL (1 << 0) /* All-warnings mask. */
74 #define WARN_WERR (1 << 2) /* Warnings->errors. */
75 int fflags;
76 #define FL_IGN_SCOPE (1 << 0) /* Ignore scope errors. */
77 #define FL_NIGN_ESCAPE (1 << 1) /* Don't ignore bad escapes. */
78 #define FL_NIGN_MACRO (1 << 2) /* Don't ignore bad macros. */
79 #define FL_IGN_ERRORS (1 << 4) /* Ignore failed parse. */
80 #define FL_STRICT FL_NIGN_ESCAPE | \
81 FL_NIGN_MACRO
82 enum intt inttype; /* Input parsers... */
83 struct man *man;
84 struct mdoc *mdoc;
85 enum outt outtype; /* Output devices... */
86 out_mdoc outmdoc;
87 out_man outman;
88 out_free outfree;
89 void *outdata;
90 char outopts[BUFSIZ];
91 };
92
/*
 * Forward declarations for every file-local function, in alphabetical
 * order.  read_whole_file() and resize_buf() are listed as well so the
 * list is complete.
 */
static	void		  fdesc(struct curparse *);
static	void		  ffile(const char *, struct curparse *);
static	int		  foptions(int *, char *);
static	struct man	 *man_init(struct curparse *);
static	struct mdoc	 *mdoc_init(struct curparse *);
static	int		  merr(void *, int, int, const char *);
static	int		  moptions(enum intt *, char *);
static	int		  mwarn(void *, int, int, const char *);
static	int		  pset(const char *, int, struct curparse *,
				struct man **, struct mdoc **);
static	int		  read_whole_file(struct curparse *,
				struct buf *, int *);
static	int		  resize_buf(struct buf *, size_t);
static	int		  toptions(struct curparse *, char *);
static	void		  usage(void) __attribute__((noreturn));
static	void		  version(void) __attribute__((noreturn));
static	int		  woptions(int *, char *);
107
/* Process-wide state; all three start out zeroed. */
static	int		  with_warning;	/* set once a warning was printed */
static	int		  with_error;	/* set once any error occurred */
static	const char	 *progname;	/* basename of argv[0], set in main() */
111
112 int
113 main(int argc, char *argv[])
114 {
115 int c;
116 struct curparse curp;
117
118 progname = strrchr(argv[0], '/');
119 if (progname == NULL)
120 progname = argv[0];
121 else
122 ++progname;
123
124 memset(&curp, 0, sizeof(struct curparse));
125
126 curp.inttype = INTT_AUTO;
127 curp.outtype = OUTT_ASCII;
128
129 /* LINTED */
130 while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:")))
131 switch (c) {
132 case ('f'):
133 if ( ! foptions(&curp.fflags, optarg))
134 return(EXIT_FAILURE);
135 break;
136 case ('m'):
137 if ( ! moptions(&curp.inttype, optarg))
138 return(EXIT_FAILURE);
139 break;
140 case ('O'):
141 (void)strlcat(curp.outopts, optarg, BUFSIZ);
142 (void)strlcat(curp.outopts, ",", BUFSIZ);
143 break;
144 case ('T'):
145 if ( ! toptions(&curp, optarg))
146 return(EXIT_FAILURE);
147 break;
148 case ('W'):
149 if ( ! woptions(&curp.wflags, optarg))
150 return(EXIT_FAILURE);
151 break;
152 case ('V'):
153 version();
154 /* NOTREACHED */
155 default:
156 usage();
157 /* NOTREACHED */
158 }
159
160 argc -= optind;
161 argv += optind;
162
163 if (NULL == *argv) {
164 curp.file = "<stdin>";
165 curp.fd = STDIN_FILENO;
166
167 fdesc(&curp);
168 }
169
170 while (*argv) {
171 ffile(*argv, &curp);
172
173 if (with_error && !(curp.fflags & FL_IGN_ERRORS))
174 break;
175 ++argv;
176 }
177
178 if (curp.outfree)
179 (*curp.outfree)(curp.outdata);
180
181 return((with_warning || with_error) ?
182 EXIT_FAILURE : EXIT_SUCCESS);
183 }
184
185
186 static void
187 version(void)
188 {
189
190 (void)printf("%s %s\n", progname, VERSION);
191 exit(EXIT_SUCCESS);
192 }
193
194
195 static void
196 usage(void)
197 {
198
199 (void)fprintf(stderr, "usage: %s [-V] [-foption] "
200 "[-mformat] [-Ooption] [-Toutput] "
201 "[-Werr] [file...]\n", progname);
202 exit(EXIT_FAILURE);
203 }
204
205
206 static struct man *
207 man_init(struct curparse *curp)
208 {
209 int pflags;
210 struct man_cb mancb;
211
212 mancb.man_err = merr;
213 mancb.man_warn = mwarn;
214
215 /* Defaults from mandoc.1. */
216
217 pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE;
218
219 if (curp->fflags & FL_NIGN_MACRO)
220 pflags &= ~MAN_IGN_MACRO;
221 if (curp->fflags & FL_NIGN_ESCAPE)
222 pflags &= ~MAN_IGN_ESCAPE;
223
224 return(man_alloc(curp, pflags, &mancb));
225 }
226
227
228 static struct mdoc *
229 mdoc_init(struct curparse *curp)
230 {
231 int pflags;
232 struct mdoc_cb mdoccb;
233
234 mdoccb.mdoc_err = merr;
235 mdoccb.mdoc_warn = mwarn;
236
237 /* Defaults from mandoc.1. */
238
239 pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE;
240
241 if (curp->fflags & FL_IGN_SCOPE)
242 pflags |= MDOC_IGN_SCOPE;
243 if (curp->fflags & FL_NIGN_ESCAPE)
244 pflags &= ~MDOC_IGN_ESCAPE;
245 if (curp->fflags & FL_NIGN_MACRO)
246 pflags &= ~MDOC_IGN_MACRO;
247
248 return(mdoc_alloc(curp, pflags, &mdoccb));
249 }
250
251
252 static void
253 ffile(const char *file, struct curparse *curp)
254 {
255
256 curp->file = file;
257 if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
258 perror(curp->file);
259 with_error = 1;
260 return;
261 }
262
263 fdesc(curp);
264
265 if (-1 == close(curp->fd))
266 perror(curp->file);
267 }
268
269
270 static int
271 resize_buf(struct buf *buf, size_t initial)
272 {
273 void *tmp;
274 size_t sz;
275
276 if (buf->sz == 0)
277 sz = initial;
278 else
279 sz = 2 * buf->sz;
280 tmp = realloc(buf->buf, sz);
281 if (NULL == tmp) {
282 perror(NULL);
283 return(0);
284 }
285 buf->buf = tmp;
286 buf->sz = sz;
287 return(1);
288 }
289
290
291 static int
292 read_whole_file(struct curparse *curp, struct buf *fb, int *with_mmap)
293 {
294 struct stat st;
295 size_t off;
296 ssize_t ssz;
297
298 if (-1 == fstat(curp->fd, &st)) {
299 perror(curp->file);
300 with_error = 1;
301 return(0);
302 }
303
304 /*
305 * If we're a regular file, try just reading in the whole entry
306 * via mmap(). This is faster than reading it into blocks, and
307 * since each file is only a few bytes to begin with, I'm not
308 * concerned that this is going to tank any machines.
309 */
310
311 if (S_ISREG(st.st_mode)) {
312 if (st.st_size >= (1U << 31)) {
313 fprintf(stderr, "%s: input too large\n",
314 curp->file);
315 with_error = 1;
316 return(0);
317 }
318 *with_mmap = 1;
319 fb->sz = st.st_size;
320 fb->buf = mmap(NULL, fb->sz, PROT_READ,
321 MAP_FILE, curp->fd, 0);
322 if (fb->buf != MAP_FAILED)
323 return(1);
324 }
325
326 /*
327 * If this isn't a regular file (like, say, stdin), then we must
328 * go the old way and just read things in bit by bit.
329 */
330
331 *with_mmap = 0;
332 off = 0;
333 fb->sz = 0;
334 fb->buf = NULL;
335 for (;;) {
336 if (off == fb->sz) {
337 if (fb->sz == (1U << 31)) {
338 fprintf(stderr, "%s: input too large\n",
339 curp->file);
340 break;
341 }
342 if (! resize_buf(fb, 65536))
343 break;
344 }
345 ssz = read(curp->fd, fb->buf + off, fb->sz - off);
346 if (ssz == 0) {
347 fb->sz = off;
348 return(1);
349 }
350 if (ssz == -1) {
351 perror(curp->file);
352 break;
353 }
354 off += ssz;
355 }
356
357 free(fb->buf);
358 fb->buf = NULL;
359 with_error = 1;
360 return(0);
361 }
362
363
364 static void
365 fdesc(struct curparse *curp)
366 {
367 struct buf ln, blk;
368 int j, i, pos, lnn, comment, with_mmap;
369 struct man *man;
370 struct mdoc *mdoc;
371
372 man = NULL;
373 mdoc = NULL;
374 memset(&ln, 0, sizeof(struct buf));
375
376 /*
377 * Two buffers: ln and buf. buf is the input buffer optimised
378 * here for each file's block size. ln is a line buffer. Both
379 * growable, hence passed in by ptr-ptr.
380 */
381
382 if (!read_whole_file(curp, &blk, &with_mmap))
383 return;
384
385 /* Fill buf with file blocksize. */
386
387 for (i = lnn = pos = comment = 0; i < (int)blk.sz; ++i) {
388 if (pos >= (int)ln.sz) {
389 if (! resize_buf(&ln, 256))
390 goto bailout;
391 }
392
393 if ('\n' != blk.buf[i]) {
394 if (comment)
395 continue;
396 ln.buf[pos++] = blk.buf[i];
397
398 /* Handle in-line `\"' comments. */
399
400 if (1 == pos || '\"' != ln.buf[pos - 1])
401 continue;
402
403 for (j = pos - 2; j >= 0; j--)
404 if ('\\' != ln.buf[j])
405 break;
406
407 if ( ! ((pos - 2 - j) % 2))
408 continue;
409
410 comment = 1;
411 pos -= 2;
412 for (; pos > 0; --pos) {
413 if (ln.buf[pos - 1] != ' ')
414 break;
415 if (pos > 2 && ln.buf[pos - 2] == '\\')
416 break;
417 }
418 continue;
419 }
420
421 /* Handle escaped `\\n' newlines. */
422
423 if (pos > 0 && 0 == comment && '\\' == ln.buf[pos - 1]) {
424 for (j = pos - 1; j >= 0; j--)
425 if ('\\' != ln.buf[j])
426 break;
427 if ( ! ((pos - j) % 2)) {
428 pos--;
429 lnn++;
430 continue;
431 }
432 }
433
434 ln.buf[pos] = 0;
435 lnn++;
436
437 /* If unset, assign parser in pset(). */
438
439 if ( ! (man || mdoc) && ! pset(ln.buf, pos, curp, &man, &mdoc))
440 goto bailout;
441
442 pos = comment = 0;
443
444 /* Pass down into parsers. */
445
446 if (man && ! man_parseln(man, lnn, ln.buf))
447 goto bailout;
448 if (mdoc && ! mdoc_parseln(mdoc, lnn, ln.buf))
449 goto bailout;
450 }
451
452 /* NOTE a parser may not have been assigned, yet. */
453
454 if ( ! (man || mdoc)) {
455 fprintf(stderr, "%s: Not a manual\n", curp->file);
456 goto bailout;
457 }
458
459 if (mdoc && ! mdoc_endparse(mdoc))
460 goto bailout;
461 if (man && ! man_endparse(man))
462 goto bailout;
463
464 /* If unset, allocate output dev now (if applicable). */
465
466 if ( ! (curp->outman && curp->outmdoc)) {
467 switch (curp->outtype) {
468 case (OUTT_XHTML):
469 curp->outdata = xhtml_alloc(curp->outopts);
470 curp->outman = html_man;
471 curp->outmdoc = html_mdoc;
472 curp->outfree = html_free;
473 break;
474 case (OUTT_HTML):
475 curp->outdata = html_alloc(curp->outopts);
476 curp->outman = html_man;
477 curp->outmdoc = html_mdoc;
478 curp->outfree = html_free;
479 break;
480 case (OUTT_TREE):
481 curp->outman = tree_man;
482 curp->outmdoc = tree_mdoc;
483 break;
484 case (OUTT_LINT):
485 break;
486 default:
487 curp->outdata = ascii_alloc();
488 curp->outman = terminal_man;
489 curp->outmdoc = terminal_mdoc;
490 curp->outfree = terminal_free;
491 break;
492 }
493 }
494
495 /* Execute the out device, if it exists. */
496
497 if (man && curp->outman)
498 (*curp->outman)(curp->outdata, man);
499 if (mdoc && curp->outmdoc)
500 (*curp->outmdoc)(curp->outdata, mdoc);
501
502 cleanup:
503 if (curp->mdoc) {
504 mdoc_free(curp->mdoc);
505 curp->mdoc = NULL;
506 }
507 if (curp->man) {
508 man_free(curp->man);
509 curp->man = NULL;
510 }
511 if (ln.buf)
512 free(ln.buf);
513 if (with_mmap)
514 munmap(blk.buf, blk.sz);
515 else
516 free(blk.buf);
517 return;
518
519 bailout:
520 with_error = 1;
521 goto cleanup;
522 }
523
524
525 static int
526 pset(const char *buf, int pos, struct curparse *curp,
527 struct man **man, struct mdoc **mdoc)
528 {
529 int i;
530
531 /*
532 * Try to intuit which kind of manual parser should be used. If
533 * passed in by command-line (-man, -mdoc), then use that
534 * explicitly. If passed as -mandoc, then try to guess from the
535 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
536 * default to -man, which is more lenient.
537 */
538
539 if (buf[0] == '.') {
540 for (i = 1; buf[i]; i++)
541 if (' ' != buf[i] && '\t' != buf[i])
542 break;
543 if (0 == buf[i])
544 return(1);
545 }
546
547 switch (curp->inttype) {
548 case (INTT_MDOC):
549 if (NULL == curp->mdoc)
550 curp->mdoc = mdoc_init(curp);
551 if (NULL == (*mdoc = curp->mdoc))
552 return(0);
553 return(1);
554 case (INTT_MAN):
555 if (NULL == curp->man)
556 curp->man = man_init(curp);
557 if (NULL == (*man = curp->man))
558 return(0);
559 return(1);
560 default:
561 break;
562 }
563
564 if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
565 if (NULL == curp->mdoc)
566 curp->mdoc = mdoc_init(curp);
567 if (NULL == (*mdoc = curp->mdoc))
568 return(0);
569 return(1);
570 }
571
572 if (NULL == curp->man)
573 curp->man = man_init(curp);
574 if (NULL == (*man = curp->man))
575 return(0);
576 return(1);
577 }
578
579
580 static int
581 moptions(enum intt *tflags, char *arg)
582 {
583
584 if (0 == strcmp(arg, "doc"))
585 *tflags = INTT_MDOC;
586 else if (0 == strcmp(arg, "andoc"))
587 *tflags = INTT_AUTO;
588 else if (0 == strcmp(arg, "an"))
589 *tflags = INTT_MAN;
590 else {
591 fprintf(stderr, "%s: Bad argument\n", arg);
592 return(0);
593 }
594
595 return(1);
596 }
597
598
599 static int
600 toptions(struct curparse *curp, char *arg)
601 {
602
603 if (0 == strcmp(arg, "ascii"))
604 curp->outtype = OUTT_ASCII;
605 else if (0 == strcmp(arg, "lint")) {
606 curp->outtype = OUTT_LINT;
607 curp->wflags |= WARN_WALL;
608 curp->fflags |= FL_STRICT;
609 }
610 else if (0 == strcmp(arg, "tree"))
611 curp->outtype = OUTT_TREE;
612 else if (0 == strcmp(arg, "html"))
613 curp->outtype = OUTT_HTML;
614 else if (0 == strcmp(arg, "xhtml"))
615 curp->outtype = OUTT_XHTML;
616 else {
617 fprintf(stderr, "%s: Bad argument\n", arg);
618 return(0);
619 }
620
621 return(1);
622 }
623
624
625 static int
626 foptions(int *fflags, char *arg)
627 {
628 char *v, *o;
629 const char *toks[8];
630
631 toks[0] = "ign-scope";
632 toks[1] = "no-ign-escape";
633 toks[2] = "no-ign-macro";
634 toks[3] = "ign-errors";
635 toks[4] = "strict";
636 toks[5] = "ign-escape";
637 toks[6] = NULL;
638
639 while (*arg) {
640 o = arg;
641 switch (getsubopt(&arg, UNCONST(toks), &v)) {
642 case (0):
643 *fflags |= FL_IGN_SCOPE;
644 break;
645 case (1):
646 *fflags |= FL_NIGN_ESCAPE;
647 break;
648 case (2):
649 *fflags |= FL_NIGN_MACRO;
650 break;
651 case (3):
652 *fflags |= FL_IGN_ERRORS;
653 break;
654 case (4):
655 *fflags |= FL_STRICT;
656 break;
657 case (5):
658 *fflags &= ~FL_NIGN_ESCAPE;
659 break;
660 default:
661 fprintf(stderr, "%s: Bad argument\n", o);
662 return(0);
663 }
664 }
665
666 return(1);
667 }
668
669
670 static int
671 woptions(int *wflags, char *arg)
672 {
673 char *v, *o;
674 const char *toks[3];
675
676 toks[0] = "all";
677 toks[1] = "error";
678 toks[2] = NULL;
679
680 while (*arg) {
681 o = arg;
682 switch (getsubopt(&arg, UNCONST(toks), &v)) {
683 case (0):
684 *wflags |= WARN_WALL;
685 break;
686 case (1):
687 *wflags |= WARN_WERR;
688 break;
689 default:
690 fprintf(stderr, "%s: Bad argument\n", o);
691 return(0);
692 }
693 }
694
695 return(1);
696 }
697
698
699 /* ARGSUSED */
700 static int
701 merr(void *arg, int line, int col, const char *msg)
702 {
703 struct curparse *curp;
704
705 curp = (struct curparse *)arg;
706
707 (void)fprintf(stderr, "%s:%d:%d: error: %s\n",
708 curp->file, line, col + 1, msg);
709
710 with_error = 1;
711
712 return(0);
713 }
714
715
716 static int
717 mwarn(void *arg, int line, int col, const char *msg)
718 {
719 struct curparse *curp;
720
721 curp = (struct curparse *)arg;
722
723 if ( ! (curp->wflags & WARN_WALL))
724 return(1);
725
726 (void)fprintf(stderr, "%s:%d:%d: warning: %s\n",
727 curp->file, line, col + 1, msg);
728
729 with_warning = 1;
730 if (curp->wflags & WARN_WERR) {
731 with_error = 1;
732 return(0);
733 }
734
735 return(1);
736 }
737