]> git.cameronkatri.com Git - mandoc.git/blob - main.c
Explicitly account for \*(Ba when checking for delims. Noted by Jason McIntyre via...
[mandoc.git] / main.c
1 /* $Id: main.c,v 1.61 2010/04/12 19:27:22 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <sys/stat.h>
22
23 #include <assert.h>
24 #include <fcntl.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30
31 #include "mdoc.h"
32 #include "man.h"
33 #include "main.h"
34
/*
 * Strip the const qualifier from a pointer by round-tripping it through
 * uintptr_t, yielding a plain void pointer.
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))

/* FIXME: Intel's compiler? LLVM? pcc? */

/*
 * Compilers other than GCC 2 or newer get __attribute__ defined away,
 * except when running under lint.
 */
#if !(defined(__GNUC__) && (__GNUC__ >= 2)) && !defined(lint)
# define __attribute__(x)
#endif
44
/*
 * Output-device entry points.  Each receives the device's private data
 * pointer; the first two also receive the parsed document to emit.
 */
typedef	void	(*out_mdoc)(void *outdata, const struct mdoc *mdoc);
typedef	void	(*out_man)(void *outdata, const struct man *man);
typedef	void	(*out_free)(void *outdata);
48
/* A growable byte buffer: storage pointer plus its allocated size. */
struct	buf {
	char	 *buf;	/* Storage (may be NULL while sz is 0). */
	size_t	  sz;	/* Bytes allocated at buf. */
};
53
/* Input parser selection (see moptions() and pset()). */
enum	intt {
	INTT_AUTO = 0,	/* Guess the parser from the input. */
	INTT_MDOC = 1,	/* Always use the mdoc parser. */
	INTT_MAN = 2	/* Always use the man parser. */
};
59
/* Output device selection (see toptions()). */
enum	outt {
	OUTT_ASCII = 0,	/* Terminal output; the default. */
	OUTT_TREE = 1,	/* Parse-tree output. */
	OUTT_HTML = 2,	/* HTML output. */
	OUTT_XHTML = 3,	/* XHTML output. */
	OUTT_LINT = 4	/* No output device; parse only. */
};
67
68 struct curparse {
69 const char *file; /* Current parse. */
70 int fd; /* Current parse. */
71 int wflags;
72 #define WARN_WALL (1 << 0) /* All-warnings mask. */
73 #define WARN_WERR (1 << 2) /* Warnings->errors. */
74 int fflags;
75 #define FL_IGN_SCOPE (1 << 0) /* Ignore scope errors. */
76 #define FL_NIGN_ESCAPE (1 << 1) /* Don't ignore bad escapes. */
77 #define FL_NIGN_MACRO (1 << 2) /* Don't ignore bad macros. */
78 #define FL_NIGN_CHARS (1 << 3) /* Don't ignore bad chars. */
79 #define FL_IGN_ERRORS (1 << 4) /* Ignore failed parse. */
80 enum intt inttype; /* Input parsers... */
81 struct man *man;
82 struct man *lastman;
83 struct mdoc *mdoc;
84 struct mdoc *lastmdoc;
85 enum outt outtype; /* Output devices... */
86 out_mdoc outmdoc;
87 out_man outman;
88 out_free outfree;
89 void *outdata;
90 char outopts[BUFSIZ];
91 };
92
/*
 * All "don't ignore" parser flags combined.  The expansion is wrapped
 * in parentheses so that it behaves as a single operand in any
 * expression, e.g. under `~' or `&'.
 */
#define	FL_STRICT	 (FL_NIGN_ESCAPE | \
			  FL_NIGN_MACRO | \
			  FL_NIGN_CHARS)
96
/* Command-line option parsers; each returns 0 on a bad argument. */
static	int		  foptions(int *fflags, char *arg);
static	int		  toptions(struct curparse *curp, char *arg);
static	int		  moptions(enum intt *tflags, char *arg);
static	int		  woptions(int *wflags, char *arg);

/* Parser message callbacks. */
static	int		  merr(void *arg, int line, int col,
				const char *msg);
static	int		  mwarn(void *arg, int line, int col,
				const char *msg);

/* Input handling. */
static	int		  ffile(struct buf *blk, struct buf *ln,
				const char *file, struct curparse *curp);
static	int		  fdesc(struct buf *blk, struct buf *ln,
				struct curparse *curp);
static	int		  pset(const char *buf, int pos,
				struct curparse *curp,
				struct man **man, struct mdoc **mdoc);

/* Parser construction. */
static	struct man	 *man_init(struct curparse *curp);
static	struct mdoc	 *mdoc_init(struct curparse *curp);

static	void		  version(void) __attribute__((noreturn));
static	void		  usage(void) __attribute__((noreturn));

/* Basename of argv[0], set at the start of main(). */
static	const char	 *progname;

115
116
117 int
118 main(int argc, char *argv[])
119 {
120 int c, rc;
121 struct buf ln, blk;
122 struct curparse curp;
123
124 progname = strrchr(argv[0], '/');
125 if (progname == NULL)
126 progname = argv[0];
127 else
128 ++progname;
129
130 memset(&curp, 0, sizeof(struct curparse));
131
132 curp.inttype = INTT_AUTO;
133 curp.outtype = OUTT_ASCII;
134
135 /* LINTED */
136 while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:")))
137 switch (c) {
138 case ('f'):
139 if ( ! foptions(&curp.fflags, optarg))
140 return(EXIT_FAILURE);
141 break;
142 case ('m'):
143 if ( ! moptions(&curp.inttype, optarg))
144 return(EXIT_FAILURE);
145 break;
146 case ('O'):
147 (void)strlcat(curp.outopts, optarg, BUFSIZ);
148 (void)strlcat(curp.outopts, ",", BUFSIZ);
149 break;
150 case ('T'):
151 if ( ! toptions(&curp, optarg))
152 return(EXIT_FAILURE);
153 break;
154 case ('W'):
155 if ( ! woptions(&curp.wflags, optarg))
156 return(EXIT_FAILURE);
157 break;
158 case ('V'):
159 version();
160 /* NOTREACHED */
161 default:
162 usage();
163 /* NOTREACHED */
164 }
165
166 argc -= optind;
167 argv += optind;
168
169 memset(&ln, 0, sizeof(struct buf));
170 memset(&blk, 0, sizeof(struct buf));
171
172 rc = 1;
173
174 if (NULL == *argv) {
175 curp.file = "<stdin>";
176 curp.fd = STDIN_FILENO;
177
178 c = fdesc(&blk, &ln, &curp);
179 if ( ! (FL_IGN_ERRORS & curp.fflags))
180 rc = 1 == c ? 1 : 0;
181 else
182 rc = -1 == c ? 0 : 1;
183 }
184
185 while (rc && *argv) {
186 c = ffile(&blk, &ln, *argv, &curp);
187 if ( ! (FL_IGN_ERRORS & curp.fflags))
188 rc = 1 == c ? 1 : 0;
189 else
190 rc = -1 == c ? 0 : 1;
191
192 argv++;
193 if (*argv && rc) {
194 if (curp.lastman)
195 man_reset(curp.lastman);
196 if (curp.lastmdoc)
197 mdoc_reset(curp.lastmdoc);
198 curp.lastman = NULL;
199 curp.lastmdoc = NULL;
200 }
201 }
202
203 if (blk.buf)
204 free(blk.buf);
205 if (ln.buf)
206 free(ln.buf);
207 if (curp.outfree)
208 (*curp.outfree)(curp.outdata);
209 if (curp.mdoc)
210 mdoc_free(curp.mdoc);
211 if (curp.man)
212 man_free(curp.man);
213
214 return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
215 }
216
217
218 static void
219 version(void)
220 {
221
222 (void)printf("%s %s\n", progname, VERSION);
223 exit(EXIT_SUCCESS);
224 }
225
226
227 static void
228 usage(void)
229 {
230
231 (void)fprintf(stderr, "usage: %s [-V] [-foption] "
232 "[-mformat] [-Ooption] [-Toutput] "
233 "[-Werr] [file...]\n", progname);
234 exit(EXIT_FAILURE);
235 }
236
237
238 static struct man *
239 man_init(struct curparse *curp)
240 {
241 int pflags;
242 struct man_cb mancb;
243
244 mancb.man_err = merr;
245 mancb.man_warn = mwarn;
246
247 /* Defaults from mandoc.1. */
248
249 pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE | MAN_IGN_CHARS;
250
251 if (curp->fflags & FL_NIGN_MACRO)
252 pflags &= ~MAN_IGN_MACRO;
253 if (curp->fflags & FL_NIGN_CHARS)
254 pflags &= ~MAN_IGN_CHARS;
255 if (curp->fflags & FL_NIGN_ESCAPE)
256 pflags &= ~MAN_IGN_ESCAPE;
257
258 return(man_alloc(curp, pflags, &mancb));
259 }
260
261
262 static struct mdoc *
263 mdoc_init(struct curparse *curp)
264 {
265 int pflags;
266 struct mdoc_cb mdoccb;
267
268 mdoccb.mdoc_err = merr;
269 mdoccb.mdoc_warn = mwarn;
270
271 /* Defaults from mandoc.1. */
272
273 pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE | MDOC_IGN_CHARS;
274
275 if (curp->fflags & FL_IGN_SCOPE)
276 pflags |= MDOC_IGN_SCOPE;
277 if (curp->fflags & FL_NIGN_ESCAPE)
278 pflags &= ~MDOC_IGN_ESCAPE;
279 if (curp->fflags & FL_NIGN_MACRO)
280 pflags &= ~MDOC_IGN_MACRO;
281 if (curp->fflags & FL_NIGN_CHARS)
282 pflags &= ~MDOC_IGN_CHARS;
283
284 return(mdoc_alloc(curp, pflags, &mdoccb));
285 }
286
287
288 static int
289 ffile(struct buf *blk, struct buf *ln,
290 const char *file, struct curparse *curp)
291 {
292 int c;
293
294 curp->file = file;
295 if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
296 perror(curp->file);
297 return(-1);
298 }
299
300 c = fdesc(blk, ln, curp);
301
302 if (-1 == close(curp->fd))
303 perror(curp->file);
304
305 return(c);
306 }
307
308
309 static int
310 fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
311 {
312 size_t sz;
313 ssize_t ssz;
314 struct stat st;
315 int j, i, pos, lnn, comment;
316 struct man *man;
317 struct mdoc *mdoc;
318
319 sz = BUFSIZ;
320 man = NULL;
321 mdoc = NULL;
322
323 /*
324 * Two buffers: ln and buf. buf is the input buffer optimised
325 * here for each file's block size. ln is a line buffer. Both
326 * growable, hence passed in by ptr-ptr.
327 */
328
329 if (-1 == fstat(curp->fd, &st))
330 perror(curp->file);
331 else if ((size_t)st.st_blksize > sz)
332 sz = st.st_blksize;
333
334 if (sz > blk->sz) {
335 blk->buf = realloc(blk->buf, sz);
336 if (NULL == blk->buf) {
337 perror(NULL);
338 exit(EXIT_FAILURE);
339 }
340 blk->sz = sz;
341 }
342
343 /* Fill buf with file blocksize. */
344
345 for (lnn = pos = comment = 0; ; ) {
346 if (-1 == (ssz = read(curp->fd, blk->buf, sz))) {
347 perror(curp->file);
348 return(-1);
349 } else if (0 == ssz)
350 break;
351
352 /* Parse the read block into partial or full lines. */
353
354 for (i = 0; i < (int)ssz; i++) {
355 if (pos >= (int)ln->sz) {
356 ln->sz += 256; /* Step-size. */
357 ln->buf = realloc(ln->buf, ln->sz);
358 if (NULL == ln->buf) {
359 perror(NULL);
360 return(EXIT_FAILURE);
361 }
362 }
363
364 if ('\n' != blk->buf[i]) {
365 if (comment)
366 continue;
367 ln->buf[pos++] = blk->buf[i];
368
369 /* Handle in-line `\"' comments. */
370
371 if (1 == pos || '\"' != ln->buf[pos - 1])
372 continue;
373
374 for (j = pos - 2; j >= 0; j--)
375 if ('\\' != ln->buf[j])
376 break;
377
378 if ( ! ((pos - 2 - j) % 2))
379 continue;
380
381 comment = 1;
382 pos -= 2;
383 for (; pos > 0; --pos) {
384 if (ln->buf[pos] != ' ')
385 break;
386 if (ln->buf[pos - 1] == '\\')
387 break;
388 }
389 continue;
390 }
391
392 /* Handle escaped `\\n' newlines. */
393
394 if (pos > 0 && 0 == comment &&
395 '\\' == ln->buf[pos - 1]) {
396 for (j = pos - 1; j >= 0; j--)
397 if ('\\' != ln->buf[j])
398 break;
399 if ( ! ((pos - j) % 2)) {
400 pos--;
401 lnn++;
402 continue;
403 }
404 }
405
406 ln->buf[pos] = 0;
407 lnn++;
408
409 /* If unset, assign parser in pset(). */
410
411 if ( ! (man || mdoc) && ! pset(ln->buf,
412 pos, curp, &man, &mdoc))
413 return(-1);
414
415 pos = comment = 0;
416
417 /* Pass down into parsers. */
418
419 if (man && ! man_parseln(man, lnn, ln->buf))
420 return(0);
421 if (mdoc && ! mdoc_parseln(mdoc, lnn, ln->buf))
422 return(0);
423 }
424 }
425
426 /* NOTE a parser may not have been assigned, yet. */
427
428 if ( ! (man || mdoc)) {
429 fprintf(stderr, "%s: Not a manual\n", curp->file);
430 return(0);
431 }
432
433 if (mdoc && ! mdoc_endparse(mdoc))
434 return(0);
435 if (man && ! man_endparse(man))
436 return(0);
437
438 /* If unset, allocate output dev now (if applicable). */
439
440 if ( ! (curp->outman && curp->outmdoc)) {
441 switch (curp->outtype) {
442 case (OUTT_XHTML):
443 curp->outdata = xhtml_alloc(curp->outopts);
444 curp->outman = html_man;
445 curp->outmdoc = html_mdoc;
446 curp->outfree = html_free;
447 break;
448 case (OUTT_HTML):
449 curp->outdata = html_alloc(curp->outopts);
450 curp->outman = html_man;
451 curp->outmdoc = html_mdoc;
452 curp->outfree = html_free;
453 break;
454 case (OUTT_TREE):
455 curp->outman = tree_man;
456 curp->outmdoc = tree_mdoc;
457 break;
458 case (OUTT_LINT):
459 break;
460 default:
461 curp->outdata = ascii_alloc();
462 curp->outman = terminal_man;
463 curp->outmdoc = terminal_mdoc;
464 curp->outfree = terminal_free;
465 break;
466 }
467 }
468
469 /* Execute the out device, if it exists. */
470
471 if (man && curp->outman)
472 (*curp->outman)(curp->outdata, man);
473 if (mdoc && curp->outmdoc)
474 (*curp->outmdoc)(curp->outdata, mdoc);
475
476 return(1);
477 }
478
479
480 static int
481 pset(const char *buf, int pos, struct curparse *curp,
482 struct man **man, struct mdoc **mdoc)
483 {
484 int i;
485
486 /*
487 * Try to intuit which kind of manual parser should be used. If
488 * passed in by command-line (-man, -mdoc), then use that
489 * explicitly. If passed as -mandoc, then try to guess from the
490 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
491 * default to -man, which is more lenient.
492 */
493
494 if (buf[0] == '.') {
495 for (i = 1; buf[i]; i++)
496 if (' ' != buf[i] && '\t' != buf[i])
497 break;
498 if (0 == buf[i])
499 return(1);
500 }
501
502 switch (curp->inttype) {
503 case (INTT_MDOC):
504 if (NULL == curp->mdoc)
505 curp->mdoc = mdoc_init(curp);
506 if (NULL == (*mdoc = curp->mdoc))
507 return(0);
508 curp->lastmdoc = *mdoc;
509 return(1);
510 case (INTT_MAN):
511 if (NULL == curp->man)
512 curp->man = man_init(curp);
513 if (NULL == (*man = curp->man))
514 return(0);
515 curp->lastman = *man;
516 return(1);
517 default:
518 break;
519 }
520
521 if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
522 if (NULL == curp->mdoc)
523 curp->mdoc = mdoc_init(curp);
524 if (NULL == (*mdoc = curp->mdoc))
525 return(0);
526 curp->lastmdoc = *mdoc;
527 return(1);
528 }
529
530 if (NULL == curp->man)
531 curp->man = man_init(curp);
532 if (NULL == (*man = curp->man))
533 return(0);
534 curp->lastman = *man;
535 return(1);
536 }
537
538
539 static int
540 moptions(enum intt *tflags, char *arg)
541 {
542
543 if (0 == strcmp(arg, "doc"))
544 *tflags = INTT_MDOC;
545 else if (0 == strcmp(arg, "andoc"))
546 *tflags = INTT_AUTO;
547 else if (0 == strcmp(arg, "an"))
548 *tflags = INTT_MAN;
549 else {
550 fprintf(stderr, "%s: Bad argument\n", arg);
551 return(0);
552 }
553
554 return(1);
555 }
556
557
558 static int
559 toptions(struct curparse *curp, char *arg)
560 {
561
562 if (0 == strcmp(arg, "ascii"))
563 curp->outtype = OUTT_ASCII;
564 else if (0 == strcmp(arg, "lint")) {
565 curp->outtype = OUTT_LINT;
566 curp->wflags |= WARN_WALL;
567 curp->fflags |= FL_STRICT;
568 }
569 else if (0 == strcmp(arg, "tree"))
570 curp->outtype = OUTT_TREE;
571 else if (0 == strcmp(arg, "html"))
572 curp->outtype = OUTT_HTML;
573 else if (0 == strcmp(arg, "xhtml"))
574 curp->outtype = OUTT_XHTML;
575 else {
576 fprintf(stderr, "%s: Bad argument\n", arg);
577 return(0);
578 }
579
580 return(1);
581 }
582
583
584 static int
585 foptions(int *fflags, char *arg)
586 {
587 char *v, *o;
588 const char *toks[8];
589
590 toks[0] = "ign-scope";
591 toks[1] = "no-ign-escape";
592 toks[2] = "no-ign-macro";
593 toks[3] = "no-ign-chars";
594 toks[4] = "ign-errors";
595 toks[5] = "strict";
596 toks[6] = "ign-escape";
597 toks[7] = NULL;
598
599 while (*arg) {
600 o = arg;
601 switch (getsubopt(&arg, UNCONST(toks), &v)) {
602 case (0):
603 *fflags |= FL_IGN_SCOPE;
604 break;
605 case (1):
606 *fflags |= FL_NIGN_ESCAPE;
607 break;
608 case (2):
609 *fflags |= FL_NIGN_MACRO;
610 break;
611 case (3):
612 *fflags |= FL_NIGN_CHARS;
613 break;
614 case (4):
615 *fflags |= FL_IGN_ERRORS;
616 break;
617 case (5):
618 *fflags |= FL_STRICT;
619 break;
620 case (6):
621 *fflags &= ~FL_NIGN_ESCAPE;
622 break;
623 default:
624 fprintf(stderr, "%s: Bad argument\n", o);
625 return(0);
626 }
627 }
628
629 return(1);
630 }
631
632
633 static int
634 woptions(int *wflags, char *arg)
635 {
636 char *v, *o;
637 const char *toks[3];
638
639 toks[0] = "all";
640 toks[1] = "error";
641 toks[2] = NULL;
642
643 while (*arg) {
644 o = arg;
645 switch (getsubopt(&arg, UNCONST(toks), &v)) {
646 case (0):
647 *wflags |= WARN_WALL;
648 break;
649 case (1):
650 *wflags |= WARN_WERR;
651 break;
652 default:
653 fprintf(stderr, "%s: Bad argument\n", o);
654 return(0);
655 }
656 }
657
658 return(1);
659 }
660
661
662 /* ARGSUSED */
663 static int
664 merr(void *arg, int line, int col, const char *msg)
665 {
666 struct curparse *curp;
667
668 curp = (struct curparse *)arg;
669
670 (void)fprintf(stderr, "%s:%d:%d: error: %s\n",
671 curp->file, line, col + 1, msg);
672
673 return(0);
674 }
675
676
677 static int
678 mwarn(void *arg, int line, int col, const char *msg)
679 {
680 struct curparse *curp;
681
682 curp = (struct curparse *)arg;
683
684 if ( ! (curp->wflags & WARN_WALL))
685 return(1);
686
687 (void)fprintf(stderr, "%s:%d:%d: warning: %s\n",
688 curp->file, line, col + 1, msg);
689
690 if ( ! (curp->wflags & WARN_WERR))
691 return(1);
692
693 return(0);
694 }
695