]>
git.cameronkatri.com Git - mandoc.git/blob - main.c
78f0ddb0f885cea81f3f4aee04c4554a84f16aed
1 /* $Id: main.c,v 1.81 2010/06/01 14:54:37 kristaps Exp $ */
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
/*
 * Strip the const qualifier from a pointer without tripping
 * cast-qual warnings: the value is routed through uintptr_t and comes
 * back as a plain "void *".  The pointed-to object is not copied.
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))

/* FIXME: Intel's compiler? LLVM? pcc? */

/*
 * Compilers that are GCC 2 or newer understand __attribute__ natively.
 * Everywhere else the annotation is defined away so that declarations
 * carrying it still compile.
 */
#if defined(__GNUC__) && (__GNUC__ >= 2)
/* Native __attribute__ support: nothing to do. */
#else
# define __attribute__(x)
#endif /* defined(__GNUC__) && (__GNUC__ >= 2) */
/*
 * Output back-end callbacks.  Each one receives the back-end's opaque
 * data pointer as its first argument; the two renderers additionally
 * receive the parsed document, and the third releases the back-end.
 */
typedef	void	(*out_mdoc)(void *arg, const struct mdoc *mdoc);
typedef	void	(*out_man)(void *arg, const struct man *man);
typedef	void	(*out_free)(void *arg);
72 const char *file
; /* Current parse. */
73 int fd
; /* Current parse. */
75 /* FIXME: set by max error */
/*
 * Bits tested against the "wflags" word (warning handling).
 */
#define	WARN_WALL	 (1 << 0)	/* All-warnings mask. */
#define	WARN_WERR	 (1 << 2)	/* Warnings->errors. */

/*
 * Bits tested against the "fflags" word (parser behaviour).
 */
#define	FL_IGN_SCOPE	 (1 << 0)	/* Ignore scope errors. */
#define	FL_NIGN_ESCAPE	 (1 << 1)	/* Don't ignore bad escapes. */
#define	FL_NIGN_MACRO	 (1 << 2)	/* Don't ignore bad macros. */
#define	FL_IGN_ERRORS	 (1 << 4)	/* Ignore failed parse. */

/*
 * Ignore nothing.  The replacement list is parenthesized so that the
 * macro behaves as a single operand next to "&", "~" or "==";
 * without the parentheses "x & FL_STRICT" would parse as
 * "(x & FL_NIGN_ESCAPE) | FL_NIGN_MACRO".
 */
#define	FL_STRICT	 (FL_NIGN_ESCAPE | FL_NIGN_MACRO)
85 enum intt inttype
; /* which parser to use */
86 struct man
*man
; /* man parser */
87 struct mdoc
*mdoc
; /* mdoc parser */
88 struct roff
*roff
; /* roff parser (!NULL) */
89 enum outt outtype
; /* which output to use */
90 out_mdoc outmdoc
; /* mdoc output ptr */
91 out_man outman
; /* man output ptr */
92 out_free outfree
; /* free output ptr */
93 void *outdata
; /* data for output */
94 char outopts
[BUFSIZ
]; /* buf of output opts */
97 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
99 "text should be uppercase",
100 "sections out of conventional order",
101 "section name repeats",
102 "out of order prologue",
103 "repeated prologue entry",
104 "list type must come first",
107 "bad escape sequence",
108 "unterminated quoted string",
109 "argument requires the width argument",
110 "superfluous width argument",
112 "bad width argument",
113 "unknown manual section",
114 "section not in conventional manual section",
115 "end of line whitespace",
116 "scope open on exit",
117 "NAME section must come first",
119 "child violates parent syntax",
121 "list type repeated",
122 "display type repeated",
124 "manual name not yet set",
125 "obsolete macro ignored",
126 "empty macro ignored",
127 "macro not allowed in body",
128 "macro not allowed in prologue",
130 "bad NAME section contents",
132 "no text in this context",
134 "unknown macro will be lost",
137 "argument count wrong",
138 "request scope close w/none open",
139 "scope already open",
140 "macro requires line argument(s)",
141 "macro requires body argument(s)",
142 "macro requires argument(s)",
143 "no title in document",
144 "line argument(s) will be lost",
145 "body argument(s) will be lost",
146 "column syntax is inconsistent",
148 "missing display type",
150 "displays may not be nested",
151 "no scope to rewind: syntax violated",
152 "scope broken, syntax violated",
153 "line scope broken, syntax violated",
154 "argument count wrong, violates syntax",
155 "child violates parent syntax",
156 "argument count wrong, violates syntax",
158 "no document prologue",
159 "utsname system call failed",
/*
 * Forward declarations for the file-local routines.  Parameter names
 * mirror the ones used by the definitions further down.
 */

/* Input handling. */
static	void		  fdesc(struct curparse *curp);
static	void		  ffile(const char *file, struct curparse *curp);
static	int		  pset(const char *buf, int pos,
				struct curparse *curp,
				struct man **man, struct mdoc **mdoc);

/* Parser construction. */
static	struct man	 *man_init(struct curparse *curp);
static	struct mdoc	 *mdoc_init(struct curparse *curp);
static	struct roff	 *roff_init(struct curparse *curp);

/* Command-line option parsing (-f, -m, -T, -W). */
static	int		  foptions(int *fflags, char *arg);
static	int		  moptions(enum intt *tflags, char *arg);
static	int		  toptions(struct curparse *curp, char *arg);
static	int		  woptions(int *wflags, char *arg);

/* Message callback handed to the parsers. */
static	int		  mmsg(enum mandocerr t, void *arg,
				int ln, int col, const char *msg);

/* Informational exits. */
static	void		  usage(void) __attribute__((noreturn));
static	void		  version(void) __attribute__((noreturn));
/* Program name; main() derives it from argv[0]. */
static	const char	 *progname = NULL;

/*
 * Run-wide status flags.  main() returns EXIT_FAILURE when either one
 * is non-zero at the end of the run.
 */
static	int		  with_error = 0;
static	int		  with_warning = 0;
184 main(int argc
, char *argv
[])
187 struct curparse curp
;
189 progname
= strrchr(argv
[0], '/');
190 if (progname
== NULL
)
195 memset(&curp
, 0, sizeof(struct curparse
));
197 curp
.inttype
= INTT_AUTO
;
198 curp
.outtype
= OUTT_ASCII
;
201 while (-1 != (c
= getopt(argc
, argv
, "f:m:O:T:VW:")))
204 if ( ! foptions(&curp
.fflags
, optarg
))
205 return(EXIT_FAILURE
);
208 if ( ! moptions(&curp
.inttype
, optarg
))
209 return(EXIT_FAILURE
);
212 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
213 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
216 if ( ! toptions(&curp
, optarg
))
217 return(EXIT_FAILURE
);
220 if ( ! woptions(&curp
.wflags
, optarg
))
221 return(EXIT_FAILURE
);
235 curp
.file
= "<stdin>";
236 curp
.fd
= STDIN_FILENO
;
244 if (with_error
&& !(curp
.fflags
& FL_IGN_ERRORS
))
250 (*curp
.outfree
)(curp
.outdata
);
252 mdoc_free(curp
.mdoc
);
256 roff_free(curp
.roff
);
258 return((with_warning
|| with_error
) ?
259 EXIT_FAILURE
: EXIT_SUCCESS
);
267 (void)printf("%s %s\n", progname
, VERSION
);
276 (void)fprintf(stderr
, "usage: %s [-V] [-foption] "
277 "[-mformat] [-Ooption] [-Toutput] "
278 "[-Werr] [file...]\n", progname
);
284 man_init(struct curparse
*curp
)
288 /* Defaults from mandoc.1. */
290 pflags
= MAN_IGN_MACRO
| MAN_IGN_ESCAPE
;
292 if (curp
->fflags
& FL_NIGN_MACRO
)
293 pflags
&= ~MAN_IGN_MACRO
;
294 if (curp
->fflags
& FL_NIGN_ESCAPE
)
295 pflags
&= ~MAN_IGN_ESCAPE
;
297 return(man_alloc(curp
, pflags
, mmsg
));
302 roff_init(struct curparse
*curp
)
305 return(roff_alloc(mmsg
, curp
));
310 mdoc_init(struct curparse
*curp
)
314 /* Defaults from mandoc.1. */
316 pflags
= MDOC_IGN_MACRO
| MDOC_IGN_ESCAPE
;
318 if (curp
->fflags
& FL_IGN_SCOPE
)
319 pflags
|= MDOC_IGN_SCOPE
;
320 if (curp
->fflags
& FL_NIGN_ESCAPE
)
321 pflags
&= ~MDOC_IGN_ESCAPE
;
322 if (curp
->fflags
& FL_NIGN_MACRO
)
323 pflags
&= ~MDOC_IGN_MACRO
;
325 return(mdoc_alloc(curp
, pflags
, mmsg
));
330 ffile(const char *file
, struct curparse
*curp
)
334 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
342 if (-1 == close(curp
->fd
))
348 resize_buf(struct buf
*buf
, size_t initial
)
357 tmp
= realloc(buf
->buf
, sz
);
369 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
375 if (-1 == fstat(curp
->fd
, &st
)) {
382 * If we're a regular file, try just reading in the whole entry
383 * via mmap(). This is faster than reading it into blocks, and
384 * since each file is only a few bytes to begin with, I'm not
385 * concerned that this is going to tank any machines.
388 if (S_ISREG(st
.st_mode
)) {
389 if (st
.st_size
>= (1U << 31)) {
390 fprintf(stderr
, "%s: input too large\n",
396 fb
->sz
= (size_t)st
.st_size
;
397 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
398 MAP_FILE
, curp
->fd
, 0);
399 if (fb
->buf
!= MAP_FAILED
)
404 * If this isn't a regular file (like, say, stdin), then we must
405 * go the old way and just read things in bit by bit.
414 if (fb
->sz
== (1U << 31)) {
415 fprintf(stderr
, "%s: input too large\n",
419 if (! resize_buf(fb
, 65536))
422 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
442 fdesc(struct curparse
*curp
)
445 int i
, pos
, lnn
, lnn_start
, with_mmap
, of
;
454 memset(&ln
, 0, sizeof(struct buf
));
457 * Two buffers: ln and buf. buf is the input file and may be
458 * memory mapped. ln is a line buffer and grows on-demand.
461 if ( ! read_whole_file(curp
, &blk
, &with_mmap
))
464 if (NULL
== curp
->roff
)
465 curp
->roff
= roff_init(curp
);
466 if (NULL
== (roff
= curp
->roff
))
469 for (i
= 0, lnn
= 1; i
< (int)blk
.sz
;) {
472 while (i
< (int)blk
.sz
) {
473 if ('\n' == blk
.buf
[i
]) {
478 /* Trailing backslash is like a plain character. */
479 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
480 if (pos
>= (int)ln
.sz
)
481 if (! resize_buf(&ln
, 256))
483 ln
.buf
[pos
++] = blk
.buf
[i
++];
486 /* Found an escape and at least one other character. */
487 if ('\n' == blk
.buf
[i
+ 1]) {
488 /* Escaped newlines are skipped over */
493 if ('"' == blk
.buf
[i
+ 1]) {
495 /* Comment, skip to end of line */
496 for (; i
< (int)blk
.sz
; ++i
) {
497 if ('\n' == blk
.buf
[i
]) {
503 /* Backout trailing whitespaces */
504 for (; pos
> 0; --pos
) {
505 if (ln
.buf
[pos
- 1] != ' ')
507 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
512 /* Some other escape sequence, copy and continue. */
513 if (pos
+ 1 >= (int)ln
.sz
)
514 if (! resize_buf(&ln
, 256))
517 ln
.buf
[pos
++] = blk
.buf
[i
++];
518 ln
.buf
[pos
++] = blk
.buf
[i
++];
521 if (pos
>= (int)ln
.sz
)
522 if (! resize_buf(&ln
, 256))
527 * A significant amount of complexity is contained by
528 * the roff preprocessor. It's line-oriented but can be
529 * expressed on one line, so we need at times to
530 * readjust our starting point and re-run it. The roff
531 * preprocessor can also readjust the buffers with new
532 * data, so we pass them in wholesale.
537 re
= roff_parseln(roff
, lnn_start
,
538 &ln
.buf
, &ln
.sz
, of
, &of
);
539 } while (ROFF_RERUN
== re
);
543 else if (ROFF_ERR
== re
)
547 * If input parsers have not been allocated, do so now.
548 * We keep these instanced between parsers, but set them
549 * locally per parse routine since we can use different
550 * parsers with each one.
553 if ( ! (man
|| mdoc
))
554 if ( ! pset(ln
.buf
+ of
, pos
- of
, curp
, &man
, &mdoc
))
557 /* Lastly, push down into the parsers themselves. */
559 if (man
&& ! man_parseln(man
, lnn_start
, ln
.buf
, of
))
561 if (mdoc
&& ! mdoc_parseln(mdoc
, lnn_start
, ln
.buf
, of
))
565 /* NOTE a parser may not have been assigned, yet. */
567 if ( ! (man
|| mdoc
)) {
568 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
572 /* Clean up the parse routine ASTs. */
574 if (mdoc
&& ! mdoc_endparse(mdoc
))
576 if (man
&& ! man_endparse(man
))
578 if (roff
&& ! roff_endparse(roff
))
581 /* If unset, allocate output dev now (if applicable). */
583 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
584 switch (curp
->outtype
) {
586 curp
->outdata
= xhtml_alloc(curp
->outopts
);
587 curp
->outman
= html_man
;
588 curp
->outmdoc
= html_mdoc
;
589 curp
->outfree
= html_free
;
592 curp
->outdata
= html_alloc(curp
->outopts
);
593 curp
->outman
= html_man
;
594 curp
->outmdoc
= html_mdoc
;
595 curp
->outfree
= html_free
;
598 curp
->outman
= tree_man
;
599 curp
->outmdoc
= tree_mdoc
;
604 curp
->outdata
= ascii_alloc(80);
605 curp
->outman
= terminal_man
;
606 curp
->outmdoc
= terminal_mdoc
;
607 curp
->outfree
= terminal_free
;
612 /* Execute the out device, if it exists. */
614 if (man
&& curp
->outman
)
615 (*curp
->outman
)(curp
->outdata
, man
);
616 if (mdoc
&& curp
->outmdoc
)
617 (*curp
->outmdoc
)(curp
->outdata
, mdoc
);
629 munmap(blk
.buf
, blk
.sz
);
642 pset(const char *buf
, int pos
, struct curparse
*curp
,
643 struct man
**man
, struct mdoc
**mdoc
)
648 * Try to intuit which kind of manual parser should be used. If
649 * passed in by command-line (-man, -mdoc), then use that
650 * explicitly. If passed as -mandoc, then try to guess from the
651 * line: either skip dot-lines, use -mdoc when finding `.Dd', or
652 * default to -man, which is more lenient.
655 if ('.' == buf
[0] || '\'' == buf
[0]) {
656 for (i
= 1; buf
[i
]; i
++)
657 if (' ' != buf
[i
] && '\t' != buf
[i
])
663 switch (curp
->inttype
) {
665 if (NULL
== curp
->mdoc
)
666 curp
->mdoc
= mdoc_init(curp
);
667 if (NULL
== (*mdoc
= curp
->mdoc
))
671 if (NULL
== curp
->man
)
672 curp
->man
= man_init(curp
);
673 if (NULL
== (*man
= curp
->man
))
680 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
681 if (NULL
== curp
->mdoc
)
682 curp
->mdoc
= mdoc_init(curp
);
683 if (NULL
== (*mdoc
= curp
->mdoc
))
688 if (NULL
== curp
->man
)
689 curp
->man
= man_init(curp
);
690 if (NULL
== (*man
= curp
->man
))
697 moptions(enum intt
*tflags
, char *arg
)
700 if (0 == strcmp(arg
, "doc"))
702 else if (0 == strcmp(arg
, "andoc"))
704 else if (0 == strcmp(arg
, "an"))
707 fprintf(stderr
, "%s: Bad argument\n", arg
);
716 toptions(struct curparse
*curp
, char *arg
)
719 if (0 == strcmp(arg
, "ascii"))
720 curp
->outtype
= OUTT_ASCII
;
721 else if (0 == strcmp(arg
, "lint")) {
722 curp
->outtype
= OUTT_LINT
;
723 curp
->wflags
|= WARN_WALL
;
724 curp
->fflags
|= FL_STRICT
;
726 else if (0 == strcmp(arg
, "tree"))
727 curp
->outtype
= OUTT_TREE
;
728 else if (0 == strcmp(arg
, "html"))
729 curp
->outtype
= OUTT_HTML
;
730 else if (0 == strcmp(arg
, "xhtml"))
731 curp
->outtype
= OUTT_XHTML
;
733 fprintf(stderr
, "%s: Bad argument\n", arg
);
742 foptions(int *fflags
, char *arg
)
747 toks
[0] = "ign-scope";
748 toks
[1] = "no-ign-escape";
749 toks
[2] = "no-ign-macro";
750 toks
[3] = "ign-errors";
752 toks
[5] = "ign-escape";
757 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
759 *fflags
|= FL_IGN_SCOPE
;
762 *fflags
|= FL_NIGN_ESCAPE
;
765 *fflags
|= FL_NIGN_MACRO
;
768 *fflags
|= FL_IGN_ERRORS
;
771 *fflags
|= FL_STRICT
;
774 *fflags
&= ~FL_NIGN_ESCAPE
;
777 fprintf(stderr
, "%s: Bad argument\n", o
);
787 woptions(int *wflags
, char *arg
)
798 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
800 *wflags
|= WARN_WALL
;
803 *wflags
|= WARN_WERR
;
806 fprintf(stderr
, "%s: Bad argument\n", o
);
816 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
820 cp
= (struct curparse
*)arg
;
822 if (t
<= MANDOCERR_ERROR
) {
823 if ( ! (cp
->wflags
& WARN_WALL
))
829 fprintf(stderr
, "%s:%d:%d: %s", cp
->file
,
830 ln
, col
+ 1, mandocerrs
[t
]);
833 fprintf(stderr
, ": %s", msg
);
837 /* This is superfluous, but whatever. */
838 if (t
> MANDOCERR_ERROR
)
840 if (cp
->wflags
& WARN_WERR
) {