]>
git.cameronkatri.com Git - mandoc.git/blob - main.c
1 /* $Id: main.c,v 1.100 2010/07/25 11:44:31 kristaps Exp $ */
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
40 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
42 /* FIXME: Intel's compiler? LLVM? pcc? */
44 #if !defined(__GNUC__) || (__GNUC__ < 2)
46 # define __attribute__(x)
48 #endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
50 typedef void (*out_mdoc
)(void *, const struct mdoc
*);
51 typedef void (*out_man
)(void *, const struct man
*);
52 typedef void (*out_free
)(void *);
76 const char *file
; /* Current parse. */
77 int fd
; /* Current parse. */
79 /* FIXME: set by max error */
80 #define WARN_WALL (1 << 0) /* All-warnings mask. */
81 #define WARN_WERR (1 << 2) /* Warnings->errors. */
83 #define FL_IGN_SCOPE (1 << 0) /* Ignore scope errors. */
84 #define FL_NIGN_ESCAPE (1 << 1) /* Don't ignore bad escapes. */
85 #define FL_NIGN_MACRO (1 << 2) /* Don't ignore bad macros. */
86 #define FL_IGN_ERRORS (1 << 4) /* Ignore failed parse. */
/*
 * Strict mode ("ignore nothing"): the union of the two "don't ignore"
 * flags, FL_NIGN_ESCAPE and FL_NIGN_MACRO.  The replacement list is
 * parenthesized so the macro always expands to a single operand; without
 * the parentheses an expression such as `~FL_STRICT' or `x & FL_STRICT'
 * would bind to FL_NIGN_ESCAPE alone.
 */
#define	FL_STRICT	 (FL_NIGN_ESCAPE | FL_NIGN_MACRO)
89 enum intt inttype
; /* which parser to use */
90 struct man
*man
; /* man parser */
91 struct mdoc
*mdoc
; /* mdoc parser */
92 struct roff
*roff
; /* roff parser (!NULL) */
93 struct regset regs
; /* roff registers */
94 enum outt outtype
; /* which output to use */
95 out_mdoc outmdoc
; /* mdoc output ptr */
96 out_man outman
; /* man output ptr */
97 out_free outfree
; /* free output ptr */
98 void *outdata
; /* data for output */
99 char outopts
[BUFSIZ
]; /* buf of output opts */
102 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
107 "text should be uppercase",
108 "sections out of conventional order",
109 "section name repeats",
110 "out of order prologue",
111 "repeated prologue entry",
112 "list type must come first",
115 "tab in non-literal context",
116 "bad escape sequence",
117 "unterminated quoted string",
118 "argument requires the width argument",
119 "superfluous width argument",
122 "bad width argument",
123 "unknown manual section",
124 "section not in conventional manual section",
125 "end of line whitespace",
126 "blocks badly nested",
127 "scope open on exit",
131 "NAME section must come first",
133 "child violates parent syntax",
135 "list type repeated",
136 "display type repeated",
138 "manual name not yet set",
139 "obsolete macro ignored",
140 "empty macro ignored",
141 "macro not allowed in body",
142 "macro not allowed in prologue",
144 "bad NAME section contents",
146 "no text in this context",
148 "unknown macro will be lost",
150 "argument count wrong",
151 "request scope close w/none open",
152 "scope already open",
153 "macro requires line argument(s)",
154 "macro requires body argument(s)",
155 "macro requires argument(s)",
156 "no title in document",
158 "missing display type",
160 "line argument(s) will be lost",
161 "body argument(s) will be lost",
163 "generic fatal error",
165 "column syntax is inconsistent",
166 "displays may not be nested",
167 "unsupported display type",
168 "blocks badly nested",
169 "no such block is open",
170 "scope broken, syntax violated",
171 "line scope broken, syntax violated",
172 "argument count wrong, violates syntax",
173 "child violates parent syntax",
174 "argument count wrong, violates syntax",
176 "no document prologue",
177 "utsname system call failed",
181 static void fdesc(struct curparse
*);
182 static void ffile(const char *, struct curparse
*);
183 static int foptions(int *, char *);
184 static struct man
*man_init(struct curparse
*);
185 static struct mdoc
*mdoc_init(struct curparse
*);
186 static struct roff
*roff_init(struct curparse
*);
187 static int moptions(enum intt
*, char *);
188 static int mmsg(enum mandocerr
, void *,
189 int, int, const char *);
190 static int pset(const char *, int, struct curparse
*,
191 struct man
**, struct mdoc
**);
192 static int toptions(struct curparse
*, char *);
193 static void usage(void) __attribute__((noreturn
));
194 static void version(void) __attribute__((noreturn
));
195 static int woptions(int *, char *);
197 static const char *progname
;
198 static int with_fatal
;
199 static int with_error
;
202 main(int argc
, char *argv
[])
205 struct curparse curp
;
207 progname
= strrchr(argv
[0], '/');
208 if (progname
== NULL
)
213 memset(&curp
, 0, sizeof(struct curparse
));
215 curp
.inttype
= INTT_AUTO
;
216 curp
.outtype
= OUTT_ASCII
;
219 while (-1 != (c
= getopt(argc
, argv
, "f:m:O:T:VW:")))
222 if ( ! foptions(&curp
.fflags
, optarg
))
223 return(EXIT_FAILURE
);
226 if ( ! moptions(&curp
.inttype
, optarg
))
227 return(EXIT_FAILURE
);
230 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
231 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
234 if ( ! toptions(&curp
, optarg
))
235 return(EXIT_FAILURE
);
238 if ( ! woptions(&curp
.wflags
, optarg
))
239 return(EXIT_FAILURE
);
253 curp
.file
= "<stdin>";
254 curp
.fd
= STDIN_FILENO
;
262 if (with_fatal
&& !(curp
.fflags
& FL_IGN_ERRORS
))
268 (*curp
.outfree
)(curp
.outdata
);
270 mdoc_free(curp
.mdoc
);
274 roff_free(curp
.roff
);
276 return((with_fatal
|| with_error
) ?
277 EXIT_FAILURE
: EXIT_SUCCESS
);
285 (void)printf("%s %s\n", progname
, VERSION
);
294 (void)fprintf(stderr
, "usage: %s [-V] [-foption] "
295 "[-mformat] [-Ooption] [-Toutput] "
296 "[-Werr] [file...]\n", progname
);
302 man_init(struct curparse
*curp
)
306 /* Defaults from mandoc.1. */
308 pflags
= MAN_IGN_MACRO
| MAN_IGN_ESCAPE
;
310 if (curp
->fflags
& FL_NIGN_MACRO
)
311 pflags
&= ~MAN_IGN_MACRO
;
312 if (curp
->fflags
& FL_NIGN_ESCAPE
)
313 pflags
&= ~MAN_IGN_ESCAPE
;
315 return(man_alloc(&curp
->regs
, curp
, pflags
, mmsg
));
320 roff_init(struct curparse
*curp
)
323 return(roff_alloc(&curp
->regs
, mmsg
, curp
));
328 mdoc_init(struct curparse
*curp
)
332 /* Defaults from mandoc.1. */
334 pflags
= MDOC_IGN_MACRO
| MDOC_IGN_ESCAPE
;
336 if (curp
->fflags
& FL_IGN_SCOPE
)
337 pflags
|= MDOC_IGN_SCOPE
;
338 if (curp
->fflags
& FL_NIGN_ESCAPE
)
339 pflags
&= ~MDOC_IGN_ESCAPE
;
340 if (curp
->fflags
& FL_NIGN_MACRO
)
341 pflags
&= ~MDOC_IGN_MACRO
;
343 return(mdoc_alloc(&curp
->regs
, curp
, pflags
, mmsg
));
348 ffile(const char *file
, struct curparse
*curp
)
352 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
360 if (-1 == close(curp
->fd
))
366 resize_buf(struct buf
*buf
, size_t initial
)
375 tmp
= realloc(buf
->buf
, sz
);
387 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
393 if (-1 == fstat(curp
->fd
, &st
)) {
400 * If we're a regular file, try just reading in the whole entry
401 * via mmap(). This is faster than reading it into blocks, and
402 * since each file is only a few bytes to begin with, I'm not
403 * concerned that this is going to tank any machines.
406 if (S_ISREG(st
.st_mode
)) {
407 if (st
.st_size
>= (1U << 31)) {
408 fprintf(stderr
, "%s: input too large\n",
414 fb
->sz
= (size_t)st
.st_size
;
415 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
416 MAP_FILE
|MAP_SHARED
, curp
->fd
, 0);
417 if (fb
->buf
!= MAP_FAILED
)
422 * If this isn't a regular file (like, say, stdin), then we must
423 * go the old way and just read things in bit by bit.
432 if (fb
->sz
== (1U << 31)) {
433 fprintf(stderr
, "%s: input too large\n",
437 if (! resize_buf(fb
, 65536))
440 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
460 fdesc(struct curparse
*curp
)
463 int i
, pos
, lnn
, lnn_start
, with_mmap
, of
;
473 memset(&ln
, 0, sizeof(struct buf
));
476 * Two buffers: ln and buf. buf is the input file and may be
477 * memory mapped. ln is a line buffer and grows on-demand.
480 if ( ! read_whole_file(curp
, &blk
, &with_mmap
))
483 if (NULL
== curp
->roff
)
484 curp
->roff
= roff_init(curp
);
485 if (NULL
== (roff
= curp
->roff
))
488 for (i
= 0, lnn
= 1; i
< (int)blk
.sz
;) {
491 while (i
< (int)blk
.sz
) {
492 if ('\n' == blk
.buf
[i
]) {
499 * Warn about bogus characters. If you're using
500 * non-ASCII encoding, you're screwing your
501 * readers. Since I'd rather this not happen,
502 * I'll be helpful and drop these characters so
503 * we don't display gibberish. Note to manual
504 * writers: use special characters.
507 if ( ! isgraph((u_char
)blk
.buf
[i
]) &&
508 ! isblank((u_char
)blk
.buf
[i
])) {
509 if ( ! mmsg(MANDOCERR_BADCHAR
, curp
,
517 /* Trailing backslash is like a plain character. */
518 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
519 if (pos
>= (int)ln
.sz
)
520 if (! resize_buf(&ln
, 256))
522 ln
.buf
[pos
++] = blk
.buf
[i
++];
525 /* Found an escape and at least one other character. */
526 if ('\n' == blk
.buf
[i
+ 1]) {
527 /* Escaped newlines are skipped over */
532 if ('"' == blk
.buf
[i
+ 1]) {
534 /* Comment, skip to end of line */
535 for (; i
< (int)blk
.sz
; ++i
) {
536 if ('\n' == blk
.buf
[i
]) {
542 /* Back out trailing spaces */
543 for (; pos
> 0; --pos
) {
544 if (ln
.buf
[pos
- 1] != ' ')
546 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
551 /* Some other escape sequence, copy and continue. */
552 if (pos
+ 1 >= (int)ln
.sz
)
553 if (! resize_buf(&ln
, 256))
556 ln
.buf
[pos
++] = blk
.buf
[i
++];
557 ln
.buf
[pos
++] = blk
.buf
[i
++];
560 if (pos
>= (int)ln
.sz
)
561 if (! resize_buf(&ln
, 256))
566 * A significant amount of complexity is contained by
567 * the roff preprocessor. It's line-oriented but can be
568 * expressed on one line, so we need at times to
569 * readjust our starting point and re-run it. The roff
570 * preprocessor can also readjust the buffers with new
571 * data, so we pass them in wholesale.
576 re
= roff_parseln(roff
, lnn_start
,
577 &ln
.buf
, &ln
.sz
, of
, &of
);
578 } while (ROFF_RERUN
== re
);
582 else if (ROFF_ERR
== re
)
586 * If input parsers have not been allocated, do so now.
587 * We keep these instanced between parses, but set them
588 * locally per parse routine since we can use different
589 * parsers with each one.
592 if ( ! (man
|| mdoc
))
593 if ( ! pset(ln
.buf
+ of
, pos
- of
, curp
, &man
, &mdoc
))
596 /* Lastly, push down into the parsers themselves. */
598 if (man
&& ! man_parseln(man
, lnn_start
, ln
.buf
, of
))
600 if (mdoc
&& ! mdoc_parseln(mdoc
, lnn_start
, ln
.buf
, of
))
604 /* NOTE a parser may not have been assigned, yet. */
606 if ( ! (man
|| mdoc
)) {
607 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
611 /* Clean up the parse routine ASTs. */
613 if (mdoc
&& ! mdoc_endparse(mdoc
))
615 if (man
&& ! man_endparse(man
))
617 if (roff
&& ! roff_endparse(roff
))
620 /* If unset, allocate output dev now (if applicable). */
622 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
623 switch (curp
->outtype
) {
625 curp
->outdata
= xhtml_alloc(curp
->outopts
);
628 curp
->outdata
= html_alloc(curp
->outopts
);
631 curp
->outdata
= ascii_alloc(curp
->outopts
);
632 curp
->outfree
= ascii_free
;
635 curp
->outdata
= pdf_alloc(curp
->outopts
);
636 curp
->outfree
= pspdf_free
;
639 curp
->outdata
= ps_alloc(curp
->outopts
);
640 curp
->outfree
= pspdf_free
;
646 switch (curp
->outtype
) {
650 curp
->outman
= html_man
;
651 curp
->outmdoc
= html_mdoc
;
652 curp
->outfree
= html_free
;
655 curp
->outman
= tree_man
;
656 curp
->outmdoc
= tree_mdoc
;
663 curp
->outman
= terminal_man
;
664 curp
->outmdoc
= terminal_mdoc
;
671 /* Execute the out device, if it exists. */
673 if (man
&& curp
->outman
)
674 (*curp
->outman
)(curp
->outdata
, man
);
675 if (mdoc
&& curp
->outmdoc
)
676 (*curp
->outmdoc
)(curp
->outdata
, mdoc
);
679 memset(&curp
->regs
, 0, sizeof(struct regset
));
689 munmap(blk
.buf
, blk
.sz
);
702 pset(const char *buf
, int pos
, struct curparse
*curp
,
703 struct man
**man
, struct mdoc
**mdoc
)
708 * Try to intuit which kind of manual parser should be used. If
709 * passed in by command-line (-man, -mdoc), then use that
710 * explicitly. If passed as -mandoc, then try to guess from the
711 * line: either skip dot-lines, use -mdoc when finding `.Dd', or
712 * default to -man, which is more lenient.
715 if ('.' == buf
[0] || '\'' == buf
[0]) {
716 for (i
= 1; buf
[i
]; i
++)
717 if (' ' != buf
[i
] && '\t' != buf
[i
])
723 switch (curp
->inttype
) {
725 if (NULL
== curp
->mdoc
)
726 curp
->mdoc
= mdoc_init(curp
);
727 if (NULL
== (*mdoc
= curp
->mdoc
))
731 if (NULL
== curp
->man
)
732 curp
->man
= man_init(curp
);
733 if (NULL
== (*man
= curp
->man
))
740 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
741 if (NULL
== curp
->mdoc
)
742 curp
->mdoc
= mdoc_init(curp
);
743 if (NULL
== (*mdoc
= curp
->mdoc
))
748 if (NULL
== curp
->man
)
749 curp
->man
= man_init(curp
);
750 if (NULL
== (*man
= curp
->man
))
757 moptions(enum intt
*tflags
, char *arg
)
760 if (0 == strcmp(arg
, "doc"))
762 else if (0 == strcmp(arg
, "andoc"))
764 else if (0 == strcmp(arg
, "an"))
767 fprintf(stderr
, "%s: Bad argument\n", arg
);
776 toptions(struct curparse
*curp
, char *arg
)
779 if (0 == strcmp(arg
, "ascii"))
780 curp
->outtype
= OUTT_ASCII
;
781 else if (0 == strcmp(arg
, "lint")) {
782 curp
->outtype
= OUTT_LINT
;
783 curp
->wflags
|= WARN_WALL
;
784 curp
->fflags
|= FL_STRICT
;
786 else if (0 == strcmp(arg
, "tree"))
787 curp
->outtype
= OUTT_TREE
;
788 else if (0 == strcmp(arg
, "html"))
789 curp
->outtype
= OUTT_HTML
;
790 else if (0 == strcmp(arg
, "xhtml"))
791 curp
->outtype
= OUTT_XHTML
;
792 else if (0 == strcmp(arg
, "ps"))
793 curp
->outtype
= OUTT_PS
;
794 else if (0 == strcmp(arg
, "pdf"))
795 curp
->outtype
= OUTT_PDF
;
797 fprintf(stderr
, "%s: Bad argument\n", arg
);
806 foptions(int *fflags
, char *arg
)
811 toks
[0] = "ign-scope";
812 toks
[1] = "no-ign-escape";
813 toks
[2] = "no-ign-macro";
814 toks
[3] = "ign-errors";
816 toks
[5] = "ign-escape";
821 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
823 *fflags
|= FL_IGN_SCOPE
;
826 *fflags
|= FL_NIGN_ESCAPE
;
829 *fflags
|= FL_NIGN_MACRO
;
832 *fflags
|= FL_IGN_ERRORS
;
835 *fflags
|= FL_STRICT
;
838 *fflags
&= ~FL_NIGN_ESCAPE
;
841 fprintf(stderr
, "%s: Bad argument\n", o
);
851 woptions(int *wflags
, char *arg
)
862 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
864 *wflags
|= WARN_WALL
;
867 *wflags
|= WARN_WERR
;
870 fprintf(stderr
, "%s: Bad argument\n", o
);
880 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
886 cp
= (struct curparse
*)arg
;
890 if (t
>= MANDOCERR_FATAL
) {
895 if ( ! (WARN_WALL
& cp
->wflags
))
897 if (t
>= MANDOCERR_ERROR
) {
901 if (WARN_WERR
& cp
->wflags
) {
907 fprintf(stderr
, "%s:%d:%d:", cp
->file
, ln
, col
+ 1);
909 fprintf(stderr
, " %s:", level
);
910 fprintf(stderr
, " %s", mandocerrs
[t
]);
912 fprintf(stderr
, ": %s", msg
);