]>
git.cameronkatri.com Git - mandoc.git/blob - main.c
1 /* $Id: main.c,v 1.102 2010/08/08 14:45:59 schwarze Exp $ */
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
46 /* FIXME: Intel's compiler? LLVM? pcc? */
48 #if !defined(__GNUC__) || (__GNUC__ < 2)
50 # define __attribute__(x)
52 #endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
54 typedef void (*out_mdoc
)(void *, const struct mdoc
*);
55 typedef void (*out_man
)(void *, const struct man
*);
56 typedef void (*out_free
)(void *);
80 const char *file
; /* Current parse. */
81 int fd
; /* Current parse. */
83 /* FIXME: set by max error */
84 #define WARN_WALL (1 << 0) /* All-warnings mask. */
85 #define WARN_WERR (1 << 2) /* Warnings->errors. */
/*
 * Bits kept in the fflags word (set by foptions()/toptions(), tested
 * by man_init()/mdoc_init() and main()).
 */
#define	FL_IGN_SCOPE	 (1 << 0) /* Ignore scope errors. */
#define	FL_NIGN_ESCAPE	 (1 << 1) /* Don't ignore bad escapes. */
#define	FL_NIGN_MACRO	 (1 << 2) /* Don't ignore bad macros. */
#define	FL_IGN_ERRORS	 (1 << 4) /* Ignore failed parse. */
/*
 * The expansion is parenthesized so the mask behaves as one operand:
 * without the parentheses, `~FL_STRICT' or `x & FL_STRICT' would bind
 * to FL_NIGN_ESCAPE alone and silently yield the wrong bits.
 */
#define	FL_STRICT	 (FL_NIGN_ESCAPE | \
			  FL_NIGN_MACRO) /* ignore nothing */
93 enum intt inttype
; /* which parser to use */
94 struct man
*man
; /* man parser */
95 struct mdoc
*mdoc
; /* mdoc parser */
96 struct roff
*roff
; /* roff parser (!NULL) */
97 struct regset regs
; /* roff registers */
98 enum outt outtype
; /* which output to use */
99 out_mdoc outmdoc
; /* mdoc output ptr */
100 out_man outman
; /* man output ptr */
101 out_free outfree
; /* free output ptr */
102 void *outdata
; /* data for output */
103 char outopts
[BUFSIZ
]; /* buf of output opts */
106 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
111 "text should be uppercase",
112 "sections out of conventional order",
113 "section name repeats",
114 "out of order prologue",
115 "repeated prologue entry",
116 "list type must come first",
119 "tab in non-literal context",
120 "bad escape sequence",
121 "unterminated quoted string",
122 "argument requires the width argument",
123 "superfluous width argument",
126 "bad width argument",
127 "unknown manual section",
128 "section not in conventional manual section",
129 "end of line whitespace",
130 "blocks badly nested",
131 "scope open on exit",
135 "NAME section must come first",
137 "child violates parent syntax",
139 "list type repeated",
140 "display type repeated",
142 "manual name not yet set",
143 "obsolete macro ignored",
144 "empty macro ignored",
145 "macro not allowed in body",
146 "macro not allowed in prologue",
148 "bad NAME section contents",
150 "no text in this context",
152 "unknown macro will be lost",
154 "argument count wrong",
155 "request scope close w/none open",
156 "scope already open",
157 "macro requires line argument(s)",
158 "macro requires body argument(s)",
159 "macro requires argument(s)",
160 "no title in document",
162 "missing display type",
164 "line argument(s) will be lost",
165 "body argument(s) will be lost",
167 "generic fatal error",
169 "column syntax is inconsistent",
170 "displays may not be nested",
171 "unsupported display type",
172 "blocks badly nested",
173 "no such block is open",
174 "scope broken, syntax violated",
175 "line scope broken, syntax violated",
176 "argument count wrong, violates syntax",
177 "child violates parent syntax",
178 "argument count wrong, violates syntax",
180 "no document prologue",
181 "utsname system call failed",
185 static void fdesc(struct curparse
*);
186 static void ffile(const char *, struct curparse
*);
187 static int foptions(int *, char *);
188 static struct man
*man_init(struct curparse
*);
189 static struct mdoc
*mdoc_init(struct curparse
*);
190 static struct roff
*roff_init(struct curparse
*);
191 static int moptions(enum intt
*, char *);
192 static int mmsg(enum mandocerr
, void *,
193 int, int, const char *);
194 static int pset(const char *, int, struct curparse
*,
195 struct man
**, struct mdoc
**);
196 static int toptions(struct curparse
*, char *);
197 static void usage(void) __attribute__((noreturn
));
198 static void version(void) __attribute__((noreturn
));
199 static int woptions(int *, char *);
201 static const char *progname
;
202 static int with_fatal
;
203 static int with_error
;
206 main(int argc
, char *argv
[])
209 struct curparse curp
;
211 progname
= strrchr(argv
[0], '/');
212 if (progname
== NULL
)
217 memset(&curp
, 0, sizeof(struct curparse
));
219 curp
.inttype
= INTT_AUTO
;
220 curp
.outtype
= OUTT_ASCII
;
223 while (-1 != (c
= getopt(argc
, argv
, "f:m:O:T:VW:")))
226 if ( ! foptions(&curp
.fflags
, optarg
))
227 return(EXIT_FAILURE
);
230 if ( ! moptions(&curp
.inttype
, optarg
))
231 return(EXIT_FAILURE
);
234 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
235 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
238 if ( ! toptions(&curp
, optarg
))
239 return(EXIT_FAILURE
);
242 if ( ! woptions(&curp
.wflags
, optarg
))
243 return(EXIT_FAILURE
);
257 curp
.file
= "<stdin>";
258 curp
.fd
= STDIN_FILENO
;
266 if (with_fatal
&& !(curp
.fflags
& FL_IGN_ERRORS
))
272 (*curp
.outfree
)(curp
.outdata
);
274 mdoc_free(curp
.mdoc
);
278 roff_free(curp
.roff
);
280 return((with_fatal
|| with_error
) ?
281 EXIT_FAILURE
: EXIT_SUCCESS
);
289 (void)printf("%s %s\n", progname
, VERSION
);
298 (void)fprintf(stderr
, "usage: %s [-V] [-foption] "
299 "[-mformat] [-Ooption] [-Toutput] "
300 "[-Werr] [file...]\n", progname
);
306 man_init(struct curparse
*curp
)
310 /* Defaults from mandoc.1. */
312 pflags
= MAN_IGN_MACRO
| MAN_IGN_ESCAPE
;
314 if (curp
->fflags
& FL_NIGN_MACRO
)
315 pflags
&= ~MAN_IGN_MACRO
;
316 if (curp
->fflags
& FL_NIGN_ESCAPE
)
317 pflags
&= ~MAN_IGN_ESCAPE
;
319 return(man_alloc(&curp
->regs
, curp
, pflags
, mmsg
));
324 roff_init(struct curparse
*curp
)
327 return(roff_alloc(&curp
->regs
, mmsg
, curp
));
332 mdoc_init(struct curparse
*curp
)
336 /* Defaults from mandoc.1. */
338 pflags
= MDOC_IGN_MACRO
| MDOC_IGN_ESCAPE
;
340 if (curp
->fflags
& FL_IGN_SCOPE
)
341 pflags
|= MDOC_IGN_SCOPE
;
342 if (curp
->fflags
& FL_NIGN_ESCAPE
)
343 pflags
&= ~MDOC_IGN_ESCAPE
;
344 if (curp
->fflags
& FL_NIGN_MACRO
)
345 pflags
&= ~MDOC_IGN_MACRO
;
347 return(mdoc_alloc(&curp
->regs
, curp
, pflags
, mmsg
));
352 ffile(const char *file
, struct curparse
*curp
)
356 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
364 if (-1 == close(curp
->fd
))
370 resize_buf(struct buf
*buf
, size_t initial
)
379 tmp
= realloc(buf
->buf
, sz
);
391 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
397 if (-1 == fstat(curp
->fd
, &st
)) {
404 * If we're a regular file, try just reading in the whole entry
405 * via mmap(). This is faster than reading it into blocks, and
406 * since each file is only a few bytes to begin with, I'm not
407 * concerned that this is going to tank any machines.
410 if (S_ISREG(st
.st_mode
)) {
411 if (st
.st_size
>= (1U << 31)) {
412 fprintf(stderr
, "%s: input too large\n",
418 fb
->sz
= (size_t)st
.st_size
;
419 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
420 MAP_FILE
|MAP_SHARED
, curp
->fd
, 0);
421 if (fb
->buf
!= MAP_FAILED
)
426 * If this isn't a regular file (like, say, stdin), then we must
427 * go the old way and just read things in bit by bit.
436 if (fb
->sz
== (1U << 31)) {
437 fprintf(stderr
, "%s: input too large\n",
441 if (! resize_buf(fb
, 65536))
444 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
464 fdesc(struct curparse
*curp
)
467 int i
, pos
, lnn
, lnn_start
, with_mmap
, of
;
478 memset(&ln
, 0, sizeof(struct buf
));
481 * Two buffers: ln and buf. buf is the input file and may be
482 * memory mapped. ln is a line buffer and grows on-demand.
485 if ( ! read_whole_file(curp
, &blk
, &with_mmap
))
488 if (NULL
== curp
->roff
)
489 curp
->roff
= roff_init(curp
);
490 if (NULL
== (roff
= curp
->roff
))
493 for (i
= 0, lnn
= 1; i
< (int)blk
.sz
;) {
496 while (i
< (int)blk
.sz
) {
497 if ('\n' == blk
.buf
[i
]) {
504 * Warn about bogus characters. If you're using
505 * non-ASCII encoding, you're screwing your
506 * readers. Since I'd rather this not happen,
507 * I'll be helpful and drop these characters so
508 * we don't display gibberish. Note to manual
509 * writers: use special characters.
512 c
= (unsigned char) blk
.buf
[i
];
513 if ( ! (isascii(c
) && (isgraph(c
) || isblank(c
)))) {
514 if ( ! mmsg(MANDOCERR_BADCHAR
, curp
,
522 /* Trailing backslash is like a plain character. */
523 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
524 if (pos
>= (int)ln
.sz
)
525 if (! resize_buf(&ln
, 256))
527 ln
.buf
[pos
++] = blk
.buf
[i
++];
530 /* Found an escape and at least one other character. */
531 if ('\n' == blk
.buf
[i
+ 1]) {
532 /* Escaped newlines are skipped over */
537 if ('"' == blk
.buf
[i
+ 1]) {
539 /* Comment, skip to end of line */
540 for (; i
< (int)blk
.sz
; ++i
) {
541 if ('\n' == blk
.buf
[i
]) {
547 /* Backout trailing whitespaces */
548 for (; pos
> 0; --pos
) {
549 if (ln
.buf
[pos
- 1] != ' ')
551 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
556 /* Some other escape sequence, copy and continue. */
557 if (pos
+ 1 >= (int)ln
.sz
)
558 if (! resize_buf(&ln
, 256))
561 ln
.buf
[pos
++] = blk
.buf
[i
++];
562 ln
.buf
[pos
++] = blk
.buf
[i
++];
565 if (pos
>= (int)ln
.sz
)
566 if (! resize_buf(&ln
, 256))
571 * A significant amount of complexity is contained by
572 * the roff preprocessor. It's line-oriented but can be
573 * expressed on one line, so we need at times to
574 * readjust our starting point and re-run it. The roff
575 * preprocessor can also readjust the buffers with new
576 * data, so we pass them in wholesale.
581 re
= roff_parseln(roff
, lnn_start
,
582 &ln
.buf
, &ln
.sz
, of
, &of
);
583 } while (ROFF_RERUN
== re
);
587 else if (ROFF_ERR
== re
)
591 * If input parsers have not been allocated, do so now.
592 * We keep these instanced between parsers, but set them
593 * locally per parse routine since we can use different
594 * parsers with each one.
597 if ( ! (man
|| mdoc
))
598 if ( ! pset(ln
.buf
+ of
, pos
- of
, curp
, &man
, &mdoc
))
601 /* Lastly, push down into the parsers themselves. */
603 if (man
&& ! man_parseln(man
, lnn_start
, ln
.buf
, of
))
605 if (mdoc
&& ! mdoc_parseln(mdoc
, lnn_start
, ln
.buf
, of
))
609 /* NOTE a parser may not have been assigned, yet. */
611 if ( ! (man
|| mdoc
)) {
612 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
616 /* Clean up the parse routine ASTs. */
618 if (mdoc
&& ! mdoc_endparse(mdoc
))
620 if (man
&& ! man_endparse(man
))
622 if (roff
&& ! roff_endparse(roff
))
625 /* If unset, allocate output dev now (if applicable). */
627 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
628 switch (curp
->outtype
) {
630 curp
->outdata
= xhtml_alloc(curp
->outopts
);
633 curp
->outdata
= html_alloc(curp
->outopts
);
636 curp
->outdata
= ascii_alloc(curp
->outopts
);
637 curp
->outfree
= ascii_free
;
640 curp
->outdata
= pdf_alloc(curp
->outopts
);
641 curp
->outfree
= pspdf_free
;
644 curp
->outdata
= ps_alloc(curp
->outopts
);
645 curp
->outfree
= pspdf_free
;
651 switch (curp
->outtype
) {
655 curp
->outman
= html_man
;
656 curp
->outmdoc
= html_mdoc
;
657 curp
->outfree
= html_free
;
660 curp
->outman
= tree_man
;
661 curp
->outmdoc
= tree_mdoc
;
668 curp
->outman
= terminal_man
;
669 curp
->outmdoc
= terminal_mdoc
;
676 /* Execute the out device, if it exists. */
678 if (man
&& curp
->outman
)
679 (*curp
->outman
)(curp
->outdata
, man
);
680 if (mdoc
&& curp
->outmdoc
)
681 (*curp
->outmdoc
)(curp
->outdata
, mdoc
);
684 memset(&curp
->regs
, 0, sizeof(struct regset
));
694 munmap(blk
.buf
, blk
.sz
);
707 pset(const char *buf
, int pos
, struct curparse
*curp
,
708 struct man
**man
, struct mdoc
**mdoc
)
713 * Try to intuit which kind of manual parser should be used. If
714 * passed in by command-line (-man, -mdoc), then use that
715 * explicitly. If passed as -mandoc, then try to guess from the
716 * line: either skip dot-lines, use -mdoc when finding `.Dd', or
717 * default to -man, which is more lenient.
720 if ('.' == buf
[0] || '\'' == buf
[0]) {
721 for (i
= 1; buf
[i
]; i
++)
722 if (' ' != buf
[i
] && '\t' != buf
[i
])
728 switch (curp
->inttype
) {
730 if (NULL
== curp
->mdoc
)
731 curp
->mdoc
= mdoc_init(curp
);
732 if (NULL
== (*mdoc
= curp
->mdoc
))
736 if (NULL
== curp
->man
)
737 curp
->man
= man_init(curp
);
738 if (NULL
== (*man
= curp
->man
))
745 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
746 if (NULL
== curp
->mdoc
)
747 curp
->mdoc
= mdoc_init(curp
);
748 if (NULL
== (*mdoc
= curp
->mdoc
))
753 if (NULL
== curp
->man
)
754 curp
->man
= man_init(curp
);
755 if (NULL
== (*man
= curp
->man
))
762 moptions(enum intt
*tflags
, char *arg
)
765 if (0 == strcmp(arg
, "doc"))
767 else if (0 == strcmp(arg
, "andoc"))
769 else if (0 == strcmp(arg
, "an"))
772 fprintf(stderr
, "%s: Bad argument\n", arg
);
781 toptions(struct curparse
*curp
, char *arg
)
784 if (0 == strcmp(arg
, "ascii"))
785 curp
->outtype
= OUTT_ASCII
;
786 else if (0 == strcmp(arg
, "lint")) {
787 curp
->outtype
= OUTT_LINT
;
788 curp
->wflags
|= WARN_WALL
;
789 curp
->fflags
|= FL_STRICT
;
791 else if (0 == strcmp(arg
, "tree"))
792 curp
->outtype
= OUTT_TREE
;
793 else if (0 == strcmp(arg
, "html"))
794 curp
->outtype
= OUTT_HTML
;
795 else if (0 == strcmp(arg
, "xhtml"))
796 curp
->outtype
= OUTT_XHTML
;
797 else if (0 == strcmp(arg
, "ps"))
798 curp
->outtype
= OUTT_PS
;
799 else if (0 == strcmp(arg
, "pdf"))
800 curp
->outtype
= OUTT_PDF
;
802 fprintf(stderr
, "%s: Bad argument\n", arg
);
811 foptions(int *fflags
, char *arg
)
816 toks
[0] = "ign-scope";
817 toks
[1] = "no-ign-escape";
818 toks
[2] = "no-ign-macro";
819 toks
[3] = "ign-errors";
821 toks
[5] = "ign-escape";
826 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
828 *fflags
|= FL_IGN_SCOPE
;
831 *fflags
|= FL_NIGN_ESCAPE
;
834 *fflags
|= FL_NIGN_MACRO
;
837 *fflags
|= FL_IGN_ERRORS
;
840 *fflags
|= FL_STRICT
;
843 *fflags
&= ~FL_NIGN_ESCAPE
;
846 fprintf(stderr
, "%s: Bad argument\n", o
);
856 woptions(int *wflags
, char *arg
)
867 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
869 *wflags
|= WARN_WALL
;
872 *wflags
|= WARN_WERR
;
875 fprintf(stderr
, "%s: Bad argument\n", o
);
885 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
891 cp
= (struct curparse
*)arg
;
895 if (t
>= MANDOCERR_FATAL
) {
900 if ( ! (WARN_WALL
& cp
->wflags
))
902 if (t
>= MANDOCERR_ERROR
) {
906 if (WARN_WERR
& cp
->wflags
) {
912 fprintf(stderr
, "%s:%d:%d:", cp
->file
, ln
, col
+ 1);
914 fprintf(stderr
, " %s:", level
);
915 fprintf(stderr
, " %s", mandocerrs
[t
]);
917 fprintf(stderr
, ": %s", msg
);