]>
git.cameronkatri.com Git - mandoc.git/blob - main.c
1 /* $Id: main.c,v 1.122 2010/12/10 20:58:56 schwarze Exp $ */
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44 #define REPARSE_LIMIT 1000
45 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
47 /* FIXME: Intel's compiler? LLVM? pcc? */
49 #if !defined(__GNUC__) || (__GNUC__ < 2)
51 # define __attribute__(x)
53 #endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
55 typedef void (*out_mdoc
)(void *, const struct mdoc
*);
56 typedef void (*out_man
)(void *, const struct man
*);
57 typedef void (*out_free
)(void *);
81 const char *file
; /* Current parse. */
82 int fd
; /* Current parse. */
83 int line
; /* Line number in the file. */
84 enum mandoclevel wlevel
; /* Ignore messages below this. */
85 int wstop
; /* Stop after a file with a warning. */
86 enum intt inttype
; /* which parser to use */
87 struct man
*pman
; /* persistent man parser */
88 struct mdoc
*pmdoc
; /* persistent mdoc parser */
89 struct man
*man
; /* man parser */
90 struct mdoc
*mdoc
; /* mdoc parser */
91 struct roff
*roff
; /* roff parser (!NULL) */
92 struct regset regs
; /* roff registers */
93 int reparse_count
; /* finite interpolation stack */
94 enum outt outtype
; /* which output to use */
95 out_mdoc outmdoc
; /* mdoc output ptr */
96 out_man outman
; /* man output ptr */
97 out_free outfree
; /* free output ptr */
98 void *outdata
; /* data for output */
99 char outopts
[BUFSIZ
]; /* buf of output opts */
102 static const char * const mandoclevels
[MANDOCLEVEL_MAX
] = {
112 static const enum mandocerr mandoclimits
[MANDOCLEVEL_MAX
] = {
122 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
127 /* related to the prologue */
128 "no title in document",
129 "document title should be all caps",
130 "unknown manual section",
131 "cannot parse date argument",
132 "prologue macros out of order",
133 "duplicate prologue macro",
134 "macro not allowed in prologue",
135 "macro not allowed in body",
137 /* related to document structure */
138 ".so is fragile, better use ln(1)",
139 "NAME section must come first",
140 "bad NAME section contents",
141 "manual name not yet set",
142 "sections out of conventional order",
143 "duplicate section name",
144 "section not in conventional manual section",
146 /* related to macros and nesting */
147 "skipping obsolete macro",
148 "skipping paragraph macro",
149 "blocks badly nested",
150 "child violates parent syntax",
151 "nested displays are not portable",
152 "already in literal mode",
154 /* related to missing macro arguments */
155 "skipping empty macro",
156 "missing display type",
157 "list type must come first",
158 "tag lists require a width argument",
161 /* related to bad macro arguments */
163 "duplicate argument",
164 "duplicate display type",
165 "duplicate list type",
166 "unknown AT&T UNIX version",
169 "unknown standard specifier",
170 "bad width argument",
172 /* related to plain text */
173 "blank line in non-literal context",
174 "tab in non-literal context",
175 "end of line whitespace",
177 "unknown escape sequence",
178 "unterminated quoted string",
182 "input stack limit exceeded, infinite loop?",
183 "skipping bad character",
184 "skipping text before the first section header",
185 "skipping unknown macro",
186 "NOT IMPLEMENTED: skipping request",
188 "argument count wrong",
189 "skipping end of block that is not open",
190 "missing end of block",
191 "scope open on exit",
192 "uname(3) system call failed",
193 "macro requires line argument(s)",
194 "macro requires body argument(s)",
195 "macro requires argument(s)",
197 "line argument(s) will be lost",
198 "body argument(s) will be lost",
200 "generic fatal error",
202 "column syntax is inconsistent",
203 "NOT IMPLEMENTED: .Bd -file",
204 "line scope broken, syntax violated",
205 "argument count wrong, violates syntax",
206 "child violates parent syntax",
207 "argument count wrong, violates syntax",
208 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
210 "no document prologue",
211 "static buffer exhausted",
214 static void parsebuf(struct curparse
*, struct buf
, int);
215 static void pdesc(struct curparse
*);
216 static void fdesc(struct curparse
*);
217 static void ffile(const char *, struct curparse
*);
218 static int pfile(const char *, struct curparse
*);
219 static int moptions(enum intt
*, char *);
220 static int mmsg(enum mandocerr
, void *,
221 int, int, const char *);
222 static void pset(const char *, int, struct curparse
*);
223 static int toptions(struct curparse
*, char *);
224 static void usage(void) __attribute__((noreturn
));
225 static void version(void) __attribute__((noreturn
));
226 static int woptions(struct curparse
*, char *);
228 static const char *progname
;
229 static enum mandoclevel file_status
= MANDOCLEVEL_OK
;
230 static enum mandoclevel exit_status
= MANDOCLEVEL_OK
;
233 main(int argc
, char *argv
[])
236 struct curparse curp
;
238 progname
= strrchr(argv
[0], '/');
239 if (progname
== NULL
)
244 memset(&curp
, 0, sizeof(struct curparse
));
246 curp
.inttype
= INTT_AUTO
;
247 curp
.outtype
= OUTT_ASCII
;
248 curp
.wlevel
= MANDOCLEVEL_FATAL
;
251 while (-1 != (c
= getopt(argc
, argv
, "m:O:T:VW:")))
254 if ( ! moptions(&curp
.inttype
, optarg
))
255 return((int)MANDOCLEVEL_BADARG
);
258 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
259 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
262 if ( ! toptions(&curp
, optarg
))
263 return((int)MANDOCLEVEL_BADARG
);
266 if ( ! woptions(&curp
, optarg
))
267 return((int)MANDOCLEVEL_BADARG
);
281 curp
.file
= "<stdin>";
282 curp
.fd
= STDIN_FILENO
;
289 if (MANDOCLEVEL_OK
!= exit_status
&& curp
.wstop
)
295 (*curp
.outfree
)(curp
.outdata
);
297 mdoc_free(curp
.pmdoc
);
301 roff_free(curp
.roff
);
303 return((int)exit_status
);
311 (void)printf("%s %s\n", progname
, VERSION
);
312 exit((int)MANDOCLEVEL_OK
);
320 (void)fprintf(stderr
, "usage: %s "
330 exit((int)MANDOCLEVEL_BADARG
);
334 ffile(const char *file
, struct curparse
*curp
)
338 * Called once per input file. Get the file ready for reading,
339 * pass it through to the parser-driver, then close it out.
340 * XXX: don't do anything special as this is only called for
341 * files; stdin goes directly to fdesc().
346 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
348 exit_status
= MANDOCLEVEL_SYSERR
;
354 if (-1 == close(curp
->fd
))
359 pfile(const char *file
, struct curparse
*curp
)
361 const char *savefile
;
364 if (-1 == (fd
= open(file
, O_RDONLY
, 0))) {
366 file_status
= MANDOCLEVEL_SYSERR
;
370 savefile
= curp
->file
;
378 curp
->file
= savefile
;
384 return(MANDOCLEVEL_FATAL
> file_status
? 1 : 0);
389 resize_buf(struct buf
*buf
, size_t initial
)
392 buf
->sz
= buf
->sz
? 2 * buf
->sz
: initial
;
393 buf
->buf
= realloc(buf
->buf
, buf
->sz
);
394 if (NULL
== buf
->buf
) {
396 exit((int)MANDOCLEVEL_SYSERR
);
402 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
408 if (-1 == fstat(curp
->fd
, &st
)) {
414 * If we're a regular file, try just reading in the whole entry
415 * via mmap(). This is faster than reading it into blocks, and
416 * since each file is only a few bytes to begin with, I'm not
417 * concerned that this is going to tank any machines.
420 if (S_ISREG(st
.st_mode
)) {
421 if (st
.st_size
>= (1U << 31)) {
422 fprintf(stderr
, "%s: input too large\n",
427 fb
->sz
= (size_t)st
.st_size
;
428 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
429 MAP_FILE
|MAP_SHARED
, curp
->fd
, 0);
430 if (fb
->buf
!= MAP_FAILED
)
435 * If this isn't a regular file (like, say, stdin), then we must
436 * go the old way and just read things in bit by bit.
445 if (fb
->sz
== (1U << 31)) {
446 fprintf(stderr
, "%s: input too large\n",
450 resize_buf(fb
, 65536);
452 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
471 fdesc(struct curparse
*curp
)
475 * Called once per file with an opened file descriptor. All
476 * pre-file-parse operations (whether stdin or a file) should go
479 * This calls down into the nested parser, which drills down and
480 * fully parses a file and all its dependences (i.e., `so'). It
481 * then runs the cleanup validators and pushes to output.
484 /* Zero the parse type. */
488 file_status
= MANDOCLEVEL_OK
;
490 /* Make sure the mandatory roff parser is initialised. */
492 if (NULL
== curp
->roff
) {
493 curp
->roff
= roff_alloc(&curp
->regs
, curp
, mmsg
);
497 /* Fully parse the file. */
501 if (MANDOCLEVEL_FATAL
<= file_status
)
504 /* NOTE a parser may not have been assigned, yet. */
506 if ( ! (curp
->man
|| curp
->mdoc
)) {
507 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
508 file_status
= MANDOCLEVEL_FATAL
;
512 /* Clean up the parse routine ASTs. */
514 if (curp
->mdoc
&& ! mdoc_endparse(curp
->mdoc
)) {
515 assert(MANDOCLEVEL_FATAL
<= file_status
);
519 if (curp
->man
&& ! man_endparse(curp
->man
)) {
520 assert(MANDOCLEVEL_FATAL
<= file_status
);
525 if ( ! roff_endparse(curp
->roff
)) {
526 assert(MANDOCLEVEL_FATAL
<= file_status
);
531 * With -Wstop and warnings or errors of at least
532 * the requested level, do not produce output.
535 if (MANDOCLEVEL_OK
!= file_status
&& curp
->wstop
)
538 /* If unset, allocate output dev now (if applicable). */
540 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
541 switch (curp
->outtype
) {
543 curp
->outdata
= xhtml_alloc(curp
->outopts
);
546 curp
->outdata
= html_alloc(curp
->outopts
);
549 curp
->outdata
= ascii_alloc(curp
->outopts
);
550 curp
->outfree
= ascii_free
;
553 curp
->outdata
= pdf_alloc(curp
->outopts
);
554 curp
->outfree
= pspdf_free
;
557 curp
->outdata
= ps_alloc(curp
->outopts
);
558 curp
->outfree
= pspdf_free
;
564 switch (curp
->outtype
) {
568 curp
->outman
= html_man
;
569 curp
->outmdoc
= html_mdoc
;
570 curp
->outfree
= html_free
;
573 curp
->outman
= tree_man
;
574 curp
->outmdoc
= tree_mdoc
;
581 curp
->outman
= terminal_man
;
582 curp
->outmdoc
= terminal_mdoc
;
589 /* Execute the out device, if it exists. */
591 if (curp
->man
&& curp
->outman
)
592 (*curp
->outman
)(curp
->outdata
, curp
->man
);
593 if (curp
->mdoc
&& curp
->outmdoc
)
594 (*curp
->outmdoc
)(curp
->outdata
, curp
->mdoc
);
598 memset(&curp
->regs
, 0, sizeof(struct regset
));
600 /* Reset the current-parse compilers. */
603 mdoc_reset(curp
->mdoc
);
605 man_reset(curp
->man
);
608 roff_reset(curp
->roff
);
610 if (exit_status
< file_status
)
611 exit_status
= file_status
;
617 pdesc(struct curparse
*curp
)
623 * Run for each opened file; may be called more than once for
624 * each full parse sequence if the opened file is nested (i.e.,
625 * from `so'). Simply sucks in the whole file and moves into
626 * the parse phase for the file.
629 if ( ! read_whole_file(curp
, &blk
, &with_mmap
)) {
630 file_status
= MANDOCLEVEL_SYSERR
;
634 /* Line number is per-file. */
638 parsebuf(curp
, blk
, 1);
641 munmap(blk
.buf
, blk
.sz
);
647 parsebuf(struct curparse
*curp
, struct buf blk
, int start
)
652 int pos
; /* byte number in the ln buffer */
653 int lnn
; /* line number in the real file */
657 * Main parse routine for an opened file. This is called for
658 * each opened file and simply loops around the full input file,
659 * possibly nesting (i.e., with `so').
662 memset(&ln
, 0, sizeof(struct buf
));
667 for (i
= 0; i
< (int)blk
.sz
; ) {
668 if (0 == pos
&& '\0' == blk
.buf
[i
])
673 curp
->reparse_count
= 0;
676 while (i
< (int)blk
.sz
&& (start
|| '\0' != blk
.buf
[i
])) {
677 if ('\n' == blk
.buf
[i
]) {
684 * Warn about bogus characters. If you're using
685 * non-ASCII encoding, you're screwing your
686 * readers. Since I'd rather this not happen,
687 * I'll be helpful and drop these characters so
688 * we don't display gibberish. Note to manual
689 * writers: use special characters.
692 c
= (unsigned char) blk
.buf
[i
];
694 if ( ! (isascii(c
) &&
695 (isgraph(c
) || isblank(c
)))) {
696 mmsg(MANDOCERR_BADCHAR
, curp
,
697 curp
->line
, pos
, "ignoring byte");
702 /* Trailing backslash = a plain char. */
704 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
705 if (pos
>= (int)ln
.sz
)
706 resize_buf(&ln
, 256);
707 ln
.buf
[pos
++] = blk
.buf
[i
++];
711 /* Found escape & at least one other char. */
713 if ('\n' == blk
.buf
[i
+ 1]) {
715 /* Escaped newlines are skipped over */
720 if ('"' == blk
.buf
[i
+ 1]) {
722 /* Comment, skip to end of line */
723 for (; i
< (int)blk
.sz
; ++i
) {
724 if ('\n' == blk
.buf
[i
]) {
731 /* Backout trailing whitespaces */
732 for (; pos
> 0; --pos
) {
733 if (ln
.buf
[pos
- 1] != ' ')
735 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
741 /* Some other escape sequence, copy & cont. */
743 if (pos
+ 1 >= (int)ln
.sz
)
744 resize_buf(&ln
, 256);
746 ln
.buf
[pos
++] = blk
.buf
[i
++];
747 ln
.buf
[pos
++] = blk
.buf
[i
++];
750 if (pos
>= (int)ln
.sz
)
751 resize_buf(&ln
, 256);
756 * A significant amount of complexity is contained by
757 * the roff preprocessor. It's line-oriented but can be
758 * expressed on one line, so we need at times to
759 * readjust our starting point and re-run it. The roff
760 * preprocessor can also readjust the buffers with new
761 * data, so we pass them in wholesale.
768 (curp
->roff
, curp
->line
,
769 &ln
.buf
, &ln
.sz
, of
, &of
);
773 if (REPARSE_LIMIT
>= ++curp
->reparse_count
)
774 parsebuf(curp
, ln
, 0);
776 mmsg(MANDOCERR_ROFFLOOP
, curp
,
777 curp
->line
, pos
, NULL
);
781 pos
= strlen(ln
.buf
);
789 assert(MANDOCLEVEL_FATAL
<= file_status
);
792 if (pfile(ln
.buf
+ of
, curp
)) {
802 * If input parsers have not been allocated, do so now.
803 * We keep these instanced between parsers, but set them
804 * locally per parse routine since we can use different
805 * parsers with each one.
808 if ( ! (curp
->man
|| curp
->mdoc
))
809 pset(ln
.buf
+ of
, pos
- of
, curp
);
812 * Lastly, push down into the parsers themselves. One
813 * of these will have already been set in the pset()
817 if (curp
->man
|| curp
->mdoc
) {
819 man_parseln(curp
->man
,
820 curp
->line
, ln
.buf
, of
) :
821 mdoc_parseln(curp
->mdoc
,
822 curp
->line
, ln
.buf
, of
);
825 assert(MANDOCLEVEL_FATAL
<= file_status
);
830 /* Temporary buffers typically are not full. */
832 if (0 == start
&& '\0' == blk
.buf
[i
])
835 /* Start the next input line. */
844 pset(const char *buf
, int pos
, struct curparse
*curp
)
849 * Try to intuit which kind of manual parser should be used. If
850 * passed in by command-line (-man, -mdoc), then use that
851 * explicitly. If passed as -mandoc, then try to guess from the
852 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
853 * default to -man, which is more lenient.
855 * Separate out pmdoc/pman from mdoc/man: the first persists
856 * through all parsers, while the latter is used per-parse.
859 if ('.' == buf
[0] || '\'' == buf
[0]) {
860 for (i
= 1; buf
[i
]; i
++)
861 if (' ' != buf
[i
] && '\t' != buf
[i
])
867 switch (curp
->inttype
) {
869 if (NULL
== curp
->pmdoc
)
870 curp
->pmdoc
= mdoc_alloc
871 (&curp
->regs
, curp
, mmsg
);
873 curp
->mdoc
= curp
->pmdoc
;
876 if (NULL
== curp
->pman
)
877 curp
->pman
= man_alloc
878 (&curp
->regs
, curp
, mmsg
);
880 curp
->man
= curp
->pman
;
886 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
887 if (NULL
== curp
->pmdoc
)
888 curp
->pmdoc
= mdoc_alloc
889 (&curp
->regs
, curp
, mmsg
);
891 curp
->mdoc
= curp
->pmdoc
;
895 if (NULL
== curp
->pman
)
896 curp
->pman
= man_alloc(&curp
->regs
, curp
, mmsg
);
898 curp
->man
= curp
->pman
;
902 moptions(enum intt
*tflags
, char *arg
)
905 if (0 == strcmp(arg
, "doc"))
907 else if (0 == strcmp(arg
, "andoc"))
909 else if (0 == strcmp(arg
, "an"))
912 fprintf(stderr
, "%s: Bad argument\n", arg
);
920 toptions(struct curparse
*curp
, char *arg
)
923 if (0 == strcmp(arg
, "ascii"))
924 curp
->outtype
= OUTT_ASCII
;
925 else if (0 == strcmp(arg
, "lint")) {
926 curp
->outtype
= OUTT_LINT
;
927 curp
->wlevel
= MANDOCLEVEL_WARNING
;
929 else if (0 == strcmp(arg
, "tree"))
930 curp
->outtype
= OUTT_TREE
;
931 else if (0 == strcmp(arg
, "html"))
932 curp
->outtype
= OUTT_HTML
;
933 else if (0 == strcmp(arg
, "xhtml"))
934 curp
->outtype
= OUTT_XHTML
;
935 else if (0 == strcmp(arg
, "ps"))
936 curp
->outtype
= OUTT_PS
;
937 else if (0 == strcmp(arg
, "pdf"))
938 curp
->outtype
= OUTT_PDF
;
940 fprintf(stderr
, "%s: Bad argument\n", arg
);
948 woptions(struct curparse
*curp
, char *arg
)
962 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
969 curp
->wlevel
= MANDOCLEVEL_WARNING
;
972 curp
->wlevel
= MANDOCLEVEL_ERROR
;
975 curp
->wlevel
= MANDOCLEVEL_FATAL
;
978 fprintf(stderr
, "-W%s: Bad argument\n", o
);
987 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
990 enum mandoclevel level
;
992 level
= MANDOCLEVEL_FATAL
;
993 while (t
< mandoclimits
[level
])
997 cp
= (struct curparse
*)arg
;
998 if (level
< cp
->wlevel
)
1001 fprintf(stderr
, "%s:%d:%d: %s: %s",
1002 cp
->file
, ln
, col
+ 1, mandoclevels
[level
], mandocerrs
[t
]);
1004 fprintf(stderr
, ": %s", msg
);
1005 fputc('\n', stderr
);
1007 if (file_status
< level
)
1008 file_status
= level
;
1010 return(level
< MANDOCLEVEL_FATAL
);