]>
git.cameronkatri.com Git - mandoc.git/blob - main.c
1 /* $Id: main.c,v 1.139 2011/01/22 13:16:02 schwarze Exp $ */
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44 #define REPARSE_LIMIT 1000
45 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
47 /* FIXME: Intel's compiler? LLVM? pcc? */
49 #if !defined(__GNUC__) || (__GNUC__ < 2)
51 # define __attribute__(x)
53 #endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
55 typedef void (*out_mdoc
)(void *, const struct mdoc
*);
56 typedef void (*out_man
)(void *, const struct man
*);
57 typedef void (*out_free
)(void *);
81 const char *file
; /* Current parse. */
82 int fd
; /* Current parse. */
83 int line
; /* Line number in the file. */
84 enum mandoclevel wlevel
; /* Ignore messages below this. */
85 int wstop
; /* Stop after a file with a warning. */
86 enum intt inttype
; /* which parser to use */
87 struct man
*pman
; /* persistent man parser */
88 struct mdoc
*pmdoc
; /* persistent mdoc parser */
89 struct man
*man
; /* man parser */
90 struct mdoc
*mdoc
; /* mdoc parser */
91 struct roff
*roff
; /* roff parser (!NULL) */
92 struct regset regs
; /* roff registers */
93 int reparse_count
; /* finite interpolation stack */
94 enum outt outtype
; /* which output to use */
95 out_mdoc outmdoc
; /* mdoc output ptr */
96 out_man outman
; /* man output ptr */
97 out_free outfree
; /* free output ptr */
98 void *outdata
; /* data for output */
99 char outopts
[BUFSIZ
]; /* buf of output opts */
102 static const char * const mandoclevels
[MANDOCLEVEL_MAX
] = {
112 static const enum mandocerr mandoclimits
[MANDOCLEVEL_MAX
] = {
122 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
127 /* related to the prologue */
128 "no title in document",
129 "document title should be all caps",
130 "unknown manual section",
131 "cannot parse date argument",
132 "prologue macros out of order",
133 "duplicate prologue macro",
134 "macro not allowed in prologue",
135 "macro not allowed in body",
137 /* related to document structure */
138 ".so is fragile, better use ln(1)",
139 "NAME section must come first",
140 "bad NAME section contents",
141 "manual name not yet set",
142 "sections out of conventional order",
143 "duplicate section name",
144 "section not in conventional manual section",
146 /* related to macros and nesting */
147 "skipping obsolete macro",
148 "skipping paragraph macro",
149 "blocks badly nested",
150 "child violates parent syntax",
151 "nested displays are not portable",
152 "already in literal mode",
154 /* related to missing macro arguments */
155 "skipping empty macro",
156 "argument count wrong",
157 "missing display type",
158 "list type must come first",
159 "tag lists require a width argument",
161 "skipping end of block that is not open",
163 /* related to bad macro arguments */
165 "duplicate argument",
166 "duplicate display type",
167 "duplicate list type",
168 "unknown AT&T UNIX version",
171 "unknown standard specifier",
172 "bad width argument",
174 /* related to plain text */
175 "blank line in non-literal context",
176 "tab in non-literal context",
177 "end of line whitespace",
179 "unknown escape sequence",
180 "unterminated quoted string",
184 /* related to tables */
188 "no table layout cells specified",
189 "no table data cells specified",
190 "ignore data in cell",
191 "data block still open",
192 "ignoring extra data cells",
194 "input stack limit exceeded, infinite loop?",
195 "skipping bad character",
196 "escaped character not allowed in a name",
197 "skipping text before the first section header",
198 "skipping unknown macro",
199 "NOT IMPLEMENTED, please use groff: skipping request",
201 "argument count wrong",
202 "skipping end of block that is not open",
203 "missing end of block",
204 "scope open on exit",
205 "uname(3) system call failed",
206 "macro requires line argument(s)",
207 "macro requires body argument(s)",
208 "macro requires argument(s)",
210 "line argument(s) will be lost",
211 "body argument(s) will be lost",
213 "generic fatal error",
215 "column syntax is inconsistent",
216 "NOT IMPLEMENTED: .Bd -file",
217 "line scope broken, syntax violated",
218 "argument count wrong, violates syntax",
219 "child violates parent syntax",
220 "argument count wrong, violates syntax",
221 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
223 "no document prologue",
224 "static buffer exhausted",
227 static void parsebuf(struct curparse
*, struct buf
, int);
228 static void pdesc(struct curparse
*);
229 static void fdesc(struct curparse
*);
230 static void ffile(const char *, struct curparse
*);
231 static int pfile(const char *, struct curparse
*);
232 static int moptions(enum intt
*, char *);
233 static int mmsg(enum mandocerr
, void *,
234 int, int, const char *);
235 static void pset(const char *, int, struct curparse
*);
236 static int toptions(struct curparse
*, char *);
237 static void usage(void) __attribute__((noreturn
));
238 static void version(void) __attribute__((noreturn
));
239 static int woptions(struct curparse
*, char *);
241 static const char *progname
;
242 static enum mandoclevel file_status
= MANDOCLEVEL_OK
;
243 static enum mandoclevel exit_status
= MANDOCLEVEL_OK
;
246 main(int argc
, char *argv
[])
249 struct curparse curp
;
251 progname
= strrchr(argv
[0], '/');
252 if (progname
== NULL
)
257 memset(&curp
, 0, sizeof(struct curparse
));
259 curp
.inttype
= INTT_AUTO
;
260 curp
.outtype
= OUTT_ASCII
;
261 curp
.wlevel
= MANDOCLEVEL_FATAL
;
264 while (-1 != (c
= getopt(argc
, argv
, "m:O:T:VW:")))
267 if ( ! moptions(&curp
.inttype
, optarg
))
268 return((int)MANDOCLEVEL_BADARG
);
271 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
272 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
275 if ( ! toptions(&curp
, optarg
))
276 return((int)MANDOCLEVEL_BADARG
);
279 if ( ! woptions(&curp
, optarg
))
280 return((int)MANDOCLEVEL_BADARG
);
294 curp
.file
= "<stdin>";
295 curp
.fd
= STDIN_FILENO
;
302 if (MANDOCLEVEL_OK
!= exit_status
&& curp
.wstop
)
308 (*curp
.outfree
)(curp
.outdata
);
310 mdoc_free(curp
.pmdoc
);
314 roff_free(curp
.roff
);
316 return((int)exit_status
);
324 (void)printf("%s %s\n", progname
, VERSION
);
325 exit((int)MANDOCLEVEL_OK
);
333 (void)fprintf(stderr
, "usage: %s "
343 exit((int)MANDOCLEVEL_BADARG
);
347 ffile(const char *file
, struct curparse
*curp
)
351 * Called once per input file. Get the file ready for reading,
352 * pass it through to the parser-driver, then close it out.
353 * XXX: don't do anything special as this is only called for
354 * files; stdin goes directly to fdesc().
359 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
361 exit_status
= MANDOCLEVEL_SYSERR
;
367 if (-1 == close(curp
->fd
))
372 pfile(const char *file
, struct curparse
*curp
)
374 const char *savefile
;
377 if (-1 == (fd
= open(file
, O_RDONLY
, 0))) {
379 file_status
= MANDOCLEVEL_SYSERR
;
383 savefile
= curp
->file
;
391 curp
->file
= savefile
;
397 return(MANDOCLEVEL_FATAL
> file_status
? 1 : 0);
402 resize_buf(struct buf
*buf
, size_t initial
)
405 buf
->sz
= buf
->sz
> initial
/2 ? 2 * buf
->sz
: initial
;
406 buf
->buf
= realloc(buf
->buf
, buf
->sz
);
407 if (NULL
== buf
->buf
) {
409 exit((int)MANDOCLEVEL_SYSERR
);
415 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
421 if (-1 == fstat(curp
->fd
, &st
)) {
427 * If we're a regular file, try just reading in the whole entry
428 * via mmap(). This is faster than reading it into blocks, and
429 * since each file is only a few bytes to begin with, I'm not
430 * concerned that this is going to tank any machines.
433 if (S_ISREG(st
.st_mode
)) {
434 if (st
.st_size
>= (1U << 31)) {
435 fprintf(stderr
, "%s: input too large\n",
440 fb
->sz
= (size_t)st
.st_size
;
441 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
442 MAP_FILE
|MAP_SHARED
, curp
->fd
, 0);
443 if (fb
->buf
!= MAP_FAILED
)
448 * If this isn't a regular file (like, say, stdin), then we must
449 * go the old way and just read things in bit by bit.
458 if (fb
->sz
== (1U << 31)) {
459 fprintf(stderr
, "%s: input too large\n",
463 resize_buf(fb
, 65536);
465 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
484 fdesc(struct curparse
*curp
)
488 * Called once per file with an opened file descriptor. All
489 * pre-file-parse operations (whether stdin or a file) should go
492 * This calls down into the nested parser, which drills down and
493 * fully parses a file and all its dependences (i.e., `so'). It
494 * then runs the cleanup validators and pushes to output.
497 /* Zero the parse type. */
501 file_status
= MANDOCLEVEL_OK
;
503 /* Make sure the mandatory roff parser is initialised. */
505 if (NULL
== curp
->roff
) {
506 curp
->roff
= roff_alloc(&curp
->regs
, curp
, mmsg
);
510 /* Fully parse the file. */
514 if (MANDOCLEVEL_FATAL
<= file_status
)
517 /* NOTE a parser may not have been assigned, yet. */
519 if ( ! (curp
->man
|| curp
->mdoc
)) {
520 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
521 file_status
= MANDOCLEVEL_FATAL
;
525 /* Clean up the parse routine ASTs. */
527 if (curp
->mdoc
&& ! mdoc_endparse(curp
->mdoc
)) {
528 assert(MANDOCLEVEL_FATAL
<= file_status
);
532 if (curp
->man
&& ! man_endparse(curp
->man
)) {
533 assert(MANDOCLEVEL_FATAL
<= file_status
);
538 roff_endparse(curp
->roff
);
541 * With -Wstop and warnings or errors of at least
542 * the requested level, do not produce output.
545 if (MANDOCLEVEL_OK
!= file_status
&& curp
->wstop
)
548 /* If unset, allocate output dev now (if applicable). */
550 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
551 switch (curp
->outtype
) {
553 curp
->outdata
= xhtml_alloc(curp
->outopts
);
556 curp
->outdata
= html_alloc(curp
->outopts
);
559 curp
->outdata
= ascii_alloc(curp
->outopts
);
560 curp
->outfree
= ascii_free
;
563 curp
->outdata
= pdf_alloc(curp
->outopts
);
564 curp
->outfree
= pspdf_free
;
567 curp
->outdata
= ps_alloc(curp
->outopts
);
568 curp
->outfree
= pspdf_free
;
574 switch (curp
->outtype
) {
578 curp
->outman
= html_man
;
579 curp
->outmdoc
= html_mdoc
;
580 curp
->outfree
= html_free
;
583 curp
->outman
= tree_man
;
584 curp
->outmdoc
= tree_mdoc
;
591 curp
->outman
= terminal_man
;
592 curp
->outmdoc
= terminal_mdoc
;
599 /* Execute the out device, if it exists. */
601 if (curp
->man
&& curp
->outman
)
602 (*curp
->outman
)(curp
->outdata
, curp
->man
);
603 if (curp
->mdoc
&& curp
->outmdoc
)
604 (*curp
->outmdoc
)(curp
->outdata
, curp
->mdoc
);
608 memset(&curp
->regs
, 0, sizeof(struct regset
));
610 /* Reset the current-parse compilers. */
613 mdoc_reset(curp
->mdoc
);
615 man_reset(curp
->man
);
618 roff_reset(curp
->roff
);
620 if (exit_status
< file_status
)
621 exit_status
= file_status
;
627 pdesc(struct curparse
*curp
)
633 * Run for each opened file; may be called more than once for
634 * each full parse sequence if the opened file is nested (i.e.,
635 * from `so'). Simply sucks in the whole file and moves into
636 * the parse phase for the file.
639 if ( ! read_whole_file(curp
, &blk
, &with_mmap
)) {
640 file_status
= MANDOCLEVEL_SYSERR
;
644 /* Line number is per-file. */
648 parsebuf(curp
, blk
, 1);
651 munmap(blk
.buf
, blk
.sz
);
657 parsebuf(struct curparse
*curp
, struct buf blk
, int start
)
662 int pos
; /* byte number in the ln buffer */
663 int lnn
; /* line number in the real file */
667 * Main parse routine for an opened file. This is called for
668 * each opened file and simply loops around the full input file,
669 * possibly nesting (i.e., with `so').
672 memset(&ln
, 0, sizeof(struct buf
));
677 for (i
= 0; i
< (int)blk
.sz
; ) {
678 if (0 == pos
&& '\0' == blk
.buf
[i
])
683 curp
->reparse_count
= 0;
686 while (i
< (int)blk
.sz
&& (start
|| '\0' != blk
.buf
[i
])) {
687 if ('\n' == blk
.buf
[i
]) {
694 * Warn about bogus characters. If you're using
695 * non-ASCII encoding, you're screwing your
696 * readers. Since I'd rather this not happen,
697 * I'll be helpful and drop these characters so
698 * we don't display gibberish. Note to manual
699 * writers: use special characters.
702 c
= (unsigned char) blk
.buf
[i
];
704 if ( ! (isascii(c
) &&
705 (isgraph(c
) || isblank(c
)))) {
706 mmsg(MANDOCERR_BADCHAR
, curp
,
707 curp
->line
, pos
, "ignoring byte");
712 /* Trailing backslash = a plain char. */
714 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
715 if (pos
>= (int)ln
.sz
)
716 resize_buf(&ln
, 256);
717 ln
.buf
[pos
++] = blk
.buf
[i
++];
721 /* Found escape & at least one other char. */
723 if ('\n' == blk
.buf
[i
+ 1]) {
725 /* Escaped newlines are skipped over */
730 if ('"' == blk
.buf
[i
+ 1]) {
732 /* Comment, skip to end of line */
733 for (; i
< (int)blk
.sz
; ++i
) {
734 if ('\n' == blk
.buf
[i
]) {
741 /* Backout trailing whitespaces */
742 for (; pos
> 0; --pos
) {
743 if (ln
.buf
[pos
- 1] != ' ')
745 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
751 /* Some other escape sequence, copy & cont. */
753 if (pos
+ 1 >= (int)ln
.sz
)
754 resize_buf(&ln
, 256);
756 ln
.buf
[pos
++] = blk
.buf
[i
++];
757 ln
.buf
[pos
++] = blk
.buf
[i
++];
760 if (pos
>= (int)ln
.sz
)
761 resize_buf(&ln
, 256);
766 * A significant amount of complexity is contained by
767 * the roff preprocessor. It's line-oriented but can be
768 * expressed on one line, so we need at times to
769 * readjust our starting point and re-run it. The roff
770 * preprocessor can also readjust the buffers with new
771 * data, so we pass them in wholesale.
778 (curp
->roff
, curp
->line
,
779 &ln
.buf
, &ln
.sz
, of
, &of
);
783 if (REPARSE_LIMIT
>= ++curp
->reparse_count
)
784 parsebuf(curp
, ln
, 0);
786 mmsg(MANDOCERR_ROFFLOOP
, curp
,
787 curp
->line
, pos
, NULL
);
791 pos
= strlen(ln
.buf
);
799 assert(MANDOCLEVEL_FATAL
<= file_status
);
802 if (pfile(ln
.buf
+ of
, curp
)) {
812 * If we encounter errors in the recursive parsebuf()
813 * call, make sure we don't continue parsing.
816 if (MANDOCLEVEL_FATAL
<= file_status
)
820 * If input parsers have not been allocated, do so now.
821 * We keep these instanced between parsers, but set them
822 * locally per parse routine since we can use different
823 * parsers with each one.
826 if ( ! (curp
->man
|| curp
->mdoc
))
827 pset(ln
.buf
+ of
, pos
- of
, curp
);
830 * Lastly, push down into the parsers themselves. One
831 * of these will have already been set in the pset()
833 * If libroff returns ROFF_TBL, then add it to the
834 * currently open parse. Since we only get here if
835 * there does exist data (see tbl_data.c), we're
836 * guaranteed that something's been allocated.
839 if (ROFF_TBL
== rr
) {
840 assert(curp
->man
|| curp
->mdoc
);
842 man_addspan(curp
->man
, roff_span(curp
->roff
));
844 mdoc_addspan(curp
->mdoc
, roff_span(curp
->roff
));
846 } else if (curp
->man
|| curp
->mdoc
) {
848 man_parseln(curp
->man
,
849 curp
->line
, ln
.buf
, of
) :
850 mdoc_parseln(curp
->mdoc
,
851 curp
->line
, ln
.buf
, of
);
854 assert(MANDOCLEVEL_FATAL
<= file_status
);
859 /* Temporary buffers typically are not full. */
861 if (0 == start
&& '\0' == blk
.buf
[i
])
864 /* Start the next input line. */
873 pset(const char *buf
, int pos
, struct curparse
*curp
)
878 * Try to intuit which kind of manual parser should be used. If
879 * passed in by command-line (-man, -mdoc), then use that
880 * explicitly. If passed as -mandoc, then try to guess from the
881 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
882 * default to -man, which is more lenient.
884 * Separate out pmdoc/pman from mdoc/man: the first persists
885 * through all parsers, while the latter is used per-parse.
888 if ('.' == buf
[0] || '\'' == buf
[0]) {
889 for (i
= 1; buf
[i
]; i
++)
890 if (' ' != buf
[i
] && '\t' != buf
[i
])
896 switch (curp
->inttype
) {
898 if (NULL
== curp
->pmdoc
)
899 curp
->pmdoc
= mdoc_alloc
900 (&curp
->regs
, curp
, mmsg
);
902 curp
->mdoc
= curp
->pmdoc
;
905 if (NULL
== curp
->pman
)
906 curp
->pman
= man_alloc
907 (&curp
->regs
, curp
, mmsg
);
909 curp
->man
= curp
->pman
;
915 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
916 if (NULL
== curp
->pmdoc
)
917 curp
->pmdoc
= mdoc_alloc
918 (&curp
->regs
, curp
, mmsg
);
920 curp
->mdoc
= curp
->pmdoc
;
924 if (NULL
== curp
->pman
)
925 curp
->pman
= man_alloc(&curp
->regs
, curp
, mmsg
);
927 curp
->man
= curp
->pman
;
931 moptions(enum intt
*tflags
, char *arg
)
934 if (0 == strcmp(arg
, "doc"))
936 else if (0 == strcmp(arg
, "andoc"))
938 else if (0 == strcmp(arg
, "an"))
941 fprintf(stderr
, "%s: Bad argument\n", arg
);
949 toptions(struct curparse
*curp
, char *arg
)
952 if (0 == strcmp(arg
, "ascii"))
953 curp
->outtype
= OUTT_ASCII
;
954 else if (0 == strcmp(arg
, "lint")) {
955 curp
->outtype
= OUTT_LINT
;
956 curp
->wlevel
= MANDOCLEVEL_WARNING
;
958 else if (0 == strcmp(arg
, "tree"))
959 curp
->outtype
= OUTT_TREE
;
960 else if (0 == strcmp(arg
, "html"))
961 curp
->outtype
= OUTT_HTML
;
962 else if (0 == strcmp(arg
, "xhtml"))
963 curp
->outtype
= OUTT_XHTML
;
964 else if (0 == strcmp(arg
, "ps"))
965 curp
->outtype
= OUTT_PS
;
966 else if (0 == strcmp(arg
, "pdf"))
967 curp
->outtype
= OUTT_PDF
;
969 fprintf(stderr
, "%s: Bad argument\n", arg
);
977 woptions(struct curparse
*curp
, char *arg
)
991 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
998 curp
->wlevel
= MANDOCLEVEL_WARNING
;
1001 curp
->wlevel
= MANDOCLEVEL_ERROR
;
1004 curp
->wlevel
= MANDOCLEVEL_FATAL
;
1007 fprintf(stderr
, "-W%s: Bad argument\n", o
);
1016 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
1018 struct curparse
*cp
;
1019 enum mandoclevel level
;
1021 level
= MANDOCLEVEL_FATAL
;
1022 while (t
< mandoclimits
[level
])
1026 cp
= (struct curparse
*)arg
;
1027 if (level
< cp
->wlevel
)
1030 fprintf(stderr
, "%s:%d:%d: %s: %s",
1031 cp
->file
, ln
, col
+ 1, mandoclevels
[level
], mandocerrs
[t
]);
1033 fprintf(stderr
, ": %s", msg
);
1034 fputc('\n', stderr
);
1036 if (file_status
< level
)
1037 file_status
= level
;
1039 return(level
< MANDOCLEVEL_FATAL
);