]>
git.cameronkatri.com Git - mandoc.git/blob - main.c
1 /* $Id: main.c,v 1.147 2011/03/07 01:35:51 schwarze Exp $ */
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44 #define REPARSE_LIMIT 1000
45 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
47 /* FIXME: Intel's compiler? LLVM? pcc? */
49 #if !defined(__GNUC__) || (__GNUC__ < 2)
51 # define __attribute__(x)
53 #endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
55 typedef void (*out_mdoc
)(void *, const struct mdoc
*);
56 typedef void (*out_man
)(void *, const struct man
*);
57 typedef void (*out_free
)(void *);
81 const char *file
; /* Current parse. */
82 int fd
; /* Current parse. */
83 int line
; /* Line number in the file. */
84 enum mandoclevel wlevel
; /* Ignore messages below this. */
85 int wstop
; /* Stop after a file with a warning. */
86 enum intt inttype
; /* which parser to use */
87 struct man
*pman
; /* persistent man parser */
88 struct mdoc
*pmdoc
; /* persistent mdoc parser */
89 struct man
*man
; /* man parser */
90 struct mdoc
*mdoc
; /* mdoc parser */
91 struct roff
*roff
; /* roff parser (!NULL) */
92 struct regset regs
; /* roff registers */
93 int reparse_count
; /* finite interpolation stack */
94 enum outt outtype
; /* which output to use */
95 out_mdoc outmdoc
; /* mdoc output ptr */
96 out_man outman
; /* man output ptr */
97 out_free outfree
; /* free output ptr */
98 void *outdata
; /* data for output */
99 char outopts
[BUFSIZ
]; /* buf of output opts */
102 static const char * const mandoclevels
[MANDOCLEVEL_MAX
] = {
112 static const enum mandocerr mandoclimits
[MANDOCLEVEL_MAX
] = {
122 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
127 /* related to the prologue */
128 "no title in document",
129 "document title should be all caps",
130 "unknown manual section",
131 "date missing, using today's date",
132 "cannot parse date, using it verbatim",
133 "prologue macros out of order",
134 "duplicate prologue macro",
135 "macro not allowed in prologue",
136 "macro not allowed in body",
138 /* related to document structure */
139 ".so is fragile, better use ln(1)",
140 "NAME section must come first",
141 "bad NAME section contents",
142 "manual name not yet set",
143 "sections out of conventional order",
144 "duplicate section name",
145 "section not in conventional manual section",
147 /* related to macros and nesting */
148 "skipping obsolete macro",
149 "skipping paragraph macro",
150 "skipping no-space macro",
151 "blocks badly nested",
152 "child violates parent syntax",
153 "nested displays are not portable",
154 "already in literal mode",
156 /* related to missing macro arguments */
157 "skipping empty macro",
158 "argument count wrong",
159 "missing display type",
160 "list type must come first",
161 "tag lists require a width argument",
163 "skipping end of block that is not open",
165 /* related to bad macro arguments */
167 "duplicate argument",
168 "duplicate display type",
169 "duplicate list type",
170 "unknown AT&T UNIX version",
173 "unknown standard specifier",
174 "bad width argument",
176 /* related to plain text */
177 "blank line in non-literal context",
178 "tab in non-literal context",
179 "end of line whitespace",
181 "unknown escape sequence",
182 "unterminated quoted string",
186 /* related to tables */
190 "no table layout cells specified",
191 "no table data cells specified",
192 "ignore data in cell",
193 "data block still open",
194 "ignoring extra data cells",
196 "input stack limit exceeded, infinite loop?",
197 "skipping bad character",
198 "escaped character not allowed in a name",
199 "skipping text before the first section header",
200 "skipping unknown macro",
201 "NOT IMPLEMENTED, please use groff: skipping request",
203 "argument count wrong",
204 "skipping end of block that is not open",
205 "missing end of block",
206 "scope open on exit",
207 "uname(3) system call failed",
208 "macro requires line argument(s)",
209 "macro requires body argument(s)",
210 "macro requires argument(s)",
212 "line argument(s) will be lost",
213 "body argument(s) will be lost",
215 "generic fatal error",
217 "column syntax is inconsistent",
218 "NOT IMPLEMENTED: .Bd -file",
219 "line scope broken, syntax violated",
220 "argument count wrong, violates syntax",
221 "child violates parent syntax",
222 "argument count wrong, violates syntax",
223 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
225 "no document prologue",
226 "static buffer exhausted",
229 static void parsebuf(struct curparse
*, struct buf
, int);
230 static void pdesc(struct curparse
*);
231 static void fdesc(struct curparse
*);
232 static void ffile(const char *, struct curparse
*);
233 static int pfile(const char *, struct curparse
*);
234 static int moptions(enum intt
*, char *);
235 static int mmsg(enum mandocerr
, void *,
236 int, int, const char *);
237 static void pset(const char *, int, struct curparse
*);
238 static int toptions(struct curparse
*, char *);
239 static void usage(void) __attribute__((noreturn
));
240 static void version(void) __attribute__((noreturn
));
241 static int woptions(struct curparse
*, char *);
243 static const char *progname
;
244 static enum mandoclevel file_status
= MANDOCLEVEL_OK
;
245 static enum mandoclevel exit_status
= MANDOCLEVEL_OK
;
248 main(int argc
, char *argv
[])
251 struct curparse curp
;
253 progname
= strrchr(argv
[0], '/');
254 if (progname
== NULL
)
259 memset(&curp
, 0, sizeof(struct curparse
));
261 curp
.inttype
= INTT_AUTO
;
262 curp
.outtype
= OUTT_ASCII
;
263 curp
.wlevel
= MANDOCLEVEL_FATAL
;
266 while (-1 != (c
= getopt(argc
, argv
, "m:O:T:VW:")))
269 if ( ! moptions(&curp
.inttype
, optarg
))
270 return((int)MANDOCLEVEL_BADARG
);
273 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
274 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
277 if ( ! toptions(&curp
, optarg
))
278 return((int)MANDOCLEVEL_BADARG
);
281 if ( ! woptions(&curp
, optarg
))
282 return((int)MANDOCLEVEL_BADARG
);
296 curp
.file
= "<stdin>";
297 curp
.fd
= STDIN_FILENO
;
304 if (MANDOCLEVEL_OK
!= exit_status
&& curp
.wstop
)
310 (*curp
.outfree
)(curp
.outdata
);
312 mdoc_free(curp
.pmdoc
);
316 roff_free(curp
.roff
);
318 return((int)exit_status
);
326 (void)printf("%s %s\n", progname
, VERSION
);
327 exit((int)MANDOCLEVEL_OK
);
335 (void)fprintf(stderr
, "usage: %s "
345 exit((int)MANDOCLEVEL_BADARG
);
349 ffile(const char *file
, struct curparse
*curp
)
353 * Called once per input file. Get the file ready for reading,
354 * pass it through to the parser-driver, then close it out.
355 * XXX: don't do anything special as this is only called for
356 * files; stdin goes directly to fdesc().
361 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
363 exit_status
= MANDOCLEVEL_SYSERR
;
369 if (-1 == close(curp
->fd
))
374 pfile(const char *file
, struct curparse
*curp
)
376 const char *savefile
;
379 if (-1 == (fd
= open(file
, O_RDONLY
, 0))) {
381 file_status
= MANDOCLEVEL_SYSERR
;
385 savefile
= curp
->file
;
393 curp
->file
= savefile
;
399 return(MANDOCLEVEL_FATAL
> file_status
? 1 : 0);
404 resize_buf(struct buf
*buf
, size_t initial
)
407 buf
->sz
= buf
->sz
> initial
/2 ? 2 * buf
->sz
: initial
;
408 buf
->buf
= realloc(buf
->buf
, buf
->sz
);
409 if (NULL
== buf
->buf
) {
411 exit((int)MANDOCLEVEL_SYSERR
);
417 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
423 if (-1 == fstat(curp
->fd
, &st
)) {
429 * If we're a regular file, try just reading in the whole entry
430 * via mmap(). This is faster than reading it into blocks, and
431 * since each file is only a few bytes to begin with, I'm not
432 * concerned that this is going to tank any machines.
435 if (S_ISREG(st
.st_mode
)) {
436 if (st
.st_size
>= (1U << 31)) {
437 fprintf(stderr
, "%s: input too large\n",
442 fb
->sz
= (size_t)st
.st_size
;
443 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
444 MAP_FILE
|MAP_SHARED
, curp
->fd
, 0);
445 if (fb
->buf
!= MAP_FAILED
)
450 * If this isn't a regular file (like, say, stdin), then we must
451 * go the old way and just read things in bit by bit.
460 if (fb
->sz
== (1U << 31)) {
461 fprintf(stderr
, "%s: input too large\n",
465 resize_buf(fb
, 65536);
467 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
486 fdesc(struct curparse
*curp
)
490 * Called once per file with an opened file descriptor. All
491 * pre-file-parse operations (whether stdin or a file) should go
494 * This calls down into the nested parser, which drills down and
495 * fully parses a file and all its dependences (i.e., `so'). It
496 * then runs the cleanup validators and pushes to output.
499 /* Zero the parse type. */
503 file_status
= MANDOCLEVEL_OK
;
505 /* Make sure the mandatory roff parser is initialised. */
507 if (NULL
== curp
->roff
) {
508 curp
->roff
= roff_alloc(&curp
->regs
, curp
, mmsg
);
512 /* Fully parse the file. */
516 if (MANDOCLEVEL_FATAL
<= file_status
)
519 /* NOTE a parser may not have been assigned, yet. */
521 if ( ! (curp
->man
|| curp
->mdoc
)) {
522 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
523 file_status
= MANDOCLEVEL_FATAL
;
527 /* Clean up the parse routine ASTs. */
529 if (curp
->mdoc
&& ! mdoc_endparse(curp
->mdoc
)) {
530 assert(MANDOCLEVEL_FATAL
<= file_status
);
534 if (curp
->man
&& ! man_endparse(curp
->man
)) {
535 assert(MANDOCLEVEL_FATAL
<= file_status
);
540 roff_endparse(curp
->roff
);
543 * With -Wstop and warnings or errors of at least
544 * the requested level, do not produce output.
547 if (MANDOCLEVEL_OK
!= file_status
&& curp
->wstop
)
550 /* If unset, allocate output dev now (if applicable). */
552 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
553 switch (curp
->outtype
) {
555 curp
->outdata
= xhtml_alloc(curp
->outopts
);
558 curp
->outdata
= html_alloc(curp
->outopts
);
561 curp
->outdata
= ascii_alloc(curp
->outopts
);
562 curp
->outfree
= ascii_free
;
565 curp
->outdata
= pdf_alloc(curp
->outopts
);
566 curp
->outfree
= pspdf_free
;
569 curp
->outdata
= ps_alloc(curp
->outopts
);
570 curp
->outfree
= pspdf_free
;
576 switch (curp
->outtype
) {
580 curp
->outman
= html_man
;
581 curp
->outmdoc
= html_mdoc
;
582 curp
->outfree
= html_free
;
585 curp
->outman
= tree_man
;
586 curp
->outmdoc
= tree_mdoc
;
593 curp
->outman
= terminal_man
;
594 curp
->outmdoc
= terminal_mdoc
;
601 /* Execute the out device, if it exists. */
603 if (curp
->man
&& curp
->outman
)
604 (*curp
->outman
)(curp
->outdata
, curp
->man
);
605 if (curp
->mdoc
&& curp
->outmdoc
)
606 (*curp
->outmdoc
)(curp
->outdata
, curp
->mdoc
);
610 memset(&curp
->regs
, 0, sizeof(struct regset
));
612 /* Reset the current-parse compilers. */
615 mdoc_reset(curp
->mdoc
);
617 man_reset(curp
->man
);
620 roff_reset(curp
->roff
);
622 if (exit_status
< file_status
)
623 exit_status
= file_status
;
629 pdesc(struct curparse
*curp
)
635 * Run for each opened file; may be called more than once for
636 * each full parse sequence if the opened file is nested (i.e.,
637 * from `so'). Simply sucks in the whole file and moves into
638 * the parse phase for the file.
641 if ( ! read_whole_file(curp
, &blk
, &with_mmap
)) {
642 file_status
= MANDOCLEVEL_SYSERR
;
646 /* Line number is per-file. */
650 parsebuf(curp
, blk
, 1);
653 munmap(blk
.buf
, blk
.sz
);
659 parsebuf(struct curparse
*curp
, struct buf blk
, int start
)
661 const struct tbl_span
*span
;
665 int pos
; /* byte number in the ln buffer */
666 int lnn
; /* line number in the real file */
670 * Main parse routine for an opened file. This is called for
671 * each opened file and simply loops around the full input file,
672 * possibly nesting (i.e., with `so').
675 memset(&ln
, 0, sizeof(struct buf
));
680 for (i
= 0; i
< (int)blk
.sz
; ) {
681 if (0 == pos
&& '\0' == blk
.buf
[i
])
686 curp
->reparse_count
= 0;
689 while (i
< (int)blk
.sz
&& (start
|| '\0' != blk
.buf
[i
])) {
692 * When finding an unescaped newline character,
693 * leave the character loop to process the line.
694 * Skip a preceding carriage return, if any.
697 if ('\r' == blk
.buf
[i
] && i
+ 1 < (int)blk
.sz
&&
698 '\n' == blk
.buf
[i
+ 1])
700 if ('\n' == blk
.buf
[i
]) {
707 * Warn about bogus characters. If you're using
708 * non-ASCII encoding, you're screwing your
709 * readers. Since I'd rather this not happen,
710 * I'll be helpful and drop these characters so
711 * we don't display gibberish. Note to manual
712 * writers: use special characters.
715 c
= (unsigned char) blk
.buf
[i
];
717 if ( ! (isascii(c
) &&
718 (isgraph(c
) || isblank(c
)))) {
719 mmsg(MANDOCERR_BADCHAR
, curp
,
720 curp
->line
, pos
, "ignoring byte");
725 /* Trailing backslash = a plain char. */
727 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
728 if (pos
>= (int)ln
.sz
)
729 resize_buf(&ln
, 256);
730 ln
.buf
[pos
++] = blk
.buf
[i
++];
735 * Found escape and at least one other character.
736 * When it's a newline character, skip it.
737 * When there is a carriage return in between,
738 * skip that one as well.
741 if ('\r' == blk
.buf
[i
+ 1] && i
+ 2 < (int)blk
.sz
&&
742 '\n' == blk
.buf
[i
+ 2])
744 if ('\n' == blk
.buf
[i
+ 1]) {
750 if ('"' == blk
.buf
[i
+ 1]) {
752 /* Comment, skip to end of line */
753 for (; i
< (int)blk
.sz
; ++i
) {
754 if ('\n' == blk
.buf
[i
]) {
761 /* Back out trailing whitespace */
762 for (; pos
> 0; --pos
) {
763 if (ln
.buf
[pos
- 1] != ' ')
765 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
771 /* Some other escape sequence, copy & cont. */
773 if (pos
+ 1 >= (int)ln
.sz
)
774 resize_buf(&ln
, 256);
776 ln
.buf
[pos
++] = blk
.buf
[i
++];
777 ln
.buf
[pos
++] = blk
.buf
[i
++];
780 if (pos
>= (int)ln
.sz
)
781 resize_buf(&ln
, 256);
786 * A significant amount of complexity is contained by
787 * the roff preprocessor. It's line-oriented but can be
788 * expressed on one line, so we need at times to
789 * readjust our starting point and re-run it. The roff
790 * preprocessor can also readjust the buffers with new
791 * data, so we pass them in wholesale.
798 (curp
->roff
, curp
->line
,
799 &ln
.buf
, &ln
.sz
, of
, &of
);
803 if (REPARSE_LIMIT
>= ++curp
->reparse_count
)
804 parsebuf(curp
, ln
, 0);
806 mmsg(MANDOCERR_ROFFLOOP
, curp
,
807 curp
->line
, pos
, NULL
);
811 pos
= strlen(ln
.buf
);
819 assert(MANDOCLEVEL_FATAL
<= file_status
);
822 if (pfile(ln
.buf
+ of
, curp
)) {
832 * If we encounter errors in the recursive parsebuf()
833 * call, make sure we don't continue parsing.
836 if (MANDOCLEVEL_FATAL
<= file_status
)
840 * If input parsers have not been allocated, do so now.
841 * We keep these instanced between parsers, but set them
842 * locally per parse routine since we can use different
843 * parsers with each one.
846 if ( ! (curp
->man
|| curp
->mdoc
))
847 pset(ln
.buf
+ of
, pos
- of
, curp
);
850 * Lastly, push down into the parsers themselves. One
851 * of these will have already been set in the pset()
853 * If libroff returns ROFF_TBL, then add it to the
854 * currently open parse. Since we only get here if
855 * there does exist data (see tbl_data.c), we're
856 * guaranteed that something's been allocated.
857 * Do the same for ROFF_EQN.
863 while (NULL
!= (span
= roff_span(curp
->roff
))) {
865 man_addspan(curp
->man
, span
) :
866 mdoc_addspan(curp
->mdoc
, span
);
870 else if (ROFF_EQN
== rr
)
872 mdoc_addeqn(curp
->mdoc
,
873 roff_eqn(curp
->roff
)) :
874 man_addeqn(curp
->man
,
875 roff_eqn(curp
->roff
));
876 else if (curp
->man
|| curp
->mdoc
)
878 man_parseln(curp
->man
,
879 curp
->line
, ln
.buf
, of
) :
880 mdoc_parseln(curp
->mdoc
,
881 curp
->line
, ln
.buf
, of
);
884 assert(MANDOCLEVEL_FATAL
<= file_status
);
888 /* Temporary buffers typically are not full. */
890 if (0 == start
&& '\0' == blk
.buf
[i
])
893 /* Start the next input line. */
902 pset(const char *buf
, int pos
, struct curparse
*curp
)
907 * Try to intuit which kind of manual parser should be used. If
908 * passed in by command-line (-man, -mdoc), then use that
909 * explicitly. If passed as -mandoc, then try to guess from the
910 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
911 * default to -man, which is more lenient.
913 * Separate out pmdoc/pman from mdoc/man: the first persists
914 * through all parsers, while the latter is used per-parse.
917 if ('.' == buf
[0] || '\'' == buf
[0]) {
918 for (i
= 1; buf
[i
]; i
++)
919 if (' ' != buf
[i
] && '\t' != buf
[i
])
925 switch (curp
->inttype
) {
927 if (NULL
== curp
->pmdoc
)
928 curp
->pmdoc
= mdoc_alloc
929 (&curp
->regs
, curp
, mmsg
);
931 curp
->mdoc
= curp
->pmdoc
;
934 if (NULL
== curp
->pman
)
935 curp
->pman
= man_alloc
936 (&curp
->regs
, curp
, mmsg
);
938 curp
->man
= curp
->pman
;
944 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
945 if (NULL
== curp
->pmdoc
)
946 curp
->pmdoc
= mdoc_alloc
947 (&curp
->regs
, curp
, mmsg
);
949 curp
->mdoc
= curp
->pmdoc
;
953 if (NULL
== curp
->pman
)
954 curp
->pman
= man_alloc(&curp
->regs
, curp
, mmsg
);
956 curp
->man
= curp
->pman
;
960 moptions(enum intt
*tflags
, char *arg
)
963 if (0 == strcmp(arg
, "doc"))
965 else if (0 == strcmp(arg
, "andoc"))
967 else if (0 == strcmp(arg
, "an"))
970 fprintf(stderr
, "%s: Bad argument\n", arg
);
978 toptions(struct curparse
*curp
, char *arg
)
981 if (0 == strcmp(arg
, "ascii"))
982 curp
->outtype
= OUTT_ASCII
;
983 else if (0 == strcmp(arg
, "lint")) {
984 curp
->outtype
= OUTT_LINT
;
985 curp
->wlevel
= MANDOCLEVEL_WARNING
;
987 else if (0 == strcmp(arg
, "tree"))
988 curp
->outtype
= OUTT_TREE
;
989 else if (0 == strcmp(arg
, "html"))
990 curp
->outtype
= OUTT_HTML
;
991 else if (0 == strcmp(arg
, "xhtml"))
992 curp
->outtype
= OUTT_XHTML
;
993 else if (0 == strcmp(arg
, "ps"))
994 curp
->outtype
= OUTT_PS
;
995 else if (0 == strcmp(arg
, "pdf"))
996 curp
->outtype
= OUTT_PDF
;
998 fprintf(stderr
, "%s: Bad argument\n", arg
);
1006 woptions(struct curparse
*curp
, char *arg
)
1009 const char *toks
[6];
1013 toks
[2] = "warning";
1020 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
1027 curp
->wlevel
= MANDOCLEVEL_WARNING
;
1030 curp
->wlevel
= MANDOCLEVEL_ERROR
;
1033 curp
->wlevel
= MANDOCLEVEL_FATAL
;
1036 fprintf(stderr
, "-W%s: Bad argument\n", o
);
1045 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
1047 struct curparse
*cp
;
1048 enum mandoclevel level
;
1050 level
= MANDOCLEVEL_FATAL
;
1051 while (t
< mandoclimits
[level
])
1055 cp
= (struct curparse
*)arg
;
1056 if (level
< cp
->wlevel
)
1059 fprintf(stderr
, "%s:%d:%d: %s: %s",
1060 cp
->file
, ln
, col
+ 1, mandoclevels
[level
], mandocerrs
[t
]);
1062 fprintf(stderr
, ": %s", msg
);
1063 fputc('\n', stderr
);
1065 if (file_status
< level
)
1066 file_status
= level
;
1068 return(level
< MANDOCLEVEL_FATAL
);