]>
git.cameronkatri.com Git - mandoc.git/blob - main.c
1 /* $Id: main.c,v 1.134 2011/01/04 12:06:21 kristaps Exp $ */
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44 #define REPARSE_LIMIT 1000
45 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
47 /* FIXME: Intel's compiler? LLVM? pcc? */
49 #if !defined(__GNUC__) || (__GNUC__ < 2)
51 # define __attribute__(x)
53 #endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
55 typedef void (*out_mdoc
)(void *, const struct mdoc
*);
56 typedef void (*out_man
)(void *, const struct man
*);
57 typedef void (*out_free
)(void *);
81 const char *file
; /* Current parse. */
82 int fd
; /* Current parse. */
83 int line
; /* Line number in the file. */
84 enum mandoclevel wlevel
; /* Ignore messages below this. */
85 int wstop
; /* Stop after a file with a warning. */
86 enum intt inttype
; /* which parser to use */
87 struct man
*pman
; /* persistent man parser */
88 struct mdoc
*pmdoc
; /* persistent mdoc parser */
89 struct man
*man
; /* man parser */
90 struct mdoc
*mdoc
; /* mdoc parser */
91 struct roff
*roff
; /* roff parser (!NULL) */
92 struct regset regs
; /* roff registers */
93 int reparse_count
; /* finite interpolation stack */
94 enum outt outtype
; /* which output to use */
95 out_mdoc outmdoc
; /* mdoc output ptr */
96 out_man outman
; /* man output ptr */
97 out_free outfree
; /* free output ptr */
98 void *outdata
; /* data for output */
99 char outopts
[BUFSIZ
]; /* buf of output opts */
102 static const char * const mandoclevels
[MANDOCLEVEL_MAX
] = {
112 static const enum mandocerr mandoclimits
[MANDOCLEVEL_MAX
] = {
122 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
127 /* related to the prologue */
128 "no title in document",
129 "document title should be all caps",
130 "unknown manual section",
131 "cannot parse date argument",
132 "prologue macros out of order",
133 "duplicate prologue macro",
134 "macro not allowed in prologue",
135 "macro not allowed in body",
137 /* related to document structure */
138 ".so is fragile, better use ln(1)",
139 "NAME section must come first",
140 "bad NAME section contents",
141 "manual name not yet set",
142 "sections out of conventional order",
143 "duplicate section name",
144 "section not in conventional manual section",
146 /* related to macros and nesting */
147 "skipping obsolete macro",
148 "skipping paragraph macro",
149 "blocks badly nested",
150 "child violates parent syntax",
151 "nested displays are not portable",
152 "already in literal mode",
154 /* related to missing macro arguments */
155 "skipping empty macro",
156 "argument count wrong",
157 "missing display type",
158 "list type must come first",
159 "tag lists require a width argument",
162 /* related to bad macro arguments */
164 "duplicate argument",
165 "duplicate display type",
166 "duplicate list type",
167 "unknown AT&T UNIX version",
170 "unknown standard specifier",
171 "bad width argument",
173 /* related to plain text */
174 "blank line in non-literal context",
175 "tab in non-literal context",
176 "end of line whitespace",
178 "unknown escape sequence",
179 "unterminated quoted string",
181 /* related to tables */
186 /* related to tables */
190 "no table layout cells specified",
191 "no table data cells specified",
192 "ignore data in cell",
194 "input stack limit exceeded, infinite loop?",
195 "skipping bad character",
196 "skipping text before the first section header",
197 "skipping unknown macro",
198 "NOT IMPLEMENTED: skipping request",
200 "argument count wrong",
201 "skipping end of block that is not open",
202 "missing end of block",
203 "scope open on exit",
204 "uname(3) system call failed",
205 "macro requires line argument(s)",
206 "macro requires body argument(s)",
207 "macro requires argument(s)",
209 "line argument(s) will be lost",
210 "body argument(s) will be lost",
212 "generic fatal error",
214 "column syntax is inconsistent",
215 "NOT IMPLEMENTED: .Bd -file",
216 "line scope broken, syntax violated",
217 "argument count wrong, violates syntax",
218 "child violates parent syntax",
219 "argument count wrong, violates syntax",
220 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
222 "no document prologue",
223 "static buffer exhausted",
226 static void parsebuf(struct curparse
*, struct buf
, int);
227 static void pdesc(struct curparse
*);
228 static void fdesc(struct curparse
*);
229 static void ffile(const char *, struct curparse
*);
230 static int pfile(const char *, struct curparse
*);
231 static int moptions(enum intt
*, char *);
232 static int mmsg(enum mandocerr
, void *,
233 int, int, const char *);
234 static void pset(const char *, int, struct curparse
*);
235 static int toptions(struct curparse
*, char *);
236 static void usage(void) __attribute__((noreturn
));
237 static void version(void) __attribute__((noreturn
));
238 static int woptions(struct curparse
*, char *);
240 static const char *progname
;
241 static enum mandoclevel file_status
= MANDOCLEVEL_OK
;
242 static enum mandoclevel exit_status
= MANDOCLEVEL_OK
;
245 main(int argc
, char *argv
[])
248 struct curparse curp
;
250 progname
= strrchr(argv
[0], '/');
251 if (progname
== NULL
)
256 memset(&curp
, 0, sizeof(struct curparse
));
258 curp
.inttype
= INTT_AUTO
;
259 curp
.outtype
= OUTT_ASCII
;
260 curp
.wlevel
= MANDOCLEVEL_FATAL
;
263 while (-1 != (c
= getopt(argc
, argv
, "m:O:T:VW:")))
266 if ( ! moptions(&curp
.inttype
, optarg
))
267 return((int)MANDOCLEVEL_BADARG
);
270 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
271 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
274 if ( ! toptions(&curp
, optarg
))
275 return((int)MANDOCLEVEL_BADARG
);
278 if ( ! woptions(&curp
, optarg
))
279 return((int)MANDOCLEVEL_BADARG
);
293 curp
.file
= "<stdin>";
294 curp
.fd
= STDIN_FILENO
;
301 if (MANDOCLEVEL_OK
!= exit_status
&& curp
.wstop
)
307 (*curp
.outfree
)(curp
.outdata
);
309 mdoc_free(curp
.pmdoc
);
313 roff_free(curp
.roff
);
315 return((int)exit_status
);
323 (void)printf("%s %s\n", progname
, VERSION
);
324 exit((int)MANDOCLEVEL_OK
);
332 (void)fprintf(stderr
, "usage: %s "
342 exit((int)MANDOCLEVEL_BADARG
);
346 ffile(const char *file
, struct curparse
*curp
)
350 * Called once per input file. Get the file ready for reading,
351 * pass it through to the parser-driver, then close it out.
352 * XXX: don't do anything special as this is only called for
353 * files; stdin goes directly to fdesc().
358 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
360 exit_status
= MANDOCLEVEL_SYSERR
;
366 if (-1 == close(curp
->fd
))
371 pfile(const char *file
, struct curparse
*curp
)
373 const char *savefile
;
376 if (-1 == (fd
= open(file
, O_RDONLY
, 0))) {
378 file_status
= MANDOCLEVEL_SYSERR
;
382 savefile
= curp
->file
;
390 curp
->file
= savefile
;
396 return(MANDOCLEVEL_FATAL
> file_status
? 1 : 0);
401 resize_buf(struct buf
*buf
, size_t initial
)
404 buf
->sz
= buf
->sz
> initial
/2 ? 2 * buf
->sz
: initial
;
405 buf
->buf
= realloc(buf
->buf
, buf
->sz
);
406 if (NULL
== buf
->buf
) {
408 exit((int)MANDOCLEVEL_SYSERR
);
414 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
420 if (-1 == fstat(curp
->fd
, &st
)) {
426 * If we're a regular file, try just reading in the whole entry
427 * via mmap(). This is faster than reading it into blocks, and
428 * since each file is only a few bytes to begin with, I'm not
429 * concerned that this is going to tank any machines.
432 if (S_ISREG(st
.st_mode
)) {
433 if (st
.st_size
>= (1U << 31)) {
434 fprintf(stderr
, "%s: input too large\n",
439 fb
->sz
= (size_t)st
.st_size
;
440 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
441 MAP_FILE
|MAP_SHARED
, curp
->fd
, 0);
442 if (fb
->buf
!= MAP_FAILED
)
447 * If this isn't a regular file (like, say, stdin), then we must
448 * go the old way and just read things in bit by bit.
457 if (fb
->sz
== (1U << 31)) {
458 fprintf(stderr
, "%s: input too large\n",
462 resize_buf(fb
, 65536);
464 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
483 fdesc(struct curparse
*curp
)
487 * Called once per file with an opened file descriptor. All
488 * pre-file-parse operations (whether stdin or a file) should go
491 * This calls down into the nested parser, which drills down and
492 * fully parses a file and all its dependences (i.e., `so'). It
493 * then runs the cleanup validators and pushes to output.
496 /* Zero the parse type. */
500 file_status
= MANDOCLEVEL_OK
;
502 /* Make sure the mandatory roff parser is initialised. */
504 if (NULL
== curp
->roff
) {
505 curp
->roff
= roff_alloc(&curp
->regs
, curp
, mmsg
);
509 /* Fully parse the file. */
513 if (MANDOCLEVEL_FATAL
<= file_status
)
516 /* NOTE a parser may not have been assigned, yet. */
518 if ( ! (curp
->man
|| curp
->mdoc
)) {
519 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
520 file_status
= MANDOCLEVEL_FATAL
;
524 /* Clean up the parse routine ASTs. */
526 if (curp
->mdoc
&& ! mdoc_endparse(curp
->mdoc
)) {
527 assert(MANDOCLEVEL_FATAL
<= file_status
);
531 if (curp
->man
&& ! man_endparse(curp
->man
)) {
532 assert(MANDOCLEVEL_FATAL
<= file_status
);
537 roff_endparse(curp
->roff
);
540 * With -Wstop and warnings or errors of at least
541 * the requested level, do not produce output.
544 if (MANDOCLEVEL_OK
!= file_status
&& curp
->wstop
)
547 /* If unset, allocate output dev now (if applicable). */
549 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
550 switch (curp
->outtype
) {
552 curp
->outdata
= xhtml_alloc(curp
->outopts
);
555 curp
->outdata
= html_alloc(curp
->outopts
);
558 curp
->outdata
= ascii_alloc(curp
->outopts
);
559 curp
->outfree
= ascii_free
;
562 curp
->outdata
= pdf_alloc(curp
->outopts
);
563 curp
->outfree
= pspdf_free
;
566 curp
->outdata
= ps_alloc(curp
->outopts
);
567 curp
->outfree
= pspdf_free
;
573 switch (curp
->outtype
) {
577 curp
->outman
= html_man
;
578 curp
->outmdoc
= html_mdoc
;
579 curp
->outfree
= html_free
;
582 curp
->outman
= tree_man
;
583 curp
->outmdoc
= tree_mdoc
;
590 curp
->outman
= terminal_man
;
591 curp
->outmdoc
= terminal_mdoc
;
598 /* Execute the out device, if it exists. */
600 if (curp
->man
&& curp
->outman
)
601 (*curp
->outman
)(curp
->outdata
, curp
->man
);
602 if (curp
->mdoc
&& curp
->outmdoc
)
603 (*curp
->outmdoc
)(curp
->outdata
, curp
->mdoc
);
607 memset(&curp
->regs
, 0, sizeof(struct regset
));
609 /* Reset the current-parse compilers. */
612 mdoc_reset(curp
->mdoc
);
614 man_reset(curp
->man
);
617 roff_reset(curp
->roff
);
619 if (exit_status
< file_status
)
620 exit_status
= file_status
;
626 pdesc(struct curparse
*curp
)
632 * Run for each opened file; may be called more than once for
633 * each full parse sequence if the opened file is nested (i.e.,
634 * from `so'). Simply sucks in the whole file and moves into
635 * the parse phase for the file.
638 if ( ! read_whole_file(curp
, &blk
, &with_mmap
)) {
639 file_status
= MANDOCLEVEL_SYSERR
;
643 /* Line number is per-file. */
647 parsebuf(curp
, blk
, 1);
650 munmap(blk
.buf
, blk
.sz
);
656 parsebuf(struct curparse
*curp
, struct buf blk
, int start
)
661 int pos
; /* byte number in the ln buffer */
662 int lnn
; /* line number in the real file */
666 * Main parse routine for an opened file. This is called for
667 * each opened file and simply loops around the full input file,
668 * possibly nesting (i.e., with `so').
671 memset(&ln
, 0, sizeof(struct buf
));
676 for (i
= 0; i
< (int)blk
.sz
; ) {
677 if (0 == pos
&& '\0' == blk
.buf
[i
])
682 curp
->reparse_count
= 0;
685 while (i
< (int)blk
.sz
&& (start
|| '\0' != blk
.buf
[i
])) {
686 if ('\n' == blk
.buf
[i
]) {
693 * Warn about bogus characters. If you're using
694 * non-ASCII encoding, you're screwing your
695 * readers. Since I'd rather this not happen,
696 * I'll be helpful and drop these characters so
697 * we don't display gibberish. Note to manual
698 * writers: use special characters.
701 c
= (unsigned char) blk
.buf
[i
];
703 if ( ! (isascii(c
) &&
704 (isgraph(c
) || isblank(c
)))) {
705 mmsg(MANDOCERR_BADCHAR
, curp
,
706 curp
->line
, pos
, "ignoring byte");
711 /* Trailing backslash = a plain char. */
713 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
714 if (pos
>= (int)ln
.sz
)
715 resize_buf(&ln
, 256);
716 ln
.buf
[pos
++] = blk
.buf
[i
++];
720 /* Found escape & at least one other char. */
722 if ('\n' == blk
.buf
[i
+ 1]) {
724 /* Escaped newlines are skipped over */
729 if ('"' == blk
.buf
[i
+ 1]) {
731 /* Comment, skip to end of line */
732 for (; i
< (int)blk
.sz
; ++i
) {
733 if ('\n' == blk
.buf
[i
]) {
740 /* Backout trailing whitespaces */
741 for (; pos
> 0; --pos
) {
742 if (ln
.buf
[pos
- 1] != ' ')
744 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
750 /* Some other escape sequence, copy & cont. */
752 if (pos
+ 1 >= (int)ln
.sz
)
753 resize_buf(&ln
, 256);
755 ln
.buf
[pos
++] = blk
.buf
[i
++];
756 ln
.buf
[pos
++] = blk
.buf
[i
++];
759 if (pos
>= (int)ln
.sz
)
760 resize_buf(&ln
, 256);
765 * A significant amount of complexity is contained by
766 * the roff preprocessor. It's line-oriented but can be
767 * expressed on one line, so we need at times to
768 * readjust our starting point and re-run it. The roff
769 * preprocessor can also readjust the buffers with new
770 * data, so we pass them in wholesale.
777 (curp
->roff
, curp
->line
,
778 &ln
.buf
, &ln
.sz
, of
, &of
);
782 if (REPARSE_LIMIT
>= ++curp
->reparse_count
)
783 parsebuf(curp
, ln
, 0);
785 mmsg(MANDOCERR_ROFFLOOP
, curp
,
786 curp
->line
, pos
, NULL
);
790 pos
= strlen(ln
.buf
);
798 assert(MANDOCLEVEL_FATAL
<= file_status
);
801 if (pfile(ln
.buf
+ of
, curp
)) {
811 * If we encounter errors in the recursive parsebuf()
812 * call, make sure we don't continue parsing.
815 if (MANDOCLEVEL_FATAL
<= file_status
)
819 * If input parsers have not been allocated, do so now.
820 * We keep these instanced between parsers, but set them
821 * locally per parse routine since we can use different
822 * parsers with each one.
825 if ( ! (curp
->man
|| curp
->mdoc
))
826 pset(ln
.buf
+ of
, pos
- of
, curp
);
829 * Lastly, push down into the parsers themselves. One
830 * of these will have already been set in the pset()
832 * If libroff returns ROFF_TBL, then add it to the
833 * currently open parse. Since we only get here if
834 * there does exist data (see tbl_data.c), we're
835 * guaranteed that something's been allocated.
838 if (ROFF_TBL
== rr
) {
839 assert(curp
->man
|| curp
->mdoc
);
841 man_addspan(curp
->man
, roff_span(curp
->roff
));
843 mdoc_addspan(curp
->mdoc
, roff_span(curp
->roff
));
845 } else if (curp
->man
|| curp
->mdoc
) {
847 man_parseln(curp
->man
,
848 curp
->line
, ln
.buf
, of
) :
849 mdoc_parseln(curp
->mdoc
,
850 curp
->line
, ln
.buf
, of
);
853 assert(MANDOCLEVEL_FATAL
<= file_status
);
858 /* Temporary buffers typically are not full. */
860 if (0 == start
&& '\0' == blk
.buf
[i
])
863 /* Start the next input line. */
872 pset(const char *buf
, int pos
, struct curparse
*curp
)
877 * Try to intuit which kind of manual parser should be used. If
878 * passed in by command-line (-man, -mdoc), then use that
879 * explicitly. If passed as -mandoc, then try to guess from the
880 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
881 * default to -man, which is more lenient.
883 * Separate out pmdoc/pman from mdoc/man: the first persists
884 * through all parsers, while the latter is used per-parse.
887 if ('.' == buf
[0] || '\'' == buf
[0]) {
888 for (i
= 1; buf
[i
]; i
++)
889 if (' ' != buf
[i
] && '\t' != buf
[i
])
895 switch (curp
->inttype
) {
897 if (NULL
== curp
->pmdoc
)
898 curp
->pmdoc
= mdoc_alloc
899 (&curp
->regs
, curp
, mmsg
);
901 curp
->mdoc
= curp
->pmdoc
;
904 if (NULL
== curp
->pman
)
905 curp
->pman
= man_alloc
906 (&curp
->regs
, curp
, mmsg
);
908 curp
->man
= curp
->pman
;
914 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
915 if (NULL
== curp
->pmdoc
)
916 curp
->pmdoc
= mdoc_alloc
917 (&curp
->regs
, curp
, mmsg
);
919 curp
->mdoc
= curp
->pmdoc
;
923 if (NULL
== curp
->pman
)
924 curp
->pman
= man_alloc(&curp
->regs
, curp
, mmsg
);
926 curp
->man
= curp
->pman
;
930 moptions(enum intt
*tflags
, char *arg
)
933 if (0 == strcmp(arg
, "doc"))
935 else if (0 == strcmp(arg
, "andoc"))
937 else if (0 == strcmp(arg
, "an"))
940 fprintf(stderr
, "%s: Bad argument\n", arg
);
948 toptions(struct curparse
*curp
, char *arg
)
951 if (0 == strcmp(arg
, "ascii"))
952 curp
->outtype
= OUTT_ASCII
;
953 else if (0 == strcmp(arg
, "lint")) {
954 curp
->outtype
= OUTT_LINT
;
955 curp
->wlevel
= MANDOCLEVEL_WARNING
;
957 else if (0 == strcmp(arg
, "tree"))
958 curp
->outtype
= OUTT_TREE
;
959 else if (0 == strcmp(arg
, "html"))
960 curp
->outtype
= OUTT_HTML
;
961 else if (0 == strcmp(arg
, "xhtml"))
962 curp
->outtype
= OUTT_XHTML
;
963 else if (0 == strcmp(arg
, "ps"))
964 curp
->outtype
= OUTT_PS
;
965 else if (0 == strcmp(arg
, "pdf"))
966 curp
->outtype
= OUTT_PDF
;
968 fprintf(stderr
, "%s: Bad argument\n", arg
);
976 woptions(struct curparse
*curp
, char *arg
)
990 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
997 curp
->wlevel
= MANDOCLEVEL_WARNING
;
1000 curp
->wlevel
= MANDOCLEVEL_ERROR
;
1003 curp
->wlevel
= MANDOCLEVEL_FATAL
;
1006 fprintf(stderr
, "-W%s: Bad argument\n", o
);
1015 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
1017 struct curparse
*cp
;
1018 enum mandoclevel level
;
1020 level
= MANDOCLEVEL_FATAL
;
1021 while (t
< mandoclimits
[level
])
1025 cp
= (struct curparse
*)arg
;
1026 if (level
< cp
->wlevel
)
1029 fprintf(stderr
, "%s:%d:%d: %s: %s",
1030 cp
->file
, ln
, col
+ 1, mandoclevels
[level
], mandocerrs
[t
]);
1032 fprintf(stderr
, ": %s", msg
);
1033 fputc('\n', stderr
);
1035 if (file_status
< level
)
1036 file_status
= level
;
1038 return(level
< MANDOCLEVEL_FATAL
);