]>
git.cameronkatri.com Git - mandoc.git/blob - main.c
b8cb68e101288c9cc494556107b2fc7a6c2a445f
1 /* $Id: main.c,v 1.153 2011/03/17 12:08:01 kristaps Exp $ */
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44 #define REPARSE_LIMIT 1000
46 /* FIXME: Intel's compiler? LLVM? pcc? */
48 #if !defined(__GNUC__) || (__GNUC__ < 2)
50 # define __attribute__(x)
52 #endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
54 typedef void (*out_mdoc
)(void *, const struct mdoc
*);
55 typedef void (*out_man
)(void *, const struct man
*);
56 typedef void (*out_free
)(void *);
80 enum mandoclevel exit_status
; /* status of all file parses */
81 const char *file
; /* current file-name */
82 enum mandoclevel file_status
; /* error status of current parse */
83 int fd
; /* current file-descriptor */
84 int line
; /* line number in the file */
85 enum mandoclevel wlevel
; /* ignore messages below this */
86 int wstop
; /* stop after a file with a warning */
87 enum intt inttype
; /* which parser to use */
88 struct man
*pman
; /* persistent man parser */
89 struct mdoc
*pmdoc
; /* persistent mdoc parser */
90 struct man
*man
; /* man parser */
91 struct mdoc
*mdoc
; /* mdoc parser */
92 struct roff
*roff
; /* roff parser (!NULL) */
93 struct regset regs
; /* roff registers */
94 int reparse_count
; /* finite interpolation stack */
95 enum outt outtype
; /* which output to use */
96 out_mdoc outmdoc
; /* mdoc output ptr */
97 out_man outman
; /* man output ptr */
98 out_free outfree
; /* free output ptr */
99 void *outdata
; /* data for output */
100 char outopts
[BUFSIZ
]; /* buf of output opts */
103 static const char * const mandoclevels
[MANDOCLEVEL_MAX
] = {
113 static const enum mandocerr mandoclimits
[MANDOCLEVEL_MAX
] = {
123 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
128 /* related to the prologue */
129 "no title in document",
130 "document title should be all caps",
131 "unknown manual section",
132 "date missing, using today's date",
133 "cannot parse date, using it verbatim",
134 "prologue macros out of order",
135 "duplicate prologue macro",
136 "macro not allowed in prologue",
137 "macro not allowed in body",
139 /* related to document structure */
140 ".so is fragile, better use ln(1)",
141 "NAME section must come first",
142 "bad NAME section contents",
143 "manual name not yet set",
144 "sections out of conventional order",
145 "duplicate section name",
146 "section not in conventional manual section",
148 /* related to macros and nesting */
149 "skipping obsolete macro",
150 "skipping paragraph macro",
151 "skipping no-space macro",
152 "blocks badly nested",
153 "child violates parent syntax",
154 "nested displays are not portable",
155 "already in literal mode",
157 /* related to missing macro arguments */
158 "skipping empty macro",
159 "argument count wrong",
160 "missing display type",
161 "list type must come first",
162 "tag lists require a width argument",
164 "skipping end of block that is not open",
166 /* related to bad macro arguments */
168 "duplicate argument",
169 "duplicate display type",
170 "duplicate list type",
171 "unknown AT&T UNIX version",
174 "unknown standard specifier",
175 "bad width argument",
177 /* related to plain text */
178 "blank line in non-literal context",
179 "tab in non-literal context",
180 "end of line whitespace",
182 "unknown escape sequence",
183 "unterminated quoted string",
187 /* related to tables */
191 "no table layout cells specified",
192 "no table data cells specified",
193 "ignore data in cell",
194 "data block still open",
195 "ignoring extra data cells",
197 "input stack limit exceeded, infinite loop?",
198 "skipping bad character",
199 "escaped character not allowed in a name",
200 "skipping text before the first section header",
201 "skipping unknown macro",
202 "NOT IMPLEMENTED, please use groff: skipping request",
204 "argument count wrong",
205 "skipping end of block that is not open",
206 "missing end of block",
207 "scope open on exit",
208 "uname(3) system call failed",
209 "macro requires line argument(s)",
210 "macro requires body argument(s)",
211 "macro requires argument(s)",
213 "line argument(s) will be lost",
214 "body argument(s) will be lost",
216 "generic fatal error",
218 "column syntax is inconsistent",
219 "NOT IMPLEMENTED: .Bd -file",
220 "line scope broken, syntax violated",
221 "argument count wrong, violates syntax",
222 "child violates parent syntax",
223 "argument count wrong, violates syntax",
224 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
226 "no document prologue",
227 "static buffer exhausted",
230 static void parsebuf(struct curparse
*, struct buf
, int);
231 static void pdesc(struct curparse
*);
232 static void fdesc(struct curparse
*);
233 static void ffile(const char *, struct curparse
*);
234 static int pfile(const char *, struct curparse
*);
235 static int moptions(enum intt
*, char *);
236 static void mmsg(enum mandocerr
, void *,
237 int, int, const char *);
238 static void pset(const char *, int, struct curparse
*);
239 static int toptions(struct curparse
*, char *);
240 static void usage(void) __attribute__((noreturn
));
241 static void version(void) __attribute__((noreturn
));
242 static int woptions(struct curparse
*, char *);
244 static const char *progname
;
247 main(int argc
, char *argv
[])
250 struct curparse curp
;
252 progname
= strrchr(argv
[0], '/');
253 if (progname
== NULL
)
258 memset(&curp
, 0, sizeof(struct curparse
));
260 curp
.inttype
= INTT_AUTO
;
261 curp
.outtype
= OUTT_ASCII
;
262 curp
.wlevel
= MANDOCLEVEL_FATAL
;
263 curp
.exit_status
= MANDOCLEVEL_OK
;
266 while (-1 != (c
= getopt(argc
, argv
, "m:O:T:VW:")))
269 if ( ! moptions(&curp
.inttype
, optarg
))
270 return((int)MANDOCLEVEL_BADARG
);
273 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
274 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
277 if ( ! toptions(&curp
, optarg
))
278 return((int)MANDOCLEVEL_BADARG
);
281 if ( ! woptions(&curp
, optarg
))
282 return((int)MANDOCLEVEL_BADARG
);
296 curp
.file
= "<stdin>";
297 curp
.fd
= STDIN_FILENO
;
304 if (MANDOCLEVEL_OK
!= curp
.exit_status
&& curp
.wstop
)
310 (*curp
.outfree
)(curp
.outdata
);
312 mdoc_free(curp
.pmdoc
);
316 roff_free(curp
.roff
);
318 return((int)curp
.exit_status
);
326 (void)printf("%s %s\n", progname
, VERSION
);
327 exit((int)MANDOCLEVEL_OK
);
335 (void)fprintf(stderr
, "usage: %s "
345 exit((int)MANDOCLEVEL_BADARG
);
349 ffile(const char *file
, struct curparse
*curp
)
353 * Called once per input file. Get the file ready for reading,
354 * pass it through to the parser-driver, then close it out.
355 * XXX: don't do anything special as this is only called for
356 * files; stdin goes directly to fdesc().
361 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
363 curp
->exit_status
= MANDOCLEVEL_SYSERR
;
369 if (-1 == close(curp
->fd
))
374 pfile(const char *file
, struct curparse
*curp
)
376 const char *savefile
;
379 if (-1 == (fd
= open(file
, O_RDONLY
, 0))) {
381 curp
->file_status
= MANDOCLEVEL_SYSERR
;
385 savefile
= curp
->file
;
393 curp
->file
= savefile
;
399 return(MANDOCLEVEL_FATAL
> curp
->file_status
? 1 : 0);
404 resize_buf(struct buf
*buf
, size_t initial
)
407 buf
->sz
= buf
->sz
> initial
/2 ? 2 * buf
->sz
: initial
;
408 buf
->buf
= mandoc_realloc(buf
->buf
, buf
->sz
);
413 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
419 if (-1 == fstat(curp
->fd
, &st
)) {
425 * If we're a regular file, try just reading in the whole entry
426 * via mmap(). This is faster than reading it into blocks, and
427 * since each file is only a few bytes to begin with, I'm not
428 * concerned that this is going to tank any machines.
431 if (S_ISREG(st
.st_mode
)) {
432 if (st
.st_size
>= (1U << 31)) {
433 fprintf(stderr
, "%s: input too large\n",
438 fb
->sz
= (size_t)st
.st_size
;
439 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
440 MAP_FILE
|MAP_SHARED
, curp
->fd
, 0);
441 if (fb
->buf
!= MAP_FAILED
)
446 * If this isn't a regular file (like, say, stdin), then we must
447 * go the old way and just read things in bit by bit.
456 if (fb
->sz
== (1U << 31)) {
457 fprintf(stderr
, "%s: input too large\n",
461 resize_buf(fb
, 65536);
463 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
482 fdesc(struct curparse
*curp
)
486 * Called once per file with an opened file descriptor. All
487 * pre-file-parse operations (whether stdin or a file) should go
490 * This calls down into the nested parser, which drills down and
491 * fully parses a file and all its dependences (i.e., `so'). It
492 * then runs the cleanup validators and pushes to output.
495 /* Zero the parse type. */
499 curp
->file_status
= MANDOCLEVEL_OK
;
501 /* Make sure the mandatory roff parser is initialised. */
503 if (NULL
== curp
->roff
) {
504 curp
->roff
= roff_alloc(&curp
->regs
, curp
, mmsg
);
508 /* Fully parse the file. */
512 if (MANDOCLEVEL_FATAL
<= curp
->file_status
)
515 /* NOTE a parser may not have been assigned, yet. */
517 if ( ! (curp
->man
|| curp
->mdoc
)) {
518 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
519 curp
->file_status
= MANDOCLEVEL_FATAL
;
523 /* Clean up the parse routine ASTs. */
525 if (curp
->mdoc
&& ! mdoc_endparse(curp
->mdoc
)) {
526 assert(MANDOCLEVEL_FATAL
<= curp
->file_status
);
530 if (curp
->man
&& ! man_endparse(curp
->man
)) {
531 assert(MANDOCLEVEL_FATAL
<= curp
->file_status
);
536 roff_endparse(curp
->roff
);
539 * With -Wstop and warnings or errors of at least
540 * the requested level, do not produce output.
543 if (MANDOCLEVEL_OK
!= curp
->file_status
&& curp
->wstop
)
546 /* If unset, allocate output dev now (if applicable). */
548 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
549 switch (curp
->outtype
) {
551 curp
->outdata
= xhtml_alloc(curp
->outopts
);
554 curp
->outdata
= html_alloc(curp
->outopts
);
557 curp
->outdata
= ascii_alloc(curp
->outopts
);
558 curp
->outfree
= ascii_free
;
561 curp
->outdata
= pdf_alloc(curp
->outopts
);
562 curp
->outfree
= pspdf_free
;
565 curp
->outdata
= ps_alloc(curp
->outopts
);
566 curp
->outfree
= pspdf_free
;
572 switch (curp
->outtype
) {
576 curp
->outman
= html_man
;
577 curp
->outmdoc
= html_mdoc
;
578 curp
->outfree
= html_free
;
581 curp
->outman
= tree_man
;
582 curp
->outmdoc
= tree_mdoc
;
589 curp
->outman
= terminal_man
;
590 curp
->outmdoc
= terminal_mdoc
;
597 /* Execute the out device, if it exists. */
599 if (curp
->man
&& curp
->outman
)
600 (*curp
->outman
)(curp
->outdata
, curp
->man
);
601 if (curp
->mdoc
&& curp
->outmdoc
)
602 (*curp
->outmdoc
)(curp
->outdata
, curp
->mdoc
);
606 memset(&curp
->regs
, 0, sizeof(struct regset
));
608 /* Reset the current-parse compilers. */
611 mdoc_reset(curp
->mdoc
);
613 man_reset(curp
->man
);
616 roff_reset(curp
->roff
);
618 if (curp
->exit_status
< curp
->file_status
)
619 curp
->exit_status
= curp
->file_status
;
625 pdesc(struct curparse
*curp
)
631 * Run for each opened file; may be called more than once for
632 * each full parse sequence if the opened file is nested (i.e.,
633 * from `so'). Simply sucks in the whole file and moves into
634 * the parse phase for the file.
637 if ( ! read_whole_file(curp
, &blk
, &with_mmap
)) {
638 curp
->file_status
= MANDOCLEVEL_SYSERR
;
642 /* Line number is per-file. */
646 parsebuf(curp
, blk
, 1);
649 munmap(blk
.buf
, blk
.sz
);
655 * Main parse routine for an opened file. This is called for each
656 * opened file and simply loops around the full input file, possibly
657 * nesting (i.e., with `so').
660 parsebuf(struct curparse
*curp
, struct buf blk
, int start
)
662 const struct tbl_span
*span
;
666 int pos
; /* byte number in the ln buffer */
667 int lnn
; /* line number in the real file */
670 memset(&ln
, 0, sizeof(struct buf
));
675 for (i
= 0; i
< (int)blk
.sz
; ) {
676 if (0 == pos
&& '\0' == blk
.buf
[i
])
681 curp
->reparse_count
= 0;
684 while (i
< (int)blk
.sz
&& (start
|| '\0' != blk
.buf
[i
])) {
687 * When finding an unescaped newline character,
688 * leave the character loop to process the line.
689 * Skip a preceding carriage return, if any.
692 if ('\r' == blk
.buf
[i
] && i
+ 1 < (int)blk
.sz
&&
693 '\n' == blk
.buf
[i
+ 1])
695 if ('\n' == blk
.buf
[i
]) {
702 * Warn about bogus characters. If you're using
703 * non-ASCII encoding, you're screwing your
704 * readers. Since I'd rather this not happen,
705 * I'll be helpful and drop these characters so
706 * we don't display gibberish. Note to manual
707 * writers: use special characters.
710 c
= (unsigned char) blk
.buf
[i
];
712 if ( ! (isascii(c
) &&
713 (isgraph(c
) || isblank(c
)))) {
714 mmsg(MANDOCERR_BADCHAR
, curp
,
715 curp
->line
, pos
, "ignoring byte");
720 /* Trailing backslash = a plain char. */
722 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
723 if (pos
>= (int)ln
.sz
)
724 resize_buf(&ln
, 256);
725 ln
.buf
[pos
++] = blk
.buf
[i
++];
730 * Found escape and at least one other character.
731 * When it's a newline character, skip it.
732 * When there is a carriage return in between,
733 * skip that one as well.
736 if ('\r' == blk
.buf
[i
+ 1] && i
+ 2 < (int)blk
.sz
&&
737 '\n' == blk
.buf
[i
+ 2])
739 if ('\n' == blk
.buf
[i
+ 1]) {
745 if ('"' == blk
.buf
[i
+ 1]) {
747 /* Comment, skip to end of line */
748 for (; i
< (int)blk
.sz
; ++i
) {
749 if ('\n' == blk
.buf
[i
]) {
756 /* Back out trailing whitespace */
757 for (; pos
> 0; --pos
) {
758 if (ln
.buf
[pos
- 1] != ' ')
760 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
766 /* Some other escape sequence, copy & cont. */
768 if (pos
+ 1 >= (int)ln
.sz
)
769 resize_buf(&ln
, 256);
771 ln
.buf
[pos
++] = blk
.buf
[i
++];
772 ln
.buf
[pos
++] = blk
.buf
[i
++];
775 if (pos
>= (int)ln
.sz
)
776 resize_buf(&ln
, 256);
781 * A significant amount of complexity is contained by
782 * the roff preprocessor. It's line-oriented but can be
783 * expressed on one line, so we need at times to
784 * readjust our starting point and re-run it. The roff
785 * preprocessor can also readjust the buffers with new
786 * data, so we pass them in wholesale.
793 (curp
->roff
, curp
->line
,
794 &ln
.buf
, &ln
.sz
, of
, &of
);
798 if (REPARSE_LIMIT
>= ++curp
->reparse_count
)
799 parsebuf(curp
, ln
, 0);
801 mmsg(MANDOCERR_ROFFLOOP
, curp
,
802 curp
->line
, pos
, NULL
);
806 pos
= (int)strlen(ln
.buf
);
814 assert(MANDOCLEVEL_FATAL
<= curp
->file_status
);
817 if (pfile(ln
.buf
+ of
, curp
)) {
827 * If we encounter errors in the recursive parsebuf()
828 * call, make sure we don't continue parsing.
831 if (MANDOCLEVEL_FATAL
<= curp
->file_status
)
835 * If input parsers have not been allocated, do so now.
836 * We keep these instanced between parsers, but set them
837 * locally per parse routine since we can use different
838 * parsers with each one.
841 if ( ! (curp
->man
|| curp
->mdoc
))
842 pset(ln
.buf
+ of
, pos
- of
, curp
);
845 * Lastly, push down into the parsers themselves. One
846 * of these will have already been set in the pset()
848 * If libroff returns ROFF_TBL, then add it to the
849 * currently open parse. Since we only get here if
850 * there does exist data (see tbl_data.c), we're
851 * guaranteed that something's been allocated.
852 * Do the same for ROFF_EQN.
858 while (NULL
!= (span
= roff_span(curp
->roff
))) {
860 man_addspan(curp
->man
, span
) :
861 mdoc_addspan(curp
->mdoc
, span
);
865 else if (ROFF_EQN
== rr
)
867 mdoc_addeqn(curp
->mdoc
,
868 roff_eqn(curp
->roff
)) :
869 man_addeqn(curp
->man
,
870 roff_eqn(curp
->roff
));
871 else if (curp
->man
|| curp
->mdoc
)
873 man_parseln(curp
->man
,
874 curp
->line
, ln
.buf
, of
) :
875 mdoc_parseln(curp
->mdoc
,
876 curp
->line
, ln
.buf
, of
);
879 assert(MANDOCLEVEL_FATAL
<= curp
->file_status
);
883 /* Temporary buffers typically are not full. */
885 if (0 == start
&& '\0' == blk
.buf
[i
])
888 /* Start the next input line. */
897 pset(const char *buf
, int pos
, struct curparse
*curp
)
902 * Try to intuit which kind of manual parser should be used. If
903 * passed in by command-line (-man, -mdoc), then use that
904 * explicitly. If passed as -mandoc, then try to guess from the
905 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
906 * default to -man, which is more lenient.
908 * Separate out pmdoc/pman from mdoc/man: the first persists
909 * through all parsers, while the latter is used per-parse.
912 if ('.' == buf
[0] || '\'' == buf
[0]) {
913 for (i
= 1; buf
[i
]; i
++)
914 if (' ' != buf
[i
] && '\t' != buf
[i
])
920 switch (curp
->inttype
) {
922 if (NULL
== curp
->pmdoc
)
923 curp
->pmdoc
= mdoc_alloc
924 (&curp
->regs
, curp
, mmsg
);
926 curp
->mdoc
= curp
->pmdoc
;
929 if (NULL
== curp
->pman
)
930 curp
->pman
= man_alloc
931 (&curp
->regs
, curp
, mmsg
);
933 curp
->man
= curp
->pman
;
939 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
940 if (NULL
== curp
->pmdoc
)
941 curp
->pmdoc
= mdoc_alloc
942 (&curp
->regs
, curp
, mmsg
);
944 curp
->mdoc
= curp
->pmdoc
;
948 if (NULL
== curp
->pman
)
949 curp
->pman
= man_alloc(&curp
->regs
, curp
, mmsg
);
951 curp
->man
= curp
->pman
;
955 moptions(enum intt
*tflags
, char *arg
)
958 if (0 == strcmp(arg
, "doc"))
960 else if (0 == strcmp(arg
, "andoc"))
962 else if (0 == strcmp(arg
, "an"))
965 fprintf(stderr
, "%s: Bad argument\n", arg
);
973 toptions(struct curparse
*curp
, char *arg
)
976 if (0 == strcmp(arg
, "ascii"))
977 curp
->outtype
= OUTT_ASCII
;
978 else if (0 == strcmp(arg
, "lint")) {
979 curp
->outtype
= OUTT_LINT
;
980 curp
->wlevel
= MANDOCLEVEL_WARNING
;
982 else if (0 == strcmp(arg
, "tree"))
983 curp
->outtype
= OUTT_TREE
;
984 else if (0 == strcmp(arg
, "html"))
985 curp
->outtype
= OUTT_HTML
;
986 else if (0 == strcmp(arg
, "xhtml"))
987 curp
->outtype
= OUTT_XHTML
;
988 else if (0 == strcmp(arg
, "ps"))
989 curp
->outtype
= OUTT_PS
;
990 else if (0 == strcmp(arg
, "pdf"))
991 curp
->outtype
= OUTT_PDF
;
993 fprintf(stderr
, "%s: Bad argument\n", arg
);
1001 woptions(struct curparse
*curp
, char *arg
)
1004 const char *toks
[6];
1008 toks
[2] = "warning";
1015 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
1022 curp
->wlevel
= MANDOCLEVEL_WARNING
;
1025 curp
->wlevel
= MANDOCLEVEL_ERROR
;
1028 curp
->wlevel
= MANDOCLEVEL_FATAL
;
1031 fprintf(stderr
, "-W%s: Bad argument\n", o
);
1040 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
1042 struct curparse
*cp
;
1043 enum mandoclevel level
;
1045 level
= MANDOCLEVEL_FATAL
;
1046 while (t
< mandoclimits
[level
])
1050 cp
= (struct curparse
*)arg
;
1051 if (level
< cp
->wlevel
)
1054 fprintf(stderr
, "%s:%d:%d: %s: %s",
1055 cp
->file
, ln
, col
+ 1, mandoclevels
[level
], mandocerrs
[t
]);
1057 fprintf(stderr
, ": %s", msg
);
1058 fputc('\n', stderr
);
1060 if (cp
->file_status
< level
)
1061 cp
->file_status
= level
;