]>
git.cameronkatri.com Git - mandoc.git/blob - main.c
2122eda07c0d33f8354b43f39c49ef5176e6e980
1 /* $Id: main.c,v 1.112 2010/12/01 16:28:23 kristaps Exp $ */
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
46 /* FIXME: Intel's compiler? LLVM? pcc? */
48 #if !defined(__GNUC__) || (__GNUC__ < 2)
50 # define __attribute__(x)
52 #endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
54 typedef void (*out_mdoc
)(void *, const struct mdoc
*);
55 typedef void (*out_man
)(void *, const struct man
*);
56 typedef void (*out_free
)(void *);
80 const char *file
; /* Current parse. */
81 int fd
; /* Current parse. */
82 int line
; /* Line number in the file. */
83 enum mandoclevel wlevel
; /* Ignore messages below this. */
84 int wstop
; /* Stop after a file with a warning. */
85 enum intt inttype
; /* which parser to use */
86 struct man
*pman
; /* persistent man parser */
87 struct mdoc
*pmdoc
; /* persistent mdoc parser */
88 struct man
*man
; /* man parser */
89 struct mdoc
*mdoc
; /* mdoc parser */
90 struct roff
*roff
; /* roff parser (!NULL) */
91 struct regset regs
; /* roff registers */
92 enum outt outtype
; /* which output to use */
93 out_mdoc outmdoc
; /* mdoc output ptr */
94 out_man outman
; /* man output ptr */
95 out_free outfree
; /* free output ptr */
96 void *outdata
; /* data for output */
97 char outopts
[BUFSIZ
]; /* buf of output opts */
100 static const char * const mandoclevels
[MANDOCLEVEL_MAX
] = {
110 static const enum mandocerr mandoclimits
[MANDOCLEVEL_MAX
] = {
120 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
125 "text should be uppercase",
126 "sections out of conventional order",
127 "section name repeats",
128 "out of order prologue",
129 "repeated prologue entry",
130 "list type must come first",
131 "tab in non-literal context",
132 "bad escape sequence",
133 "unterminated quoted string",
134 "argument requires the width argument",
135 "superfluous width argument",
137 "bad width argument",
138 "unknown manual section",
139 "section not in conventional manual section",
140 "end of line whitespace",
141 "blocks badly nested",
145 "NAME section must come first",
147 "child violates parent syntax",
150 "list type repeated",
151 "display type repeated",
154 "manual name not yet set",
155 "obsolete macro ignored",
156 "empty macro ignored",
157 "macro not allowed in body",
158 "macro not allowed in prologue",
160 "bad NAME section contents",
162 "no text in this context",
164 "unknown macro will be lost",
165 "NOT IMPLEMENTED: skipping request",
167 "argument count wrong",
168 "request scope close w/none open",
169 "scope already open",
170 "scope open on exit",
171 "macro requires line argument(s)",
172 "macro requires body argument(s)",
173 "macro requires argument(s)",
174 "no title in document",
176 "missing display type",
178 "line argument(s) will be lost",
179 "body argument(s) will be lost",
180 "paragraph macro ignored",
182 "generic fatal error",
184 "column syntax is inconsistent",
185 "displays may not be nested",
186 "unsupported display type",
187 "blocks badly nested",
188 "no such block is open",
189 "line scope broken, syntax violated",
190 "argument count wrong, violates syntax",
191 "child violates parent syntax",
192 "argument count wrong, violates syntax",
194 "no document prologue",
195 "utsname system call failed",
196 "static buffer exhausted",
199 static void parsebuf(struct curparse
*, struct buf
, int);
200 static void pdesc(struct curparse
*);
201 static void fdesc(struct curparse
*);
202 static void ffile(const char *, struct curparse
*);
203 static int pfile(const char *, struct curparse
*);
204 static int moptions(enum intt
*, char *);
205 static int mmsg(enum mandocerr
, void *,
206 int, int, const char *);
207 static void pset(const char *, int, struct curparse
*);
208 static int toptions(struct curparse
*, char *);
209 static void usage(void) __attribute__((noreturn
));
210 static void version(void) __attribute__((noreturn
));
211 static int woptions(struct curparse
*, char *);
213 static const char *progname
;
214 static enum mandoclevel exit_status
= MANDOCLEVEL_OK
;
217 main(int argc
, char *argv
[])
220 struct curparse curp
;
222 progname
= strrchr(argv
[0], '/');
223 if (progname
== NULL
)
228 memset(&curp
, 0, sizeof(struct curparse
));
230 curp
.inttype
= INTT_AUTO
;
231 curp
.outtype
= OUTT_ASCII
;
232 curp
.wlevel
= MANDOCLEVEL_FATAL
;
235 while (-1 != (c
= getopt(argc
, argv
, "m:O:T:VW:")))
238 if ( ! moptions(&curp
.inttype
, optarg
))
239 return((int)MANDOCLEVEL_BADARG
);
242 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
243 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
246 if ( ! toptions(&curp
, optarg
))
247 return((int)MANDOCLEVEL_BADARG
);
250 if ( ! woptions(&curp
, optarg
))
251 return((int)MANDOCLEVEL_BADARG
);
265 curp
.file
= "<stdin>";
266 curp
.fd
= STDIN_FILENO
;
273 if (MANDOCLEVEL_OK
!= exit_status
&& curp
.wstop
)
279 (*curp
.outfree
)(curp
.outdata
);
281 mdoc_free(curp
.pmdoc
);
285 roff_free(curp
.roff
);
287 return((int)exit_status
);
295 (void)printf("%s %s\n", progname
, VERSION
);
296 exit((int)MANDOCLEVEL_OK
);
304 (void)fprintf(stderr
, "usage: %s "
314 exit((int)MANDOCLEVEL_BADARG
);
318 ffile(const char *file
, struct curparse
*curp
)
322 * Called once per input file. Get the file ready for reading,
323 * pass it through to the parser-driver, then close it out.
324 * XXX: don't do anything special as this is only called for
325 * files; stdin goes directly to fdesc().
330 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
332 exit_status
= MANDOCLEVEL_SYSERR
;
338 if (-1 == close(curp
->fd
))
343 pfile(const char *file
, struct curparse
*curp
)
345 const char *savefile
;
348 if (-1 == (fd
= open(file
, O_RDONLY
, 0))) {
350 exit_status
= MANDOCLEVEL_SYSERR
;
354 savefile
= curp
->file
;
362 curp
->file
= savefile
;
368 return(MANDOCLEVEL_FATAL
> exit_status
? 1 : 0);
373 resize_buf(struct buf
*buf
, size_t initial
)
376 buf
->sz
= buf
->sz
? 2 * buf
->sz
: initial
;
377 buf
->buf
= realloc(buf
->buf
, buf
->sz
);
378 if (NULL
== buf
->buf
) {
380 exit((int)MANDOCLEVEL_SYSERR
);
386 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
392 if (-1 == fstat(curp
->fd
, &st
)) {
398 * If we're a regular file, try just reading in the whole entry
399 * via mmap(). This is faster than reading it into blocks, and
400 * since each file is only a few bytes to begin with, I'm not
401 * concerned that this is going to tank any machines.
404 if (S_ISREG(st
.st_mode
)) {
405 if (st
.st_size
>= (1U << 31)) {
406 fprintf(stderr
, "%s: input too large\n",
411 fb
->sz
= (size_t)st
.st_size
;
412 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
413 MAP_FILE
|MAP_SHARED
, curp
->fd
, 0);
414 if (fb
->buf
!= MAP_FAILED
)
419 * If this isn't a regular file (like, say, stdin), then we must
420 * go the old way and just read things in bit by bit.
429 if (fb
->sz
== (1U << 31)) {
430 fprintf(stderr
, "%s: input too large\n",
434 resize_buf(fb
, 65536);
436 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
455 fdesc(struct curparse
*curp
)
459 * Called once per file with an opened file descriptor. All
460 * pre-file-parse operations (whether stdin or a file) should go
463 * This calls down into the nested parser, which drills down and
464 * fully parses a file and all its dependencies (i.e., `so'). It
465 * then runs the cleanup validators and pushes to output.
468 /* Zero the parse type. */
473 /* Make sure the mandatory roff parser is initialised. */
475 if (NULL
== curp
->roff
) {
476 curp
->roff
= roff_alloc(&curp
->regs
, curp
, mmsg
);
480 /* Fully parse the file. */
484 if (MANDOCLEVEL_FATAL
<= exit_status
)
487 /* NOTE a parser may not have been assigned, yet. */
489 if ( ! (curp
->man
|| curp
->mdoc
)) {
490 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
491 exit_status
= MANDOCLEVEL_FATAL
;
495 /* Clean up the parse routine ASTs. */
497 if (curp
->mdoc
&& ! mdoc_endparse(curp
->mdoc
)) {
498 assert(MANDOCLEVEL_FATAL
<= exit_status
);
502 if (curp
->man
&& ! man_endparse(curp
->man
)) {
503 assert(MANDOCLEVEL_FATAL
<= exit_status
);
508 if ( ! roff_endparse(curp
->roff
)) {
509 assert(MANDOCLEVEL_FATAL
<= exit_status
);
514 * With -Wstop and warnings or errors of at least
515 * the requested level, do not produce output.
518 if (MANDOCLEVEL_OK
!= exit_status
&& curp
->wstop
)
521 /* If unset, allocate output dev now (if applicable). */
523 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
524 switch (curp
->outtype
) {
526 curp
->outdata
= xhtml_alloc(curp
->outopts
);
529 curp
->outdata
= html_alloc(curp
->outopts
);
532 curp
->outdata
= ascii_alloc(curp
->outopts
);
533 curp
->outfree
= ascii_free
;
536 curp
->outdata
= pdf_alloc(curp
->outopts
);
537 curp
->outfree
= pspdf_free
;
540 curp
->outdata
= ps_alloc(curp
->outopts
);
541 curp
->outfree
= pspdf_free
;
547 switch (curp
->outtype
) {
551 curp
->outman
= html_man
;
552 curp
->outmdoc
= html_mdoc
;
553 curp
->outfree
= html_free
;
556 curp
->outman
= tree_man
;
557 curp
->outmdoc
= tree_mdoc
;
564 curp
->outman
= terminal_man
;
565 curp
->outmdoc
= terminal_mdoc
;
572 /* Execute the out device, if it exists. */
574 if (curp
->man
&& curp
->outman
)
575 (*curp
->outman
)(curp
->outdata
, curp
->man
);
576 if (curp
->mdoc
&& curp
->outmdoc
)
577 (*curp
->outmdoc
)(curp
->outdata
, curp
->mdoc
);
581 memset(&curp
->regs
, 0, sizeof(struct regset
));
583 /* Reset the current-parse compilers. */
586 mdoc_reset(curp
->mdoc
);
588 man_reset(curp
->man
);
591 roff_reset(curp
->roff
);
597 pdesc(struct curparse
*curp
)
603 * Run for each opened file; may be called more than once for
604 * each full parse sequence if the opened file is nested (i.e.,
605 * from `so'). Simply sucks in the whole file and moves into
606 * the parse phase for the file.
609 if ( ! read_whole_file(curp
, &blk
, &with_mmap
)) {
610 exit_status
= MANDOCLEVEL_SYSERR
;
614 /* Line number is per-file. */
618 parsebuf(curp
, blk
, 1);
621 munmap(blk
.buf
, blk
.sz
);
627 parsebuf(struct curparse
*curp
, struct buf blk
, int start
)
631 int pos
; /* byte number in the ln buffer */
632 int lnn
; /* line number in the real file */
636 * Main parse routine for an opened file. This is called for
637 * each opened file and simply loops around the full input file,
638 * possibly nesting (i.e., with `so').
641 memset(&ln
, 0, sizeof(struct buf
));
646 for (i
= 0; i
< (int)blk
.sz
; ) {
647 if (0 == pos
&& '\0' == blk
.buf
[i
])
653 while (i
< (int)blk
.sz
&& (start
|| '\0' != blk
.buf
[i
])) {
654 if ('\n' == blk
.buf
[i
]) {
661 * Warn about bogus characters. If you're using
662 * non-ASCII encoding, you're screwing your
663 * readers. Since I'd rather this not happen,
664 * I'll be helpful and drop these characters so
665 * we don't display gibberish. Note to manual
666 * writers: use special characters.
669 c
= (unsigned char) blk
.buf
[i
];
671 if ( ! (isascii(c
) &&
672 (isgraph(c
) || isblank(c
)))) {
673 mmsg(MANDOCERR_BADCHAR
, curp
,
674 curp
->line
, pos
, "ignoring byte");
679 /* Trailing backslash = a plain char. */
681 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
682 if (pos
>= (int)ln
.sz
)
683 resize_buf(&ln
, 256);
684 ln
.buf
[pos
++] = blk
.buf
[i
++];
688 /* Found escape & at least one other char. */
690 if ('\n' == blk
.buf
[i
+ 1]) {
692 /* Escaped newlines are skipped over */
697 if ('"' == blk
.buf
[i
+ 1]) {
699 /* Comment, skip to end of line */
700 for (; i
< (int)blk
.sz
; ++i
) {
701 if ('\n' == blk
.buf
[i
]) {
708 /* Back out trailing whitespace */
709 for (; pos
> 0; --pos
) {
710 if (ln
.buf
[pos
- 1] != ' ')
712 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
718 /* Some other escape sequence, copy & cont. */
720 if (pos
+ 1 >= (int)ln
.sz
)
721 resize_buf(&ln
, 256);
723 ln
.buf
[pos
++] = blk
.buf
[i
++];
724 ln
.buf
[pos
++] = blk
.buf
[i
++];
727 if (pos
>= (int)ln
.sz
)
728 resize_buf(&ln
, 256);
733 * A significant amount of complexity is contained by
734 * the roff preprocessor. It's line-oriented but can be
735 * expressed on one line, so we need at times to
736 * readjust our starting point and re-run it. The roff
737 * preprocessor can also readjust the buffers with new
738 * data, so we pass them in wholesale.
745 (curp
->roff
, curp
->line
,
746 &ln
.buf
, &ln
.sz
, of
, &of
);
750 parsebuf(curp
, ln
, 0);
754 pos
= strlen(ln
.buf
);
762 assert(MANDOCLEVEL_FATAL
<= exit_status
);
765 if (pfile(ln
.buf
+ of
, curp
)) {
775 * If input parsers have not been allocated, do so now.
776 * We keep these instanced between parsers, but set them
777 * locally per parse routine since we can use different
778 * parsers with each one.
781 if ( ! (curp
->man
|| curp
->mdoc
))
782 pset(ln
.buf
+ of
, pos
- of
, curp
);
785 * Lastly, push down into the parsers themselves. One
786 * of these will have already been set in the pset()
790 if (curp
->man
|| curp
->mdoc
) {
792 man_parseln(curp
->man
,
793 curp
->line
, ln
.buf
, of
) :
794 mdoc_parseln(curp
->mdoc
,
795 curp
->line
, ln
.buf
, of
);
798 assert(MANDOCLEVEL_FATAL
<= exit_status
);
803 /* Temporary buffers typically are not full. */
805 if (0 == start
&& '\0' == blk
.buf
[i
])
808 /* Start the next input line. */
817 pset(const char *buf
, int pos
, struct curparse
*curp
)
822 * Try to intuit which kind of manual parser should be used. If
823 * passed in by command-line (-man, -mdoc), then use that
824 * explicitly. If passed as -mandoc, then try to guess from the
825 * line: either skip dot-lines, use -mdoc when finding `.Dd', or
826 * default to -man, which is more lenient.
828 * Separate out pmdoc/pman from mdoc/man: the first persists
829 * through all parsers, while the latter is used per-parse.
832 if ('.' == buf
[0] || '\'' == buf
[0]) {
833 for (i
= 1; buf
[i
]; i
++)
834 if (' ' != buf
[i
] && '\t' != buf
[i
])
840 switch (curp
->inttype
) {
842 if (NULL
== curp
->pmdoc
)
843 curp
->pmdoc
= mdoc_alloc
844 (&curp
->regs
, curp
, mmsg
);
846 curp
->mdoc
= curp
->pmdoc
;
849 if (NULL
== curp
->pman
)
850 curp
->pman
= man_alloc
851 (&curp
->regs
, curp
, mmsg
);
853 curp
->man
= curp
->pman
;
859 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
860 if (NULL
== curp
->pmdoc
)
861 curp
->pmdoc
= mdoc_alloc
862 (&curp
->regs
, curp
, mmsg
);
864 curp
->mdoc
= curp
->pmdoc
;
868 if (NULL
== curp
->pman
)
869 curp
->pman
= man_alloc(&curp
->regs
, curp
, mmsg
);
871 curp
->man
= curp
->pman
;
875 moptions(enum intt
*tflags
, char *arg
)
878 if (0 == strcmp(arg
, "doc"))
880 else if (0 == strcmp(arg
, "andoc"))
882 else if (0 == strcmp(arg
, "an"))
885 fprintf(stderr
, "%s: Bad argument\n", arg
);
893 toptions(struct curparse
*curp
, char *arg
)
896 if (0 == strcmp(arg
, "ascii"))
897 curp
->outtype
= OUTT_ASCII
;
898 else if (0 == strcmp(arg
, "lint")) {
899 curp
->outtype
= OUTT_LINT
;
900 curp
->wlevel
= MANDOCLEVEL_WARNING
;
902 else if (0 == strcmp(arg
, "tree"))
903 curp
->outtype
= OUTT_TREE
;
904 else if (0 == strcmp(arg
, "html"))
905 curp
->outtype
= OUTT_HTML
;
906 else if (0 == strcmp(arg
, "xhtml"))
907 curp
->outtype
= OUTT_XHTML
;
908 else if (0 == strcmp(arg
, "ps"))
909 curp
->outtype
= OUTT_PS
;
910 else if (0 == strcmp(arg
, "pdf"))
911 curp
->outtype
= OUTT_PDF
;
913 fprintf(stderr
, "%s: Bad argument\n", arg
);
921 woptions(struct curparse
*curp
, char *arg
)
935 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
942 curp
->wlevel
= MANDOCLEVEL_WARNING
;
945 curp
->wlevel
= MANDOCLEVEL_ERROR
;
948 curp
->wlevel
= MANDOCLEVEL_FATAL
;
951 fprintf(stderr
, "-W%s: Bad argument\n", o
);
960 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
963 enum mandoclevel level
;
965 level
= MANDOCLEVEL_FATAL
;
966 while (t
< mandoclimits
[level
])
970 cp
= (struct curparse
*)arg
;
971 if (level
< cp
->wlevel
)
974 fprintf(stderr
, "%s:%d:%d: %s: %s",
975 cp
->file
, ln
, col
+ 1, mandoclevels
[level
], mandocerrs
[t
]);
977 fprintf(stderr
, ": %s", msg
);
980 if (exit_status
< level
)
983 return(level
< MANDOCLEVEL_FATAL
);