]>
git.cameronkatri.com Git - mandoc.git/blob - main.c
1 /* $Id: main.c,v 1.104 2010/08/20 08:13:43 schwarze Exp $ */
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
46 /* FIXME: Intel's compiler? LLVM? pcc? */
48 #if !defined(__GNUC__) || (__GNUC__ < 2)
50 # define __attribute__(x)
52 #endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
54 typedef void (*out_mdoc
)(void *, const struct mdoc
*);
55 typedef void (*out_man
)(void *, const struct man
*);
56 typedef void (*out_free
)(void *);
80 const char *file
; /* Current parse. */
81 int fd
; /* Current parse. */
82 enum mandoclevel wlevel
; /* Ignore messages below this. */
83 int wstop
; /* Stop after a file with a warning. */
84 enum intt inttype
; /* which parser to use */
85 struct man
*man
; /* man parser */
86 struct mdoc
*mdoc
; /* mdoc parser */
87 struct roff
*roff
; /* roff parser (!NULL) */
88 struct regset regs
; /* roff registers */
89 enum outt outtype
; /* which output to use */
90 out_mdoc outmdoc
; /* mdoc output ptr */
91 out_man outman
; /* man output ptr */
92 out_free outfree
; /* free output ptr */
93 void *outdata
; /* data for output */
94 char outopts
[BUFSIZ
]; /* buf of output opts */
97 static const char * const mandoclevels
[MANDOCLEVEL_MAX
] = {
107 static const enum mandocerr mandoclimits
[MANDOCLEVEL_MAX
] = {
117 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
122 "text should be uppercase",
123 "sections out of conventional order",
124 "section name repeats",
125 "out of order prologue",
126 "repeated prologue entry",
127 "list type must come first",
130 "tab in non-literal context",
131 "bad escape sequence",
132 "unterminated quoted string",
133 "argument requires the width argument",
134 "superfluous width argument",
137 "bad width argument",
138 "unknown manual section",
139 "section not in conventional manual section",
140 "end of line whitespace",
141 "blocks badly nested",
142 "scope open on exit",
146 "NAME section must come first",
148 "child violates parent syntax",
150 "list type repeated",
151 "display type repeated",
153 "manual name not yet set",
154 "obsolete macro ignored",
155 "empty macro ignored",
156 "macro not allowed in body",
157 "macro not allowed in prologue",
159 "bad NAME section contents",
161 "no text in this context",
163 "unknown macro will be lost",
165 "argument count wrong",
166 "request scope close w/none open",
167 "scope already open",
168 "macro requires line argument(s)",
169 "macro requires body argument(s)",
170 "macro requires argument(s)",
171 "no title in document",
173 "missing display type",
175 "line argument(s) will be lost",
176 "body argument(s) will be lost",
178 "generic fatal error",
180 "column syntax is inconsistent",
181 "displays may not be nested",
182 "unsupported display type",
183 "blocks badly nested",
184 "no such block is open",
185 "scope broken, syntax violated",
186 "line scope broken, syntax violated",
187 "argument count wrong, violates syntax",
188 "child violates parent syntax",
189 "argument count wrong, violates syntax",
191 "no document prologue",
192 "utsname system call failed",
193 "static buffer exhausted",
196 static void fdesc(struct curparse
*);
197 static void ffile(const char *, struct curparse
*);
198 static int moptions(enum intt
*, char *);
199 static int mmsg(enum mandocerr
, void *,
200 int, int, const char *);
201 static void pset(const char *, int, struct curparse
*,
202 struct man
**, struct mdoc
**);
203 static int toptions(struct curparse
*, char *);
204 static void usage(void) __attribute__((noreturn
));
205 static void version(void) __attribute__((noreturn
));
206 static int woptions(struct curparse
*, char *);
208 static const char *progname
;
209 static enum mandoclevel exit_status
= MANDOCLEVEL_OK
;
212 main(int argc
, char *argv
[])
215 struct curparse curp
;
217 progname
= strrchr(argv
[0], '/');
218 if (progname
== NULL
)
223 memset(&curp
, 0, sizeof(struct curparse
));
225 curp
.inttype
= INTT_AUTO
;
226 curp
.outtype
= OUTT_ASCII
;
227 curp
.wlevel
= MANDOCLEVEL_FATAL
;
230 while (-1 != (c
= getopt(argc
, argv
, "m:O:T:VW:")))
233 if ( ! moptions(&curp
.inttype
, optarg
))
234 return(MANDOCLEVEL_BADARG
);
237 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
238 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
241 if ( ! toptions(&curp
, optarg
))
242 return(MANDOCLEVEL_BADARG
);
245 if ( ! woptions(&curp
, optarg
))
246 return(MANDOCLEVEL_BADARG
);
260 curp
.file
= "<stdin>";
261 curp
.fd
= STDIN_FILENO
;
268 if (MANDOCLEVEL_OK
!= exit_status
&& curp
.wstop
)
274 (*curp
.outfree
)(curp
.outdata
);
276 mdoc_free(curp
.mdoc
);
280 roff_free(curp
.roff
);
290 (void)printf("%s %s\n", progname
, VERSION
);
291 exit(MANDOCLEVEL_OK
);
299 (void)fprintf(stderr
, "usage: %s [-V] [-foption] "
300 "[-mformat] [-Ooption] [-Toutput] "
301 "[-Werr] [file...]\n", progname
);
302 exit(MANDOCLEVEL_BADARG
);
307 ffile(const char *file
, struct curparse
*curp
)
311 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
313 exit_status
= MANDOCLEVEL_SYSERR
;
319 if (-1 == close(curp
->fd
))
325 resize_buf(struct buf
*buf
, size_t initial
)
328 buf
->sz
= buf
->sz
? 2 * buf
->sz
: initial
;
329 buf
->buf
= realloc(buf
->buf
, buf
->sz
);
330 if (NULL
== buf
->buf
) {
332 exit(MANDOCLEVEL_SYSERR
);
338 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
344 if (-1 == fstat(curp
->fd
, &st
)) {
350 * If we're a regular file, try just reading in the whole entry
351 * via mmap(). This is faster than reading it into blocks, and
352 * since each file is only a few bytes to begin with, I'm not
353 * concerned that this is going to tank any machines.
356 if (S_ISREG(st
.st_mode
)) {
357 if (st
.st_size
>= (1U << 31)) {
358 fprintf(stderr
, "%s: input too large\n",
363 fb
->sz
= (size_t)st
.st_size
;
364 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
365 MAP_FILE
|MAP_SHARED
, curp
->fd
, 0);
366 if (fb
->buf
!= MAP_FAILED
)
371 * If this isn't a regular file (like, say, stdin), then we must
372 * go the old way and just read things in bit by bit.
381 if (fb
->sz
== (1U << 31)) {
382 fprintf(stderr
, "%s: input too large\n",
386 resize_buf(fb
, 65536);
388 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
407 fdesc(struct curparse
*curp
)
410 int i
, pos
, lnn
, lnn_start
, with_mmap
, of
;
421 memset(&ln
, 0, sizeof(struct buf
));
424 * Two buffers: ln and buf. buf is the input file and may be
425 * memory mapped. ln is a line buffer and grows on-demand.
428 if ( ! read_whole_file(curp
, &blk
, &with_mmap
)) {
429 exit_status
= MANDOCLEVEL_SYSERR
;
433 if (NULL
== curp
->roff
)
434 curp
->roff
= roff_alloc(&curp
->regs
, curp
, mmsg
);
438 for (i
= 0, lnn
= 1; i
< (int)blk
.sz
;) {
441 while (i
< (int)blk
.sz
) {
442 if ('\n' == blk
.buf
[i
]) {
449 * Warn about bogus characters. If you're using
450 * non-ASCII encoding, you're screwing your
451 * readers. Since I'd rather this not happen,
452 * I'll be helpful and drop these characters so
453 * we don't display gibberish. Note to manual
454 * writers: use special characters.
457 c
= (unsigned char) blk
.buf
[i
];
458 if ( ! (isascii(c
) && (isgraph(c
) || isblank(c
)))) {
459 mmsg(MANDOCERR_BADCHAR
, curp
,
460 lnn_start
, pos
, "ignoring byte");
465 /* Trailing backslash is like a plain character. */
466 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
467 if (pos
>= (int)ln
.sz
)
468 resize_buf(&ln
, 256);
469 ln
.buf
[pos
++] = blk
.buf
[i
++];
472 /* Found an escape and at least one other character. */
473 if ('\n' == blk
.buf
[i
+ 1]) {
474 /* Escaped newlines are skipped over */
479 if ('"' == blk
.buf
[i
+ 1]) {
481 /* Comment, skip to end of line */
482 for (; i
< (int)blk
.sz
; ++i
) {
483 if ('\n' == blk
.buf
[i
]) {
489 /* Back out trailing whitespace */
490 for (; pos
> 0; --pos
) {
491 if (ln
.buf
[pos
- 1] != ' ')
493 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
498 /* Some other escape sequence, copy and continue. */
499 if (pos
+ 1 >= (int)ln
.sz
)
500 resize_buf(&ln
, 256);
502 ln
.buf
[pos
++] = blk
.buf
[i
++];
503 ln
.buf
[pos
++] = blk
.buf
[i
++];
506 if (pos
>= (int)ln
.sz
)
507 resize_buf(&ln
, 256);
511 * A significant amount of complexity is contained by
512 * the roff preprocessor. It's line-oriented but can be
513 * expressed on one line, so we need at times to
514 * readjust our starting point and re-run it. The roff
515 * preprocessor can also readjust the buffers with new
516 * data, so we pass them in wholesale.
521 re
= roff_parseln(roff
, lnn_start
,
522 &ln
.buf
, &ln
.sz
, of
, &of
);
523 } while (ROFF_RERUN
== re
);
525 if (ROFF_IGN
== re
) {
527 } else if (ROFF_ERR
== re
) {
528 assert(MANDOCLEVEL_FATAL
<= exit_status
);
533 * If input parsers have not been allocated, do so now.
534 * We keep these instanced between parsers, but set them
535 * locally per parse routine since we can use different
536 * parsers with each one.
539 if ( ! (man
|| mdoc
))
540 pset(ln
.buf
+ of
, pos
- of
, curp
, &man
, &mdoc
);
542 /* Lastly, push down into the parsers themselves. */
544 if (man
&& ! man_parseln(man
, lnn_start
, ln
.buf
, of
)) {
545 assert(MANDOCLEVEL_FATAL
<= exit_status
);
548 if (mdoc
&& ! mdoc_parseln(mdoc
, lnn_start
, ln
.buf
, of
)) {
549 assert(MANDOCLEVEL_FATAL
<= exit_status
);
554 /* NOTE a parser may not have been assigned, yet. */
556 if ( ! (man
|| mdoc
)) {
557 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
558 exit_status
= MANDOCLEVEL_FATAL
;
562 /* Clean up the parse routine ASTs. */
564 if (mdoc
&& ! mdoc_endparse(mdoc
)) {
565 assert(MANDOCLEVEL_FATAL
<= exit_status
);
568 if (man
&& ! man_endparse(man
)) {
569 assert(MANDOCLEVEL_FATAL
<= exit_status
);
572 if (roff
&& ! roff_endparse(roff
)) {
573 assert(MANDOCLEVEL_FATAL
<= exit_status
);
578 * With -Wstop and warnings or errors of at least
579 * the requested level, do not produce output.
582 if (MANDOCLEVEL_OK
!= exit_status
&& curp
->wstop
)
585 /* If unset, allocate output dev now (if applicable). */
587 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
588 switch (curp
->outtype
) {
590 curp
->outdata
= xhtml_alloc(curp
->outopts
);
593 curp
->outdata
= html_alloc(curp
->outopts
);
596 curp
->outdata
= ascii_alloc(curp
->outopts
);
597 curp
->outfree
= ascii_free
;
600 curp
->outdata
= pdf_alloc(curp
->outopts
);
601 curp
->outfree
= pspdf_free
;
604 curp
->outdata
= ps_alloc(curp
->outopts
);
605 curp
->outfree
= pspdf_free
;
611 switch (curp
->outtype
) {
615 curp
->outman
= html_man
;
616 curp
->outmdoc
= html_mdoc
;
617 curp
->outfree
= html_free
;
620 curp
->outman
= tree_man
;
621 curp
->outmdoc
= tree_mdoc
;
628 curp
->outman
= terminal_man
;
629 curp
->outmdoc
= terminal_mdoc
;
636 /* Execute the out device, if it exists. */
638 if (man
&& curp
->outman
)
639 (*curp
->outman
)(curp
->outdata
, man
);
640 if (mdoc
&& curp
->outmdoc
)
641 (*curp
->outmdoc
)(curp
->outdata
, mdoc
);
644 memset(&curp
->regs
, 0, sizeof(struct regset
));
654 munmap(blk
.buf
, blk
.sz
);
663 pset(const char *buf
, int pos
, struct curparse
*curp
,
664 struct man
**man
, struct mdoc
**mdoc
)
669 * Try to intuit which kind of manual parser should be used. If
670 * passed in by command-line (-man, -mdoc), then use that
671 * explicitly. If passed as -mandoc, then try to guess from the
672 * line: either skip dot-lines, use -mdoc when finding `.Dd', or
673 * default to -man, which is more lenient.
676 if ('.' == buf
[0] || '\'' == buf
[0]) {
677 for (i
= 1; buf
[i
]; i
++)
678 if (' ' != buf
[i
] && '\t' != buf
[i
])
684 switch (curp
->inttype
) {
686 if (NULL
== curp
->mdoc
)
687 curp
->mdoc
= mdoc_alloc(&curp
->regs
, curp
, mmsg
);
692 if (NULL
== curp
->man
)
693 curp
->man
= man_alloc(&curp
->regs
, curp
, mmsg
);
701 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
702 if (NULL
== curp
->mdoc
)
703 curp
->mdoc
= mdoc_alloc(&curp
->regs
, curp
, mmsg
);
709 if (NULL
== curp
->man
)
710 curp
->man
= man_alloc(&curp
->regs
, curp
, mmsg
);
717 moptions(enum intt
*tflags
, char *arg
)
720 if (0 == strcmp(arg
, "doc"))
722 else if (0 == strcmp(arg
, "andoc"))
724 else if (0 == strcmp(arg
, "an"))
727 fprintf(stderr
, "%s: Bad argument\n", arg
);
736 toptions(struct curparse
*curp
, char *arg
)
739 if (0 == strcmp(arg
, "ascii"))
740 curp
->outtype
= OUTT_ASCII
;
741 else if (0 == strcmp(arg
, "lint")) {
742 curp
->outtype
= OUTT_LINT
;
743 curp
->wlevel
= MANDOCLEVEL_WARNING
;
745 else if (0 == strcmp(arg
, "tree"))
746 curp
->outtype
= OUTT_TREE
;
747 else if (0 == strcmp(arg
, "html"))
748 curp
->outtype
= OUTT_HTML
;
749 else if (0 == strcmp(arg
, "xhtml"))
750 curp
->outtype
= OUTT_XHTML
;
751 else if (0 == strcmp(arg
, "ps"))
752 curp
->outtype
= OUTT_PS
;
753 else if (0 == strcmp(arg
, "pdf"))
754 curp
->outtype
= OUTT_PDF
;
756 fprintf(stderr
, "%s: Bad argument\n", arg
);
765 woptions(struct curparse
*curp
, char *arg
)
779 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
786 curp
->wlevel
= MANDOCLEVEL_WARNING
;
789 curp
->wlevel
= MANDOCLEVEL_ERROR
;
792 curp
->wlevel
= MANDOCLEVEL_FATAL
;
795 fprintf(stderr
, "-W%s: Bad argument\n", o
);
805 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
808 enum mandoclevel level
;
810 level
= MANDOCLEVEL_FATAL
;
811 while (t
< mandoclimits
[level
])
814 cp
= (struct curparse
*)arg
;
815 if (level
< cp
->wlevel
)
818 fprintf(stderr
, "%s:%d:%d: %s: %s",
819 cp
->file
, ln
, col
+ 1, mandoclevels
[level
], mandocerrs
[t
]);
821 fprintf(stderr
, ": %s", msg
);
824 if (exit_status
< level
)
827 return(level
< MANDOCLEVEL_FATAL
);