]>
git.cameronkatri.com Git - mandoc.git/blob - main.c
e8031ae2e2b9ea7b3bd0409c95c2406bcca8fbd2
1 /* $Id: main.c,v 1.109 2010/11/29 15:45:15 kristaps Exp $ */
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
46 /* FIXME: Intel's compiler? LLVM? pcc? */
48 #if !defined(__GNUC__) || (__GNUC__ < 2)
50 # define __attribute__(x)
52 #endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
54 typedef void (*out_mdoc
)(void *, const struct mdoc
*);
55 typedef void (*out_man
)(void *, const struct man
*);
56 typedef void (*out_free
)(void *);
80 const char *file
; /* Current parse. */
81 int fd
; /* Current parse. */
82 enum mandoclevel wlevel
; /* Ignore messages below this. */
83 int wstop
; /* Stop after a file with a warning. */
84 enum intt inttype
; /* which parser to use */
85 struct man
*man
; /* man parser */
86 struct mdoc
*mdoc
; /* mdoc parser */
87 struct roff
*roff
; /* roff parser (!NULL) */
88 struct regset regs
; /* roff registers */
89 enum outt outtype
; /* which output to use */
90 out_mdoc outmdoc
; /* mdoc output ptr */
91 out_man outman
; /* man output ptr */
92 out_free outfree
; /* free output ptr */
93 void *outdata
; /* data for output */
94 char outopts
[BUFSIZ
]; /* buf of output opts */
97 static const char * const mandoclevels
[MANDOCLEVEL_MAX
] = {
107 static const enum mandocerr mandoclimits
[MANDOCLEVEL_MAX
] = {
117 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
122 "text should be uppercase",
123 "sections out of conventional order",
124 "section name repeats",
125 "out of order prologue",
126 "repeated prologue entry",
127 "list type must come first",
128 "tab in non-literal context",
129 "bad escape sequence",
130 "unterminated quoted string",
131 "argument requires the width argument",
132 "superfluous width argument",
134 "bad width argument",
135 "unknown manual section",
136 "section not in conventional manual section",
137 "end of line whitespace",
138 "blocks badly nested",
142 "NAME section must come first",
144 "child violates parent syntax",
147 "list type repeated",
148 "display type repeated",
151 "manual name not yet set",
152 "obsolete macro ignored",
153 "empty macro ignored",
154 "macro not allowed in body",
155 "macro not allowed in prologue",
157 "bad NAME section contents",
159 "no text in this context",
161 "unknown macro will be lost",
163 "argument count wrong",
164 "request scope close w/none open",
165 "scope already open",
166 "scope open on exit",
167 "macro requires line argument(s)",
168 "macro requires body argument(s)",
169 "macro requires argument(s)",
170 "no title in document",
172 "missing display type",
174 "line argument(s) will be lost",
175 "body argument(s) will be lost",
176 "paragraph macro ignored",
178 "generic fatal error",
180 "column syntax is inconsistent",
181 "displays may not be nested",
182 "unsupported display type",
183 "blocks badly nested",
184 "no such block is open",
185 "line scope broken, syntax violated",
186 "argument count wrong, violates syntax",
187 "child violates parent syntax",
188 "argument count wrong, violates syntax",
190 "no document prologue",
191 "utsname system call failed",
192 "static buffer exhausted",
195 static void fdesc(struct curparse
*);
196 static void ffile(const char *, struct curparse
*);
197 static int moptions(enum intt
*, char *);
198 static int mmsg(enum mandocerr
, void *,
199 int, int, const char *);
200 static void pset(const char *, int, struct curparse
*,
201 struct man
**, struct mdoc
**);
202 static int toptions(struct curparse
*, char *);
203 static void usage(void) __attribute__((noreturn
));
204 static void version(void) __attribute__((noreturn
));
205 static int woptions(struct curparse
*, char *);
207 static const char *progname
;
208 static enum mandoclevel exit_status
= MANDOCLEVEL_OK
;
211 main(int argc
, char *argv
[])
214 struct curparse curp
;
216 progname
= strrchr(argv
[0], '/');
217 if (progname
== NULL
)
222 memset(&curp
, 0, sizeof(struct curparse
));
224 curp
.inttype
= INTT_AUTO
;
225 curp
.outtype
= OUTT_ASCII
;
226 curp
.wlevel
= MANDOCLEVEL_FATAL
;
229 while (-1 != (c
= getopt(argc
, argv
, "m:O:T:VW:")))
232 if ( ! moptions(&curp
.inttype
, optarg
))
233 return((int)MANDOCLEVEL_BADARG
);
236 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
237 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
240 if ( ! toptions(&curp
, optarg
))
241 return((int)MANDOCLEVEL_BADARG
);
244 if ( ! woptions(&curp
, optarg
))
245 return((int)MANDOCLEVEL_BADARG
);
259 curp
.file
= "<stdin>";
260 curp
.fd
= STDIN_FILENO
;
267 if (MANDOCLEVEL_OK
!= exit_status
&& curp
.wstop
)
273 (*curp
.outfree
)(curp
.outdata
);
275 mdoc_free(curp
.mdoc
);
279 roff_free(curp
.roff
);
281 return((int)exit_status
);
289 (void)printf("%s %s\n", progname
, VERSION
);
290 exit((int)MANDOCLEVEL_OK
);
298 (void)fprintf(stderr
, "usage: %s [-V] [-foption] "
299 "[-mformat] [-Ooption] [-Toutput] "
300 "[-Werr] [file...]\n", progname
);
301 exit((int)MANDOCLEVEL_BADARG
);
306 ffile(const char *file
, struct curparse
*curp
)
310 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
312 exit_status
= MANDOCLEVEL_SYSERR
;
318 if (-1 == close(curp
->fd
))
324 resize_buf(struct buf
*buf
, size_t initial
)
327 buf
->sz
= buf
->sz
? 2 * buf
->sz
: initial
;
328 buf
->buf
= realloc(buf
->buf
, buf
->sz
);
329 if (NULL
== buf
->buf
) {
331 exit((int)MANDOCLEVEL_SYSERR
);
337 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
343 if (-1 == fstat(curp
->fd
, &st
)) {
349 * If we're a regular file, try just reading in the whole entry
350 * via mmap(). This is faster than reading it into blocks, and
351 * since each file is only a few bytes to begin with, I'm not
352 * concerned that this is going to tank any machines.
355 if (S_ISREG(st
.st_mode
)) {
356 if (st
.st_size
>= (1U << 31)) {
357 fprintf(stderr
, "%s: input too large\n",
362 fb
->sz
= (size_t)st
.st_size
;
363 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
364 MAP_FILE
|MAP_SHARED
, curp
->fd
, 0);
365 if (fb
->buf
!= MAP_FAILED
)
370 * If this isn't a regular file (like, say, stdin), then we must
371 * go the old way and just read things in bit by bit.
380 if (fb
->sz
== (1U << 31)) {
381 fprintf(stderr
, "%s: input too large\n",
385 resize_buf(fb
, 65536);
387 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
406 fdesc(struct curparse
*curp
)
409 int i
, pos
, lnn
, lnn_start
, with_mmap
, of
;
420 memset(&ln
, 0, sizeof(struct buf
));
423 * Two buffers: ln and buf. buf is the input file and may be
424 * memory mapped. ln is a line buffer and grows on-demand.
427 if ( ! read_whole_file(curp
, &blk
, &with_mmap
)) {
428 exit_status
= MANDOCLEVEL_SYSERR
;
432 if (NULL
== curp
->roff
)
433 curp
->roff
= roff_alloc(&curp
->regs
, curp
, mmsg
);
437 for (i
= 0, lnn
= 1; i
< (int)blk
.sz
;) {
440 while (i
< (int)blk
.sz
) {
441 if ('\n' == blk
.buf
[i
]) {
448 * Warn about bogus characters. If you're using
449 * non-ASCII encoding, you're screwing your
450 * readers. Since I'd rather this not happen,
451 * I'll be helpful and drop these characters so
452 * we don't display gibberish. Note to manual
453 * writers: use special characters.
456 c
= (unsigned char) blk
.buf
[i
];
457 if ( ! (isascii(c
) && (isgraph(c
) || isblank(c
)))) {
458 mmsg(MANDOCERR_BADCHAR
, curp
,
459 lnn_start
, pos
, "ignoring byte");
464 /* Trailing backslash is like a plain character. */
465 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
466 if (pos
>= (int)ln
.sz
)
467 resize_buf(&ln
, 256);
468 ln
.buf
[pos
++] = blk
.buf
[i
++];
471 /* Found an escape and at least one other character. */
472 if ('\n' == blk
.buf
[i
+ 1]) {
473 /* Escaped newlines are skipped over */
478 if ('"' == blk
.buf
[i
+ 1]) {
480 /* Comment, skip to end of line */
481 for (; i
< (int)blk
.sz
; ++i
) {
482 if ('\n' == blk
.buf
[i
]) {
488 /* Back out trailing whitespace */
489 for (; pos
> 0; --pos
) {
490 if (ln
.buf
[pos
- 1] != ' ')
492 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
497 /* Some other escape sequence, copy and continue. */
498 if (pos
+ 1 >= (int)ln
.sz
)
499 resize_buf(&ln
, 256);
501 ln
.buf
[pos
++] = blk
.buf
[i
++];
502 ln
.buf
[pos
++] = blk
.buf
[i
++];
505 if (pos
>= (int)ln
.sz
)
506 resize_buf(&ln
, 256);
510 * A significant amount of complexity is contained by
511 * the roff preprocessor. It's line-oriented but can be
512 * expressed on one line, so we need at times to
513 * readjust our starting point and re-run it. The roff
514 * preprocessor can also readjust the buffers with new
515 * data, so we pass them in wholesale.
520 re
= roff_parseln(roff
, lnn_start
,
521 &ln
.buf
, &ln
.sz
, of
, &of
);
522 } while (ROFF_RERUN
== re
);
524 if (ROFF_IGN
== re
) {
526 } else if (ROFF_ERR
== re
) {
527 assert(MANDOCLEVEL_FATAL
<= exit_status
);
532 * If input parsers have not been allocated, do so now.
533 * We keep these instanced between parsers, but set them
534 * locally per parse routine since we can use different
535 * parsers with each one.
538 if ( ! (man
|| mdoc
))
539 pset(ln
.buf
+ of
, pos
- of
, curp
, &man
, &mdoc
);
541 /* Lastly, push down into the parsers themselves. */
543 if (man
&& ! man_parseln(man
, lnn_start
, ln
.buf
, of
)) {
544 assert(MANDOCLEVEL_FATAL
<= exit_status
);
547 if (mdoc
&& ! mdoc_parseln(mdoc
, lnn_start
, ln
.buf
, of
)) {
548 assert(MANDOCLEVEL_FATAL
<= exit_status
);
553 /* NOTE a parser may not have been assigned, yet. */
555 if ( ! (man
|| mdoc
)) {
556 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
557 exit_status
= MANDOCLEVEL_FATAL
;
561 /* Clean up the parse routine ASTs. */
563 if (mdoc
&& ! mdoc_endparse(mdoc
)) {
564 assert(MANDOCLEVEL_FATAL
<= exit_status
);
567 if (man
&& ! man_endparse(man
)) {
568 assert(MANDOCLEVEL_FATAL
<= exit_status
);
571 if (roff
&& ! roff_endparse(roff
)) {
572 assert(MANDOCLEVEL_FATAL
<= exit_status
);
577 * With -Wstop and warnings or errors of at least
578 * the requested level, do not produce output.
581 if (MANDOCLEVEL_OK
!= exit_status
&& curp
->wstop
)
584 /* If unset, allocate output dev now (if applicable). */
586 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
587 switch (curp
->outtype
) {
589 curp
->outdata
= xhtml_alloc(curp
->outopts
);
592 curp
->outdata
= html_alloc(curp
->outopts
);
595 curp
->outdata
= ascii_alloc(curp
->outopts
);
596 curp
->outfree
= ascii_free
;
599 curp
->outdata
= pdf_alloc(curp
->outopts
);
600 curp
->outfree
= pspdf_free
;
603 curp
->outdata
= ps_alloc(curp
->outopts
);
604 curp
->outfree
= pspdf_free
;
610 switch (curp
->outtype
) {
614 curp
->outman
= html_man
;
615 curp
->outmdoc
= html_mdoc
;
616 curp
->outfree
= html_free
;
619 curp
->outman
= tree_man
;
620 curp
->outmdoc
= tree_mdoc
;
627 curp
->outman
= terminal_man
;
628 curp
->outmdoc
= terminal_mdoc
;
635 /* Execute the out device, if it exists. */
637 if (man
&& curp
->outman
)
638 (*curp
->outman
)(curp
->outdata
, man
);
639 if (mdoc
&& curp
->outmdoc
)
640 (*curp
->outmdoc
)(curp
->outdata
, mdoc
);
643 memset(&curp
->regs
, 0, sizeof(struct regset
));
653 munmap(blk
.buf
, blk
.sz
);
662 pset(const char *buf
, int pos
, struct curparse
*curp
,
663 struct man
**man
, struct mdoc
**mdoc
)
668 * Try to intuit which kind of manual parser should be used. If
669 * passed in by command-line (-man, -mdoc), then use that
670 * explicitly. If passed as -mandoc, then try to guess from the
671 * line: either skip dot-lines, use -mdoc when finding `.Dd', or
672 * default to -man, which is more lenient.
675 if ('.' == buf
[0] || '\'' == buf
[0]) {
676 for (i
= 1; buf
[i
]; i
++)
677 if (' ' != buf
[i
] && '\t' != buf
[i
])
683 switch (curp
->inttype
) {
685 if (NULL
== curp
->mdoc
)
686 curp
->mdoc
= mdoc_alloc(&curp
->regs
, curp
, mmsg
);
691 if (NULL
== curp
->man
)
692 curp
->man
= man_alloc(&curp
->regs
, curp
, mmsg
);
700 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
701 if (NULL
== curp
->mdoc
)
702 curp
->mdoc
= mdoc_alloc(&curp
->regs
, curp
, mmsg
);
708 if (NULL
== curp
->man
)
709 curp
->man
= man_alloc(&curp
->regs
, curp
, mmsg
);
716 moptions(enum intt
*tflags
, char *arg
)
719 if (0 == strcmp(arg
, "doc"))
721 else if (0 == strcmp(arg
, "andoc"))
723 else if (0 == strcmp(arg
, "an"))
726 fprintf(stderr
, "%s: Bad argument\n", arg
);
735 toptions(struct curparse
*curp
, char *arg
)
738 if (0 == strcmp(arg
, "ascii"))
739 curp
->outtype
= OUTT_ASCII
;
740 else if (0 == strcmp(arg
, "lint")) {
741 curp
->outtype
= OUTT_LINT
;
742 curp
->wlevel
= MANDOCLEVEL_WARNING
;
744 else if (0 == strcmp(arg
, "tree"))
745 curp
->outtype
= OUTT_TREE
;
746 else if (0 == strcmp(arg
, "html"))
747 curp
->outtype
= OUTT_HTML
;
748 else if (0 == strcmp(arg
, "xhtml"))
749 curp
->outtype
= OUTT_XHTML
;
750 else if (0 == strcmp(arg
, "ps"))
751 curp
->outtype
= OUTT_PS
;
752 else if (0 == strcmp(arg
, "pdf"))
753 curp
->outtype
= OUTT_PDF
;
755 fprintf(stderr
, "%s: Bad argument\n", arg
);
764 woptions(struct curparse
*curp
, char *arg
)
778 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
785 curp
->wlevel
= MANDOCLEVEL_WARNING
;
788 curp
->wlevel
= MANDOCLEVEL_ERROR
;
791 curp
->wlevel
= MANDOCLEVEL_FATAL
;
794 fprintf(stderr
, "-W%s: Bad argument\n", o
);
804 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
807 enum mandoclevel level
;
809 level
= MANDOCLEVEL_FATAL
;
810 while (t
< mandoclimits
[level
])
814 cp
= (struct curparse
*)arg
;
815 if (level
< cp
->wlevel
)
818 fprintf(stderr
, "%s:%d:%d: %s: %s",
819 cp
->file
, ln
, col
+ 1, mandoclevels
[level
], mandocerrs
[t
]);
821 fprintf(stderr
, ": %s", msg
);
824 if (exit_status
< level
)
827 return(level
< MANDOCLEVEL_FATAL
);