]>
git.cameronkatri.com Git - mandoc.git/blob - main.c
1 /* $Id: main.c,v 1.98 2010/07/07 15:04:54 kristaps Exp $ */
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
39 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
41 /* FIXME: Intel's compiler? LLVM? pcc? */
43 #if !defined(__GNUC__) || (__GNUC__ < 2)
45 # define __attribute__(x)
47 #endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
49 typedef void (*out_mdoc
)(void *, const struct mdoc
*);
50 typedef void (*out_man
)(void *, const struct man
*);
51 typedef void (*out_free
)(void *);
74 const char *file
; /* Current parse. */
75 int fd
; /* Current parse. */
77 /* FIXME: set by max error */
78 #define WARN_WALL (1 << 0) /* All-warnings mask. */
79 #define WARN_WERR (1 << 2) /* Warnings->errors. */
81 #define FL_IGN_SCOPE (1 << 0) /* Ignore scope errors. */
82 #define FL_NIGN_ESCAPE (1 << 1) /* Don't ignore bad escapes. */
83 #define FL_NIGN_MACRO (1 << 2) /* Don't ignore bad macros. */
84 #define FL_IGN_ERRORS (1 << 4) /* Ignore failed parse. */
85 #define FL_STRICT FL_NIGN_ESCAPE | \
86 FL_NIGN_MACRO /* ignore nothing */
87 enum intt inttype
; /* which parser to use */
88 struct man
*man
; /* man parser */
89 struct mdoc
*mdoc
; /* mdoc parser */
90 struct roff
*roff
; /* roff parser (!NULL) */
91 struct regset regs
; /* roff registers */
92 enum outt outtype
; /* which output to use */
93 out_mdoc outmdoc
; /* mdoc output ptr */
94 out_man outman
; /* man output ptr */
95 out_free outfree
; /* free output ptr */
96 void *outdata
; /* data for output */
97 char outopts
[BUFSIZ
]; /* buf of output opts */
100 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
105 "text should be uppercase",
106 "sections out of conventional order",
107 "section name repeats",
108 "out of order prologue",
109 "repeated prologue entry",
110 "list type must come first",
113 "bad escape sequence",
114 "unterminated quoted string",
115 "argument requires the width argument",
116 "superfluous width argument",
119 "bad width argument",
120 "unknown manual section",
121 "section not in conventional manual section",
122 "end of line whitespace",
123 "blocks badly nested",
124 "scope open on exit",
128 "NAME section must come first",
130 "child violates parent syntax",
132 "list type repeated",
133 "display type repeated",
135 "manual name not yet set",
136 "obsolete macro ignored",
137 "empty macro ignored",
138 "macro not allowed in body",
139 "macro not allowed in prologue",
141 "bad NAME section contents",
143 "no text in this context",
145 "unknown macro will be lost",
147 "argument count wrong",
148 "request scope close w/none open",
149 "scope already open",
150 "macro requires line argument(s)",
151 "macro requires body argument(s)",
152 "macro requires argument(s)",
153 "no title in document",
155 "missing display type",
157 "line argument(s) will be lost",
158 "body argument(s) will be lost",
160 "generic fatal error",
162 "column syntax is inconsistent",
163 "displays may not be nested",
164 "unsupported display type",
165 "blocks badly nested",
166 "no such block is open",
167 "scope broken, syntax violated",
168 "line scope broken, syntax violated",
169 "argument count wrong, violates syntax",
170 "child violates parent syntax",
171 "argument count wrong, violates syntax",
173 "no document prologue",
174 "utsname system call failed",
178 static void fdesc(struct curparse
*);
179 static void ffile(const char *, struct curparse
*);
180 static int foptions(int *, char *);
181 static struct man
*man_init(struct curparse
*);
182 static struct mdoc
*mdoc_init(struct curparse
*);
183 static struct roff
*roff_init(struct curparse
*);
184 static int moptions(enum intt
*, char *);
185 static int mmsg(enum mandocerr
, void *,
186 int, int, const char *);
187 static int pset(const char *, int, struct curparse
*,
188 struct man
**, struct mdoc
**);
189 static int toptions(struct curparse
*, char *);
190 static void usage(void) __attribute__((noreturn
));
191 static void version(void) __attribute__((noreturn
));
192 static int woptions(int *, char *);
194 static const char *progname
;
195 static int with_fatal
;
196 static int with_error
;
199 main(int argc
, char *argv
[])
202 struct curparse curp
;
204 progname
= strrchr(argv
[0], '/');
205 if (progname
== NULL
)
210 memset(&curp
, 0, sizeof(struct curparse
));
212 curp
.inttype
= INTT_AUTO
;
213 curp
.outtype
= OUTT_ASCII
;
216 while (-1 != (c
= getopt(argc
, argv
, "f:m:O:T:VW:")))
219 if ( ! foptions(&curp
.fflags
, optarg
))
220 return(EXIT_FAILURE
);
223 if ( ! moptions(&curp
.inttype
, optarg
))
224 return(EXIT_FAILURE
);
227 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
228 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
231 if ( ! toptions(&curp
, optarg
))
232 return(EXIT_FAILURE
);
235 if ( ! woptions(&curp
.wflags
, optarg
))
236 return(EXIT_FAILURE
);
250 curp
.file
= "<stdin>";
251 curp
.fd
= STDIN_FILENO
;
259 if (with_fatal
&& !(curp
.fflags
& FL_IGN_ERRORS
))
265 (*curp
.outfree
)(curp
.outdata
);
267 mdoc_free(curp
.mdoc
);
271 roff_free(curp
.roff
);
273 return((with_fatal
|| with_error
) ?
274 EXIT_FAILURE
: EXIT_SUCCESS
);
282 (void)printf("%s %s\n", progname
, VERSION
);
291 (void)fprintf(stderr
, "usage: %s [-V] [-foption] "
292 "[-mformat] [-Ooption] [-Toutput] "
293 "[-Werr] [file...]\n", progname
);
299 man_init(struct curparse
*curp
)
303 /* Defaults from mandoc.1. */
305 pflags
= MAN_IGN_MACRO
| MAN_IGN_ESCAPE
;
307 if (curp
->fflags
& FL_NIGN_MACRO
)
308 pflags
&= ~MAN_IGN_MACRO
;
309 if (curp
->fflags
& FL_NIGN_ESCAPE
)
310 pflags
&= ~MAN_IGN_ESCAPE
;
312 return(man_alloc(&curp
->regs
, curp
, pflags
, mmsg
));
317 roff_init(struct curparse
*curp
)
320 return(roff_alloc(&curp
->regs
, mmsg
, curp
));
325 mdoc_init(struct curparse
*curp
)
329 /* Defaults from mandoc.1. */
331 pflags
= MDOC_IGN_MACRO
| MDOC_IGN_ESCAPE
;
333 if (curp
->fflags
& FL_IGN_SCOPE
)
334 pflags
|= MDOC_IGN_SCOPE
;
335 if (curp
->fflags
& FL_NIGN_ESCAPE
)
336 pflags
&= ~MDOC_IGN_ESCAPE
;
337 if (curp
->fflags
& FL_NIGN_MACRO
)
338 pflags
&= ~MDOC_IGN_MACRO
;
340 return(mdoc_alloc(&curp
->regs
, curp
, pflags
, mmsg
));
345 ffile(const char *file
, struct curparse
*curp
)
349 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
357 if (-1 == close(curp
->fd
))
363 resize_buf(struct buf
*buf
, size_t initial
)
372 tmp
= realloc(buf
->buf
, sz
);
384 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
390 if (-1 == fstat(curp
->fd
, &st
)) {
397 * If we're a regular file, try just reading in the whole entry
398 * via mmap(). This is faster than reading it into blocks, and
399 * since each file is only a few bytes to begin with, I'm not
400 * concerned that this is going to tank any machines.
403 if (S_ISREG(st
.st_mode
)) {
404 if (st
.st_size
>= (1U << 31)) {
405 fprintf(stderr
, "%s: input too large\n",
411 fb
->sz
= (size_t)st
.st_size
;
412 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
413 MAP_FILE
|MAP_SHARED
, curp
->fd
, 0);
414 if (fb
->buf
!= MAP_FAILED
)
419 * If this isn't a regular file (like, say, stdin), then we must
420 * go the old way and just read things in bit by bit.
429 if (fb
->sz
== (1U << 31)) {
430 fprintf(stderr
, "%s: input too large\n",
434 if (! resize_buf(fb
, 65536))
437 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
457 fdesc(struct curparse
*curp
)
460 int i
, pos
, lnn
, lnn_start
, with_mmap
, of
;
470 memset(&ln
, 0, sizeof(struct buf
));
473 * Two buffers: ln and buf. buf is the input file and may be
474 * memory mapped. ln is a line buffer and grows on-demand.
477 if ( ! read_whole_file(curp
, &blk
, &with_mmap
))
480 if (NULL
== curp
->roff
)
481 curp
->roff
= roff_init(curp
);
482 if (NULL
== (roff
= curp
->roff
))
485 for (i
= 0, lnn
= 1; i
< (int)blk
.sz
;) {
488 while (i
< (int)blk
.sz
) {
489 if ('\n' == blk
.buf
[i
]) {
494 /* Trailing backslash is like a plain character. */
495 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
496 if (pos
>= (int)ln
.sz
)
497 if (! resize_buf(&ln
, 256))
499 ln
.buf
[pos
++] = blk
.buf
[i
++];
502 /* Found an escape and at least one other character. */
503 if ('\n' == blk
.buf
[i
+ 1]) {
504 /* Escaped newlines are skipped over */
509 if ('"' == blk
.buf
[i
+ 1]) {
511 /* Comment, skip to end of line */
512 for (; i
< (int)blk
.sz
; ++i
) {
513 if ('\n' == blk
.buf
[i
]) {
519 /* Back out trailing whitespace */
520 for (; pos
> 0; --pos
) {
521 if (ln
.buf
[pos
- 1] != ' ')
523 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
528 /* Some other escape sequence, copy and continue. */
529 if (pos
+ 1 >= (int)ln
.sz
)
530 if (! resize_buf(&ln
, 256))
533 ln
.buf
[pos
++] = blk
.buf
[i
++];
534 ln
.buf
[pos
++] = blk
.buf
[i
++];
537 if (pos
>= (int)ln
.sz
)
538 if (! resize_buf(&ln
, 256))
543 * A significant amount of complexity is contained by
544 * the roff preprocessor. It's line-oriented but can be
545 * expressed on one line, so we need at times to
546 * readjust our starting point and re-run it. The roff
547 * preprocessor can also readjust the buffers with new
548 * data, so we pass them in wholesale.
553 re
= roff_parseln(roff
, lnn_start
,
554 &ln
.buf
, &ln
.sz
, of
, &of
);
555 } while (ROFF_RERUN
== re
);
559 else if (ROFF_ERR
== re
)
563 * If input parsers have not been allocated, do so now.
564 * We keep these instanced between parsers, but set them
565 * locally per parse routine since we can use different
566 * parsers with each one.
569 if ( ! (man
|| mdoc
))
570 if ( ! pset(ln
.buf
+ of
, pos
- of
, curp
, &man
, &mdoc
))
573 /* Lastly, push down into the parsers themselves. */
575 if (man
&& ! man_parseln(man
, lnn_start
, ln
.buf
, of
))
577 if (mdoc
&& ! mdoc_parseln(mdoc
, lnn_start
, ln
.buf
, of
))
581 /* NOTE a parser may not have been assigned, yet. */
583 if ( ! (man
|| mdoc
)) {
584 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
588 /* Clean up the parse routine ASTs. */
590 if (mdoc
&& ! mdoc_endparse(mdoc
))
592 if (man
&& ! man_endparse(man
))
594 if (roff
&& ! roff_endparse(roff
))
597 /* If unset, allocate output dev now (if applicable). */
599 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
600 switch (curp
->outtype
) {
602 curp
->outdata
= xhtml_alloc(curp
->outopts
);
605 curp
->outdata
= html_alloc(curp
->outopts
);
608 curp
->outdata
= ascii_alloc(curp
->outopts
);
609 curp
->outfree
= ascii_free
;
612 curp
->outdata
= ps_alloc(curp
->outopts
);
613 curp
->outfree
= ps_free
;
619 switch (curp
->outtype
) {
623 curp
->outman
= html_man
;
624 curp
->outmdoc
= html_mdoc
;
625 curp
->outfree
= html_free
;
628 curp
->outman
= tree_man
;
629 curp
->outmdoc
= tree_mdoc
;
634 curp
->outman
= terminal_man
;
635 curp
->outmdoc
= terminal_mdoc
;
642 /* Execute the out device, if it exists. */
644 if (man
&& curp
->outman
)
645 (*curp
->outman
)(curp
->outdata
, man
);
646 if (mdoc
&& curp
->outmdoc
)
647 (*curp
->outmdoc
)(curp
->outdata
, mdoc
);
650 memset(&curp
->regs
, 0, sizeof(struct regset
));
660 munmap(blk
.buf
, blk
.sz
);
673 pset(const char *buf
, int pos
, struct curparse
*curp
,
674 struct man
**man
, struct mdoc
**mdoc
)
679 * Try to intuit which kind of manual parser should be used. If
680 * passed in by command-line (-man, -mdoc), then use that
681 * explicitly. If passed as -mandoc, then try to guess from the
682 * line: either skip dot-lines, use -mdoc when finding `.Dd', or
683 * default to -man, which is more lenient.
686 if ('.' == buf
[0] || '\'' == buf
[0]) {
687 for (i
= 1; buf
[i
]; i
++)
688 if (' ' != buf
[i
] && '\t' != buf
[i
])
694 switch (curp
->inttype
) {
696 if (NULL
== curp
->mdoc
)
697 curp
->mdoc
= mdoc_init(curp
);
698 if (NULL
== (*mdoc
= curp
->mdoc
))
702 if (NULL
== curp
->man
)
703 curp
->man
= man_init(curp
);
704 if (NULL
== (*man
= curp
->man
))
711 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
712 if (NULL
== curp
->mdoc
)
713 curp
->mdoc
= mdoc_init(curp
);
714 if (NULL
== (*mdoc
= curp
->mdoc
))
719 if (NULL
== curp
->man
)
720 curp
->man
= man_init(curp
);
721 if (NULL
== (*man
= curp
->man
))
728 moptions(enum intt
*tflags
, char *arg
)
731 if (0 == strcmp(arg
, "doc"))
733 else if (0 == strcmp(arg
, "andoc"))
735 else if (0 == strcmp(arg
, "an"))
738 fprintf(stderr
, "%s: Bad argument\n", arg
);
747 toptions(struct curparse
*curp
, char *arg
)
750 if (0 == strcmp(arg
, "ascii"))
751 curp
->outtype
= OUTT_ASCII
;
752 else if (0 == strcmp(arg
, "lint")) {
753 curp
->outtype
= OUTT_LINT
;
754 curp
->wflags
|= WARN_WALL
;
755 curp
->fflags
|= FL_STRICT
;
757 else if (0 == strcmp(arg
, "tree"))
758 curp
->outtype
= OUTT_TREE
;
759 else if (0 == strcmp(arg
, "html"))
760 curp
->outtype
= OUTT_HTML
;
761 else if (0 == strcmp(arg
, "xhtml"))
762 curp
->outtype
= OUTT_XHTML
;
763 else if (0 == strcmp(arg
, "ps"))
764 curp
->outtype
= OUTT_PS
;
766 fprintf(stderr
, "%s: Bad argument\n", arg
);
775 foptions(int *fflags
, char *arg
)
780 toks
[0] = "ign-scope";
781 toks
[1] = "no-ign-escape";
782 toks
[2] = "no-ign-macro";
783 toks
[3] = "ign-errors";
785 toks
[5] = "ign-escape";
790 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
792 *fflags
|= FL_IGN_SCOPE
;
795 *fflags
|= FL_NIGN_ESCAPE
;
798 *fflags
|= FL_NIGN_MACRO
;
801 *fflags
|= FL_IGN_ERRORS
;
804 *fflags
|= FL_STRICT
;
807 *fflags
&= ~FL_NIGN_ESCAPE
;
810 fprintf(stderr
, "%s: Bad argument\n", o
);
820 woptions(int *wflags
, char *arg
)
831 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
833 *wflags
|= WARN_WALL
;
836 *wflags
|= WARN_WERR
;
839 fprintf(stderr
, "%s: Bad argument\n", o
);
849 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
855 cp
= (struct curparse
*)arg
;
859 if (t
>= MANDOCERR_FATAL
) {
864 if ( ! (WARN_WALL
& cp
->wflags
))
866 if (t
>= MANDOCERR_ERROR
) {
870 if (WARN_WERR
& cp
->wflags
) {
876 fprintf(stderr
, "%s:%d:%d:", cp
->file
, ln
, col
+ 1);
878 fprintf(stderr
, " %s:", level
);
879 fprintf(stderr
, " %s", mandocerrs
[t
]);
881 fprintf(stderr
, ": %s", msg
);