]>
git.cameronkatri.com Git - mandoc.git/blob - main.c
ee51348e11317d787851379372fad25e35fbfb6d
1 /* $Id: main.c,v 1.111 2010/12/01 15:09:01 kristaps Exp $ */
3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
/*
 * Strip the const qualifier from pointer `a': the value is cast to
 * const void *, passed through uintptr_t, and finally cast to a plain
 * void *.  Used in this file to hand a const token table to getsubopt().
 */
44 #define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
46 /* FIXME: Intel's compiler? LLVM? pcc? */
/*
 * When the compiler is not GCC, or is a GCC older than version 2,
 * define __attribute__() to expand to nothing so that declarations
 * annotated with it (such as the noreturn prototypes below) still
 * compile.
 */
48 #if !defined(__GNUC__) || (__GNUC__ < 2)
50 # define __attribute__(x)
52 #endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
/*
 * Function-pointer types for the output back-end.  Each one takes the
 * opaque output data pointer as its first argument.
 */
/* Called with the output data and a parsed mdoc document. */
54 typedef void (*out_mdoc
)(void *, const struct mdoc
*);
/* Called with the output data and a parsed man document. */
55 typedef void (*out_man
)(void *, const struct man
*);
/* Called with the output data when the output device is released. */
56 typedef void (*out_free
)(void *);
80 const char *file
; /* Current parse. */
81 int fd
; /* Current parse. */
82 int line
; /* Line number in the file. */
83 enum mandoclevel wlevel
; /* Ignore messages below this. */
84 int wstop
; /* Stop after a file with a warning. */
85 enum intt inttype
; /* which parser to use */
86 struct man
*man
; /* man parser */
87 struct mdoc
*mdoc
; /* mdoc parser */
88 struct roff
*roff
; /* roff parser (!NULL) */
89 struct regset regs
; /* roff registers */
90 enum outt outtype
; /* which output to use */
91 out_mdoc outmdoc
; /* mdoc output ptr */
92 out_man outman
; /* man output ptr */
93 out_free outfree
; /* free output ptr */
94 void *outdata
; /* data for output */
95 char outopts
[BUFSIZ
]; /* buf of output opts */
98 static const char * const mandoclevels
[MANDOCLEVEL_MAX
] = {
108 static const enum mandocerr mandoclimits
[MANDOCLEVEL_MAX
] = {
118 static const char * const mandocerrs
[MANDOCERR_MAX
] = {
123 "text should be uppercase",
124 "sections out of conventional order",
125 "section name repeats",
126 "out of order prologue",
127 "repeated prologue entry",
128 "list type must come first",
129 "tab in non-literal context",
130 "bad escape sequence",
131 "unterminated quoted string",
132 "argument requires the width argument",
133 "superfluous width argument",
135 "bad width argument",
136 "unknown manual section",
137 "section not in conventional manual section",
138 "end of line whitespace",
139 "blocks badly nested",
143 "NAME section must come first",
145 "child violates parent syntax",
148 "list type repeated",
149 "display type repeated",
152 "manual name not yet set",
153 "obsolete macro ignored",
154 "empty macro ignored",
155 "macro not allowed in body",
156 "macro not allowed in prologue",
158 "bad NAME section contents",
160 "no text in this context",
162 "unknown macro will be lost",
163 "NOT IMPLEMENTED: skipping request",
165 "argument count wrong",
166 "request scope close w/none open",
167 "scope already open",
168 "scope open on exit",
169 "macro requires line argument(s)",
170 "macro requires body argument(s)",
171 "macro requires argument(s)",
172 "no title in document",
174 "missing display type",
176 "line argument(s) will be lost",
177 "body argument(s) will be lost",
178 "paragraph macro ignored",
180 "generic fatal error",
182 "column syntax is inconsistent",
183 "displays may not be nested",
184 "unsupported display type",
185 "blocks badly nested",
186 "no such block is open",
187 "line scope broken, syntax violated",
188 "argument count wrong, violates syntax",
189 "child violates parent syntax",
190 "argument count wrong, violates syntax",
192 "no document prologue",
193 "utsname system call failed",
194 "static buffer exhausted",
/*
 * Forward declarations of the file-local helpers defined later in this
 * file.  All are static, so none is visible outside this translation
 * unit.  usage() and version() are declared noreturn.
 */
197 static void parsebuf(struct curparse
*, struct buf
, int);
198 static void pdesc(struct curparse
*);
199 static void fdesc(struct curparse
*);
200 static void ffile(const char *, struct curparse
*);
201 static int pfile(const char *, struct curparse
*);
202 static int moptions(enum intt
*, char *);
/* mmsg() is the message callback passed to the roff/man/mdoc allocators. */
203 static int mmsg(enum mandocerr
, void *,
204 int, int, const char *);
205 static void pset(const char *, int, struct curparse
*,
206 struct man
**, struct mdoc
**);
207 static int toptions(struct curparse
*, char *);
208 static void usage(void) __attribute__((noreturn
));
209 static void version(void) __attribute__((noreturn
));
210 static int woptions(struct curparse
*, char *);
/* Program name printed by usage() and version(); main() derives it from argv[0]. */
212 static const char *progname
;
/*
 * Process-wide result level.  Starts at MANDOCLEVEL_OK, is assigned
 * elsewhere in this file when failures occur (e.g. MANDOCLEVEL_SYSERR
 * after a failed open()), and is the value main() returns.
 */
213 static enum mandoclevel exit_status
= MANDOCLEVEL_OK
;
216 main(int argc
, char *argv
[])
219 struct curparse curp
;
221 progname
= strrchr(argv
[0], '/');
222 if (progname
== NULL
)
227 memset(&curp
, 0, sizeof(struct curparse
));
229 curp
.inttype
= INTT_AUTO
;
230 curp
.outtype
= OUTT_ASCII
;
231 curp
.wlevel
= MANDOCLEVEL_FATAL
;
234 while (-1 != (c
= getopt(argc
, argv
, "m:O:T:VW:")))
237 if ( ! moptions(&curp
.inttype
, optarg
))
238 return((int)MANDOCLEVEL_BADARG
);
241 (void)strlcat(curp
.outopts
, optarg
, BUFSIZ
);
242 (void)strlcat(curp
.outopts
, ",", BUFSIZ
);
245 if ( ! toptions(&curp
, optarg
))
246 return((int)MANDOCLEVEL_BADARG
);
249 if ( ! woptions(&curp
, optarg
))
250 return((int)MANDOCLEVEL_BADARG
);
264 curp
.file
= "<stdin>";
265 curp
.fd
= STDIN_FILENO
;
272 if (MANDOCLEVEL_OK
!= exit_status
&& curp
.wstop
)
278 (*curp
.outfree
)(curp
.outdata
);
280 mdoc_free(curp
.mdoc
);
284 roff_free(curp
.roff
);
286 return((int)exit_status
);
294 (void)printf("%s %s\n", progname
, VERSION
);
295 exit((int)MANDOCLEVEL_OK
);
303 (void)fprintf(stderr
, "usage: %s [-V] [-foption] "
304 "[-mformat] [-Ooption] [-Toutput] "
305 "[-Werr] [file...]\n", progname
);
306 exit((int)MANDOCLEVEL_BADARG
);
311 ffile(const char *file
, struct curparse
*curp
)
315 if (-1 == (curp
->fd
= open(curp
->file
, O_RDONLY
, 0))) {
317 exit_status
= MANDOCLEVEL_SYSERR
;
323 if (-1 == close(curp
->fd
))
328 pfile(const char *file
, struct curparse
*curp
)
330 const char *savefile
;
333 if (-1 == (fd
= open(file
, O_RDONLY
, 0))) {
335 exit_status
= MANDOCLEVEL_SYSERR
;
339 savefile
= curp
->file
;
347 curp
->file
= savefile
;
353 return(MANDOCLEVEL_FATAL
> exit_status
? 1 : 0);
358 resize_buf(struct buf
*buf
, size_t initial
)
361 buf
->sz
= buf
->sz
? 2 * buf
->sz
: initial
;
362 buf
->buf
= realloc(buf
->buf
, buf
->sz
);
363 if (NULL
== buf
->buf
) {
365 exit((int)MANDOCLEVEL_SYSERR
);
371 read_whole_file(struct curparse
*curp
, struct buf
*fb
, int *with_mmap
)
377 if (-1 == fstat(curp
->fd
, &st
)) {
383 * If we're a regular file, try just reading in the whole entry
384 * via mmap(). This is faster than reading it into blocks, and
385 * since each file is only a few bytes to begin with, I'm not
386 * concerned that this is going to tank any machines.
389 if (S_ISREG(st
.st_mode
)) {
390 if (st
.st_size
>= (1U << 31)) {
391 fprintf(stderr
, "%s: input too large\n",
396 fb
->sz
= (size_t)st
.st_size
;
397 fb
->buf
= mmap(NULL
, fb
->sz
, PROT_READ
,
398 MAP_FILE
|MAP_SHARED
, curp
->fd
, 0);
399 if (fb
->buf
!= MAP_FAILED
)
404 * If this isn't a regular file (like, say, stdin), then we must
405 * go the old way and just read things in bit by bit.
414 if (fb
->sz
== (1U << 31)) {
415 fprintf(stderr
, "%s: input too large\n",
419 resize_buf(fb
, 65536);
421 ssz
= read(curp
->fd
, fb
->buf
+ (int)off
, fb
->sz
- off
);
440 fdesc(struct curparse
*curp
)
452 if (MANDOCLEVEL_FATAL
<= exit_status
)
455 /* NOTE a parser may not have been assigned, yet. */
457 if ( ! (man
|| mdoc
)) {
458 fprintf(stderr
, "%s: Not a manual\n", curp
->file
);
459 exit_status
= MANDOCLEVEL_FATAL
;
463 /* Clean up the parse routine ASTs. */
465 if (mdoc
&& ! mdoc_endparse(mdoc
)) {
466 assert(MANDOCLEVEL_FATAL
<= exit_status
);
469 if (man
&& ! man_endparse(man
)) {
470 assert(MANDOCLEVEL_FATAL
<= exit_status
);
473 if (roff
&& ! roff_endparse(roff
)) {
474 assert(MANDOCLEVEL_FATAL
<= exit_status
);
479 * With -Wstop and warnings or errors of at least
480 * the requested level, do not produce output.
483 if (MANDOCLEVEL_OK
!= exit_status
&& curp
->wstop
)
486 /* If unset, allocate output dev now (if applicable). */
488 if ( ! (curp
->outman
&& curp
->outmdoc
)) {
489 switch (curp
->outtype
) {
491 curp
->outdata
= xhtml_alloc(curp
->outopts
);
494 curp
->outdata
= html_alloc(curp
->outopts
);
497 curp
->outdata
= ascii_alloc(curp
->outopts
);
498 curp
->outfree
= ascii_free
;
501 curp
->outdata
= pdf_alloc(curp
->outopts
);
502 curp
->outfree
= pspdf_free
;
505 curp
->outdata
= ps_alloc(curp
->outopts
);
506 curp
->outfree
= pspdf_free
;
512 switch (curp
->outtype
) {
516 curp
->outman
= html_man
;
517 curp
->outmdoc
= html_mdoc
;
518 curp
->outfree
= html_free
;
521 curp
->outman
= tree_man
;
522 curp
->outmdoc
= tree_mdoc
;
529 curp
->outman
= terminal_man
;
530 curp
->outmdoc
= terminal_mdoc
;
537 /* Execute the out device, if it exists. */
539 if (man
&& curp
->outman
)
540 (*curp
->outman
)(curp
->outdata
, man
);
541 if (mdoc
&& curp
->outmdoc
)
542 (*curp
->outmdoc
)(curp
->outdata
, mdoc
);
545 memset(&curp
->regs
, 0, sizeof(struct regset
));
558 pdesc(struct curparse
*curp
)
563 if ( ! read_whole_file(curp
, &blk
, &with_mmap
)) {
564 exit_status
= MANDOCLEVEL_SYSERR
;
568 if (NULL
== curp
->roff
)
569 curp
->roff
= roff_alloc(&curp
->regs
, curp
, mmsg
);
573 parsebuf(curp
, blk
, 1);
576 munmap(blk
.buf
, blk
.sz
);
582 parsebuf(struct curparse
*curp
, struct buf blk
, int start
)
595 memset(&ln
, 0, sizeof(struct buf
));
597 lnn
= curp
->line
; /* line number in the real file */
598 pos
= 0; /* byte number in the ln buffer */
600 for (i
= 0; i
< (int)blk
.sz
;) {
601 if (0 == pos
&& '\0' == blk
.buf
[i
])
606 while (i
< (int)blk
.sz
&& (start
|| '\0' != blk
.buf
[i
])) {
607 if ('\n' == blk
.buf
[i
]) {
614 * Warn about bogus characters. If you're using
615 * non-ASCII encoding, you're screwing your
616 * readers. Since I'd rather this not happen,
617 * I'll be helpful and drop these characters so
618 * we don't display gibberish. Note to manual
619 * writers: use special characters.
622 c
= (unsigned char) blk
.buf
[i
];
623 if ( ! (isascii(c
) && (isgraph(c
) || isblank(c
)))) {
624 mmsg(MANDOCERR_BADCHAR
, curp
,
625 curp
->line
, pos
, "ignoring byte");
630 /* Trailing backslash is like a plain character. */
631 if ('\\' != blk
.buf
[i
] || i
+ 1 == (int)blk
.sz
) {
632 if (pos
>= (int)ln
.sz
)
633 resize_buf(&ln
, 256);
634 ln
.buf
[pos
++] = blk
.buf
[i
++];
637 /* Found an escape and at least one other character. */
638 if ('\n' == blk
.buf
[i
+ 1]) {
639 /* Escaped newlines are skipped over */
644 if ('"' == blk
.buf
[i
+ 1]) {
646 /* Comment, skip to end of line */
647 for (; i
< (int)blk
.sz
; ++i
) {
648 if ('\n' == blk
.buf
[i
]) {
654 /* Backout trailing whitespaces */
655 for (; pos
> 0; --pos
) {
656 if (ln
.buf
[pos
- 1] != ' ')
658 if (pos
> 2 && ln
.buf
[pos
- 2] == '\\')
663 /* Some other escape sequence, copy and continue. */
664 if (pos
+ 1 >= (int)ln
.sz
)
665 resize_buf(&ln
, 256);
667 ln
.buf
[pos
++] = blk
.buf
[i
++];
668 ln
.buf
[pos
++] = blk
.buf
[i
++];
671 if (pos
>= (int)ln
.sz
)
672 resize_buf(&ln
, 256);
676 * A significant amount of complexity is contained by
677 * the roff preprocessor. It's line-oriented but can be
678 * expressed on one line, so we need at times to
679 * readjust our starting point and re-run it. The roff
680 * preprocessor can also readjust the buffers with new
681 * data, so we pass them in wholesale.
686 switch (roff_parseln(roff
, curp
->line
, &ln
.buf
, &ln
.sz
,
689 parsebuf(curp
, ln
, 0);
693 pos
= strlen(ln
.buf
);
701 assert(MANDOCLEVEL_FATAL
<= exit_status
);
704 if (pfile(ln
.buf
+ of
, curp
)) {
714 * If input parsers have not been allocated, do so now.
715 * We keep these instanced between parsers, but set them
716 * locally per parse routine since we can use different
717 * parsers with each one.
720 if ( ! (man
|| mdoc
))
721 pset(ln
.buf
+ of
, pos
- of
, curp
, &man
, &mdoc
);
723 /* Lastly, push down into the parsers themselves. */
725 if (man
&& ! man_parseln(man
, curp
->line
, ln
.buf
, of
)) {
726 assert(MANDOCLEVEL_FATAL
<= exit_status
);
729 if (mdoc
&& ! mdoc_parseln(mdoc
, curp
->line
, ln
.buf
, of
)) {
730 assert(MANDOCLEVEL_FATAL
<= exit_status
);
734 /* Temporary buffers typically are not full. */
735 if (0 == start
&& '\0' == blk
.buf
[i
])
738 /* Start the next input line. */
747 pset(const char *buf
, int pos
, struct curparse
*curp
,
748 struct man
**man
, struct mdoc
**mdoc
)
753 * Try to intuit which kind of manual parser should be used. If
754 * passed in by command-line (-man, -mdoc), then use that
755 * explicitly. If passed as -mandoc, then try to guess from the
756 * line: either skip dot-lines, use -mdoc when finding `.Dd', or
757 * default to -man, which is more lenient.
760 if ('.' == buf
[0] || '\'' == buf
[0]) {
761 for (i
= 1; buf
[i
]; i
++)
762 if (' ' != buf
[i
] && '\t' != buf
[i
])
768 switch (curp
->inttype
) {
770 if (NULL
== curp
->mdoc
)
771 curp
->mdoc
= mdoc_alloc(&curp
->regs
, curp
, mmsg
);
776 if (NULL
== curp
->man
)
777 curp
->man
= man_alloc(&curp
->regs
, curp
, mmsg
);
785 if (pos
>= 3 && 0 == memcmp(buf
, ".Dd", 3)) {
786 if (NULL
== curp
->mdoc
)
787 curp
->mdoc
= mdoc_alloc(&curp
->regs
, curp
, mmsg
);
793 if (NULL
== curp
->man
)
794 curp
->man
= man_alloc(&curp
->regs
, curp
, mmsg
);
801 moptions(enum intt
*tflags
, char *arg
)
804 if (0 == strcmp(arg
, "doc"))
806 else if (0 == strcmp(arg
, "andoc"))
808 else if (0 == strcmp(arg
, "an"))
811 fprintf(stderr
, "%s: Bad argument\n", arg
);
820 toptions(struct curparse
*curp
, char *arg
)
823 if (0 == strcmp(arg
, "ascii"))
824 curp
->outtype
= OUTT_ASCII
;
825 else if (0 == strcmp(arg
, "lint")) {
826 curp
->outtype
= OUTT_LINT
;
827 curp
->wlevel
= MANDOCLEVEL_WARNING
;
829 else if (0 == strcmp(arg
, "tree"))
830 curp
->outtype
= OUTT_TREE
;
831 else if (0 == strcmp(arg
, "html"))
832 curp
->outtype
= OUTT_HTML
;
833 else if (0 == strcmp(arg
, "xhtml"))
834 curp
->outtype
= OUTT_XHTML
;
835 else if (0 == strcmp(arg
, "ps"))
836 curp
->outtype
= OUTT_PS
;
837 else if (0 == strcmp(arg
, "pdf"))
838 curp
->outtype
= OUTT_PDF
;
840 fprintf(stderr
, "%s: Bad argument\n", arg
);
849 woptions(struct curparse
*curp
, char *arg
)
863 switch (getsubopt(&arg
, UNCONST(toks
), &v
)) {
870 curp
->wlevel
= MANDOCLEVEL_WARNING
;
873 curp
->wlevel
= MANDOCLEVEL_ERROR
;
876 curp
->wlevel
= MANDOCLEVEL_FATAL
;
879 fprintf(stderr
, "-W%s: Bad argument\n", o
);
889 mmsg(enum mandocerr t
, void *arg
, int ln
, int col
, const char *msg
)
892 enum mandoclevel level
;
894 level
= MANDOCLEVEL_FATAL
;
895 while (t
< mandoclimits
[level
])
899 cp
= (struct curparse
*)arg
;
900 if (level
< cp
->wlevel
)
903 fprintf(stderr
, "%s:%d:%d: %s: %s",
904 cp
->file
, ln
, col
+ 1, mandoclevels
[level
], mandocerrs
[t
]);
906 fprintf(stderr
, ": %s", msg
);
909 if (exit_status
< level
)
912 return(level
< MANDOCLEVEL_FATAL
);