X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/1c61a1b1241d9362e411865da7ea7cd895d7cc2f..5f164a6735403143d675639fe5dee257a943e09c:/main.c diff --git a/main.c b/main.c index 1664825d..5d15557f 100644 --- a/main.c +++ b/main.c @@ -1,7 +1,7 @@ -/* $Id: main.c,v 1.110 2010/12/01 10:31:34 kristaps Exp $ */ +/* $Id: main.c,v 1.166 2012/05/27 17:48:57 schwarze Exp $ */ /* - * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons - * Copyright (c) 2010 Ingo Schwarze + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons + * Copyright (c) 2010, 2011, 2012 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -19,12 +19,7 @@ #include "config.h" #endif -#include -#include - #include -#include -#include #include #include #include @@ -35,15 +30,6 @@ #include "main.h" #include "mdoc.h" #include "man.h" -#include "roff.h" - -#ifndef MAP_FILE -#define MAP_FILE 0 -#endif - -#define UNCONST(a) ((void *)(uintptr_t)(const void *)(a)) - -/* FIXME: Intel's compiler? LLVM? pcc? */ #if !defined(__GNUC__) || (__GNUC__ < 2) # if !defined(lint) @@ -55,37 +41,23 @@ typedef void (*out_mdoc)(void *, const struct mdoc *); typedef void (*out_man)(void *, const struct man *); typedef void (*out_free)(void *); -struct buf { - char *buf; - size_t sz; -}; - -enum intt { - INTT_AUTO, - INTT_MDOC, - INTT_MAN -}; - enum outt { - OUTT_ASCII = 0, - OUTT_TREE, - OUTT_HTML, - OUTT_XHTML, - OUTT_LINT, - OUTT_PS, - OUTT_PDF + OUTT_ASCII = 0, /* -Tascii */ + OUTT_LOCALE, /* -Tlocale */ + OUTT_UTF8, /* -Tutf8 */ + OUTT_TREE, /* -Ttree */ + OUTT_MAN, /* -Tman */ + OUTT_HTML, /* -Thtml */ + OUTT_XHTML, /* -Txhtml */ + OUTT_LINT, /* -Tlint */ + OUTT_PS, /* -Tps */ + OUTT_PDF /* -Tpdf */ }; struct curparse { - const char *file; /* Current parse. */ - int fd; /* Current parse. */ - enum mandoclevel wlevel; /* Ignore messages below this. */ - int wstop; /* Stop after a file with a warning. */ - enum intt inttype; /* which parser to use */ - struct man *man; /* man parser */ - struct mdoc *mdoc; /* mdoc parser */ - struct roff *roff; /* roff parser (!NULL) */ - struct regset regs; /* roff registers */ + struct mparse *mp; + enum mandoclevel wlevel; /* ignore messages below this */ + int wstop; /* stop after a file with a warning */ enum outt outtype; /* which output to use */ out_mdoc outmdoc; /* mdoc output ptr */ out_man outman; /* man output ptr */ @@ -94,125 +66,26 @@ struct curparse { char outopts[BUFSIZ]; /* buf of output opts */ }; -static const char * const mandoclevels[MANDOCLEVEL_MAX] = { - "SUCCESS", - "RESERVED", - "WARNING", - "ERROR", - "FATAL", - "BADARG", - "SYSERR" -}; - -static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { - MANDOCERR_OK, - MANDOCERR_WARNING, - MANDOCERR_WARNING, - MANDOCERR_ERROR, - MANDOCERR_FATAL, - MANDOCERR_MAX, - MANDOCERR_MAX -}; - -static const char * const mandocerrs[MANDOCERR_MAX] = { - "ok", - - "generic warning", - - "text should be uppercase", - "sections out of conventional order", - "section name repeats", - "out of order prologue", - "repeated prologue entry", - "list type must come first", - "tab in non-literal context", - "bad escape sequence", - "unterminated quoted string", - "argument requires the width argument", - "superfluous width argument", - "bad date argument", - "bad width argument", - "unknown manual section", - "section not in conventional manual section", - "end of line whitespace", - "blocks badly nested", - - "generic error", - - "NAME section must come first", - "bad Boolean value", - "child violates parent syntax", - "bad AT&T symbol", - "bad standard", - "list type repeated", - "display type repeated", - "argument repeated", - "ignoring argument", - "manual name not yet set", - "obsolete macro ignored", - "empty macro ignored", - "macro not allowed in body", - "macro not allowed in prologue", - "bad character", - "bad NAME section contents", - "no blank lines", - "no text in this context", - "bad comment style", - "unknown macro will be lost", - "NOT IMPLEMENTED: skipping request", - "line scope broken", - "argument count wrong", - "request scope close w/none open", - "scope already open", - "scope open on exit", - "macro requires line argument(s)", - "macro requires body argument(s)", - "macro requires argument(s)", - "no title in document", - "missing list type", - "missing display type", - "missing font type", - "line argument(s) will be lost", - "body argument(s) will be lost", - "paragraph macro ignored", - - "generic fatal error", - - "column syntax is inconsistent", - "displays may not be nested", - "unsupported display type", - "blocks badly nested", - "no such block is open", - "line scope broken, syntax violated", - "argument count wrong, violates syntax", - "child violates parent syntax", - "argument count wrong, violates syntax", - "no document body", - "no document prologue", - "utsname system call failed", - "static buffer exhausted", -}; - -static void fdesc(struct curparse *); -static void ffile(const char *, struct curparse *); -static int moptions(enum intt *, char *); -static int mmsg(enum mandocerr, void *, - int, int, const char *); -static void pset(const char *, int, struct curparse *, - struct man **, struct mdoc **); +static int moptions(enum mparset *, char *); +static void mmsg(enum mandocerr, enum mandoclevel, + const char *, int, int, const char *); +static void parse(struct curparse *, int, + const char *, enum mandoclevel *); static int toptions(struct curparse *, char *); static void usage(void) __attribute__((noreturn)); static void version(void) __attribute__((noreturn)); static int woptions(struct curparse *, char *); static const char *progname; -static enum mandoclevel exit_status = MANDOCLEVEL_OK; int main(int argc, char *argv[]) { int c; struct curparse curp; + enum mparset type; + enum mandoclevel rc; + char *defos; progname = strrchr(argv[0], '/'); if (progname == NULL) @@ -222,15 +95,29 @@ main(int argc, char *argv[]) memset(&curp, 0, sizeof(struct curparse)); - curp.inttype = INTT_AUTO; + type = MPARSE_AUTO; curp.outtype = OUTT_ASCII; curp.wlevel = MANDOCLEVEL_FATAL; + defos = NULL; /* LINTED */ - while (-1 != (c = getopt(argc, argv, "m:O:T:VW:"))) + while (-1 != (c = getopt(argc, argv, "I:m:O:T:VW:"))) switch (c) { + case ('I'): + if (strncmp(optarg, "os=", 3)) { + fprintf(stderr, "-I%s: Bad argument\n", + optarg); + return((int)MANDOCLEVEL_BADARG); + } + if (defos) { + fprintf(stderr, "-I%s: Duplicate argument\n", + optarg); + return((int)MANDOCLEVEL_BADARG); + } + defos = mandoc_strdup(optarg + 3); + break; case ('m'): - if ( ! moptions(&curp.inttype, optarg)) + if ( ! moptions(&type, optarg)) return((int)MANDOCLEVEL_BADARG); break; case ('O'): @@ -253,333 +140,89 @@ main(int argc, char *argv[]) /* NOTREACHED */ } + curp.mp = mparse_alloc(type, curp.wlevel, mmsg, &curp, defos); + + /* + * Conditionally start up the lookaside buffer before parsing. + */ + if (OUTT_MAN == curp.outtype) + mparse_keep(curp.mp); + argc -= optind; argv += optind; - if (NULL == *argv) { - curp.file = ""; - curp.fd = STDIN_FILENO; + rc = MANDOCLEVEL_OK; - fdesc(&curp); - } + if (NULL == *argv) + parse(&curp, STDIN_FILENO, "", &rc); while (*argv) { - ffile(*argv, &curp); - if (MANDOCLEVEL_OK != exit_status && curp.wstop) + parse(&curp, -1, *argv, &rc); + if (MANDOCLEVEL_OK != rc && curp.wstop) break; ++argv; } if (curp.outfree) (*curp.outfree)(curp.outdata); - if (curp.mdoc) - mdoc_free(curp.mdoc); - if (curp.man) - man_free(curp.man); - if (curp.roff) - roff_free(curp.roff); - - return((int)exit_status); -} + if (curp.mp) + mparse_free(curp.mp); + free(defos); + return((int)rc); +} static void version(void) { - (void)printf("%s %s\n", progname, VERSION); + printf("%s %s\n", progname, VERSION); exit((int)MANDOCLEVEL_OK); } - static void usage(void) { - (void)fprintf(stderr, "usage: %s [-V] [-foption] " - "[-mformat] [-Ooption] [-Toutput] " - "[-Werr] [file...]\n", progname); - exit((int)MANDOCLEVEL_BADARG); -} - - -static void -ffile(const char *file, struct curparse *curp) -{ - - curp->file = file; - if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) { - perror(curp->file); - exit_status = MANDOCLEVEL_SYSERR; - return; - } - - fdesc(curp); - - if (-1 == close(curp->fd)) - perror(curp->file); -} - - -static void -resize_buf(struct buf *buf, size_t initial) -{ - - buf->sz = buf->sz ? 2 * buf->sz : initial; - buf->buf = realloc(buf->buf, buf->sz); - if (NULL == buf->buf) { - perror(NULL); - exit((int)MANDOCLEVEL_SYSERR); - } -} - - -static int -read_whole_file(struct curparse *curp, struct buf *fb, int *with_mmap) -{ - struct stat st; - size_t off; - ssize_t ssz; - - if (-1 == fstat(curp->fd, &st)) { - perror(curp->file); - return(0); - } - - /* - * If we're a regular file, try just reading in the whole entry - * via mmap(). This is faster than reading it into blocks, and - * since each file is only a few bytes to begin with, I'm not - * concerned that this is going to tank any machines. - */ - - if (S_ISREG(st.st_mode)) { - if (st.st_size >= (1U << 31)) { - fprintf(stderr, "%s: input too large\n", - curp->file); - return(0); - } - *with_mmap = 1; - fb->sz = (size_t)st.st_size; - fb->buf = mmap(NULL, fb->sz, PROT_READ, - MAP_FILE|MAP_SHARED, curp->fd, 0); - if (fb->buf != MAP_FAILED) - return(1); - } - - /* - * If this isn't a regular file (like, say, stdin), then we must - * go the old way and just read things in bit by bit. - */ - - *with_mmap = 0; - off = 0; - fb->sz = 0; - fb->buf = NULL; - for (;;) { - if (off == fb->sz) { - if (fb->sz == (1U << 31)) { - fprintf(stderr, "%s: input too large\n", - curp->file); - break; - } - resize_buf(fb, 65536); - } - ssz = read(curp->fd, fb->buf + (int)off, fb->sz - off); - if (ssz == 0) { - fb->sz = off; - return(1); - } - if (ssz == -1) { - perror(curp->file); - break; - } - off += (size_t)ssz; - } + fprintf(stderr, "usage: %s " + "[-V] " + "[-foption] " + "[-mformat] " + "[-Ooption] " + "[-Toutput] " + "[-Wlevel] " + "[file...]\n", + progname); - free(fb->buf); - fb->buf = NULL; - return(0); + exit((int)MANDOCLEVEL_BADARG); } - static void -fdesc(struct curparse *curp) +parse(struct curparse *curp, int fd, + const char *file, enum mandoclevel *level) { - struct buf ln, blk; - int i, pos, lnn, lnn_start, with_mmap, of; - enum rofferr re; - unsigned char c; - struct man *man; - struct mdoc *mdoc; - struct roff *roff; - - man = NULL; - mdoc = NULL; - roff = NULL; - - memset(&ln, 0, sizeof(struct buf)); - - /* - * Two buffers: ln and buf. buf is the input file and may be - * memory mapped. ln is a line buffer and grows on-demand. - */ - - if ( ! read_whole_file(curp, &blk, &with_mmap)) { - exit_status = MANDOCLEVEL_SYSERR; - return; - } + enum mandoclevel rc; + struct mdoc *mdoc; + struct man *man; - if (NULL == curp->roff) - curp->roff = roff_alloc(&curp->regs, curp, mmsg); - assert(curp->roff); - roff = curp->roff; - - for (i = 0, lnn = 1; i < (int)blk.sz;) { - pos = 0; - lnn_start = lnn; - while (i < (int)blk.sz) { - if ('\n' == blk.buf[i]) { - ++i; - ++lnn; - break; - } + /* Begin by parsing the file itself. */ - /* - * Warn about bogus characters. If you're using - * non-ASCII encoding, you're screwing your - * readers. Since I'd rather this not happen, - * I'll be helpful and drop these characters so - * we don't display gibberish. Note to manual - * writers: use special characters. - */ - - c = (unsigned char) blk.buf[i]; - if ( ! (isascii(c) && (isgraph(c) || isblank(c)))) { - mmsg(MANDOCERR_BADCHAR, curp, - lnn_start, pos, "ignoring byte"); - i++; - continue; - } + assert(file); + assert(fd >= -1); - /* Trailing backslash is like a plain character. */ - if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) { - if (pos >= (int)ln.sz) - resize_buf(&ln, 256); - ln.buf[pos++] = blk.buf[i++]; - continue; - } - /* Found an escape and at least one other character. */ - if ('\n' == blk.buf[i + 1]) { - /* Escaped newlines are skipped over */ - i += 2; - ++lnn; - continue; - } - if ('"' == blk.buf[i + 1]) { - i += 2; - /* Comment, skip to end of line */ - for (; i < (int)blk.sz; ++i) { - if ('\n' == blk.buf[i]) { - ++i; - ++lnn; - break; - } - } - /* Backout trailing whitespaces */ - for (; pos > 0; --pos) { - if (ln.buf[pos - 1] != ' ') - break; - if (pos > 2 && ln.buf[pos - 2] == '\\') - break; - } - break; - } - /* Some other escape sequence, copy and continue. */ - if (pos + 1 >= (int)ln.sz) - resize_buf(&ln, 256); - - ln.buf[pos++] = blk.buf[i++]; - ln.buf[pos++] = blk.buf[i++]; - } + rc = mparse_readfd(curp->mp, fd, file); - if (pos >= (int)ln.sz) - resize_buf(&ln, 256); - ln.buf[pos] = '\0'; - - /* - * A significant amount of complexity is contained by - * the roff preprocessor. It's line-oriented but can be - * expressed on one line, so we need at times to - * readjust our starting point and re-run it. The roff - * preprocessor can also readjust the buffers with new - * data, so we pass them in wholesale. - */ - - of = 0; - do { - re = roff_parseln(roff, lnn_start, - &ln.buf, &ln.sz, of, &of); - } while (ROFF_RERUN == re); - - if (ROFF_IGN == re) { - continue; - } else if (ROFF_ERR == re) { - assert(MANDOCLEVEL_FATAL <= exit_status); - goto cleanup; - } + /* Stop immediately if the parse has failed. */ - /* - * If input parsers have not been allocated, do so now. - * We keep these instanced betwen parsers, but set them - * locally per parse routine since we can use different - * parsers with each one. - */ - - if ( ! (man || mdoc)) - pset(ln.buf + of, pos - of, curp, &man, &mdoc); - - /* Lastly, push down into the parsers themselves. */ - - if (man && ! man_parseln(man, lnn_start, ln.buf, of)) { - assert(MANDOCLEVEL_FATAL <= exit_status); - goto cleanup; - } - if (mdoc && ! mdoc_parseln(mdoc, lnn_start, ln.buf, of)) { - assert(MANDOCLEVEL_FATAL <= exit_status); - goto cleanup; - } - } - - /* NOTE a parser may not have been assigned, yet. */ - - if ( ! (man || mdoc)) { - fprintf(stderr, "%s: Not a manual\n", curp->file); - exit_status = MANDOCLEVEL_FATAL; + if (MANDOCLEVEL_FATAL <= rc) goto cleanup; - } - - /* Clean up the parse routine ASTs. */ - - if (mdoc && ! mdoc_endparse(mdoc)) { - assert(MANDOCLEVEL_FATAL <= exit_status); - goto cleanup; - } - if (man && ! man_endparse(man)) { - assert(MANDOCLEVEL_FATAL <= exit_status); - goto cleanup; - } - if (roff && ! roff_endparse(roff)) { - assert(MANDOCLEVEL_FATAL <= exit_status); - goto cleanup; - } /* - * With -Wstop and warnings or errors of at least - * the requested level, do not produce output. + * With -Wstop and warnings or errors of at least the requested + * level, do not produce output. */ - if (MANDOCLEVEL_OK != exit_status && curp->wstop) + if (MANDOCLEVEL_OK != rc && curp->wstop) goto cleanup; /* If unset, allocate output dev now (if applicable). */ @@ -588,9 +231,19 @@ fdesc(struct curparse *curp) switch (curp->outtype) { case (OUTT_XHTML): curp->outdata = xhtml_alloc(curp->outopts); + curp->outfree = html_free; break; case (OUTT_HTML): curp->outdata = html_alloc(curp->outopts); + curp->outfree = html_free; + break; + case (OUTT_UTF8): + curp->outdata = utf8_alloc(curp->outopts); + curp->outfree = ascii_free; + break; + case (OUTT_LOCALE): + curp->outdata = locale_alloc(curp->outopts); + curp->outfree = ascii_free; break; case (OUTT_ASCII): curp->outdata = ascii_alloc(curp->outopts); @@ -614,16 +267,23 @@ fdesc(struct curparse *curp) case (OUTT_XHTML): curp->outman = html_man; curp->outmdoc = html_mdoc; - curp->outfree = html_free; break; case (OUTT_TREE): curp->outman = tree_man; curp->outmdoc = tree_mdoc; break; + case (OUTT_MAN): + curp->outmdoc = man_mdoc; + curp->outman = man_man; + break; case (OUTT_PDF): /* FALLTHROUGH */ case (OUTT_ASCII): /* FALLTHROUGH */ + case (OUTT_UTF8): + /* FALLTHROUGH */ + case (OUTT_LOCALE): + /* FALLTHROUGH */ case (OUTT_PS): curp->outman = terminal_man; curp->outmdoc = terminal_mdoc; @@ -633,6 +293,8 @@ fdesc(struct curparse *curp) } } + mparse_result(curp->mp, &mdoc, &man); + /* Execute the out device, if it exists. */ if (man && curp->outman) @@ -641,88 +303,23 @@ fdesc(struct curparse *curp) (*curp->outmdoc)(curp->outdata, mdoc); cleanup: - memset(&curp->regs, 0, sizeof(struct regset)); - if (mdoc) - mdoc_reset(mdoc); - if (man) - man_reset(man); - if (roff) - roff_reset(roff); - if (ln.buf) - free(ln.buf); - if (with_mmap) - munmap(blk.buf, blk.sz); - else - free(blk.buf); - - return; -} - - -static void -pset(const char *buf, int pos, struct curparse *curp, - struct man **man, struct mdoc **mdoc) -{ - int i; - /* - * Try to intuit which kind of manual parser should be used. If - * passed in by command-line (-man, -mdoc), then use that - * explicitly. If passed as -mandoc, then try to guess from the - * line: either skip dot-lines, use -mdoc when finding `.Dt', or - * default to -man, which is more lenient. - */ - - if ('.' == buf[0] || '\'' == buf[0]) { - for (i = 1; buf[i]; i++) - if (' ' != buf[i] && '\t' != buf[i]) - break; - if ('\0' == buf[i]) - return; - } + mparse_reset(curp->mp); - switch (curp->inttype) { - case (INTT_MDOC): - if (NULL == curp->mdoc) - curp->mdoc = mdoc_alloc(&curp->regs, curp, mmsg); - assert(curp->mdoc); - *mdoc = curp->mdoc; - return; - case (INTT_MAN): - if (NULL == curp->man) - curp->man = man_alloc(&curp->regs, curp, mmsg); - assert(curp->man); - *man = curp->man; - return; - default: - break; - } - - if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) { - if (NULL == curp->mdoc) - curp->mdoc = mdoc_alloc(&curp->regs, curp, mmsg); - assert(curp->mdoc); - *mdoc = curp->mdoc; - return; - } - - if (NULL == curp->man) - curp->man = man_alloc(&curp->regs, curp, mmsg); - assert(curp->man); - *man = curp->man; + if (*level < rc) + *level = rc; } - static int -moptions(enum intt *tflags, char *arg) +moptions(enum mparset *tflags, char *arg) { if (0 == strcmp(arg, "doc")) - *tflags = INTT_MDOC; + *tflags = MPARSE_MDOC; else if (0 == strcmp(arg, "andoc")) - *tflags = INTT_AUTO; + *tflags = MPARSE_AUTO; else if (0 == strcmp(arg, "an")) - *tflags = INTT_MAN; + *tflags = MPARSE_MAN; else { fprintf(stderr, "%s: Bad argument\n", arg); return(0); @@ -731,7 +328,6 @@ moptions(enum intt *tflags, char *arg) return(1); } - static int toptions(struct curparse *curp, char *arg) { @@ -741,11 +337,16 @@ toptions(struct curparse *curp, char *arg) else if (0 == strcmp(arg, "lint")) { curp->outtype = OUTT_LINT; curp->wlevel = MANDOCLEVEL_WARNING; - } - else if (0 == strcmp(arg, "tree")) + } else if (0 == strcmp(arg, "tree")) curp->outtype = OUTT_TREE; + else if (0 == strcmp(arg, "man")) + curp->outtype = OUTT_MAN; else if (0 == strcmp(arg, "html")) curp->outtype = OUTT_HTML; + else if (0 == strcmp(arg, "utf8")) + curp->outtype = OUTT_UTF8; + else if (0 == strcmp(arg, "locale")) + curp->outtype = OUTT_LOCALE; else if (0 == strcmp(arg, "xhtml")) curp->outtype = OUTT_XHTML; else if (0 == strcmp(arg, "ps")) @@ -760,7 +361,6 @@ toptions(struct curparse *curp, char *arg) return(1); } - static int woptions(struct curparse *curp, char *arg) { @@ -800,30 +400,18 @@ woptions(struct curparse *curp, char *arg) return(1); } - -static int -mmsg(enum mandocerr t, void *arg, int ln, int col, const char *msg) +static void +mmsg(enum mandocerr t, enum mandoclevel lvl, + const char *file, int line, int col, const char *msg) { - struct curparse *cp; - enum mandoclevel level; - - level = MANDOCLEVEL_FATAL; - while (t < mandoclimits[level]) - /* LINTED */ - level--; - cp = (struct curparse *)arg; - if (level < cp->wlevel) - return(1); + fprintf(stderr, "%s:%d:%d: %s: %s", + file, line, col + 1, + mparse_strlevel(lvl), + mparse_strerror(t)); - fprintf(stderr, "%s:%d:%d: %s: %s", - cp->file, ln, col + 1, mandoclevels[level], mandocerrs[t]); if (msg) fprintf(stderr, ": %s", msg); - fputc('\n', stderr); - if (exit_status < level) - exit_status = level; - - return(level < MANDOCLEVEL_FATAL); + fputc('\n', stderr); }