]> git.cameronkatri.com Git - mandoc.git/blobdiff - read.c
Start systematic improvements of error reporting.
[mandoc.git] / read.c
diff --git a/read.c b/read.c
index 0ec58ecb29f396a97a0ad88cdaf327bf0085b784..3c68b42dae89a5a38f026e466294e1c06f4e6c52 100644 (file)
--- a/read.c
+++ b/read.c
@@ -1,7 +1,8 @@
-/*     $Id: read.c,v 1.38 2013/07/13 12:52:07 schwarze Exp $ */
+/*     $Id: read.c,v 1.49 2014/06/20 17:24:00 schwarze Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -26,6 +27,7 @@
 
 #include <assert.h>
 #include <ctype.h>
+#include <errno.h>
 #include <fcntl.h>
 #include <stdarg.h>
 #include <stdint.h>
@@ -35,6 +37,7 @@
 #include <unistd.h>
 
 #include "mandoc.h"
+#include "mandoc_aux.h"
 #include "libmandoc.h"
 #include "mdoc.h"
 #include "man.h"
@@ -43,7 +46,7 @@
 #define        REPARSE_LIMIT   1000
 
 struct buf {
-       char             *buf; /* binary input buffer */
+       char             *buf; /* binary input buffer */
        size_t            sz; /* size of binary buffer */
 };
 
@@ -51,16 +54,16 @@ struct      mparse {
        enum mandoclevel  file_status; /* status of current parse */
        enum mandoclevel  wlevel; /* ignore messages below this */
        int               line; /* line number in the file */
-       enum mparset      inttype; /* which parser to use */
+       int               options; /* parser options */
        struct man       *pman; /* persistent man parser */
        struct mdoc      *pmdoc; /* persistent mdoc parser */
        struct man       *man; /* man parser */
        struct mdoc      *mdoc; /* mdoc parser */
        struct roff      *roff; /* roff parser (!NULL) */
+       char             *sodest; /* filename pointed to by .so */
        int               reparse_count; /* finite interp. stack */
        mandocmsg         mmsg; /* warning/error message handler */
-       void             *arg; /* argument to mmsg */
-       const char       *file; 
+       const char       *file;
        struct buf       *secondary;
        char             *defos; /* default operating system */
 };
@@ -68,7 +71,8 @@ struct        mparse {
 static void      resize_buf(struct buf *, size_t);
 static void      mparse_buf_r(struct mparse *, struct buf, int);
 static void      pset(const char *, int, struct mparse *);
-static int       read_whole_file(const char *, int, struct buf *, int *);
+static int       read_whole_file(struct mparse *, const char *, int,
+                               struct buf *, int *);
 static void      mparse_end(struct mparse *);
 static void      mparse_parse_buffer(struct mparse *, struct buf,
                        const char *);
@@ -89,7 +93,7 @@ static        const char * const      mandocerrs[MANDOCERR_MAX] = {
        "generic warning",
 
        /* related to the prologue */
-       "no title in document",
+       "no TH macro in document",
        "document title should be all caps",
        "unknown manual section",
        "unknown manual volume or arch",
@@ -106,7 +110,7 @@ static      const char * const      mandocerrs[MANDOCERR_MAX] = {
        "bad NAME section contents",
        "sections out of conventional order",
        "duplicate section name",
-       "section not in conventional manual section",
+       "section header suited to sections 2, 3, and 9 only",
 
        /* related to macros and nesting */
        "skipping obsolete macro",
@@ -149,7 +153,7 @@ static      const char * const      mandocerrs[MANDOCERR_MAX] = {
 
        /* related to equations */
        "unexpected literal in equation",
-       
+
        "generic error",
 
        /* related to equations */
@@ -192,6 +196,7 @@ static      const char * const      mandocerrs[MANDOCERR_MAX] = {
 
        "generic fatal error",
 
+       "input too large",
        "not a manual",
        "column syntax is inconsistent",
        "NOT IMPLEMENTED: .Bd -file",
@@ -202,6 +207,11 @@ static     const char * const      mandocerrs[MANDOCERR_MAX] = {
        "no document body",
        "no document prologue",
        "static buffer exhausted",
+
+       /* system errors */
+       "cannot open file",
+       "cannot stat file",
+       "cannot read file",
 };
 
 static const char * const      mandoclevels[MANDOCLEVEL_MAX] = {
@@ -214,6 +224,7 @@ static      const char * const      mandoclevels[MANDOCLEVEL_MAX] = {
        "SYSERR"
 };
 
+
 static void
 resize_buf(struct buf *buf, size_t initial)
 {
@@ -246,35 +257,36 @@ pset(const char *buf, int pos, struct mparse *curp)
                        return;
        }
 
-       switch (curp->inttype) {
-       case (MPARSE_MDOC):
-               if (NULL == curp->pmdoc) 
-                       curp->pmdoc = mdoc_alloc(curp->roff, curp,
-                                       curp->defos);
+       if (MPARSE_MDOC & curp->options) {
+               if (NULL == curp->pmdoc)
+                       curp->pmdoc = mdoc_alloc(
+                           curp->roff, curp, curp->defos,
+                           MPARSE_QUICK & curp->options ? 1 : 0);
                assert(curp->pmdoc);
                curp->mdoc = curp->pmdoc;
                return;
-       case (MPARSE_MAN):
-               if (NULL == curp->pman) 
-                       curp->pman = man_alloc(curp->roff, curp);
+       } else if (MPARSE_MAN & curp->options) {
+               if (NULL == curp->pman)
+                       curp->pman = man_alloc(curp->roff, curp,
+                           MPARSE_QUICK & curp->options ? 1 : 0);
                assert(curp->pman);
                curp->man = curp->pman;
                return;
-       default:
-               break;
        }
 
        if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
-               if (NULL == curp->pmdoc) 
-                       curp->pmdoc = mdoc_alloc(curp->roff, curp,
-                                       curp->defos);
+               if (NULL == curp->pmdoc)
+                       curp->pmdoc = mdoc_alloc(
+                           curp->roff, curp, curp->defos,
+                           MPARSE_QUICK & curp->options ? 1 : 0);
                assert(curp->pmdoc);
                curp->mdoc = curp->pmdoc;
                return;
-       } 
+       }
 
-       if (NULL == curp->pman) 
-               curp->pman = man_alloc(curp->roff, curp);
+       if (NULL == curp->pman)
+               curp->pman = man_alloc(curp->roff, curp,
+                   MPARSE_QUICK & curp->options ? 1 : 0);
        assert(curp->pman);
        curp->man = curp->pman;
 }
@@ -297,8 +309,8 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
 
        memset(&ln, 0, sizeof(struct buf));
 
-       lnn = curp->line; 
-       pos = 0; 
+       lnn = curp->line;
+       pos = 0;
 
        for (i = 0; i < (int)blk.sz; ) {
                if (0 == pos && '\0' == blk.buf[i])
@@ -335,7 +347,7 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
                        if (pos + 2 >= (int)ln.sz)
                                resize_buf(&ln, 256);
 
-                       /* 
+                       /*
                         * Warn about bogus characters.  If you're using
                         * non-ASCII encoding, you're screwing your
                         * readers.  Since I'd rather this not happen,
@@ -346,10 +358,10 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
 
                        c = (unsigned char) blk.buf[i];
 
-                       if ( ! (isascii(c) && 
-                                       (isgraph(c) || isblank(c)))) {
+                       if ( ! (isascii(c) &&
+                           (isgraph(c) || isblank(c)))) {
                                mandoc_msg(MANDOCERR_BADCHAR, curp,
-                                               curp->line, pos, NULL);
+                                   curp->line, pos, NULL);
                                i++;
                                ln.buf[pos++] = '?';
                                continue;
@@ -403,10 +415,10 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
 
                        c = (unsigned char) blk.buf[i+1];
 
-                       if ( ! (isascii(c) && 
-                                       (isgraph(c) || isblank(c)))) {
+                       if ( ! (isascii(c) &&
+                           (isgraph(c) || isblank(c)))) {
                                mandoc_msg(MANDOCERR_BADCHAR, curp,
-                                               curp->line, pos, NULL);
+                                   curp->line, pos, NULL);
                                i += 2;
                                ln.buf[pos++] = '?';
                                continue;
@@ -418,7 +430,7 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
                        ln.buf[pos++] = blk.buf[i++];
                }
 
-               if (pos >= (int)ln.sz)
+               if (pos >= (int)ln.sz)
                        resize_buf(&ln, 256);
 
                ln.buf[pos] = '\0';
@@ -441,13 +453,12 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
                 */
 
                if (curp->secondary) {
-                       curp->secondary->buf = 
-                               mandoc_realloc
-                               (curp->secondary->buf, 
-                                curp->secondary->sz + pos + 2);
-                       memcpy(curp->secondary->buf + 
-                                       curp->secondary->sz, 
-                                       ln.buf, pos);
+                       curp->secondary->buf = mandoc_realloc(
+                           curp->secondary->buf,
+                           curp->secondary->sz + pos + 2);
+                       memcpy(curp->secondary->buf +
+                           curp->secondary->sz,
+                           ln.buf, pos);
                        curp->secondary->sz += pos;
                        curp->secondary->buf
                                [curp->secondary->sz] = '\n';
@@ -456,37 +467,42 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
                                [curp->secondary->sz] = '\0';
                }
 rerun:
-               rr = roff_parseln
-                       (curp->roff, curp->line, 
-                        &ln.buf, &ln.sz, of, &of);
+               rr = roff_parseln(curp->roff, curp->line,
+                   &ln.buf, &ln.sz, of, &of);
 
                switch (rr) {
-               case (ROFF_REPARSE):
+               case ROFF_REPARSE:
                        if (REPARSE_LIMIT >= ++curp->reparse_count)
                                mparse_buf_r(curp, ln, 0);
                        else
                                mandoc_msg(MANDOCERR_ROFFLOOP, curp,
-                                       curp->line, pos, NULL);
+                                   curp->line, pos, NULL);
                        pos = 0;
                        continue;
-               case (ROFF_APPEND):
+               case ROFF_APPEND:
                        pos = (int)strlen(ln.buf);
                        continue;
-               case (ROFF_RERUN):
+               case ROFF_RERUN:
                        goto rerun;
-               case (ROFF_IGN):
+               case ROFF_IGN:
                        pos = 0;
                        continue;
-               case (ROFF_ERR):
+               case ROFF_ERR:
                        assert(MANDOCLEVEL_FATAL <= curp->file_status);
                        break;
-               case (ROFF_SO):
+               case ROFF_SO:
+                       if (0 == (MPARSE_SO & curp->options) &&
+                           (i >= (int)blk.sz || '\0' == blk.buf[i])) {
+                               curp->sodest = mandoc_strdup(ln.buf + of);
+                               free(ln.buf);
+                               return;
+                       }
                        /*
                         * We remove `so' clauses from our lookaside
                         * buffer because we're going to descend into
                         * the file recursively.
                         */
-                       if (curp->secondary) 
+                       if (curp->secondary)
                                curp->secondary->sz -= pos + 1;
                        mparse_readfd(curp, -1, ln.buf + of);
                        if (MANDOCLEVEL_FATAL <= curp->file_status)
@@ -515,7 +531,7 @@ rerun:
                if ( ! (curp->man || curp->mdoc))
                        pset(ln.buf + of, pos - of, curp);
 
-               /* 
+               /*
                 * Lastly, push down into the parsers themselves.  One
                 * of these will have already been set in the pset()
                 * routine.
@@ -531,28 +547,29 @@ rerun:
                if (ROFF_TBL == rr)
                        while (NULL != (span = roff_span(curp->roff))) {
                                rc = curp->man ?
-                                       man_addspan(curp->man, span) :
-                                       mdoc_addspan(curp->mdoc, span);
+                                   man_addspan(curp->man, span) :
+                                   mdoc_addspan(curp->mdoc, span);
                                if (0 == rc)
                                        break;
                        }
                else if (ROFF_EQN == rr)
-                       rc = curp->mdoc ? 
-                               mdoc_addeqn(curp->mdoc, 
-                                       roff_eqn(curp->roff)) :
-                               man_addeqn(curp->man,
-                                       roff_eqn(curp->roff));
+                       rc = curp->mdoc ?
+                           mdoc_addeqn(curp->mdoc,
+                               roff_eqn(curp->roff)) :
+                           man_addeqn(curp->man,
+                               roff_eqn(curp->roff));
                else if (curp->man || curp->mdoc)
                        rc = curp->man ?
-                               man_parseln(curp->man, 
-                                       curp->line, ln.buf, of) :
-                               mdoc_parseln(curp->mdoc, 
-                                       curp->line, ln.buf, of);
+                           man_parseln(curp->man,
+                               curp->line, ln.buf, of) :
+                           mdoc_parseln(curp->mdoc,
+                               curp->line, ln.buf, of);
 
                if (0 == rc) {
                        assert(MANDOCLEVEL_FATAL <= curp->file_status);
                        break;
-               }
+               } else if (2 == rc)
+                       break;
 
                /* Temporary buffers typically are not full. */
 
@@ -568,7 +585,8 @@ rerun:
 }
 
 static int
-read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap)
+read_whole_file(struct mparse *curp, const char *file, int fd,
+               struct buf *fb, int *with_mmap)
 {
        size_t           off;
        ssize_t          ssz;
@@ -576,7 +594,10 @@ read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap)
 #ifdef HAVE_MMAP
        struct stat      st;
        if (-1 == fstat(fd, &st)) {
-               perror(file);
+               curp->file_status = MANDOCLEVEL_SYSERR;
+               if (curp->mmsg)
+                       (*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
+                           file, 0, 0, strerror(errno));
                return(0);
        }
 
@@ -589,7 +610,10 @@ read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap)
 
        if (S_ISREG(st.st_mode)) {
                if (st.st_size >= (1U << 31)) {
-                       fprintf(stderr, "%s: input too large\n", file);
+                       curp->file_status = MANDOCLEVEL_FATAL;
+                       if (curp->mmsg)
+                               (*curp->mmsg)(MANDOCERR_TOOLARGE,
+                                   curp->file_status, file, 0, 0, NULL);
                        return(0);
                }
                *with_mmap = 1;
@@ -612,7 +636,11 @@ read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap)
        for (;;) {
                if (off == fb->sz) {
                        if (fb->sz == (1U << 31)) {
-                               fprintf(stderr, "%s: input too large\n", file);
+                               curp->file_status = MANDOCLEVEL_FATAL;
+                               if (curp->mmsg)
+                                       (*curp->mmsg)(MANDOCERR_TOOLARGE,
+                                           curp->file_status,
+                                           file, 0, 0, NULL);
                                break;
                        }
                        resize_buf(fb, 65536);
@@ -623,7 +651,11 @@ read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap)
                        return(1);
                }
                if (ssz == -1) {
-                       perror(file);
+                       curp->file_status = MANDOCLEVEL_SYSERR;
+                       if (curp->mmsg)
+                               (*curp->mmsg)(MANDOCERR_SYSREAD,
+                                   curp->file_status, file, 0, 0,
+                                   strerror(errno));
                        break;
                }
                off += (size_t)ssz;
@@ -651,7 +683,7 @@ mparse_end(struct mparse *curp)
                return;
        }
 
-       if ( ! (curp->man || curp->mdoc)) {
+       if ( ! (curp->mdoc || curp->man || curp->sodest)) {
                mandoc_msg(MANDOCERR_NOTMANUAL, curp, 1, 0, NULL);
                curp->file_status = MANDOCLEVEL_FATAL;
                return;
@@ -704,12 +736,15 @@ mparse_readfd(struct mparse *curp, int fd, const char *file)
        struct buf       blk;
        int              with_mmap;
 
-       if (-1 == fd)
-               if (-1 == (fd = open(file, O_RDONLY, 0))) {
-                       perror(file);
-                       curp->file_status = MANDOCLEVEL_SYSERR;
-                       goto out;
-               }
+       if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
+               curp->file_status = MANDOCLEVEL_SYSERR;
+               if (curp->mmsg)
+                       (*curp->mmsg)(MANDOCERR_SYSOPEN,
+                           curp->file_status,
+                           file, 0, 0, strerror(errno));
+               goto out;
+       }
+
        /*
         * Run for each opened file; may be called more than once for
         * each full parse sequence if the opened file is nested (i.e.,
@@ -717,10 +752,8 @@ mparse_readfd(struct mparse *curp, int fd, const char *file)
         * the parse phase for the file.
         */
 
-       if ( ! read_whole_file(file, fd, &blk, &with_mmap)) {
-               curp->file_status = MANDOCLEVEL_SYSERR;
+       if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap))
                goto out;
-       }
 
        mparse_parse_buffer(curp, blk, file);
 
@@ -738,8 +771,8 @@ out:
 }
 
 struct mparse *
-mparse_alloc(enum mparset inttype, enum mandoclevel wlevel,
-               mandocmsg mmsg, void *arg, char *defos)
+mparse_alloc(int options, enum mandoclevel wlevel,
+               mandocmsg mmsg, char *defos)
 {
        struct mparse   *curp;
 
@@ -747,13 +780,12 @@ mparse_alloc(enum mparset inttype, enum mandoclevel wlevel,
 
        curp = mandoc_calloc(1, sizeof(struct mparse));
 
+       curp->options = options;
        curp->wlevel = wlevel;
        curp->mmsg = mmsg;
-       curp->arg = arg;
-       curp->inttype = inttype;
        curp->defos = defos;
 
-       curp->roff = roff_alloc(inttype, curp);
+       curp->roff = roff_alloc(curp, options);
        return(curp);
 }
 
@@ -773,6 +805,9 @@ mparse_reset(struct mparse *curp)
        curp->file_status = MANDOCLEVEL_OK;
        curp->mdoc = NULL;
        curp->man = NULL;
+
+       free(curp->sodest);
+       curp->sodest = NULL;
 }
 
 void
@@ -789,13 +824,20 @@ mparse_free(struct mparse *curp)
                free(curp->secondary->buf);
 
        free(curp->secondary);
+       free(curp->sodest);
        free(curp);
 }
 
 void
-mparse_result(struct mparse *curp, struct mdoc **mdoc, struct man **man)
+mparse_result(struct mparse *curp,
+       struct mdoc **mdoc, struct man **man, char **sodest)
 {
 
+       if (sodest && NULL != (*sodest = curp->sodest)) {
+               *mdoc = NULL;
+               *man = NULL;
+               return;
+       }
        if (mdoc)
                *mdoc = curp->mdoc;
        if (man)
@@ -810,14 +852,14 @@ mandoc_vmsg(enum mandocerr t, struct mparse *m,
        va_list          ap;
 
        va_start(ap, fmt);
-       vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
+       (void)vsnprintf(buf, sizeof(buf), fmt, ap);
        va_end(ap);
 
        mandoc_msg(t, m, ln, pos, buf);
 }
 
 void
-mandoc_msg(enum mandocerr er, struct mparse *m, 
+mandoc_msg(enum mandocerr er, struct mparse *m,
                int ln, int col, const char *msg)
 {
        enum mandoclevel level;