-/* $Id: read.c,v 1.136 2015/04/18 17:01:58 schwarze Exp $ */
+/* $Id: read.c,v 1.176 2017/06/11 19:37:01 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
#include "config.h"
#include <sys/types.h>
-#if HAVE_MMAP
#include <sys/mman.h>
#include <sys/stat.h>
-#endif
-#include <sys/wait.h>
#include <assert.h>
#include <ctype.h>
+#if HAVE_ERR
+#include <err.h>
+#endif
#include <errno.h>
#include <fcntl.h>
-#include <signal.h>
#include <stdarg.h>
-#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <zlib.h>
#include "mandoc_aux.h"
#include "mandoc.h"
#include "mdoc.h"
#include "man.h"
#include "libmandoc.h"
+#include "roff_int.h"
#define REPARSE_LIMIT 1000
struct mparse {
- struct roff_man *man; /* man parser */
struct roff *roff; /* roff parser (!NULL) */
- const struct mchars *mchars; /* character table */
+ struct roff_man *man; /* man parser */
char *sodest; /* filename pointed to by .so */
const char *file; /* filename of current input file */
struct buf *primary; /* buffer currently being parsed */
enum mandoclevel file_status; /* status of current parse */
enum mandoclevel wlevel; /* ignore messages below this */
int options; /* parser options */
+ int gzip; /* current input file is gzipped */
int filenc; /* encoding of the current file */
int reparse_count; /* finite interp. stack */
int line; /* line number in the file */
- pid_t child; /* the gunzip(1) process */
};
static void choose_parser(struct mparse *);
static void resize_buf(struct buf *, size_t);
-static void mparse_buf_r(struct mparse *, struct buf, size_t, int);
+static int mparse_buf_r(struct mparse *, struct buf, size_t, int);
static int read_whole_file(struct mparse *, const char *, int,
struct buf *, int *);
static void mparse_end(struct mparse *);
static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
MANDOCERR_OK,
- MANDOCERR_WARNING,
+ MANDOCERR_STYLE,
MANDOCERR_WARNING,
MANDOCERR_ERROR,
MANDOCERR_UNSUPP,
static const char * const mandocerrs[MANDOCERR_MAX] = {
"ok",
+ "generic style suggestion",
+
+ "Mdocdate found",
+ "Mdocdate missing",
+ "legacy man(7) date format",
+ "useless macro",
+ "consider using OS macro",
+ "errnos out of order",
+ "duplicate errno",
+ "description line ends with a full stop",
+ "no blank before trailing delimiter",
+ "function name without markup",
+
"generic warning",
/* related to the prologue */
"no document body",
"content before first section header",
"first section is not \"NAME\"",
- "NAME section without name",
+ "NAME section without Nm before Nd",
"NAME section without description",
"description not at the end of NAME",
"bad NAME section content",
+ "missing comma before name",
"missing description line, using \"\"",
+ "description line outside NAME section",
"sections out of conventional order",
"duplicate section title",
"unexpected section",
"blocks badly nested",
"nested displays are not portable",
"moving content out of list",
- ".Vt block has child macro",
"fill mode already enabled, skipping",
"fill mode already disabled, skipping",
"line scope broken",
+ "skipping blank line in line scope",
/* related to missing macro arguments */
"skipping empty request",
"empty argument, using 0n",
"missing display type, using -ragged",
"list type is not the first argument",
- "missing -width in -tag list, using 8n",
+ "missing -width in -tag list, using 6n",
"missing utility name, using \"\"",
"missing function name, using \"\"",
"empty head in list item",
"unknown font type, using \\fR",
"nothing follows prefix",
"empty reference block",
+ "missing section argument",
"missing -std argument, adding it",
"missing option string, using \"\"",
"missing resource identifier, using \"\"",
"unknown AT&T UNIX version",
"comma in function argument",
"parenthesis in function name",
+ "unknown library name",
"invalid content in Rs block",
"invalid Boolean argument",
"unknown font, skipping request",
"blank line in fill mode, using .sp",
"tab in filled text",
"whitespace at end of input line",
+ "new sentence, new line",
"bad comment style",
"invalid escape sequence",
"undefined string, using \"\"",
/* related to request and macro arguments */
"escaped character not allowed in a name",
"NOT IMPLEMENTED: Bd -file",
+ "skipping display without arguments",
"missing list type, using -item",
+ "argument is not numeric, using 1",
"missing manual name, using \"\"",
"uname(3) system call failed, using UNKNOWN",
"unknown standard specifier",
static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
"SUCCESS",
- "RESERVED",
+ "STYLE",
"WARNING",
"ERROR",
"UNSUPP",
}
if (format == MPARSE_MDOC) {
- if (curp->man == NULL)
- curp->man = mdoc_alloc(
- curp->roff, curp, curp->defos,
- MPARSE_QUICK & curp->options ? 1 : 0);
- else
- curp->man->macroset = MACROSET_MDOC;
- mdoc_hash_init();
- return;
- }
-
- /* Fall back to man(7) as a last resort. */
-
- if (curp->man == NULL)
- curp->man = man_alloc(
- curp->roff, curp, curp->defos,
- MPARSE_QUICK & curp->options ? 1 : 0);
- else
+ curp->man->macroset = MACROSET_MDOC;
+ if (curp->man->mdocmac == NULL)
+ curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
+ } else {
curp->man->macroset = MACROSET_MAN;
- man_hash_init();
+ if (curp->man->manmac == NULL)
+ curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
+ }
+ curp->man->first->tok = TOKEN_NONE;
}
/*
* macros, inline equations, and input line traps)
* and indirectly (for .so file inclusion).
*/
-static void
+static int
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
const struct tbl_span *span;
int of;
int lnn; /* line number in the real file */
int fd;
- pid_t save_child;
unsigned char c;
memset(&ln, 0, sizeof(ln));
continue;
}
- /* Trailing backslash = a plain char. */
-
- if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
- ln.buf[pos++] = blk.buf[i++];
- continue;
- }
-
- /*
- * Found escape and at least one other character.
- * When it's a newline character, skip it.
- * When there is a carriage return in between,
- * skip that one as well.
- */
-
- if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
- '\n' == blk.buf[i + 2])
- ++i;
- if ('\n' == blk.buf[i + 1]) {
- i += 2;
- ++lnn;
- continue;
- }
-
- if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
- i += 2;
- /* Comment, skip to end of line */
- for (; i < blk.sz; ++i) {
- if ('\n' == blk.buf[i]) {
- ++i;
- ++lnn;
- break;
- }
- }
-
- /* Backout trailing whitespaces */
- for (; pos > 0; --pos) {
- if (ln.buf[pos - 1] != ' ')
- break;
- if (pos > 2 && ln.buf[pos - 2] == '\\')
- break;
- }
- break;
- }
-
- /* Catch escaped bogus characters. */
-
- c = (unsigned char) blk.buf[i+1];
-
- if ( ! (isascii(c) &&
- (isgraph(c) || isblank(c)))) {
- mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
- curp->line, pos, "0x%x", c);
- i += 2;
- ln.buf[pos++] = '?';
- continue;
- }
-
- /* Some other escape sequence, copy & cont. */
-
- ln.buf[pos++] = blk.buf[i++];
ln.buf[pos++] = blk.buf[i++];
}
- if (pos >= ln.sz)
+ if (pos + 1 >= ln.sz)
resize_buf(&ln, 256);
+ if (i == blk.sz || blk.buf[i] == '\0')
+ ln.buf[pos++] = '\n';
ln.buf[pos] = '\0';
/*
switch (rr) {
case ROFF_REPARSE:
- if (REPARSE_LIMIT >= ++curp->reparse_count)
- mparse_buf_r(curp, ln, of, 0);
- else
+ if (++curp->reparse_count > REPARSE_LIMIT)
mandoc_msg(MANDOCERR_ROFFLOOP, curp,
curp->line, pos, NULL);
- pos = 0;
- continue;
+ else if (mparse_buf_r(curp, ln, of, 0) == 1 ||
+ start == 1) {
+ pos = 0;
+ continue;
+ }
+ free(ln.buf);
+ return 0;
case ROFF_APPEND:
pos = strlen(ln.buf);
continue;
(i >= blk.sz || blk.buf[i] == '\0')) {
curp->sodest = mandoc_strdup(ln.buf + of);
free(ln.buf);
- return;
+ return 1;
}
/*
* We remove `so' clauses from our lookaside
if (curp->secondary)
curp->secondary->sz -= pos + 1;
save_file = curp->file;
- save_child = curp->child;
- if (mparse_open(curp, &fd, ln.buf + of) ==
- MANDOCLEVEL_OK) {
+ if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
mparse_readfd(curp, fd, ln.buf + of);
+ close(fd);
curp->file = save_file;
} else {
curp->file = save_file;
of = 0;
mparse_buf_r(curp, ln, of, 0);
}
- curp->child = save_child;
pos = 0;
continue;
default:
break;
}
- /*
- * If input parsers have not been allocated, do so now.
- * We keep these instanced between parsers, but set them
- * locally per parse routine since we can use different
- * parsers with each one.
- */
-
- if (curp->man == NULL ||
- curp->man->macroset == MACROSET_NONE)
+ if (curp->man->macroset == MACROSET_NONE)
choose_parser(curp);
/*
* Do the same for ROFF_EQN.
*/
- if (rr == ROFF_TBL) {
+ if (rr == ROFF_TBL)
while ((span = roff_span(curp->roff)) != NULL)
- if (curp->man->macroset == MACROSET_MDOC)
- mdoc_addspan(curp->man, span);
- else
- man_addspan(curp->man, span);
- } else if (rr == ROFF_EQN) {
- if (curp->man->macroset == MACROSET_MDOC)
- mdoc_addeqn(curp->man, roff_eqn(curp->roff));
- else
- man_addeqn(curp->man, roff_eqn(curp->roff));
- } else if ((curp->man->macroset == MACROSET_MDOC ?
+ roff_addtbl(curp->man, span);
+ else if (rr == ROFF_EQN)
+ roff_addeqn(curp->man, roff_eqn(curp->roff));
+ else if ((curp->man->macroset == MACROSET_MDOC ?
mdoc_parseln(curp->man, curp->line, ln.buf, of) :
man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
break;
}
free(ln.buf);
+ return 1;
}
static int
read_whole_file(struct mparse *curp, const char *file, int fd,
struct buf *fb, int *with_mmap)
{
+ struct stat st;
+ gzFile gz;
size_t off;
ssize_t ssz;
-#if HAVE_MMAP
- struct stat st;
- if (-1 == fstat(fd, &st)) {
- perror(file);
- exit((int)MANDOCLEVEL_SYSERR);
- }
+ if (fstat(fd, &st) == -1)
+ err((int)MANDOCLEVEL_SYSERR, "%s", file);
/*
* If we're a regular file, try just reading in the whole entry
* concerned that this is going to tank any machines.
*/
- if (S_ISREG(st.st_mode)) {
+ if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
if (st.st_size > 0x7fffffff) {
mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
- return(0);
+ return 0;
}
*with_mmap = 1;
fb->sz = (size_t)st.st_size;
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
if (fb->buf != MAP_FAILED)
- return(1);
+ return 1;
}
-#endif
+
+ if (curp->gzip) {
+ if ((gz = gzdopen(fd, "rb")) == NULL)
+ err((int)MANDOCLEVEL_SYSERR, "%s", file);
+ } else
+ gz = NULL;
/*
* If this isn't a regular file (like, say, stdin), then we must
}
resize_buf(fb, 65536);
}
- ssz = read(fd, fb->buf + (int)off, fb->sz - off);
+ ssz = curp->gzip ?
+ gzread(gz, fb->buf + (int)off, fb->sz - off) :
+ read(fd, fb->buf + (int)off, fb->sz - off);
if (ssz == 0) {
fb->sz = off;
- return(1);
- }
- if (ssz == -1) {
- perror(file);
- exit((int)MANDOCLEVEL_SYSERR);
+ return 1;
}
+ if (ssz == -1)
+ err((int)MANDOCLEVEL_SYSERR, "%s", file);
off += (size_t)ssz;
}
free(fb->buf);
fb->buf = NULL;
- return(0);
+ return 0;
}
static void
mparse_end(struct mparse *curp)
{
-
- if (curp->man == NULL && curp->sodest == NULL)
- curp->man = man_alloc(curp->roff, curp, curp->defos,
- curp->options & MPARSE_QUICK ? 1 : 0);
if (curp->man->macroset == MACROSET_NONE)
curp->man->macroset = MACROSET_MAN;
if (curp->man->macroset == MACROSET_MDOC)
blk.sz = len;
mparse_parse_buffer(curp, blk, file);
- return(curp->file_status);
+ return curp->file_status;
}
/*
(MPARSE_UTF8 | MPARSE_LATIN1);
mparse_parse_buffer(curp, blk, file);
curp->filenc = save_filenc;
-#if HAVE_MMAP
if (with_mmap)
munmap(blk.buf, blk.sz);
else
-#endif
free(blk.buf);
}
-
- if (fd != STDIN_FILENO && close(fd) == -1)
- perror(file);
-
- mparse_wait(curp);
- return(curp->file_status);
+ return curp->file_status;
}
-enum mandoclevel
-mparse_open(struct mparse *curp, int *fd, const char *file)
+/*
+ * Open an input file for parsing.
+ * Records the name in curp->file and sets curp->gzip when the name
+ * ends in ".gz".  If the name cannot be opened as given and does not
+ * already end in ".gz", the name with ".gz" appended is tried and,
+ * on success, curp->gzip is set.
+ * Returns a read-only file descriptor, or -1 after reporting
+ * MANDOCERR_FILE.  The reported error is always the one from opening
+ * the name as given, not from the ".gz" fallback.
+ * The descriptor is not closed here; that is left to the caller.
+ */
+int
+mparse_open(struct mparse *curp, const char *file)
{
- int pfd[2];
- int save_errno;
char *cp;
+ int fd, save_errno;
curp->file = file;
+ cp = strrchr(file, '.');
+ curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
- /* Unless zipped, try to just open the file. */
+ /* First try to use the filename as it is. */
- if ((cp = strrchr(file, '.')) == NULL ||
- strcmp(cp + 1, "gz")) {
- curp->child = 0;
- if ((*fd = open(file, O_RDONLY)) != -1)
- return(MANDOCLEVEL_OK);
+ if ((fd = open(file, O_RDONLY)) != -1)
+ return fd;
- /* Open failed; try to append ".gz". */
+ /*
+ * If that doesn't work and the filename doesn't
+ * already end in .gz, try appending .gz.
+ */
+ if ( ! curp->gzip) {
+ /* Keep the error for the name the caller asked for. */
+ save_errno = errno;
mandoc_asprintf(&cp, "%s.gz", file);
- file = cp;
- } else
- cp = NULL;
-
- /* Before forking, make sure the file can be read. */
-
- save_errno = errno;
- if (access(file, R_OK) == -1) {
- if (cp != NULL)
- errno = save_errno;
+ fd = open(cp, O_RDONLY);
free(cp);
- *fd = -1;
- curp->child = 0;
- mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
- return(MANDOCLEVEL_ERROR);
- }
-
- /* Run gunzip(1). */
-
- if (pipe(pfd) == -1) {
- perror("pipe");
- exit((int)MANDOCLEVEL_SYSERR);
- }
-
- switch (curp->child = fork()) {
- case -1:
- perror("fork");
- exit((int)MANDOCLEVEL_SYSERR);
- case 0:
- close(pfd[0]);
- if (dup2(pfd[1], STDOUT_FILENO) == -1) {
- perror("dup");
- exit((int)MANDOCLEVEL_SYSERR);
+ /* Neither the second open() nor free() may leak into the message. */
+ errno = save_errno;
+ if (fd != -1) {
+ curp->gzip = 1;
+ return fd;
}
- signal(SIGPIPE, SIG_DFL);
- execlp("gunzip", "gunzip", "-c", file, NULL);
- perror("exec");
- exit((int)MANDOCLEVEL_SYSERR);
- default:
- close(pfd[1]);
- *fd = pfd[0];
- return(MANDOCLEVEL_OK);
}
-}
-enum mandoclevel
-mparse_wait(struct mparse *curp)
-{
- int status;
-
- if (curp->child == 0)
- return(MANDOCLEVEL_OK);
+ /* Neither worked, give up. */
- if (waitpid(curp->child, &status, 0) == -1) {
- perror("wait");
- exit((int)MANDOCLEVEL_SYSERR);
- }
- curp->child = 0;
- if (WIFSIGNALED(status)) {
- mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
- "gunzip died from signal %d", WTERMSIG(status));
- return(MANDOCLEVEL_ERROR);
- }
- if (WEXITSTATUS(status)) {
- mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
- "gunzip failed with code %d", WEXITSTATUS(status));
- return(MANDOCLEVEL_ERROR);
- }
- return(MANDOCLEVEL_OK);
+ mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
+ return -1;
}
struct mparse *
mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
- const struct mchars *mchars, const char *defos)
+ const char *defos)
{
struct mparse *curp;
curp->mmsg = mmsg;
curp->defos = defos;
- curp->mchars = mchars;
- curp->roff = roff_alloc(curp, curp->mchars, options);
+ curp->roff = roff_alloc(curp, options);
+ curp->man = roff_man_alloc( curp->roff, curp, curp->defos,
+ curp->options & MPARSE_QUICK ? 1 : 0);
if (curp->options & MPARSE_MDOC) {
- curp->man = mdoc_alloc(
- curp->roff, curp, curp->defos,
- curp->options & MPARSE_QUICK ? 1 : 0);
- mdoc_hash_init();
- }
- if (curp->options & MPARSE_MAN) {
- curp->man = man_alloc(
- curp->roff, curp, curp->defos,
- curp->options & MPARSE_QUICK ? 1 : 0);
- man_hash_init();
+ curp->man->macroset = MACROSET_MDOC;
+ if (curp->man->mdocmac == NULL)
+ curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
+ } else if (curp->options & MPARSE_MAN) {
+ curp->man->macroset = MACROSET_MAN;
+ if (curp->man->manmac == NULL)
+ curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
}
-
- return(curp);
+ curp->man->first->tok = TOKEN_NONE;
+ return curp;
}
+/*
+ * Reset per-file parser state.
+ * Calls roff_reset() and roff_man_reset() on the two sub-parsers,
+ * frees and clears the stored .so destination, sets the size of the
+ * secondary buffer to zero if one exists, and sets file_status back
+ * to MANDOCLEVEL_OK and the gzip flag back to 0.
+ */
void
mparse_reset(struct mparse *curp)
{
-
roff_reset(curp->roff);
+ roff_man_reset(curp->man);
+
+ free(curp->sodest);
+ curp->sodest = NULL;
- if (curp->man != NULL) {
- if (curp->man->macroset == MACROSET_MDOC)
- mdoc_reset(curp->man);
- else
- man_reset(curp->man);
- curp->man->macroset = MACROSET_NONE;
- }
if (curp->secondary)
curp->secondary->sz = 0;
curp->file_status = MANDOCLEVEL_OK;
-
- free(curp->sodest);
- curp->sodest = NULL;
+ curp->gzip = 0;
}
void
mparse_free(struct mparse *curp)
{
- if (curp->man->macroset == MACROSET_MDOC)
- mdoc_free(curp->man);
- if (curp->man->macroset == MACROSET_MAN)
- man_free(curp->man);
- if (curp->roff)
- roff_free(curp->roff);
+ roffhash_free(curp->man->mdocmac);
+ roffhash_free(curp->man->manmac);
+ roff_man_free(curp->man);
+ roff_free(curp->roff);
if (curp->secondary)
free(curp->secondary->buf);
*man = curp->man;
}
+/*
+ * Fold the status of the current parse into a running result:
+ * *rc is overwritten with curp->file_status only when that value
+ * compares greater, so *rc is never lowered by this call.
+ */
+void
+mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
+{
+ if (curp->file_status > *rc)
+ *rc = curp->file_status;
+}
+
void
mandoc_vmsg(enum mandocerr t, struct mparse *m,
int ln, int pos, const char *fmt, ...)
mparse_strerror(enum mandocerr er)
{
- return(mandocerrs[er]);
+ return mandocerrs[er];
}
+/*
+ * Return the name stored for a message level in mandoclevels[].
+ * The index is used without a range check, so lvl has to be
+ * below MANDOCLEVEL_MAX, the declared size of that table.
+ */
const char *
mparse_strlevel(enum mandoclevel lvl)
{
- return(mandoclevels[lvl]);
+ return mandoclevels[lvl];
}
void
{
assert(p->secondary);
- return(p->secondary->sz ? p->secondary->buf : NULL);
+ return p->secondary->sz ? p->secondary->buf : NULL;
}