-/* $Id: read.c,v 1.144 2015/10/13 22:59:54 schwarze Exp $ */
+/* $Id: read.c,v 1.176 2017/06/11 19:37:01 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
#include "config.h"
#include <sys/types.h>
-#if HAVE_MMAP
#include <sys/mman.h>
#include <sys/stat.h>
-#endif
#include <assert.h>
#include <ctype.h>
+#if HAVE_ERR
#include <err.h>
+#endif
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
-#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define REPARSE_LIMIT 1000
struct mparse {
- struct roff_man *man; /* man parser */
struct roff *roff; /* roff parser (!NULL) */
+ struct roff_man *man; /* man parser */
char *sodest; /* filename pointed to by .so */
const char *file; /* filename of current input file */
struct buf *primary; /* buffer currently being parsed */
static void choose_parser(struct mparse *);
static void resize_buf(struct buf *, size_t);
-static void mparse_buf_r(struct mparse *, struct buf, size_t, int);
+static int mparse_buf_r(struct mparse *, struct buf, size_t, int);
static int read_whole_file(struct mparse *, const char *, int,
struct buf *, int *);
static void mparse_end(struct mparse *);
static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
MANDOCERR_OK,
- MANDOCERR_WARNING,
+ MANDOCERR_STYLE,
MANDOCERR_WARNING,
MANDOCERR_ERROR,
MANDOCERR_UNSUPP,
static const char * const mandocerrs[MANDOCERR_MAX] = {
"ok",
+ "generic style suggestion",
+
+ "Mdocdate found",
+ "Mdocdate missing",
+ "legacy man(7) date format",
+ "useless macro",
+ "consider using OS macro",
+ "errnos out of order",
+ "duplicate errno",
+ "description line ends with a full stop",
+ "no blank before trailing delimiter",
+ "function name without markup",
+
"generic warning",
/* related to the prologue */
"no document body",
"content before first section header",
"first section is not \"NAME\"",
- "NAME section without name",
+ "NAME section without Nm before Nd",
"NAME section without description",
"description not at the end of NAME",
"bad NAME section content",
+ "missing comma before name",
"missing description line, using \"\"",
+ "description line outside NAME section",
"sections out of conventional order",
"duplicate section title",
"unexpected section",
"fill mode already enabled, skipping",
"fill mode already disabled, skipping",
"line scope broken",
+ "skipping blank line in line scope",
/* related to missing macro arguments */
"skipping empty request",
"empty argument, using 0n",
"missing display type, using -ragged",
"list type is not the first argument",
- "missing -width in -tag list, using 8n",
+ "missing -width in -tag list, using 6n",
"missing utility name, using \"\"",
"missing function name, using \"\"",
"empty head in list item",
"unknown font type, using \\fR",
"nothing follows prefix",
"empty reference block",
+ "missing section argument",
"missing -std argument, adding it",
"missing option string, using \"\"",
"missing resource identifier, using \"\"",
"unknown AT&T UNIX version",
"comma in function argument",
"parenthesis in function name",
+ "unknown library name",
"invalid content in Rs block",
"invalid Boolean argument",
"unknown font, skipping request",
"blank line in fill mode, using .sp",
"tab in filled text",
"whitespace at end of input line",
+ "new sentence, new line",
"bad comment style",
"invalid escape sequence",
"undefined string, using \"\"",
/* related to request and macro arguments */
"escaped character not allowed in a name",
"NOT IMPLEMENTED: Bd -file",
+ "skipping display without arguments",
"missing list type, using -item",
+ "argument is not numeric, using 1",
"missing manual name, using \"\"",
"uname(3) system call failed, using UNKNOWN",
"unknown standard specifier",
static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
"SUCCESS",
- "RESERVED",
+ "STYLE",
"WARNING",
"ERROR",
"UNSUPP",
}
}
- if (curp->man == NULL) {
- curp->man = roff_man_alloc(curp->roff, curp, curp->defos,
- curp->options & MPARSE_QUICK ? 1 : 0);
- curp->man->macroset = MACROSET_MAN;
- curp->man->first->tok = TOKEN_NONE;
- }
-
if (format == MPARSE_MDOC) {
- mdoc_hash_init();
curp->man->macroset = MACROSET_MDOC;
- curp->man->first->tok = TOKEN_NONE;
+ if (curp->man->mdocmac == NULL)
+ curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
} else {
- man_hash_init();
curp->man->macroset = MACROSET_MAN;
- curp->man->first->tok = TOKEN_NONE;
+ if (curp->man->manmac == NULL)
+ curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
}
+ curp->man->first->tok = TOKEN_NONE;
}
/*
* macros, inline equations, and input line traps)
* and indirectly (for .so file inclusion).
*/
-static void
+static int
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
const struct tbl_span *span;
continue;
}
- /* Trailing backslash = a plain char. */
-
- if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
- ln.buf[pos++] = blk.buf[i++];
- continue;
- }
-
- /*
- * Found escape and at least one other character.
- * When it's a newline character, skip it.
- * When there is a carriage return in between,
- * skip that one as well.
- */
-
- if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
- '\n' == blk.buf[i + 2])
- ++i;
- if ('\n' == blk.buf[i + 1]) {
- i += 2;
- ++lnn;
- continue;
- }
-
- if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
- i += 2;
- /* Comment, skip to end of line */
- for (; i < blk.sz; ++i) {
- if ('\n' == blk.buf[i]) {
- ++i;
- ++lnn;
- break;
- }
- }
-
- /* Backout trailing whitespaces */
- for (; pos > 0; --pos) {
- if (ln.buf[pos - 1] != ' ')
- break;
- if (pos > 2 && ln.buf[pos - 2] == '\\')
- break;
- }
- break;
- }
-
- /* Catch escaped bogus characters. */
-
- c = (unsigned char) blk.buf[i+1];
-
- if ( ! (isascii(c) &&
- (isgraph(c) || isblank(c)))) {
- mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
- curp->line, pos, "0x%x", c);
- i += 2;
- ln.buf[pos++] = '?';
- continue;
- }
-
- /* Some other escape sequence, copy & cont. */
-
- ln.buf[pos++] = blk.buf[i++];
ln.buf[pos++] = blk.buf[i++];
}
- if (pos >= ln.sz)
+ if (pos + 1 >= ln.sz)
resize_buf(&ln, 256);
+ if (i == blk.sz || blk.buf[i] == '\0')
+ ln.buf[pos++] = '\n';
ln.buf[pos] = '\0';
/*
switch (rr) {
case ROFF_REPARSE:
- if (REPARSE_LIMIT >= ++curp->reparse_count)
- mparse_buf_r(curp, ln, of, 0);
- else
+ if (++curp->reparse_count > REPARSE_LIMIT)
mandoc_msg(MANDOCERR_ROFFLOOP, curp,
curp->line, pos, NULL);
- pos = 0;
- continue;
+ else if (mparse_buf_r(curp, ln, of, 0) == 1 ||
+ start == 1) {
+ pos = 0;
+ continue;
+ }
+ free(ln.buf);
+ return 0;
case ROFF_APPEND:
pos = strlen(ln.buf);
continue;
(i >= blk.sz || blk.buf[i] == '\0')) {
curp->sodest = mandoc_strdup(ln.buf + of);
free(ln.buf);
- return;
+ return 1;
}
/*
* We remove `so' clauses from our lookaside
if (curp->secondary)
curp->secondary->sz -= pos + 1;
save_file = curp->file;
- if (mparse_open(curp, &fd, ln.buf + of) ==
- MANDOCLEVEL_OK) {
+ if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
mparse_readfd(curp, fd, ln.buf + of);
+ close(fd);
curp->file = save_file;
} else {
curp->file = save_file;
break;
}
- /*
- * If input parsers have not been allocated, do so now.
- * We keep these instanced between parsers, but set them
- * locally per parse routine since we can use different
- * parsers with each one.
- */
-
- if (curp->man == NULL ||
- curp->man->macroset == MACROSET_NONE)
+ if (curp->man->macroset == MACROSET_NONE)
choose_parser(curp);
/*
}
free(ln.buf);
+ return 1;
}
static int
read_whole_file(struct mparse *curp, const char *file, int fd,
struct buf *fb, int *with_mmap)
{
+ struct stat st;
gzFile gz;
size_t off;
ssize_t ssz;
-#if HAVE_MMAP
- struct stat st;
-
if (fstat(fd, &st) == -1)
err((int)MANDOCLEVEL_SYSERR, "%s", file);
if (fb->buf != MAP_FAILED)
return 1;
}
-#endif
if (curp->gzip) {
if ((gz = gzdopen(fd, "rb")) == NULL)
static void
mparse_end(struct mparse *curp)
{
-
- if (curp->man == NULL && curp->sodest == NULL)
- curp->man = roff_man_alloc(curp->roff, curp, curp->defos,
- curp->options & MPARSE_QUICK ? 1 : 0);
if (curp->man->macroset == MACROSET_NONE)
curp->man->macroset = MACROSET_MAN;
if (curp->man->macroset == MACROSET_MDOC)
(MPARSE_UTF8 | MPARSE_LATIN1);
mparse_parse_buffer(curp, blk, file);
curp->filenc = save_filenc;
-#if HAVE_MMAP
if (with_mmap)
munmap(blk.buf, blk.sz);
else
-#endif
free(blk.buf);
}
-
- if (fd != STDIN_FILENO && close(fd) == -1)
- perror(file);
-
return curp->file_status;
}
-enum mandoclevel
-mparse_open(struct mparse *curp, int *fd, const char *file)
+int
+mparse_open(struct mparse *curp, const char *file)
{
char *cp;
+ int fd;
curp->file = file;
cp = strrchr(file, '.');
/* First try to use the filename as it is. */
- if ((*fd = open(file, O_RDONLY)) != -1)
- return MANDOCLEVEL_OK;
+ if ((fd = open(file, O_RDONLY)) != -1)
+ return fd;
/*
* If that doesn't work and the filename doesn't
if ( ! curp->gzip) {
mandoc_asprintf(&cp, "%s.gz", file);
- *fd = open(file, O_RDONLY);
+ fd = open(cp, O_RDONLY);
free(cp);
- if (*fd != -1) {
+ if (fd != -1) {
curp->gzip = 1;
- return MANDOCLEVEL_OK;
+ return fd;
}
}
/* Neither worked, give up. */
mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
- return MANDOCLEVEL_ERROR;
+ return -1;
}
struct mparse *
curp->man = roff_man_alloc( curp->roff, curp, curp->defos,
curp->options & MPARSE_QUICK ? 1 : 0);
if (curp->options & MPARSE_MDOC) {
- mdoc_hash_init();
curp->man->macroset = MACROSET_MDOC;
+ if (curp->man->mdocmac == NULL)
+ curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
} else if (curp->options & MPARSE_MAN) {
- man_hash_init();
curp->man->macroset = MACROSET_MAN;
+ if (curp->man->manmac == NULL)
+ curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
}
curp->man->first->tok = TOKEN_NONE;
return curp;
void
mparse_reset(struct mparse *curp)
{
-
roff_reset(curp->roff);
+ roff_man_reset(curp->man);
+
+ free(curp->sodest);
+ curp->sodest = NULL;
- if (curp->man != NULL)
- roff_man_reset(curp->man);
if (curp->secondary)
curp->secondary->sz = 0;
curp->file_status = MANDOCLEVEL_OK;
-
- free(curp->sodest);
- curp->sodest = NULL;
+ curp->gzip = 0;
}
void
mparse_free(struct mparse *curp)
{
+ roffhash_free(curp->man->mdocmac);
+ roffhash_free(curp->man->manmac);
roff_man_free(curp->man);
- if (curp->roff)
- roff_free(curp->roff);
+ roff_free(curp->roff);
if (curp->secondary)
free(curp->secondary->buf);
*man = curp->man;
}
+void
+mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
+{
+ if (curp->file_status > *rc)
+ *rc = curp->file_status;
+}
+
void
mandoc_vmsg(enum mandocerr t, struct mparse *m,
int ln, int pos, const char *fmt, ...)