-/* $Id: read.c,v 1.14 2011/04/30 10:18:24 kristaps Exp $ */
+/* $Id: read.c,v 1.28 2012/02/16 20:51:31 joerg Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
#include "config.h"
#endif
-#include <sys/stat.h>
-#include <sys/mman.h>
+#ifdef HAVE_MMAP
+# include <sys/stat.h>
+# include <sys/mman.h>
+#endif
#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include <stdarg.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libmandoc.h"
#include "mdoc.h"
#include "man.h"
+#include "main.h"
#ifndef MAP_FILE
#define MAP_FILE 0
struct man *man; /* man parser */
struct mdoc *mdoc; /* mdoc parser */
struct roff *roff; /* roff parser (!NULL) */
- struct regset regs; /* roff registers */
int reparse_count; /* finite interp. stack */
mandocmsg mmsg; /* warning/error message handler */
void *arg; /* argument to mmsg */
const char *file;
+ struct buf *secondary;
};
static void resize_buf(struct buf *, size_t);
static void mparse_buf_r(struct mparse *, struct buf, int);
static void mparse_readfd_r(struct mparse *, int, const char *, int);
static void pset(const char *, int, struct mparse *);
-static void pdesc(struct mparse *, const char *, int);
static int read_whole_file(const char *, int, struct buf *, int *);
static void mparse_end(struct mparse *);
"bad comment style",
"bad escape sequence",
"unterminated quoted string",
+
+ /* related to equations */
+ "unexpected literal in equation",
"generic error",
+ /* related to equations */
+ "unexpected equation scope closure",
+ "equation scope open on exit",
+ "overlapping equation scopes",
+ "unexpected end of equation",
+ "equation syntax error",
+
/* related to tables */
"bad table syntax",
"bad table option",
"not a manual",
"column syntax is inconsistent",
"NOT IMPLEMENTED: .Bd -file",
- "line scope broken, syntax violated",
"argument count wrong, violates syntax",
"child violates parent syntax",
"argument count wrong, violates syntax",
switch (curp->inttype) {
case (MPARSE_MDOC):
if (NULL == curp->pmdoc)
- curp->pmdoc = mdoc_alloc(&curp->regs, curp);
+ curp->pmdoc = mdoc_alloc(curp->roff, curp);
assert(curp->pmdoc);
curp->mdoc = curp->pmdoc;
return;
case (MPARSE_MAN):
if (NULL == curp->pman)
- curp->pman = man_alloc(&curp->regs, curp);
+ curp->pman = man_alloc(curp->roff, curp);
assert(curp->pman);
curp->man = curp->pman;
return;
if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
if (NULL == curp->pmdoc)
- curp->pmdoc = mdoc_alloc(&curp->regs, curp);
+ curp->pmdoc = mdoc_alloc(curp->roff, curp);
assert(curp->pmdoc);
curp->mdoc = curp->pmdoc;
return;
}
if (NULL == curp->pman)
- curp->pman = man_alloc(&curp->regs, curp);
+ curp->pman = man_alloc(curp->roff, curp);
assert(curp->pman);
curp->man = curp->pman;
}
* Warn about bogus characters. If you're using
* non-ASCII encoding, you're screwing your
* readers. Since I'd rather this not happen,
- * I'll be helpful and drop these characters so
- * we don't display gibberish. Note to manual
- * writers: use special characters.
+ * I'll be helpful and replace these characters
+ * with "?", so we don't display gibberish.
+ * Note to manual writers: use special characters.
*/
c = (unsigned char) blk.buf[i];
if ( ! (isascii(c) &&
(isgraph(c) || isblank(c)))) {
mandoc_msg(MANDOCERR_BADCHAR, curp,
- curp->line, pos, "ignoring byte");
+ curp->line, pos, NULL);
i++;
+ if (pos >= (int)ln.sz)
+ resize_buf(&ln, 256);
+ ln.buf[pos++] = '?';
continue;
}
of = 0;
+ /*
+ * Maintain a lookaside buffer of all parsed lines. We
+ * only do this if mparse_keep() has been invoked (the
+ * buffer may be accessed with mparse_getkeep()).
+ */
+
+ if (curp->secondary) {
+ curp->secondary->buf =
+ mandoc_realloc
+ (curp->secondary->buf,
+ curp->secondary->sz + pos + 2);
+ memcpy(curp->secondary->buf +
+ curp->secondary->sz,
+ ln.buf, pos);
+ curp->secondary->sz += pos;
+ curp->secondary->buf
+ [curp->secondary->sz] = '\n';
+ curp->secondary->sz++;
+ curp->secondary->buf
+ [curp->secondary->sz] = '\0';
+ }
rerun:
rr = roff_parseln
(curp->roff, curp->line,
assert(MANDOCLEVEL_FATAL <= curp->file_status);
break;
case (ROFF_SO):
+ /*
+ * We remove `so' clauses from our lookaside
+ * buffer because we're going to descend into
+ * the file recursively.
+ */
+ if (curp->secondary)
+ curp->secondary->sz -= pos + 1;
mparse_readfd_r(curp, -1, ln.buf + of, 1);
if (MANDOCLEVEL_FATAL <= curp->file_status)
break;
free(ln.buf);
}
-static void
-pdesc(struct mparse *curp, const char *file, int fd)
-{
- struct buf blk;
- int with_mmap;
-
- /*
- * Run for each opened file; may be called more than once for
- * each full parse sequence if the opened file is nested (i.e.,
- * from `so'). Simply sucks in the whole file and moves into
- * the parse phase for the file.
- */
-
- if ( ! read_whole_file(file, fd, &blk, &with_mmap)) {
- curp->file_status = MANDOCLEVEL_SYSERR;
- return;
- }
-
- /* Line number is per-file. */
-
- curp->line = 1;
-
- mparse_buf_r(curp, blk, 1);
-
- if (with_mmap)
- munmap(blk.buf, blk.sz);
- else
- free(blk.buf);
-}
-
static int
read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap)
{
- struct stat st;
size_t off;
ssize_t ssz;
+#ifdef HAVE_MMAP
+ struct stat st;
if (-1 == fstat(fd, &st)) {
perror(file);
return(0);
if (fb->buf != MAP_FAILED)
return(1);
}
+#endif
/*
* If this isn't a regular file (like, say, stdin), then we must
}
static void
-mparse_readfd_r(struct mparse *curp, int fd, const char *file, int re)
+mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file,
+ int re)
{
const char *svfile;
+ /*
+ * Parse the in-memory buffer `blk' as the input named `file'.
+ * The parser's current file name is saved here and put back
+ * before returning, so a nested call leaves the caller's
+ * name intact.  Line number is per-file, so it restarts at 1.
+ * If `re' is zero and no fatal status has been recorded,
+ * mparse_end() is called once the buffer has been parsed.
+ * NOTE(review): curp->line is not restored here; presumably
+ * the caller re-establishes it after a nested file - confirm.
+ */
+ svfile = curp->file;
+ curp->file = file;
+ curp->line = 1;
+
+ mparse_buf_r(curp, blk, 1);
+
+ if (0 == re && MANDOCLEVEL_FATAL > curp->file_status)
+ mparse_end(curp);
+
+ curp->file = svfile; /* back to the enclosing file's name */
+}
+
+/*
+ * Parse a document that is already held in memory.
+ * The `len' bytes at `buf' are wrapped in a struct buf and handed to
+ * mparse_parse_buffer() under the name `file'; the memory is neither
+ * copied nor freed here.  Returns the parser's file status afterwards.
+ */
+enum mandoclevel
+mparse_readmem(struct mparse *curp, const void *buf, size_t len,
+		const char *file)
+{
+	struct buf	 mem;
+
+	mem.sz = len;
+	mem.buf = UNCONST(buf);
+
+	/* re = 0: this is not a nested `so' inclusion. */
+	mparse_parse_buffer(curp, mem, file, 0);
+
+	return(curp->file_status);
+}
+
+/*
+ * Read one input file and run it through the parser.
+ *
+ * If `fd' is -1 the file named `file' is opened here; otherwise the
+ * caller's descriptor is used.  Once a descriptor is available it is
+ * always closed before returning, unless it is standard input.
+ * `re' is handed on to mparse_parse_buffer() unchanged; the `so'
+ * handling passes 1.  Failure to open or to read the file is recorded
+ * in curp->file_status as MANDOCLEVEL_SYSERR.
+ */
+static void
+mparse_readfd_r(struct mparse *curp, int fd, const char *file, int re)
+{
+	struct buf	 blk;
+	int		 with_mmap;
+
if (-1 == fd)
if (-1 == (fd = open(file, O_RDONLY, 0))) {
perror(file);
curp->file_status = MANDOCLEVEL_SYSERR;
return;
}
+	/*
+	 * Run for each opened file; may be called more than once for
+	 * each full parse sequence if the opened file is nested (i.e.,
+	 * from `so').  Simply sucks in the whole file and moves into
+	 * the parse phase for the file.
+	 *
+	 * A failed read must not return early: control has to reach
+	 * the close() below, or the descriptor would be leaked.
+	 */
- svfile = curp->file;
- curp->file = file;
- pdesc(curp, file, fd);
- if (0 == re && MANDOCLEVEL_FATAL > curp->file_status)
- mparse_end(curp);
+	if (read_whole_file(file, fd, &blk, &with_mmap)) {
+		mparse_parse_buffer(curp, blk, file, re);
+
+		/* Release the buffer the same way it was obtained. */
+#ifdef HAVE_MMAP
+		if (with_mmap)
+			munmap(blk.buf, blk.sz);
+		else
+#endif
+			free(blk.buf);
+	} else
+		curp->file_status = MANDOCLEVEL_SYSERR;
if (STDIN_FILENO != fd && -1 == close(fd))
perror(file);
-
- curp->file = svfile;
}
enum mandoclevel
curp->arg = arg;
curp->inttype = inttype;
- curp->roff = roff_alloc(&curp->regs, curp);
+ curp->roff = roff_alloc(curp);
return(curp);
}
mparse_reset(struct mparse *curp)
{
- memset(&curp->regs, 0, sizeof(struct regset));
-
roff_reset(curp->roff);
if (curp->mdoc)
mdoc_reset(curp->mdoc);
if (curp->man)
man_reset(curp->man);
+ if (curp->secondary)
+ curp->secondary->sz = 0;
curp->file_status = MANDOCLEVEL_OK;
curp->mdoc = NULL;
man_free(curp->pman);
if (curp->roff)
roff_free(curp->roff);
+ if (curp->secondary)
+ free(curp->secondary->buf);
+ free(curp->secondary);
free(curp);
}
{
return(mandoclevels[lvl]);
}
+
+/*
+ * Turn on the lookaside buffer that collects every parsed line.
+ * May be called only while no such buffer exists yet (asserted);
+ * the buffer starts out empty.
+ */
+void
+mparse_keep(struct mparse *p)
+{
+
+	assert(NULL == p->secondary);
+
+	/* Zero-filled, so both the data pointer and the size start at 0. */
+	p->secondary = mandoc_calloc(1, sizeof(*p->secondary));
+}
+
+/*
+ * Hand out the text gathered in the lookaside buffer.
+ * The buffer must have been enabled beforehand (asserted).
+ * Returns NULL while nothing has been stored in it.
+ */
+const char *
+mparse_getkeep(const struct mparse *p)
+{
+	const struct buf *kept;
+
+	assert(p->secondary);
+	kept = p->secondary;
+
+	if (0 == kept->sz)
+		return(NULL);
+
+	return(kept->buf);
+}