aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@openbsd.org> 2014-09-06 22:39:36 +0000
committer Ingo Schwarze <schwarze@openbsd.org> 2014-09-06 22:39:36 +0000
commit 1e75b893a49c0cbdd978438e15c5a8fa8aebb510 (patch)
tree bf90cf9b636235019a1d66752a1e2e08e9d832dd
parent a828d911328c6ac46d0f8c2f2d99e9f0bb46e21d (diff)
download mandoc-1e75b893a49c0cbdd978438e15c5a8fa8aebb510.tar.gz
mandoc-1e75b893a49c0cbdd978438e15c5a8fa8aebb510.tar.zst
mandoc-1e75b893a49c0cbdd978438e15c5a8fa8aebb510.zip
Move main format autodetection from the parser dispatcher to the
roff parser where .Dd and .TH are already detected, anyway. This improves robustness because it correctly handles whitespace or an alternate control character before Dd. In the parser dispatcher, provide a fallback looking ahead in the input buffer instead of always assuming man(7). This corrects autodetection when Dd is preceded by other macros or macro-like handled requests like .ll. Triggered by reports from Daniel Levai about issues on Slackware Linux.
-rw-r--r-- libmandoc.h 3
-rw-r--r-- read.c 69
-rw-r--r-- roff.c 22
3 files changed, 63 insertions, 31 deletions
diff --git a/libmandoc.h b/libmandoc.h
index 1011cc50..6d629db3 100644
--- a/libmandoc.h
+++ b/libmandoc.h
@@ -1,4 +1,4 @@
-/* $Id: libmandoc.h,v 1.42 2014/07/09 11:31:43 schwarze Exp $ */
+/* $Id: libmandoc.h,v 1.43 2014/09/06 22:39:36 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
@@ -77,6 +77,7 @@ int roff_getreg(const struct roff *, const char *);
char *roff_strdup(const struct roff *, const char *);
int roff_getcontrol(const struct roff *,
const char *, int *);
+int roff_getformat(const struct roff *);
#if 0
char roff_eqndelim(const struct roff *);
void roff_openeqn(struct roff *, const char *,
diff --git a/read.c b/read.c
index 3b9ce545..26cd6288 100644
--- a/read.c
+++ b/read.c
@@ -1,4 +1,4 @@
-/* $Id: read.c,v 1.82 2014/09/03 23:21:47 schwarze Exp $ */
+/* $Id: read.c,v 1.83 2014/09/06 22:39:36 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
@@ -51,21 +51,22 @@ struct buf {
};
struct mparse {
- enum mandoclevel file_status; /* status of current parse */
- enum mandoclevel wlevel; /* ignore messages below this */
- int line; /* line number in the file */
- int options; /* parser options */
struct man *pman; /* persistent man parser */
struct mdoc *pmdoc; /* persistent mdoc parser */
struct man *man; /* man parser */
struct mdoc *mdoc; /* mdoc parser */
struct roff *roff; /* roff parser (!NULL) */
char *sodest; /* filename pointed to by .so */
- int reparse_count; /* finite interp. stack */
- mandocmsg mmsg; /* warning/error message handler */
- const char *file;
- struct buf *secondary;
+ const char *file; /* filename of current input file */
+ struct buf *primary; /* buffer currently being parsed */
+ struct buf *secondary; /* preprocessed copy of input */
const char *defos; /* default operating system */
+ mandocmsg mmsg; /* warning/error message handler */
+ enum mandoclevel file_status; /* status of current parse */
+ enum mandoclevel wlevel; /* ignore messages below this */
+ int options; /* parser options */
+ int reparse_count; /* finite interp. stack */
+ int line; /* line number in the file */
};
static void resize_buf(struct buf *, size_t);
@@ -248,19 +249,10 @@ resize_buf(struct buf *buf, size_t initial)
static void
pset(const char *buf, int pos, struct mparse *curp)
{
+ char *cp, *ep;
+ int format;
int i;
- /*
- * Try to intuit which kind of manual parser should be used. If
- * passed in by command-line (-man, -mdoc), then use that
- * explicitly. If passed as -mandoc, then try to guess from the
- * line: either skip dot-lines, use -mdoc when finding `.Dt', or
- * default to -man, which is more lenient.
- *
- * Separate out pmdoc/pman from mdoc/man: the first persists
- * through all parsers, while the latter is used per-parse.
- */
-
if ('.' == buf[0] || '\'' == buf[0]) {
for (i = 1; buf[i]; i++)
if (' ' != buf[i] && '\t' != buf[i])
@@ -269,15 +261,35 @@ pset(const char *buf, int pos, struct mparse *curp)
return;
}
- if (MPARSE_MDOC & curp->options) {
- curp->mdoc = curp->pmdoc;
- return;
- } else if (MPARSE_MAN & curp->options) {
- curp->man = curp->pman;
- return;
+ /*
+ * If neither command line arguments -mdoc or -man select
+ * a parser nor the roff parser found a .Dd or .TH macro
+ * yet, look ahead in the main input buffer.
+ */
+
+ if ((format = roff_getformat(curp->roff)) == 0) {
+ /*
+ * Walk the primary buffer one line at a time and
+ * inspect only lines that begin with one of the two
+ * default control characters, '.' or '\''. The
+ * first such line whose request name starts with
+ * "Dd" selects mdoc(7); one starting with "TH"
+ * selects man(7). Text lines must never match.
+ */
+ cp = curp->primary->buf;
+ ep = cp + curp->primary->sz;
+ while (cp < ep) {
+ if (*cp == '.' || *cp == '\'') {
+ cp++;
+ if (cp[0] == 'D' && cp[1] == 'd') {
+ format = MPARSE_MDOC;
+ break;
+ }
+ if (cp[0] == 'T' && cp[1] == 'H') {
+ format = MPARSE_MAN;
+ break;
+ }
+ }
+ /* Advance to the start of the next line, if any. */
+ cp = memchr(cp, '\n', ep - cp);
+ if (cp == NULL)
+ break;
+ cp++;
+ }
}
- if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
+ if (format == MPARSE_MDOC) {
if (NULL == curp->pmdoc)
curp->pmdoc = mdoc_alloc(
curp->roff, curp, curp->defos,
@@ -287,6 +299,8 @@ pset(const char *buf, int pos, struct mparse *curp)
return;
}
+ /* Fall back to man(7) as a last resort. */
+
if (NULL == curp->pman)
curp->pman = man_alloc(curp->roff, curp,
MPARSE_QUICK & curp->options ? 1 : 0);
@@ -720,6 +734,7 @@ mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
/* Line number is per-file. */
svfile = curp->file;
curp->file = file;
+ curp->primary = &blk;
curp->line = 1;
recursion_depth++;
diff --git a/roff.c b/roff.c
index 33361402..1d8a3991 100644
--- a/roff.c
+++ b/roff.c
@@ -1,4 +1,4 @@
-/* $Id: roff.c,v 1.226 2014/08/19 16:52:32 schwarze Exp $ */
+/* $Id: roff.c,v 1.227 2014/09/06 22:39:36 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
@@ -122,6 +122,7 @@ struct roff {
int options; /* parse options */
int rstacksz; /* current size limit of rstack */
int rstackpos; /* position in rstack */
+ int format; /* current file in mdoc or man format */
char control; /* control character */
};
@@ -456,6 +457,7 @@ roff_reset(struct roff *r)
{
roff_free1(r);
+ r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
r->control = 0;
}
@@ -475,6 +477,7 @@ roff_alloc(struct mparse *parse, int options)
r = mandoc_calloc(1, sizeof(struct roff));
r->parse = parse;
r->options = options;
+ r->format = options & (MPARSE_MDOC | MPARSE_MAN);
r->rstackpos = -1;
roffhash_init();
@@ -1776,10 +1779,13 @@ roff_Dd(ROFF_ARGS)
{
const char *const *cp;
- if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
+ if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
for (cp = __mdoc_reserved; *cp; cp++)
roff_setstr(r, *cp, NULL, 0);
+ if (r->format == 0)
+ r->format = MPARSE_MDOC;
+
return(ROFF_CONT);
}
@@ -1788,10 +1794,13 @@ roff_TH(ROFF_ARGS)
{
const char *const *cp;
- if (0 == (MPARSE_QUICK & r->options))
+ if ((r->options & MPARSE_QUICK) == 0)
for (cp = __man_reserved; *cp; cp++)
roff_setstr(r, *cp, NULL, 0);
+ if (r->format == 0)
+ r->format = MPARSE_MAN;
+
return(ROFF_CONT);
}
@@ -2307,6 +2316,13 @@ roff_strdup(const struct roff *r, const char *p)
return(res);
}
+int
+roff_getformat(const struct roff *r)
+{
+ /* Zero unless the options or a .Dd/.TH macro already selected a format. */
+ return(r->format);
+}
+
/*
* Find out whether a line is a macro line or not.
* If it is, adjust the current position and return one; if it isn't,