aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@openbsd.org> 2015-01-26 00:57:22 +0000
committer: Ingo Schwarze <schwarze@openbsd.org> 2015-01-26 00:57:22 +0000
commit: 445b12f7ab17bc0449361eeb2f956d5a1a9bcdd6 (patch)
tree: addd569e7b40f08deeb3e88a3ad2f5c06621ede5
parent: 552620095793cb4451871506a6b9373d2677acd9 (diff)
download: mandoc-445b12f7ab17bc0449361eeb2f956d5a1a9bcdd6.tar.gz
mandoc-445b12f7ab17bc0449361eeb2f956d5a1a9bcdd6.tar.zst
mandoc-445b12f7ab17bc0449361eeb2f956d5a1a9bcdd6.zip
Improve (or rather, rewrite) tbl(7) option parsing.
* Allow the layout to start after the semicolon on the options line.
* Ignore leading commas.
* Option arguments cannot contain closing parentheses.
* Avoid needless UNSUPP messages.
* Better ERROR reporting.
* Delete unused "linesize" field in struct tbl_opts.
* No need for static buffers.
* Garbage collect one almost empty wrapper function.

Improved functionality, but minus 40 lines of code.
-rw-r--r-- mandoc.1 49
-rw-r--r-- mandoc.h 9
-rw-r--r-- read.c 8
-rw-r--r-- tbl.c 51
-rw-r--r-- tbl_opts.c 231
5 files changed, 158 insertions, 190 deletions
diff --git a/mandoc.1 b/mandoc.1
index 9a536ea3..d110181b 100644
--- a/mandoc.1
+++ b/mandoc.1
@@ -1,4 +1,4 @@
-.\" $Id: mandoc.1,v 1.135 2015/01/24 01:58:33 schwarze Exp $
+.\" $Id: mandoc.1,v 1.136 2015/01/26 00:57:22 schwarze Exp $
.\"
.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\" Copyright (c) 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -15,7 +15,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: January 24 2015 $
+.Dd $Mdocdate: January 26 2015 $
.Dt MANDOC 1
.Os
.Sh NAME
@@ -1281,12 +1281,35 @@ keeps the code more readable.
.It "unexpected end of equation"
.El
.Ss "Errors related to tables"
+.Bl -ohang
+.It Sy "non-alphabetic character in tbl options"
+.Pq tbl
+The table options line contains a character other than a letter,
+blank, or comma where the beginning of an option name is expected.
+The character is ignored.
+.It Sy "skipping unknown tbl option"
+.Pq tbl
+The table options line contains a string of letters that does not
+match any known option name.
+The word is ignored.
+.It Sy "missing tbl option argument"
+.Pq tbl
+A table option that requires an argument is not followed by an
+opening parenthesis, or the opening parenthesis is immediately
+followed by a closing parenthesis.
+The option is ignored.
+.It Sy "wrong tbl option argument size"
+.Pq tbl
+A table option argument contains an invalid number of characters.
+Both the option and the argument are ignored.
+.El
+.Pp
.Bl -inset -compact
-.It "no table layout cells specified"
-.It "no table data cells specified"
-.It "ignore data in cell"
-.It "data block still open"
-.It "ignoring extra data cells"
+.It Sy "no table layout cells specified"
+.It Sy "no table data cells specified"
+.It Sy "ignore data in cell"
+.It Sy "data block still open"
+.It Sy "ignoring extra data cells"
.El
.Ss "Errors related to roff, mdoc, and man code"
.Bl -ohang
@@ -1568,6 +1591,14 @@ cannot handle input files larger than its arbitrary size limit
of 2^31 bytes (2 Gigabytes).
Since useful manuals are always small, this is not a problem in practice.
Parsing is aborted as soon as the condition is detected.
+.It Sy "unsupported control character"
+.Pq roff
+An ASCII control character supported by other
+.Xr roff 7
+implementations but not by
+.Nm
+was found in an input file.
+It is replaced by a question mark.
.It Sy "unsupported roff request"
.Pq roff
An input file contains a
@@ -1576,9 +1607,7 @@ request supported by GNU troff or Heirloom troff but not by
.Nm ,
and it is likely that this will cause information loss
or considerable misformatting.
-.It Sy "bad table syntax"
-.It Sy "bad table option"
-.It Sy "bad table layout"
+.It Sy "unsupported table layout"
.It Sy "ignoring macro in table"
.El
.Sh COMPATIBILITY
diff --git a/mandoc.h b/mandoc.h
index 96c570bb..c0979ee0 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -1,4 +1,4 @@
-/* $Id: mandoc.h,v 1.186 2015/01/24 01:58:33 schwarze Exp $ */
+/* $Id: mandoc.h,v 1.187 2015/01/26 00:57:22 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -136,6 +136,10 @@ enum mandocerr {
MANDOCERR_EQNEOF, /* unexpected end of equation */
/* related to tables */
+ MANDOCERR_TBLOPT_ALPHA, /* non-alphabetic character in tbl options */
+ MANDOCERR_TBLOPT_BAD, /* skipping unknown tbl option: option */
+ MANDOCERR_TBLOPT_NOARG, /* missing tbl option argument */
+ MANDOCERR_TBLOPT_ARGSZ, /* wrong tbl option argument size */
MANDOCERR_TBLNOLAYOUT, /* no table layout cells specified */
MANDOCERR_TBLNODATA, /* no table data cells specified */
MANDOCERR_TBLIGNDATA, /* ignore data in cell */
@@ -175,8 +179,6 @@ enum mandocerr {
MANDOCERR_TOOLARGE, /* input too large */
MANDOCERR_CHAR_UNSUPP, /* unsupported control character: number */
MANDOCERR_REQ_UNSUPP, /* unsupported roff request: request */
- MANDOCERR_TBL, /* unsupported table syntax */
- MANDOCERR_TBLOPT, /* unsupported table option */
MANDOCERR_TBLLAYOUT, /* unsupported table layout */
MANDOCERR_TBLMACRO, /* ignoring macro in table: macro */
@@ -186,7 +188,6 @@ enum mandocerr {
struct tbl_opts {
char tab; /* cell-separator */
char decimal; /* decimal point */
- int linesize;
int opts;
#define TBL_OPT_CENTRE (1 << 0)
#define TBL_OPT_EXPAND (1 << 1)
diff --git a/read.c b/read.c
index 70fe8240..090f5261 100644
--- a/read.c
+++ b/read.c
@@ -1,4 +1,4 @@
-/* $Id: read.c,v 1.115 2015/01/24 01:58:33 schwarze Exp $ */
+/* $Id: read.c,v 1.116 2015/01/26 00:57:22 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -180,6 +180,10 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"unexpected end of equation",
/* related to tables */
+ "non-alphabetic character in tbl options",
+ "skipping unknown tbl option",
+ "missing tbl option argument",
+ "wrong tbl option argument size",
"no table layout cells specified",
"no table data cells specified",
"ignore data in cell",
@@ -218,8 +222,6 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"input too large",
"unsupported control character",
"unsupported roff request",
- "unsupported table syntax",
- "unsupported table option",
"unsupported table layout",
"ignoring macro in table",
};
diff --git a/tbl.c b/tbl.c
index b6492524..184b7da4 100644
--- a/tbl.c
+++ b/tbl.c
@@ -1,7 +1,7 @@
-/* $Id: tbl.c,v 1.32 2015/01/21 00:47:04 schwarze Exp $ */
+/* $Id: tbl.c,v 1.33 2015/01/26 00:57:22 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011, 2015 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -34,29 +34,45 @@
enum rofferr
tbl_read(struct tbl_node *tbl, int ln, const char *p, int offs)
{
- int len;
const char *cp;
-
- cp = &p[offs];
- len = (int)strlen(cp);
+ int active;
/*
- * If we're in the options section and we don't have a
- * terminating semicolon, assume we've moved directly into the
- * layout section. No need to report a warning: this is,
- * apparently, standard behaviour.
+ * In the options section, proceed to the layout section
+ * after a semicolon, or right away if there is no semicolon.
+ * Ignore semicolons in arguments.
*/
- if (TBL_PART_OPTS == tbl->part && len)
- if (';' != cp[len - 1])
- tbl->part = TBL_PART_LAYOUT;
+ if (tbl->part == TBL_PART_OPTS) {
+ tbl->part = TBL_PART_LAYOUT;
+ active = 1;
+ for (cp = p; *cp != '\0'; cp++) {
+ switch (*cp) {
+ case '(':
+ active = 0;
+ continue;
+ case ')':
+ active = 1;
+ continue;
+ case ';':
+ if (active)
+ break;
+ continue;
+ default:
+ continue;
+ }
+ break;
+ }
+ if (*cp == ';') {
+ tbl_option(tbl, ln, p);
+ if (*(p = cp + 1) == '\0')
+ return(ROFF_IGN);
+ }
+ }
- /* Now process each logical section of the table. */
+ /* Process the other section types. */
switch (tbl->part) {
- case TBL_PART_OPTS:
- tbl_option(tbl, ln, p);
- return(ROFF_IGN);
case TBL_PART_LAYOUT:
tbl_layout(tbl, ln, p);
return(ROFF_IGN);
@@ -81,7 +97,6 @@ tbl_alloc(int pos, int line, struct mparse *parse)
tbl->parse = parse;
tbl->part = TBL_PART_OPTS;
tbl->opts.tab = '\t';
- tbl->opts.linesize = 12;
tbl->opts.decimal = '.';
return(tbl);
}
diff --git a/tbl_opts.c b/tbl_opts.c
index 4716e436..3193d143 100644
--- a/tbl_opts.c
+++ b/tbl_opts.c
@@ -1,6 +1,7 @@
-/* $Id: tbl_opts.c,v 1.16 2015/01/14 22:44:55 schwarze Exp $ */
+/* $Id: tbl_opts.c,v 1.17 2015/01/26 00:57:22 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -53,12 +54,6 @@ struct tbl_phrase {
/* Handle Commonwealth/American spellings. */
#define KEY_MAXKEYS 14
-/* Maximum length of key name string. */
-#define KEY_MAXNAME 13
-
-/* Maximum length of key number size. */
-#define KEY_MAXNUMSZ 10
-
static const struct tbl_phrase keys[KEY_MAXKEYS] = {
{ "center", TBL_OPT_CENTRE, KEY_CENTRE},
{ "centre", TBL_OPT_CENTRE, KEY_CENTRE},
@@ -76,193 +71,119 @@ static const struct tbl_phrase keys[KEY_MAXKEYS] = {
{ "nospaces", TBL_OPT_NOSPACE, KEY_NOSPACE},
};
-static int arg(struct tbl_node *, int,
+static void arg(struct tbl_node *, int,
const char *, int *, enum tbl_ident);
-static void opt(struct tbl_node *, int,
- const char *, int *);
-static int
+static void
arg(struct tbl_node *tbl, int ln, const char *p, int *pos, enum tbl_ident key)
{
- int i;
- char buf[KEY_MAXNUMSZ];
+ const char *optname;
+ int len, want;
while (isspace((unsigned char)p[*pos]))
(*pos)++;
- /* Arguments always begin with a parenthesis. */
+ /* Arguments are enclosed in parentheses. */
- if ('(' != p[*pos]) {
- mandoc_msg(MANDOCERR_TBL, tbl->parse,
- ln, *pos, NULL);
- return(0);
+ len = 0;
+ if (p[*pos] == '(') {
+ (*pos)++;
+ while (p[*pos + len] != ')')
+ len++;
}
- (*pos)++;
-
- /*
- * The arguments can be ANY value, so we can't just stop at the
- * next close parenthesis (the argument can be a closed
- * parenthesis itself).
- */
-
switch (key) {
case KEY_DELIM:
- if ('\0' == p[(*pos)++]) {
- mandoc_msg(MANDOCERR_TBL, tbl->parse,
- ln, *pos - 1, NULL);
- return(0);
- }
-
- if ('\0' == p[(*pos)++]) {
- mandoc_msg(MANDOCERR_TBL, tbl->parse,
- ln, *pos - 1, NULL);
- return(0);
- }
+ optname = "delim";
+ want = 2;
break;
case KEY_TAB:
- if ('\0' != (tbl->opts.tab = p[(*pos)++]))
- break;
-
- mandoc_msg(MANDOCERR_TBL, tbl->parse,
- ln, *pos - 1, NULL);
- return(0);
+ optname = "tab";
+ want = 1;
+ if (len == want)
+ tbl->opts.tab = p[*pos];
+ break;
case KEY_LINESIZE:
- for (i = 0; i < KEY_MAXNUMSZ && p[*pos]; i++, (*pos)++) {
- buf[i] = p[*pos];
- if ( ! isdigit((unsigned char)buf[i]))
- break;
- }
-
- if (i < KEY_MAXNUMSZ) {
- buf[i] = '\0';
- tbl->opts.linesize = atoi(buf);
- break;
- }
-
- mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos, NULL);
- return(0);
+ optname = "linesize";
+ want = 0;
+ break;
case KEY_DPOINT:
- if ('\0' != (tbl->opts.decimal = p[(*pos)++]))
- break;
-
- mandoc_msg(MANDOCERR_TBL, tbl->parse,
- ln, *pos - 1, NULL);
- return(0);
+ optname = "decimalpoint";
+ want = 1;
+ if (len == want)
+ tbl->opts.decimal = p[*pos];
+ break;
default:
abort();
/* NOTREACHED */
}
- /* End with a close parenthesis. */
+ if (len == 0)
+ mandoc_msg(MANDOCERR_TBLOPT_NOARG,
+ tbl->parse, ln, *pos, optname);
+ else if (want && len != want)
+ mandoc_vmsg(MANDOCERR_TBLOPT_ARGSZ,
+ tbl->parse, ln, *pos,
+ "%s want %d have %d", optname, want, len);
- if (')' == p[(*pos)++])
- return(1);
-
- mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos - 1, NULL);
- return(0);
+ *pos += len;
+ if (p[*pos] == ')')
+ (*pos)++;
}
-static void
-opt(struct tbl_node *tbl, int ln, const char *p, int *pos)
+/*
+ * Parse one line of options up to the semicolon.
+ * Each option can be preceded by blanks and/or commas,
+ * and some options are followed by arguments.
+ */
+void
+tbl_option(struct tbl_node *tbl, int ln, const char *p)
{
- int i, sv;
- char buf[KEY_MAXNAME];
-
- /*
- * Parse individual options from the stream as surrounded by
- * this goto. Each pass through the routine parses out a single
- * option and registers it. Option arguments are processed in
- * the arg() function.
- */
-
-again: /*
- * EBNF describing this section:
- *
- * options ::= option_list [:space:]* [;][\n]
- * option_list ::= option option_tail
- * option_tail ::= [,:space:]+ option_list |
- * ::= epsilon
- * option ::= [:alpha:]+ args
- * args ::= [:space:]* [(] [:alpha:]+ [)]
- */
+ int i, pos, len;
- while (isspace((unsigned char)p[*pos]))
- (*pos)++;
-
- /* Safe exit point. */
-
- if (';' == p[*pos])
- return;
-
- /* Copy up to first non-alpha character. */
+ pos = 0;
+ for (;;) {
+ while (isspace((unsigned char)p[pos]) || p[pos] == ',')
+ pos++;
- for (sv = *pos, i = 0; i < KEY_MAXNAME; i++, (*pos)++) {
- buf[i] = (char)tolower((unsigned char)p[*pos]);
- if ( ! isalpha((unsigned char)buf[i]))
- break;
- }
+ if (p[pos] == ';')
+ return;
- /* Exit if buffer is empty (or overrun). */
+ /* Parse one option name. */
- if (KEY_MAXNAME == i || 0 == i) {
- mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos, NULL);
- return;
- }
+ len = 0;
+ while (isalpha((unsigned char)p[pos + len]))
+ len++;
- buf[i] = '\0';
+ if (len == 0) {
+ mandoc_vmsg(MANDOCERR_TBLOPT_ALPHA,
+ tbl->parse, ln, pos, "%c", p[pos]);
+ pos++;
+ continue;
+ }
- while (isspace((unsigned char)p[*pos]) || p[*pos] == ',')
- (*pos)++;
+ /* Look up the option name. */
- /*
- * Look through all of the available keys to find one that
- * matches the input. FIXME: hashtable this.
- */
+ i = 0;
+ while (i < KEY_MAXKEYS &&
+ (strncasecmp(p + pos, keys[i].name, len) ||
+ keys[i].name[len] != '\0'))
+ i++;
- for (i = 0; i < KEY_MAXKEYS; i++) {
- if (strcmp(buf, keys[i].name))
+ if (i == KEY_MAXKEYS) {
+ mandoc_vmsg(MANDOCERR_TBLOPT_BAD, tbl->parse,
+ ln, pos, "%.*s", len, p + pos);
+ pos += len;
continue;
+ }
- /*
- * Note: this is more difficult to recover from, as we
- * can be anywhere in the option sequence and it's
- * harder to jump to the next. Meanwhile, just bail out
- * of the sequence altogether.
- */
+ /* Handle the option. */
+ pos += len;
if (keys[i].key)
tbl->opts.opts |= keys[i].key;
- else if ( ! arg(tbl, ln, p, pos, keys[i].ident))
- return;
-
- break;
+ else
+ arg(tbl, ln, p, &pos, keys[i].ident);
}
-
- /*
- * Allow us to recover from bad options by continuing to another
- * parse sequence.
- */
-
- if (KEY_MAXKEYS == i)
- mandoc_msg(MANDOCERR_TBLOPT, tbl->parse, ln, sv, NULL);
-
- goto again;
- /* NOTREACHED */
-}
-
-void
-tbl_option(struct tbl_node *tbl, int ln, const char *p)
-{
- int pos;
-
- /*
- * Table options are always on just one line, so automatically
- * switch into the next input mode here.
- */
- tbl->part = TBL_PART_LAYOUT;
-
- pos = 0;
- opt(tbl, ln, p, &pos);
}