aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@openbsd.org>2015-01-26 18:42:30 +0000
committerIngo Schwarze <schwarze@openbsd.org>2015-01-26 18:42:30 +0000
commitb0aec68c2e236344e79a7ee4949b2e5c450d13f8 (patch)
treec443152b9cc28944522ddf2bc3103f1346ef178d
parente4a7a2efc2737e64ac0cf42b148f06566718c1d7 (diff)
downloadmandoc-b0aec68c2e236344e79a7ee4949b2e5c450d13f8.tar.gz
mandoc-b0aec68c2e236344e79a7ee4949b2e5c450d13f8.tar.zst
mandoc-b0aec68c2e236344e79a7ee4949b2e5c450d13f8.zip
Rework tbl(7) layout parsing:
* Continue parsing even if part of the input is invalid. * Do not require whitespace between cell specifications. * Allow tabs as well as blanks between modifiers. * Mark the 'm' modifier as unsupported. * Parse and ignore the 'p' and 'v' modifiers. * Better warning and error messages. * Get rid of a static buffer. Improved functionality but minus 50 lines of code.
-rw-r--r--mandoc.144
-rw-r--r--mandoc.h13
-rw-r--r--read.c13
-rw-r--r--tbl.735
-rw-r--r--tbl_layout.c238
5 files changed, 169 insertions, 174 deletions
diff --git a/mandoc.1 b/mandoc.1
index 736b2ae7..82c2d720 100644
--- a/mandoc.1
+++ b/mandoc.1
@@ -1,4 +1,4 @@
-.\" $Id: mandoc.1,v 1.137 2015/01/26 13:03:48 schwarze Exp $
+.\" $Id: mandoc.1,v 1.138 2015/01/26 18:42:30 schwarze Exp $
.\"
.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\" Copyright (c) 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -1273,6 +1273,24 @@ its value is implicitly set to the empty string.
However, defining strings explicitly before use
keeps the code more readable.
.El
+.Ss "Warnings related to tables"
+.Bl -ohang
+.It Sy "tbl line starts with span"
+.Pq tbl
+The first cell in a table layout line is a horizontal span
+.Pq Sq Cm s .
+Data provided for this cell is ignored, and nothing is printed in the cell.
+.It Sy "tbl column starts with span"
+.Pq tbl
+The first line of a table layout specification
+requests a vertical span
+.Pq Sq Cm ^ .
+Data provided for this cell is ignored, and nothing is printed in the cell.
+.It Sy "skipping vertical bar in tbl layout"
+.Pq tbl
+A table layout specification contains more than two consecutive vertical bars.
+A double bar is printed, all additional bars are discarded.
+.El
.Ss "Errors related to equations"
.Bl -inset -compact
.It "unexpected equation scope closure"
@@ -1302,10 +1320,25 @@ The option is ignored.
.Pq tbl
A table option argument contains an invalid number of characters.
Both the option and the argument are ignored.
+.It Sy "empty tbl layout"
+.Pq tbl
+A table layout specification is completely empty,
+specifying zero lines and zero columns.
+As a fallback, a single left-justified column is used.
+.It Sy "invalid character in tbl layout"
+.Pq tbl
+A table layout specification contains a character that can neither
+be interpreted as a layout key character nor as a layout modifier,
+or a modifier precedes the first key.
+The invalid character is discarded.
+.It Sy "unmatched parenthesis in tbl layout"
+.Pq tbl
+A table layout specification contains an opening parenthesis,
+but no matching closing parenthesis.
+The rest of the input line, starting from the parenthesis, has no effect.
.El
.Pp
.Bl -inset -compact
-.It Sy "no table layout cells specified"
.It Sy "no table data cells specified"
.It Sy "ignore data in cell"
.It Sy "data block still open"
@@ -1607,7 +1640,12 @@ request supported by GNU troff or Heirloom troff but not by
.Nm ,
and it is likely that this will cause information loss
or considerable misformatting.
-.It Sy "unsupported table layout"
+.It Sy "unsupported table layout modfier"
+.Pq tbl
+A table layout specification contains an
+.Sq Cm m
+modifier.
+The modifier is discarded.
.It Sy "ignoring macro in table"
.It Sy "eqn in tbl"
.Pq eqn , tbl
diff --git a/mandoc.h b/mandoc.h
index 40f2bdea..02c404fc 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -1,4 +1,4 @@
-/* $Id: mandoc.h,v 1.188 2015/01/26 13:03:48 schwarze Exp $ */
+/* $Id: mandoc.h,v 1.189 2015/01/26 18:42:30 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -127,6 +127,11 @@ enum mandocerr {
MANDOCERR_ESC_BAD, /* invalid escape sequence: esc */
MANDOCERR_STR_UNDEF, /* undefined string, using "": name */
+ /* related to tables */
+ MANDOCERR_TBLLAYOUT_SPAN, /* tbl line starts with span */
+ MANDOCERR_TBLLAYOUT_DOWN, /* tbl column starts with span */
+ MANDOCERR_TBLLAYOUT_VERT, /* skipping vertical bar in tbl layout */
+
MANDOCERR_ERROR, /* ===== start of errors ===== */
/* related to equations */
@@ -140,7 +145,9 @@ enum mandocerr {
MANDOCERR_TBLOPT_BAD, /* skipping unknown tbl option: option */
MANDOCERR_TBLOPT_NOARG, /* missing tbl option argument */
MANDOCERR_TBLOPT_ARGSZ, /* wrong tbl option argument size */
- MANDOCERR_TBLNOLAYOUT, /* no table layout cells specified */
+ MANDOCERR_TBLLAYOUT_NONE, /* empty tbl layout */
+ MANDOCERR_TBLLAYOUT_CHAR, /* invalid character in tbl layout: char */
+ MANDOCERR_TBLLAYOUT_PAR, /* unmatched parenthesis in tbl layout */
MANDOCERR_TBLNODATA, /* no table data cells specified */
MANDOCERR_TBLIGNDATA, /* ignore data in cell */
MANDOCERR_TBLBLOCK, /* data block still open */
@@ -179,7 +186,7 @@ enum mandocerr {
MANDOCERR_TOOLARGE, /* input too large */
MANDOCERR_CHAR_UNSUPP, /* unsupported control character: number */
MANDOCERR_REQ_UNSUPP, /* unsupported roff request: request */
- MANDOCERR_TBLLAYOUT, /* unsupported table layout */
+ MANDOCERR_TBLLAYOUT_MOD, /* unsupported tbl layout modifier: m */
MANDOCERR_TBLMACRO, /* ignoring macro in table: macro */
MANDOCERR_TBLEQN, /* eqn in tbl */
diff --git a/read.c b/read.c
index 5f381584..7d36778e 100644
--- a/read.c
+++ b/read.c
@@ -1,4 +1,4 @@
-/* $Id: read.c,v 1.117 2015/01/26 13:03:48 schwarze Exp $ */
+/* $Id: read.c,v 1.118 2015/01/26 18:42:30 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -171,6 +171,11 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"invalid escape sequence",
"undefined string, using \"\"",
+ /* related to tables */
+ "tbl line starts with span",
+ "tbl column starts with span",
+ "skipping vertical bar in tbl layout",
+
"generic error",
/* related to equations */
@@ -184,7 +189,9 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"skipping unknown tbl option",
"missing tbl option argument",
"wrong tbl option argument size",
- "no table layout cells specified",
+ "empty tbl layout",
+ "invalid character in tbl layout",
+ "unmatched parenthesis in tbl layout",
"no table data cells specified",
"ignore data in cell",
"data block still open",
@@ -222,7 +229,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"input too large",
"unsupported control character",
"unsupported roff request",
- "unsupported table layout",
+ "unsupported tbl layout modifier",
"ignoring macro in table",
"eqn in tbl",
};
diff --git a/tbl.7 b/tbl.7
index 7417d208..20f3e7f3 100644
--- a/tbl.7
+++ b/tbl.7
@@ -1,4 +1,4 @@
-.\" $Id: tbl.7,v 1.23 2015/01/26 13:03:48 schwarze Exp $
+.\" $Id: tbl.7,v 1.24 2015/01/26 18:42:30 schwarze Exp $
.\"
.\" Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\" Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
@@ -251,6 +251,9 @@ The following case-insensitive modifier keys are available:
.Bl -tag -width 2n
.It Cm b
Use a bold font for the contents of this column.
+.It Cm d
+Move cell content down to the last cell of a vertical span.
+Currently ignored.
.It Cm e
Make this column wider to match the maximum width
of any other column also having the
@@ -263,6 +266,27 @@ See the
manual for supported one-character font names.
.It Cm i
Use an italic font for the contents of this column.
+.It Cm m
+Specify a cell start macro.
+This is a GNU extension and currently unsupported.
+.It Cm p
+Set the point size to the following unsigned argument,
+or change it by the following signed argument.
+Currently ignored.
+.It Cm v
+Set the vertical line spacing to the following unsigned argument,
+or change it by the following signed argument.
+Currently ignored.
+.It Cm t
+Do not vertically center cell content in the vertical span,
+leave it at the top.
+Currently ignored.
+.It Cm u
+Move cell content up by half a table line.
+Currently ignored.
+.It Cm w
+Specify minimum column width.
+Currently ignored.
.It Cm x
After determining the width of all other columns, distribute the
rest of the line length among all columns having the
@@ -272,15 +296,6 @@ modifier.
Do not use this cell for determining the width of this column.
.El
.Pp
-The modifiers
-.Cm d ,
-.Cm t ,
-.Cm u ,
-and
-.Cm w
-are ignored by
-.Xr mandoc 1 .
-.Pp
For example, the following layout specifies a center-justified column of
minimum width 10, followed by vertical bar, followed by a left-justified
column of minimum width 10, another vertical bar, then a column using
diff --git a/tbl_layout.c b/tbl_layout.c
index 29a6cee2..eaf72769 100644
--- a/tbl_layout.c
+++ b/tbl_layout.c
@@ -1,7 +1,7 @@
-/* $Id: tbl_layout.c,v 1.31 2015/01/14 22:44:55 schwarze Exp $ */
+/* $Id: tbl_layout.c,v 1.32 2015/01/26 18:42:30 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2012, 2014 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -34,15 +34,7 @@ struct tbl_phrase {
enum tbl_cellt key;
};
-/*
- * FIXME: we can make this parse a lot nicer by, when an error is
- * encountered in a layout key, bailing to the next key (i.e. to the
- * next whitespace then continuing).
- */
-
-#define KEYS_MAX 11
-
-static const struct tbl_phrase keys[KEYS_MAX] = {
+static const struct tbl_phrase keys[] = {
{ 'c', TBL_CELL_CENTRE },
{ 'r', TBL_CELL_RIGHT },
{ 'l', TBL_CELL_LEFT },
@@ -55,57 +47,30 @@ static const struct tbl_phrase keys[KEYS_MAX] = {
{ '=', TBL_CELL_DHORIZ }
};
-static int mods(struct tbl_node *, struct tbl_cell *,
+#define KEYS_MAX ((int)(sizeof(keys)/sizeof(keys[0])))
+
+static void mods(struct tbl_node *, struct tbl_cell *,
int, const char *, int *);
-static int cell(struct tbl_node *, struct tbl_row *,
+static void cell(struct tbl_node *, struct tbl_row *,
int, const char *, int *);
static struct tbl_cell *cell_alloc(struct tbl_node *, struct tbl_row *,
enum tbl_cellt, int vert);
-static int
+static void
mods(struct tbl_node *tbl, struct tbl_cell *cp,
int ln, const char *p, int *pos)
{
- char buf[5];
- int i;
+ char *endptr;
- /* Not all types accept modifiers. */
+mod:
+ while (p[*pos] == ' ' || p[*pos] == '\t')
+ (*pos)++;
- switch (cp->pos) {
- case TBL_CELL_DOWN:
- /* FALLTHROUGH */
- case TBL_CELL_HORIZ:
- /* FALLTHROUGH */
- case TBL_CELL_DHORIZ:
- return(1);
- default:
- break;
- }
+ /* Row delimiters and cell specifiers end modifier lists. */
-mod:
- /*
- * XXX: since, at least for now, modifiers are non-conflicting
- * (are separable by value, regardless of position), we let
- * modifiers come in any order. The existing tbl doesn't let
- * this happen.
- */
- switch (p[*pos]) {
- case '\0':
- /* FALLTHROUGH */
- case ' ':
- /* FALLTHROUGH */
- case '\t':
- /* FALLTHROUGH */
- case ',':
- /* FALLTHROUGH */
- case '.':
- /* FALLTHROUGH */
- case '|':
- return(1);
- default:
- break;
- }
+ if (strchr(".,-=^_ACLNRSaclnrs|", p[*pos]) != NULL)
+ return;
/* Throw away parenthesised expression. */
@@ -117,72 +82,65 @@ mod:
(*pos)++;
goto mod;
}
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
+ mandoc_msg(MANDOCERR_TBLLAYOUT_PAR, tbl->parse,
ln, *pos, NULL);
- return(0);
+ return;
}
/* Parse numerical spacing from modifier string. */
if (isdigit((unsigned char)p[*pos])) {
- for (i = 0; i < 4; i++) {
- if ( ! isdigit((unsigned char)p[*pos + i]))
- break;
- buf[i] = p[*pos + i];
- }
- buf[i] = '\0';
-
- /* No greater than 4 digits. */
-
- if (4 == i) {
- mandoc_msg(MANDOCERR_TBLLAYOUT,
- tbl->parse, ln, *pos, NULL);
- return(0);
- }
-
- *pos += i;
- cp->spacing = (size_t)atoi(buf);
-
+ cp->spacing = strtoull(p + *pos, &endptr, 10);
+ *pos = endptr - p;
goto mod;
- /* NOTREACHED */
}
- /* TODO: GNU has many more extensions. */
-
switch (tolower((unsigned char)p[(*pos)++])) {
- case 'z':
- cp->flags |= TBL_CELL_WIGN;
- goto mod;
- case 'u':
- cp->flags |= TBL_CELL_UP;
+ case 'b':
+ /* FALLTHROUGH */
+ case 'i':
+ /* FALLTHROUGH */
+ case 'r':
+ (*pos)--;
+ break;
+ case 'd':
+ cp->flags |= TBL_CELL_BALIGN;
goto mod;
case 'e':
cp->flags |= TBL_CELL_EQUAL;
goto mod;
+ case 'f':
+ break;
+ case 'm':
+ mandoc_msg(MANDOCERR_TBLLAYOUT_MOD, tbl->parse,
+ ln, *pos, "m");
+ goto mod;
+ case 'p':
+ /* FALLTHROUGH */
+ case 'v':
+ if (p[*pos] == '-' || p[*pos] == '+')
+ (*pos)++;
+ while (isdigit((unsigned char)p[*pos]))
+ (*pos)++;
+ goto mod;
case 't':
cp->flags |= TBL_CELL_TALIGN;
goto mod;
- case 'd':
- cp->flags |= TBL_CELL_BALIGN;
+ case 'u':
+ cp->flags |= TBL_CELL_UP;
goto mod;
case 'w': /* XXX for now, ignore minimal column width */
goto mod;
case 'x':
cp->flags |= TBL_CELL_WMAX;
goto mod;
- case 'f':
- break;
- case 'r':
- /* FALLTHROUGH */
- case 'b':
- /* FALLTHROUGH */
- case 'i':
- (*pos)--;
- break;
+ case 'z':
+ cp->flags |= TBL_CELL_WIGN;
+ goto mod;
default:
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
- ln, *pos - 1, NULL);
- return(0);
+ mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse,
+ ln, *pos - 1, "%c", p[*pos - 1]);
+ goto mod;
}
switch (tolower((unsigned char)p[(*pos)++])) {
@@ -201,20 +159,13 @@ mod:
case 'r':
goto mod;
default:
- break;
- }
- if (isalnum((unsigned char)p[*pos - 1])) {
mandoc_vmsg(MANDOCERR_FT_BAD, tbl->parse,
ln, *pos - 1, "TS f%c", p[*pos - 1]);
goto mod;
}
-
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
- ln, *pos - 1, NULL);
- return(0);
}
-static int
+static void
cell(struct tbl_node *tbl, struct tbl_row *rp,
int ln, const char *p, int *pos)
{
@@ -223,16 +174,24 @@ cell(struct tbl_node *tbl, struct tbl_row *rp,
/* Handle vertical lines. */
- for (vert = 0; '|' == p[*pos]; ++*pos)
- vert++;
- while (' ' == p[*pos])
+ vert = 0;
+again:
+ while (p[*pos] == ' ' || p[*pos] == '\t' || p[*pos] == '|') {
+ if (p[*pos] == '|') {
+ if (vert < 2)
+ vert++;
+ else
+ mandoc_msg(MANDOCERR_TBLLAYOUT_VERT,
+ tbl->parse, ln, *pos, NULL);
+ }
(*pos)++;
+ }
/* Handle trailing vertical lines */
if ('.' == p[*pos] || '\0' == p[*pos]) {
rp->vert = vert;
- return(1);
+ return;
}
/* Parse the column position (`c', `l', `r', ...). */
@@ -241,62 +200,32 @@ cell(struct tbl_node *tbl, struct tbl_row *rp,
if (tolower((unsigned char)p[*pos]) == keys[i].name)
break;
- if (KEYS_MAX == i) {
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
- ln, *pos, NULL);
- return(0);
+ if (i == KEYS_MAX) {
+ mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse,
+ ln, *pos, "%c", p[*pos]);
+ (*pos)++;
+ goto again;
}
-
c = keys[i].key;
- /*
- * If a span cell is found first, raise a warning and abort the
- * parse. If a span cell is found and the last layout element
- * isn't a "normal" layout, bail.
- *
- * FIXME: recover from this somehow?
- */
-
- if (TBL_CELL_SPAN == c) {
- if (NULL == rp->first) {
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
- ln, *pos, NULL);
- return(0);
- } else if (rp->last)
- switch (rp->last->pos) {
- case TBL_CELL_HORIZ:
- /* FALLTHROUGH */
- case TBL_CELL_DHORIZ:
- mandoc_msg(MANDOCERR_TBLLAYOUT,
- tbl->parse, ln, *pos, NULL);
- return(0);
- default:
- break;
- }
- }
+ /* Special cases of spanners. */
- /*
- * If a vertical spanner is found, we may not be in the first
- * row.
- */
-
- if (TBL_CELL_DOWN == c && rp == tbl->first_row) {
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL);
- return(0);
- }
+ if (c == TBL_CELL_SPAN) {
+ if (rp->last == NULL)
+ mandoc_msg(MANDOCERR_TBLLAYOUT_SPAN,
+ tbl->parse, ln, *pos, NULL);
+ else if (rp->last->pos == TBL_CELL_HORIZ ||
+ rp->last->pos == TBL_CELL_DHORIZ)
+ c = rp->last->pos;
+ } else if (c == TBL_CELL_DOWN && rp == tbl->first_row)
+ mandoc_msg(MANDOCERR_TBLLAYOUT_DOWN,
+ tbl->parse, ln, *pos, NULL);
(*pos)++;
- /* Disallow adjacent spacers. */
-
- if (vert > 2) {
- mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL);
- return(0);
- }
-
/* Allocate cell then parse its modifiers. */
- return(mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos));
+ mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos);
}
void
@@ -311,7 +240,7 @@ tbl_layout(struct tbl_node *tbl, int ln, const char *p)
for (;;) {
/* Skip whitespace before and after each cell. */
- while (isspace((unsigned char)p[pos]))
+ while (p[pos] == ' ' || p[pos] == '\t')
pos++;
switch (p[pos]) {
@@ -326,7 +255,7 @@ tbl_layout(struct tbl_node *tbl, int ln, const char *p)
tbl->part = TBL_PART_DATA;
if (tbl->first_row != NULL)
return;
- mandoc_msg(MANDOCERR_TBLNOLAYOUT,
+ mandoc_msg(MANDOCERR_TBLLAYOUT_NONE,
tbl->parse, ln, pos, NULL);
rp = mandoc_calloc(1, sizeof(*rp));
cell_alloc(tbl, rp, TBL_CELL_LEFT, 0);
@@ -344,8 +273,7 @@ tbl_layout(struct tbl_node *tbl, int ln, const char *p)
tbl->first_row = rp;
tbl->last_row = rp;
}
- if ( ! cell(tbl, rp, ln, p, &pos))
- return;
+ cell(tbl, rp, ln, p, &pos);
}
}