From 0085b048bff93dd2f247c5fa592a64b4c3f057bf Mon Sep 17 00:00:00 2001 From: Kristaps Dzonsons Date: Wed, 29 Dec 2010 01:16:57 +0000 Subject: Significant update to options handling, which now departs almost completely from the BSD.lv code due to performance issues and flat-out errors. Performance issues: functions called per character. Ugly. Flat-out errors: disallowing "reserved" tokens as arguments to those options accepting arguments. Also added are two mandoc.h error codes for general tbl syntax errors and for bad options. --- libroff.h | 23 +++---- main.c | 4 +- mandoc.h | 4 +- roff.c | 4 +- tbl.c | 87 ++++++++------------------- tbl_opts.c | 199 ++++++++++++++++++++++++++++++++++++++++--------------------- 6 files changed, 171 insertions(+), 150 deletions(-) diff --git a/libroff.h b/libroff.h index f3c7fed1..3f041277 100644 --- a/libroff.h +++ b/libroff.h @@ -1,4 +1,4 @@ -/* $Id: libroff.h,v 1.2 2010/12/28 13:46:07 kristaps Exp $ */ +/* $Id: libroff.h,v 1.3 2010/12/29 01:16:57 kristaps Exp $ */ /* * Copyright (c) 2009, 2010 Kristaps Dzonsons * @@ -19,18 +19,6 @@ __BEGIN_DECLS -enum tbl_tok { - TBL_TOK_OPENPAREN = 0, - TBL_TOK_CLOSEPAREN, - TBL_TOK_COMMA, - TBL_TOK_SEMICOLON, - TBL_TOK_PERIOD, - TBL_TOK_SPACE, - TBL_TOK_TAB, - TBL_TOK_NIL, - TBL_TOK__MAX -}; - enum tbl_part { TBL_PART_OPTS, /* in options (first line) */ TBL_PART_LAYOUT, /* describing layout */ @@ -38,8 +26,9 @@ enum tbl_part { }; struct tbl { + mandocmsg msg; /* status messages */ + void *data; /* privdata for messages */ enum tbl_part part; - char buf[BUFSIZ]; char tab; /* cell-separator */ char decimal; /* decimal point */ int linesize; @@ -54,11 +43,15 @@ struct tbl { #define TBL_OPT_NOSPACE (1 << 6) }; -struct tbl *tbl_alloc(void); +#define TBL_MSG(tblp, type, line, col) \ + (*(tblp)->msg)((type), (tblp)->data, (line), (col), NULL) + +struct tbl *tbl_alloc(void *, mandocmsg); void tbl_free(struct tbl *); void tbl_reset(struct tbl *); enum rofferr tbl_read(struct tbl *, int, const char *, 
int); enum tbl_tok tbl_next(struct tbl *, const char *, int *); +int tbl_option(struct tbl *, int, const char *); __END_DECLS diff --git a/main.c b/main.c index 131068ff..543f13fa 100644 --- a/main.c +++ b/main.c @@ -1,4 +1,4 @@ -/* $Id: main.c,v 1.125 2010/12/22 11:38:17 kristaps Exp $ */ +/* $Id: main.c,v 1.126 2010/12/29 01:16:57 kristaps Exp $ */ /* * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * Copyright (c) 2010 Ingo Schwarze @@ -179,6 +179,8 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "generic error", + "bad table syntax", + "bad table option", "input stack limit exceeded, infinite loop?", "skipping bad character", "skipping text before the first section header", diff --git a/mandoc.h b/mandoc.h index 9b4eb759..df08fb9a 100644 --- a/mandoc.h +++ b/mandoc.h @@ -1,4 +1,4 @@ -/* $Id: mandoc.h,v 1.35 2010/12/22 11:38:17 kristaps Exp $ */ +/* $Id: mandoc.h,v 1.36 2010/12/29 01:16:57 kristaps Exp $ */ /* * Copyright (c) 2010 Kristaps Dzonsons * @@ -101,6 +101,8 @@ enum mandocerr { MANDOCERR_ERROR, /* ===== start of errors ===== */ + MANDOCERR_TBL, /* bad table syntax */ + MANDOCERR_TBLOPT, /* bad table option */ MANDOCERR_ROFFLOOP, /* input stack limit exceeded, infinite loop? 
*/ MANDOCERR_BADCHAR, /* skipping bad character */ MANDOCERR_NOTEXT, /* skipping text before the first section header */ diff --git a/roff.c b/roff.c index aabf0880..1e62be4e 100644 --- a/roff.c +++ b/roff.c @@ -1,4 +1,4 @@ -/* $Id: roff.c,v 1.110 2010/12/28 10:59:07 kristaps Exp $ */ +/* $Id: roff.c,v 1.111 2010/12/29 01:16:57 kristaps Exp $ */ /* * Copyright (c) 2010 Kristaps Dzonsons * Copyright (c) 2010 Ingo Schwarze @@ -1130,7 +1130,7 @@ roff_TS(ROFF_ARGS) (*r->msg)(MANDOCERR_SCOPEBROKEN, r->data, ln, ppos, NULL); tbl_reset(r->tbl); } else - r->tbl = tbl_alloc(); + r->tbl = tbl_alloc(r->data, r->msg); return(ROFF_IGN); } diff --git a/tbl.c b/tbl.c index 71d92447..2e02e13b 100644 --- a/tbl.c +++ b/tbl.c @@ -1,4 +1,4 @@ -/* $Id: tbl.c,v 1.4 2010/12/28 13:47:38 kristaps Exp $ */ +/* $Id: tbl.c,v 1.5 2010/12/29 01:16:57 kristaps Exp $ */ /* * Copyright (c) 2009, 2010 Kristaps Dzonsons * @@ -25,14 +25,8 @@ #include "libmandoc.h" #include "libroff.h" -static const char tbl_toks[TBL_TOK__MAX] = { - '(', ')', ',', ';', '.', - ' ', '\t', '\0' -}; - static void tbl_init(struct tbl *); static void tbl_clear(struct tbl *); -static enum tbl_tok tbl_next_char(char); static void tbl_clear(struct tbl *tbl) @@ -45,6 +39,9 @@ tbl_init(struct tbl *tbl) { tbl->part = TBL_PART_OPTS; + tbl->tab = '\t'; + tbl->linesize = 12; + tbl->decimal = '.'; } enum rofferr @@ -56,19 +53,37 @@ tbl_read(struct tbl *tbl, int ln, const char *p, int offs) cp = &p[offs]; len = (int)strlen(cp); - if (len && TBL_PART_OPTS == tbl->part) + /* + * If we're in the options section and we don't have a + * terminating semicolon, assume we've moved directly into the + * layout section. No need to report a warning: this is, + * apparently, standard behaviour. + */ + + if (TBL_PART_OPTS == tbl->part && len) if (';' != cp[len - 1]) tbl->part = TBL_PART_LAYOUT; + + /* Now process each logical section of the table. */ + + switch (tbl->part) { + case (TBL_PART_OPTS): + return(tbl_option(tbl, ln, p) ? 
ROFF_IGN : ROFF_ERR); + default: + break; + } return(ROFF_CONT); } struct tbl * -tbl_alloc(void) +tbl_alloc(void *data, const mandocmsg msg) { struct tbl *p; p = mandoc_malloc(sizeof(struct tbl)); + p->data = data; + p->msg = msg; tbl_init(p); return(p); } @@ -89,57 +104,3 @@ tbl_reset(struct tbl *tbl) tbl_init(tbl); } -static enum tbl_tok -tbl_next_char(char c) -{ - int i; - - /* - * These are delimiting tokens. They separate out words in the - * token stream. - * - * FIXME: make this into a hashtable for faster lookup. - */ - for (i = 0; i < TBL_TOK__MAX; i++) - if (c == tbl_toks[i]) - return((enum tbl_tok)i); - - return(TBL_TOK__MAX); -} - -enum tbl_tok -tbl_next(struct tbl *tbl, const char *p, int *pos) -{ - int i; - enum tbl_tok c; - - tbl->buf[0] = '\0'; - - if (TBL_TOK__MAX != (c = tbl_next_char(p[*pos]))) { - if (TBL_TOK_NIL != c) { - tbl->buf[0] = p[*pos]; - tbl->buf[1] = '\0'; - (*pos)++; - } - return(c); - } - - /* - * Copy words into a nil-terminated buffer. For now, we use a - * static buffer. FIXME: eventually this should be made into a - * dynamic one living in struct tbl. - */ - - for (i = 0; i < BUFSIZ; i++, (*pos)++) - if (TBL_TOK__MAX == tbl_next_char(p[*pos])) - tbl->buf[i] = p[*pos]; - else - break; - - assert(i < BUFSIZ); - tbl->buf[i] = '\0'; - - return(TBL_TOK__MAX); -} - - diff --git a/tbl_opts.c b/tbl_opts.c index e98ba946..7470e4e2 100644 --- a/tbl_opts.c +++ b/tbl_opts.c @@ -1,4 +1,4 @@ -/* $Id: tbl_opts.c,v 1.2 2010/12/28 13:47:38 kristaps Exp $ */ +/* $Id: tbl_opts.c,v 1.3 2010/12/29 01:16:57 kristaps Exp $ */ /* * Copyright (c) 2009, 2010 Kristaps Dzonsons * @@ -14,10 +14,12 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include #include #include #include +#include "mandoc.h" #include "libroff.h" enum tbl_ident { @@ -46,6 +48,12 @@ struct tbl_phrase { /* Handle Commonwealth/American spellings. 
*/ #define KEY_MAXKEYS 14 +/* Maximum length of key name string. */ +#define KEY_MAXNAME 13 + +/* Maximum length of key number size. */ +#define KEY_MAXNUMSZ 10 + static const struct tbl_phrase keys[KEY_MAXKEYS] = { { "center", TBL_OPT_CENTRE, KEY_CENTRE}, { "centre", TBL_OPT_CENTRE, KEY_CENTRE}, @@ -64,86 +72,99 @@ static const struct tbl_phrase keys[KEY_MAXKEYS] = { }; static int arg(struct tbl *, int, const char *, int *, int); -static int opt(struct tbl *, int, const char *, int *); +static void opt(struct tbl *, int, const char *, int *); static int arg(struct tbl *tbl, int ln, const char *p, int *pos, int key) { - int sv; + int i; + char buf[KEY_MAXNUMSZ]; -again: - sv = *pos; + while (isspace((unsigned char)p[*pos])) + (*pos)++; - switch (tbl_next(tbl, p, pos)) { - case (TBL_TOK_OPENPAREN): - break; - case (TBL_TOK_SPACE): - /* FALLTHROUGH */ - case (TBL_TOK_TAB): - goto again; - default: + /* Arguments always begin with a parenthesis. */ + + if ('(' != p[*pos]) { + TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos); return(0); } - sv = *pos; + (*pos)++; - switch (tbl_next(tbl, p, pos)) { - case (TBL_TOK__MAX): - break; - default: - return(0); - } + /* + * The arguments can be ANY value, so we can't just stop at the + * next close parenthesis (the argument can be a closed + * parenthesis itself). + */ switch (key) { case (KEY_DELIM): - /* FIXME: cache this value. */ - if (2 != strlen(tbl->buf)) + if ('\0' == (tbl->delims[0] = p[(*pos)++])) { + TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos - 1); return(0); - tbl->delims[0] = tbl->buf[0]; - tbl->delims[1] = tbl->buf[1]; - break; - case (KEY_TAB): - /* FIXME: cache this value. 
*/ - if (1 != strlen(tbl->buf)) + } + + if ('\0' == (tbl->delims[1] = p[(*pos)++])) { + TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos - 1); return(0); - tbl->tab = tbl->buf[0]; + } break; + case (KEY_TAB): + if ('\0' != (tbl->tab = p[(*pos)++])) + break; + + TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos - 1); + return(0); case (KEY_LINESIZE): - if ((tbl->linesize = atoi(tbl->buf)) <= 0) - return(0); - break; + for (i = 0; i < KEY_MAXNUMSZ && p[*pos]; i++, (*pos)++) { + buf[i] = p[*pos]; + if ( ! isdigit((unsigned char)buf[i])) + break; + } + + if (i < KEY_MAXNUMSZ) { + buf[i] = '\0'; + tbl->linesize = atoi(buf); + break; + } + + (*tbl->msg)(MANDOCERR_TBL, tbl->data, ln, *pos, NULL); + return(0); case (KEY_DPOINT): - /* FIXME: cache this value. */ - if (1 != strlen(tbl->buf)) - return(0); - tbl->decimal = tbl->buf[0]; - break; + if ('\0' != (tbl->decimal = p[(*pos)++])) + break; + + TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos - 1); + return(0); default: abort(); + /* NOTREACHED */ } - sv = *pos; + /* End with a close parenthesis. */ - switch (tbl_next(tbl, p, pos)) { - case (TBL_TOK_CLOSEPAREN): - break; - default: - return(0); - } + if (')' == p[(*pos)++]) + return(1); - return(1); + TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos - 1); + return(0); } - -static int +static void opt(struct tbl *tbl, int ln, const char *p, int *pos) { int i, sv; - -again: - sv = *pos; + char buf[KEY_MAXNAME]; /* + * Parse individual options from the stream as surrounded by + * this goto. Each pass through the routine parses out a single + * option and registers it. Option arguments are processed in + * the arg() function. 
+ */ + +again: /* * EBNF describing this section: * * options ::= option_list [:space:]* [;][\n] @@ -154,36 +175,69 @@ again: * args ::= [:space:]* [(] [:alpha:]+ [)] */ - switch (tbl_next(tbl, p, pos)) { - case (TBL_TOK__MAX): - break; - case (TBL_TOK_SPACE): - /* FALLTHROUGH */ - case (TBL_TOK_TAB): - goto again; - case (TBL_TOK_SEMICOLON): - tbl->part = TBL_PART_LAYOUT; - return(1); - default: - return(0); + while (isspace((unsigned char)p[*pos])) + (*pos)++; + + /* Safe exit point. */ + + if (';' == p[*pos]) + return; + + /* Copy up to first non-alpha character. */ + + for (sv = *pos, i = 0; i < KEY_MAXNAME; i++, (*pos)++) { + buf[i] = tolower(p[*pos]); + if ( ! isalpha((unsigned char)buf[i])) + break; + } + + /* Exit if buffer is empty (or overrun). */ + + if (KEY_MAXNAME == i || 0 == i) { + TBL_MSG(tbl, MANDOCERR_TBL, ln, *pos); + return; } + buf[i] = '\0'; + + while (isspace((unsigned char)p[*pos])) + (*pos)++; + + /* + * Look through all of the available keys to find one that + * matches the input. FIXME: hashtable this. + */ + for (i = 0; i < KEY_MAXKEYS; i++) { - /* FIXME: hashtable this? */ - if (strcasecmp(tbl->buf, keys[i].name)) + if (strcmp(buf, keys[i].name)) continue; + + /* + * Note: this is more difficult to recover from, as we + * can be anywhere in the option sequence and it's + * harder to jump to the next. Meanwhile, just bail out + * of the sequence altogether. + */ + if (keys[i].key) tbl->opts |= keys[i].key; else if ( ! arg(tbl, ln, p, pos, keys[i].ident)) - return(0); + return; break; } + /* + * Allow us to recover from bad options by continuing to another + * parse sequence. + */ + if (KEY_MAXKEYS == i) - return(0); + TBL_MSG(tbl, MANDOCERR_TBLOPT, ln, sv); - return(opt(tbl, ln, p, pos)); + /* Try again... */ + + goto again; } int @@ -191,6 +245,15 @@ tbl_option(struct tbl *tbl, int ln, const char *p) { int pos; + /* + * Table options are always on just one line, so automatically + * switch into the next input mode here. 
+ */ + tbl->part = TBL_PART_LAYOUT; + pos = 0; - return(opt(tbl, ln, p, &pos)); + opt(tbl, ln, p, &pos); + + /* Always succeed. */ + return(1); } -- cgit v1.2.3-56-ge451