aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/mdoc_argv.c
diff options
context:
space:
mode:
authorKristaps Dzonsons <kristaps@bsd.lv>2011-06-18 16:18:04 +0000
committerKristaps Dzonsons <kristaps@bsd.lv>2011-06-18 16:18:04 +0000
commit274cb80b28fcd07c9ab6a12e1f9de6639c43a503 (patch)
tree6fbe23d21959d510a66c71194ae2817911713d25 /mdoc_argv.c
parentd7e5a98bddb2856e1f16c3d97272b73d70c93a13 (diff)
downloadmandoc-274cb80b28fcd07c9ab6a12e1f9de6639c43a503.tar.gz
mandoc-274cb80b28fcd07c9ab6a12e1f9de6639c43a503.tar.zst
mandoc-274cb80b28fcd07c9ab6a12e1f9de6639c43a503.zip
Fix an assertion failure raised by the following interesting scenario: an
auto-opened `It' (i.e., a column list with a free-text first line) with leading spaces in the line triggered an assertion when searching for arguments. This led to a fix giving a nice performance speed-up (a few percent, with some quick trials): the search for flags immediately exits if the macro has no flags, instead of having to first parse the leading word then look it up. I also cleaned up the argv parsing stuff a little bit and added more documentation. This comes from a TODO by joerg@.
Diffstat (limited to 'mdoc_argv.c')
-rw-r--r--mdoc_argv.c356
1 files changed, 164 insertions, 192 deletions
diff --git a/mdoc_argv.c b/mdoc_argv.c
index 38909f94..545a3cbf 100644
--- a/mdoc_argv.c
+++ b/mdoc_argv.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc_argv.c,v 1.77 2011/05/12 23:44:01 kristaps Exp $ */
+/* $Id: mdoc_argv.c,v 1.78 2011/06/18 16:18:04 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -47,7 +47,11 @@ enum argvflag {
ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */
};
-static enum mdocargt argv_a2arg(enum mdoct, const char *);
+struct mdocarg { /* per-macro parsing description; mdocargs[] holds one entry per macro token */
+ enum argsflag flags; /* flags that mdoc_args() hands to args() for this macro */
+ const enum mdocargt *argvs; /* flags the macro accepts, terminated by MDOC_ARG_MAX; NULL if it takes none */
+};
+
static enum margserr args(struct mdoc *, int, int *,
char *, enum argsflag, char **);
static int args_checkpunct(const char *, int);
@@ -90,131 +94,6 @@ static const enum argvflag argvflags[MDOC_ARG_MAX] = {
ARGV_NONE /* MDOC_Symbolic */
};
-static const enum argsflag argflags[MDOC_MAX] = {
- ARGSFL_NONE, /* Ap */
- ARGSFL_NONE, /* Dd */
- ARGSFL_NONE, /* Dt */
- ARGSFL_NONE, /* Os */
- ARGSFL_NONE, /* Sh */
- ARGSFL_NONE, /* Ss */
- ARGSFL_NONE, /* Pp */
- ARGSFL_DELIM, /* D1 */
- ARGSFL_DELIM, /* Dl */
- ARGSFL_NONE, /* Bd */
- ARGSFL_NONE, /* Ed */
- ARGSFL_NONE, /* Bl */
- ARGSFL_NONE, /* El */
- ARGSFL_NONE, /* It */
- ARGSFL_DELIM, /* Ad */
- ARGSFL_DELIM, /* An */
- ARGSFL_DELIM, /* Ar */
- ARGSFL_NONE, /* Cd */
- ARGSFL_DELIM, /* Cm */
- ARGSFL_DELIM, /* Dv */
- ARGSFL_DELIM, /* Er */
- ARGSFL_DELIM, /* Ev */
- ARGSFL_NONE, /* Ex */
- ARGSFL_DELIM, /* Fa */
- ARGSFL_NONE, /* Fd */
- ARGSFL_DELIM, /* Fl */
- ARGSFL_DELIM, /* Fn */
- ARGSFL_DELIM, /* Ft */
- ARGSFL_DELIM, /* Ic */
- ARGSFL_NONE, /* In */
- ARGSFL_DELIM, /* Li */
- ARGSFL_NONE, /* Nd */
- ARGSFL_DELIM, /* Nm */
- ARGSFL_DELIM, /* Op */
- ARGSFL_NONE, /* Ot */
- ARGSFL_DELIM, /* Pa */
- ARGSFL_NONE, /* Rv */
- ARGSFL_DELIM, /* St */
- ARGSFL_DELIM, /* Va */
- ARGSFL_DELIM, /* Vt */
- ARGSFL_DELIM, /* Xr */
- ARGSFL_NONE, /* %A */
- ARGSFL_NONE, /* %B */
- ARGSFL_NONE, /* %D */
- ARGSFL_NONE, /* %I */
- ARGSFL_NONE, /* %J */
- ARGSFL_NONE, /* %N */
- ARGSFL_NONE, /* %O */
- ARGSFL_NONE, /* %P */
- ARGSFL_NONE, /* %R */
- ARGSFL_NONE, /* %T */
- ARGSFL_NONE, /* %V */
- ARGSFL_DELIM, /* Ac */
- ARGSFL_NONE, /* Ao */
- ARGSFL_DELIM, /* Aq */
- ARGSFL_DELIM, /* At */
- ARGSFL_DELIM, /* Bc */
- ARGSFL_NONE, /* Bf */
- ARGSFL_NONE, /* Bo */
- ARGSFL_DELIM, /* Bq */
- ARGSFL_DELIM, /* Bsx */
- ARGSFL_DELIM, /* Bx */
- ARGSFL_NONE, /* Db */
- ARGSFL_DELIM, /* Dc */
- ARGSFL_NONE, /* Do */
- ARGSFL_DELIM, /* Dq */
- ARGSFL_DELIM, /* Ec */
- ARGSFL_NONE, /* Ef */
- ARGSFL_DELIM, /* Em */
- ARGSFL_NONE, /* Eo */
- ARGSFL_DELIM, /* Fx */
- ARGSFL_DELIM, /* Ms */
- ARGSFL_DELIM, /* No */
- ARGSFL_DELIM, /* Ns */
- ARGSFL_DELIM, /* Nx */
- ARGSFL_DELIM, /* Ox */
- ARGSFL_DELIM, /* Pc */
- ARGSFL_DELIM, /* Pf */
- ARGSFL_NONE, /* Po */
- ARGSFL_DELIM, /* Pq */
- ARGSFL_DELIM, /* Qc */
- ARGSFL_DELIM, /* Ql */
- ARGSFL_NONE, /* Qo */
- ARGSFL_DELIM, /* Qq */
- ARGSFL_NONE, /* Re */
- ARGSFL_NONE, /* Rs */
- ARGSFL_DELIM, /* Sc */
- ARGSFL_NONE, /* So */
- ARGSFL_DELIM, /* Sq */
- ARGSFL_NONE, /* Sm */
- ARGSFL_DELIM, /* Sx */
- ARGSFL_DELIM, /* Sy */
- ARGSFL_DELIM, /* Tn */
- ARGSFL_DELIM, /* Ux */
- ARGSFL_DELIM, /* Xc */
- ARGSFL_NONE, /* Xo */
- ARGSFL_NONE, /* Fo */
- ARGSFL_NONE, /* Fc */
- ARGSFL_NONE, /* Oo */
- ARGSFL_DELIM, /* Oc */
- ARGSFL_NONE, /* Bk */
- ARGSFL_NONE, /* Ek */
- ARGSFL_NONE, /* Bt */
- ARGSFL_NONE, /* Hf */
- ARGSFL_NONE, /* Fr */
- ARGSFL_NONE, /* Ud */
- ARGSFL_NONE, /* Lb */
- ARGSFL_NONE, /* Lp */
- ARGSFL_DELIM, /* Lk */
- ARGSFL_DELIM, /* Mt */
- ARGSFL_DELIM, /* Brq */
- ARGSFL_NONE, /* Bro */
- ARGSFL_DELIM, /* Brc */
- ARGSFL_NONE, /* %C */
- ARGSFL_NONE, /* Es */
- ARGSFL_NONE, /* En */
- ARGSFL_NONE, /* Dx */
- ARGSFL_NONE, /* %Q */
- ARGSFL_NONE, /* br */
- ARGSFL_NONE, /* sp */
- ARGSFL_NONE, /* %U */
- ARGSFL_NONE, /* Ta */
-};
-
static const enum mdocargt args_Ex[] = {
MDOC_Std,
MDOC_ARG_MAX
@@ -269,6 +148,132 @@ static const enum mdocargt args_Bl[] = {
MDOC_ARG_MAX
};
+static const struct mdocarg mdocargs[MDOC_MAX] = {
+ { ARGSFL_NONE, NULL }, /* Ap */
+ { ARGSFL_NONE, NULL }, /* Dd */
+ { ARGSFL_NONE, NULL }, /* Dt */
+ { ARGSFL_NONE, NULL }, /* Os */
+ { ARGSFL_NONE, NULL }, /* Sh */
+ { ARGSFL_NONE, NULL }, /* Ss */
+ { ARGSFL_NONE, NULL }, /* Pp */
+ { ARGSFL_DELIM, NULL }, /* D1 */
+ { ARGSFL_DELIM, NULL }, /* Dl */
+ { ARGSFL_NONE, args_Bd }, /* Bd */
+ { ARGSFL_NONE, NULL }, /* Ed */
+ { ARGSFL_NONE, args_Bl }, /* Bl */
+ { ARGSFL_NONE, NULL }, /* El */
+ { ARGSFL_NONE, NULL }, /* It */
+ { ARGSFL_DELIM, NULL }, /* Ad */
+ { ARGSFL_DELIM, args_An }, /* An */
+ { ARGSFL_DELIM, NULL }, /* Ar */
+ { ARGSFL_NONE, NULL }, /* Cd */
+ { ARGSFL_DELIM, NULL }, /* Cm */
+ { ARGSFL_DELIM, NULL }, /* Dv */
+ { ARGSFL_DELIM, NULL }, /* Er */
+ { ARGSFL_DELIM, NULL }, /* Ev */
+ { ARGSFL_NONE, args_Ex }, /* Ex */
+ { ARGSFL_DELIM, NULL }, /* Fa */
+ { ARGSFL_NONE, NULL }, /* Fd */
+ { ARGSFL_DELIM, NULL }, /* Fl */
+ { ARGSFL_DELIM, NULL }, /* Fn */
+ { ARGSFL_DELIM, NULL }, /* Ft */
+ { ARGSFL_DELIM, NULL }, /* Ic */
+ { ARGSFL_NONE, NULL }, /* In */
+ { ARGSFL_DELIM, NULL }, /* Li */
+ { ARGSFL_NONE, NULL }, /* Nd */
+ { ARGSFL_DELIM, NULL }, /* Nm */
+ { ARGSFL_DELIM, NULL }, /* Op */
+ { ARGSFL_NONE, NULL }, /* Ot */
+ { ARGSFL_DELIM, NULL }, /* Pa */
+ { ARGSFL_NONE, args_Ex }, /* Rv */
+ { ARGSFL_DELIM, NULL }, /* St */
+ { ARGSFL_DELIM, NULL }, /* Va */
+ { ARGSFL_DELIM, NULL }, /* Vt */
+ { ARGSFL_DELIM, NULL }, /* Xr */
+ { ARGSFL_NONE, NULL }, /* %A */
+ { ARGSFL_NONE, NULL }, /* %B */
+ { ARGSFL_NONE, NULL }, /* %D */
+ { ARGSFL_NONE, NULL }, /* %I */
+ { ARGSFL_NONE, NULL }, /* %J */
+ { ARGSFL_NONE, NULL }, /* %N */
+ { ARGSFL_NONE, NULL }, /* %O */
+ { ARGSFL_NONE, NULL }, /* %P */
+ { ARGSFL_NONE, NULL }, /* %R */
+ { ARGSFL_NONE, NULL }, /* %T */
+ { ARGSFL_NONE, NULL }, /* %V */
+ { ARGSFL_DELIM, NULL }, /* Ac */
+ { ARGSFL_NONE, NULL }, /* Ao */
+ { ARGSFL_DELIM, NULL }, /* Aq */
+ { ARGSFL_DELIM, NULL }, /* At */
+ { ARGSFL_DELIM, NULL }, /* Bc */
+ { ARGSFL_NONE, args_Bf }, /* Bf */
+ { ARGSFL_NONE, NULL }, /* Bo */
+ { ARGSFL_DELIM, NULL }, /* Bq */
+ { ARGSFL_DELIM, NULL }, /* Bsx */
+ { ARGSFL_DELIM, NULL }, /* Bx */
+ { ARGSFL_NONE, NULL }, /* Db */
+ { ARGSFL_DELIM, NULL }, /* Dc */
+ { ARGSFL_NONE, NULL }, /* Do */
+ { ARGSFL_DELIM, NULL }, /* Dq */
+ { ARGSFL_DELIM, NULL }, /* Ec */
+ { ARGSFL_NONE, NULL }, /* Ef */
+ { ARGSFL_DELIM, NULL }, /* Em */
+ { ARGSFL_NONE, NULL }, /* Eo */
+ { ARGSFL_DELIM, NULL }, /* Fx */
+ { ARGSFL_DELIM, NULL }, /* Ms */
+ { ARGSFL_DELIM, NULL }, /* No */
+ { ARGSFL_DELIM, NULL }, /* Ns */
+ { ARGSFL_DELIM, NULL }, /* Nx */
+ { ARGSFL_DELIM, NULL }, /* Ox */
+ { ARGSFL_DELIM, NULL }, /* Pc */
+ { ARGSFL_DELIM, NULL }, /* Pf */
+ { ARGSFL_NONE, NULL }, /* Po */
+ { ARGSFL_DELIM, NULL }, /* Pq */
+ { ARGSFL_DELIM, NULL }, /* Qc */
+ { ARGSFL_DELIM, NULL }, /* Ql */
+ { ARGSFL_NONE, NULL }, /* Qo */
+ { ARGSFL_DELIM, NULL }, /* Qq */
+ { ARGSFL_NONE, NULL }, /* Re */
+ { ARGSFL_NONE, NULL }, /* Rs */
+ { ARGSFL_DELIM, NULL }, /* Sc */
+ { ARGSFL_NONE, NULL }, /* So */
+ { ARGSFL_DELIM, NULL }, /* Sq */
+ { ARGSFL_NONE, NULL }, /* Sm */
+ { ARGSFL_DELIM, NULL }, /* Sx */
+ { ARGSFL_DELIM, NULL }, /* Sy */
+ { ARGSFL_DELIM, NULL }, /* Tn */
+ { ARGSFL_DELIM, NULL }, /* Ux */
+ { ARGSFL_DELIM, NULL }, /* Xc */
+ { ARGSFL_NONE, NULL }, /* Xo */
+ { ARGSFL_NONE, NULL }, /* Fo */
+ { ARGSFL_NONE, NULL }, /* Fc */
+ { ARGSFL_NONE, NULL }, /* Oo */
+ { ARGSFL_DELIM, NULL }, /* Oc */
+ { ARGSFL_NONE, args_Bk }, /* Bk */
+ { ARGSFL_NONE, NULL }, /* Ek */
+ { ARGSFL_NONE, NULL }, /* Bt */
+ { ARGSFL_NONE, NULL }, /* Hf */
+ { ARGSFL_NONE, NULL }, /* Fr */
+ { ARGSFL_NONE, NULL }, /* Ud */
+ { ARGSFL_NONE, NULL }, /* Lb */
+ { ARGSFL_NONE, NULL }, /* Lp */
+ { ARGSFL_DELIM, NULL }, /* Lk */
+ { ARGSFL_DELIM, NULL }, /* Mt */
+ { ARGSFL_DELIM, NULL }, /* Brq */
+ { ARGSFL_NONE, NULL }, /* Bro */
+ { ARGSFL_DELIM, NULL }, /* Brc */
+ { ARGSFL_NONE, NULL }, /* %C */
+ { ARGSFL_NONE, NULL }, /* Es */
+ { ARGSFL_NONE, NULL }, /* En */
+ { ARGSFL_NONE, NULL }, /* Dx */
+ { ARGSFL_NONE, NULL }, /* %Q */
+ { ARGSFL_NONE, NULL }, /* br */
+ { ARGSFL_NONE, NULL }, /* sp */
+ { ARGSFL_NONE, NULL }, /* %U */
+ { ARGSFL_NONE, NULL }, /* Ta */
+};
+
+
/*
* Parse an argument from line text. This comes in the form of -key
* [value0...], which may either have a single mandatory value, at least
@@ -281,47 +286,62 @@ mdoc_argv(struct mdoc *m, int line, enum mdoct tok,
char *p, sv;
struct mdoc_argv tmp;
struct mdoc_arg *arg;
+ const enum mdocargt *ap;
if ('\0' == buf[*pos])
return(ARGV_EOLN);
+ else if (NULL == (ap = mdocargs[tok].argvs))
+ return(ARGV_WORD);
assert(' ' != buf[*pos]);
- /* Parse through to the first unescaped space. */
+ /* Seek to the first unescaped space. */
p = &buf[++(*pos)];
assert(*pos > 0);
- /* LINTED */
- while (buf[*pos]) {
- if (' ' == buf[*pos])
- if ('\\' != buf[*pos - 1])
- break;
- (*pos)++;
- }
+ for ( ; buf[*pos] ; (*pos)++)
+ if (' ' == buf[*pos] && '\\' != buf[*pos - 1])
+ break;
- /* XXX - save zeroed byte, if not an argument. */
+ /*
+ * We want to nil-terminate the word to look it up (it's easier
+ * that way). But we may not have a flag, in which case we need
+ * to restore the line as-is. So keep around the stray byte,
+ * which we'll reset upon exiting (if necessary).
+ */
- sv = '\0';
- if (buf[*pos]) {
- sv = buf[*pos];
+ if ('\0' != (sv = buf[*pos]))
buf[(*pos)++] = '\0';
- }
+
+ /*
+ * Now look up the word as a flag. Use temporary storage that
+ * we'll copy into the node's flags, if necessary.
+ */
memset(&tmp, 0, sizeof(struct mdoc_argv));
+
tmp.line = line;
tmp.pos = *pos;
+ tmp.arg = MDOC_ARG_MAX;
- /* See if our token accepts the argument. */
+ while (MDOC_ARG_MAX != (tmp.arg = *ap++))
+ if (0 == strcmp(p, mdoc_argnames[tmp.arg]))
+ break;
- if (MDOC_ARG_MAX == (tmp.arg = argv_a2arg(tok, p))) {
- /* XXX - restore saved zeroed byte. */
+ if (MDOC_ARG_MAX == tmp.arg) {
+ /*
+ * The flag was not found.
+ * Restore saved zeroed byte and return as a word.
+ */
if (sv)
buf[*pos - 1] = sv;
return(ARGV_WORD);
}
+ /* Read to the next word (the argument). */
+
while (buf[*pos] && ' ' == buf[*pos])
(*pos)++;
@@ -395,7 +415,7 @@ mdoc_args(struct mdoc *m, int line, int *pos,
enum argsflag fl;
struct mdoc_node *n;
- fl = argflags[tok];
+ fl = mdocargs[tok].flags;
if (MDOC_It != tok)
return(args(m, line, pos, buf, fl, v));
@@ -424,8 +444,6 @@ args(struct mdoc *m, int line, int *pos,
char *p, *pp;
enum margserr rc;
- assert(' ' != buf[*pos]);
-
if ('\0' == buf[*pos]) {
if (MDOC_PPHRASE & m->flags)
return(ARGS_EOLN);
@@ -613,52 +631,6 @@ args_checkpunct(const char *buf, int i)
return('\0' == buf[i]);
}
-/*
- * Match up an argument string (e.g., `-foo bar' having "foo") with the
- * correrct identifier. It must apply to the given macro. If none was
- * found (including bad matches), return MDOC_ARG_MAX.
- */
-static enum mdocargt
-argv_a2arg(enum mdoct tok, const char *p)
-{
- const enum mdocargt *argsp;
-
- argsp = NULL;
-
- switch (tok) {
- case (MDOC_An):
- argsp = args_An;
- break;
- case (MDOC_Bd):
- argsp = args_Bd;
- break;
- case (MDOC_Bf):
- argsp = args_Bf;
- break;
- case (MDOC_Bk):
- argsp = args_Bk;
- break;
- case (MDOC_Bl):
- argsp = args_Bl;
- break;
- case (MDOC_Rv):
- /* FALLTHROUGH */
- case (MDOC_Ex):
- argsp = args_Ex;
- break;
- default:
- return(MDOC_ARG_MAX);
- }
-
- assert(argsp);
-
- for ( ; MDOC_ARG_MAX != *argsp ; argsp++)
- if (0 == strcmp(p, mdoc_argnames[*argsp]))
- return(*argsp);
-
- return(MDOC_ARG_MAX);
-}
-
static int
argv_multi(struct mdoc *m, int line,
struct mdoc_argv *v, int *pos, char *buf)