X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/4cd5b88b9ab0146c48e737ba2e83f5d7fa273d1d..693ec82b8e663fa43e54751b5157c5612d843092:/argv.c?ds=sidebyside diff --git a/argv.c b/argv.c index 5f591f3c..9499bd85 100644 --- a/argv.c +++ b/argv.c @@ -1,4 +1,4 @@ -/* $Id: argv.c,v 1.29 2009/02/23 15:34:53 kristaps Exp $ */ +/* $Id: argv.c,v 1.41 2009/03/08 12:46:38 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -29,36 +29,75 @@ * Routines to parse arguments of macros. Arguments follow the syntax * of `-arg [val [valN...]]'. Arguments come in all types: quoted * arguments, multiple arguments per value, no-value arguments, etc. + * + * There's no limit to the number of arguments that may be allocated. */ #define ARGS_QUOTED (1 << 0) #define ARGS_DELIM (1 << 1) #define ARGS_TABSEP (1 << 2) +#define ARGS_ARGVLIKE (1 << 3) + +#define ARGV_NONE (1 << 0) +#define ARGV_SINGLE (1 << 1) +#define ARGV_MULTI (1 << 2) +#define ARGV_OPT_SINGLE (1 << 3) + +enum mwarn { + WQUOTPARM, + WARGVPARM, + WCOLEMPTY, + WTAILWS +}; + +enum merr { + EQUOTTERM, + EARGVAL +}; -static int lookup(int, const char *); +static int argv_a2arg(int, const char *); static int args(struct mdoc *, int, int *, char *, int, char **); -static int argv(struct mdoc *, int, - struct mdoc_arg *, int *, char *); +static int argv(struct mdoc *, int, int, + struct mdoc_argv *, int *, char *); static int argv_single(struct mdoc *, int, - struct mdoc_arg *, int *, char *); + struct mdoc_argv *, int *, char *); +static int argv_opt_single(struct mdoc *, int, + struct mdoc_argv *, int *, char *); static int argv_multi(struct mdoc *, int, - struct mdoc_arg *, int *, char *); -static int pwarn(struct mdoc *, int, int, int); -static int perr(struct mdoc *, int, int, int); - -/* Warning messages. */ - -#define WQUOTPARM (0) -#define WARGVPARM (1) -#define WCOLEMPTY (2) -#define WTAILWS (3) - -/* Error messages. 
*/ - -#define EQUOTTERM (0) -#define EARGVAL (1) -#define EARGMANY (2) + struct mdoc_argv *, int *, char *); +static int pwarn(struct mdoc *, int, int, enum mwarn); +static int perr(struct mdoc *, int, int, enum merr); + +/* Per-argument flags. */ + +static int mdoc_argvflags[MDOC_ARG_MAX] = { + ARGV_NONE, /* MDOC_Split */ + ARGV_NONE, /* MDOC_Nosplit */ + ARGV_NONE, /* MDOC_Ragged */ + ARGV_NONE, /* MDOC_Unfilled */ + ARGV_NONE, /* MDOC_Literal */ + ARGV_NONE, /* MDOC_File */ + ARGV_SINGLE, /* MDOC_Offset */ + ARGV_NONE, /* MDOC_Bullet */ + ARGV_NONE, /* MDOC_Dash */ + ARGV_NONE, /* MDOC_Hyphen */ + ARGV_NONE, /* MDOC_Item */ + ARGV_NONE, /* MDOC_Enum */ + ARGV_NONE, /* MDOC_Tag */ + ARGV_NONE, /* MDOC_Diag */ + ARGV_NONE, /* MDOC_Hang */ + ARGV_NONE, /* MDOC_Ohang */ + ARGV_NONE, /* MDOC_Inset */ + ARGV_MULTI, /* MDOC_Column */ + ARGV_SINGLE, /* MDOC_Width */ + ARGV_NONE, /* MDOC_Compact */ + ARGV_SINGLE, /* MDOC_Std */ + ARGV_NONE, /* MDOC_Filled */ + ARGV_NONE, /* MDOC_Words */ + ARGV_NONE, /* MDOC_Emphasis */ + ARGV_NONE /* MDOC_Symbolic */ +}; static int mdoc_argflags[MDOC_MAX] = { 0, /* \" */ @@ -74,7 +113,7 @@ static int mdoc_argflags[MDOC_MAX] = { 0, /* Ed */ 0, /* Bl */ 0, /* El */ - ARGS_DELIM, /* It */ + 0, /* It */ ARGS_DELIM, /* Ad */ ARGS_DELIM, /* An */ ARGS_DELIM, /* Ar */ @@ -98,7 +137,7 @@ static int mdoc_argflags[MDOC_MAX] = { 0, /* Ot */ ARGS_DELIM, /* Pa */ 0, /* Rv */ - ARGS_DELIM, /* St */ + ARGS_DELIM | ARGS_ARGVLIKE, /* St */ ARGS_DELIM, /* Va */ ARGS_DELIM, /* Vt */ ARGS_DELIM, /* Xr */ @@ -167,62 +206,170 @@ static int mdoc_argflags[MDOC_MAX] = { 0, /* Hf */ 0, /* Fr */ 0, /* Ud */ + 0, /* Lb */ }; +/* + * Parse an argument from line text. This comes in the form of -key + * [value0...], which may either have a single mandatory value, at least + * one mandatory value, an optional single value, or no value. 
+ */ +int +mdoc_argv(struct mdoc *mdoc, int line, int tok, + struct mdoc_arg **v, int *pos, char *buf) +{ + int i; + char *p; + struct mdoc_argv tmp; + struct mdoc_arg *arg; + + if (0 == buf[*pos]) + return(ARGV_EOLN); + + assert( ! isspace((u_char)buf[*pos])); + + if ('-' != buf[*pos]) + return(ARGV_WORD); + + i = *pos; + p = &buf[++(*pos)]; + + assert(*pos > 0); + + /* LINTED */ + while (buf[*pos]) { + if (isspace((u_char)buf[*pos])) + if ('\\' != buf[*pos - 1]) + break; + (*pos)++; + } + + if (buf[*pos]) + buf[(*pos)++] = 0; + + (void)memset(&tmp, 0, sizeof(struct mdoc_argv)); + tmp.line = line; + tmp.pos = *pos; + + /* + * We now parse out the per-macro arguments. XXX - this can be + * made much cleaner using per-argument tables. See argv_a2arg + * for details. + */ + + if (MDOC_ARG_MAX == (tmp.arg = argv_a2arg(tok, p))) { + if ( ! pwarn(mdoc, line, i, WARGVPARM)) + return(ARGV_ERROR); + return(ARGV_WORD); + } + + while (buf[*pos] && isspace((u_char)buf[*pos])) + (*pos)++; + + /* FIXME: whitespace if no value. */ + + if ( ! 
argv(mdoc, tok, line, &tmp, pos, buf)) + return(ARGV_ERROR); + + if (NULL == (arg = *v)) { + *v = xcalloc(1, sizeof(struct mdoc_arg)); + arg = *v; + } + + arg->argc++; + arg->argv = xrealloc(arg->argv, arg->argc * + sizeof(struct mdoc_argv)); + + (void)memcpy(&arg->argv[(int)arg->argc - 1], + &tmp, sizeof(struct mdoc_argv)); + + return(ARGV_ARG); +} + + +void +mdoc_argv_free(struct mdoc_arg *p) +{ + int i, j; + + if (NULL == p) + return; + + if (p->refcnt) { + --(p->refcnt); + if (p->refcnt) + return; + } + + assert(p->argc); + + /* LINTED */ + for (i = 0; i < (int)p->argc; i++) { + if (0 == p->argv[i].sz) + continue; + /* LINTED */ + for (j = 0; j < (int)p->argv[i].sz; j++) + free(p->argv[i].value[j]); + + free(p->argv[i].value); + } + + free(p->argv); + free(p); +} + + + static int -perr(struct mdoc *mdoc, int line, int pos, int code) +perr(struct mdoc *mdoc, int line, int pos, enum merr code) { - int c; + char *p; + + p = NULL; switch (code) { case (EQUOTTERM): - c = mdoc_perr(mdoc, line, pos, - "unterminated quoted parameter"); + p = "unterminated quoted parameter"; break; case (EARGVAL): - c = mdoc_perr(mdoc, line, pos, - "argument requires a value"); - break; - case (EARGMANY): - c = mdoc_perr(mdoc, line, pos, - "too many values for argument"); + p = "argument requires a value"; break; - default: - abort(); - /* NOTREACHED */ } - return(c); + + assert(p); + return(mdoc_perr(mdoc, line, pos, p)); } static int -pwarn(struct mdoc *mdoc, int line, int pos, int code) +pwarn(struct mdoc *mdoc, int line, int pos, enum mwarn code) { + char *p; int c; + p = NULL; + c = WARN_SYNTAX; + switch (code) { case (WQUOTPARM): - c = mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, - "unexpected quoted parameter"); + p = "unexpected quoted parameter"; break; case (WARGVPARM): - c = mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, - "argument-like parameter"); + p = "argument-like parameter"; break; case (WCOLEMPTY): - c = mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, - "last list column is empty"); 
+ p = "last list column is empty"; + c = WARN_COMPAT; break; case (WTAILWS): - c = mdoc_pwarn(mdoc, line, pos, WARN_COMPAT, - "trailing whitespace"); + p = "trailing whitespace"; + c = WARN_COMPAT; break; - default: - abort(); - /* NOTREACHED */ } - return(c); + + assert(p); + return(mdoc_pwarn(mdoc, line, pos, c, p)); } @@ -236,29 +383,50 @@ mdoc_args(struct mdoc *mdoc, int line, fl = (0 == tok) ? 0 : mdoc_argflags[tok]; /* - * First see if we should use TABSEP (Bl -column). This - * invalidates the use of ARGS_DELIM. + * Override per-macro argument flags with context-specific ones. + * As of now, this is only valid for `It' depending on its list + * context. */ - if (MDOC_It == tok) { + switch (tok) { + case (MDOC_It): for (n = mdoc->last; n; n = n->parent) - if (MDOC_BLOCK == n->type) - if (MDOC_Bl == n->tok) - break; + if (MDOC_BLOCK == n->type && MDOC_Bl == n->tok) + break; + assert(n); - c = (int)n->data.block.argc; + c = (int)(n->args ? n->args->argc : 0); assert(c > 0); + /* + * Using `Bl -column' adds ARGS_TABSEP to the arguments + * and invalidates ARGS_DELIM. Using `Bl -diag' allows + * for quoted arguments. + */ + /* LINTED */ for (i = 0; i < c; i++) { - if (MDOC_Column != n->data.block.argv[i].arg) - continue; - fl |= ARGS_TABSEP; - fl &= ~ARGS_DELIM; - break; + switch (n->args->argv[i].arg) { + case (MDOC_Column): + fl |= ARGS_TABSEP; + fl &= ~ARGS_DELIM; + i = c; + break; + case (MDOC_Diag): + fl |= ARGS_QUOTED; + i = c; + break; + default: + break; + } } + break; + default: + break; } + /* Continue parsing the arguments themselves... */ + return(args(mdoc, line, pos, buf, fl, v)); } @@ -267,7 +435,7 @@ static int args(struct mdoc *mdoc, int line, int *pos, char *buf, int fl, char **v) { - int i, c; + int i; char *p, *pp; assert(*pos > 0); @@ -279,7 +447,7 @@ args(struct mdoc *mdoc, int line, if ( ! pwarn(mdoc, line, *pos, WQUOTPARM)) return(ARGS_ERROR); - if ('-' == buf[*pos]) + if ( ! (fl & ARGS_ARGVLIKE) && '-' == buf[*pos]) if ( ! 
pwarn(mdoc, line, *pos, WARGVPARM)) return(ARGS_ERROR); @@ -290,14 +458,15 @@ args(struct mdoc *mdoc, int line, */ if ((fl & ARGS_DELIM) && mdoc_iscdelim(buf[*pos])) { - for (i = *pos; (c = buf[i]); ) { - if ( ! mdoc_iscdelim(c)) + for (i = *pos; buf[i]; ) { + if ( ! mdoc_iscdelim(buf[i])) break; i++; - if (0 == buf[i] || ! isspace(c)) + /* There must be at least one space... */ + if (0 == buf[i] || ! isspace((u_char)buf[i])) break; i++; - while (buf[i] && isspace(c)) + while (buf[i] && isspace((u_char)buf[i])) i++; } if (0 == buf[i]) { @@ -375,7 +544,7 @@ args(struct mdoc *mdoc, int line, p++; if (0 != *p) *(p - 1) = 0; - *pos += p - *v; + *pos += (int)(p - *v); } if (p && 0 == *p) @@ -396,7 +565,7 @@ args(struct mdoc *mdoc, int line, if (p > *v && ' ' == *(p - 1)) if ( ! pwarn(mdoc, line, *pos, WTAILWS)) return(0); - *pos += p - *v; + *pos += (int)(p - *v); return(ARGS_WORD); } @@ -404,8 +573,8 @@ args(struct mdoc *mdoc, int line, /* Do non-tabsep look-ahead here. */ if ( ! (ARGS_TABSEP & fl)) - while ((c = buf[*pos])) { - if (isspace(c)) + while (buf[*pos]) { + if (isspace((u_char)buf[*pos])) if ('\\' != buf[*pos - 1]) break; (*pos)++; @@ -420,7 +589,7 @@ args(struct mdoc *mdoc, int line, return(ARGS_WORD); if ( ! (ARGS_TABSEP & fl)) - while (buf[*pos] && isspace((int)buf[*pos])) + while (buf[*pos] && isspace((u_char)buf[*pos])) (*pos)++; if (buf[*pos]) @@ -452,7 +621,7 @@ args(struct mdoc *mdoc, int line, if (0 == buf[*pos]) return(ARGS_QWORD); - while (buf[*pos] && isspace((int)buf[*pos])) + while (buf[*pos] && isspace((u_char)buf[*pos])) (*pos)++; if (buf[*pos]) @@ -466,9 +635,17 @@ args(struct mdoc *mdoc, int line, static int -lookup(int tok, const char *argv) +argv_a2arg(int tok, const char *argv) { + /* + * Parse an argument identifier from its text. XXX - this + * should really be table-driven to clarify the code. + * + * If you add an argument to the list, make sure that you + * register it here with its one or more macros! 
+ */ + switch (tok) { case (MDOC_An): if (xstrcmp(argv, "split")) @@ -543,84 +720,6 @@ lookup(int tok, const char *argv) if (xstrcmp(argv, "std")) return(MDOC_Std); break; - - case (MDOC_St): - if (xstrcmp(argv, "p1003.1-88")) - return(MDOC_p1003_1_88); - else if (xstrcmp(argv, "p1003.1-90")) - return(MDOC_p1003_1_90); - else if (xstrcmp(argv, "p1003.1-96")) - return(MDOC_p1003_1_96); - else if (xstrcmp(argv, "p1003.1-2001")) - return(MDOC_p1003_1_2001); - else if (xstrcmp(argv, "p1003.1-2004")) - return(MDOC_p1003_1_2004); - else if (xstrcmp(argv, "p1003.1")) - return(MDOC_p1003_1); - else if (xstrcmp(argv, "p1003.1b")) - return(MDOC_p1003_1b); - else if (xstrcmp(argv, "p1003.1b-93")) - return(MDOC_p1003_1b_93); - else if (xstrcmp(argv, "p1003.1c-95")) - return(MDOC_p1003_1c_95); - else if (xstrcmp(argv, "p1003.1g-2000")) - return(MDOC_p1003_1g_2000); - else if (xstrcmp(argv, "p1003.2-92")) - return(MDOC_p1003_2_92); - else if (xstrcmp(argv, "p1003.2-95")) - return(MDOC_p1387_2_95); - else if (xstrcmp(argv, "p1003.2")) - return(MDOC_p1003_2); - else if (xstrcmp(argv, "p1387.2-95")) - return(MDOC_p1387_2); - else if (xstrcmp(argv, "isoC-90")) - return(MDOC_isoC_90); - else if (xstrcmp(argv, "isoC-amd1")) - return(MDOC_isoC_amd1); - else if (xstrcmp(argv, "isoC-tcor1")) - return(MDOC_isoC_tcor1); - else if (xstrcmp(argv, "isoC-tcor2")) - return(MDOC_isoC_tcor2); - else if (xstrcmp(argv, "isoC-99")) - return(MDOC_isoC_99); - else if (xstrcmp(argv, "ansiC")) - return(MDOC_ansiC); - else if (xstrcmp(argv, "ansiC-89")) - return(MDOC_ansiC_89); - else if (xstrcmp(argv, "ansiC-99")) - return(MDOC_ansiC_99); - else if (xstrcmp(argv, "ieee754")) - return(MDOC_ieee754); - else if (xstrcmp(argv, "iso8802-3")) - return(MDOC_iso8802_3); - else if (xstrcmp(argv, "xpg3")) - return(MDOC_xpg3); - else if (xstrcmp(argv, "xpg4")) - return(MDOC_xpg4); - else if (xstrcmp(argv, "xpg4.2")) - return(MDOC_xpg4_2); - else if (xstrcmp(argv, "xpg4.3")) - return(MDOC_xpg4_3); - else if 
(xstrcmp(argv, "xbd5")) - return(MDOC_xbd5); - else if (xstrcmp(argv, "xcu5")) - return(MDOC_xcu5); - else if (xstrcmp(argv, "xsh5")) - return(MDOC_xsh5); - else if (xstrcmp(argv, "xns5")) - return(MDOC_xns5); - else if (xstrcmp(argv, "xns5.2d2.0")) - return(MDOC_xns5_2d2_0); - else if (xstrcmp(argv, "xcurses4.2")) - return(MDOC_xcurses4_2); - else if (xstrcmp(argv, "susv2")) - return(MDOC_susv2); - else if (xstrcmp(argv, "susv3")) - return(MDOC_susv3); - else if (xstrcmp(argv, "svid4")) - return(MDOC_svid4); - break; - default: break; } @@ -631,152 +730,122 @@ lookup(int tok, const char *argv) static int argv_multi(struct mdoc *mdoc, int line, - struct mdoc_arg *v, int *pos, char *buf) + struct mdoc_argv *v, int *pos, char *buf) { int c, ppos; char *p; - v->sz = 0; - v->value = xcalloc(MDOC_LINEARG_MAX, sizeof(char *)); - ppos = *pos; - for (v->sz = 0; v->sz < MDOC_LINEARG_MAX; v->sz++) { + for (v->sz = 0; ; v->sz++) { if ('-' == buf[*pos]) break; c = args(mdoc, line, pos, buf, ARGS_QUOTED, &p); - if (ARGS_ERROR == c) { - free(v->value); + if (ARGS_ERROR == c) return(0); - } else if (ARGS_EOLN == c) + else if (ARGS_EOLN == c) break; - v->value[v->sz] = p; + + if (0 == v->sz % 5) + v->value = xrealloc(v->value, + (v->sz + 5) * sizeof(char *)); + + v->value[(int)v->sz] = xstrdup(p); } - if (0 < v->sz && v->sz < MDOC_LINEARG_MAX) + if (v->sz) return(1); - free(v->value); - if (0 == v->sz) - return(perr(mdoc, line, ppos, EARGVAL)); - - return(perr(mdoc, line, ppos, EARGMANY)); + return(perr(mdoc, line, ppos, EARGVAL)); } static int -argv_single(struct mdoc *mdoc, int line, - struct mdoc_arg *v, int *pos, char *buf) +argv_opt_single(struct mdoc *mdoc, int line, + struct mdoc_argv *v, int *pos, char *buf) { - int c, ppos; + int c; char *p; - ppos = *pos; + if ('-' == buf[*pos]) + return(1); c = args(mdoc, line, pos, buf, ARGS_QUOTED, &p); if (ARGS_ERROR == c) return(0); if (ARGS_EOLN == c) - return(perr(mdoc, line, ppos, EARGVAL)); + return(1); v->sz = 1; v->value = 
xcalloc(1, sizeof(char *)); - v->value[0] = p; + v->value[0] = xstrdup(p); return(1); } +/* + * Parse a single, mandatory value from the stream. + */ static int -argv(struct mdoc *mdoc, int line, - struct mdoc_arg *v, int *pos, char *buf) +argv_single(struct mdoc *mdoc, int line, + struct mdoc_argv *v, int *pos, char *buf) { + int c, ppos; + char *p; - v->sz = 0; - v->value = NULL; + ppos = *pos; - switch (v->arg) { - case(MDOC_Std): - /* FALLTHROUGH */ - case(MDOC_Width): - /* FALLTHROUGH */ - case(MDOC_Offset): - return(argv_single(mdoc, line, v, pos, buf)); - case(MDOC_Column): - return(argv_multi(mdoc, line, v, pos, buf)); - default: - break; - } + c = args(mdoc, line, pos, buf, ARGS_QUOTED, &p); + if (ARGS_ERROR == c) + return(0); + if (ARGS_EOLN == c) + return(perr(mdoc, line, ppos, EARGVAL)); + v->sz = 1; + v->value = xcalloc(1, sizeof(char *)); + v->value[0] = xstrdup(p); return(1); } -int -mdoc_argv(struct mdoc *mdoc, int line, int tok, - struct mdoc_arg *v, int *pos, char *buf) +/* + * Determine rules for parsing arguments. Arguments can either accept + * no parameters, an optional single parameter, one parameter, or + * multiple parameters. + */ +static int +argv(struct mdoc *mdoc, int tok, int line, + struct mdoc_argv *v, int *pos, char *buf) { - int i; - char *p; - - (void)memset(v, 0, sizeof(struct mdoc_arg)); - - if (0 == buf[*pos]) - return(ARGV_EOLN); + int fl; - assert( ! isspace((int)buf[*pos])); - - if ('-' != buf[*pos]) - return(ARGV_WORD); - - i = *pos; - p = &buf[++(*pos)]; - - v->line = line; - v->pos = *pos; + v->sz = 0; + v->value = NULL; + fl = mdoc_argvflags[v->arg]; - assert(*pos > 0); + /* + * Override the default per-argument value. 
+ */ - /* LINTED */ - while (buf[*pos]) { - if (isspace((int)buf[*pos])) - if ('\\' != buf[*pos - 1]) - break; - (*pos)++; + switch (tok) { + case (MDOC_Ex): + fl = ARGV_OPT_SINGLE; + break; + default: + break; } - if (buf[*pos]) - buf[(*pos)++] = 0; - - if (MDOC_ARG_MAX == (v->arg = lookup(tok, p))) { - if ( ! pwarn(mdoc, line, i, WARGVPARM)) - return(ARGV_ERROR); - return(ARGV_WORD); + switch (fl) { + case (ARGV_SINGLE): + return(argv_single(mdoc, line, v, pos, buf)); + case (ARGV_MULTI): + return(argv_multi(mdoc, line, v, pos, buf)); + case (ARGV_OPT_SINGLE): + return(argv_opt_single(mdoc, line, v, pos, buf)); + default: + /* ARGV_NONE */ + break; } - while (buf[*pos] && isspace((int)buf[*pos])) - (*pos)++; - - /* FIXME: whitespace if no value. */ - - if ( ! argv(mdoc, line, v, pos, buf)) - return(ARGV_ERROR); - - return(ARGV_ARG); -} - - -void -mdoc_argv_free(int sz, struct mdoc_arg *arg) -{ - int i; - - for (i = 0; i < sz; i++) { - if (0 == arg[i].sz) { - assert(NULL == arg[i].value); - continue; - } - assert(arg[i].value); - free(arg[i].value); - } + return(1); } -