X-Git-Url: https://git.cameronkatri.com/mandoc.git/blobdiff_plain/85e908d626f50934f291b1fbeea2afbb4f205cbd..6a4217540b7dab72e441d0fb427eed052d3d0581:/argv.c?ds=inline diff --git a/argv.c b/argv.c index e97f536d..f26fca0a 100644 --- a/argv.c +++ b/argv.c @@ -1,4 +1,4 @@ -/* $Id: argv.c,v 1.21 2009/01/20 13:49:36 kristaps Exp $ */ +/* $Id: argv.c,v 1.27 2009/02/23 12:45:19 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -26,54 +26,285 @@ #include "private.h" /* - * Parse arguments and parameters of macros. Arguments follow the - * syntax of `-arg [val [valN...]]', while parameters are free-form text - * following arguments (if any). This file must correctly handle the - * strange punctuation rules dictated by groff. + * Routines to parse arguments of macros. Arguments follow the syntax + * of `-arg [val [valN...]]'. Arguments come in all types: quoted + * arguments, multiple arguments per value, no-value arguments, etc. */ -/* FIXME: .It called with -column and quoted arguments. */ +#define ARGS_QUOTED (1 << 0) +#define ARGS_DELIM (1 << 1) +#define ARGS_TABSEP (1 << 2) static int lookup(int, const char *); -static int parse(struct mdoc *, int, +static int args(struct mdoc *, int, int *, + char *, int, char **); +static int argv(struct mdoc *, int, struct mdoc_arg *, int *, char *); -static int parse_single(struct mdoc *, int, +static int argv_single(struct mdoc *, int, struct mdoc_arg *, int *, char *); -static int parse_multi(struct mdoc *, int, +static int argv_multi(struct mdoc *, int, struct mdoc_arg *, int *, char *); -static int postparse(struct mdoc *, int, +static int postargv(struct mdoc *, int, const struct mdoc_arg *, int); +static int pwarn(struct mdoc *, int, int, int); +static int perr(struct mdoc *, int, int, int); + +/* Warning messages. */ + +#define WQUOTPARM (0) +#define WARGVPARM (1) +#define WCOLEMPTY (2) +#define WTAILWS (3) + +/* Error messages. */ + +#define EQUOTTERM (0) +#define EOFFSET (1) +#define EARGVAL (2) +#define EARGMANY (3) + +static int mdoc_argflags[MDOC_MAX] = { + 0, /* \" */ + 0, /* Dd */ + 0, /* Dt */ + 0, /* Os */ + 0, /* Sh */ + 0, /* Ss */ + ARGS_DELIM, /* Pp */ + ARGS_DELIM, /* D1 */ + ARGS_DELIM, /* Dl */ + 0, /* Bd */ + 0, /* Ed */ + 0, /* Bl */ + 0, /* El */ + ARGS_DELIM, /* It */ + ARGS_DELIM, /* Ad */ + ARGS_DELIM, /* An */ + ARGS_DELIM, /* Ar */ + ARGS_QUOTED, /* Cd */ + ARGS_DELIM, /* Cm */ + ARGS_DELIM, /* Dv */ + ARGS_DELIM, /* Er */ + ARGS_DELIM, /* Ev */ + 0, /* Ex */ + ARGS_DELIM | ARGS_QUOTED, /* Fa */ + 0, /* Fd */ + ARGS_DELIM, /* Fl */ + ARGS_DELIM | ARGS_QUOTED, /* Fn */ + ARGS_DELIM | ARGS_QUOTED, /* Ft */ + ARGS_DELIM, /* Ic */ + 0, /* In */ + ARGS_DELIM, /* Li */ + 0, /* Nd */ + ARGS_DELIM, /* Nm */ + ARGS_DELIM, /* Op */ + 0, /* Ot */ + ARGS_DELIM, /* Pa */ + 0, /* Rv */ + ARGS_DELIM, /* St */ + ARGS_DELIM, /* Va */ + ARGS_DELIM, /* Vt */ + ARGS_DELIM, /* Xr */ + ARGS_QUOTED, /* %A */ + ARGS_QUOTED, /* %B */ + ARGS_QUOTED, /* %D */ + ARGS_QUOTED, /* %I */ + ARGS_QUOTED, /* %J */ + ARGS_QUOTED, /* %N */ + ARGS_QUOTED, /* %O */ + ARGS_QUOTED, /* %P */ + ARGS_QUOTED, /* %R */ + ARGS_QUOTED, /* %T */ + ARGS_QUOTED, /* %V */ + ARGS_DELIM, /* Ac */ + 0, /* Ao */ + ARGS_DELIM, /* Aq */ + ARGS_DELIM, /* At */ + ARGS_DELIM, /* Bc */ + 0, /* Bf */ + 0, /* Bo */ + ARGS_DELIM, /* Bq */ + ARGS_DELIM, /* Bsx */ + ARGS_DELIM, /* Bx */ + 0, /* Db */ + ARGS_DELIM, /* Dc */ + 0, /* Do */ + ARGS_DELIM, /* Dq */ + ARGS_DELIM, /* Ec */ + 0, /* Ef */ + ARGS_DELIM, /* Em */ + 0, /* Eo */ + ARGS_DELIM, /* Fx */ + ARGS_DELIM, /* Ms */ + ARGS_DELIM, /* No */ + ARGS_DELIM, /* Ns */ + ARGS_DELIM, /* Nx */ + ARGS_DELIM, /* Ox */ + ARGS_DELIM, /* Pc */ + ARGS_DELIM, /* Pf */ + 0, /* Po */ + ARGS_DELIM, /* Pq */ + ARGS_DELIM, /* Qc */ + ARGS_DELIM, /* Ql */ + 0, /* Qo */ + ARGS_DELIM, /* Qq */ + 0, /* Re */ + 0, /* Rs */ + ARGS_DELIM, /* Sc */ + 0, /* So */ + ARGS_DELIM, /* Sq */ + 0, /* Sm */ + ARGS_DELIM, /* Sx */ + ARGS_DELIM, /* Sy */ + ARGS_DELIM, /* Tn */ + ARGS_DELIM, /* Ux */ + ARGS_DELIM, /* Xc */ + 0, /* Xo */ + 0, /* Fo */ + 0, /* Fc */ + 0, /* Oo */ + ARGS_DELIM, /* Oc */ + 0, /* Bk */ + 0, /* Ek */ + 0, /* Bt */ + 0, /* Hf */ + 0, /* Fr */ + 0, /* Ud */ +}; + + +static int +perr(struct mdoc *mdoc, int line, int pos, int code) +{ + int c; + + switch (code) { + case (EQUOTTERM): + c = mdoc_perr(mdoc, line, pos, + "unterminated quoted parameter"); + break; + case (EOFFSET): + c = mdoc_perr(mdoc, line, pos, + "invalid value for offset argument"); + break; + case (EARGVAL): + c = mdoc_perr(mdoc, line, pos, + "argument requires a value"); + break; + case (EARGMANY): + c = mdoc_perr(mdoc, line, pos, + "too many values for argument"); + break; + default: + abort(); + /* NOTREACHED */ + } + return(c); +} + + +static int +pwarn(struct mdoc *mdoc, int line, int pos, int code) +{ + int c; + + switch (code) { + case (WQUOTPARM): + c = mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, + "unexpected quoted parameter"); + break; + case (WARGVPARM): + c = mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, + "argument-like parameter"); + break; + case (WCOLEMPTY): + c = mdoc_pwarn(mdoc, line, pos, WARN_SYNTAX, + "last list column is empty"); + break; + case (WTAILWS): + c = mdoc_pwarn(mdoc, line, pos, WARN_COMPAT, + "trailing whitespace"); + break; + default: + abort(); + /* NOTREACHED */ + } + return(c); +} int -mdoc_args(struct mdoc *mdoc, int line, int *pos, char *buf, int fl, char **v) +mdoc_args(struct mdoc *mdoc, int line, + int *pos, char *buf, int tok, char **v) { - int i; + int fl, c, i; + struct mdoc_node *n; + + fl = (0 == tok) ? 0 : mdoc_argflags[tok]; + + /* + * First see if we should use TABSEP (Bl -column). This + * invalidates the use of ARGS_DELIM. + */ + + if (MDOC_It == tok) { + for (n = mdoc->last; n; n = n->parent) + if (MDOC_BLOCK == n->type) + if (MDOC_Bl == n->tok) + break; + assert(n); + c = (int)n->data.block.argc; + assert(c > 0); + + /* LINTED */ + for (i = 0; i < c; i++) { + if (MDOC_Column != n->data.block.argv[i].arg) + continue; + fl |= ARGS_TABSEP; + fl &= ~ARGS_DELIM; + break; + } + } + + return(args(mdoc, line, pos, buf, fl, v)); +} + + +static int +args(struct mdoc *mdoc, int line, + int *pos, char *buf, int fl, char **v) +{ + int i, c; + char *p, *pp; + + assert(*pos > 0); if (0 == buf[*pos]) return(ARGS_EOLN); if ('\"' == buf[*pos] && ! (fl & ARGS_QUOTED)) - if ( ! mdoc_pwarn(mdoc, line, *pos, WARN_SYNTAX, "unexpected quoted parameter")) + if ( ! pwarn(mdoc, line, *pos, WQUOTPARM)) return(ARGS_ERROR); if ('-' == buf[*pos]) - if ( ! mdoc_pwarn(mdoc, line, *pos, WARN_SYNTAX, "argument-like parameter")) + if ( ! pwarn(mdoc, line, *pos, WARGVPARM)) return(ARGS_ERROR); + /* + * If the first character is a delimiter and we're to look for + * delimited strings, then pass down the buffer seeing if it + * follows the pattern of [[::delim::][ ]+]+. + */ + if ((fl & ARGS_DELIM) && mdoc_iscdelim(buf[*pos])) { - /* - * If ARGS_DELIM, return ARGS_PUNCT if only space-separated - * punctuation remains. - */ - for (i = *pos; buf[i]; ) { - if ( ! mdoc_iscdelim(buf[i])) + for (i = *pos; (c = buf[i]); ) { + if ( ! mdoc_iscdelim(c)) break; i++; - if (0 == buf[i] || ! isspace((int)buf[i])) + if (0 == buf[i] || ! isspace(c)) break; i++; - while (buf[i] && isspace((int)buf[i])) + while (buf[i] && isspace(c)) i++; } if (0 == buf[i]) { @@ -82,37 +313,110 @@ mdoc_args(struct mdoc *mdoc, int line, int *pos, char *buf, int fl, char **v) } } - /* Parse routine for non-quoted string. */ + /* First parse non-quoted strings. */ - assert(*pos > 0); if ('\"' != buf[*pos] || ! (ARGS_QUOTED & fl)) { *v = &buf[*pos]; - /* FIXME: UGLY tab-sep processing. */ + /* + * Thar be dragons here! If we're tab-separated, search + * ahead for either a tab or the `Ta' macro. If a tab + * is detected, it mustn't be escaped; if a `Ta' is + * detected, it must be space-buffered before and after. + * If either of these hold true, then prune out the + * extra spaces and call it an argument. + */ - if (ARGS_TABSEP & fl) - while (buf[*pos]) { - if ('\t' == buf[*pos]) + if (ARGS_TABSEP & fl) { + /* Scan ahead to unescaped tab. */ + + for (p = *v; ; p++) { + if (NULL == (p = strchr(p, '\t'))) break; - if ('T' == buf[*pos]) { - (*pos)++; - if (0 == buf[*pos]) - break; - if ('a' == buf[*pos]) { - buf[*pos - 1] = 0; - break; - } - } - (*pos)++; + if (p == *v) + break; + if ('\\' != *(p - 1)) + break; + } + + /* Scan ahead to unescaped `Ta'. */ + + for (pp = *v; ; pp++) { + if (NULL == (pp = strstr(pp, "Ta"))) + break; + if (pp > *v && ' ' != *(pp - 1)) + continue; + if (' ' == *(pp + 2) || 0 == *(pp + 2)) + break; + } + + /* Choose delimiter tab/Ta. */ + + if (p && pp) + p = (p < pp ? p : pp); + else if ( ! p && pp) + p = pp; + + /* Strip delimiter's preceding whitespace. */ + + if (p && p > *v) { + pp = p - 1; + while (pp > *v && ' ' == *pp) + pp--; + if (pp == *v && ' ' == *pp) + *pp = 0; + else if (' ' == *pp) + *(pp + 1) = 0; } - else { - while (buf[*pos]) { - if (isspace((int)buf[*pos])) + + /* ...in- and proceding whitespace. */ + + if (p && ('\t' != *p)) { + *p++ = 0; + *p++ = 0; + } else if (p) + *p++ = 0; + + if (p) { + while (' ' == *p) + p++; + if (0 != *p) + *(p - 1) = 0; + *pos += p - *v; + } + + if (p && 0 == *p) + if ( ! pwarn(mdoc, line, *pos, WCOLEMPTY)) + return(0); + if (p && 0 == *p && p > *v && ' ' == *(p - 1)) + if ( ! pwarn(mdoc, line, *pos, WTAILWS)) + return(0); + + if (p) + return(ARGS_WORD); + + /* Configure the eoln case, too. */ + + p = strchr(*v, 0); + assert(p); + + if (p > *v && ' ' == *(p - 1)) + if ( ! pwarn(mdoc, line, *pos, WTAILWS)) + return(0); + *pos += p - *v; + + return(ARGS_WORD); + } + + /* Do non-tabsep look-ahead here. */ + + if ( ! (ARGS_TABSEP & fl)) + while ((c = buf[*pos])) { + if (isspace(c)) if ('\\' != buf[*pos - 1]) break; (*pos)++; } - } if (0 == buf[*pos]) return(ARGS_WORD); @@ -129,7 +433,7 @@ mdoc_args(struct mdoc *mdoc, int line, int *pos, char *buf, int fl, char **v) if (buf[*pos]) return(ARGS_WORD); - if ( ! mdoc_pwarn(mdoc, line, *pos, WARN_COMPAT, "whitespace at end-of-line")) + if ( ! pwarn(mdoc, line, *pos, WTAILWS)) return(ARGS_ERROR); return(ARGS_WORD); @@ -147,7 +451,7 @@ mdoc_args(struct mdoc *mdoc, int line, int *pos, char *buf, int fl, char **v) (*pos)++; if (0 == buf[*pos]) { - (void)mdoc_perr(mdoc, line, *pos, "unterminated quoted parameter"); + (void)perr(mdoc, line, *pos, EQUOTTERM); return(ARGS_ERROR); } @@ -161,7 +465,7 @@ mdoc_args(struct mdoc *mdoc, int line, int *pos, char *buf, int fl, char **v) if (buf[*pos]) return(ARGS_QWORD); - if ( ! mdoc_pwarn(mdoc, line, *pos, WARN_COMPAT, "whitespace at end-of-line")) + if ( ! pwarn(mdoc, line, *pos, WTAILWS)) return(ARGS_ERROR); return(ARGS_QWORD); @@ -333,7 +637,7 @@ lookup(int tok, const char *argv) static int -postparse(struct mdoc *mdoc, int line, const struct mdoc_arg *v, int pos) +postargv(struct mdoc *mdoc, int line, const struct mdoc_arg *v, int pos) { switch (v->arg) { @@ -350,7 +654,7 @@ postparse(struct mdoc *mdoc, int line, const struct mdoc_arg *v, int pos) break; if (xstrcmp(v->value[0], "indent-two")) break; - return(mdoc_perr(mdoc, line, pos, "invalid offset value")); + return(perr(mdoc, line, pos, EOFFSET)); default: break; } @@ -360,7 +664,7 @@ postparse(struct mdoc *mdoc, int line, const struct mdoc_arg *v, int pos) static int -parse_multi(struct mdoc *mdoc, int line, +argv_multi(struct mdoc *mdoc, int line, struct mdoc_arg *v, int *pos, char *buf) { int c, ppos; @@ -374,7 +678,7 @@ parse_multi(struct mdoc *mdoc, int line, for (v->sz = 0; v->sz < MDOC_LINEARG_MAX; v->sz++) { if ('-' == buf[*pos]) break; - c = mdoc_args(mdoc, line, pos, buf, ARGS_QUOTED, &p); + c = args(mdoc, line, pos, buf, ARGS_QUOTED, &p); if (ARGS_ERROR == c) { free(v->value); return(0); @@ -387,14 +691,15 @@ parse_multi(struct mdoc *mdoc, int line, return(1); free(v->value); - return(mdoc_perr(mdoc, line, ppos, 0 == v->sz ? - "argument requires a value" : - "too many values to argument")); + if (0 == v->sz) + return(perr(mdoc, line, ppos, EARGVAL)); + + return(perr(mdoc, line, ppos, EARGMANY)); } static int -parse_single(struct mdoc *mdoc, int line, +argv_single(struct mdoc *mdoc, int line, struct mdoc_arg *v, int *pos, char *buf) { int c, ppos; @@ -402,11 +707,11 @@ parse_single(struct mdoc *mdoc, int line, ppos = *pos; - c = mdoc_args(mdoc, line, pos, buf, ARGS_QUOTED, &p); + c = args(mdoc, line, pos, buf, ARGS_QUOTED, &p); if (ARGS_ERROR == c) return(0); if (ARGS_EOLN == c) - return(mdoc_perr(mdoc, line, ppos, "argument requires a value")); + return(perr(mdoc, line, ppos, EARGVAL)); v->sz = 1; v->value = xcalloc(1, sizeof(char *)); @@ -416,7 +721,7 @@ parse_single(struct mdoc *mdoc, int line, static int -parse(struct mdoc *mdoc, int line, +argv(struct mdoc *mdoc, int line, struct mdoc_arg *v, int *pos, char *buf) { @@ -429,9 +734,9 @@ parse(struct mdoc *mdoc, int line, case(MDOC_Width): /* FALLTHROUGH */ case(MDOC_Offset): - return(parse_single(mdoc, line, v, pos, buf)); + return(argv_single(mdoc, line, v, pos, buf)); case(MDOC_Column): - return(parse_multi(mdoc, line, v, pos, buf)); + return(argv_multi(mdoc, line, v, pos, buf)); default: break; } @@ -445,7 +750,7 @@ mdoc_argv(struct mdoc *mdoc, int line, int tok, struct mdoc_arg *v, int *pos, char *buf) { int i, ppos; - char *argv; + char *p; (void)memset(v, 0, sizeof(struct mdoc_arg)); @@ -458,12 +763,14 @@ mdoc_argv(struct mdoc *mdoc, int line, int tok, return(ARGV_WORD); i = *pos; - argv = &buf[++(*pos)]; + p = &buf[++(*pos)]; v->line = line; v->pos = *pos; assert(*pos > 0); + + /* LINTED */ while (buf[*pos]) { if (isspace((int)buf[*pos])) if ('\\' != buf[*pos - 1]) @@ -474,8 +781,8 @@ mdoc_argv(struct mdoc *mdoc, int line, int tok, if (buf[*pos]) buf[(*pos)++] = 0; - if (MDOC_ARG_MAX == (v->arg = lookup(tok, argv))) { - if ( ! mdoc_pwarn(mdoc, line, i, WARN_SYNTAX, "argument-like parameter")) + if (MDOC_ARG_MAX == (v->arg = lookup(tok, p))) { + if ( ! pwarn(mdoc, line, i, WARGVPARM)) return(ARGV_ERROR); return(ARGV_WORD); } @@ -486,9 +793,9 @@ mdoc_argv(struct mdoc *mdoc, int line, int tok, /* FIXME: whitespace if no value. */ ppos = *pos; - if ( ! parse(mdoc, line, v, pos, buf)) + if ( ! argv(mdoc, line, v, pos, buf)) return(ARGV_ERROR); - if ( ! postparse(mdoc, line, v, ppos)) + if ( ! postargv(mdoc, line, v, ppos)) return(ARGV_ERROR); return(ARGV_ARG);