From b3bbf7061a5906d3086a9fa08f36d77d77302e81 Mon Sep 17 00:00:00 2001 From: Kristaps Dzonsons Date: Tue, 24 Feb 2009 11:43:13 +0000 Subject: Escape-sequence validation in place (for nodes). --- Makefile | 13 +++++--- argv.c | 8 ++--- private.h | 3 +- regress/test.escape.00 | 20 +++++++++++++ regress/test.escape.01 | 20 +++++++++++++ regress/test.escape.02 | 20 +++++++++++++ regress/test.escape.03 | 20 +++++++++++++ regress/test.escape.04 | 20 +++++++++++++ regress/test.list.05 | 4 +-- strings.c | 50 +++++++++++++++++++++++++++++-- validate.c | 80 +++++++++++++++++++++++++++++++++++++++----------- 11 files changed, 227 insertions(+), 31 deletions(-) create mode 100644 regress/test.escape.00 create mode 100644 regress/test.escape.01 create mode 100644 regress/test.escape.02 create mode 100644 regress/test.escape.03 create mode 100644 regress/test.escape.04 diff --git a/Makefile b/Makefile index 443c6e99..29d51539 100644 --- a/Makefile +++ b/Makefile @@ -78,9 +78,11 @@ FAIL = regress/test.empty \ regress/test.prologue.31 \ regress/test.prologue.32 \ regress/test.prologue.33 \ - regress/test.sh.01 \ - regress/test.sh.02 \ - regress/test.sh.03 + regress/test.sh.03 \ + regress/test.escape.01 \ + regress/test.escape.02 \ + regress/test.escape.03 \ + regress/test.escape.04 SUCCEED = regress/test.prologue.05 \ regress/test.prologue.07 \ @@ -103,7 +105,10 @@ SUCCEED = regress/test.prologue.05 \ regress/test.list.03 \ regress/test.list.04 \ regress/test.list.05 \ - regress/test.list.06 + regress/test.list.06 \ + regress/test.sh.01 \ + regress/test.sh.02 \ + regress/test.escape.00 REGRESS = $(FAIL) $(SUCCEED) diff --git a/argv.c b/argv.c index 5f591f3c..e9eafffc 100644 --- a/argv.c +++ b/argv.c @@ -1,4 +1,4 @@ -/* $Id: argv.c,v 1.29 2009/02/23 15:34:53 kristaps Exp $ */ +/* $Id: argv.c,v 1.30 2009/02/24 11:43:13 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -35,7 +35,7 @@ #define ARGS_DELIM (1 << 1) #define ARGS_TABSEP (1 << 2) -static int lookup(int, const char *); +static int argv_a2arg(int, const char *); static int args(struct mdoc *, int, int *, char *, int, char **); static int argv(struct mdoc *, int, @@ -466,7 +466,7 @@ args(struct mdoc *mdoc, int line, static int -lookup(int tok, const char *argv) +argv_a2arg(int tok, const char *argv) { switch (tok) { @@ -747,7 +747,7 @@ mdoc_argv(struct mdoc *mdoc, int line, int tok, if (buf[*pos]) buf[(*pos)++] = 0; - if (MDOC_ARG_MAX == (v->arg = lookup(tok, p))) { + if (MDOC_ARG_MAX == (v->arg = argv_a2arg(tok, p))) { if ( ! pwarn(mdoc, line, i, WARGVPARM)) return(ARGV_ERROR); return(ARGV_WORD); diff --git a/private.h b/private.h index 8e8c41e3..3a49fde0 100644 --- a/private.h +++ b/private.h @@ -1,4 +1,4 @@ -/* $Id: private.h,v 1.79 2009/02/23 12:45:19 kristaps Exp $ */ +/* $Id: private.h,v 1.80 2009/02/24 11:43:13 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -125,6 +125,7 @@ void *mdoc_tokhash_alloc(void); int mdoc_tokhash_find(const void *, const char *); void mdoc_tokhash_free(void *); int mdoc_iscdelim(char); +size_t mdoc_isescape(const char *); enum mdoc_sec mdoc_atosec(const char *); enum mdoc_msec mdoc_atomsec(const char *); enum mdoc_vol mdoc_atovol(const char *); diff --git a/regress/test.escape.00 b/regress/test.escape.00 new file mode 100644 index 00000000..0e801382 --- /dev/null +++ b/regress/test.escape.00 @@ -0,0 +1,20 @@ +.\" +.Dd $Mdocdate: February 24 2009 $ +.Dt mdoc 3 +.Os +.\" +.Sh NAME +.Nm mdoc_free +.Nd mdoc macro compiler library +.\" +.Sh SYNOPSIS +Valid escape: \(ab +Valid escape: \[d] +Valid escape: \[dsdfajsdflaksjfhalksjdfh__----] +Valid escape: \\ +Valid escape: \e +Valid escape: \` +Valid escape: \' +Valid escape: \. +Valid escape: \- +Valid escape: \ diff --git a/regress/test.escape.01 b/regress/test.escape.01 new file mode 100644 index 00000000..ff9358c5 --- /dev/null +++ b/regress/test.escape.01 @@ -0,0 +1,20 @@ +.\" +.Dd $Mdocdate: February 24 2009 $ +.Dt mdoc 3 +.Os +.\" +.Sh NAME +.Nm mdoc_free +.Nd mdoc macro compiler library +.\" +.Sh SYNOPSIS +Valid escape: \(ab +Valid escape: \[d] +Valid escape: \[dsdfajsdflaksjfhalksjdfh__----] +Valid escape: \\ +Valid escape: \e +Valid escape: \` +Valid escape: \' +Valid escape: \. +Valid escape: \- +Invalid escape: \" diff --git a/regress/test.escape.02 b/regress/test.escape.02 new file mode 100644 index 00000000..6d121fcd --- /dev/null +++ b/regress/test.escape.02 @@ -0,0 +1,20 @@ +.\" +.Dd $Mdocdate: February 24 2009 $ +.Dt mdoc 3 +.Os +.\" +.Sh NAME +.Nm mdoc_free +.Nd mdoc macro compiler library +.\" +.Sh SYNOPSIS +Valid escape: \(ab +Valid escape: \[d] +Valid escape: \[dsdfajsdflaksjfhalksjdfh__----] +Valid escape: \\ +Valid escape: \e +Valid escape: \` +Valid escape: \' +Valid escape: \. +Valid escape: \- +Invalid escape: \( diff --git a/regress/test.escape.03 b/regress/test.escape.03 new file mode 100644 index 00000000..9e012fa7 --- /dev/null +++ b/regress/test.escape.03 @@ -0,0 +1,20 @@ +.\" +.Dd $Mdocdate: February 24 2009 $ +.Dt mdoc 3 +.Os +.\" +.Sh NAME +.Nm mdoc_free +.Nd mdoc macro compiler library +.\" +.Sh SYNOPSIS +Valid escape: \(ab +Valid escape: \[d] +Valid escape: \[dsdfajsdflaksjfhalksjdfh__----] +Valid escape: \\ +Valid escape: \e +Valid escape: \` +Valid escape: \' +Valid escape: \. +Valid escape: \- +Invalid escape: \ diff --git a/regress/test.escape.04 b/regress/test.escape.04 new file mode 100644 index 00000000..b906a1b4 --- /dev/null +++ b/regress/test.escape.04 @@ -0,0 +1,20 @@ +.\" +.Dd $Mdocdate: February 24 2009 $ +.Dt mdoc 3 +.Os +.\" +.Sh NAME +.Nm mdoc_free +.Nd mdoc macro compiler library +.\" +.Sh SYNOPSIS +Valid escape: \(ab +Valid escape: \[d] +Valid escape: \[dsdfajsdflaksjfhalksjdfh__----] +Valid escape: \\ +Valid escape: \e +Valid escape: \` +Valid escape: \' +Valid escape: \. +Valid escape: \- +Invalid escape: \[ diff --git a/regress/test.list.05 b/regress/test.list.05 index 8c462588..b6b56d91 100644 --- a/regress/test.list.05 +++ b/regress/test.list.05 @@ -1,4 +1,4 @@ -.Dd $Mdocdate: January 22 2009 $ +.Dd $Mdocdate: February 24 2009 $ .Dt .Os .Sh NAME @@ -6,5 +6,5 @@ .Nd description .Sh DESCRIPTION .Bl -column "xxxxx" "xxxxx" -.It d \ e f +.It d \ e f .El diff --git a/strings.c b/strings.c index e7538cf5..c3b0403d 100644 --- a/strings.c +++ b/strings.c @@ -1,4 +1,4 @@ -/* $Id: strings.c,v 1.15 2009/02/23 15:34:53 kristaps Exp $ */ +/* $Id: strings.c,v 1.16 2009/02/24 11:43:13 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -35,6 +35,50 @@ extern char *strptime(const char *, const char *, struct tm *); #endif + +size_t +mdoc_isescape(const char *p) +{ + size_t c; + + if ('\\' != *p++) + return(0); + + switch (*p) { + case ('\\'): + /* FALLTHROUGH */ + case ('\''): + /* FALLTHROUGH */ + case ('`'): + /* FALLTHROUGH */ + case ('-'): + /* FALLTHROUGH */ + case (' '): + /* FALLTHROUGH */ + case ('.'): + /* FALLTHROUGH */ + case ('e'): + return(2); + case ('('): + if (0 == *++p) + return(0); + if (0 == *++p) + return(0); + return(4); + case ('['): + break; + default: + return(0); + } + + for (c = 3, p++; *p && ']' != *p; p++, c++) + if (isspace(*p)) + break; + + return(*p == ']' ? c : 0); +} + + int mdoc_iscdelim(char p) { @@ -132,9 +176,9 @@ mdoc_atotime(const char *p) (void)memset(&tm, 0, sizeof(struct tm)); - if (xstrcmp(p, "$Mdocdate: February 23 2009 $")) + if (xstrcmp(p, "$Mdocdate: February 24 2009 $")) return(time(NULL)); - if ((pp = strptime(p, "$Mdocdate: February 23 2009 $", &tm)) && 0 == *pp) + if ((pp = strptime(p, "$Mdocdate: February 24 2009 $", &tm)) && 0 == *pp) return(mktime(&tm)); /* XXX - this matches "June 1999", which is wrong. */ if ((pp = strptime(p, "%b %d %Y", &tm)) && 0 == *pp) diff --git a/validate.c b/validate.c index 085415ea..361fecb6 100644 --- a/validate.c +++ b/validate.c @@ -1,4 +1,4 @@ -/* $Id: validate.c,v 1.53 2009/02/23 22:51:10 kristaps Exp $ */ +/* $Id: validate.c,v 1.54 2009/02/24 11:43:13 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -35,7 +35,6 @@ typedef int (*v_post)(struct mdoc *); /* FIXME: some sections should only occur in specific msecs. */ /* FIXME: ignoring Pp. */ /* FIXME: math symbols. */ -/* FIXME: valid character-escape checks. */ /* FIXME: .Fd only in synopsis section. */ struct valids { @@ -109,6 +108,7 @@ static int post_xr(struct mdoc *); static int post_nm(struct mdoc *); static int post_bf(struct mdoc *); static int post_root(struct mdoc *); +static int pre_text(struct mdoc *, const struct mdoc_node *); /* Collections of pre-child-parse routines. */ @@ -385,6 +385,22 @@ check_msec(struct mdoc *mdoc, struct mdoc_node *node, } +static int +check_parent(struct mdoc *mdoc, struct mdoc_node *n, + int tok, enum mdoc_type t) +{ + + assert(n->parent); + if ((MDOC_ROOT == t || tok == n->parent->tok) && + (t == n->parent->type)) + return(1); + + return(mdoc_nerr(mdoc, n, "require parent %s", + MDOC_ROOT == t ? "" : mdoc_macronames[tok])); +} + + + static int pre_display(struct mdoc *mdoc, struct mdoc_node *node) { @@ -921,6 +937,26 @@ ebool(struct mdoc *mdoc) } +static int +pre_text(struct mdoc *mdoc, const struct mdoc_node *n) +{ + size_t c; + const char *p; + + for (p = n->data.text.string; *p; p++) { + if ('\\' != *p) + continue; + if ((c = mdoc_isescape(p))) { + p += (c - 1); + continue; + } + return(mdoc_nerr(mdoc, n, "bad escape sequence")); + } + + return(1); +} + + static int post_root(struct mdoc *mdoc) { @@ -931,10 +967,10 @@ post_root(struct mdoc *mdoc) return(mdoc_err(mdoc, "document lacks prologue")); if (MDOC_BLOCK != mdoc->first->child->type) - return(mdoc_err(mdoc, "lacking post-prologue `%s'", + return(mdoc_err(mdoc, "lacking post-prologue %s", mdoc_macronames[MDOC_Sh])); if (MDOC_Sh != mdoc->first->child->tok) - return(mdoc_err(mdoc, "lacking post-prologue `%s'", + return(mdoc_err(mdoc, "lacking post-prologue %s", mdoc_macronames[MDOC_Sh])); return(1); @@ -969,8 +1005,8 @@ post_sh_body(struct mdoc *mdoc) */ if (NULL == (n = mdoc->last->child)) - return(mdoc_warn(mdoc, WARN_COMPAT, "section NAME " - "should contain %s and %s", + return(mdoc_warn(mdoc, WARN_SYNTAX, + "section should have %s and %s", mdoc_macronames[MDOC_Nm], mdoc_macronames[MDOC_Nd])); @@ -979,9 +1015,8 @@ post_sh_body(struct mdoc *mdoc) continue; if (MDOC_TEXT == n->type) continue; - if ( ! (mdoc_nwarn(mdoc, n, WARN_COMPAT, "section " - "NAME should contain %s as " - "initial body child", + if ( ! (mdoc_nwarn(mdoc, n, WARN_SYNTAX, + "section should have %s first", mdoc_macronames[MDOC_Nm]))) return(0); } @@ -989,8 +1024,8 @@ post_sh_body(struct mdoc *mdoc) if (MDOC_ELEM == n->type && MDOC_Nd == n->tok) return(1); - return(mdoc_warn(mdoc, WARN_COMPAT, "section NAME should " - "contain %s as the last child", + return(mdoc_warn(mdoc, WARN_SYNTAX, + "section should have %s last", mdoc_macronames[MDOC_Nd])); } @@ -1003,19 +1038,22 @@ post_sh_head(struct mdoc *mdoc) assert(MDOC_Sh == mdoc->last->tok); - if ( ! xstrlcats(buf, mdoc->last->child, 64)) - return(mdoc_err(mdoc, "macro parameters too long")); + if ( ! xstrlcats(buf, mdoc->last->child, sizeof(buf))) + return(mdoc_err(mdoc, "argument too long")); sec = mdoc_atosec(buf); if (SEC_BODY == mdoc->lastnamed && SEC_NAME != sec) - return(mdoc_err(mdoc, "section NAME must be first")); + return(mdoc_warn(mdoc, WARN_SYNTAX, + "section NAME should be first")); if (SEC_CUSTOM == sec) return(1); if (sec == mdoc->lastnamed) - return(mdoc_warn(mdoc, WARN_SYNTAX, "section repeated")); + return(mdoc_warn(mdoc, WARN_SYNTAX, + "section repeated")); if (sec < mdoc->lastnamed) - return(mdoc_warn(mdoc, WARN_SYNTAX, "section out of conventional order")); + return(mdoc_warn(mdoc, WARN_SYNTAX, + "section out of order")); return(1); } @@ -1027,7 +1065,7 @@ mdoc_valid_pre(struct mdoc *mdoc, struct mdoc_node *node) v_pre *p; if (MDOC_TEXT == node->type) - return(1); + return(pre_text(mdoc, node)); assert(MDOC_ROOT != node->type); if (NULL == mdoc_valids[node->tok].pre) @@ -1044,6 +1082,14 @@ mdoc_valid_post(struct mdoc *mdoc) { v_post *p; + /* + * This check occurs after the macro's children have been filled + * in: postfix validation. Since this happens when we're + * rewinding the scope tree, it's possible to have multiple + * invocations (as by design, for now), we set bit MDOC_VALID to + * indicate that we've validated. + */ + if (MDOC_VALID & mdoc->last->flags) return(1); mdoc->last->flags |= MDOC_VALID; -- cgit v1.2.3-56-ge451