aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@openbsd.org>2017-06-25 17:43:45 +0000
committerIngo Schwarze <schwarze@openbsd.org>2017-06-25 17:43:45 +0000
commit1fdb4db7a387f66dbf9fc6b0869d19dfe765a5aa (patch)
tree78c84c03a6f5831ed6c6f758e4b370367dba22af
parent968237f527c0925ee4300f535b2efb1d2a9e783f (diff)
downloadmandoc-1fdb4db7a387f66dbf9fc6b0869d19dfe765a5aa.tar.gz
mandoc-1fdb4db7a387f66dbf9fc6b0869d19dfe765a5aa.tar.zst
mandoc-1fdb4db7a387f66dbf9fc6b0869d19dfe765a5aa.zip
Catch typos in .Sh names; suggested by jmc@.
I'm using a very simple, linear time / zero space fuzzy string matching heuristic rather than a full Levenshtein metric, to keep the code both simple and fast.
-rw-r--r--mandoc.17
-rw-r--r--mandoc.h3
-rw-r--r--mdoc_validate.c65
-rw-r--r--read.c3
4 files changed, 73 insertions, 5 deletions
diff --git a/mandoc.1 b/mandoc.1
index 9906a39d..156db98d 100644
--- a/mandoc.1
+++ b/mandoc.1
@@ -1,4 +1,4 @@
-.\" $Id: mandoc.1,v 1.206 2017/06/25 11:42:02 schwarze Exp $
+.\" $Id: mandoc.1,v 1.207 2017/06/25 17:43:45 schwarze Exp $
.\"
.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\" Copyright (c) 2012, 2014-2017 Ingo Schwarze <schwarze@openbsd.org>
@@ -857,6 +857,11 @@ A single manual page contains two copies of the RCS identifier for
the same operating system.
Consider deleting the later instance and moving the first one up
to the top of the page.
+.It Sy "typo in section name"
+.Pq mdoc
+Fuzzy string matching revealed that the argument of an
+.Ic \&Sh
+macro is similar, but not identical to a standard section name.
.It Sy "useless macro"
.Pq mdoc
A
diff --git a/mandoc.h b/mandoc.h
index e6b48f11..8e7ddf44 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -1,4 +1,4 @@
-/* $Id: mandoc.h,v 1.235 2017/06/24 18:58:33 schwarze Exp $ */
+/* $Id: mandoc.h,v 1.236 2017/06/25 17:43:45 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
@@ -56,6 +56,7 @@ enum mandocerr {
MANDOCERR_DATE_LEGACY, /* legacy man(7) date format: Dd ... */
MANDOCERR_RCS_REP, /* duplicate RCS id: ... */
+ MANDOCERR_SEC_TYPO, /* typo in section name: Sh ... */
MANDOCERR_MACRO_USELESS, /* useless macro: macro */
MANDOCERR_BX, /* consider using OS macro: macro */
MANDOCERR_ER_ORDER, /* errnos out of order: Er ... */
diff --git a/mdoc_validate.c b/mdoc_validate.c
index da9be6d1..08f23583 100644
--- a/mdoc_validate.c
+++ b/mdoc_validate.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc_validate.c,v 1.342 2017/06/24 18:58:33 schwarze Exp $ */
+/* $Id: mdoc_validate.c,v 1.343 2017/06/25 17:43:45 schwarze Exp $ */
/*
* Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
@@ -60,6 +60,7 @@ static void check_toptext(struct roff_man *, int, int, const char *);
static int child_an(const struct roff_node *);
static size_t macro2len(enum roff_tok);
static void rewrite_macro2len(struct roff_man *, char **);
+static int similar(const char *, const char *);
static void post_an(POST_ARGS);
static void post_an_norm(POST_ARGS);
@@ -2148,11 +2149,54 @@ post_sh_authors(POST_ARGS)
mdoc->last->line, mdoc->last->pos, NULL);
}
+/*
+ * Return an upper bound for the string distance (allowing
+ * transpositions). Not a full Levenshtein implementation
+ * because Levenshtein is quadratic in the string length
+ * and this function is called for every standard name,
+ * so the check for each custom name would be cubic.
+ * The following crude heuristics is linear, resulting
+ * in quadratic behaviour for checking one custom name,
+ * which does not cause measurable slowdown.
+ */
+static int
+similar(const char *s1, const char *s2)
+{
+ const int maxdist = 3;
+ int dist = 0;
+
+ while (s1[0] != '\0' && s2[0] != '\0') {
+ if (s1[0] == s2[0]) {
+ s1++;
+ s2++;
+ continue;
+ }
+ if (++dist > maxdist)
+ return INT_MAX;
+ if (s1[1] == s2[1]) { /* replacement */
+ s1++;
+ s2++;
+ } else if (s1[0] == s2[1] && s1[1] == s2[0]) {
+ s1 += 2; /* transposition */
+ s2 += 2;
+ } else if (s1[0] == s2[1]) /* insertion */
+ s2++;
+ else if (s1[1] == s2[0]) /* deletion */
+ s1++;
+ else
+ return INT_MAX;
+ }
+ dist += strlen(s1) + strlen(s2);
+ return dist > maxdist ? INT_MAX : dist;
+}
+
static void
post_sh_head(POST_ARGS)
{
struct roff_node *nch;
const char *goodsec;
+ const char *const *testsec;
+ int dist, mindist;
enum roff_sec sec;
/*
@@ -2190,8 +2234,25 @@ post_sh_head(POST_ARGS)
/* We don't care about custom sections after this. */
- if (sec == SEC_CUSTOM)
+ if (sec == SEC_CUSTOM) {
+ if ((nch = mdoc->last->child) == NULL ||
+ nch->type != ROFFT_TEXT || nch->next != NULL)
+ return;
+ goodsec = NULL;
+ mindist = INT_MAX;
+ for (testsec = secnames + 1; *testsec != NULL; testsec++) {
+ dist = similar(nch->string, *testsec);
+ if (dist < mindist) {
+ goodsec = *testsec;
+ mindist = dist;
+ }
+ }
+ if (goodsec != NULL)
+ mandoc_vmsg(MANDOCERR_SEC_TYPO, mdoc->parse,
+ nch->line, nch->pos, "Sh %s instead of %s",
+ nch->string, goodsec);
return;
+ }
/*
* Check whether our non-custom section is being repeated or is
diff --git a/read.c b/read.c
index c1bb13a7..f1c14c1d 100644
--- a/read.c
+++ b/read.c
@@ -1,4 +1,4 @@
-/* $Id: read.c,v 1.181 2017/06/24 18:58:33 schwarze Exp $ */
+/* $Id: read.c,v 1.182 2017/06/25 17:43:45 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
@@ -98,6 +98,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"legacy man(7) date format",
"duplicate RCS id",
+ "typo in section name",
"useless macro",
"consider using OS macro",
"errnos out of order",