about summary refs log tree commit diff stats homepage
path: root/mdoc_validate.c
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@openbsd.org> 2017-06-10 16:54:16 +0000
committer Ingo Schwarze <schwarze@openbsd.org> 2017-06-10 16:54:16 +0000
commit a756448888657802f810973d8bddadebeed29030 (patch)
tree 3e1a6ee6fb26b8863d5054c2f33be02393e32b74 /mdoc_validate.c
parent c5a7634a30089d4f3761cd6e1bf3ebd1d85296fd (diff)
download mandoc-a756448888657802f810973d8bddadebeed29030.tar.gz
mandoc-a756448888657802f810973d8bddadebeed29030.tar.zst
mandoc-a756448888657802f810973d8bddadebeed29030.zip
Reduce false positives for the "no blank before trailing delimiter" message.
This brings us down to one false positive for about every 18 pages.
Diffstat (limited to 'mdoc_validate.c')
-rw-r--r-- mdoc_validate.c 79
1 files changed, 76 insertions, 3 deletions
diff --git a/mdoc_validate.c b/mdoc_validate.c
index 002b0c15..c90c0f0d 100644
--- a/mdoc_validate.c
+++ b/mdoc_validate.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc_validate.c,v 1.333 2017/06/10 01:48:53 schwarze Exp $ */
+/* $Id: mdoc_validate.c,v 1.334 2017/06/10 16:54:16 schwarze Exp $ */
/*
* Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
@@ -412,9 +412,17 @@ static void
post_delim(POST_ARGS)
{
const struct roff_node *nch;
- const char *lc;
+ const char *lc, *cp;
+ int nw;
enum mdelim delim;
+ enum roff_tok tok;
+ /*
+ * Find candidates: at least two bytes,
+ * the last one a closing or middle delimiter.
+ */
+
+ tok = mdoc->last->tok;
nch = mdoc->last->last;
if (nch == NULL || nch->type != ROFFT_TEXT)
return;
@@ -424,9 +432,74 @@ post_delim(POST_ARGS)
delim = mdoc_isdelim(lc);
if (delim == DELIM_NONE || delim == DELIM_OPEN)
return;
+
+ /*
+ * Reduce false positives by allowing various cases.
+ */
+
+ /* Escaped delimiters. */
+ if (lc > nch->string + 1 && lc[-2] == '\\' &&
+ (lc[-1] == '&' || lc[-1] == 'e'))
+ return;
+
+ /* Specific byte sequences. */
+ switch (*lc) {
+ case ')':
+ for (cp = lc; cp >= nch->string; cp--)
+ if (*cp == '(')
+ return;
+ break;
+ case '.':
+ if (lc > nch->string + 1 && lc[-2] == '.' && lc[-1] == '.')
+ return;
+ if (lc[-1] == '.')
+ return;
+ break;
+ case ';':
+ if (tok == MDOC_Vt)
+ return;
+ break;
+ case '?':
+ if (lc[-1] == '?')
+ return;
+ break;
+ case ']':
+ for (cp = lc; cp >= nch->string; cp--)
+ if (*cp == '[')
+ return;
+ break;
+ case '|':
+ if (lc == nch->string + 1 && lc[-1] == '|')
+ return;
+ default:
+ break;
+ }
+
+ /* Exactly two non-alphanumeric bytes. */
+ if (lc == nch->string + 1 && !isalnum((unsigned char)lc[-1]))
+ return;
+
+ /* At least three alphabetic words with a sentence ending. */
+ if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em ||
+ tok == MDOC_Li || tok == MDOC_No || tok == MDOC_Po ||
+ tok == MDOC_Pq || tok == MDOC_Sy)) {
+ nw = 0;
+ for (cp = lc - 1; cp >= nch->string; cp--) {
+ if (*cp == ' ') {
+ nw++;
+ if (cp > nch->string && cp[-1] == ',')
+ cp--;
+ } else if (isalpha((unsigned int)*cp)) {
+ if (nw > 1)
+ return;
+ } else
+ break;
+ }
+ }
+
mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse,
nch->line, nch->pos + (lc - nch->string),
- "%s%s %s", roff_name[mdoc->last->tok],
+ "%s%s %s", roff_name[tok],
nch == mdoc->last->child ? "" : " ...", nch->string);
}