about summary refs log tree commit diff stats homepage
path: root/main.c
diff options
context:
space:
mode:
author Kristaps Dzonsons <kristaps@bsd.lv> 2010-07-20 14:56:42 +0000
committer Kristaps Dzonsons <kristaps@bsd.lv> 2010-07-20 14:56:42 +0000
commit 28f89d13b7d6b04d99517c6e31655ddfbd3ea541 (patch)
tree eddf064f205df6769af821586c16ae6b774b25fc /main.c
parent f62e3bc028493beff5eb1eab67d8f60670958e0b (diff)
download mandoc-28f89d13b7d6b04d99517c6e31655ddfbd3ea541.tar.gz
mandoc-28f89d13b7d6b04d99517c6e31655ddfbd3ea541.tar.zst
mandoc-28f89d13b7d6b04d99517c6e31655ddfbd3ea541.zip
Strip non-graphable characters from input.
The manuals specifically say that such characters are not allowed, and were they allowed, output would be inconsistent across output media (-Tps will puke, non-your-charset terminals will puke, etc.). With this done, simplify check_text() to only check escapes and check for tabs. Add a new tab warning, too.
Diffstat (limited to 'main.c')
-rw-r--r-- main.c 24
1 files changed, 23 insertions, 1 deletions
diff --git a/main.c b/main.c
index 3324f8a8..66f5df88 100644
--- a/main.c
+++ b/main.c
@@ -1,4 +1,4 @@
-/* $Id: main.c,v 1.98 2010/07/07 15:04:54 kristaps Exp $ */
+/* $Id: main.c,v 1.99 2010/07/20 14:56:42 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -23,6 +23,7 @@
#include <sys/stat.h>
#include <assert.h>
+#include <ctype.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
@@ -110,6 +111,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
"list type must come first",
"bad standard",
"bad library",
+ "tab in non-literal context",
"bad escape sequence",
"unterminated quoted string",
"argument requires the width argument",
@@ -491,6 +493,26 @@ fdesc(struct curparse *curp)
++lnn;
break;
}
+
+ /*
+ * Warn about bogus characters. If you're using
+ * non-ASCII encoding, you're screwing your
+ * readers. Since I'd rather this not happen,
+ * I'll be helpful and drop these characters so
+ * we don't display gibberish. Note to manual
+ * writers: use special characters.
+ */
+
+ if ( ! isgraph((u_char)blk.buf[i]) &&
+ ! isblank((u_char)blk.buf[i])) {
+ if ( ! mmsg(MANDOCERR_BADCHAR, curp,
+ lnn_start, pos,
+ "ignoring byte"))
+ goto bailout;
+ i++;
+ continue;
+ }
+
/* Trailing backslash is like a plain character. */
if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
if (pos >= (int)ln.sz)