From 28f89d13b7d6b04d99517c6e31655ddfbd3ea541 Mon Sep 17 00:00:00 2001
From: Kristaps Dzonsons
Date: Tue, 20 Jul 2010 14:56:42 +0000
Subject: Strip non-graphable input characters from input.

The manuals specifically say that this is not allowed, and were it
allowed, output would be inconsistent across output media (-Tps will
puke, non-your-charset terminals will puke, etc.). With this done,
simplify check_text() to only check escapes and for tabs. Add in a new
tab warning, too.
---
 main.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'main.c')

diff --git a/main.c b/main.c
index 3324f8a8..66f5df88 100644
--- a/main.c
+++ b/main.c
@@ -1,4 +1,4 @@
-/* $Id: main.c,v 1.98 2010/07/07 15:04:54 kristaps Exp $ */
+/* $Id: main.c,v 1.99 2010/07/20 14:56:42 kristaps Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons
  * Copyright (c) 2010 Ingo Schwarze
@@ -23,6 +23,7 @@
 #include <sys/stat.h>
 
 #include <assert.h>
+#include <ctype.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -110,6 +111,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = {
 	"list type must come first",
 	"bad standard",
 	"bad library",
+	"tab in non-literal context",
 	"bad escape sequence",
 	"unterminated quoted string",
 	"argument requires the width argument",
@@ -491,6 +493,26 @@ fdesc(struct curparse *curp)
 				++lnn;
 				break;
 			}
+
+			/*
+			 * Warn about bogus characters. If you're using
+			 * non-ASCII encoding, you're screwing your
+			 * readers. Since I'd rather this not happen,
+			 * I'll be helpful and drop these characters so
+			 * we don't display gibberish. Note to manual
+			 * writers: use special characters.
+			 */
+
+			if ( ! isgraph((u_char)blk.buf[i]) &&
+					! isblank((u_char)blk.buf[i])) {
+				if ( ! mmsg(MANDOCERR_BADCHAR, curp,
+						lnn_start, pos,
+						"ignoring byte"))
+					goto bailout;
+				i++;
+				continue;
+			}
+
 			/* Trailing backslash is like a plain character. */
 			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
 				if (pos >= (int)ln.sz)
-- 
cgit v1.2.3