From a46db8d6dd0bb86c956e7c8fc3a7966c89e7d078 Mon Sep 17 00:00:00 2001
From: Ingo Schwarze <schwarze@openbsd.org>
Date: Thu, 30 May 2013 03:52:59 +0000
Subject: [PATCH] Reject non-printable characters found in the input stream
 even when preceded by a backslash; otherwise, the escape sequence would later
 be identified as invalid and the non-printable character would be passed
 through to the output backends, sometimes triggering assertions.

Reported by Mike Small <smallm at panix dot com> on the mdocml discuss list.
---
 read.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/read.c b/read.c
index cf780626..c9107931 100644
--- a/read.c
+++ b/read.c
@@ -1,7 +1,7 @@
-/*	$Id: read.c,v 1.34 2012/11/19 22:30:58 schwarze Exp $ */
+/*	$Id: read.c,v 1.35 2013/05/30 03:52:59 schwarze Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010, 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -328,6 +328,15 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
 				break;
 			}
 
+			/*
+			 * Make sure we have space for at least
+			 * one backslash and one other character
+			 * and the trailing NUL byte.
+			 */
+
+			if (pos + 2 >= (int)ln.sz)
+				resize_buf(&ln, 256);
+
 			/* 
 			 * Warn about bogus characters.  If you're using
 			 * non-ASCII encoding, you're screwing your
@@ -344,8 +353,6 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
 				mandoc_msg(MANDOCERR_BADCHAR, curp,
 						curp->line, pos, NULL);
 				i++;
-				if (pos >= (int)ln.sz)
-					resize_buf(&ln, 256);
 				ln.buf[pos++] = '?';
 				continue;
 			}
@@ -353,8 +360,6 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
 			/* Trailing backslash = a plain char. */
 
 			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
-				if (pos >= (int)ln.sz)
-					resize_buf(&ln, 256);
 				ln.buf[pos++] = blk.buf[i++];
 				continue;
 			}
@@ -396,10 +401,20 @@ mparse_buf_r(struct mparse *curp, struct buf blk, int start)
 				break;
 			}
 
-			/* Some other escape sequence, copy & cont. */
+			/* Catch escaped bogus characters. */
 
-			if (pos + 1 >= (int)ln.sz)
-				resize_buf(&ln, 256);
+			c = (unsigned char) blk.buf[i+1];
+
+			if ( ! (isascii(c) && 
+					(isgraph(c) || isblank(c)))) {
+				mandoc_msg(MANDOCERR_BADCHAR, curp,
+						curp->line, pos, NULL);
+				i += 2;
+				ln.buf[pos++] = '?';
+				continue;
+			}
+
+			/* Some other escape sequence, copy & cont. */
 
 			ln.buf[pos++] = blk.buf[i++];
 			ln.buf[pos++] = blk.buf[i++];
-- 
2.47.1