Security fix:

After decoding numeric (\N) and one-character (\<, \>, etc.) character escape sequences, do not forget to HTML-encode the resulting ASCII character. Malicious manuals were able to smuggle in XSS content by roff-escaping the HTML-special characters they needed. That's a classic bug type in many web applications, actually... :-( I found this myself while auditing the HTML formatter for safe output handling.
author: Ingo Schwarze <schwarze@openbsd.org> 2014-07-23 15:00:08 +0000
committer: Ingo Schwarze <schwarze@openbsd.org> 2014-07-23 15:00:08 +0000
commit: 6f5332923fc94cad0bee91d0c1fa8be521828d5c (patch)
tree: 2e8849fe31297bf03a63cdfed8e5a75d1c933097 /html.c
parent: 5958bb58d226401788b8cb09c2a2b93dc28de2d5 (diff)
download: mandoc-6f5332923fc94cad0bee91d0c1fa8be521828d5c.tar.gz
mandoc-6f5332923fc94cad0bee91d0c1fa8be521828d5c.tar.zst
mandoc-6f5332923fc94cad0bee91d0c1fa8be521828d5c.zip
1 files changed, 38 insertions, 27 deletions
diff --git a/html.c b/html.c
index d2b9c3f0..d4783ee0 100644
--- a/html.c
+++ b/html.c
@@ -1,4 +1,4 @@
-/*	$Id: html.c,v 1.158 2014/07/22 22:41:35 schwarze Exp $ */
+/*	$Id: html.c,v 1.159 2014/07/23 15:00:08 schwarze Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
@@ -110,6 +110,7 @@ static	const char	*const roffscales[SCALE_MAX] = {
 
 static	void	 bufncat(struct html *, const char *, size_t);
 static	void	 print_ctag(struct html *, enum htmltag);
+static	int	 print_escape(char);
 static	int	 print_encode(struct html *, const char *, int);
 static	void	 print_metaf(struct html *, enum mandoc_esc);
 static	void	 print_attr(struct html *, const char *, const char *);
@@ -324,6 +325,37 @@ html_strlen(const char *cp)
 }
 
 static int
+print_escape(char c)
+{
+
+	switch (c) {
+	case '<':
+		printf("&lt;");
+		break;
+	case '>':
+		printf("&gt;");
+		break;
+	case '&':
+		printf("&amp;");
+		break;
+	case '"':
+		printf("&quot;");
+		break;
+	case ASCII_NBRSP:
+		putchar('-');
+		break;
+	case ASCII_HYPH:
+		putchar('-');
+		/* FALLTHROUGH */
+	case ASCII_BREAK:
+		break;
+	default:
+		return(0);
+	}
+	return(1);
+}
+
+static int
 print_encode(struct html *h, const char *p, int norecurse)
 {
 	size_t		 sz;
@@ -350,30 +382,8 @@ print_encode(struct html *h, const char *p, int norecurse)
 		if ('\0' == *p)
 			break;
 
-		switch (*p++) {
-		case '<':
-			printf("&lt;");
-			continue;
-		case '>':
-			printf("&gt;");
-			continue;
-		case '&':
-			printf("&amp;");
-			continue;
-		case '"':
-			printf("&quot;");
-			continue;
-		case ASCII_NBRSP:
-			putchar('-');
+		if (print_escape(*p++))
 			continue;
-		case ASCII_HYPH:
-			putchar('-');
-			/* FALLTHROUGH */
-		case ASCII_BREAK:
-			continue;
-		default:
-			break;
-		}
 
 		esc = mandoc_escape(&p, &seq, &len);
 		if (ESCAPE_ERROR == esc)
@@ -408,21 +418,22 @@ print_encode(struct html *h, const char *p, int norecurse)
 
 		switch (esc) {
 		case ESCAPE_UNICODE:
-			/* Skip passed "u" header. */
+			/* Skip past "u" header. */
 			c = mchars_num2uc(seq + 1, len - 1);
 			if ('\0' != c)
 				printf("&#x%x;", c);
 			break;
 		case ESCAPE_NUMBERED:
 			c = mchars_num2char(seq, len);
-			if ('\0' != c)
+			if ( ! ('\0' == c || print_escape(c)))
 				putchar(c);
 			break;
 		case ESCAPE_SPECIAL:
 			c = mchars_spec2cp(h->symtab, seq, len);
 			if (c > 0)
 				printf("&#%d;", c);
-			else if (-1 == c && 1 == len)
+			else if (-1 == c && 1 == len &&
+			    !print_escape(*seq))
 				putchar((int)*seq);
 			break;
 		case ESCAPE_NOSPACE:
author	Ingo Schwarze <schwarze@openbsd.org>	2014-07-23 15:00:08 +0000
committer	Ingo Schwarze <schwarze@openbsd.org>	2014-07-23 15:00:08 +0000
commit	6f5332923fc94cad0bee91d0c1fa8be521828d5c (patch)
tree	2e8849fe31297bf03a63cdfed8e5a75d1c933097 /html.c
parent	5958bb58d226401788b8cb09c2a2b93dc28de2d5 (diff)
download	mandoc-6f5332923fc94cad0bee91d0c1fa8be521828d5c.tar.gz mandoc-6f5332923fc94cad0bee91d0c1fa8be521828d5c.tar.zst mandoc-6f5332923fc94cad0bee91d0c1fa8be521828d5c.zip