about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/html.c
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@openbsd.org> 2014-07-23 15:00:08 +0000
committer Ingo Schwarze <schwarze@openbsd.org> 2014-07-23 15:00:08 +0000
commit 6f5332923fc94cad0bee91d0c1fa8be521828d5c (patch)
tree 2e8849fe31297bf03a63cdfed8e5a75d1c933097 /html.c
parent 5958bb58d226401788b8cb09c2a2b93dc28de2d5 (diff)
download mandoc-6f5332923fc94cad0bee91d0c1fa8be521828d5c.tar.gz
mandoc-6f5332923fc94cad0bee91d0c1fa8be521828d5c.tar.zst
mandoc-6f5332923fc94cad0bee91d0c1fa8be521828d5c.zip
Security fix:
After decoding numeric (\N) and one-character (\<, \> etc.) character escape sequences, do not forget to HTML-encode the resulting ASCII character. Malicious manuals were able to smuggle XSS content by roff-escaping the HTML-special characters they need. That's a classic bug type in many web applications, actually... :-( Found it myself while auditing the HTML formatter for safe output handling.
Diffstat (limited to 'html.c')
-rw-r--r-- html.c 65
1 files changed, 38 insertions, 27 deletions
diff --git a/html.c b/html.c
index d2b9c3f0..d4783ee0 100644
--- a/html.c
+++ b/html.c
@@ -1,4 +1,4 @@
-/* $Id: html.c,v 1.158 2014/07/22 22:41:35 schwarze Exp $ */
+/* $Id: html.c,v 1.159 2014/07/23 15:00:08 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
@@ -110,6 +110,7 @@ static const char *const roffscales[SCALE_MAX] = {
static void bufncat(struct html *, const char *, size_t);
static void print_ctag(struct html *, enum htmltag);
+static int print_escape(char);
static int print_encode(struct html *, const char *, int);
static void print_metaf(struct html *, enum mandoc_esc);
static void print_attr(struct html *, const char *, const char *);
@@ -324,6 +325,37 @@ html_strlen(const char *cp)
}
static int
+print_escape(char c)
+{
+
+ switch (c) {
+ case '<':
+ printf("&lt;");
+ break;
+ case '>':
+ printf("&gt;");
+ break;
+ case '&':
+ printf("&amp;");
+ break;
+ case '"':
+ printf("&quot;");
+ break;
+ case ASCII_NBRSP:
+ putchar('-');
+ break;
+ case ASCII_HYPH:
+ putchar('-');
+ /* FALLTHROUGH */
+ case ASCII_BREAK:
+ break;
+ default:
+ return(0);
+ }
+ return(1);
+}
+
+static int
print_encode(struct html *h, const char *p, int norecurse)
{
size_t sz;
@@ -350,30 +382,8 @@ print_encode(struct html *h, const char *p, int norecurse)
if ('\0' == *p)
break;
- switch (*p++) {
- case '<':
- printf("&lt;");
- continue;
- case '>':
- printf("&gt;");
- continue;
- case '&':
- printf("&amp;");
- continue;
- case '"':
- printf("&quot;");
- continue;
- case ASCII_NBRSP:
- putchar('-');
+ if (print_escape(*p++))
continue;
- case ASCII_HYPH:
- putchar('-');
- /* FALLTHROUGH */
- case ASCII_BREAK:
- continue;
- default:
- break;
- }
esc = mandoc_escape(&p, &seq, &len);
if (ESCAPE_ERROR == esc)
@@ -408,21 +418,22 @@ print_encode(struct html *h, const char *p, int norecurse)
switch (esc) {
case ESCAPE_UNICODE:
- /* Skip passed "u" header. */
+ /* Skip past "u" header. */
c = mchars_num2uc(seq + 1, len - 1);
if ('\0' != c)
printf("&#x%x;", c);
break;
case ESCAPE_NUMBERED:
c = mchars_num2char(seq, len);
- if ('\0' != c)
+ if ( ! ('\0' == c || print_escape(c)))
putchar(c);
break;
case ESCAPE_SPECIAL:
c = mchars_spec2cp(h->symtab, seq, len);
if (c > 0)
printf("&#%d;", c);
- else if (-1 == c && 1 == len)
+ else if (-1 == c && 1 == len &&
+ !print_escape(*seq))
putchar((int)*seq);
break;
case ESCAPE_NOSPACE: