From 6f5332923fc94cad0bee91d0c1fa8be521828d5c Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Wed, 23 Jul 2014 15:00:08 +0000 Subject: [PATCH] Security fix: After decoding numeric (\N) and one-character (\<, \> etc.) character escape sequences, do not forget to HTML-encode the resulting ASCII character. Malicious manuals were able to smuggle XSS content by roff-escaping the HTML-special characters they need. That's a classic bug type in many web applications, actually... :-( Found myself while auditing the HTML formatter for safe output handling. --- chars.c | 15 +++++++++++-- html.c | 65 +++++++++++++++++++++++++++++++++------------------------ 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/chars.c b/chars.c index baa56003..d758d0cc 100644 --- a/chars.c +++ b/chars.c @@ -1,4 +1,4 @@ -/* $Id: chars.c,v 1.57 2014/04/20 16:46:04 schwarze Exp $ */ +/* $Id: chars.c,v 1.58 2014/07/23 15:00:08 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2011 Ingo Schwarze @@ -127,7 +127,18 @@ mchars_num2uc(const char *p, size_t sz) if ((i = mandoc_strntoi(p, sz, 16)) < 0) return('\0'); - /* FIXME: make sure we're not in a bogus range. */ + + /* + * Security warning: + * Never extend the range of accepted characters + * to overlap with the ASCII range, 0x00-0x7F + * without re-auditing the callers of this function. + * Some callers might rely on the fact that we never + * return ASCII characters for their escaping decisions. + * + * XXX Code is missing here to exclude bogus ranges. + */ + return(i > 0x80 && i <= 0x10FFFF ? 
i : '\0'); } diff --git a/html.c b/html.c index d2b9c3f0..d4783ee0 100644 --- a/html.c +++ b/html.c @@ -1,4 +1,4 @@ -/* $Id: html.c,v 1.158 2014/07/22 22:41:35 schwarze Exp $ */ +/* $Id: html.c,v 1.159 2014/07/23 15:00:08 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze @@ -110,6 +110,7 @@ static const char *const roffscales[SCALE_MAX] = { static void bufncat(struct html *, const char *, size_t); static void print_ctag(struct html *, enum htmltag); +static int print_escape(char); static int print_encode(struct html *, const char *, int); static void print_metaf(struct html *, enum mandoc_esc); static void print_attr(struct html *, const char *, const char *); @@ -323,6 +324,37 @@ html_strlen(const char *cp) return(sz); } +static int +print_escape(char c) +{ + + switch (c) { + case '<': + printf("&lt;"); + break; + case '>': + printf("&gt;"); + break; + case '&': + printf("&amp;"); + break; + case '"': + printf("&quot;"); + break; + case ASCII_NBRSP: + putchar('-'); + break; + case ASCII_HYPH: + putchar('-'); + /* FALLTHROUGH */ + case ASCII_BREAK: + break; + default: + return(0); + } + return(1); +} + static int print_encode(struct html *h, const char *p, int norecurse) { @@ -350,30 +382,8 @@ print_encode(struct html *h, const char *p, int norecurse) if ('\0' == *p) break; - switch (*p++) { - case '<': - printf("&lt;"); - continue; - case '>': - printf("&gt;"); - continue; - case '&': - printf("&amp;"); - continue; - case '"': - printf("&quot;"); - continue; - case ASCII_NBRSP: - putchar('-'); + if (print_escape(*p++)) continue; - case ASCII_HYPH: - putchar('-'); - /* FALLTHROUGH */ - case ASCII_BREAK: - continue; - default: - break; - } esc = mandoc_escape(&p, &seq, &len); if (ESCAPE_ERROR == esc) @@ -408,21 +418,22 @@ print_encode(struct html *h, const char *p, int norecurse) switch (esc) { case ESCAPE_UNICODE: - /* Skip passed "u" header. */ + /* Skip past "u" header. 
*/ c = mchars_num2uc(seq + 1, len - 1); if ('\0' != c) printf("&#x%x;", c); break; case ESCAPE_NUMBERED: c = mchars_num2char(seq, len); - if ('\0' != c) + if ( ! ('\0' == c || print_escape(c))) putchar(c); break; case ESCAPE_SPECIAL: c = mchars_spec2cp(h->symtab, seq, len); if (c > 0) printf("&#%d;", c); - else if (-1 == c && 1 == len) + else if (-1 == c && 1 == len && + !print_escape(*seq)) putchar((int)*seq); break; case ESCAPE_NOSPACE: -- 2.47.1