about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/html.c
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@openbsd.org> 2014-10-27 16:29:06 +0000
committer: Ingo Schwarze <schwarze@openbsd.org> 2014-10-27 16:29:06 +0000
commit: 581ec294b3151ee015247639a62753ba3748aa44 (patch)
tree: e1a53e570b1c9cd85d2f5793d731a76287ab879a /html.c
parent: e31a1492391aa4d678a400d3a7024f93b4dec47e (diff)
download: mandoc-581ec294b3151ee015247639a62753ba3748aa44.tar.gz
mandoc-581ec294b3151ee015247639a62753ba3748aa44.tar.zst
mandoc-581ec294b3151ee015247639a62753ba3748aa44.zip
Handle output encoding for unicode, numbered and named escape sequences
in one common, safe way instead of three different ways. In particular,
* skip NUL, it is used to mean "no output desired"
* deny 0x01-0x1F and 0x7F-0x9F, print REPLACEMENT CHARACTER instead
* print 0x20-0x7E literally or name-encoded, as required
* print characters above 0x9F numerically
Diffstat (limited to 'html.c')
-rw-r--r-- html.c 34
1 files changed, 11 insertions, 23 deletions
diff --git a/html.c b/html.c
index 71e1127c..20b9b443 100644
--- a/html.c
+++ b/html.c
@@ -1,4 +1,4 @@
-/* $Id: html.c,v 1.178 2014/10/27 13:31:04 schwarze Exp $ */
+/* $Id: html.c,v 1.179 2014/10/27 16:29:06 schwarze Exp $ */
/*
* Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
@@ -437,40 +437,28 @@ print_encode(struct html *h, const char *p, int norecurse)
case ESCAPE_UNICODE:
/* Skip past "u" header. */
c = mchars_num2uc(seq + 1, len - 1);
-
- /*
- * XXX Security warning:
- * For now, forbid Unicode obfuscation of ASCII
- * characters. An audit of the callers is
- * required before this can be removed.
- */
-
- if (c < 0x80)
- c = 0xFFFD;
-
- printf("&#x%x;", c);
break;
case ESCAPE_NUMBERED:
c = mchars_num2char(seq, len);
- if ( ! ('\0' == c || print_escape(c)))
- putchar(c);
break;
case ESCAPE_SPECIAL:
c = mchars_spec2cp(h->symtab, seq, len);
- if (c <= 0)
- break;
- if (c < 0x20 || c > 0x7e)
- printf("&#%d;", c);
- else if ( ! print_escape(c))
- putchar(c);
break;
case ESCAPE_NOSPACE:
if ('\0' == *p)
nospace = 1;
- break;
+ continue;
default:
- break;
+ continue;
}
+ if (c <= 0)
+ continue;
+ if (c < 0x20 || (c > 0x7E && c < 0xA0))
+ c = 0xFFFD;
+ if (c > 0x7E)
+ printf("&#%d;", c);
+ else if ( ! print_escape(c))
+ putchar(c);
}
return(nospace);