From 5faa62e2445541401f9bee1667d1cd2b2e443e53 Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Sun, 26 Oct 2014 17:12:03 +0000 Subject: Improve -Tascii output for Unicode escape sequences: For the first 512 code points, provide ASCII approximations. This is already much better than what groff does, which prints nothing for most code points. A few minor fixes while here: * Handle Unicode escape sequences in the ASCII range. * In case of errors, use the REPLACEMENT CHARACTER U+FFFD for -Tutf8 and the string "<?>" for -Tascii output. * Handle all one-character escape sequences in mchars_spec2{cp,str}() and remove the workarounds on the higher level. --- html.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'html.c') diff --git a/html.c b/html.c index 1830a12e..8d8d1130 100644 --- a/html.c +++ b/html.c @@ -1,4 +1,4 @@ -/* $Id: html.c,v 1.176 2014/10/10 15:26:29 schwarze Exp $ */ +/* $Id: html.c,v 1.177 2014/10/26 17:12:03 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze @@ -437,8 +437,18 @@ print_encode(struct html *h, const char *p, int norecurse) case ESCAPE_UNICODE: /* Skip past "u" header. */ c = mchars_num2uc(seq + 1, len - 1); - if ('\0' != c) - printf("&#x%x;", c); + + /* + * XXX Security warning: + * For now, forbid Unicode obfuscation of ASCII + * characters. An audit of the callers is + * required before this can be removed. + */ + + if (c < 0x80) + c = 0xFFFD; + + printf("&#x%x;", c); break; case ESCAPE_NUMBERED: c = mchars_num2char(seq, len); -- cgit v1.2.3