From ada69bce757bdb8fff8d778e0660446c440c7613 Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Wed, 29 Oct 2014 00:17:43 +0000 Subject: In terminal output, unify handling of Unicode and numbered character escape sequences, just as was done earlier for -Thtml. Do not let control characters other than ASCII 9 (horizontal tab) propagate to the output. Groff allows them, but that really doesn't look like a great idea. Let mchars_num2char() return int so that we can distinguish invalid \N syntax from \N'0'. This also reduces the danger of signed char issues popping up. --- html.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'html.c') diff --git a/html.c b/html.c index 912c006d..050fefe6 100644 --- a/html.c +++ b/html.c @@ -1,4 +1,4 @@ -/* $Id: html.c,v 1.180 2014/10/28 17:36:19 schwarze Exp $ */ +/* $Id: html.c,v 1.181 2014/10/29 00:17:43 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze @@ -422,9 +422,13 @@ print_encode(struct html *h, const char *p, int norecurse) break; case ESCAPE_NUMBERED: c = mchars_num2char(seq, len); + if (c < 0) + continue; break; case ESCAPE_SPECIAL: c = mchars_spec2cp(h->symtab, seq, len); + if (c <= 0) + continue; break; case ESCAPE_NOSPACE: if ('\0' == *p) @@ -433,9 +437,8 @@ print_encode(struct html *h, const char *p, int norecurse) default: continue; } - if (c <= 0) - continue; - if (c < 0x20 || (c > 0x7E && c < 0xA0)) + if ((c < 0x20 && c != 0x09) || + (c > 0x7E && c < 0xA0)) c = 0xFFFD; if (c > 0x7E) printf("&#%d;", c); -- cgit v1.2.3