From e31a1492391aa4d678a400d3a7024f93b4dec47e Mon Sep 17 00:00:00 2001
From: Ingo Schwarze
Date: Mon, 27 Oct 2014 13:31:04 +0000
Subject: Fix a regression in term.c rev. 1.229 reported by bentley@:
 In UTF-8 output, do not print anything if mchars_spec2cp() returns 0.
 In particular, this repairs handling of zero-width spaces (\&).
 While here, let mchars_spec2cp() return 0xFFFD instead of -1 if the
 character is not found, simplifying the using code.
 In HTML output, do not print obfuscated ASCII characters and do not
 test for one-char escapes, mchars_spec2cp() already does that.
---
 chars.c    |  4 ++--
 html.c     | 11 ++++++-----
 mandocdb.c |  4 ++--
 term.c     | 12 +++++-------
 4 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/chars.c b/chars.c
index 31d75a6e..8c99d0a0 100644
--- a/chars.c
+++ b/chars.c
@@ -1,4 +1,4 @@
-/* $Id: chars.c,v 1.61 2014/10/26 18:07:28 schwarze Exp $ */
+/* $Id: chars.c,v 1.62 2014/10/27 13:31:04 schwarze Exp $ */
 /*
  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons
  * Copyright (c) 2011, 2014 Ingo Schwarze
@@ -104,7 +104,7 @@ mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz)
 	const struct ln *ln;
 
 	ln = find(arg, p, sz);
-	return(ln != NULL ? ln->unicode : sz == 1 ? *p : -1);
+	return(ln != NULL ? ln->unicode : sz == 1 ? *p : 0xFFFD);
 }
 
 char
diff --git a/html.c b/html.c
index 8d8d1130..71e1127c 100644
--- a/html.c
+++ b/html.c
@@ -1,4 +1,4 @@
-/* $Id: html.c,v 1.177 2014/10/26 17:12:03 schwarze Exp $ */
+/* $Id: html.c,v 1.178 2014/10/27 13:31:04 schwarze Exp $ */
 /*
  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons
  * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze
@@ -457,11 +457,12 @@ print_encode(struct html *h, const char *p, int norecurse)
 			break;
 		case ESCAPE_SPECIAL:
 			c = mchars_spec2cp(h->symtab, seq, len);
-			if (c > 0)
+			if (c <= 0)
+				break;
+			if (c < 0x20 || c > 0x7e)
 				printf("&#%d;", c);
-			else if (-1 == c && 1 == len &&
-			    !print_escape(*seq))
-				putchar((int)*seq);
+			else if ( ! print_escape(c))
+				putchar(c);
 			break;
 		case ESCAPE_NOSPACE:
 			if ('\0' == *p)
diff --git a/mandocdb.c b/mandocdb.c
index d0422420..0f3d7524 100644
--- a/mandocdb.c
+++ b/mandocdb.c
@@ -1,4 +1,4 @@
-/* $Id: mandocdb.c,v 1.166 2014/10/12 20:32:39 schwarze Exp $ */
+/* $Id: mandocdb.c,v 1.167 2014/10/27 13:31:04 schwarze Exp $ */
 /*
  * Copyright (c) 2011, 2012 Kristaps Dzonsons
  * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze
@@ -1903,7 +1903,7 @@ render_key(struct mchars *mc, struct str *key)
 		 */
 
 		if (write_utf8) {
-			if (0 == (u = mchars_spec2cp(mc, seq, len)))
+			if ((u = mchars_spec2cp(mc, seq, len)) <= 0)
 				continue;
 			cpp = utfbuf;
 			if (0 == (sz = utf8(u, utfbuf)))
diff --git a/term.c b/term.c
index 017961e0..91166906 100644
--- a/term.c
+++ b/term.c
@@ -1,4 +1,4 @@
-/* $Id: term.c,v 1.229 2014/10/26 17:12:03 schwarze Exp $ */
+/* $Id: term.c,v 1.230 2014/10/27 13:31:04 schwarze Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons
  * Copyright (c) 2010-2014 Ingo Schwarze
@@ -468,9 +468,8 @@ term_word(struct termp *p, const char *word)
 				encode(p, cp, ssz);
 			} else {
 				uc = mchars_spec2cp(p->symtab, seq, sz);
-				if (uc <= 0)
-					uc = 0xFFFD;
-				encode1(p, uc);
+				if (uc > 0)
+					encode1(p, uc);
 			}
 			break;
 		case ESCAPE_FONTBOLD:
@@ -705,9 +704,8 @@ term_strlen(const struct termp *p, const char *cp)
 				} else {
 					c = mchars_spec2cp(p->symtab,
 					    seq, ssz);
-					if (c <= 0)
-						c = 0xFFFD;
-					sz += cond_width(p, c, &skip);
+					if (c > 0)
+						sz += cond_width(p, c, &skip);
 				}
 				break;
 			case ESCAPE_SKIPCHAR:
-- 
cgit v1.2.3-56-ge451