From dead96d50530a782ee738938c2c911a44e97bede Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Sun, 26 Oct 2014 18:07:28 +0000 Subject: In -Tascii mode, provide approximations even for some Unicode escape sequences above codepoint 512 by doing a reverse lookup in the existing mandoc_char(7) character table. Again, groff isn't smart enough to do this and silently discards such escape sequences without printing anything. --- chars.c | 13 ++++++++++++- mandoc.h | 3 ++- mchars_alloc.3 | 15 +++++++++++++-- term_ascii.c | 8 +++++--- 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/chars.c b/chars.c index a7b34b21..31d75a6e 100644 --- a/chars.c +++ b/chars.c @@ -1,4 +1,4 @@ -/* $Id: chars.c,v 1.60 2014/10/26 17:12:03 schwarze Exp $ */ +/* $Id: chars.c,v 1.61 2014/10/26 18:07:28 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2011, 2014 Ingo Schwarze @@ -149,6 +149,17 @@ mchars_spec2str(const struct mchars *arg, return(ln->ascii); } +const char * +mchars_uc2str(int uc) +{ + int i; + + for (i = 0; i < LINES_MAX; i++) + if (uc == lines[i].unicode) + return(lines[i].ascii); + return(""); +} + static const struct ln * find(const struct mchars *tab, const char *p, size_t sz) { diff --git a/mandoc.h b/mandoc.h index 48ebb655..527b3d8e 100644 --- a/mandoc.h +++ b/mandoc.h @@ -1,4 +1,4 @@ -/* $Id: mandoc.h,v 1.165 2014/10/25 01:03:52 schwarze Exp $ */ +/* $Id: mandoc.h,v 1.166 2014/10/26 18:07:28 schwarze Exp $ */ /* * Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons * Copyright (c) 2010-2014 Ingo Schwarze @@ -426,6 +426,7 @@ enum mandoc_esc mandoc_escape(const char **, const char **, int *); struct mchars *mchars_alloc(void); void mchars_free(struct mchars *); char mchars_num2char(const char *, size_t); +const char *mchars_uc2str(int); int mchars_num2uc(const char *, size_t); int mchars_spec2cp(const struct mchars *, const char *, size_t); diff --git a/mchars_alloc.3 b/mchars_alloc.3 index 8c3f8534..eba81b52 100644 --- a/mchars_alloc.3 +++ b/mchars_alloc.3 @@ -1,4 +1,4 @@ -.\" $Id: mchars_alloc.3,v 1.1 2014/08/05 05:48:56 schwarze Exp $ +.\" $Id: mchars_alloc.3,v 1.2 2014/10/26 18:07:28 schwarze Exp $ .\" .\" Copyright (c) 2014 Ingo Schwarze .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: August 5 2014 $ +.Dd $Mdocdate: October 26 2014 $ .Dt MCHARS_ALLOC 3 .Os .Sh NAME @@ -59,6 +59,8 @@ .Fa "size_t sz" .Fa "size_t *rsz" .Fc +.Ft "const char *" +.Fn mchars_uc2str "int codepoint" .Sh DESCRIPTION These functions translate Unicode character numbers and .Xr roff 7 @@ -199,6 +201,14 @@ output module use this function to render and .Ic \eC\(aq Ns Ar name Ns Ic \(aq escape sequences. +.Pp +The function +.Fn mchars_uc2str +performs a reverse lookup of the Unicode +.Fa codepoint +and returns an ASCII string representation, or the string +.Qq +if none is available. .Sh FILES These funtions are implemented in the file .Pa chars.c . @@ -218,6 +228,7 @@ following mandoc versions: .It Fn mchars_num2uc Ta 1.11.3 Ta \(em Ta \(em .It Fn mchars_spec2cp Ta 1.11.2 Ta Fn chars_spec2cp Ta 1.10.5 .It Fn mchars_spec2str Ta 1.11.2 Ta Fn a2ascii Ta 1.5.3 +.It Fn mchars_uc2str Ta 1.13.2 Ta \(em Ta \(em .El .Sh AUTHORS .An Kristaps Dzonsons Aq Mt kristaps@bsd.lv diff --git a/term_ascii.c b/term_ascii.c index 64f9f686..1b3040a0 100644 --- a/term_ascii.c +++ b/term_ascii.c @@ -1,4 +1,4 @@ -/* $Id: term_ascii.c,v 1.34 2014/10/26 17:12:03 schwarze Exp $ */ +/* $Id: term_ascii.c,v 1.35 2014/10/26 18:07:28 schwarze Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons * Copyright (c) 2014 Ingo Schwarze @@ -236,9 +236,11 @@ ascii_uc2str(int uc) "j", "DZ", "D", "dz", "G", "g", "HV", "W", "N", "n", "A", "a", "AE", "ae", "O", "o"}; - if (uc < 0 || (size_t)uc >= sizeof(tab)/sizeof(tab[0])) + if (uc < 0) return(""); - return(tab[uc]); + if ((size_t)uc < sizeof(tab)/sizeof(tab[0])) + return(tab[uc]); + return(mchars_uc2str(uc)); } static size_t -- cgit v1.2.3-56-ge451