From 5faa62e2445541401f9bee1667d1cd2b2e443e53 Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Sun, 26 Oct 2014 17:12:03 +0000 Subject: Improve -Tascii output for Unicode escape sequences: For the first 512 code points, provide ASCII approximations. This is already much better than what groff does, which prints nothing for most code points. A few minor fixes while here: * Handle Unicode escape sequences in the ASCII range. * In case of errors, use the REPLACEMENT CHARACTER U+FFFD for -Tutf8 and the string "<?>" for -Tascii output. * Handle all one-character escape sequences in mchars_spec2{cp,str}() and remove the workarounds on the higher level. --- chars.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'chars.c') diff --git a/chars.c b/chars.c index acea7db6..a7b34b21 100644 --- a/chars.c +++ b/chars.c @@ -1,7 +1,7 @@ -/* $Id: chars.c,v 1.59 2014/08/10 23:54:41 schwarze Exp $ */ +/* $Id: chars.c,v 1.60 2014/10/26 17:12:03 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2011 Ingo Schwarze + * Copyright (c) 2011, 2014 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -104,9 +104,7 @@ mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz) const struct ln *ln; ln = find(arg, p, sz); - if (NULL == ln) - return(-1); - return(ln->unicode); + return(ln != NULL ? ln->unicode : sz == 1 ? *p : -1); } char @@ -126,20 +124,13 @@ mchars_num2uc(const char *p, size_t sz) int i; if ((i = mandoc_strntoi(p, sz, 16)) < 0) - return('\0'); + return(0xFFFD); /* - * Security warning: - * Never extend the range of accepted characters - * to overlap with the ASCII range, 0x00-0x7F - * without re-auditing the callers of this function. - * Some callers might relay on the fact that we never - * return ASCII characters for their escaping decisions. 
- * * XXX Code is missing here to exclude bogus ranges. */ - return(i > 0x80 && i <= 0x10FFFF ? i : '\0'); + return(i <= 0x10FFFF ? i : 0xFFFD); } const char * @@ -149,9 +140,9 @@ mchars_spec2str(const struct mchars *arg, const struct ln *ln; ln = find(arg, p, sz); - if (NULL == ln) { + if (ln == NULL) { *rsz = 1; - return(NULL); + return(sz == 1 ? p : NULL); } *rsz = strlen(ln->ascii); -- cgit v1.2.3