aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/chars.c
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@openbsd.org>2014-10-26 17:12:03 +0000
committerIngo Schwarze <schwarze@openbsd.org>2014-10-26 17:12:03 +0000
commit5faa62e2445541401f9bee1667d1cd2b2e443e53 (patch)
treefd737f26543e4c9e9e08db9bc3b51103c61736a1 /chars.c
parenteb1d4be7915b314c92a4c377c4a09a06e811fc57 (diff)
downloadmandoc-5faa62e2445541401f9bee1667d1cd2b2e443e53.tar.gz
mandoc-5faa62e2445541401f9bee1667d1cd2b2e443e53.tar.zst
mandoc-5faa62e2445541401f9bee1667d1cd2b2e443e53.zip
Improve -Tascii output for Unicode escape sequences: For the first 512
code points, provide ASCII approximations. This is already much better
than what groff does, which prints nothing for most code points.

A few minor fixes while here:
* Handle Unicode escape sequences in the ASCII range.
* In case of errors, use the REPLACEMENT CHARACTER U+FFFD for -Tutf8
  and the string "<?>" for -Tascii output.
* Handle all one-character escape sequences in mchars_spec2{cp,str}()
  and remove the workarounds on the higher level.
Diffstat (limited to 'chars.c')
-rw-r--r--chars.c23
1 files changed, 7 insertions, 16 deletions
diff --git a/chars.c b/chars.c
index acea7db6..a7b34b21 100644
--- a/chars.c
+++ b/chars.c
@@ -1,7 +1,7 @@
-/* $Id: chars.c,v 1.59 2014/08/10 23:54:41 schwarze Exp $ */
+/* $Id: chars.c,v 1.60 2014/10/26 17:12:03 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011, 2014 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -104,9 +104,7 @@ mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz)
const struct ln *ln;
ln = find(arg, p, sz);
- if (NULL == ln)
- return(-1);
- return(ln->unicode);
+ return(ln != NULL ? ln->unicode : sz == 1 ? *p : -1);
}
char
@@ -126,20 +124,13 @@ mchars_num2uc(const char *p, size_t sz)
int i;
if ((i = mandoc_strntoi(p, sz, 16)) < 0)
- return('\0');
+ return(0xFFFD);
/*
- * Security warning:
- * Never extend the range of accepted characters
- * to overlap with the ASCII range, 0x00-0x7F
- * without re-auditing the callers of this function.
- * Some callers might relay on the fact that we never
- * return ASCII characters for their escaping decisions.
- *
* XXX Code is missing here to exclude bogus ranges.
*/
- return(i > 0x80 && i <= 0x10FFFF ? i : '\0');
+ return(i <= 0x10FFFF ? i : 0xFFFD);
}
const char *
@@ -149,9 +140,9 @@ mchars_spec2str(const struct mchars *arg,
const struct ln *ln;
ln = find(arg, p, sz);
- if (NULL == ln) {
+ if (ln == NULL) {
*rsz = 1;
- return(NULL);
+ return(sz == 1 ? p : NULL);
}
*rsz = strlen(ln->ascii);