about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/mandoc.c
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@openbsd.org> 2014-10-13 17:17:45 +0000
committer: Ingo Schwarze <schwarze@openbsd.org> 2014-10-13 17:17:45 +0000
commit: ccddf0063047104179acf0697799f81a7f19f6ac (patch)
tree: 08d7cdac16e7c26cb68d1196e2e9ffb24c43c6e5 /mandoc.c
parent: 2623b4393620b67d1251b6af13d35b5fe9a6be80 (diff)
download: mandoc-ccddf0063047104179acf0697799f81a7f19f6ac.tar.gz
mandoc-ccddf0063047104179acf0697799f81a7f19f6ac.tar.zst
mandoc-ccddf0063047104179acf0697799f81a7f19f6ac.zip
Stricter syntax checking of Unicode character names:
Require exactly 4, 5 or 6 hex digits and allow nothing else. This avoids mishandling stuff like \[ua] and \C'uA' as Unicode and also fixes underlining in eqn(7) -Thtml output which uses \[ul]. Problem found and semantics suggested by kristaps@.
Diffstat (limited to 'mandoc.c')
-rw-r--r-- mandoc.c 25
1 files changed, 12 insertions, 13 deletions
diff --git a/mandoc.c b/mandoc.c
index be3e264c..e82093b9 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -1,4 +1,4 @@
-/* $Id: mandoc.c,v 1.86 2014/08/18 09:11:47 kristaps Exp $ */
+/* $Id: mandoc.c,v 1.87 2014/10/13 17:17:45 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
@@ -79,24 +79,13 @@ mandoc_escape(const char **end, const char **start, int *sz)
break;
case '[':
gly = ESCAPE_SPECIAL;
- /*
- * Unicode escapes are defined in groff as \[uXXXX] to
- * \[u10FFFF], where the contained value must be a valid
- * Unicode codepoint. Here, however, only check whether
- * it's not a zero-width escape.
- */
- if ('u' == (*start)[0] && ']' != (*start)[1])
- gly = ESCAPE_UNICODE;
term = ']';
break;
case 'C':
if ('\'' != **start)
return(ESCAPE_ERROR);
*start = ++*end;
- if ('u' == (*start)[0] && '\'' != (*start)[1])
- gly = ESCAPE_UNICODE;
- else
- gly = ESCAPE_SPECIAL;
+ gly = ESCAPE_SPECIAL;
term = '\'';
break;
@@ -344,6 +333,16 @@ mandoc_escape(const char **end, const char **start, int *sz)
case ESCAPE_SPECIAL:
if (1 == *sz && 'c' == **start)
gly = ESCAPE_NOSPACE;
+ /*
+ * Unicode escapes are defined in groff as \[uXXXX]
+ * to \[u10FFFF], where the contained value must be
+ * a valid Unicode codepoint. Here, however, only
+ * check the length and the validity of all digits.
+ */
+ else if (*sz > 4 && *sz < 8 && **start == 'u' &&
+ (int)strspn(*start + 1, "0123456789ABCDEFabcdef")
+ + 1 == *sz)
+ gly = ESCAPE_UNICODE;
break;
default:
break;