From 747bf8062cca375c58d984004718a2351a88daed Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Tue, 28 Oct 2014 13:24:44 +0000 Subject: Tighten Unicode escape name parsing. Accept only 0xXXXX, 0xYXXXX, 0x10XXXX with Y != 0. This simplifies mchars_num2uc(). --- mandoc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'mandoc.c') diff --git a/mandoc.c b/mandoc.c index e82093b9..2ec179ea 100644 --- a/mandoc.c +++ b/mandoc.c @@ -1,4 +1,4 @@ -/* $Id: mandoc.c,v 1.87 2014/10/13 17:17:45 schwarze Exp $ */ +/* $Id: mandoc.c,v 1.88 2014/10/28 13:24:44 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze @@ -334,13 +334,18 @@ mandoc_escape(const char **end, const char **start, int *sz) if (1 == *sz && 'c' == **start) gly = ESCAPE_NOSPACE; /* - * Unicode escapes are defined in groff as \[uXXXX] + * Unicode escapes are defined in groff as \[u0000] * to \[u10FFFF], where the contained value must be * a valid Unicode codepoint. Here, however, only - * check the length and the validity of all digits. + * check the length and range. */ - else if (*sz > 4 && *sz < 8 && **start == 'u' && - (int)strspn(*start + 1, "0123456789ABCDEFabcdef") + if (**start != 'u' || *sz < 5 || *sz > 7) + break; + if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0')) + break; + if (*sz == 6 && (*start)[1] == '0') + break; + if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef") + 1 == *sz) gly = ESCAPE_UNICODE; break; -- cgit v1.2.3-56-ge451