From d672f8cb82878baf3834e938c4c416f4493952cf Mon Sep 17 00:00:00 2001
From: Ingo Schwarze
Date: Sat, 15 Dec 2018 19:30:25 +0000
Subject: Several improvements to escape sequence handling.

* Add the missing special character \_ (underscore).
* Partial implementations of \a (leader character) and \E (uninterpreted escape character).
* Parse and ignore \r (reverse line feed).
* Add a WARNING message about undefined escape sequences.
* Add an UNSUPP message about unsupported escape sequences.
* Mark \! and \? (transparent throughput) and \O (suppress output) as unsupported.
* Treat the various variants of zero-width spaces as one-byte escape sequences rather than as special characters, to avoid defining bogus forms with square brackets.
* For special characters with one-byte names, do not define bogus forms with square brackets, except for \[-], which is valid.
* In the form with square brackets, undefined special characters do not fall back to printing the name verbatim, not even for one-byte names.
* Starting a special character name with a blank is an error.
* Undefined escape sequences never abort formatting of the input string, not even in HTML output mode.
* Document the newly handled escapes, and a few that were missing.
* Regression tests for most of the above.
--- chars.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'chars.c') diff --git a/chars.c b/chars.c index 3ef273c1..24166dbd 100644 --- a/chars.c +++ b/chars.c @@ -1,7 +1,7 @@ -/* $Id: chars.c,v 1.77 2018/12/14 01:18:25 schwarze Exp $ */ +/* $Id: chars.c,v 1.78 2018/12/15 19:30:26 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2011, 2014, 2015, 2017 Ingo Schwarze + * Copyright (c) 2011,2014,2015,2017,2018 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -48,21 +48,13 @@ static struct ln lines[] = { { " ", ascii_nbrsp, 0x00a0 }, { "~", ascii_nbrsp, 0x00a0 }, { "0", " ", 0x2002 }, - { "|", "", 0 }, - { "^", "", 0 }, - { "&", "", 0 }, - { ")", "", 0 }, - { "%", "", 0 }, { ":", ascii_break, 0 }, - /* XXX The following three do not really belong here. */ - { "t", "", 0 }, - { "c", "", 0 }, - { "}", "", 0 }, /* Lines. */ { "ba", "|", 0x007c }, { "br", "|", 0x2502 }, { "ul", "_", 0x005f }, + { "_", "_", 0x005f }, { "ru", "_", 0x005f }, { "rn", "-", 0x203e }, { "bb", "|", 0x00a6 }, @@ -465,7 +457,7 @@ mchars_spec2cp(const char *p, size_t sz) end = p + sz; ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end)); - return ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1; + return ln != NULL ? ln->unicode : -1; } int @@ -495,10 +487,8 @@ mchars_spec2str(const char *p, size_t sz, size_t *rsz) end = p + sz; ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end)); - if (ln == NULL) { - *rsz = 1; - return sz == 1 ? p : NULL; - } + if (ln == NULL) + return NULL; *rsz = strlen(ln->ascii); return ln->ascii; -- cgit v1.2.3-56-ge451