From ccdf39d9cb63bb341dbb5c88c9090ac8e05b350c Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Fri, 3 Jun 2022 12:15:55 +0000 Subject: During identifier parsing, handle undefined escape sequences in the same way as groff: * \\ is always reduced to \ * \. is always reduced to . * other undefined escape sequences are usually reduced to the escape name, for example \G to G, except during the expansion of expanding escape sequences having the standard argument form (in particular \* and \n), in which case the backslash is preserved literally. Yes, this is confusing indeed. For example, the following have the same meaning: * .ds \. and .ds . which is not the same as .ds \\. * \*[\.] and \*[.] which is not the same as \*[\\.] * .ds \G and .ds G which is not the same as .ds \\G * \*[\G] and \*[\\G] which is not the same as \*[G] <- sic! To feel less dirty, have a leaning toothpick, if you are so inclined. This patch also slightly improves the string shown by the "escaped character not allowed in a name" error message. --- roff.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 11 deletions(-) (limited to 'roff.c') diff --git a/roff.c b/roff.c index c4b944e3..b78ef59e 100644 --- a/roff.c +++ b/roff.c @@ -1,4 +1,4 @@ -/* $Id: roff.c,v 1.392 2022/06/02 11:29:07 schwarze Exp $ */ +/* $Id: roff.c,v 1.393 2022/06/03 12:15:55 schwarze Exp $ */ /* * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons @@ -1375,6 +1375,7 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec) int iarg; /* index beginning the argument */ int iendarg; /* index right after the argument */ int iend; /* index right after the sequence */ + int isrc, idst; /* to reduce \\ and \. in names */ int deftype; /* type of definition to paste */ int argi; /* macro argument index */ int quote_args; /* true for \\$@, false for \\$* */ @@ -1428,6 +1429,21 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec) continue; } + /* Reduce \\ and \. in names. */ + + if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') { + isrc = idst = iarg; + while (isrc < iendarg) { + if (isrc + 1 < iendarg && + buf->buf[isrc] == '\\' && + (buf->buf[isrc + 1] == '\\' || + buf->buf[isrc + 1] == '.')) + isrc++; + buf->buf[idst++] = buf->buf[isrc++]; + } + iendarg -= isrc - idst; + } + /* Handle expansion. */ res = NULL; @@ -4002,7 +4018,7 @@ static size_t roff_getname(struct roff *r, char **cpp, int ln, int pos) { char *name, *cp; - size_t namesz; + int namesz, inam, iend; name = *cpp; if (*name == '\0') @@ -4010,24 +4026,46 @@ roff_getname(struct roff *r, char **cpp, int ln, int pos) /* Advance cp to the byte after the end of the name. */ - for (cp = name; 1; cp++) { - namesz = cp - name; + cp = name; + namesz = 0; + for (;;) { if (*cp == '\0') break; if (*cp == ' ' || *cp == '\t') { cp++; break; } - if (*cp != '\\') + if (*cp != '\\') { + if (name + namesz < cp) { + name[namesz] = *cp; + *cp = ' '; + } + namesz++; + cp++; continue; + } if (cp[1] == '{' || cp[1] == '}') break; - if (*++cp == '\\') - continue; - mandoc_msg(MANDOCERR_NAMESC, ln, pos, - "%.*s", (int)(cp - name + 1), name); - mandoc_escape((const char **)&cp, NULL, NULL); - break; + if (roff_escape(cp, 0, 0, NULL, &inam, + NULL, NULL, &iend) != ESCAPE_UNDEF) { + mandoc_msg(MANDOCERR_NAMESC, ln, pos, + "%.*s%.*s", namesz, name, iend, cp); + cp += iend; + break; + } + + /* + * In an identifier, \\, \., \G and so on + * are reduced to \, ., G and so on, + * vaguely similar to copy mode. + */ + + name[namesz++] = cp[inam]; + while (iend--) { + if (cp >= name + namesz) + *cp = ' '; + cp++; + } } /* Read past spaces. */ -- cgit v1.2.3