From ccdf39d9cb63bb341dbb5c88c9090ac8e05b350c Mon Sep 17 00:00:00 2001
From: Ingo Schwarze
Date: Fri, 3 Jun 2022 12:15:55 +0000
Subject: During identifier parsing, handle undefined escape sequences in the same way as groff:

 * \\ is always reduced to \
 * \. is always reduced to .
 * other undefined escape sequences are usually reduced to the
   escape name, for example \G to G, except during the expansion of
   expanding escape sequences having the standard argument form
   (in particular \* and \n), in which case the backslash is
   preserved literally.

Yes, this is confusing indeed.

For example, the following have the same meaning:
 * .ds \. and .ds . which is not the same as .ds \\.
 * \*[\.] and \*[.] which is not the same as \*[\\.]
 * .ds \G and .ds G which is not the same as .ds \\G
 * \*[\G] and \*[\\G] which is not the same as \*[G] <- sic!

To feel less dirty, have a leaning toothpick, if you are so inclined.

This patch also slightly improves the string shown by the
"escaped character not allowed in a name" error message.
--- regress/roff/args/man.out_lint | 2 +- regress/roff/args/mdoc.out_lint | 2 +- regress/roff/cond/register.in | 31 +++++++++++++++++-- regress/roff/cond/register.out_ascii | 10 ++++-- regress/roff/cond/string.in | 29 +++++++++++++++-- regress/roff/cond/string.out_ascii | 8 ++++- regress/roff/de/escname.in | 29 +++++++++++++++-- regress/roff/de/escname.out_ascii | 15 +++++++-- regress/roff/de/escname.out_lint | 16 +++++----- regress/roff/ds/Makefile | 7 ++++- regress/roff/nr/escname.in | 8 +++-- regress/roff/nr/escname.out_ascii | 4 +-- regress/roff/nr/escname.out_lint | 7 +++-- regress/roff/string/Makefile | 7 ++++- regress/roff/string/name.in | 14 +++++++-- regress/roff/string/name.out_ascii | 6 +++- regress/roff/string/name.out_lint | 31 ++++++++++--------- roff.c | 60 +++++++++++++++++++++++++++++------- 18 files changed, 225 insertions(+), 61 deletions(-) diff --git a/regress/roff/args/man.out_lint b/regress/roff/args/man.out_lint index 6d6bbd84..5aaebd9e 100644 --- a/regress/roff/args/man.out_lint +++ b/regress/roff/args/man.out_lint @@ -6,4 +6,4 @@ mandoc: man.in:87:26: STYLE: whitespace at end of input line mandoc: man.in:91:27: STYLE: whitespace at end of input line mandoc: man.in:104:5: STYLE: unterminated quoted argument mandoc: man.in:107:9: STYLE: unterminated quoted argument -mandoc: man.in:131:1: ERROR: escaped character not allowed in a name: IB\( +mandoc: man.in:131:1: ERROR: escaped character not allowed in a name: IB\(lq diff --git a/regress/roff/args/mdoc.out_lint b/regress/roff/args/mdoc.out_lint index 4422d754..1691cf2d 100644 --- a/regress/roff/args/mdoc.out_lint +++ b/regress/roff/args/mdoc.out_lint @@ -14,4 +14,4 @@ mandoc: mdoc.in:112:5: STYLE: unterminated quoted argument mandoc: mdoc.in:112:11: STYLE: whitespace at end of input line mandoc: mdoc.in:113:9: STYLE: unterminated quoted argument mandoc: mdoc.in:113:15: STYLE: whitespace at end of input line -mandoc: mdoc.in:121:1: ERROR: escaped character not allowed in a name: Fl\( +mandoc: 
mdoc.in:121:1: ERROR: escaped character not allowed in a name: Fl\(lq diff --git a/regress/roff/cond/register.in b/regress/roff/cond/register.in index 879c573c..7f137c9c 100644 --- a/regress/roff/cond/register.in +++ b/regress/roff/cond/register.in @@ -1,5 +1,5 @@ -.\" $OpenBSD: register.in,v 1.3 2019/02/06 20:54:28 schwarze Exp $ -.TH REGISTER 1 "February 6, 2019" +.\" $OpenBSD: register.in,v 1.4 2022/06/03 11:50:25 schwarze Exp $ +.TH REGISTER 1 "June 3, 2022" .SH NAME register \- conditional testing whether a register is defined .SH DESCRIPTION @@ -11,10 +11,35 @@ register \- conditional testing whether a register is defined .el OOPS .if !rmyreg OOPS .PP -identifier + identifier: +tab after identifier: .ie rmyreg myreg is defined .el OOPS .PP escape sequence after identifier: .ie rmyreg\(enmyreg is defined .el OOPS +.PP +backslash in name: +.nr \\ 0 +.ie r\\ \e is defined +.el OOPS +.rr \\ +.if r\\ is still defined!? +.PP +dot in name: +.nr . 0 +.ie r. \&. is defined +.el OOPS +.ie r\. \e. is defined +.el OOPS +.rr \. +.if r. is still defined!? +.PP +invalid escape in name: +.nr G 0 +.ie rG G is defined +.el OOPS +.ie r\G \eG is defined +.el OOPS +.rr \G +.if rG is still defined!? diff --git a/regress/roff/cond/register.out_ascii b/regress/roff/cond/register.out_ascii index 928c6138..673b9f12 100644 --- a/regress/roff/cond/register.out_ascii +++ b/regress/roff/cond/register.out_ascii @@ -7,8 +7,14 @@ DDEESSCCRRIIPPTTIIOONN not yet defined now defined - identifier + identifier: myreg is defined + tab after identifier: myreg is defined escape sequence after identifier: -myreg is defined -OpenBSD February 6, 2019 REGISTER(1) + backslash in name: \ is defined + + dot in name: . is defined \. 
is defined + + invalid escape in name: G is defined \G is defined + +OpenBSD June 3, 2022 REGISTER(1) diff --git a/regress/roff/cond/string.in b/regress/roff/cond/string.in index 273984be..fedb4d81 100644 --- a/regress/roff/cond/string.in +++ b/regress/roff/cond/string.in @@ -1,5 +1,5 @@ -.\" $OpenBSD: string.in,v 1.4 2019/02/06 20:54:28 schwarze Exp $ -.TH STRING 1 "February 6, 2019" +.\" $OpenBSD: string.in,v 1.5 2022/06/03 11:50:25 schwarze Exp $ +.TH STRING 1 "June 3, 2022" .SH NAME string \- conditional testing whether a string is defined .SH DESCRIPTION @@ -40,3 +40,28 @@ identifier and tab: escape sequence after identifier: .ie d mystr\(enmystr is defined .el OOPS +.PP +backslash in name: +.ds \\ value +.ie d \\ \e is defined +.el OOPS +.rm \\ +.if d \\ still defined!? +.PP +dot in name: +.ds . value +.ie d . \&. is defined +.el OOPS +.ie d \. \e. is defined +.el OOPS +.rm . +.if d . still defined!? +.PP +invalid escape in name: +.ds G value +.ie d G G is defined +.el OOPS +.ie d \G \eG is defined +.el OOPS +.rm \G +.if d G still defined!? diff --git a/regress/roff/cond/string.out_ascii b/regress/roff/cond/string.out_ascii index 2d80a903..c67c0c55 100644 --- a/regress/roff/cond/string.out_ascii +++ b/regress/roff/cond/string.out_ascii @@ -19,4 +19,10 @@ DDEESSCCRRIIPPTTIIOONN escape sequence after identifier: -mystr is defined -OpenBSD February 6, 2019 STRING(1) + backslash in name: \ is defined + + dot in name: . is defined \. 
is defined + + invalid escape in name: G is defined \G is defined + +OpenBSD June 3, 2022 STRING(1) diff --git a/regress/roff/de/escname.in b/regress/roff/de/escname.in index 67d26091..99305eaf 100644 --- a/regress/roff/de/escname.in +++ b/regress/roff/de/escname.in @@ -1,5 +1,5 @@ -.\" $OpenBSD: escname.in,v 1.4 2017/07/04 14:53:27 schwarze Exp $ -.Dd $Mdocdate: July 4 2017 $ +.\" $OpenBSD: escname.in,v 1.5 2022/06/03 11:50:25 schwarze Exp $ +.Dd $Mdocdate: June 3 2022 $ .Dt DE-ESCNAME 1 .Os .Sh NAME @@ -23,10 +23,33 @@ define first = val1 val1 .. .Pp -Values (first, second, first\esecond): +define first\e.second = val_dot +.de first\.second +val_dot +.. +.Pp +define first\eGsecond = val_inval +.de first\Gsecond +val_inval +.. +.Pp +Values: +.Bl -tag -width first_.second -compact +.It first .first +.It second .second +.It first\esecond .first\\second +.It first.second +.first.second +.It first\e.second +.first\.second +.It firstGsecond +.firstGsecond +.It first\eGsecond +.first\Gsecond +.El .Pp Remove all but second: .rm first\\second first\esecond second diff --git a/regress/roff/de/escname.out_ascii b/regress/roff/de/escname.out_ascii index 367b5d0f..e096fd71 100644 --- a/regress/roff/de/escname.out_ascii +++ b/regress/roff/de/escname.out_ascii @@ -12,7 +12,18 @@ DDEESSCCRRIIPPTTIIOONN define first = val1 - Values (first, second, first\second): val1 val2 val3 + define first\.second = val_dot + + define first\Gsecond = val_inval + + Values: + first val1 + second val2 + first\second val3 + first.second val_dot + first\.second val_dot + firstGsecond val_inval + first\Gsecond val_inval Remove all but second: val2 @@ -20,4 +31,4 @@ DDEESSCCRRIIPPTTIIOONN final text -OpenBSD July 4, 2017 OpenBSD +OpenBSD June 3, 2022 OpenBSD diff --git a/regress/roff/de/escname.out_lint b/regress/roff/de/escname.out_lint index a3f9396f..5dba1973 100644 --- a/regress/roff/de/escname.out_lint +++ b/regress/roff/de/escname.out_lint @@ -1,8 +1,10 @@ mandoc: escname.in:22:2: ERROR: escaped 
character not allowed in a name: first\e -mandoc: escname.in:32:19: ERROR: escaped character not allowed in a name: first\e -mandoc: escname.in:33:2: ERROR: skipping unknown macro: .first -mandoc: escname.in:35:2: ERROR: skipping unknown macro: .first\\second -mandoc: escname.in:38:5: ERROR: skipping excess arguments: .de ... excess arguments -mandoc: escname.in:41:1: ERROR: escaped character not allowed in a name: witharg\( -mandoc: escname.in:43:1: ERROR: escaped character not allowed in a name: de\e -mandoc: escname.in:43:2: WARNING: skipping empty request: de +mandoc: escname.in:32:10: WARNING: undefined escape, printing literally: \G +mandoc: escname.in:51:7: WARNING: undefined escape, printing literally: \G +mandoc: escname.in:55:19: ERROR: escaped character not allowed in a name: first\e +mandoc: escname.in:56:2: ERROR: skipping unknown macro: .first +mandoc: escname.in:58:2: ERROR: skipping unknown macro: .first\second +mandoc: escname.in:61:5: ERROR: skipping excess arguments: .de ... excess arguments +mandoc: escname.in:64:1: ERROR: escaped character not allowed in a name: witharg\(en +mandoc: escname.in:66:1: ERROR: escaped character not allowed in a name: de\e +mandoc: escname.in:66:2: WARNING: skipping empty request: de diff --git a/regress/roff/ds/Makefile b/regress/roff/ds/Makefile index 773105e8..68a8b3bf 100644 --- a/regress/roff/ds/Makefile +++ b/regress/roff/ds/Makefile @@ -1,4 +1,9 @@ -# $OpenBSD: Makefile,v 1.6 2019/02/06 20:54:28 schwarze Exp $ +# $OpenBSD: Makefile,v 1.7 2022/06/03 11:50:25 schwarze Exp $ +# +# This directory is intended for tests of string *definitions*, +# in particular testing the behaviour of the .ds and .as macros. +# Tests of string *expansion* are better placed in the roff/string +# directory. 
REGRESS_TARGETS = append escname nested quoting tab diff --git a/regress/roff/nr/escname.in b/regress/roff/nr/escname.in index f81627e1..d4625552 100644 --- a/regress/roff/nr/escname.in +++ b/regress/roff/nr/escname.in @@ -1,5 +1,5 @@ -.\" $OpenBSD: escname.in,v 1.3 2017/07/04 14:53:27 schwarze Exp $ -.TH NR-ESCNAME 1 "June 29, 2014" +.\" $OpenBSD: escname.in,v 1.4 2022/06/03 11:50:25 schwarze Exp $ +.TH NR-ESCNAME 1 "June 3, 2022" .SH NAME nr-escname \- escape sequences in register names .SH DESCRIPTION @@ -7,7 +7,9 @@ nr-escname \- escape sequences in register names .nr second 2 .nr first\\second 3 .nr first\esecond 4 -\n[first] \n[second] \n[first\\second] +.nr first\.second 5 +.nr first\Gsecond 6 +\n[first] \n[second] \n[first\\second] \n[first.second] \n[firstGsecond] .PP .rr first\esecond \n[first] \n[second] \n[first\\second] diff --git a/regress/roff/nr/escname.out_ascii b/regress/roff/nr/escname.out_ascii index ca2d50f5..d0301af9 100644 --- a/regress/roff/nr/escname.out_ascii +++ b/regress/roff/nr/escname.out_ascii @@ -4,7 +4,7 @@ NNAAMMEE nr-escname - escape sequences in register names DDEESSCCRRIIPPTTIIOONN - 1 2 3 + 1 2 3 5 6 0 2 3 @@ -12,4 +12,4 @@ DDEESSCCRRIIPPTTIIOONN incomplete: -OpenBSD June 29, 2014 NR-ESCNAME(1) +OpenBSD June 3, 2022 NR-ESCNAME(1) diff --git a/regress/roff/nr/escname.out_lint b/regress/roff/nr/escname.out_lint index a2dabebe..edec17fa 100644 --- a/regress/roff/nr/escname.out_lint +++ b/regress/roff/nr/escname.out_lint @@ -1,4 +1,5 @@ mandoc: escname.in:9:5: ERROR: escaped character not allowed in a name: first\e -mandoc: escname.in:12:5: ERROR: escaped character not allowed in a name: first\e -mandoc: escname.in:18:13: WARNING: invalid escape sequence: \n[second -mandoc: escname.in:18:12: STYLE: whitespace at end of input line +mandoc: escname.in:11:10: WARNING: undefined escape, printing literally: \G +mandoc: escname.in:14:5: ERROR: escaped character not allowed in a name: first\e +mandoc: escname.in:20:13: WARNING: invalid 
escape sequence: \n[second +mandoc: escname.in:20:12: STYLE: whitespace at end of input line diff --git a/regress/roff/string/Makefile b/regress/roff/string/Makefile index b8885c65..2fa10557 100644 --- a/regress/roff/string/Makefile +++ b/regress/roff/string/Makefile @@ -1,4 +1,9 @@ -# $OpenBSD: Makefile,v 1.6 2014/07/06 19:08:57 schwarze Exp $ +# $OpenBSD: Makefile,v 1.10 2022/06/03 11:50:25 schwarze Exp $ +# +# This directory is intended for tests of string *expansion*, +# in particular testing the behaviour of the \* escape sequence. +# Tests of string *definitions* are better placed in the roff/ds +# directory. REGRESS_TARGETS = dotT escape infinite name std undef zerolength LINT_TARGETS = name std undef diff --git a/regress/roff/string/name.in b/regress/roff/string/name.in index 2948f164..b2612be2 100644 --- a/regress/roff/string/name.in +++ b/regress/roff/string/name.in @@ -1,5 +1,5 @@ -.\" $OpenBSD: name.in,v 1.4 2017/07/04 14:53:27 schwarze Exp $ -.Dd $Mdocdate: July 4 2017 $ +.\" $OpenBSD: name.in,v 1.5 2022/06/03 11:50:25 schwarze Exp $ +.Dd $Mdocdate: June 3 2022 $ .Dt STRING-NAME 1 .Os .Sh NAME @@ -10,7 +10,9 @@ .ds "quot" value of "quot" .ds bs\e value of bs\ee .ds bs\\e value of bs\e\ee +.ds dot. value of dot. .ds bl\ e value of bl\e e +.ds inval\\G value of inval\eG norm: \*[norm] .br norm without closing brace: \*[norm @@ -26,6 +28,10 @@ bs\e\ee: \*[bs\\e] bse: \*[bse] .br bs: \*[bs] +.br +dot.: \*[dot.] +.br +dot\e.: \*[dot\.] .\".br .\"bl\e e: \*[bl\ e] .br @@ -34,3 +40,7 @@ bl e: \*[bl e] ble: \*[ble] .br bl: \*[bl] +.br +inval\e\eG: \*[inval\\G] +.br +inval\eG: \*[inval\G] diff --git a/regress/roff/string/name.out_ascii b/regress/roff/string/name.out_ascii index 325e28aa..38258a72 100644 --- a/regress/roff/string/name.out_ascii +++ b/regress/roff/string/name.out_ascii @@ -11,8 +11,12 @@ DDEESSCCRRIIPPTTIIOONN bs\\e: value of bs\\e bse: bs: + dot.: value of dot. + dot\.: value of dot. 
bl e: ble: bl: + inval\\G: value of inval\G + inval\G: value of inval\G -OpenBSD July 4, 2017 OpenBSD +OpenBSD June 3, 2022 OpenBSD diff --git a/regress/roff/string/name.out_lint b/regress/roff/string/name.out_lint index 5a6340a0..13283f5d 100644 --- a/regress/roff/string/name.out_lint +++ b/regress/roff/string/name.out_lint @@ -1,16 +1,17 @@ mandoc: name.in:11:5: ERROR: escaped character not allowed in a name: bs\e -mandoc: name.in:13:5: ERROR: escaped character not allowed in a name: bl\ -mandoc: name.in:16:29: WARNING: invalid escape sequence: \*[norm -mandoc: name.in:16:28: STYLE: whitespace at end of input line -mandoc: name.in:18:7: WARNING: undefined string, using "": quot -mandoc: name.in:18:6: STYLE: whitespace at end of input line -mandoc: name.in:26:6: WARNING: undefined string, using "": bse -mandoc: name.in:26:5: STYLE: whitespace at end of input line -mandoc: name.in:28:5: WARNING: undefined string, using "": bs -mandoc: name.in:28:4: STYLE: whitespace at end of input line -mandoc: name.in:32:7: WARNING: undefined string, using "": bl e -mandoc: name.in:32:6: STYLE: whitespace at end of input line -mandoc: name.in:34:6: WARNING: undefined string, using "": ble -mandoc: name.in:34:5: STYLE: whitespace at end of input line -mandoc: name.in:36:5: WARNING: undefined string, using "": bl -mandoc: name.in:36:4: STYLE: whitespace at end of input line +mandoc: name.in:14:5: ERROR: escaped character not allowed in a name: bl\ +mandoc: name.in:18:29: WARNING: invalid escape sequence: \*[norm +mandoc: name.in:18:28: STYLE: whitespace at end of input line +mandoc: name.in:20:7: WARNING: undefined string, using "": quot +mandoc: name.in:20:6: STYLE: whitespace at end of input line +mandoc: name.in:28:6: WARNING: undefined string, using "": bse +mandoc: name.in:28:5: STYLE: whitespace at end of input line +mandoc: name.in:30:5: WARNING: undefined string, using "": bs +mandoc: name.in:30:4: STYLE: whitespace at end of input line +mandoc: name.in:38:7: WARNING: 
undefined string, using "": bl e +mandoc: name.in:38:6: STYLE: whitespace at end of input line +mandoc: name.in:40:6: WARNING: undefined string, using "": ble +mandoc: name.in:40:5: STYLE: whitespace at end of input line +mandoc: name.in:42:5: WARNING: undefined string, using "": bl +mandoc: name.in:42:4: STYLE: whitespace at end of input line +mandoc: name.in:46:19: WARNING: undefined escape, printing literally: \G diff --git a/roff.c b/roff.c index c4b944e3..b78ef59e 100644 --- a/roff.c +++ b/roff.c @@ -1,4 +1,4 @@ -/* $Id: roff.c,v 1.392 2022/06/02 11:29:07 schwarze Exp $ */ +/* $Id: roff.c,v 1.393 2022/06/03 12:15:55 schwarze Exp $ */ /* * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons @@ -1375,6 +1375,7 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec) int iarg; /* index beginning the argument */ int iendarg; /* index right after the argument */ int iend; /* index right after the sequence */ + int isrc, idst; /* to reduce \\ and \. in names */ int deftype; /* type of definition to paste */ int argi; /* macro argument index */ int quote_args; /* true for \\$@, false for \\$* */ @@ -1428,6 +1429,21 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec) continue; } + /* Reduce \\ and \. in names. */ + + if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') { + isrc = idst = iarg; + while (isrc < iendarg) { + if (isrc + 1 < iendarg && + buf->buf[isrc] == '\\' && + (buf->buf[isrc + 1] == '\\' || + buf->buf[isrc + 1] == '.')) + isrc++; + buf->buf[idst++] = buf->buf[isrc++]; + } + iendarg -= isrc - idst; + } + /* Handle expansion. */ res = NULL; @@ -4002,7 +4018,7 @@ static size_t roff_getname(struct roff *r, char **cpp, int ln, int pos) { char *name, *cp; - size_t namesz; + int namesz, inam, iend; name = *cpp; if (*name == '\0') @@ -4010,24 +4026,46 @@ roff_getname(struct roff *r, char **cpp, int ln, int pos) /* Advance cp to the byte after the end of the name. 
*/ - for (cp = name; 1; cp++) { - namesz = cp - name; + cp = name; + namesz = 0; + for (;;) { if (*cp == '\0') break; if (*cp == ' ' || *cp == '\t') { cp++; break; } - if (*cp != '\\') + if (*cp != '\\') { + if (name + namesz < cp) { + name[namesz] = *cp; + *cp = ' '; + } + namesz++; + cp++; continue; + } if (cp[1] == '{' || cp[1] == '}') break; - if (*++cp == '\\') - continue; - mandoc_msg(MANDOCERR_NAMESC, ln, pos, - "%.*s", (int)(cp - name + 1), name); - mandoc_escape((const char **)&cp, NULL, NULL); - break; + if (roff_escape(cp, 0, 0, NULL, &inam, + NULL, NULL, &iend) != ESCAPE_UNDEF) { + mandoc_msg(MANDOCERR_NAMESC, ln, pos, + "%.*s%.*s", namesz, name, iend, cp); + cp += iend; + break; + } + + /* + * In an identifier, \\, \., \G and so on + * are reduced to \, ., G and so on, + * vaguely similar to copy mode. + */ + + name[namesz++] = cp[inam]; + while (iend--) { + if (cp >= name + namesz) + *cp = ' '; + cp++; + } } /* Read past spaces. */ -- cgit v1.2.3-56-ge451