From 3a43ce11ac1aec9b2c00d7c5f966919b71f5eda7 Mon Sep 17 00:00:00 2001 From: Ingo Schwarze Date: Sun, 5 Jun 2022 13:54:09 +0000 Subject: With the improved escape sequence parser, it becomes easy to also improve diagnostics. Distinguish "incomplete escape sequence", "invalid special character", and "unknown special character" from the generic "invalid escape sequence", also promoting them from WARNING to ERROR because incomplete escape sequences are severe syntax violations and because encountering an invalid or unknown special character makes it likely that part of the document content intended by the authors gets lost. --- mandoc.1 | 64 +++++++++++++++++++++++++--------- mandoc.h | 7 +++- mandoc_msg.c | 5 +++ regress/char/accent/nocombine.out_lint | 4 +-- regress/char/space/invalid.out_lint | 16 ++++----- regress/char/unicode/input.out_lint | 14 ++++---- regress/char/unicode/invalid.out_lint | 18 +++++----- regress/roff/char/badarg.out_lint | 4 +-- regress/roff/esc/B.out_lint | 2 +- regress/roff/esc/ignore.out_lint | 28 +++++++-------- regress/roff/esc/invalid.out_lint | 44 +++++++++++------------ regress/roff/esc/unsupp.out_lint | 4 +-- regress/roff/esc/w.out_lint | 2 +- regress/roff/nr/escname.out_lint | 2 +- regress/roff/string/name.out_lint | 2 +- roff_escape.c | 33 ++++++++++++------ 16 files changed, 150 insertions(+), 99 deletions(-) diff --git a/mandoc.1 b/mandoc.1 index d689fffa..e2cd0b5c 100644 --- a/mandoc.1 +++ b/mandoc.1 @@ -1,4 +1,4 @@ -.\" $Id: mandoc.1,v 1.258 2022/04/28 16:21:09 schwarze Exp $ +.\" $Id: mandoc.1,v 1.259 2022/06/05 13:54:09 schwarze Exp $ .\" .\" Copyright (c) 2012, 2014-2022 Ingo Schwarze .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons @@ -15,7 +15,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: April 28 2022 $ +.Dd $Mdocdate: June 5 2022 $ .Dt MANDOC 1 .Os .Sh NAME @@ -1799,21 +1799,9 @@ A new sentence starts in the middle of a text line. Start it on a new input line to help formatters produce correct spacing. .It Sy "invalid escape sequence" .Pq roff -An escape sequence has an invalid opening argument delimiter, lacks the -closing argument delimiter, the argument is of an invalid form, or it is -a character escape sequence with an invalid name. -If the argument is incomplete, -.Ic \e* -and -.Ic \en -expand to an empty string, -.Ic \eB -to the digit -.Sq 0 , -and -.Ic \ew -to the length of the incomplete argument. -All other invalid escape sequences are ignored. +An escape sequence has an invalid opening argument delimiter +or the argument is of an invalid form. +Invalid escape sequences are ignored. .It Sy "undefined escape, printing literally" .Pq roff In an escape sequence, the first character @@ -2285,6 +2273,48 @@ with invalid arguments .El The excess arguments are ignored. .El +.Ss "Errors related to escape sequences" +.Bl -ohang +.It Sy "incomplete escape sequence" +.Pq roff +The end of the input line is encountered +while parsing the argument of an escape sequence. +In this case, +.Ic \e* +and +.Ic \en +expand to an empty string, +.Ic \eB +to the digit +.Sq 0 , +and +.Ic \ew +to the length of the incomplete argument. +All other incomplete escape sequences are ignored. +.It Sy "invalid special character" +.Pq roff +A special character escape sequence is invalid, +for example a Unicode sequence pointing to a surrogate +or beyond the Unicode range, a \e[char...] escape sequence +representing a control character or pointing beyond the +.Vt unsigned char +range, or an invalid variable-length form +of a single-byte character escape sequence, for example writing +.Qq \e[e] +or +.Qq \e[~] +instead of +.Qq \ee +or +.Qq \e~ , +respectively. +The escape sequence is ignored. +.It Sy "unknown special character" +.Pq roff +The name given in a special character escape sequence is not known to +.Nm . +The escape sequence is ignored. +.El .Ss Unsupported features .Bl -ohang .It Sy "input too large" diff --git a/mandoc.h b/mandoc.h index 03e469dc..f50405e9 100644 --- a/mandoc.h +++ b/mandoc.h @@ -1,4 +1,4 @@ -/* $Id: mandoc.h,v 1.277 2022/05/19 15:37:47 schwarze Exp $ */ +/* $Id: mandoc.h,v 1.278 2022/06/05 13:54:09 schwarze Exp $ */ /* * Copyright (c) 2012-2022 Ingo Schwarze * Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons @@ -235,6 +235,11 @@ enum mandocerr { MANDOCERR_ARG_EXCESS, /* skipping excess arguments: macro ... args */ MANDOCERR_DIVZERO, /* divide by zero */ + /* related to escape sequences */ + MANDOCERR_ESC_INCOMPLETE, /* incomplete escape sequence: esc */ + MANDOCERR_ESC_BADCHAR, /* invalid special character: esc */ + MANDOCERR_ESC_UNKCHAR, /* unknown special character: esc */ + MANDOCERR_UNSUPP, /* ===== start of unsupported features ===== */ MANDOCERR_TOOLARGE, /* input too large */ diff --git a/mandoc_msg.c b/mandoc_msg.c index 80a49d1d..566a140e 100644 --- a/mandoc_msg.c +++ b/mandoc_msg.c @@ -236,6 +236,11 @@ static const char *const type_message[MANDOCERR_MAX] = { "skipping excess arguments", "divide by zero", + /* related to escape sequences */ + "incomplete escape sequence", + "invalid special character", + "unknown special character", + "unsupported feature", "input too large", "unsupported control character", diff --git a/regress/char/accent/nocombine.out_lint b/regress/char/accent/nocombine.out_lint index c9de4162..0f7be4d0 100644 --- a/regress/char/accent/nocombine.out_lint +++ b/regress/char/accent/nocombine.out_lint @@ -1,2 +1,2 @@ -mandoc: nocombine.in:8:27: WARNING: invalid escape sequence: \['] -mandoc: nocombine.in:14:27: WARNING: invalid escape sequence: \[`] +mandoc: nocombine.in:8:27: ERROR: invalid special character: \['] +mandoc: nocombine.in:14:27: ERROR: invalid special character: \[`] diff --git a/regress/char/space/invalid.out_lint b/regress/char/space/invalid.out_lint index c05ef38f..4c146853 100644 --- a/regress/char/space/invalid.out_lint +++ b/regress/char/space/invalid.out_lint @@ -1,9 +1,9 @@ mandoc: invalid.in:7:15: WARNING: invalid escape sequence: \[ -mandoc: invalid.in:8:14: WARNING: invalid escape sequence: \[%] -mandoc: invalid.in:9:16: WARNING: invalid escape sequence: \[&] -mandoc: invalid.in:10:12: WARNING: invalid escape sequence: \[:] -mandoc: invalid.in:11:12: WARNING: invalid escape sequence: \[^] -mandoc: invalid.in:12:16: WARNING: invalid escape sequence: \[_] -mandoc: invalid.in:13:11: WARNING: invalid escape sequence: \[|] -mandoc: invalid.in:14:12: WARNING: invalid escape sequence: \[~] -mandoc: invalid.in:15:18: WARNING: invalid escape sequence: \[0] +mandoc: invalid.in:8:14: ERROR: invalid special character: \[%] +mandoc: invalid.in:9:16: ERROR: invalid special character: \[&] +mandoc: invalid.in:10:12: ERROR: invalid special character: \[:] +mandoc: invalid.in:11:12: ERROR: invalid special character: \[^] +mandoc: invalid.in:12:16: ERROR: invalid special character: \[_] +mandoc: invalid.in:13:11: ERROR: invalid special character: \[|] +mandoc: invalid.in:14:12: ERROR: invalid special character: \[~] +mandoc: invalid.in:15:18: ERROR: invalid special character: \[0] diff --git a/regress/char/unicode/input.out_lint b/regress/char/unicode/input.out_lint index 578a7704..fa36f876 100644 --- a/regress/char/unicode/input.out_lint +++ b/regress/char/unicode/input.out_lint @@ -24,11 +24,11 @@ mandoc: input.in:35:19: ERROR: skipping bad character: 0xbf mandoc: input.in:42:25: ERROR: skipping bad character: 0xed mandoc: input.in:42:26: ERROR: skipping bad character: 0xa0 mandoc: input.in:42:27: ERROR: skipping bad character: 0x80 -mandoc: input.in:42:17: WARNING: invalid escape sequence: \[uD800] +mandoc: input.in:42:17: ERROR: invalid special character: \[uD800] mandoc: input.in:43:25: ERROR: skipping bad character: 0xed mandoc: input.in:43:26: ERROR: skipping bad character: 0xbf mandoc: input.in:43:27: ERROR: skipping bad character: 0xbf -mandoc: input.in:43:17: WARNING: invalid escape sequence: \[uDFFF] +mandoc: input.in:43:17: ERROR: invalid special character: \[uDFFF] mandoc: input.in:53:19: ERROR: skipping bad character: 0xf0 mandoc: input.in:53:20: ERROR: skipping bad character: 0x80 mandoc: input.in:53:21: ERROR: skipping bad character: 0x80 @@ -57,25 +57,25 @@ mandoc: input.in:67:31: ERROR: skipping bad character: 0xf4 mandoc: input.in:67:32: ERROR: skipping bad character: 0x90 mandoc: input.in:67:33: ERROR: skipping bad character: 0x80 mandoc: input.in:67:34: ERROR: skipping bad character: 0x80 -mandoc: input.in:67:21: WARNING: invalid escape sequence: \[u110000] +mandoc: input.in:67:21: ERROR: invalid special character: \[u110000] mandoc: input.in:68:31: ERROR: skipping bad character: 0xf4 mandoc: input.in:68:32: ERROR: skipping bad character: 0xbf mandoc: input.in:68:33: ERROR: skipping bad character: 0xbf mandoc: input.in:68:34: ERROR: skipping bad character: 0xbf -mandoc: input.in:68:21: WARNING: invalid escape sequence: \[u13FFFF] +mandoc: input.in:68:21: ERROR: invalid special character: \[u13FFFF] mandoc: input.in:69:31: ERROR: skipping bad character: 0xf5 mandoc: input.in:69:32: ERROR: skipping bad character: 0x80 mandoc: input.in:69:33: ERROR: skipping bad character: 0x80 mandoc: input.in:69:34: ERROR: skipping bad character: 0x80 -mandoc: input.in:69:21: WARNING: invalid escape sequence: \[u140000] +mandoc: input.in:69:21: ERROR: invalid special character: \[u140000] mandoc: input.in:70:31: ERROR: skipping bad character: 0xf7 mandoc: input.in:70:32: ERROR: skipping bad character: 0xbf mandoc: input.in:70:33: ERROR: skipping bad character: 0xbf mandoc: input.in:70:34: ERROR: skipping bad character: 0xbf -mandoc: input.in:70:21: WARNING: invalid escape sequence: \[u1FFFFF] +mandoc: input.in:70:21: ERROR: invalid special character: \[u1FFFFF] mandoc: input.in:71:33: ERROR: skipping bad character: 0xf8 mandoc: input.in:71:34: ERROR: skipping bad character: 0x88 mandoc: input.in:71:35: ERROR: skipping bad character: 0x80 mandoc: input.in:71:36: ERROR: skipping bad character: 0x80 mandoc: input.in:71:37: ERROR: skipping bad character: 0x80 -mandoc: input.in:71:23: WARNING: invalid escape sequence: \[u200000] +mandoc: input.in:71:23: ERROR: invalid special character: \[u200000] diff --git a/regress/char/unicode/invalid.out_lint b/regress/char/unicode/invalid.out_lint index ce9de988..0717605f 100644 --- a/regress/char/unicode/invalid.out_lint +++ b/regress/char/unicode/invalid.out_lint @@ -1,9 +1,9 @@ -mandoc: invalid.in:11:13: WARNING: invalid escape sequence: \[u2B] -mandoc: invalid.in:11:20: WARNING: invalid escape sequence: \[u02B] -mandoc: invalid.in:13:12: WARNING: invalid escape sequence: \[u0002B] -mandoc: invalid.in:13:22: WARNING: invalid escape sequence: \[u00002B] -mandoc: invalid.in:13:33: WARNING: invalid escape sequence: \[u000002B] -mandoc: invalid.in:14:13: WARNING: invalid escape sequence: \[u110000] -mandoc: invalid.in:14:24: WARNING: invalid escape sequence: \[u200000] -mandoc: invalid.in:14:35: WARNING: invalid escape sequence: \[u1000000] -mandoc: invalid.in:15:20: WARNING: invalid escape sequence: \[u1234g] +mandoc: invalid.in:11:13: ERROR: unknown special character: \[u2B] +mandoc: invalid.in:11:20: ERROR: unknown special character: \[u02B] +mandoc: invalid.in:13:12: ERROR: invalid special character: \[u0002B] +mandoc: invalid.in:13:22: ERROR: invalid special character: \[u00002B] +mandoc: invalid.in:13:33: ERROR: unknown special character: \[u000002B] +mandoc: invalid.in:14:13: ERROR: invalid special character: \[u110000] +mandoc: invalid.in:14:24: ERROR: invalid special character: \[u200000] +mandoc: invalid.in:14:35: ERROR: unknown special character: \[u1000000] +mandoc: invalid.in:15:20: ERROR: unknown special character: \[u1234g] diff --git a/regress/roff/char/badarg.out_lint b/regress/roff/char/badarg.out_lint index e07faa3c..dd5c7999 100644 --- a/regress/roff/char/badarg.out_lint +++ b/regress/roff/char/badarg.out_lint @@ -1,6 +1,6 @@ mandoc: badarg.in:6:6: ERROR: argument is not a character: char mandoc: badarg.in:7:7: ERROR: argument is not a character: char \fR myval -mandoc: badarg.in:8:7: WARNING: invalid escape sequence: \[myc] +mandoc: badarg.in:8:7: ERROR: unknown special character: \[myc] mandoc: badarg.in:8:7: ERROR: argument is not a character: char \[myc]x myval mandoc: badarg.in:9:7: ERROR: argument is not a character: char xy myval -mandoc: badarg.in:10:7: WARNING: invalid escape sequence: \[myc] +mandoc: badarg.in:10:7: ERROR: unknown special character: \[myc] diff --git a/regress/roff/esc/B.out_lint b/regress/roff/esc/B.out_lint index f52270a4..fbaded65 100644 --- a/regress/roff/esc/B.out_lint +++ b/regress/roff/esc/B.out_lint @@ -1 +1 @@ -mandoc: B.in:37:23: WARNING: invalid escape sequence: \B'1+1 +mandoc: B.in:37:23: ERROR: incomplete escape sequence: \B'1+1 diff --git a/regress/roff/esc/ignore.out_lint b/regress/roff/esc/ignore.out_lint index 16d64a97..ddbc8d6e 100644 --- a/regress/roff/esc/ignore.out_lint +++ b/regress/roff/esc/ignore.out_lint @@ -1,14 +1,14 @@ -mandoc: ignore.in:7:36: WARNING: invalid escape sequence: \[%] -mandoc: ignore.in:8:35: WARNING: invalid escape sequence: \[&] -mandoc: ignore.in:9:51: WARNING: invalid escape sequence: \[)] -mandoc: ignore.in:10:37: WARNING: invalid escape sequence: \[,] -mandoc: ignore.in:11:38: WARNING: invalid escape sequence: \[/] -mandoc: ignore.in:12:28: WARNING: invalid escape sequence: \[^] -mandoc: ignore.in:13:17: WARNING: invalid escape sequence: \[a] -mandoc: ignore.in:14:25: WARNING: invalid escape sequence: \[d] -mandoc: ignore.in:15:25: WARNING: invalid escape sequence: \[t] -mandoc: ignore.in:16:33: WARNING: invalid escape sequence: \[u] -mandoc: ignore.in:17:20: WARNING: invalid escape sequence: \[{] -mandoc: ignore.in:18:25: WARNING: invalid escape sequence: \[|] -mandoc: ignore.in:19:20: WARNING: invalid escape sequence: \[}] -mandoc: ignore.in:23:56: WARNING: invalid escape sequence: \s- +mandoc: ignore.in:7:36: ERROR: invalid special character: \[%] +mandoc: ignore.in:8:35: ERROR: invalid special character: \[&] +mandoc: ignore.in:9:51: ERROR: invalid special character: \[)] +mandoc: ignore.in:10:37: ERROR: invalid special character: \[,] +mandoc: ignore.in:11:38: ERROR: invalid special character: \[/] +mandoc: ignore.in:12:28: ERROR: invalid special character: \[^] +mandoc: ignore.in:13:17: ERROR: invalid special character: \[a] +mandoc: ignore.in:14:25: ERROR: invalid special character: \[d] +mandoc: ignore.in:15:25: ERROR: invalid special character: \[t] +mandoc: ignore.in:16:33: ERROR: invalid special character: \[u] +mandoc: ignore.in:17:20: ERROR: invalid special character: \[{] +mandoc: ignore.in:18:25: ERROR: invalid special character: \[|] +mandoc: ignore.in:19:20: ERROR: invalid special character: \[}] +mandoc: ignore.in:23:56: ERROR: incomplete escape sequence: \s- diff --git a/regress/roff/esc/invalid.out_lint b/regress/roff/esc/invalid.out_lint index baef3176..ff52893f 100644 --- a/regress/roff/esc/invalid.out_lint +++ b/regress/roff/esc/invalid.out_lint @@ -1,43 +1,43 @@ mandoc: invalid.in:7:8: WARNING: undefined escape, printing literally: \+ -mandoc: invalid.in:7:11: WARNING: invalid escape sequence: \[+] +mandoc: invalid.in:7:11: ERROR: invalid special character: \[+] mandoc: invalid.in:8:13: WARNING: undefined escape, printing literally: \; -mandoc: invalid.in:8:16: WARNING: invalid escape sequence: \[;] +mandoc: invalid.in:8:16: ERROR: invalid special character: \[;] mandoc: invalid.in:9:13: WARNING: undefined escape, printing literally: \< -mandoc: invalid.in:9:16: WARNING: invalid escape sequence: \[<] +mandoc: invalid.in:9:16: ERROR: invalid special character: \[<] mandoc: invalid.in:10:12: WARNING: undefined escape, printing literally: \= -mandoc: invalid.in:10:15: WARNING: invalid escape sequence: \[=] +mandoc: invalid.in:10:15: ERROR: invalid special character: \[=] mandoc: invalid.in:11:16: WARNING: undefined escape, printing literally: \> -mandoc: invalid.in:11:19: WARNING: invalid escape sequence: \[>] +mandoc: invalid.in:11:19: ERROR: invalid special character: \[>] mandoc: invalid.in:12:6: WARNING: undefined escape, printing literally: \@ -mandoc: invalid.in:12:9: WARNING: invalid escape sequence: \[@] +mandoc: invalid.in:12:9: ERROR: invalid special character: \[@] mandoc: invalid.in:13:18: WARNING: undefined escape, printing literally: \] -mandoc: invalid.in:14:16: WARNING: invalid escape sequence: \[{] -mandoc: invalid.in:14:21: WARNING: invalid escape sequence: \[}] +mandoc: invalid.in:14:16: ERROR: invalid special character: \[{] +mandoc: invalid.in:14:21: ERROR: invalid special character: \[}] mandoc: invalid.in:15:9: WARNING: undefined escape, printing literally: \1 -mandoc: invalid.in:15:12: WARNING: invalid escape sequence: \[1] +mandoc: invalid.in:15:12: ERROR: invalid special character: \[1] mandoc: invalid.in:16:5: WARNING: undefined escape, printing literally: \G -mandoc: invalid.in:16:8: WARNING: invalid escape sequence: \[G] +mandoc: invalid.in:16:8: ERROR: invalid special character: \[G] mandoc: invalid.in:17:5: WARNING: undefined escape, printing literally: \I -mandoc: invalid.in:17:8: WARNING: invalid escape sequence: \[I] +mandoc: invalid.in:17:8: ERROR: invalid special character: \[I] mandoc: invalid.in:18:5: WARNING: undefined escape, printing literally: \i -mandoc: invalid.in:18:8: WARNING: invalid escape sequence: \[i] +mandoc: invalid.in:18:8: ERROR: invalid special character: \[i] mandoc: invalid.in:19:5: WARNING: undefined escape, printing literally: \J -mandoc: invalid.in:19:8: WARNING: invalid escape sequence: \[J] +mandoc: invalid.in:19:8: ERROR: invalid special character: \[J] mandoc: invalid.in:20:5: WARNING: undefined escape, printing literally: \j -mandoc: invalid.in:20:8: WARNING: invalid escape sequence: \[j] +mandoc: invalid.in:20:8: ERROR: invalid special character: \[j] mandoc: invalid.in:21:5: WARNING: undefined escape, printing literally: \K -mandoc: invalid.in:21:8: WARNING: invalid escape sequence: \[K] +mandoc: invalid.in:21:8: ERROR: invalid special character: \[K] mandoc: invalid.in:22:5: WARNING: undefined escape, printing literally: \P -mandoc: invalid.in:22:8: WARNING: invalid escape sequence: \[P] +mandoc: invalid.in:22:8: ERROR: invalid special character: \[P] mandoc: invalid.in:23:5: WARNING: undefined escape, printing literally: \Q -mandoc: invalid.in:23:8: WARNING: invalid escape sequence: \[Q] +mandoc: invalid.in:23:8: ERROR: invalid special character: \[Q] mandoc: invalid.in:24:5: WARNING: undefined escape, printing literally: \q -mandoc: invalid.in:24:8: WARNING: invalid escape sequence: \[q] +mandoc: invalid.in:24:8: ERROR: invalid special character: \[q] mandoc: invalid.in:25:5: WARNING: undefined escape, printing literally: \T -mandoc: invalid.in:25:8: WARNING: invalid escape sequence: \[T] +mandoc: invalid.in:25:8: ERROR: invalid special character: \[T] mandoc: invalid.in:26:5: WARNING: undefined escape, printing literally: \U -mandoc: invalid.in:26:8: WARNING: invalid escape sequence: \[U] +mandoc: invalid.in:26:8: ERROR: invalid special character: \[U] mandoc: invalid.in:27:5: WARNING: undefined escape, printing literally: \W -mandoc: invalid.in:27:8: WARNING: invalid escape sequence: \[W] +mandoc: invalid.in:27:8: ERROR: invalid special character: \[W] mandoc: invalid.in:28:5: WARNING: undefined escape, printing literally: \y -mandoc: invalid.in:28:8: WARNING: invalid escape sequence: \[y] +mandoc: invalid.in:28:8: ERROR: invalid special character: \[y] diff --git a/regress/roff/esc/unsupp.out_lint b/regress/roff/esc/unsupp.out_lint index fae97c81..db7631c6 100644 --- a/regress/roff/esc/unsupp.out_lint +++ b/regress/roff/esc/unsupp.out_lint @@ -1,5 +1,5 @@ mandoc: unsupp.in:7:20: UNSUPP: unsupported escape sequence: \! -mandoc: unsupp.in:7:23: WARNING: invalid escape sequence: \[!] +mandoc: unsupp.in:7:23: ERROR: invalid special character: \[!] mandoc: unsupp.in:8:17: UNSUPP: unsupported escape sequence: \? mandoc: unsupp.in:8:21: UNSUPP: unsupported escape sequence: \? -mandoc: unsupp.in:8:24: WARNING: invalid escape sequence: \[?] +mandoc: unsupp.in:8:24: ERROR: invalid special character: \[?] diff --git a/regress/roff/esc/w.out_lint b/regress/roff/esc/w.out_lint index d48495c2..11dfbef5 100644 --- a/regress/roff/esc/w.out_lint +++ b/regress/roff/esc/w.out_lint @@ -1 +1 @@ -mandoc: w.in:17:15: WARNING: invalid escape sequence: \w'foo +mandoc: w.in:17:15: ERROR: incomplete escape sequence: \w'foo diff --git a/regress/roff/nr/escname.out_lint b/regress/roff/nr/escname.out_lint index edec17fa..8655da96 100644 --- a/regress/roff/nr/escname.out_lint +++ b/regress/roff/nr/escname.out_lint @@ -1,5 +1,5 @@ mandoc: escname.in:9:5: ERROR: escaped character not allowed in a name: first\e mandoc: escname.in:11:10: WARNING: undefined escape, printing literally: \G mandoc: escname.in:14:5: ERROR: escaped character not allowed in a name: first\e -mandoc: escname.in:20:13: WARNING: invalid escape sequence: \n[second +mandoc: escname.in:20:13: ERROR: incomplete escape sequence: \n[second mandoc: escname.in:20:12: STYLE: whitespace at end of input line diff --git a/regress/roff/string/name.out_lint b/regress/roff/string/name.out_lint index 13283f5d..55f1f87b 100644 --- a/regress/roff/string/name.out_lint +++ b/regress/roff/string/name.out_lint @@ -1,6 +1,6 @@ mandoc: name.in:11:5: ERROR: escaped character not allowed in a name: bs\e mandoc: name.in:14:5: ERROR: escaped character not allowed in a name: bl\ -mandoc: name.in:18:29: WARNING: invalid escape sequence: \*[norm +mandoc: name.in:18:29: ERROR: incomplete escape sequence: \*[norm mandoc: name.in:18:28: STYLE: whitespace at end of input line mandoc: name.in:20:7: WARNING: undefined string, using "": quot mandoc: name.in:20:6: STYLE: whitespace at end of input line diff --git a/roff_escape.c b/roff_escape.c index 8145a9dd..777d753b 100644 --- a/roff_escape.c +++ b/roff_escape.c @@ -310,13 +310,12 @@ roff_escape(const char *buf, const int ln, const int aesc, iendarg = iarg; while (maxl > 0) { if (buf[iendarg] == '\0') { + err = MANDOCERR_ESC_INCOMPLETE; + if (rval != ESCAPE_EXPAND) + rval = ESCAPE_ERROR; /* Ignore an incomplete argument except for \w. */ if (buf[inam] != 'w') iendarg = iarg; - if (rval == ESCAPE_EXPAND) - err = MANDOCERR_ESC_BAD; - else - rval = ESCAPE_ERROR; break; } if (buf[iendarg] == term) { @@ -401,6 +400,7 @@ roff_escape(const char *buf, const int ln, const int aesc, */ if (term != '\0' && argl == 1 && buf[iarg] != '-') { + err = MANDOCERR_ESC_BADCHAR; rval = ESCAPE_ERROR; break; } @@ -416,8 +416,10 @@ roff_escape(const char *buf, const int ln, const int aesc, c = 0; for (i = iarg; i < iendarg; i++) c = 10 * c + (buf[i] - '0'); - if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) + if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) { + err = MANDOCERR_ESC_BADCHAR; break; + } iarg += 4; rval = ESCAPE_NUMBERED; break; @@ -433,13 +435,19 @@ roff_escape(const char *buf, const int ln, const int aesc, if (buf[iarg] != 'u' || argl < 5 || argl > 7) break; if (argl == 7 && - (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) + (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) { + err = MANDOCERR_ESC_BADCHAR; break; - if (argl == 6 && buf[iarg + 1] == '0') + } + if (argl == 6 && buf[iarg + 1] == '0') { + err = MANDOCERR_ESC_BADCHAR; break; + } if (argl == 5 && buf[iarg + 1] == 'D' && - strchr("89ABCDEF", buf[iarg + 2]) != NULL) + strchr("89ABCDEF", buf[iarg + 2]) != NULL) { + err = MANDOCERR_ESC_BADCHAR; break; + } if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef") + 1 == argl) rval = ESCAPE_UNICODE; @@ -477,7 +485,8 @@ out: *resc = iesc; switch (rval) { case ESCAPE_ERROR: - err = MANDOCERR_ESC_BAD; + if (err == MANDOCERR_OK) + err = MANDOCERR_ESC_BAD; break; case ESCAPE_UNSUPP: err = MANDOCERR_ESC_UNSUPP; @@ -487,8 +496,10 @@ out: err = MANDOCERR_ESC_UNDEF; break; case ESCAPE_SPECIAL: - if (mchars_spec2cp(buf + iarg, argl) < 0) - err = MANDOCERR_ESC_BAD; + if (mchars_spec2cp(buf + iarg, argl) >= 0) + err = MANDOCERR_OK; + else if (err == MANDOCERR_OK) + err = MANDOCERR_ESC_UNKCHAR; break; default: break; -- cgit v1.2.3-56-ge451