aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/roff.c
diff options
context:
space:
mode:
authorIngo Schwarze <schwarze@openbsd.org>2018-12-15 19:30:25 +0000
committerIngo Schwarze <schwarze@openbsd.org>2018-12-15 19:30:25 +0000
commitd672f8cb82878baf3834e938c4c416f4493952cf (patch)
treea46f3c1570ef9a8cdb5d67fd75e3bb77c8e4068f /roff.c
parentbb4f86d616ee99eb82c0875af27b9d8a216ecfdc (diff)
downloadmandoc-d672f8cb82878baf3834e938c4c416f4493952cf.tar.gz
mandoc-d672f8cb82878baf3834e938c4c416f4493952cf.tar.zst
mandoc-d672f8cb82878baf3834e938c4c416f4493952cf.zip
Several improvements to escape sequence handling.
* Add the missing special character \_ (underscore). * Partial implementations of \a (leader character) and \E (uninterpreted escape character). * Parse and ignore \r (reverse line feed). * Add a WARNING message about undefined escape sequences. * Add an UNSUPP message about unsupported escape sequences. * Mark \! and \? (transparent throughput) and \O (suppress output) as unsupported. * Treat the various variants of zero-width spaces as one-byte escape sequences rather than as special characters, to avoid defining bogus forms with square brackets. * For special characters with one-byte names, do not define bogus forms with square brackets, except for \[-], which is valid. * In the form with square brackets, undefined special characters do not fall back to printing the name verbatim, not even for one-byte names. * Starting a special character name with a blank is an error. * Undefined escape sequences never abort formatting of the input string, not even in HTML output mode. * Document the newly handled escapes, and a few that were missing. * Regression tests for most of the above.
Diffstat (limited to 'roff.c')
-rw-r--r--roff.c49
1 files changed, 34 insertions, 15 deletions
diff --git a/roff.c b/roff.c
index a303c86b..96a5b33d 100644
--- a/roff.c
+++ b/roff.c
@@ -1,4 +1,4 @@
-/* $Id: roff.c,v 1.351 2018/12/14 06:33:14 schwarze Exp $ */
+/* $Id: roff.c,v 1.352 2018/12/15 19:30:26 schwarze Exp $ */
/*
* Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
@@ -1154,6 +1154,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
struct roff_node *n; /* used for header comments */
const char *start; /* start of the string to process */
char *stesc; /* start of an escape sequence ('\\') */
+ const char *esct; /* type of esccape sequence */
char *ep; /* end of comment string */
const char *stnam; /* start of the name, after "[(*" */
const char *cp; /* end of the name, e.g. before ']' */
@@ -1163,7 +1164,6 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
size_t naml; /* actual length of the escape name */
size_t asz; /* length of the replacement */
size_t rsz; /* length of the rest of the string */
- enum mandoc_esc esc; /* type of the escape sequence */
int inaml; /* length returned from mandoc_escape() */
int expand_count; /* to avoid infinite loops */
int npos; /* position in numeric expression */
@@ -1172,6 +1172,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
int done; /* no more input available */
int deftype; /* type of definition to paste */
int rcsid; /* kind of RCS id seen */
+ enum mandocerr err; /* for escape sequence problems */
char sign; /* increment number register */
char term; /* character terminating the escape */
@@ -1304,7 +1305,10 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
term = '\0';
cp = stesc + 1;
- switch (*cp) {
+ if (*cp == 'E')
+ cp++;
+ esct = cp;
+ switch (*esct) {
case '*':
case '$':
res = NULL;
@@ -1320,12 +1324,26 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
res = ubuf;
break;
default:
- esc = mandoc_escape(&cp, &stnam, &inaml);
- if (esc == ESCAPE_ERROR ||
- (esc == ESCAPE_SPECIAL &&
- mchars_spec2cp(stnam, inaml) < 0))
- mandoc_msg(MANDOCERR_ESC_BAD,
- ln, (int)(stesc - buf->buf),
+ err = MANDOCERR_OK;
+ switch(mandoc_escape(&cp, &stnam, &inaml)) {
+ case ESCAPE_SPECIAL:
+ if (mchars_spec2cp(stnam, inaml) >= 0)
+ break;
+ /* FALLTHROUGH */
+ case ESCAPE_ERROR:
+ err = MANDOCERR_ESC_BAD;
+ break;
+ case ESCAPE_UNDEF:
+ err = MANDOCERR_ESC_UNDEF;
+ break;
+ case ESCAPE_UNSUPP:
+ err = MANDOCERR_ESC_UNSUPP;
+ break;
+ default:
+ break;
+ }
+ if (err != MANDOCERR_OK)
+ mandoc_msg(err, ln, (int)(stesc - buf->buf),
"%.*s", (int)(cp - stesc), stesc);
stesc--;
continue;
@@ -1382,7 +1400,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
cp++;
break;
}
- if (*cp++ != '\\' || stesc[1] != 'w') {
+ if (*cp++ != '\\' || *esct != 'w') {
naml++;
continue;
}
@@ -1390,6 +1408,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
case ESCAPE_SPECIAL:
case ESCAPE_UNICODE:
case ESCAPE_NUMBERED:
+ case ESCAPE_UNDEF:
case ESCAPE_OVERSTRIKE:
naml++;
break;
@@ -1403,7 +1422,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
* undefined, resume searching for escapes.
*/
- switch (stesc[1]) {
+ switch (*esct) {
case '*':
if (arg_complete) {
deftype = ROFFDEF_USER | ROFFDEF_PRE;
@@ -1430,15 +1449,15 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
break;
}
ctx = r->mstack + r->mstackpos;
- npos = stesc[2] - '1';
+ npos = esct[1] - '1';
if (npos >= 0 && npos <= 8) {
res = npos < ctx->argc ?
ctx->argv[npos] : "";
break;
}
- if (stesc[2] == '*')
+ if (esct[1] == '*')
quote_args = 0;
- else if (stesc[2] == '@')
+ else if (esct[1] == '@')
quote_args = 1;
else {
mandoc_msg(MANDOCERR_ARG_NONUM, ln,
@@ -1500,7 +1519,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
}
if (res == NULL) {
- if (stesc[1] == '*')
+ if (*esct == '*')
mandoc_msg(MANDOCERR_STR_UNDEF,
ln, (int)(stesc - buf->buf),
"%.*s", (int)naml, stnam);