-/* $OpenBSD$ */
+/* $Id: roff_escape.c,v 1.14 2022/06/08 13:23:57 schwarze Exp $ */
/*
* Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022
* Ingo Schwarze <schwarze@openbsd.org>
* sequence are returned in *resc ... *rend.
* Otherwise, *resc is set to aesc and the positions of the escape
* sequence starting at aesc are returned.
- * Diagnostic messages are generated if and only if resc != NULL,
+ * Diagnostic messages are generated if and only if ln != 0,
* that is, if and only if called by roff_expand().
*/
enum mandoc_esc
int iendarg; /* index right after the argument */
int iend; /* index right after the sequence */
int sesc, snam, sarg, sendarg, send; /* for sub-escape */
+ int escterm; /* whether term is escaped */
int maxl; /* expected length of the argument */
int argl; /* actual length of the argument */
int c, i; /* for \[char...] parsing */
int valid_A; /* for \A parsing */
enum mandoc_esc rval; /* return value */
+ enum mandoc_esc stype; /* for sub-escape */
enum mandocerr err; /* diagnostic code */
char term; /* byte terminating the argument */
term = '\b';
break;
case 'C':
- if (buf[iarg] != '\'') {
- rval = ESCAPE_ERROR;
- goto out;
- }
rval = ESCAPE_SPECIAL;
term = '\b';
break;
/* Decide how to end the argument. */
+ escterm = 0;
+ stype = ESCAPE_EXPAND;
if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) &&
- buf[iarg] == buf[iesc] && roff_escape(buf, ln, iendarg,
- &sesc, &snam, &sarg, &sendarg, &send) == ESCAPE_EXPAND)
- goto out_sub;
+ buf[iarg] == buf[iesc]) {
+ stype = roff_escape(buf, ln, iendarg,
+ &sesc, &snam, &sarg, &sendarg, &send);
+ if (stype == ESCAPE_EXPAND)
+ goto out_sub;
+ }
if (term == '\b') {
- if ((buf[inam] == 'N' && isdigit((unsigned char)buf[iarg])) ||
- (buf[inam] == 'h' && strchr(" %&()*+-./0123456789:<=>",
- buf[iarg]) != NULL)) {
- iendarg = iend = iarg + 1;
- rval = ESCAPE_ERROR;
- goto out;
+ if (stype == ESCAPE_UNDEF)
+ iarg++;
+ if (stype != ESCAPE_EXPAND && stype != ESCAPE_UNDEF) {
+ if (strchr("BHLRSNhlvx", buf[inam]) != NULL &&
+ strchr(" ,.0DLOXYZ^abdhlortuvx|~",
+ buf[snam]) != NULL) {
+ err = MANDOCERR_ESC_DELIM;
+ iend = send;
+ iarg = iendarg = sesc;
+ goto out;
+ }
+ escterm = 1;
+ iarg = send;
+ term = buf[snam];
+ } else if (strchr("BDHLRSvxNhl", buf[inam]) != NULL &&
+ strchr(" %&()*+-./0123456789:<=>", buf[iarg]) != NULL) {
+ err = MANDOCERR_ESC_DELIM;
+ if (rval != ESCAPE_EXPAND)
+ rval = ESCAPE_ERROR;
+ if (buf[inam] != 'D') {
+ iendarg = iend = iarg + 1;
+ goto out;
+ }
}
- term = buf[iarg++];
+ if (term == '\b')
+ term = buf[iarg++];
} else if (term == '\0' && maxl == INT_MAX) {
if (buf[inam] == 'n' && (buf[iarg] == '+' || buf[iarg] == '-'))
iarg++;
case '[':
if (buf[++iarg] == ' ') {
iendarg = iend = iarg + 1;
+ err = MANDOCERR_ESC_ARG;
rval = ESCAPE_ERROR;
goto out;
}
iendarg = iarg;
while (maxl > 0) {
if (buf[iendarg] == '\0') {
- /* Ignore an incomplete argument except for \w. */
- if (buf[inam] != 'w')
- iendarg = iarg;
- if (rval == ESCAPE_EXPAND)
- err = MANDOCERR_ESC_BAD;
- else
+ err = MANDOCERR_ESC_INCOMPLETE;
+ if (rval != ESCAPE_EXPAND &&
+ rval != ESCAPE_OVERSTRIKE)
rval = ESCAPE_ERROR;
+ /* Usually, ignore an incomplete argument. */
+ if (strchr("Aow", buf[inam]) == NULL)
+ iendarg = iarg;
break;
}
- if (buf[iendarg] == term) {
- iend = iendarg + 1;
- break;
- }
- if (buf[inam] == 'N' &&
- isdigit((unsigned char)buf[iendarg]) == 0) {
+ if (escterm == 0 && buf[iendarg] == term) {
iend = iendarg + 1;
break;
}
if (buf[iendarg] == buf[iesc]) {
- switch (roff_escape(buf, ln, iendarg,
- &sesc, &snam, &sarg, &sendarg, &send)) {
- case ESCAPE_EXPAND:
+ stype = roff_escape(buf, ln, iendarg,
+ &sesc, &snam, &sarg, &sendarg, &send);
+ if (stype == ESCAPE_EXPAND)
goto out_sub;
- case ESCAPE_UNDEF:
+ iend = send;
+ if (escterm == 1 &&
+ (buf[snam] == term || buf[inam] == 'N'))
break;
- default:
+ if (stype != ESCAPE_UNDEF)
valid_A = 0;
- break;
- }
- iendarg = iend = send;
+ iendarg = send;
+ } else if (buf[inam] == 'N' &&
+ isdigit((unsigned char)buf[iendarg]) == 0) {
+ iend = iendarg + 1;
+ break;
} else {
if (buf[iendarg] == ' ' || buf[iendarg] == '\t')
valid_A = 0;
case '2':
case '3':
case '4':
- rval = argl == 1 ? ESCAPE_IGNORE : ESCAPE_ERROR;
+ if (argl == 1)
+ rval = ESCAPE_IGNORE;
+ else {
+ err = MANDOCERR_ESC_ARG;
+ rval = ESCAPE_ERROR;
+ }
break;
case '5':
- rval = buf[iarg - 1] == '[' ? ESCAPE_UNSUPP :
- ESCAPE_ERROR;
+ if (buf[iarg - 1] == '[')
+ rval = ESCAPE_UNSUPP;
+ else {
+ err = MANDOCERR_ESC_ARG;
+ rval = ESCAPE_ERROR;
+ }
break;
default:
+ err = MANDOCERR_ESC_ARG;
rval = ESCAPE_ERROR;
break;
}
switch (rval) {
case ESCAPE_FONT:
rval = mandoc_font(buf + iarg, argl);
+ if (rval == ESCAPE_ERROR)
+ err = MANDOCERR_ESC_ARG;
break;
case ESCAPE_SPECIAL:
+ if (argl == 0) {
+ err = MANDOCERR_ESC_BADCHAR;
+ rval = ESCAPE_ERROR;
+ break;
+ }
/*
* The file chars.c only provides one common list of
*/
if (term != '\0' && argl == 1 && buf[iarg] != '-') {
+ err = MANDOCERR_ESC_BADCHAR;
rval = ESCAPE_ERROR;
break;
}
c = 0;
for (i = iarg; i < iendarg; i++)
c = 10 * c + (buf[i] - '0');
- if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff)
+ if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) {
+ err = MANDOCERR_ESC_BADCHAR;
break;
+ }
iarg += 4;
rval = ESCAPE_NUMBERED;
break;
if (buf[iarg] != 'u' || argl < 5 || argl > 7)
break;
if (argl == 7 &&
- (buf[iarg + 1] != '1' || buf[iarg + 2] != '0'))
+ (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) {
+ err = MANDOCERR_ESC_BADCHAR;
break;
- if (argl == 6 && buf[iarg + 1] == '0')
+ }
+ if (argl == 6 && buf[iarg + 1] == '0') {
+ err = MANDOCERR_ESC_BADCHAR;
break;
+ }
if (argl == 5 && buf[iarg + 1] == 'D' &&
- strchr("89ABCDEF", buf[iarg + 2]) != NULL)
+ strchr("89ABCDEF", buf[iarg + 2]) != NULL) {
+ err = MANDOCERR_ESC_BADCHAR;
break;
+ }
if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef")
+ 1 == argl)
rval = ESCAPE_UNICODE;
rval = ESCAPE_EXPAND;
out:
+ if (resc != NULL)
+ *resc = iesc;
if (rnam != NULL)
*rnam = inam;
if (rarg != NULL)
*rendarg = iendarg;
if (rend != NULL)
*rend = iend;
- if (resc == NULL)
+ if (ln == 0)
return rval;
/*
* from the parser, not when called from the formatters.
*/
- *resc = iesc;
switch (rval) {
- case ESCAPE_ERROR:
- err = MANDOCERR_ESC_BAD;
- break;
case ESCAPE_UNSUPP:
err = MANDOCERR_ESC_UNSUPP;
break;
err = MANDOCERR_ESC_UNDEF;
break;
case ESCAPE_SPECIAL:
- if (mchars_spec2cp(buf + iarg, argl) < 0)
- err = MANDOCERR_ESC_BAD;
+ if (mchars_spec2cp(buf + iarg, argl) >= 0)
+ err = MANDOCERR_OK;
+ else if (err == MANDOCERR_OK)
+ err = MANDOCERR_ESC_UNKCHAR;
break;
default:
break;