-/* $OpenBSD$ */
+/* $Id: roff_escape.c,v 1.14 2022/06/08 13:23:57 schwarze Exp $ */
/*
* Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022
* Ingo Schwarze <schwarze@openbsd.org>
int iarg, iendarg, iend;
enum mandoc_esc rval;
- rval = roff_escape(--*rendarg, 0, 0, NULL, &iarg, &iendarg, &iend);
+ rval = roff_escape(--*rendarg, 0, 0,
+ NULL, NULL, &iarg, &iendarg, &iend);
assert(rval != ESCAPE_EXPAND);
if (rarg != NULL)
*rarg = *rendarg + iarg;
* sequence are returned in *resc ... *rend.
* Otherwise, *resc is set to aesc and the positions of the escape
* sequence starting at aesc are returned.
- * Diagnostic messages are generated if and only if resc != NULL,
+ * Diagnostic messages are generated if and only if ln != 0,
* that is, if and only if called by roff_expand().
*/
enum mandoc_esc
roff_escape(const char *buf, const int ln, const int aesc,
- int *resc, int *rarg, int *rendarg, int *rend)
+ int *resc, int *rnam, int *rarg, int *rendarg, int *rend)
{
int iesc; /* index of leading escape char */
+ int inam; /* index of escape name */
int iarg; /* index beginning the argument */
int iendarg; /* index right after the argument */
int iend; /* index right after the sequence */
- int sesc, sarg, sendarg, send; /* for sub-escape */
+ int sesc, snam, sarg, sendarg, send; /* for sub-escape */
+ int escterm; /* whether term is escaped */
int maxl; /* expected length of the argument */
int argl; /* actual length of the argument */
int c, i; /* for \[char...] parsing */
+ int valid_A; /* for \A parsing */
enum mandoc_esc rval; /* return value */
+ enum mandoc_esc stype; /* for sub-escape */
enum mandocerr err; /* diagnostic code */
- char esc_name;
char term; /* byte terminating the argument */
/*
* it only makes a difference in copy mode.
*/
- iesc = iarg = aesc;
+ iesc = inam = aesc;
do {
- iarg++;
- } while (buf[iarg] == 'E');
+ inam++;
+ } while (buf[inam] == 'E');
/*
* Sort the following cases first by syntax category,
* then by escape sequence type, and finally by ASCII code.
*/
- esc_name = buf[iarg];
- iendarg = iend = ++iarg;
+ iarg = iendarg = iend = inam + 1;
maxl = INT_MAX;
term = '\0';
- switch (esc_name) {
+ err = MANDOCERR_OK;
+ switch (buf[inam]) {
/* Escape sequences taking no arguments at all. */
case '!':
case '?':
+ case 'r':
rval = ESCAPE_UNSUPP;
goto out;
case '^':
case 'a':
case 'd':
- case 'r':
case 't':
case 'u':
case '{':
rval = ESCAPE_IGNORE;
goto out;
+ case '\0':
+ iendarg = --iend;
+ /* FALLTHROUGH */
+ case '.':
case '\\':
default:
iarg--;
case ' ':
case '\'':
case '-':
- case '.':
case '0':
case ':':
case '_':
case '$':
case '*':
+ case 'V':
+ case 'g':
case 'n':
rval = ESCAPE_EXPAND;
break;
case 'F':
case 'M':
case 'O':
- case 'V':
case 'Y':
- case 'g':
case 'k':
case 'm':
rval = ESCAPE_IGNORE;
/* Quoted arguments */
+ case 'A':
case 'B':
case 'w':
rval = ESCAPE_EXPAND;
term = '\b';
break;
- case 'A':
case 'D':
case 'H':
case 'L':
term = '\b';
break;
case 'C':
- if (buf[iarg] != '\'') {
- rval = ESCAPE_ERROR;
- goto out;
- }
rval = ESCAPE_SPECIAL;
term = '\b';
break;
/* Decide how to end the argument. */
+ escterm = 0;
+ stype = ESCAPE_EXPAND;
if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) &&
- buf[iarg] == buf[iesc] && roff_escape(buf, ln, iendarg,
- &sesc, &sarg, &sendarg, &send) == ESCAPE_EXPAND)
- goto out_sub;
+ buf[iarg] == buf[iesc]) {
+ stype = roff_escape(buf, ln, iendarg,
+ &sesc, &snam, &sarg, &sendarg, &send);
+ if (stype == ESCAPE_EXPAND)
+ goto out_sub;
+ }
if (term == '\b') {
- if ((esc_name == 'N' && isdigit((unsigned char)buf[iarg])) ||
- (esc_name == 'h' && strchr(" %&()*+-./0123456789:<=>",
- buf[iarg]) != NULL)) {
- iendarg = iend = iarg + 1;
- rval = ESCAPE_ERROR;
- goto out;
+ if (stype == ESCAPE_UNDEF)
+ iarg++;
+ if (stype != ESCAPE_EXPAND && stype != ESCAPE_UNDEF) {
+ if (strchr("BHLRSNhlvx", buf[inam]) != NULL &&
+ strchr(" ,.0DLOXYZ^abdhlortuvx|~",
+ buf[snam]) != NULL) {
+ err = MANDOCERR_ESC_DELIM;
+ iend = send;
+ iarg = iendarg = sesc;
+ goto out;
+ }
+ escterm = 1;
+ iarg = send;
+ term = buf[snam];
+ } else if (strchr("BDHLRSvxNhl", buf[inam]) != NULL &&
+ strchr(" %&()*+-./0123456789:<=>", buf[iarg]) != NULL) {
+ err = MANDOCERR_ESC_DELIM;
+ if (rval != ESCAPE_EXPAND)
+ rval = ESCAPE_ERROR;
+ if (buf[inam] != 'D') {
+ iendarg = iend = iarg + 1;
+ goto out;
+ }
}
- term = buf[iarg++];
+ if (term == '\b')
+ term = buf[iarg++];
} else if (term == '\0' && maxl == INT_MAX) {
- if (esc_name == 'n' && (buf[iarg] == '+' || buf[iarg] == '-'))
+ if (buf[inam] == 'n' && (buf[iarg] == '+' || buf[iarg] == '-'))
iarg++;
switch (buf[iarg]) {
case '(':
case '[':
if (buf[++iarg] == ' ') {
iendarg = iend = iarg + 1;
+ err = MANDOCERR_ESC_ARG;
rval = ESCAPE_ERROR;
goto out;
}
/* Advance to the end of the argument. */
+ valid_A = 1;
iendarg = iarg;
while (maxl > 0) {
if (buf[iendarg] == '\0') {
- /* Ignore an incomplete argument except for \w. */
- if (esc_name != 'w')
+ err = MANDOCERR_ESC_INCOMPLETE;
+ if (rval != ESCAPE_EXPAND &&
+ rval != ESCAPE_OVERSTRIKE)
+ rval = ESCAPE_ERROR;
+ /* Usually, ignore an incomplete argument. */
+ if (strchr("Aow", buf[inam]) == NULL)
iendarg = iarg;
break;
}
- if (buf[iendarg] == term) {
+ if (escterm == 0 && buf[iendarg] == term) {
iend = iendarg + 1;
break;
}
- if (esc_name == 'N' &&
+ if (buf[iendarg] == buf[iesc]) {
+ stype = roff_escape(buf, ln, iendarg,
+ &sesc, &snam, &sarg, &sendarg, &send);
+ if (stype == ESCAPE_EXPAND)
+ goto out_sub;
+ iend = send;
+ if (escterm == 1 &&
+ (buf[snam] == term || buf[inam] == 'N'))
+ break;
+ if (stype != ESCAPE_UNDEF)
+ valid_A = 0;
+ iendarg = send;
+ } else if (buf[inam] == 'N' &&
isdigit((unsigned char)buf[iendarg]) == 0) {
iend = iendarg + 1;
break;
- }
- if (buf[iendarg] == buf[iesc]) {
- if (roff_escape(buf, ln, iendarg,
- &sesc, &sarg, &sendarg, &send) == ESCAPE_EXPAND)
- goto out_sub;
- iendarg = iend = send;
} else {
+ if (buf[iendarg] == ' ' || buf[iendarg] == '\t')
+ valid_A = 0;
if (maxl != INT_MAX)
maxl--;
iend = ++iendarg;
}
}
- if (resc != NULL && ((maxl != INT_MAX && maxl != 0) ||
- (term != '\0' && buf[iendarg] != term)))
- mandoc_msg(MANDOCERR_ESC_BAD, ln, iesc, "%s", buf + iesc);
/* Post-process depending on the content of the argument. */
argl = iendarg - iarg;
- switch (esc_name) {
+ switch (buf[inam]) {
case '*':
if (resc == NULL && argl == 2 &&
buf[iarg] == '.' && buf[iarg + 1] == 'T')
rval = ESCAPE_DEVICE;
break;
+ case 'A':
+ if (valid_A == 0)
+ iendarg = iarg;
+ break;
case 'O':
switch (buf[iarg]) {
case '0':
case '2':
case '3':
case '4':
- rval = argl == 1 ? ESCAPE_IGNORE : ESCAPE_ERROR;
+ if (argl == 1)
+ rval = ESCAPE_IGNORE;
+ else {
+ err = MANDOCERR_ESC_ARG;
+ rval = ESCAPE_ERROR;
+ }
break;
case '5':
- rval = buf[iarg - 1] == '[' ? ESCAPE_UNSUPP :
- ESCAPE_ERROR;
+ if (buf[iarg - 1] == '[')
+ rval = ESCAPE_UNSUPP;
+ else {
+ err = MANDOCERR_ESC_ARG;
+ rval = ESCAPE_ERROR;
+ }
break;
default:
+ err = MANDOCERR_ESC_ARG;
rval = ESCAPE_ERROR;
break;
}
switch (rval) {
case ESCAPE_FONT:
rval = mandoc_font(buf + iarg, argl);
+ if (rval == ESCAPE_ERROR)
+ err = MANDOCERR_ESC_ARG;
break;
case ESCAPE_SPECIAL:
+ if (argl == 0) {
+ err = MANDOCERR_ESC_BADCHAR;
+ rval = ESCAPE_ERROR;
+ break;
+ }
/*
* The file chars.c only provides one common list of
*/
if (term != '\0' && argl == 1 && buf[iarg] != '-') {
+ err = MANDOCERR_ESC_BADCHAR;
rval = ESCAPE_ERROR;
break;
}
c = 0;
for (i = iarg; i < iendarg; i++)
c = 10 * c + (buf[i] - '0');
- if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff)
+ if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) {
+ err = MANDOCERR_ESC_BADCHAR;
break;
+ }
iarg += 4;
rval = ESCAPE_NUMBERED;
break;
if (buf[iarg] != 'u' || argl < 5 || argl > 7)
break;
if (argl == 7 &&
- (buf[iarg + 1] != '1' || buf[iarg + 2] != '0'))
+ (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) {
+ err = MANDOCERR_ESC_BADCHAR;
break;
- if (argl == 6 && buf[iarg + 1] == '0')
+ }
+ if (argl == 6 && buf[iarg + 1] == '0') {
+ err = MANDOCERR_ESC_BADCHAR;
break;
+ }
if (argl == 5 && buf[iarg + 1] == 'D' &&
- strchr("89ABCDEF", buf[iarg + 2]) != NULL)
+ strchr("89ABCDEF", buf[iarg + 2]) != NULL) {
+ err = MANDOCERR_ESC_BADCHAR;
break;
+ }
if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef")
+ 1 == argl)
rval = ESCAPE_UNICODE;
out_sub:
iesc = sesc;
+ inam = snam;
iarg = sarg;
iendarg = sendarg;
iend = send;
rval = ESCAPE_EXPAND;
out:
+ if (resc != NULL)
+ *resc = iesc;
+ if (rnam != NULL)
+ *rnam = inam;
if (rarg != NULL)
*rarg = iarg;
if (rendarg != NULL)
*rendarg = iendarg;
if (rend != NULL)
*rend = iend;
- if (resc == NULL)
+ if (ln == 0)
return rval;
/*
* from the parser, not when called from the formatters.
*/
- *resc = iesc;
switch (rval) {
- case ESCAPE_ERROR:
- err = MANDOCERR_ESC_BAD;
- break;
case ESCAPE_UNSUPP:
err = MANDOCERR_ESC_UNSUPP;
break;
case ESCAPE_UNDEF:
- if (esc_name == '\\')
- return rval;
- err = MANDOCERR_ESC_UNDEF;
+ if (buf[inam] != '\\' && buf[inam] != '.')
+ err = MANDOCERR_ESC_UNDEF;
break;
case ESCAPE_SPECIAL:
if (mchars_spec2cp(buf + iarg, argl) >= 0)
- return rval;
- err = MANDOCERR_ESC_BAD;
+ err = MANDOCERR_OK;
+ else if (err == MANDOCERR_OK)
+ err = MANDOCERR_ESC_UNKCHAR;
break;
default:
- return rval;
+ break;
}
- mandoc_msg(err, ln, iesc, "%.*s", iend - iesc, buf + iesc);
+ if (err != MANDOCERR_OK)
+ mandoc_msg(err, ln, iesc, "%.*s", iend - iesc, buf + iesc);
return rval;
}