git.cameronkatri.com Git - mandoc.git/blobdiff - roff_escape.c
For accessibility, label the last two widgets in the search form.
[mandoc.git] / roff_escape.c
index 8145a9dd39634721185f394f21b0929b3b18db39..b1a4fbeb40ef035572eea2eb7f582120da7bd949 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD$ */
+/* $Id: roff_escape.c,v 1.14 2022/06/08 13:23:57 schwarze Exp $ */
 /*
  * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022
  *               Ingo Schwarze <schwarze@openbsd.org>
@@ -59,7 +59,7 @@ mandoc_escape(const char **rendarg, const char **rarg, int *rargl)
  * sequence are returned in *resc ... *rend.
  * Otherwise, *resc is set to aesc and the positions of the escape
  * sequence starting at aesc are returned.
- * Diagnostic messages are generated if and only if resc != NULL,
+ * Diagnostic messages are generated if and only if ln != 0,
  * that is, if and only if called by roff_expand().
  */
 enum mandoc_esc
  * that is, if and only if called by roff_expand().
  */
 enum mandoc_esc
@@ -72,11 +72,13 @@ roff_escape(const char *buf, const int ln, const int aesc,
        int              iendarg;       /* index right after the argument */
        int              iend;          /* index right after the sequence */
        int              sesc, snam, sarg, sendarg, send; /* for sub-escape */
        int              iendarg;       /* index right after the argument */
        int              iend;          /* index right after the sequence */
        int              sesc, snam, sarg, sendarg, send; /* for sub-escape */
+       int              escterm;       /* whether term is escaped */
        int              maxl;          /* expected length of the argument */
        int              argl;          /* actual length of the argument */
        int              c, i;          /* for \[char...] parsing */
        int              valid_A;       /* for \A parsing */
        enum mandoc_esc  rval;          /* return value */
        int              maxl;          /* expected length of the argument */
        int              argl;          /* actual length of the argument */
        int              c, i;          /* for \[char...] parsing */
        int              valid_A;       /* for \A parsing */
        enum mandoc_esc  rval;          /* return value */
+       enum mandoc_esc  stype;         /* for sub-escape */
        enum mandocerr   err;           /* diagnostic code */
        char             term;          /* byte terminating the argument */
 
        enum mandocerr   err;           /* diagnostic code */
        char             term;          /* byte terminating the argument */
 
@@ -206,10 +208,6 @@ roff_escape(const char *buf, const int ln, const int aesc,
                term = '\b';
                break;
        case 'C':
                term = '\b';
                break;
        case 'C':
-               if (buf[iarg] != '\'') {
-                       rval = ESCAPE_ERROR;
-                       goto out;
-               }
                rval = ESCAPE_SPECIAL;
                term = '\b';
                break;
                rval = ESCAPE_SPECIAL;
                term = '\b';
                break;
@@ -268,20 +266,43 @@ roff_escape(const char *buf, const int ln, const int aesc,
 
        /* Decide how to end the argument. */
 
 
        /* Decide how to end the argument. */
 
+       escterm = 0;
+       stype = ESCAPE_EXPAND;
        if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) &&
        if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) &&
-           buf[iarg] == buf[iesc] && roff_escape(buf, ln, iendarg,
-           &sesc, &snam, &sarg, &sendarg, &send) == ESCAPE_EXPAND)
-               goto out_sub;
+           buf[iarg] == buf[iesc]) {
+               stype = roff_escape(buf, ln, iendarg,
+                   &sesc, &snam, &sarg, &sendarg, &send);
+               if (stype == ESCAPE_EXPAND)
+                       goto out_sub;
+       }
 
        if (term == '\b') {
 
        if (term == '\b') {
-               if ((buf[inam] == 'N' && isdigit((unsigned char)buf[iarg])) ||
-                   (buf[inam] == 'h' && strchr(" %&()*+-./0123456789:<=>",
-                    buf[iarg]) != NULL)) {
-                       iendarg = iend = iarg + 1;
-                       rval = ESCAPE_ERROR;
-                       goto out;
+               if (stype == ESCAPE_UNDEF)
+                       iarg++;
+               if (stype != ESCAPE_EXPAND && stype != ESCAPE_UNDEF) {
+                       if (strchr("BHLRSNhlvx", buf[inam]) != NULL &&
+                           strchr(" ,.0DLOXYZ^abdhlortuvx|~",
+                           buf[snam]) != NULL) {
+                               err = MANDOCERR_ESC_DELIM;
+                               iend = send;
+                               iarg = iendarg = sesc;
+                               goto out;
+                       }
+                       escterm = 1;
+                       iarg = send;
+                       term = buf[snam];
+               } else if (strchr("BDHLRSvxNhl", buf[inam]) != NULL &&
+                   strchr(" %&()*+-./0123456789:<=>", buf[iarg]) != NULL) {
+                       err = MANDOCERR_ESC_DELIM;
+                       if (rval != ESCAPE_EXPAND)
+                               rval = ESCAPE_ERROR;
+                       if (buf[inam] != 'D') {
+                               iendarg = iend = iarg + 1;
+                               goto out;
+                       }
                }
                }
-               term = buf[iarg++];
+               if (term == '\b')
+                       term = buf[iarg++];
        } else if (term == '\0' && maxl == INT_MAX) {
                if (buf[inam] == 'n' && (buf[iarg] == '+' || buf[iarg] == '-'))
                        iarg++;
        } else if (term == '\0' && maxl == INT_MAX) {
                if (buf[inam] == 'n' && (buf[iarg] == '+' || buf[iarg] == '-'))
                        iarg++;
@@ -293,6 +314,7 @@ roff_escape(const char *buf, const int ln, const int aesc,
                case '[':
                        if (buf[++iarg] == ' ') {
                                iendarg = iend = iarg + 1;
                case '[':
                        if (buf[++iarg] == ' ') {
                                iendarg = iend = iarg + 1;
+                               err = MANDOCERR_ESC_ARG;
                                rval = ESCAPE_ERROR;
                                goto out;
                        }
                                rval = ESCAPE_ERROR;
                                goto out;
                        }
@@ -310,36 +332,35 @@ roff_escape(const char *buf, const int ln, const int aesc,
        iendarg = iarg;
        while (maxl > 0) {
                if (buf[iendarg] == '\0') {
        iendarg = iarg;
        while (maxl > 0) {
                if (buf[iendarg] == '\0') {
-                       /* Ignore an incomplete argument except for \w. */
-                       if (buf[inam] != 'w')
-                               iendarg = iarg;
-                       if (rval == ESCAPE_EXPAND)
-                               err = MANDOCERR_ESC_BAD;
-                       else
+                       err = MANDOCERR_ESC_INCOMPLETE;
+                       if (rval != ESCAPE_EXPAND &&
+                           rval != ESCAPE_OVERSTRIKE)
                                rval = ESCAPE_ERROR;
                                rval = ESCAPE_ERROR;
+                       /* Usually, ignore an incomplete argument. */
+                       if (strchr("Aow", buf[inam]) == NULL)
+                               iendarg = iarg;
                        break;
                }
                        break;
                }
-               if (buf[iendarg] == term) {
-                       iend = iendarg + 1;
-                       break;
-               }
-               if (buf[inam] == 'N' &&
-                   isdigit((unsigned char)buf[iendarg]) == 0) {
+               if (escterm == 0 && buf[iendarg] == term) {
                        iend = iendarg + 1;
                        break;
                }
                if (buf[iendarg] == buf[iesc]) {
                        iend = iendarg + 1;
                        break;
                }
                if (buf[iendarg] == buf[iesc]) {
-                       switch (roff_escape(buf, ln, iendarg,
-                           &sesc, &snam, &sarg, &sendarg, &send)) {
-                       case ESCAPE_EXPAND:
+                       stype = roff_escape(buf, ln, iendarg,
+                           &sesc, &snam, &sarg, &sendarg, &send);
+                       if (stype == ESCAPE_EXPAND)
                                goto out_sub;
                                goto out_sub;
-                       case ESCAPE_UNDEF:
+                       iend = send;
+                       if (escterm == 1 &&
+                           (buf[snam] == term || buf[inam] == 'N'))
                                break;
                                break;
-                       default:
+                       if (stype != ESCAPE_UNDEF)
                                valid_A = 0;
                                valid_A = 0;
-                               break;
-                       }
-                       iendarg = iend = send;
+                       iendarg = send;
+               } else if (buf[inam] == 'N' &&
+                   isdigit((unsigned char)buf[iendarg]) == 0) {
+                       iend = iendarg + 1;
+                       break;
                } else {
                        if (buf[iendarg] == ' ' || buf[iendarg] == '\t')
                                valid_A = 0;
                } else {
                        if (buf[iendarg] == ' ' || buf[iendarg] == '\t')
                                valid_A = 0;
@@ -371,13 +392,23 @@ roff_escape(const char *buf, const int ln, const int aesc,
                case '2':
                case '3':
                case '4':
                case '2':
                case '3':
                case '4':
-                       rval = argl == 1 ? ESCAPE_IGNORE : ESCAPE_ERROR;
+                       if (argl == 1)
+                               rval = ESCAPE_IGNORE;
+                       else {
+                               err = MANDOCERR_ESC_ARG;
+                               rval = ESCAPE_ERROR;
+                       }
                        break;
                case '5':
                        break;
                case '5':
-                       rval = buf[iarg - 1] == '[' ? ESCAPE_UNSUPP :
-                           ESCAPE_ERROR;
+                       if (buf[iarg - 1] == '[')
+                               rval = ESCAPE_UNSUPP;
+                       else {
+                               err = MANDOCERR_ESC_ARG;
+                               rval = ESCAPE_ERROR;
+                       }
                        break;
                default:
                        break;
                default:
+                       err = MANDOCERR_ESC_ARG;
                        rval = ESCAPE_ERROR;
                        break;
                }
                        rval = ESCAPE_ERROR;
                        break;
                }
@@ -389,9 +420,16 @@ roff_escape(const char *buf, const int ln, const int aesc,
        switch (rval) {
        case ESCAPE_FONT:
                rval = mandoc_font(buf + iarg, argl);
        switch (rval) {
        case ESCAPE_FONT:
                rval = mandoc_font(buf + iarg, argl);
+               if (rval == ESCAPE_ERROR)
+                       err = MANDOCERR_ESC_ARG;
                break;
 
        case ESCAPE_SPECIAL:
                break;
 
        case ESCAPE_SPECIAL:
+               if (argl == 0) {
+                       err = MANDOCERR_ESC_BADCHAR;
+                       rval = ESCAPE_ERROR;
+                       break;
+               }
 
                /*
                 * The file chars.c only provides one common list of
 
                /*
                 * The file chars.c only provides one common list of
@@ -401,6 +439,7 @@ roff_escape(const char *buf, const int ln, const int aesc,
                 */
 
                if (term != '\0' && argl == 1 && buf[iarg] != '-') {
                 */
 
                if (term != '\0' && argl == 1 && buf[iarg] != '-') {
+                       err = MANDOCERR_ESC_BADCHAR;
                        rval = ESCAPE_ERROR;
                        break;
                }
                        rval = ESCAPE_ERROR;
                        break;
                }
@@ -416,8 +455,10 @@ roff_escape(const char *buf, const int ln, const int aesc,
                        c = 0;
                        for (i = iarg; i < iendarg; i++)
                                c = 10 * c + (buf[i] - '0');
                        c = 0;
                        for (i = iarg; i < iendarg; i++)
                                c = 10 * c + (buf[i] - '0');
-                       if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff)
+                       if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) {
+                               err = MANDOCERR_ESC_BADCHAR;
                                break;
                                break;
+                       }
                        iarg += 4;
                        rval = ESCAPE_NUMBERED;
                        break;
                        iarg += 4;
                        rval = ESCAPE_NUMBERED;
                        break;
@@ -433,13 +474,19 @@ roff_escape(const char *buf, const int ln, const int aesc,
                if (buf[iarg] != 'u' || argl < 5 || argl > 7)
                        break;
                if (argl == 7 &&
                if (buf[iarg] != 'u' || argl < 5 || argl > 7)
                        break;
                if (argl == 7 &&
-                   (buf[iarg + 1] != '1' || buf[iarg + 2] != '0'))
+                   (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) {
+                       err = MANDOCERR_ESC_BADCHAR;
                        break;
                        break;
-               if (argl == 6 && buf[iarg + 1] == '0')
+               }
+               if (argl == 6 && buf[iarg + 1] == '0') {
+                       err = MANDOCERR_ESC_BADCHAR;
                        break;
                        break;
+               }
                if (argl == 5 && buf[iarg + 1] == 'D' &&
                if (argl == 5 && buf[iarg + 1] == 'D' &&
-                   strchr("89ABCDEF", buf[iarg + 2]) != NULL)
+                   strchr("89ABCDEF", buf[iarg + 2]) != NULL) {
+                       err = MANDOCERR_ESC_BADCHAR;
                        break;
                        break;
+               }
                if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef")
                    + 1 == argl)
                        rval = ESCAPE_UNICODE;
                if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef")
                    + 1 == argl)
                        rval = ESCAPE_UNICODE;
@@ -458,6 +505,8 @@ out_sub:
        rval = ESCAPE_EXPAND;
 
 out:
        rval = ESCAPE_EXPAND;
 
 out:
+       if (resc != NULL)
+               *resc = iesc;
        if (rnam != NULL)
                *rnam = inam;
        if (rarg != NULL)
        if (rnam != NULL)
                *rnam = inam;
        if (rarg != NULL)
@@ -466,7 +515,7 @@ out:
                *rendarg = iendarg;
        if (rend != NULL)
                *rend = iend;
                *rendarg = iendarg;
        if (rend != NULL)
                *rend = iend;
-       if (resc == NULL)
+       if (ln == 0)
                return rval;
 
        /*
                return rval;
 
        /*
@@ -474,11 +523,7 @@ out:
         * from the parser, not when called from the formatters.
         */
 
         * from the parser, not when called from the formatters.
         */
 
-       *resc = iesc;
        switch (rval) {
        switch (rval) {
-       case ESCAPE_ERROR:
-               err = MANDOCERR_ESC_BAD;
-               break;
        case ESCAPE_UNSUPP:
                err = MANDOCERR_ESC_UNSUPP;
                break;
        case ESCAPE_UNSUPP:
                err = MANDOCERR_ESC_UNSUPP;
                break;
@@ -487,8 +532,10 @@ out:
                        err = MANDOCERR_ESC_UNDEF;
                break;
        case ESCAPE_SPECIAL:
                        err = MANDOCERR_ESC_UNDEF;
                break;
        case ESCAPE_SPECIAL:
-               if (mchars_spec2cp(buf + iarg, argl) < 0)
-                       err = MANDOCERR_ESC_BAD;
+               if (mchars_spec2cp(buf + iarg, argl) >= 0)
+                       err = MANDOCERR_OK;
+               else if (err == MANDOCERR_OK)
+                       err = MANDOCERR_ESC_UNKCHAR;
                break;
        default:
                break;
                break;
        default:
                break;