]> git.cameronkatri.com Git - mandoc.git/blobdiff - roff.c
Bugfix:
[mandoc.git] / roff.c
diff --git a/roff.c b/roff.c
index a303c86bdfce4edd4e829c8aa03ee8b8b75c49c5..0304156f7dc39d79a93667e792bce7b6cd911fcf 100644 (file)
--- a/roff.c
+++ b/roff.c
@@ -1,4 +1,4 @@
-/*     $Id: roff.c,v 1.351 2018/12/14 06:33:14 schwarze Exp $ */
+/*     $Id: roff.c,v 1.354 2018/12/20 03:41:54 schwarze Exp $ */
 /*
  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
@@ -1154,6 +1154,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
        struct roff_node *n;    /* used for header comments */
        const char      *start; /* start of the string to process */
        char            *stesc; /* start of an escape sequence ('\\') */
+       const char      *esct;  /* type of escape sequence */
        char            *ep;    /* end of comment string */
        const char      *stnam; /* start of the name, after "[(*" */
        const char      *cp;    /* end of the name, e.g. before ']' */
@@ -1163,7 +1164,6 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
        size_t           naml;  /* actual length of the escape name */
        size_t           asz;   /* length of the replacement */
        size_t           rsz;   /* length of the rest of the string */
-       enum mandoc_esc  esc;   /* type of the escape sequence */
        int              inaml; /* length returned from mandoc_escape() */
        int              expand_count;  /* to avoid infinite loops */
        int              npos;  /* position in numeric expression */
@@ -1172,6 +1172,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
        int              done;  /* no more input available */
        int              deftype; /* type of definition to paste */
        int              rcsid; /* kind of RCS id seen */
+       enum mandocerr   err;   /* for escape sequence problems */
        char             sign;  /* increment number register */
        char             term;  /* character terminating the escape */
 
@@ -1304,7 +1305,10 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
 
                term = '\0';
                cp = stesc + 1;
-               switch (*cp) {
+               if (*cp == 'E')
+                       cp++;
+               esct = cp;
+               switch (*esct) {
                case '*':
                case '$':
                        res = NULL;
@@ -1320,12 +1324,26 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
                        res = ubuf;
                        break;
                default:
-                       esc = mandoc_escape(&cp, &stnam, &inaml);
-                       if (esc == ESCAPE_ERROR ||
-                           (esc == ESCAPE_SPECIAL &&
-                            mchars_spec2cp(stnam, inaml) < 0))
-                               mandoc_msg(MANDOCERR_ESC_BAD,
-                                   ln, (int)(stesc - buf->buf),
+                       err = MANDOCERR_OK;
+                       switch(mandoc_escape(&cp, &stnam, &inaml)) {
+                       case ESCAPE_SPECIAL:
+                               if (mchars_spec2cp(stnam, inaml) >= 0)
+                                       break;
+                               /* FALLTHROUGH */
+                       case ESCAPE_ERROR:
+                               err = MANDOCERR_ESC_BAD;
+                               break;
+                       case ESCAPE_UNDEF:
+                               err = MANDOCERR_ESC_UNDEF;
+                               break;
+                       case ESCAPE_UNSUPP:
+                               err = MANDOCERR_ESC_UNSUPP;
+                               break;
+                       default:
+                               break;
+                       }
+                       if (err != MANDOCERR_OK)
+                               mandoc_msg(err, ln, (int)(stesc - buf->buf),
                                    "%.*s", (int)(cp - stesc), stesc);
                        stesc--;
                        continue;
@@ -1382,7 +1400,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
                                cp++;
                                break;
                        }
-                       if (*cp++ != '\\' || stesc[1] != 'w') {
+                       if (*cp++ != '\\' || *esct != 'w') {
                                naml++;
                                continue;
                        }
@@ -1390,6 +1408,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
                        case ESCAPE_SPECIAL:
                        case ESCAPE_UNICODE:
                        case ESCAPE_NUMBERED:
+                       case ESCAPE_UNDEF:
                        case ESCAPE_OVERSTRIKE:
                                naml++;
                                break;
@@ -1403,7 +1422,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
                 * undefined, resume searching for escapes.
                 */
 
-               switch (stesc[1]) {
+               switch (*esct) {
                case '*':
                        if (arg_complete) {
                                deftype = ROFFDEF_USER | ROFFDEF_PRE;
@@ -1430,15 +1449,15 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
                                break;
                        }
                        ctx = r->mstack + r->mstackpos;
-                       npos = stesc[2] - '1';
+                       npos = esct[1] - '1';
                        if (npos >= 0 && npos <= 8) {
                                res = npos < ctx->argc ?
                                    ctx->argv[npos] : "";
                                break;
                        }
-                       if (stesc[2] == '*')
+                       if (esct[1] == '*')
                                quote_args = 0;
-                       else if (stesc[2] == '@')
+                       else if (esct[1] == '@')
                                quote_args = 1;
                        else {
                                mandoc_msg(MANDOCERR_ARG_NONUM, ln,
@@ -1500,7 +1519,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
                }
 
                if (res == NULL) {
-                       if (stesc[1] == '*')
+                       if (*esct == '*')
                                mandoc_msg(MANDOCERR_STR_UNDEF,
                                    ln, (int)(stesc - buf->buf),
                                    "%.*s", (int)naml, stnam);
@@ -1527,6 +1546,103 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos)
        return ROFF_CONT;
 }
 
+/*
+ * Parse one quoted or unquoted roff-style request or macro argument
+ * starting at *cpp and NUL-terminate it in place.  Pairs of quotes
+ * inside quoted arguments collapse; "\\" becomes a single backslash
+ * and "\t" and "\a" become literal tabs.  Return the start of the
+ * argument: *cpp itself, or *cpp + 1 if the argument is quoted.
+ * On return, *cpp points to the next argument or to the terminating
+ * NUL byte, and *pos is advanced by the number of bytes consumed.
+ */
+char *
+mandoc_getarg(char **cpp, int ln, int *pos)
+{
+       char     *start, *cp;
+       int       quoted, pairs, white;
+
+       /* Quoting can only start with a new word. */
+       start = *cpp;
+       quoted = 0;
+       if ('"' == *start) {
+               quoted = 1;
+               start++;
+       }
+
+       pairs = 0;
+       white = 0;
+       for (cp = start; '\0' != *cp; cp++) {
+
+               /*
+                * Move the following text left by the number of bytes
+                * dropped so far: quoted quotes, "\\", "\t", and "\a".
+                */
+               if (pairs)
+                       cp[-pairs] = cp[0];
+
+               if ('\\' == cp[0]) {
+                       /*
+                        * In copy mode, translate double to single
+                        * backslashes and both \t and \a to literal tabs.
+                        */
+                       switch (cp[1]) {
+                       case 'a':
+                       case 't':
+                               cp[-pairs] = '\t';
+                               /* FALLTHROUGH */
+                       case '\\':
+                               pairs++;
+                               cp++;
+                               break;
+                       case ' ':
+                               /* Skip escaped blanks in unquoted args. */
+                               if (0 == quoted)
+                                       cp++;
+                               break;
+                       default:
+                               break;
+                       }
+               } else if (0 == quoted) {
+                       if (' ' == cp[0]) {
+                               /* Unescaped blanks end unquoted args. */
+                               white = 1;
+                               break;
+                       }
+               } else if ('"' == cp[0]) {
+                       if ('"' == cp[1]) {
+                               /* Quoted quotes collapse. */
+                               pairs++;
+                               cp++;
+                       } else {
+                               /* A lone quote ends the arg; 2 = closed. */
+                               quoted = 2;
+                               break;
+                       }
+               }
+       }
+
+       /* Quoted argument without a closing quote. */
+       if (1 == quoted)
+               mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
+
+       /* NUL-terminate this argument and move to the next one. */
+       if (pairs)
+               cp[-pairs] = '\0';
+       if ('\0' != *cp) {
+               *cp++ = '\0';
+               while (' ' == *cp)
+                       cp++;
+       }
+       *pos += (int)(cp - start) + (quoted ? 1 : 0);
+       *cpp = cp;
+
+       if ('\0' == *cp && (white || ' ' == cp[-1]))
+               mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
+
+       return start;
+}
+
+
 /*
  * Process text streams.
  */