]> git.cameronkatri.com Git - mandoc.git/blobdiff - roff.c
For accessibility, label the last two widgets in the search form.
[mandoc.git] / roff.c
diff --git a/roff.c b/roff.c
index bd222d567c9756cc6a63cc349af2985fbee8e164..ee0da3e6244e7ed2b19f3d85f03461463c9f9743 100644 (file)
--- a/roff.c
+++ b/roff.c
@@ -1,4 +1,4 @@
-/* $Id: roff.c,v 1.381 2022/04/13 13:19:34 schwarze Exp $ */
+/* $Id: roff.c,v 1.394 2022/06/07 09:41:59 schwarze Exp $ */
 /*
  * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
@@ -207,6 +207,8 @@ static      int              roff_evalpar(struct roff *, int,
 static int              roff_evalstrcond(const char *, int *);
 static int              roff_expand(struct roff *, struct buf *,
                                int, int, char);
+static void             roff_expand_patch(struct buf *, int,
+                               const char *, int);
 static void             roff_free1(struct roff *);
 static void             roff_freereg(struct roffreg *);
 static void             roff_freestr(struct roffkv *);
@@ -227,15 +229,19 @@ static    int              roff_line_ignore(ROFF_ARGS);
 static void             roff_man_alloc1(struct roff_man *);
 static void             roff_man_free1(struct roff_man *);
 static int              roff_manyarg(ROFF_ARGS);
+static int              roff_mc(ROFF_ARGS);
 static int              roff_noarg(ROFF_ARGS);
 static int              roff_nop(ROFF_ARGS);
 static int              roff_nr(ROFF_ARGS);
 static int              roff_onearg(ROFF_ARGS);
 static enum roff_tok    roff_parse(struct roff *, char *, int *,
                                int, int);
+static int              roff_parse_comment(struct roff *, struct buf *,
+                               int, int, char);
 static int              roff_parsetext(struct roff *, struct buf *,
                                int, int *);
 static int              roff_renamed(ROFF_ARGS);
+static int              roff_req_or_macro(ROFF_ARGS);
 static int              roff_return(ROFF_ARGS);
 static int              roff_rm(ROFF_ARGS);
 static int              roff_rn(ROFF_ARGS);
@@ -379,7 +385,7 @@ static      struct roffmac   roffs[TOKEN_NONE] = {
        { roff_noarg, NULL, NULL, 0 },  /* fi */
        { roff_onearg, NULL, NULL, 0 },  /* ft */
        { roff_onearg, NULL, NULL, 0 },  /* ll */
-       { roff_onearg, NULL, NULL, 0 },  /* mc */
+       { roff_mc, NULL, NULL, 0 },  /* mc */
        { roff_noarg, NULL, NULL, 0 },  /* nf */
        { roff_onearg, NULL, NULL, 0 },  /* po */
        { roff_onearg, NULL, NULL, 0 },  /* rj */
@@ -1230,418 +1236,363 @@ deroff(char **dest, const struct roff_node *n)
 /* --- main functions of the roff parser ---------------------------------- */
 
 /*
- * In the current line, expand escape sequences that produce parsable
- * input text.  Also check the syntax of the remaining escape sequences,
- * which typically produce output glyphs or change formatter state.
+ * Save comments preceding the title macro, for example in order to
+ * preserve Copyright and license headers in HTML output,
+ * provide diagnostics about RCS ids and trailing whitespace in comments,
+ * then discard comments including preceding whitespace.
+ * This function also handles input line continuation.
  */
 static int
-roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
+roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec)
 {
-       struct mctx     *ctx;   /* current macro call context */
-       char             ubuf[24]; /* buffer to print the number */
        struct roff_node *n;    /* used for header comments */
        const char      *start; /* start of the string to process */
+       const char      *cp;    /* for RCS id parsing */
        char            *stesc; /* start of an escape sequence ('\\') */
-       const char      *esct;  /* type of esccape sequence */
        char            *ep;    /* end of comment string */
-       const char      *stnam; /* start of the name, after "[(*" */
-       const char      *cp;    /* end of the name, e.g. before ']' */
-       const char      *res;   /* the string to be substituted */
-       char            *nbuf;  /* new buffer to copy buf->buf to */
-       size_t           maxl;  /* expected length of the escape name */
-       size_t           naml;  /* actual length of the escape name */
-       size_t           asz;   /* length of the replacement */
-       size_t           rsz;   /* length of the rest of the string */
-       int              inaml; /* length returned from mandoc_escape() */
-       int              expand_count;  /* to avoid infinite loops */
-       int              npos;  /* position in numeric expression */
-       int              arg_complete; /* argument not interrupted by eol */
-       int              quote_args; /* true for \\$@, false for \\$* */
-       int              done;  /* no more input available */
-       int              deftype; /* type of definition to paste */
        int              rcsid; /* kind of RCS id seen */
-       enum mandocerr   err;   /* for escape sequence problems */
-       char             sign;  /* increment number register */
-       char             term;  /* character terminating the escape */
-
-       /* Search forward for comments. */
-
-       done = 0;
-       start = buf->buf + pos;
-       for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
-               if (stesc[0] != newesc || stesc[1] == '\0')
-                       continue;
-               stesc++;
-               if (*stesc != '"' && *stesc != '#')
-                       continue;
 
-               /* Comment found, look for RCS id. */
-
-               rcsid = 0;
-               if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
-                       rcsid = 1 << MANDOC_OS_OPENBSD;
-                       cp += 8;
-               } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
-                       rcsid = 1 << MANDOC_OS_NETBSD;
-                       cp += 7;
-               }
-               if (cp != NULL &&
-                   isalnum((unsigned char)*cp) == 0 &&
-                   strchr(cp, '$') != NULL) {
-                       if (r->man->meta.rcsids & rcsid)
-                               mandoc_msg(MANDOCERR_RCS_REP, ln,
-                                   (int)(stesc - buf->buf) + 1,
-                                   "%s", stesc + 1);
-                       r->man->meta.rcsids |= rcsid;
+       for (start = stesc = buf->buf + pos;; stesc++) {
+               /*
+                * XXX Ugly hack: Remove the newline character that
+                * mparse_buf_r() appended to mark the end of input
+                * if it is not preceded by an escape character.
+                */
+               if (stesc[0] == '\n') {
+                       assert(stesc[1] == '\0');
+                       stesc[0] = '\0';
                }
 
-               /* Handle trailing whitespace. */
+               /* The line ends without continuation or comment. */
+               if (stesc[0] == '\0')
+                       return ROFF_CONT;
 
-               ep = strchr(stesc--, '\0') - 1;
-               if (*ep == '\n') {
-                       done = 1;
-                       ep--;
-               }
-               if (*ep == ' ' || *ep == '\t')
-                       mandoc_msg(MANDOCERR_SPACE_EOL,
-                           ln, (int)(ep - buf->buf), NULL);
+               /* Unescaped byte: skip it. */
+               if (stesc[0] != ec)
+                       continue;
 
                /*
-                * Save comments preceding the title macro
-                * in the syntax tree.
+                * XXX Ugly hack: Do not attempt to append another line
+                * if the function mparse_buf_r() appended a newline
+                * character to indicate the end of input.
                 */
-
-               if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
-                       while (*ep == ' ' || *ep == '\t')
-                               ep--;
-                       ep[1] = '\0';
-                       n = roff_node_alloc(r->man,
-                           ln, stesc + 1 - buf->buf,
-                           ROFFT_COMMENT, TOKEN_NONE);
-                       n->string = mandoc_strdup(stesc + 2);
-                       roff_node_append(r->man, n);
-                       n->flags |= NODE_VALID | NODE_ENDED;
-                       r->man->next = ROFF_NEXT_SIBLING;
+               if (stesc[1] == '\n') {
+                       assert(stesc[2] == '\0');
+                       stesc[0] = '\0';
+                       return ROFF_CONT;
                }
 
-               /* Line continuation with comment. */
-
-               if (stesc[1] == '#') {
-                       *stesc = '\0';
+               /*
+                * An escape character at the end of an input line
+                * requests line continuation.
+                */
+               if (stesc[1] == '\0') {
+                       stesc[0] = '\0';
                        return ROFF_IGN | ROFF_APPEND;
                }
 
-               /* Discard normal comments. */
+               /* Found a comment: process it. */
+               if (stesc[1] == '"' || stesc[1] == '#')
+                       break;
 
-               while (stesc > start && stesc[-1] == ' ' &&
-                   (stesc == start + 1 || stesc[-2] != '\\'))
-                       stesc--;
-               *stesc = '\0';
-               break;
+               /* Escaped escape character: skip them both. */
+               if (stesc[1] == ec)
+                       stesc++;
        }
-       if (stesc == start)
-               return ROFF_CONT;
-       stesc--;
 
-       /* Notice the end of the input. */
+       /* Look for an RCS id in the comment. */
 
-       if (*stesc == '\n') {
-               *stesc-- = '\0';
-               done = 1;
+       rcsid = 0;
+       if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
+               rcsid = 1 << MANDOC_OS_OPENBSD;
+               cp += 8;
+       } else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
+               rcsid = 1 << MANDOC_OS_NETBSD;
+               cp += 7;
+       }
+       if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
+           strchr(cp, '$') != NULL) {
+               if (r->man->meta.rcsids & rcsid)
+                       mandoc_msg(MANDOCERR_RCS_REP, ln,
+                           (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
+               r->man->meta.rcsids |= rcsid;
        }
 
-       expand_count = 0;
-       while (stesc >= start) {
-               if (*stesc != newesc) {
+       /* Warn about trailing whitespace at the end of the comment. */
 
-                       /*
-                        * If we have a non-standard escape character,
-                        * escape literal backslashes because all
-                        * processing in subsequent functions uses
-                        * the standard escaping rules.
-                        */
+       ep = strchr(stesc + 2, '\0') - 1;
+       if (*ep == '\n')
+               *ep-- = '\0';
+       if (*ep == ' ' || *ep == '\t')
+               mandoc_msg(MANDOCERR_SPACE_EOL,
+                   ln, (int)(ep - buf->buf), NULL);
 
-                       if (newesc != ASCII_ESC && *stesc == '\\') {
-                               *stesc = '\0';
-                               buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
-                                   buf->buf, stesc + 1) + 1;
-                               start = nbuf + pos;
-                               stesc = nbuf + (stesc - buf->buf);
-                               free(buf->buf);
-                               buf->buf = nbuf;
-                       }
+       /* Save comments preceding the title macro in the syntax tree. */
 
-                       /* Search backwards for the next escape. */
+       if (r->options & MPARSE_COMMENT) {
+               while (*ep == ' ' || *ep == '\t')
+                       ep--;
+               ep[1] = '\0';
+               n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
+                   ROFFT_COMMENT, TOKEN_NONE);
+               n->string = mandoc_strdup(stesc + 2);
+               roff_node_append(r->man, n);
+               n->flags |= NODE_VALID | NODE_ENDED;
+               r->man->next = ROFF_NEXT_SIBLING;
+       }
 
-                       stesc--;
-                       continue;
-               }
+       /* The comment requests line continuation. */
 
-               /* If it is escaped, skip it. */
+       if (stesc[1] == '#') {
+               *stesc = '\0';
+               return ROFF_IGN | ROFF_APPEND;
+       }
 
-               for (cp = stesc - 1; cp >= start; cp--)
-                       if (*cp != r->escape)
-                               break;
+       /* Discard the comment including preceding whitespace. */
 
-               if ((stesc - cp) % 2 == 0) {
-                       while (stesc > cp)
-                               *stesc-- = '\\';
-                       continue;
-               } else if (stesc[1] != '\0') {
-                       *stesc = '\\';
-               } else {
-                       *stesc-- = '\0';
-                       if (done)
-                               continue;
-                       else
-                               return ROFF_IGN | ROFF_APPEND;
-               }
+       while (stesc > start && stesc[-1] == ' ' &&
+           (stesc == start + 1 || stesc[-2] != '\\'))
+               stesc--;
+       *stesc = '\0';
+       return ROFF_CONT;
+}
 
-               /* Decide whether to expand or to check only. */
+/*
+ * In the current line, expand escape sequences that produce parsable
+ * input text.  Also check the syntax of the remaining escape sequences,
+ * which typically produce output glyphs or change formatter state.
+ */
+static int
+roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
+{
+       char             ubuf[24];      /* buffer to print a number */
+       struct mctx     *ctx;           /* current macro call context */
+       const char      *res;           /* the string to be pasted */
+       const char      *src;           /* source for copying */
+       char            *dst;           /* destination for copying */
+       int              iesc;          /* index of leading escape char */
+       int              inam;          /* index of the escape name */
+       int              iarg;          /* index beginning the argument */
+       int              iendarg;       /* index right after the argument */
+       int              iend;          /* index right after the sequence */
+       int              isrc, idst;    /* to reduce \\ and \. in names */
+       int              deftype;       /* type of definition to paste */
+       int              argi;          /* macro argument index */
+       int              quote_args;    /* true for \\$@, false for \\$* */
+       int              asz;           /* length of the replacement */
+       int              rsz;           /* length of the rest of the string */
+       int              npos;          /* position in numeric expression */
+       int              expand_count;  /* to avoid infinite loops */
 
-               term = '\0';
-               cp = stesc + 1;
-               while (*cp == 'E')
-                       cp++;
-               esct = cp;
-               switch (*esct) {
-               case '*':
-               case '$':
-                       res = NULL;
-                       break;
-               case 'B':
-               case 'w':
-                       term = cp[1];
-                       /* FALLTHROUGH */
-               case 'n':
-                       sign = cp[1];
-                       if (sign == '+' || sign == '-')
-                               cp++;
-                       res = ubuf;
-                       break;
-               default:
-                       err = MANDOCERR_OK;
-                       switch(mandoc_escape(&cp, &stnam, &inaml)) {
-                       case ESCAPE_SPECIAL:
-                               if (mchars_spec2cp(stnam, inaml) >= 0)
-                                       break;
-                               /* FALLTHROUGH */
-                       case ESCAPE_ERROR:
-                               err = MANDOCERR_ESC_BAD;
-                               break;
-                       case ESCAPE_UNDEF:
-                               err = MANDOCERR_ESC_UNDEF;
-                               break;
-                       case ESCAPE_UNSUPP:
-                               err = MANDOCERR_ESC_UNSUPP;
-                               break;
-                       default:
-                               break;
+       expand_count = 0;
+       while (buf->buf[pos] != '\0') {
+
+               /*
+                * Skip plain ASCII characters.
+                * If we have a non-standard escape character,
+                * escape literal backslashes because all processing in
+                * subsequent functions uses the standard escaping rules.
+                */
+
+               if (buf->buf[pos] != ec) {
+                       if (ec != ASCII_ESC && buf->buf[pos] == '\\') {
+                               roff_expand_patch(buf, pos, "\\e", pos + 1);
+                               pos++;
                        }
-                       if (err != MANDOCERR_OK)
-                               mandoc_msg(err, ln, (int)(stesc - buf->buf),
-                                   "%.*s", (int)(cp - stesc), stesc);
-                       stesc--;
+                       pos++;
                        continue;
                }
 
-               if (EXPAND_LIMIT < ++expand_count) {
-                       mandoc_msg(MANDOCERR_ROFFLOOP,
-                           ln, (int)(stesc - buf->buf), NULL);
-                       return ROFF_IGN;
-               }
-
                /*
-                * The third character decides the length
-                * of the name of the string or register.
-                * Save a pointer to the name.
+                * Parse escape sequences,
+                * issue diagnostic messages when appropriate,
+                * and skip sequences that do not need expansion.
+                * If we have a non-standard escape character, translate
+                * it to backslashes and translate backslashes to \e.
                 */
 
-               if (term == '\0') {
-                       switch (*++cp) {
-                       case '\0':
-                               maxl = 0;
-                               break;
-                       case '(':
-                               cp++;
-                               maxl = 2;
-                               break;
-                       case '[':
-                               cp++;
-                               term = ']';
-                               maxl = 0;
-                               break;
-                       default:
-                               maxl = 1;
-                               break;
+               if (roff_escape(buf->buf, ln, pos, &iesc, &inam,
+                   &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
+                       while (pos < iend) {
+                               if (buf->buf[pos] == ec) {
+                                       buf->buf[pos] = '\\';
+                                       if (pos + 1 < iend)
+                                               pos++;
+                               } else if (buf->buf[pos] == '\\') {
+                                       roff_expand_patch(buf,
+                                           pos, "\\e", pos + 1);
+                                       pos++;
+                                       iend++;
+                               }
+                               pos++;
                        }
-               } else {
-                       cp += 2;
-                       maxl = 0;
+                       continue;
                }
-               stnam = cp;
-
-               /* Advance to the end of the name. */
 
-               naml = 0;
-               arg_complete = 1;
-               while (maxl == 0 || naml < maxl) {
-                       if (*cp == '\0') {
-                               mandoc_msg(MANDOCERR_ESC_BAD, ln,
-                                   (int)(stesc - buf->buf), "%s", stesc);
-                               arg_complete = 0;
-                               break;
-                       }
-                       if (maxl == 0 && *cp == term) {
-                               cp++;
-                               break;
-                       }
-                       if (*cp++ != '\\' || *esct != 'w') {
-                               naml++;
-                               continue;
-                       }
-                       switch (mandoc_escape(&cp, NULL, NULL)) {
-                       case ESCAPE_SPECIAL:
-                       case ESCAPE_UNICODE:
-                       case ESCAPE_NUMBERED:
-                       case ESCAPE_UNDEF:
-                       case ESCAPE_OVERSTRIKE:
-                               naml++;
-                               break;
-                       default:
-                               break;
+               /* Reduce \\ and \. in names. */
+
+               if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') {
+                       isrc = idst = iarg;
+                       while (isrc < iendarg) {
+                               if (isrc + 1 < iendarg &&
+                                   buf->buf[isrc] == '\\' &&
+                                   (buf->buf[isrc + 1] == '\\' ||
+                                    buf->buf[isrc + 1] == '.'))
+                                       isrc++;
+                               buf->buf[idst++] = buf->buf[isrc++];
                        }
+                       iendarg -= isrc - idst;
                }
 
-               /*
-                * Retrieve the replacement string; if it is
-                * undefined, resume searching for escapes.
-                */
+               /* Handle expansion. */
 
-               switch (*esct) {
+               res = NULL;
+               switch (buf->buf[inam]) {
                case '*':
-                       if (arg_complete) {
-                               deftype = ROFFDEF_USER | ROFFDEF_PRE;
-                               res = roff_getstrn(r, stnam, naml, &deftype);
-
-                               /*
-                                * If not overriden, let \*(.T
-                                * through to the formatters.
-                                */
-
-                               if (res == NULL && naml == 2 &&
-                                   stnam[0] == '.' && stnam[1] == 'T') {
-                                       roff_setstrn(&r->strtab,
-                                           ".T", 2, NULL, 0, 0);
-                                       stesc--;
-                                       continue;
-                               }
+                       if (iendarg == iarg)
+                               break;
+                       deftype = ROFFDEF_USER | ROFFDEF_PRE;
+                       if ((res = roff_getstrn(r, buf->buf + iarg,
+                           iendarg - iarg, &deftype)) != NULL)
+                               break;
+
+                       /*
+                        * If not overriden,
+                        * let \*(.T through to the formatters.
+                        */
+
+                       if (iendarg - iarg == 2 &&
+                           buf->buf[iarg] == '.' &&
+                           buf->buf[iarg + 1] == 'T') {
+                               roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0);
+                               pos = iend;
+                               continue;
                        }
+
+                       mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc,
+                           "%.*s", iendarg - iarg, buf->buf + iarg);
                        break;
+
                case '$':
                        if (r->mstackpos < 0) {
-                               mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
-                                   (int)(stesc - buf->buf), "%.3s", stesc);
+                               mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc,
+                                   "%.*s", iend - iesc, buf->buf + iesc);
                                break;
                        }
                        ctx = r->mstack + r->mstackpos;
-                       npos = esct[1] - '1';
-                       if (npos >= 0 && npos <= 8) {
-                               res = npos < ctx->argc ?
-                                   ctx->argv[npos] : "";
+                       argi = buf->buf[iarg] - '1';
+                       if (argi >= 0 && argi <= 8) {
+                               if (argi < ctx->argc)
+                                       res = ctx->argv[argi];
                                break;
                        }
-                       if (esct[1] == '*')
+                       if (buf->buf[iarg] == '*')
                                quote_args = 0;
-                       else if (esct[1] == '@')
+                       else if (buf->buf[iarg] == '@')
                                quote_args = 1;
                        else {
-                               mandoc_msg(MANDOCERR_ARG_NONUM, ln,
-                                   (int)(stesc - buf->buf), "%.3s", stesc);
+                               mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc,
+                                   "%.*s", iend - iesc, buf->buf + iesc);
                                break;
                        }
                        asz = 0;
-                       for (npos = 0; npos < ctx->argc; npos++) {
-                               if (npos)
+                       for (argi = 0; argi < ctx->argc; argi++) {
+                               if (argi)
                                        asz++;  /* blank */
                                if (quote_args)
                                        asz += 2;  /* quotes */
-                               asz += strlen(ctx->argv[npos]);
+                               asz += strlen(ctx->argv[argi]);
                        }
-                       if (asz != 3) {
-                               rsz = buf->sz - (stesc - buf->buf) - 3;
-                               if (asz < 3)
-                                       memmove(stesc + asz, stesc + 3, rsz);
-                               buf->sz += asz - 3;
-                               nbuf = mandoc_realloc(buf->buf, buf->sz);
-                               start = nbuf + pos;
-                               stesc = nbuf + (stesc - buf->buf);
-                               buf->buf = nbuf;
-                               if (asz > 3)
-                                       memmove(stesc + asz, stesc + 3, rsz);
+                       if (asz != iend - iesc) {
+                               rsz = buf->sz - iend;
+                               if (asz < iend - iesc)
+                                       memmove(buf->buf + iesc + asz,
+                                           buf->buf + iend, rsz);
+                               buf->sz = iesc + asz + rsz;
+                               buf->buf = mandoc_realloc(buf->buf, buf->sz);
+                               if (asz > iend - iesc)
+                                       memmove(buf->buf + iesc + asz,
+                                           buf->buf + iend, rsz);
                        }
-                       for (npos = 0; npos < ctx->argc; npos++) {
-                               if (npos)
-                                       *stesc++ = ' ';
+                       dst = buf->buf + iesc;
+                       for (argi = 0; argi < ctx->argc; argi++) {
+                               if (argi)
+                                       *dst++ = ' ';
                                if (quote_args)
-                                       *stesc++ = '"';
-                               cp = ctx->argv[npos];
-                               while (*cp != '\0')
-                                       *stesc++ = *cp++;
+                                       *dst++ = '"';
+                               src = ctx->argv[argi];
+                               while (*src != '\0')
+                                       *dst++ = *src++;
                                if (quote_args)
-                                       *stesc++ = '"';
+                                       *dst++ = '"';
                        }
                        continue;
+               case 'A':
+                       ubuf[0] = iendarg > iarg ? '1' : '0';
+                       ubuf[1] = '\0';
+                       res = ubuf;
+                       break;
                case 'B':
                        npos = 0;
-                       ubuf[0] = arg_complete &&
-                           roff_evalnum(r, ln, stnam, &npos,
-                             NULL, ROFFNUM_SCALE) &&
-                           stnam + npos + 1 == cp ? '1' : '0';
+                       ubuf[0] = iendarg > iarg && iend > iendarg &&
+                           roff_evalnum(r, ln, buf->buf + iarg, &npos,
+                                        NULL, ROFFNUM_SCALE) &&
+                           npos == iendarg - iarg ? '1' : '0';
                        ubuf[1] = '\0';
+                       res = ubuf;
+                       break;
+               case 'V':
+                       mandoc_msg(MANDOCERR_UNSUPP, ln, iesc,
+                           "%.*s", iend - iesc, buf->buf + iesc);
+                       roff_expand_patch(buf, iendarg, "}", iend);
+                       roff_expand_patch(buf, iesc, "${", iarg);
+                       continue;
+               case 'g':
                        break;
                case 'n':
-                       if (arg_complete)
+                       if (iendarg > iarg)
                                (void)snprintf(ubuf, sizeof(ubuf), "%d",
-                                   roff_getregn(r, stnam, naml, sign));
+                                   roff_getregn(r, buf->buf + iarg,
+                                   iendarg - iarg, buf->buf[inam + 1]));
                        else
                                ubuf[0] = '\0';
+                       res = ubuf;
                        break;
                case 'w':
-                       /* use even incomplete args */
-                       (void)snprintf(ubuf, sizeof(ubuf), "%d",
-                           24 * (int)naml);
+                       (void)snprintf(ubuf, sizeof(ubuf),
+                           "%d", (iendarg - iarg) * 24);
+                       res = ubuf;
+                       break;
+               default:
                        break;
                }
-
-               if (res == NULL) {
-                       if (*esct == '*')
-                               mandoc_msg(MANDOCERR_STR_UNDEF,
-                                   ln, (int)(stesc - buf->buf),
-                                   "%.*s", (int)naml, stnam);
+               if (res == NULL)
                        res = "";
-               } else if (buf->sz + strlen(res) > SHRT_MAX) {
-                       mandoc_msg(MANDOCERR_ROFFLOOP,
-                           ln, (int)(stesc - buf->buf), NULL);
+               if (++expand_count > EXPAND_LIMIT ||
+                   buf->sz + strlen(res) > SHRT_MAX) {
+                       mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL);
                        return ROFF_IGN;
                }
-
-               /* Replace the escape sequence by the string. */
-
-               *stesc = '\0';
-               buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
-                   buf->buf, res, cp) + 1;
-
-               /* Prepare for the next replacement. */
-
-               start = nbuf + pos;
-               stesc = nbuf + (stesc - buf->buf) + strlen(res);
-               free(buf->buf);
-               buf->buf = nbuf;
+               roff_expand_patch(buf, iesc, res, iend);
        }
        return ROFF_CONT;
 }
 
+/*
+ * Replace the substring from the start position (inclusive)
+ * to end position (exclusive) with the repl(acement) string.
+ */
+static void
+roff_expand_patch(struct buf *buf, int start, const char *repl, int end)
+{
+       char    *nbuf;
+
+       buf->sz = mandoc_asprintf(&nbuf, "%.*s%s%s", start, buf->buf,
+           repl, buf->buf + end) + 1;
+       free(buf->buf);
+       buf->buf = nbuf;
+}
+
 /*
  * Parse a quoted or unquoted roff-style request or macro argument.
  * Return a pointer to the parsed argument, which is either the original
@@ -1854,7 +1805,12 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
                assert(e == ROFF_CONT);
        }
 
-       /* Expand some escape sequences. */
+       /* Handle comments and escape sequences. */
+
+       e = roff_parse_comment(r, buf, ln, pos, r->escape);
+       if ((e & ROFF_MASK) == ROFF_IGN)
+               return e;
+       assert(e == ROFF_CONT);
 
        e = roff_expand(r, buf, ln, pos, r->escape);
        if ((e & ROFF_MASK) == ROFF_IGN)
@@ -1904,7 +1860,6 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
        /*
         * If a scope is open, go to the child handler for that macro,
         * as it may want to preprocess before doing anything with it.
-        * Don't do so if an equation is open.
         */
 
        if (r->last) {
@@ -1912,19 +1867,27 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
                return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
        }
 
-       /* No scope is open.  This is a new request or macro. */
-
        r->options &= ~MPARSE_COMMENT;
        spos = pos;
        t = roff_parse(r, buf->buf, &pos, ln, ppos);
+       return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
+}
 
-       /* Tables ignore most macros. */
+/*
+ * Handle a new request or macro.
+ * May be called outside any scope or from inside a conditional scope.
+ */
+static int
+roff_req_or_macro(ROFF_ARGS) {
 
-       if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
-           t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
+       /* For now, tables ignore most macros and some request. */
+
+       if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
+           tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
+           tok == ROFF_sp)) {
                mandoc_msg(MANDOCERR_TBLMACRO,
-                   ln, pos, "%s", buf->buf + spos);
-               if (t != TOKEN_NONE)
+                   ln, ppos, "%s", buf->buf + ppos);
+               if (tok != TOKEN_NONE)
                        return ROFF_IGN;
                while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
                        pos++;
@@ -1937,9 +1900,9 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
 
        /* For now, let high level macros abort .ce mode. */
 
-       if (ctl && roffce_node != NULL &&
-           (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
-            t == ROFF_TH || t == ROFF_TS)) {
+       if (roffce_node != NULL &&
+           (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
+            tok == ROFF_TH || tok == ROFF_TS)) {
                r->man->last = roffce_node;
                r->man->next = ROFF_NEXT_SIBLING;
                roffce_lines = 0;
@@ -1951,12 +1914,12 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
         * Let the standard macro set parsers handle it.
         */
 
-       if (t == TOKEN_NONE)
+       if (tok == TOKEN_NONE)
                return ROFF_CONT;
 
-       /* Execute a roff request or a user defined macro. */
+       /* Execute a roff request or a user-defined macro. */
 
-       return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
+       return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
 }
 
 /*
@@ -1999,8 +1962,10 @@ roff_endparse(struct roff *r)
 }
 
 /*
- * Parse a roff node's type from the input buffer.  This must be in the
- * form of ".foo xxx" in the usual way.
+ * Parse the request or macro name at buf[*pos].
+ * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
+ * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
+ * As a side effect, set r->current_string to the definition or to NULL.
  */
 static enum roff_tok
 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
@@ -2275,12 +2240,8 @@ roff_block_sub(ROFF_ARGS)
        int             i, j;
 
        /*
-        * First check whether a custom macro exists at this level.  If
-        * it does, then check against it.  This is some of groff's
-        * stranger behaviours.  If we encountered a custom end-scope
-        * tag and that tag also happens to be a "real" macro, then we
-        * need to try interpreting it again as a real macro.  If it's
-        * not, then return ignore.  Else continue.
+        * If a custom end marker is a user-defined or predefined macro
+        * or a request, interpret it.
         */
 
        if (r->last->end) {
@@ -2306,20 +2267,17 @@ roff_block_sub(ROFF_ARGS)
                }
        }
 
-       /*
-        * If we have no custom end-query or lookup failed, then try
-        * pulling it out of the hashtable.
-        */
+       /* Handle the standard end marker. */
 
        t = roff_parse(r, buf->buf, &pos, ln, ppos);
+       if (t == ROFF_cblock)
+               return roff_cblock(r, t, buf, ln, ppos, pos, offs);
 
-       if (t != ROFF_cblock) {
-               if (tok != ROFF_ig)
-                       roff_setstr(r, r->last->name, buf->buf + ppos, 2);
-               return ROFF_IGN;
-       }
+       /* Not an end marker, so append the line to the block. */
 
-       return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
+       if (tok != ROFF_ig)
+               roff_setstr(r, r->last->name, buf->buf + ppos, 2);
+       return ROFF_IGN;
 }
 
 static int
@@ -2399,27 +2357,18 @@ static int
 roff_cond_sub(ROFF_ARGS)
 {
        struct roffnode *bl;
-       int              irc, rr;
+       int              irc, rr, spos;
        enum roff_tok    t;
 
        rr = 0;  /* If arguments follow "\}", skip them. */
        irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
+       spos = pos;
        t = roff_parse(r, buf->buf, &pos, ln, ppos);
 
-       /* For now, let high level macros abort .ce mode. */
-
-       if (roffce_node != NULL &&
-           (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
-             t == ROFF_TH || t == ROFF_TS)) {
-               r->man->last = roffce_node;
-               r->man->next = ROFF_NEXT_SIBLING;
-               roffce_lines = 0;
-               roffce_node = NULL;
-       }
-
        /*
-        * Fully handle known macros when they are structurally
-        * required or when the conditional evaluated to true.
+        * Handle requests and macros if the conditional evaluated
+        * to true or if they are structurally required.
+        * The .break request is always handled specially.
         */
 
        if (t == ROFF_break) {
@@ -2432,11 +2381,11 @@ roff_cond_sub(ROFF_ARGS)
                                        break;
                        }
                }
-       } else if (t != TOKEN_NONE &&
-           (rr || roffs[t].flags & ROFFMAC_STRUCT))
-               irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
-       else
-               irc |= rr ? ROFF_CONT : ROFF_IGN;
+       } else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
+               irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
+               if (irc & ROFF_WHILE)
+                       irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
+       }
        return irc;
 }
 
@@ -3729,6 +3678,54 @@ roff_eo(ROFF_ARGS)
        return ROFF_IGN;
 }
 
+static int
+roff_mc(ROFF_ARGS)
+{
+       struct roff_node        *n;
+       char                    *cp;
+
+       /* Parse the first argument. */
+
+       cp = buf->buf + pos;
+       if (*cp != '\0')
+               cp++;
+       if (buf->buf[pos] == '\\') {
+               switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
+               case ESCAPE_SPECIAL:
+               case ESCAPE_UNICODE:
+               case ESCAPE_NUMBERED:
+                       break;
+               default:
+                       *cp = '\0';
+                       mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
+                           "mc %s", buf->buf + pos);
+                       buf->buf[pos] = '\0';
+                       break;
+               }
+       }
+
+       /* Ignore additional arguments. */
+
+       while (*cp == ' ')
+               *cp++ = '\0';
+       if (*cp != '\0') {
+               mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
+                   "mc ... %s", cp);
+               *cp = '\0';
+       }
+
+       /* Create the .mc node. */
+
+       roff_elem_alloc(r->man, ln, ppos, tok);
+       n = r->man->last;
+       if (buf->buf[pos] != '\0')
+               roff_word_alloc(r->man, ln, pos, buf->buf + pos);
+       n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
+       r->man->last = n;
+       r->man->next = ROFF_NEXT_SIBLING;
+       return ROFF_IGN;
+}
+
 static int
 roff_nop(ROFF_ARGS)
 {
@@ -3743,7 +3740,6 @@ roff_tr(ROFF_ARGS)
 {
        const char      *p, *first, *second;
        size_t           fsz, ssz;
-       enum mandoc_esc  esc;
 
        p = buf->buf + pos;
 
@@ -3757,23 +3753,15 @@ roff_tr(ROFF_ARGS)
 
                first = p++;
                if (*first == '\\') {
-                       esc = mandoc_escape(&p, NULL, NULL);
-                       if (esc == ESCAPE_ERROR) {
-                               mandoc_msg(MANDOCERR_ESC_BAD, ln,
-                                   (int)(p - buf->buf), "%s", first);
+                       if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
                                return ROFF_IGN;
-                       }
                        fsz = (size_t)(p - first);
                }
 
                second = p++;
                if (*second == '\\') {
-                       esc = mandoc_escape(&p, NULL, NULL);
-                       if (esc == ESCAPE_ERROR) {
-                               mandoc_msg(MANDOCERR_ESC_BAD, ln,
-                                   (int)(p - buf->buf), "%s", second);
+                       if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
                                return ROFF_IGN;
-                       }
                        ssz = (size_t)(p - second);
                } else if (*second == '\0') {
                        mandoc_msg(MANDOCERR_TR_ODD, ln,
@@ -3870,8 +3858,9 @@ static int
 roff_shift(ROFF_ARGS)
 {
        struct mctx     *ctx;
-       int              levels, i;
+       int              argpos, levels, i;
 
+       argpos = pos;
        levels = 1;
        if (buf->buf[pos] != '\0' &&
            roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
@@ -3886,9 +3875,13 @@ roff_shift(ROFF_ARGS)
        ctx = r->mstack + r->mstackpos;
        if (levels > ctx->argc) {
                mandoc_msg(MANDOCERR_SHIFT,
-                   ln, pos, "%d, but max is %d", levels, ctx->argc);
+                   ln, argpos, "%d, but max is %d", levels, ctx->argc);
                levels = ctx->argc;
        }
+       if (levels < 0) {
+               mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
+               levels = 0;
+       }
        if (levels == 0)
                return ROFF_IGN;
        for (i = 0; i < levels; i++)
@@ -4016,7 +4009,7 @@ static size_t
 roff_getname(struct roff *r, char **cpp, int ln, int pos)
 {
        char     *name, *cp;
-       size_t    namesz;
+       int       namesz, inam, iend;
 
        name = *cpp;
        if (*name == '\0')
@@ -4024,24 +4017,46 @@ roff_getname(struct roff *r, char **cpp, int ln, int pos)
 
        /* Advance cp to the byte after the end of the name. */
 
-       for (cp = name; 1; cp++) {
-               namesz = cp - name;
+       cp = name;
+       namesz = 0;
+       for (;;) {
                if (*cp == '\0')
                        break;
                if (*cp == ' ' || *cp == '\t') {
                        cp++;
                        break;
                }
-               if (*cp != '\\')
+               if (*cp != '\\') {
+                       if (name + namesz < cp) {
+                               name[namesz] = *cp;
+                               *cp = ' ';
+                       }
+                       namesz++;
+                       cp++;
                        continue;
+               }
                if (cp[1] == '{' || cp[1] == '}')
                        break;
-               if (*++cp == '\\')
-                       continue;
-               mandoc_msg(MANDOCERR_NAMESC, ln, pos,
-                   "%.*s", (int)(cp - name + 1), name);
-               mandoc_escape((const char **)&cp, NULL, NULL);
-               break;
+               if (roff_escape(cp, 0, 0, NULL, &inam,
+                   NULL, NULL, &iend) != ESCAPE_UNDEF) {
+                       mandoc_msg(MANDOCERR_NAMESC, ln, pos,
+                           "%.*s%.*s", namesz, name, iend, cp);
+                       cp += iend;
+                       break;
+               }
+
+               /*
+                * In an identifier, \\, \., \G and so on
+                * are reduced to \, ., G and so on,
+                * vaguely similar to copy mode.
+                */
+
+               name[namesz++] = cp[inam];
+               while (iend--) {
+                       if (cp >= name + namesz)
+                               *cp = ' ';
+                       cp++;
+               }
        }
 
        /* Read past spaces. */