-/* $Id: roff.c,v 1.350 2018/12/14 05:18:03 schwarze Exp $ */
+/* $Id: roff.c,v 1.353 2018/12/18 22:00:02 schwarze Exp $ */
/*
* Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
};
struct roff {
- struct mparse *parse; /* parse point */
struct roff_man *man; /* mdoc or man parser */
struct roffnode *last; /* leaf of stack */
struct mctx *mstack; /* stack of macro contexts */
}
struct roff *
-roff_alloc(struct mparse *parse, int options)
+roff_alloc(int options)
{
struct roff *r;
r = mandoc_calloc(1, sizeof(struct roff));
- r->parse = parse;
r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
r->options = options;
r->format = options & (MPARSE_MDOC | MPARSE_MAN);
}
struct roff_man *
-roff_man_alloc(struct roff *roff, struct mparse *parse,
- const char *os_s, int quick)
+roff_man_alloc(struct roff *roff, const char *os_s, int quick)
{
struct roff_man *man;
man = mandoc_calloc(1, sizeof(*man));
- man->parse = parse;
man->roff = roff;
man->os_s = os_s;
man->quick = quick;
struct roff_node *n; /* used for header comments */
const char *start; /* start of the string to process */
char *stesc; /* start of an escape sequence ('\\') */
+ const char *esct; /* type of esccape sequence */
char *ep; /* end of comment string */
const char *stnam; /* start of the name, after "[(*" */
const char *cp; /* end of the name, e.g. before ']' */
size_t naml; /* actual length of the escape name */
size_t asz; /* length of the replacement */
size_t rsz; /* length of the rest of the string */
- enum mandoc_esc esc; /* type of the escape sequence */
int inaml; /* length returned from mandoc_escape() */
int expand_count; /* to avoid infinite loops */
int npos; /* position in numeric expression */
int done; /* no more input available */
int deftype; /* type of definition to paste */
int rcsid; /* kind of RCS id seen */
+ enum mandocerr err; /* for escape sequence problems */
char sign; /* increment number register */
char term; /* character terminating the escape */
term = '\0';
cp = stesc + 1;
- switch (*cp) {
+ if (*cp == 'E')
+ cp++;
+ esct = cp;
+ switch (*esct) {
case '*':
case '$':
res = NULL;
res = ubuf;
break;
default:
- esc = mandoc_escape(&cp, &stnam, &inaml);
- if (esc == ESCAPE_ERROR ||
- (esc == ESCAPE_SPECIAL &&
- mchars_spec2cp(stnam, inaml) < 0))
- mandoc_msg(MANDOCERR_ESC_BAD,
- ln, (int)(stesc - buf->buf),
+ err = MANDOCERR_OK;
+ switch(mandoc_escape(&cp, &stnam, &inaml)) {
+ case ESCAPE_SPECIAL:
+ if (mchars_spec2cp(stnam, inaml) >= 0)
+ break;
+ /* FALLTHROUGH */
+ case ESCAPE_ERROR:
+ err = MANDOCERR_ESC_BAD;
+ break;
+ case ESCAPE_UNDEF:
+ err = MANDOCERR_ESC_UNDEF;
+ break;
+ case ESCAPE_UNSUPP:
+ err = MANDOCERR_ESC_UNSUPP;
+ break;
+ default:
+ break;
+ }
+ if (err != MANDOCERR_OK)
+ mandoc_msg(err, ln, (int)(stesc - buf->buf),
"%.*s", (int)(cp - stesc), stesc);
stesc--;
continue;
cp++;
break;
}
- if (*cp++ != '\\' || stesc[1] != 'w') {
+ if (*cp++ != '\\' || *esct != 'w') {
naml++;
continue;
}
case ESCAPE_SPECIAL:
case ESCAPE_UNICODE:
case ESCAPE_NUMBERED:
+ case ESCAPE_UNDEF:
case ESCAPE_OVERSTRIKE:
naml++;
break;
* undefined, resume searching for escapes.
*/
- switch (stesc[1]) {
+ switch (*esct) {
case '*':
if (arg_complete) {
deftype = ROFFDEF_USER | ROFFDEF_PRE;
break;
}
ctx = r->mstack + r->mstackpos;
- npos = stesc[2] - '1';
+ npos = esct[1] - '1';
if (npos >= 0 && npos <= 8) {
res = npos < ctx->argc ?
ctx->argv[npos] : "";
break;
}
- if (stesc[2] == '*')
+ if (esct[1] == '*')
quote_args = 0;
- else if (stesc[2] == '@')
+ else if (esct[1] == '@')
quote_args = 1;
else {
mandoc_msg(MANDOCERR_ARG_NONUM, ln,
}
if (res == NULL) {
- if (stesc[1] == '*')
+ if (*esct == '*')
mandoc_msg(MANDOCERR_STR_UNDEF,
ln, (int)(stesc - buf->buf),
"%.*s", (int)naml, stnam);
return ROFF_CONT;
}
+/*
+ * Parse a quoted or unquoted roff-style request or macro argument.
+ * Return a pointer to the parsed argument, which is either the original
+ * pointer or advanced by one byte in case the argument is quoted.
+ * NUL-terminate the argument in place.
+ * Collapse pairs of quotes inside quoted arguments.
+ * Advance the argument pointer to the next argument,
+ * or to the NUL byte terminating the argument line.
+ */
+char *
+mandoc_getarg(char **cpp, int ln, int *pos)
+{
+ char *start, *cp;
+ int quoted, pairs, white;
+
+ /* Quoting can only start with a new word. */
+ start = *cpp;
+ quoted = 0;
+ if ('"' == *start) {
+ quoted = 1;
+ start++;
+ }
+
+ pairs = 0;
+ white = 0;
+ for (cp = start; '\0' != *cp; cp++) {
+
+ /*
+ * Move the following text left
+ * after quoted quotes and after "\\" and "\t".
+ */
+ if (pairs)
+ cp[-pairs] = cp[0];
+
+ if ('\\' == cp[0]) {
+ /*
+ * In copy mode, translate double to single
+ * backslashes and backslash-t to literal tabs.
+ */
+ switch (cp[1]) {
+ case 'a':
+ case 't':
+ cp[0] = '\t';
+ /* FALLTHROUGH */
+ case '\\':
+ pairs++;
+ cp++;
+ break;
+ case ' ':
+ /* Skip escaped blanks. */
+ if (0 == quoted)
+ cp++;
+ break;
+ default:
+ break;
+ }
+ } else if (0 == quoted) {
+ if (' ' == cp[0]) {
+ /* Unescaped blanks end unquoted args. */
+ white = 1;
+ break;
+ }
+ } else if ('"' == cp[0]) {
+ if ('"' == cp[1]) {
+ /* Quoted quotes collapse. */
+ pairs++;
+ cp++;
+ } else {
+ /* Unquoted quotes end quoted args. */
+ quoted = 2;
+ break;
+ }
+ }
+ }
+
+ /* Quoted argument without a closing quote. */
+ if (1 == quoted)
+ mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
+
+ /* NUL-terminate this argument and move to the next one. */
+ if (pairs)
+ cp[-pairs] = '\0';
+ if ('\0' != *cp) {
+ *cp++ = '\0';
+ while (' ' == *cp)
+ cp++;
+ }
+ *pos += (int)(cp - start) + (quoted ? 1 : 0);
+ *cpp = cp;
+
+ if ('\0' == *cp && (white || ' ' == cp[-1]))
+ mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
+
+ return start;
+}
+
+
/*
* Process text streams.
*/
assert(r->eqn == NULL);
if (r->last_eqn == NULL)
- r->last_eqn = eqn_alloc(r->parse);
+ r->last_eqn = eqn_alloc();
else
eqn_reset(r->last_eqn);
r->eqn = r->last_eqn;
mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
tbl_end(r->tbl, 0);
}
- r->tbl = tbl_alloc(ppos, ln, r->parse, r->last_tbl);
+ r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
if (r->last_tbl == NULL)
r->first_tbl = r->tbl;
r->last_tbl = r->tbl;
ctx->argv = mandoc_reallocarray(ctx->argv,
ctx->argsz, sizeof(*ctx->argv));
}
- arg = mandoc_getarg(r->parse, &src, ln, &pos);
+ arg = mandoc_getarg(&src, ln, &pos);
sz = 1; /* For the terminating NUL. */
for (ap = arg; *ap != '\0'; ap++)
sz += *ap == '"' ? 4 : 1;