-/* $Id: roff.c,v 1.211 2014/06/29 21:20:31 schwarze Exp $ */
+/* $Id: roff.c,v 1.221 2014/07/07 21:36:20 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
static void roff_free1(struct roff *);
static void roff_freereg(struct roffreg *);
static void roff_freestr(struct roffkv *);
-static char *roff_getname(struct roff *, char **, int, int);
+static size_t roff_getname(struct roff *, char **, int, int);
static int roff_getnum(const char *, int *, int *);
static int roff_getop(const char *, int *, char *);
static int roff_getregn(const struct roff *,
static enum rofferr roff_nr(ROFF_ARGS);
static void roff_openeqn(struct roff *, const char *,
int, int, const char *);
-static enum rofft roff_parse(struct roff *, const char *, int *);
+static enum rofft roff_parse(struct roff *, char *, int *,
+ int, int);
static enum rofferr roff_parsetext(char **, size_t *, int, int *);
static enum rofferr roff_res(struct roff *,
char **, size_t *, int, int);
size_t naml; /* actual length of the escape name */
int expand_count; /* to avoid infinite loops */
int npos; /* position in numeric expression */
- int irc; /* return code from roff_evalnum() */
+ int arg_complete; /* argument not interrupted by eol */
char term; /* character terminating the escape */
expand_count = 0;
break;
default:
if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
- mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
- ln, (int)(stesc - *bufp), NULL);
+ mandoc_vmsg(MANDOCERR_ESC_BAD,
+ r->parse, ln, (int)(stesc - *bufp),
+ "%.*s", (int)(cp - stesc), stesc);
continue;
}
/* Advance to the end of the name. */
+ arg_complete = 1;
for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
if ('\0' == *cp) {
- mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
- ln, (int)(stesc - *bufp), NULL);
+ mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
+ ln, (int)(stesc - *bufp), stesc);
+ arg_complete = 0;
break;
}
if (0 == maxl && *cp == term) {
switch (stesc[1]) {
case '*':
- res = roff_getstrn(r, stnam, naml);
+ if (arg_complete)
+ res = roff_getstrn(r, stnam, naml);
break;
case 'B':
npos = 0;
- irc = roff_evalnum(stnam, &npos, NULL, 0);
- ubuf[0] = irc && stnam + npos + 1 == cp
- ? '1' : '0';
+ ubuf[0] = arg_complete &&
+ roff_evalnum(stnam, &npos, NULL, 0) &&
+ stnam + npos + 1 == cp ? '1' : '0';
ubuf[1] = '\0';
break;
case 'n':
- (void)snprintf(ubuf, sizeof(ubuf), "%d",
- roff_getregn(r, stnam, naml));
+ if (arg_complete)
+ (void)snprintf(ubuf, sizeof(ubuf), "%d",
+ roff_getregn(r, stnam, naml));
+ else
+ ubuf[0] = '\0';
break;
case 'w':
+ /* use even incomplete args */
(void)snprintf(ubuf, sizeof(ubuf), "%d",
24 * (int)naml);
break;
}
if (NULL == res) {
- mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
- ln, (int)(stesc - *bufp), NULL);
+ mandoc_vmsg(MANDOCERR_STR_UNDEF,
+ r->parse, ln, (int)(stesc - *bufp),
+ "%.*s", (int)naml, stnam);
res = "";
}
* the compilers handle it.
*/
- if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
+ if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos, ln, ppos)))
return(ROFF_CONT);
assert(roffs[t].proc);
{
if (r->last)
- mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
- r->last->line, r->last->col, NULL);
+ mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
+ r->last->line, r->last->col,
+ roffs[r->last->tok].name);
if (r->eqn) {
- mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
- r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
+ mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
+ r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
eqn_end(&r->eqn);
}
if (r->tbl) {
- mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
- r->tbl->line, r->tbl->pos, NULL);
+ mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
+ r->tbl->line, r->tbl->pos, "TS");
tbl_end(&r->tbl);
}
}
* form of ".foo xxx" in the usual way.
*/
static enum rofft
-roff_parse(struct roff *r, const char *buf, int *pos)
+roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
{
+ char *cp;
const char *mac;
size_t maclen;
enum rofft t;
- if ('\0' == buf[*pos] || '"' == buf[*pos] ||
- '\t' == buf[*pos] || ' ' == buf[*pos])
- return(ROFF_MAX);
+ cp = buf + *pos;
- /* We stop the macro parse at an escape, tab, space, or nil. */
+ if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
+ return(ROFF_MAX);
- mac = buf + *pos;
- maclen = strcspn(mac, " \\\t\0");
+ mac = cp;
+ maclen = roff_getname(r, &cp, ln, ppos);
t = (r->current_string = roff_getstrn(r, mac, maclen))
? ROFF_USERDEF : roffhash_find(mac, maclen);
- *pos += (int)maclen;
-
- while (buf[*pos] && ' ' == buf[*pos])
- (*pos)++;
+ if (ROFF_MAX != t)
+ *pos = cp - buf;
return(t);
}
*/
if (NULL == r->last) {
- mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
+ mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
+ ln, ppos, "..");
return(ROFF_IGN);
}
switch (r->last->tok) {
case ROFF_am:
+ /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
/* FALLTHROUGH */
case ROFF_ami:
/* FALLTHROUGH */
- case ROFF_am1:
- /* FALLTHROUGH */
case ROFF_de:
/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
/* FALLTHROUGH */
case ROFF_ig:
break;
default:
- mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
+ mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
+ ln, ppos, "..");
return(ROFF_IGN);
}
if ((*bufp)[pos])
- mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
+ mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
+ ".. %s", *bufp + pos);
roffnode_pop(r);
roffnode_cleanscope(r);
{
if (NULL == r->last) {
- mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
+ mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
+ ln, ppos, "\\}");
return;
}
case ROFF_if:
break;
default:
- mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
+ mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
+ ln, ppos, "\\}");
return;
}
if (r->last->endspan > -1) {
- mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
+ mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
+ ln, ppos, "\\}");
return;
}
static enum rofferr
roff_block(ROFF_ARGS)
{
- int sv;
- size_t sz;
- char *name;
+ const char *name;
+ char *iname, *cp;
+ size_t namesz;
- name = NULL;
+ /* Ignore groff compatibility mode for now. */
- if (ROFF_ig != tok) {
- if ('\0' == (*bufp)[pos]) {
- mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
- return(ROFF_IGN);
- }
+ if (ROFF_de1 == tok)
+ tok = ROFF_de;
+ else if (ROFF_am1 == tok)
+ tok = ROFF_am;
- /*
- * Re-write `de1', since we don't really care about
- * groff's strange compatibility mode, into `de'.
- */
+ /* Parse the macro name argument. */
- if (ROFF_de1 == tok)
- tok = ROFF_de;
- if (ROFF_de == tok)
- name = *bufp + pos;
- else
- mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
- roffs[tok].name);
+ cp = *bufp + pos;
+ if (ROFF_ig == tok) {
+ iname = NULL;
+ namesz = 0;
+ } else {
+ iname = cp;
+ namesz = roff_getname(r, &cp, ln, ppos);
+ iname[namesz] = '\0';
+ }
+
+ /* Resolve the macro name argument if it is indirect. */
- while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
- pos++;
+ if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
+ if (NULL == (name = roff_getstrn(r, iname, namesz))) {
+ mandoc_vmsg(MANDOCERR_STR_UNDEF,
+ r->parse, ln, (int)(iname - *bufp),
+ "%.*s", (int)namesz, iname);
+ namesz = 0;
+ } else
+ namesz = strlen(name);
+ } else
+ name = iname;
- while (isspace((unsigned char)(*bufp)[pos]))
- (*bufp)[pos++] = '\0';
+ if (0 == namesz && ROFF_ig != tok) {
+ mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
+ ln, ppos, roffs[tok].name);
+ return(ROFF_IGN);
}
roffnode_push(r, tok, name, ln, ppos);
* appended from roff_block_text() in multiline mode.
*/
- if (ROFF_de == tok)
- roff_setstr(r, name, "", 0);
+ if (ROFF_de == tok || ROFF_dei == tok)
+ roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
- if ('\0' == (*bufp)[pos])
+ if ('\0' == *cp)
return(ROFF_IGN);
- /* If present, process the custom end-of-line marker. */
-
- sv = pos;
- while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
- pos++;
+ /* Get the custom end marker. */
- /*
- * Note: groff does NOT like escape characters in the input.
- * Instead of detecting this, we're just going to let it fly and
- * to hell with it.
- */
+ iname = cp;
+ namesz = roff_getname(r, &cp, ln, ppos);
- assert(pos > sv);
- sz = (size_t)(pos - sv);
+ /* Resolve the end marker if it is indirect. */
- if (1 == sz && '.' == (*bufp)[sv])
- return(ROFF_IGN);
-
- r->last->end = mandoc_malloc(sz + 1);
+ if (namesz && (ROFF_dei == tok || ROFF_ami == tok)) {
+ if (NULL == (name = roff_getstrn(r, iname, namesz))) {
+ mandoc_vmsg(MANDOCERR_STR_UNDEF,
+ r->parse, ln, (int)(iname - *bufp),
+ "%.*s", (int)namesz, iname);
+ namesz = 0;
+ } else
+ namesz = strlen(name);
+ } else
+ name = iname;
- memcpy(r->last->end, *bufp + sv, sz);
- r->last->end[(int)sz] = '\0';
+ if (namesz)
+ r->last->end = mandoc_strndup(name, namesz);
- if ((*bufp)[pos])
- mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
+ if ('\0' != *cp)
+ mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
+ ln, pos, ".%s ... %s", roffs[tok].name, cp);
return(ROFF_IGN);
}
i++;
pos = i;
- if (ROFF_MAX != roff_parse(r, *bufp, &pos))
+ if (ROFF_MAX != roff_parse(r, *bufp, &pos, ln, ppos))
return(ROFF_RERUN);
return(ROFF_IGN);
}
* pulling it out of the hashtable.
*/
- t = roff_parse(r, *bufp, &pos);
+ t = roff_parse(r, *bufp, &pos, ln, ppos);
- /*
- * Macros other than block-end are only significant
- * in `de' blocks; elsewhere, simply throw them away.
- */
if (ROFF_cblock != t) {
- if (ROFF_de == tok)
+ if (ROFF_ig != tok)
roff_setstr(r, r->last->name, *bufp + ppos, 2);
return(ROFF_IGN);
}
roff_block_text(ROFF_ARGS)
{
- if (ROFF_de == tok)
+ if (ROFF_ig != tok)
roff_setstr(r, r->last->name, *bufp + pos, 2);
return(ROFF_IGN);
rr = r->last->rule;
roffnode_cleanscope(r);
- t = roff_parse(r, *bufp, &pos);
+ t = roff_parse(r, *bufp, &pos, ln, ppos);
/*
* Fully handle known macros when they are structurally
*/
if ('\0' == (*bufp)[pos])
- mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
+ mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
+ ln, ppos, roffs[tok].name);
r->last->endspan = 1;
static enum rofferr
roff_ds(ROFF_ARGS)
{
- char *name, *string;
+ char *string;
+ const char *name;
+ size_t namesz;
/*
- * A symbol is named by the first word following the macro
- * invocation up to a space. Its value is anything after the
- * name's trailing whitespace and optional double-quote. Thus,
- *
- * [.ds foo "bar " ]
- *
- * will have `bar " ' as its value.
+ * The first word is the name of the string.
+ * If it is empty or terminated by an escape sequence,
+ * abort the `ds' request without defining anything.
*/
- string = *bufp + pos;
- name = roff_getname(r, &string, ln, pos);
+ name = string = *bufp + pos;
if ('\0' == *name)
return(ROFF_IGN);
- /* Read past initial double-quote. */
+ namesz = roff_getname(r, &string, ln, pos);
+ if ('\\' == name[namesz])
+ return(ROFF_IGN);
+
+ /* Read past the initial double-quote, if any. */
if ('"' == *string)
string++;
/* The rest is the value. */
- roff_setstr(r, name, string, ROFF_as == tok);
+ roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
+ ROFF_as == tok);
return(ROFF_IGN);
}
static enum rofferr
roff_nr(ROFF_ARGS)
{
- const char *key;
- char *val;
+ char *key, *val;
+ size_t keysz;
int iv;
char sign;
- val = *bufp + pos;
- key = roff_getname(r, &val, ln, pos);
+ key = val = *bufp + pos;
+ if ('\0' == *key)
+ return(ROFF_IGN);
+
+ keysz = roff_getname(r, &val, ln, pos);
+ if ('\\' == key[keysz])
+ return(ROFF_IGN);
+ key[keysz] = '\0';
sign = *val;
if ('+' == sign || '-' == sign)
roff_rr(ROFF_ARGS)
{
struct roffreg *reg, **prev;
- const char *name;
- char *cp;
+ char *name, *cp;
+ size_t namesz;
- cp = *bufp + pos;
- name = roff_getname(r, &cp, ln, pos);
+ name = cp = *bufp + pos;
+ if ('\0' == *name)
+ return(ROFF_IGN);
+ namesz = roff_getname(r, &cp, ln, pos);
+ name[namesz] = '\0';
prev = &r->regtab;
while (1) {
{
const char *name;
char *cp;
+ size_t namesz;
cp = *bufp + pos;
while ('\0' != *cp) {
- name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
- if ('\0' != *name)
- roff_setstr(r, name, NULL, 0);
+ name = cp;
+ namesz = roff_getname(r, &cp, ln, (int)(cp - *bufp));
+ roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
+ if ('\\' == name[namesz])
+ break;
}
return(ROFF_IGN);
}
{
if (NULL == r->tbl)
- mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
+ mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
+ ln, ppos, "TE");
else
tbl_end(&r->tbl);
{
if (NULL == r->tbl)
- mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
+ mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
+ ln, ppos, "T&");
else
tbl_restart(ppos, ln, r->tbl);
roff_EN(ROFF_ARGS)
{
- mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
+ mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
return(ROFF_IGN);
}
struct tbl_node *tbl;
if (r->tbl) {
- mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
+ mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
+ ln, ppos, "TS breaks TS");
tbl_end(&r->tbl);
}
if ('\\' == *first) {
esc = mandoc_escape(&p, NULL, NULL);
if (ESCAPE_ERROR == esc) {
- mandoc_msg(MANDOCERR_BADESCAPE,
- r->parse, ln,
- (int)(p - *bufp), NULL);
+ mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
+ ln, (int)(p - *bufp), first);
return(ROFF_IGN);
}
fsz = (size_t)(p - first);
if ('\\' == *second) {
esc = mandoc_escape(&p, NULL, NULL);
if (ESCAPE_ERROR == esc) {
- mandoc_msg(MANDOCERR_BADESCAPE,
- r->parse, ln,
- (int)(p - *bufp), NULL);
+ mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
+ ln, (int)(p - *bufp), second);
return(ROFF_IGN);
}
ssz = (size_t)(p - second);
ROFF_REPARSE : ROFF_APPEND);
}
-static char *
+static size_t
roff_getname(struct roff *r, char **cpp, int ln, int pos)
{
char *name, *cp;
+ size_t namesz;
name = *cpp;
if ('\0' == *name)
- return(name);
+ return(0);
- /* Read until end of name. */
- for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
+ /* Read until end of name and determine its length. */
+ for (cp = name; 1; cp++) {
+ if ('\0' == *cp || ' ' == *cp) {
+ namesz = cp - name;
+ break;
+ }
if ('\\' != *cp)
continue;
+ namesz = cp - name;
+ if ('{' == cp[1] || '}' == cp[1])
+ break;
cp++;
if ('\\' == *cp)
continue;
mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
- *cp = '\0';
- name = cp;
+ mandoc_escape((const char **)&cp, NULL, NULL);
+ break;
}
- /* Nil-terminate name. */
- if ('\0' != *cp)
- *(cp++) = '\0';
-
/* Read past spaces. */
while (' ' == *cp)
cp++;
*cpp = cp;
- return(name);
+ return(namesz);
}
/*