-/* $Id: roff.c,v 1.207 2014/04/20 16:46:05 schwarze Exp $ */
+/* $Id: roff.c,v 1.218 2014/07/06 18:46:55 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
static void roff_free1(struct roff *);
static void roff_freereg(struct roffreg *);
static void roff_freestr(struct roffkv *);
-static char *roff_getname(struct roff *, char **, int, int);
+static size_t roff_getname(struct roff *, char **, int, int);
static int roff_getnum(const char *, int *, int *);
static int roff_getop(const char *, int *, char *);
static int roff_getregn(const struct roff *,
static enum rofferr roff_nr(ROFF_ARGS);
static void roff_openeqn(struct roff *, const char *,
int, int, const char *);
-static enum rofft roff_parse(struct roff *, const char *, int *);
+static enum rofft roff_parse(struct roff *, char *, int *,
+ int, int);
static enum rofferr roff_parsetext(char **, size_t *, int, int *);
static enum rofferr roff_res(struct roff *,
char **, size_t *, int, int);
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
- char ubuf[12]; /* buffer to print the number */
+ char ubuf[24]; /* buffer to print the number */
const char *start; /* start of the string to process */
- const char *stesc; /* start of an escape sequence ('\\') */
+ char *stesc; /* start of an escape sequence ('\\') */
const char *stnam; /* start of the name, after "[(*" */
const char *cp; /* end of the name, e.g. before ']' */
const char *res; /* the string to be substituted */
char *nbuf; /* new buffer to copy bufp to */
size_t maxl; /* expected length of the escape name */
size_t naml; /* actual length of the escape name */
- size_t ressz; /* size of the replacement string */
int expand_count; /* to avoid infinite loops */
int npos; /* position in numeric expression */
- int irc; /* return code from roff_evalnum() */
+ int arg_complete; /* argument not interrupted by eol */
char term; /* character terminating the escape */
expand_count = 0;
break;
if (0 == (stesc - cp) % 2) {
- stesc = cp;
+ stesc = (char *)cp;
continue;
}
/* Advance to the end of the name. */
+ arg_complete = 1;
for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
if ('\0' == *cp) {
mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
ln, (int)(stesc - *bufp), NULL);
+ arg_complete = 0;
break;
}
if (0 == maxl && *cp == term) {
switch (stesc[1]) {
case '*':
- res = roff_getstrn(r, stnam, naml);
+ if (arg_complete)
+ res = roff_getstrn(r, stnam, naml);
break;
case 'B':
npos = 0;
- irc = roff_evalnum(stnam, &npos, NULL, 0);
- ubuf[0] = irc && stnam + npos + 1 == cp
- ? '1' : '0';
+ ubuf[0] = arg_complete &&
+ roff_evalnum(stnam, &npos, NULL, 0) &&
+ stnam + npos + 1 == cp ? '1' : '0';
ubuf[1] = '\0';
break;
case 'n':
- snprintf(ubuf, sizeof(ubuf), "%d",
- roff_getregn(r, stnam, naml));
+ if (arg_complete)
+ (void)snprintf(ubuf, sizeof(ubuf), "%d",
+ roff_getregn(r, stnam, naml));
+ else
+ ubuf[0] = '\0';
break;
case 'w':
- snprintf(ubuf, sizeof(ubuf), "%d",
+ /* use even incomplete args */
+ (void)snprintf(ubuf, sizeof(ubuf), "%d",
24 * (int)naml);
break;
}
ln, (int)(stesc - *bufp), NULL);
res = "";
}
- ressz = strlen(res);
/* Replace the escape sequence by the string. */
- *szp += ressz + 1;
- nbuf = mandoc_malloc(*szp);
-
- strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
- strlcat(nbuf, res, *szp);
- strlcat(nbuf, cp, *szp);
+ *stesc = '\0';
+ *szp = mandoc_asprintf(&nbuf, "%s%s%s",
+ *bufp, res, cp) + 1;
/* Prepare for the next replacement. */
start = nbuf + pos;
- stesc = nbuf + (stesc - *bufp) + ressz;
+ stesc = nbuf + (stesc - *bufp) + strlen(res);
free(*bufp);
*bufp = nbuf;
}
* the compilers handle it.
*/
- if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
+ if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos, ln, ppos)))
return(ROFF_CONT);
assert(roffs[t].proc);
* form of ".foo xxx" in the usual way.
*/
static enum rofft
-roff_parse(struct roff *r, const char *buf, int *pos)
+roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
{
+ char *cp;
const char *mac;
size_t maclen;
enum rofft t;
- if ('\0' == buf[*pos] || '"' == buf[*pos] ||
- '\t' == buf[*pos] || ' ' == buf[*pos])
- return(ROFF_MAX);
+ cp = buf + *pos;
- /* We stop the macro parse at an escape, tab, space, or nil. */
+ if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
+ return(ROFF_MAX);
- mac = buf + *pos;
- maclen = strcspn(mac, " \\\t\0");
+ mac = cp;
+ maclen = roff_getname(r, &cp, ln, ppos);
t = (r->current_string = roff_getstrn(r, mac, maclen))
? ROFF_USERDEF : roffhash_find(mac, maclen);
- *pos += (int)maclen;
-
- while (buf[*pos] && ' ' == buf[*pos])
- (*pos)++;
+ if (ROFF_MAX != t)
+ *pos = cp - buf;
return(t);
}
}
if ((*bufp)[pos])
- mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
+ mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
+ ".. %s", *bufp + pos);
roffnode_pop(r);
roffnode_cleanscope(r);
static enum rofferr
roff_block(ROFF_ARGS)
{
- int sv;
- size_t sz;
- char *name;
+ char *name, *cp;
+ size_t namesz;
- name = NULL;
+ name = cp = *bufp + pos;
+ namesz = 0;
if (ROFF_ig != tok) {
- if ('\0' == (*bufp)[pos]) {
- mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
+ if ('\0' == *cp) {
+ mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
+ ln, ppos, roffs[tok].name);
return(ROFF_IGN);
}
if (ROFF_de1 == tok)
tok = ROFF_de;
- if (ROFF_de == tok)
- name = *bufp + pos;
- else
+ else if (ROFF_de != tok)
mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
roffs[tok].name);
- while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
- pos++;
-
- while (isspace((unsigned char)(*bufp)[pos]))
- (*bufp)[pos++] = '\0';
- }
+ namesz = roff_getname(r, &cp, ln, ppos);
+ name[namesz] = '\0';
+ } else
+ name = NULL;
roffnode_push(r, tok, name, ln, ppos);
* appended from roff_block_text() in multiline mode.
*/
- if (ROFF_de == tok)
- roff_setstr(r, name, "", 0);
+ if (namesz && ROFF_de == tok)
+ roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
- if ('\0' == (*bufp)[pos])
+ if ('\0' == *cp)
return(ROFF_IGN);
/* If present, process the custom end-of-line marker. */
- sv = pos;
- while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
- pos++;
+ name = cp;
+ namesz = roff_getname(r, &cp, ln, ppos);
+ if (namesz)
+ r->last->end = mandoc_strndup(name, namesz);
- /*
- * Note: groff does NOT like escape characters in the input.
- * Instead of detecting this, we're just going to let it fly and
- * to hell with it.
- */
-
- assert(pos > sv);
- sz = (size_t)(pos - sv);
-
- if (1 == sz && '.' == (*bufp)[sv])
- return(ROFF_IGN);
-
- r->last->end = mandoc_malloc(sz + 1);
-
- memcpy(r->last->end, *bufp + sv, sz);
- r->last->end[(int)sz] = '\0';
-
- if ((*bufp)[pos])
- mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
+ if ('\0' != *cp)
+ mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
+ ln, pos, ".%s ... %s", roffs[tok].name, cp);
return(ROFF_IGN);
}
i++;
pos = i;
- if (ROFF_MAX != roff_parse(r, *bufp, &pos))
+ if (ROFF_MAX != roff_parse(r, *bufp, &pos, ln, ppos))
return(ROFF_RERUN);
return(ROFF_IGN);
}
* pulling it out of the hashtable.
*/
- t = roff_parse(r, *bufp, &pos);
+ t = roff_parse(r, *bufp, &pos, ln, ppos);
/*
* Macros other than block-end are only significant
rr = r->last->rule;
roffnode_cleanscope(r);
- t = roff_parse(r, *bufp, &pos);
+ t = roff_parse(r, *bufp, &pos, ln, ppos);
/*
* Fully handle known macros when they are structurally
*/
if ('\0' == (*bufp)[pos])
- mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
+ mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
+ ln, ppos, roffs[tok].name);
r->last->endspan = 1;
static enum rofferr
roff_ds(ROFF_ARGS)
{
- char *name, *string;
+ char *string;
+ const char *name;
+ size_t namesz;
/*
- * A symbol is named by the first word following the macro
- * invocation up to a space. Its value is anything after the
- * name's trailing whitespace and optional double-quote. Thus,
- *
- * [.ds foo "bar " ]
- *
- * will have `bar " ' as its value.
+ * The first word is the name of the string.
+ * If it is empty or terminated by an escape sequence,
+ * abort the `ds' request without defining anything.
*/
- string = *bufp + pos;
- name = roff_getname(r, &string, ln, pos);
+ name = string = *bufp + pos;
if ('\0' == *name)
return(ROFF_IGN);
- /* Read past initial double-quote. */
+ namesz = roff_getname(r, &string, ln, pos);
+ if ('\\' == name[namesz])
+ return(ROFF_IGN);
+
+ /* Read past the initial double-quote, if any. */
if ('"' == *string)
string++;
/* The rest is the value. */
- roff_setstr(r, name, string, ROFF_as == tok);
+ roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
+ ROFF_as == tok);
return(ROFF_IGN);
}
static enum rofferr
roff_nr(ROFF_ARGS)
{
- const char *key;
- char *val;
+ char *key, *val;
+ size_t keysz;
int iv;
char sign;
- val = *bufp + pos;
- key = roff_getname(r, &val, ln, pos);
+ key = val = *bufp + pos;
+ if ('\0' == *key)
+ return(ROFF_IGN);
+
+ keysz = roff_getname(r, &val, ln, pos);
+ if ('\\' == key[keysz])
+ return(ROFF_IGN);
+ key[keysz] = '\0';
sign = *val;
if ('+' == sign || '-' == sign)
roff_rr(ROFF_ARGS)
{
struct roffreg *reg, **prev;
- const char *name;
- char *cp;
+ char *name, *cp;
+ size_t namesz;
- cp = *bufp + pos;
- name = roff_getname(r, &cp, ln, pos);
+ name = cp = *bufp + pos;
+ if ('\0' == *name)
+ return(ROFF_IGN);
+ namesz = roff_getname(r, &cp, ln, pos);
+ name[namesz] = '\0';
prev = &r->regtab;
while (1) {
{
const char *name;
char *cp;
+ size_t namesz;
cp = *bufp + pos;
while ('\0' != *cp) {
- name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
- if ('\0' != *name)
- roff_setstr(r, name, NULL, 0);
+ name = cp;
+ namesz = roff_getname(r, &cp, ln, (int)(cp - *bufp));
+ roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
+ if ('\\' == name[namesz])
+ break;
}
return(ROFF_IGN);
}
{
char *name;
- mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
+ name = *bufp + pos;
+ mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, ".so %s", name);
/*
* Handle `so'. Be EXTREMELY careful, as we shouldn't be
* or using absolute paths.
*/
- name = *bufp + pos;
if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
- mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
+ mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
+ ".so %s", name);
return(ROFF_ERR);
}
cp += 2;
continue;
}
-
- *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
- n2 = mandoc_malloc(*szp);
-
- strlcpy(n2, n1, (size_t)(cp - n1 + 1));
- strlcat(n2, arg[i], *szp);
- strlcat(n2, cp + 3, *szp);
-
+ *cp = '\0';
+ *szp = mandoc_asprintf(&n2, "%s%s%s",
+ n1, arg[i], cp + 3) + 1;
cp = n2 + (cp - n1);
free(n1);
n1 = n2;
ROFF_REPARSE : ROFF_APPEND);
}
-static char *
+static size_t
roff_getname(struct roff *r, char **cpp, int ln, int pos)
{
char *name, *cp;
+ size_t namesz;
name = *cpp;
if ('\0' == *name)
- return(name);
+ return(0);
- /* Read until end of name. */
- for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
+ /* Read until end of name; callers NUL-terminate it if needed. */
+ for (cp = name; 1; cp++) {
+ if ('\0' == *cp || ' ' == *cp) {
+ namesz = cp - name;
+ break;
+ }
if ('\\' != *cp)
continue;
+ namesz = cp - name;
+ if ('{' == cp[1] || '}' == cp[1])
+ break;
cp++;
if ('\\' == *cp)
continue;
mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
- *cp = '\0';
- name = cp;
+ mandoc_escape((const char **)&cp, NULL, NULL);
+ break;
}
- /* Nil-terminate name. */
- if ('\0' != *cp)
- *(cp++) = '\0';
-
/* Read past spaces. */
while (' ' == *cp)
cp++;
*cpp = cp;
- return(name);
+ return(namesz);
}
/*
/* Search for an existing string with the same name. */
n = *r;
- while (n && strcmp(name, n->key.p))
+ while (n && (namesz != n->key.sz ||
+ strncmp(n->key.p, name, namesz)))
n = n->next;
if (NULL == n) {