-/* $Id: roff.c,v 1.160 2011/07/27 14:23:27 kristaps Exp $ */
+/* $Id: roff.c,v 1.172 2011/10/24 21:41:45 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
/* Maximum number of nested if-else conditionals. */
#define RSTACK_MAX 128
+/* Maximum number of string expansions per line, to break infinite loops. */
+#define EXPAND_LIMIT 1000
+
enum rofft {
ROFF_ad,
ROFF_am,
* Registers are assumed to be unsigned ints for now.
*/
struct reg {
- int set; /* whether set or not */
- unsigned int u; /* unsigned integer */
+ int set; /* whether set or not */
+ unsigned int u; /* unsigned integer */
};
+/*
+ * A minimal string buffer: a pointer plus its cached length.
+ */
struct roffstr {
- char *key; /* key of symbol */
- char *val; /* current value */
- struct roffstr *next; /* next in list */
+ char *p; /* nil-terminated buffer */
+ size_t sz; /* saved strlen(p) */
+};
+
+/*
+ * A key-value roffstr pair as part of a singly-linked list.
+ * Used for the user-defined string table and for the multi-byte
+ * `tr' translation table.
+ */
+struct roffkv {
+ struct roffstr key; /* name looked up (string name or `tr' input) */
+ struct roffstr val; /* associated value; val.p is NULL until set */
+ struct roffkv *next; /* next in list */
};
struct roff {
enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
int rstackpos; /* position in rstack */
struct reg regs[REG__MAX];
- struct roffstr *first_string; /* user-defined strings & macros */
+ struct roffkv *strtab; /* user-defined strings & macros */
+ struct roffkv *xmbtab; /* multi-byte trans table (`tr') */
+ struct roffstr *xtab; /* single-byte trans table (`tr') */
const char *current_string; /* value of last called user macro */
struct tbl_node *first_tbl; /* first table parsed */
struct tbl_node *last_tbl; /* last table parsed */
static enum rofferr roff_ds(ROFF_ARGS);
static enum roffrule roff_evalcond(const char *, int *);
static void roff_free1(struct roff *);
-static void roff_freestr(struct roff *);
+static void roff_freestr(struct roffkv *);
static char *roff_getname(struct roff *, char **, int, int);
static const char *roff_getstrn(const struct roff *,
const char *, size_t);
static enum rofferr roff_line_ignore(ROFF_ARGS);
static enum rofferr roff_nr(ROFF_ARGS);
-static void roff_openeqn(struct roff *, const char *,
+static void roff_openeqn(struct roff *, const char *,
int, int, const char *);
static enum rofft roff_parse(struct roff *, const char *, int *);
static enum rofferr roff_parsetext(char *);
-static void roff_res(struct roff *,
+static enum rofferr roff_res(struct roff *,
char **, size_t *, int, int);
static enum rofferr roff_rm(ROFF_ARGS);
static void roff_setstr(struct roff *,
const char *, const char *, int);
+static void roff_setstrn(struct roffkv **, const char *,
+ size_t, const char *, size_t, int);
static enum rofferr roff_so(ROFF_ARGS);
+static enum rofferr roff_tr(ROFF_ARGS);
static enum rofferr roff_TE(ROFF_ARGS);
static enum rofferr roff_TS(ROFF_ARGS);
static enum rofferr roff_EQ(ROFF_ARGS);
{ "rm", roff_rm, NULL, NULL, 0, NULL },
{ "so", roff_so, NULL, NULL, 0, NULL },
{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
- { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
+ { "tr", roff_tr, NULL, NULL, 0, NULL },
{ "TS", roff_TS, NULL, NULL, 0, NULL },
{ "TE", roff_TE, NULL, NULL, 0, NULL },
{ "T&", roff_T_, NULL, NULL, 0, NULL },
{
struct tbl_node *t;
struct eqn_node *e;
+ int i;
while (NULL != (t = r->first_tbl)) {
r->first_tbl = t->next;
while (r->last)
roffnode_pop(r);
- roff_freestr(r);
-}
+ roff_freestr(r->strtab);
+ roff_freestr(r->xmbtab);
+
+ r->strtab = r->xmbtab = NULL;
+
+ if (r->xtab)
+ for (i = 0; i < 128; i++)
+ free(r->xtab[i].p);
+ free(r->xtab);
+ r->xtab = NULL;
+}
void
roff_reset(struct roff *r)
* is processed.
* This also checks the syntax of regular escapes.
*/
-static void
+static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
enum mandoc_esc esc;
const char *stnam; /* start of the name, after "[(*" */
const char *cp; /* end of the name, e.g. before ']' */
const char *res; /* the string to be substituted */
- int i, maxl;
+ int i, maxl, expand_count;
size_t nsz;
char *n;
+ expand_count = 0;
+
again:
cp = *bufp + pos;
while (NULL != (cp = strchr(cp, '\\'))) {
*/
if ('\0' == *cp)
- return;
+ return(ROFF_CONT);
if ('*' != *cp) {
res = cp;
mandoc_msg
(MANDOCERR_BADESCAPE, r->parse,
ln, (int)(stesc - *bufp), NULL);
- return;
+ return(ROFF_CONT);
}
cp++;
switch (*cp) {
case ('\0'):
- return;
+ return(ROFF_CONT);
case ('('):
cp++;
maxl = 2;
(MANDOCERR_BADESCAPE,
r->parse, ln,
(int)(stesc - *bufp), NULL);
- return;
+ return(ROFF_CONT);
}
if (0 == maxl && ']' == *cp)
break;
/* Replace the escape sequence by the string. */
- pos += (stesc - *bufp);
+ pos = stesc - *bufp;
nsz = *szp + strlen(res) + 1;
n = mandoc_malloc(nsz);
*bufp = n;
*szp = nsz;
- goto again;
+
+ if (EXPAND_LIMIT >= ++expand_count)
+ goto again;
+
+ /* Just leave the string unexpanded. */
+ mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
+ return(ROFF_IGN);
}
+ return(ROFF_CONT);
}
/*
static enum rofferr
roff_parsetext(char *p)
{
- char l, r;
size_t sz;
const char *start;
enum mandoc_esc esc;
continue;
}
- l = *(p - 1);
- r = *(p + 1);
- if ('\\' != l &&
- '\t' != r && '\t' != l &&
- ' ' != r && ' ' != l &&
- '-' != r && '-' != l &&
- ! isdigit((unsigned char)l) &&
- ! isdigit((unsigned char)r))
+ if (isalpha((unsigned char)p[-1]) &&
+ isalpha((unsigned char)p[1]))
*p = ASCII_HYPH;
p++;
}
* words to fill in.
*/
- roff_res(r, bufp, szp, ln, pos);
+ e = roff_res(r, bufp, szp, ln, pos);
+ if (ROFF_IGN == e)
+ return(e);
+ assert(ROFF_CONT == e);
ppos = pos;
ctl = mandoc_getcontrol(*bufp, &pos);
return(ROFF_IGN);
}
+/*
+ * Process a `tr' request: record character translations.
+ * The argument is consumed as consecutive (first, second) pairs, each
+ * element being either a single byte or one escape sequence.
+ * A first element longer than one byte is stored in r->xmbtab;
+ * a single-byte first element is stored in the 128-slot r->xtab,
+ * which is allocated on first use.
+ * An unpaired trailing element is mapped to a single space after
+ * reporting MANDOCERR_ARGCOUNT.
+ * Always returns ROFF_IGN.
+ */
+/* ARGSUSED */
+static enum rofferr
+roff_tr(ROFF_ARGS)
+{
+	const char	*p, *first, *second;
+	size_t		 fsz, ssz;
+	enum mandoc_esc	 esc;
+	unsigned char	 c;
+
+	p = *bufp + pos;
+
+	if ('\0' == *p) {
+		mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
+		return(ROFF_IGN);
+	}
+
+	while ('\0' != *p) {
+		fsz = ssz = 1;
+
+		/* Element to be translated: one byte or one escape. */
+		first = p++;
+		if ('\\' == *first) {
+			esc = mandoc_escape(&p, NULL, NULL);
+			if (ESCAPE_ERROR == esc) {
+				mandoc_msg
+					(MANDOCERR_BADESCAPE, r->parse,
+					 ln, (int)(p - *bufp), NULL);
+				return(ROFF_IGN);
+			}
+			fsz = (size_t)(p - first);
+		}
+
+		/* Replacement element: one byte or one escape. */
+		second = p++;
+		if ('\\' == *second) {
+			esc = mandoc_escape(&p, NULL, NULL);
+			if (ESCAPE_ERROR == esc) {
+				mandoc_msg
+					(MANDOCERR_BADESCAPE, r->parse,
+					 ln, (int)(p - *bufp), NULL);
+				return(ROFF_IGN);
+			}
+			ssz = (size_t)(p - second);
+		} else if ('\0' == *second) {
+			/*
+			 * Odd number of elements: translate the last
+			 * one to a blank and step back onto the
+			 * terminator so that the loop ends.
+			 */
+			mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
+					ln, (int)(p - *bufp), NULL);
+			second = " ";
+			p--;
+		}
+
+		if (fsz > 1) {
+			roff_setstrn(&r->xmbtab, first,
+					fsz, second, ssz, 0);
+			continue;
+		}
+
+		/*
+		 * The single-byte table has only 128 slots and plain
+		 * char may be signed, so a byte outside 0..127 would
+		 * index out of bounds.  Such bytes are not translated.
+		 */
+		c = (unsigned char)*first;
+		if (c > 127)
+			continue;
+
+		if (NULL == r->xtab)
+			r->xtab = mandoc_calloc
+				(128, sizeof(struct roffstr));
+
+		free(r->xtab[c].p);
+		r->xtab[c].p = mandoc_strndup(second, ssz);
+		r->xtab[c].sz = ssz;
+	}
+
+	return(ROFF_IGN);
+}
+
/* ARGSUSED */
static enum rofferr
roff_so(ROFF_ARGS)
roff_setstr(struct roff *r, const char *name, const char *string,
int multiline)
{
- struct roffstr *n;
- char *c;
- size_t oldch, newch;
+
+ roff_setstrn(&r->strtab, name, strlen(name), string,
+ string ? strlen(string) : 0, multiline);
+}
+
+static void
+roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
+ const char *string, size_t stringsz, int multiline)
+{
+ struct roffkv *n;
+ char *c;
+ int i;
+ size_t oldch, newch;
/* Search for an existing string with the same name. */
- n = r->first_string;
- while (n && strcmp(name, n->key))
+ n = *r;
+
+ while (n && strcmp(name, n->key.p))
n = n->next;
if (NULL == n) {
/* Create a new string table entry. */
- n = mandoc_malloc(sizeof(struct roffstr));
- n->key = mandoc_strdup(name);
- n->val = NULL;
- n->next = r->first_string;
- r->first_string = n;
+ n = mandoc_malloc(sizeof(struct roffkv));
+ n->key.p = mandoc_strndup(name, namesz);
+ n->key.sz = namesz;
+ n->val.p = NULL;
+ n->val.sz = 0;
+ n->next = *r;
+ *r = n;
} else if (0 == multiline) {
/* In multiline mode, append; else replace. */
- free(n->val);
- n->val = NULL;
+ free(n->val.p);
+ n->val.p = NULL;
+ n->val.sz = 0;
}
if (NULL == string)
* One additional byte for the '\n' in multiline mode,
* and one for the terminating '\0'.
*/
- newch = strlen(string) + (multiline ? 2u : 1u);
- if (NULL == n->val) {
- n->val = mandoc_malloc(newch);
- *n->val = '\0';
+ newch = stringsz + (multiline ? 2u : 1u);
+
+ if (NULL == n->val.p) {
+ n->val.p = mandoc_malloc(newch);
+ *n->val.p = '\0';
oldch = 0;
} else {
- oldch = strlen(n->val);
- n->val = mandoc_realloc(n->val, oldch + newch);
+ oldch = n->val.sz;
+ n->val.p = mandoc_realloc(n->val.p, oldch + newch);
}
/* Skip existing content in the destination buffer. */
- c = n->val + (int)oldch;
+ c = n->val.p + (int)oldch;
/* Append new content to the destination buffer. */
- while (*string) {
+ i = 0;
+ while (i < (int)stringsz) {
/*
* Rudimentary roff copy mode:
* Handle escaped backslashes.
*/
- if ('\\' == *string && '\\' == *(string + 1))
- string++;
- *c++ = *string++;
+ if ('\\' == string[i] && '\\' == string[i + 1])
+ i++;
+ *c++ = string[i++];
}
/* Append terminating bytes. */
if (multiline)
*c++ = '\n';
+
*c = '\0';
+ n->val.sz = (int)(c - n->val.p);
}
static const char *
roff_getstrn(const struct roff *r, const char *name, size_t len)
{
- const struct roffstr *n;
+	const struct roffkv	*kv;
- for (n = r->first_string; n; n = n->next)
- if (0 == strncmp(name, n->key, len) &&
- '\0' == n->key[(int)len])
- return(n->val);
+	/*
+	 * Walk the string table; an entry matches when its key starts
+	 * with the first len bytes of name and ends right after them.
+	 * The prefix test must come first: only then is key.p[len]
+	 * known to lie inside the key.
+	 */
+	for (kv = r->strtab; NULL != kv; kv = kv->next) {
+		if (strncmp(name, kv->key.p, len))
+			continue;
+		if ('\0' == kv->key.p[(int)len])
+			return(kv->val.p);
+	}
return(NULL);
}
static void
-roff_freestr(struct roff *r)
+roff_freestr(struct roffkv *r) /* free every node of the list headed by r */
{
- struct roffstr *n, *nn;
+ struct roffkv *n, *nn; /* current node and its saved successor */
- for (n = r->first_string; n; n = nn) {
- free(n->key);
- free(n->val);
+ for (n = r; n; n = nn) { /* r is passed by value: the caller must reset its own head pointer */
+ free(n->key.p);
+ free(n->val.p);
nn = n->next;
free(n);
}
-
- r->first_string = NULL;
}
const struct tbl_span *
return(r->last_eqn ? &r->last_eqn->eqn : NULL);
}
-char
-roff_eqndelim(const struct roff *r)
+/*
+ * Duplicate an input string, making the appropriate character
+ * conversions (as stipulated by `tr') along the way.
+ * Returns a heap-allocated string with all the replacements made.
+ */
+char *
+roff_strdup(const struct roff *r, const char *p)
{
+	const struct roffkv *cp;
+	char		*res;
+	const char	*pp;
+	size_t		 ssz, sz;
+	enum mandoc_esc	 esc;
+	unsigned char	 c;
+
+	/* Without any translation table, this is a plain copy. */
+	if (NULL == r->xmbtab && NULL == r->xtab)
+		return(mandoc_strdup(p));
+	else if ('\0' == *p)
+		return(mandoc_strdup(""));
+
+	/*
+	 * Step through each character looking for term matches
+	 * (remember that a `tr' can be invoked with an escape, which is
+	 * a glyph but the escape is multi-character).
+	 * We only do this if the character hash has been initialised
+	 * and the string is >0 length.
+	 * Every branch below grows res so that there is always room
+	 * for the terminating '\0' written after the loop.
+	 */
+
+	res = NULL;
+	ssz = 0;
+
+	while ('\0' != *p) {
+		/*
+		 * The single-byte table has only 128 slots and plain
+		 * char may be signed: look up bytes 0..127 only and
+		 * copy any other byte through unchanged.
+		 */
+		c = (unsigned char)*p;
+		if ('\\' != *p && r->xtab && c < 128 && r->xtab[c].p) {
+			sz = r->xtab[c].sz;
+			res = mandoc_realloc(res, ssz + sz + 1);
+			memcpy(res + ssz, r->xtab[c].p, sz);
+			ssz += sz;
+			p++;
+			continue;
+		} else if ('\\' != *p) {
+			res = mandoc_realloc(res, ssz + 2);
+			res[ssz++] = *p++;
+			continue;
+		}
+
+		/* Search for term matches. */
+		for (cp = r->xmbtab; cp; cp = cp->next)
+			if (0 == strncmp(p, cp->key.p, cp->key.sz))
+				break;
+
+		if (NULL != cp) {
+			/*
+			 * A match has been found.
+			 * Append the match to the array and move
+			 * forward by its keysize.
+			 */
+			res = mandoc_realloc
+				(res, ssz + cp->val.sz + 1);
+			memcpy(res + ssz, cp->val.p, cp->val.sz);
+			ssz += cp->val.sz;
+			p += (int)cp->key.sz;
+			continue;
+		}
+
+		/*
+		 * Handle escapes carefully: we need to copy
+		 * over just the escape itself, or else we might
+		 * do replacements within the escape itself.
+		 * Make sure to pass along the bogus string.
+		 */
+		pp = p++;
+		esc = mandoc_escape(&p, NULL, NULL);
+		if (ESCAPE_ERROR == esc) {
+			/*
+			 * We bail out on bad escapes.
+			 * No need to warn: we already did so when
+			 * roff_res() was called.
+			 * The rest of the input is copied verbatim;
+			 * ssz must account for it, or the terminator
+			 * written below would cut it off again.
+			 */
+			sz = strlen(pp);
+			res = mandoc_realloc(res, ssz + sz + 1);
+			memcpy(res + ssz, pp, sz);
+			ssz += sz;
+			break;
+		}
+		sz = (size_t)(p - pp);
+		res = mandoc_realloc(res, ssz + sz + 1);
+		memcpy(res + ssz, pp, sz);
+		ssz += sz;
+	}
- return('\0');
+	res[ssz] = '\0';
+	return(res);
}