-/* $Id: roff.c,v 1.142 2011/05/26 11:58:25 kristaps Exp $ */
+/* $Id: roff.c,v 1.165 2011/07/28 14:53:22 kristaps Exp $ */
/*
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
ROFFRULE_DENY
};
+/*
+ * A single register entity. If "set" is zero, the value of the
+ * register should be the default one, which is per-register;
+ * otherwise the value is the one stored in "u".
+ * Registers are assumed to be unsigned ints for now.
+ */
+struct reg {
+ int set; /* non-zero if a value has been set, zero if unset */
+ unsigned int u; /* the register's value, as an unsigned integer */
+};
+
struct roffstr {
- char *name; /* key of symbol */
- char *string; /* current value */
+ char *key; /* key of symbol */
+ size_t keysz;
+ char *val; /* current value */
+ size_t valsz;
struct roffstr *next; /* next in list */
};
struct roffnode *last; /* leaf of stack */
enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
int rstackpos; /* position in rstack */
- struct regset *regs; /* read/writable registers */
- struct roffstr *first_string; /* user-defined strings & macros */
+ struct reg regs[REG__MAX];
+ struct roffstr *strtab; /* user-defined strings & macros */
+ struct roffstr *chrtab; /* user-defined characters */
const char *current_string; /* value of last called user macro */
struct tbl_node *first_tbl; /* first table parsed */
struct tbl_node *last_tbl; /* last table parsed */
#define PREDEF(__name, __str) \
{ (__name), (__str) },
+static enum rofft roffhash_find(const char *, size_t);
+static void roffhash_init(void);
+static void roffnode_cleanscope(struct roff *);
+static void roffnode_pop(struct roff *);
+static void roffnode_push(struct roff *, enum rofft,
+ const char *, int, int);
static enum rofferr roff_block(ROFF_ARGS);
static enum rofferr roff_block_text(ROFF_ARGS);
static enum rofferr roff_block_sub(ROFF_ARGS);
static enum rofferr roff_cond_sub(ROFF_ARGS);
static enum rofferr roff_ds(ROFF_ARGS);
static enum roffrule roff_evalcond(const char *, int *);
-static void roff_freestr(struct roff *);
+static void roff_free1(struct roff *);
+static void roff_freestr(struct roffstr **);
static char *roff_getname(struct roff *, char **, int, int);
static const char *roff_getstrn(const struct roff *,
const char *, size_t);
static enum rofferr roff_line_ignore(ROFF_ARGS);
static enum rofferr roff_nr(ROFF_ARGS);
-static int roff_res(struct roff *,
+static void roff_openeqn(struct roff *, const char *,
+ int, int, const char *);
+static enum rofft roff_parse(struct roff *, const char *, int *);
+static enum rofferr roff_parsetext(char *);
+static void roff_res(struct roff *,
char **, size_t *, int, int);
static enum rofferr roff_rm(ROFF_ARGS);
static void roff_setstr(struct roff *,
const char *, const char *, int);
+static void roff_setstrn(struct roffstr **, const char *,
+ size_t, const char *, size_t, int);
static enum rofferr roff_so(ROFF_ARGS);
+static enum rofferr roff_tr(ROFF_ARGS);
static enum rofferr roff_TE(ROFF_ARGS);
static enum rofferr roff_TS(ROFF_ARGS);
static enum rofferr roff_EQ(ROFF_ARGS);
static enum rofferr roff_T_(ROFF_ARGS);
static enum rofferr roff_userdef(ROFF_ARGS);
-/* See roff_hash_find() */
+/* See roffhash_find() */
#define ASCII_HI 126
#define ASCII_LO 33
{ "rm", roff_rm, NULL, NULL, 0, NULL },
{ "so", roff_so, NULL, NULL, 0, NULL },
{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
- { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
+ { "tr", roff_tr, NULL, NULL, 0, NULL },
{ "TS", roff_TS, NULL, NULL, 0, NULL },
{ "TE", roff_TE, NULL, NULL, 0, NULL },
{ "T&", roff_T_, NULL, NULL, 0, NULL },
#include "predefs.in"
};
-static void roff_free1(struct roff *);
-static enum rofft roff_hash_find(const char *, size_t);
-static void roff_hash_init(void);
-static void roffnode_cleanscope(struct roff *);
-static void roffnode_push(struct roff *, enum rofft,
- const char *, int, int);
-static void roffnode_pop(struct roff *);
-static enum rofft roff_parse(struct roff *, const char *, int *);
-
-/* See roff_hash_find() */
+/* See roffhash_find() */
#define ROFF_HASH(p) (p[0] - ASCII_LO)
static void
-roff_hash_init(void)
+roffhash_init(void)
{
struct roffmac *n;
int buc, i;
* the nil-terminated string name could be found.
*/
static enum rofft
-roff_hash_find(const char *p, size_t s)
+roffhash_find(const char *p, size_t s)
{
int buc;
struct roffmac *n;
while (r->last)
roffnode_pop(r);
- roff_freestr(r);
+ roff_freestr(&r->strtab);
+ roff_freestr(&r->chrtab);
}
void
roff_reset(struct roff *r)
{
+ int i;
roff_free1(r);
+
+ memset(&r->regs, 0, sizeof(struct reg) * REG__MAX);
+
+ for (i = 0; i < PREDEFS_MAX; i++)
+ roff_setstr(r, predefs[i].name, predefs[i].str, 0);
}
struct roff *
-roff_alloc(struct regset *regs, struct mparse *parse)
+roff_alloc(struct mparse *parse)
{
struct roff *r;
int i;
r = mandoc_calloc(1, sizeof(struct roff));
- r->regs = regs;
r->parse = parse;
r->rstackpos = -1;
- roff_hash_init();
+ roffhash_init();
for (i = 0; i < PREDEFS_MAX; i++)
roff_setstr(r, predefs[i].name, predefs[i].str, 0);
return(r);
}
-
/*
* Pre-filter each and every line for reserved words (one beginning with
* `\*', e.g., `\*(ab'). These must be handled before the actual line
* is processed.
+ * This also checks the syntax of regular escapes.
*/
-static int
+static void
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
+ enum mandoc_esc esc;
const char *stesc; /* start of an escape sequence ('\\') */
const char *stnam; /* start of the name, after "[(*" */
const char *cp; /* end of the name, e.g. before ']' */
size_t nsz;
char *n;
- /* Search for a leading backslash and save a pointer to it. */
-
+again:
cp = *bufp + pos;
while (NULL != (cp = strchr(cp, '\\'))) {
stesc = cp++;
*/
if ('\0' == *cp)
- return(1);
- if ('*' != *cp++)
- continue;
+ return;
+
+ if ('*' != *cp) {
+ res = cp;
+ esc = mandoc_escape(&cp, NULL, NULL);
+ if (ESCAPE_ERROR != esc)
+ continue;
+ cp = res;
+ mandoc_msg
+ (MANDOCERR_BADESCAPE, r->parse,
+ ln, (int)(stesc - *bufp), NULL);
+ return;
+ }
+
+ cp++;
/*
* The third character decides the length
switch (*cp) {
case ('\0'):
- return(1);
+ return;
case ('('):
cp++;
maxl = 2;
/* Advance to the end of the name. */
for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
- if ('\0' == *cp)
- return(1); /* Error. */
+ if ('\0' == *cp) {
+ mandoc_msg
+ (MANDOCERR_BADESCAPE,
+ r->parse, ln,
+ (int)(stesc - *bufp), NULL);
+ return;
+ }
if (0 == maxl && ']' == *cp)
break;
}
res = roff_getstrn(r, stnam, (size_t)i);
if (NULL == res) {
- /* TODO: keep track of the correct position. */
- mandoc_msg(MANDOCERR_BADESCAPE, r->parse, ln, pos, NULL);
+ mandoc_msg
+ (MANDOCERR_BADESCAPE, r->parse,
+ ln, (int)(stesc - *bufp), NULL);
res = "";
}
/* Replace the escape sequence by the string. */
+ pos = stesc - *bufp;
+
nsz = *szp + strlen(res) + 1;
n = mandoc_malloc(nsz);
*bufp = n;
*szp = nsz;
- return(0);
+ goto again;
}
-
- return(1);
}
+/*
+ * Process text streams: convert all breakable hyphens into ASCII_HYPH.
+ * The nil-terminated text "p" is modified in place.
+ * A hyphen is left alone if it is the first character of the text, if
+ * it directly follows a backslash, or if either of its neighbours is
+ * a tab, a space, another hyphen or a digit.
+ * Escape sequences are stepped over with mandoc_escape(); scanning
+ * stops at the first one reported as ESCAPE_ERROR.
+ * Always returns ROFF_CONT.
+ */
+static enum rofferr
+roff_parsetext(char *p)
+{
+ char l, r;
+ size_t sz;
+ const char *start;
+ enum mandoc_esc esc;
+
+ start = p;
+
+ while ('\0' != *p) {
+ /* Jump to the next hyphen or backslash, if there is one. */
+ sz = strcspn(p, "-\\");
+ p += sz;
+
+ if ('\0' == *p)
+ break;
+
+ if ('\\' == *p) {
+ /* Skip over escapes. */
+ p++;
+ esc = mandoc_escape
+ ((const char **)&p, NULL, NULL);
+ if (ESCAPE_ERROR == esc)
+ break;
+ continue;
+ } else if (p == start) {
+ /* A leading hyphen has no left neighbour: keep it. */
+ p++;
+ continue;
+ }
+
+ /* Look at both neighbours; "r" may be the terminating nil. */
+ l = *(p - 1);
+ r = *(p + 1);
+ if ('\\' != l &&
+ '\t' != r && '\t' != l &&
+ ' ' != r && ' ' != l &&
+ '-' != r && '-' != l &&
+ ! isdigit((unsigned char)l) &&
+ ! isdigit((unsigned char)r))
+ *p = ASCII_HYPH;
+ p++;
+ }
+
+ return(ROFF_CONT);
+}
enum rofferr
roff_parseln(struct roff *r, int ln, char **bufp,
* words to fill in.
*/
- if (r->first_string && ! roff_res(r, bufp, szp, ln, pos))
- return(ROFF_REPARSE);
+ roff_res(r, bufp, szp, ln, pos);
ppos = pos;
ctl = mandoc_getcontrol(*bufp, &pos);
if (ROFF_CONT != e)
return(e);
if (r->eqn)
- return(eqn_read(&r->eqn, ln, *bufp, pos));
+ return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
if (r->tbl)
return(tbl_read(r->tbl, ln, *bufp, pos));
- return(ROFF_CONT);
+ return(roff_parsetext(*bufp + pos));
} else if ( ! ctl) {
if (r->eqn)
- return(eqn_read(&r->eqn, ln, *bufp, pos));
+ return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
if (r->tbl)
return(tbl_read(r->tbl, ln, *bufp, pos));
- return(ROFF_CONT);
+ return(roff_parsetext(*bufp + pos));
} else if (r->eqn)
- return(eqn_read(&r->eqn, ln, *bufp, ppos));
+ return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
/*
* If a scope is open, go to the child handler for that macro,
if (r->eqn) {
mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
- r->eqn->eqn.line, r->eqn->eqn.pos, NULL);
- eqn_end(r->eqn);
- r->eqn = NULL;
+ r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
+ eqn_end(&r->eqn);
}
if (r->tbl) {
mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
r->tbl->line, r->tbl->pos, NULL);
- tbl_end(r->tbl);
- r->tbl = NULL;
+ tbl_end(&r->tbl);
}
}
size_t maclen;
enum rofft t;
- if ('\0' == buf[*pos] || '"' == buf[*pos])
+ if ('\0' == buf[*pos] || '"' == buf[*pos] ||
+ '\t' == buf[*pos] || ' ' == buf[*pos])
return(ROFF_MAX);
+ /*
+ * We stop the macro parse at an escape, tab, space, or nil.
+ * However, `\}' is also a valid macro, so make sure we don't
+ * clobber it by seeing the `\' as the end of token.
+ */
+
mac = buf + *pos;
- maclen = strcspn(mac, " \\\t\0");
+ maclen = strcspn(mac + 1, " \\\t\0") + 1;
t = (r->current_string = roff_getstrn(r, mac, maclen))
- ? ROFF_USERDEF : roff_hash_find(mac, maclen);
+ ? ROFF_USERDEF : roffhash_find(mac, maclen);
*pos += (int)maclen;
ep++;
if ('}' != *ep)
continue;
- *ep = '&';
+
+ /*
+ * Make the \} go away.
+ * This is a little haphazard, as it's not quite
+ * clear how nroff does this.
+ * If we're at the end of line, then just chop
+ * off the \} and resize the buffer.
+ * If we aren't, then convert it to spaces.
+ */
+
+ if ('\0' == *(ep + 1)) {
+ *--ep = '\0';
+ *szp -= 2;
+ } else
+ *(ep - 1) = *ep = ' ';
+
roff_ccond(r, ROFF_ccond, bufp, szp,
ln, pos, pos + 2, offs);
break;
return(ROFF_IGN);
}
+/*
+ * Return the "set" flag of register "reg": non-zero if the register
+ * currently has a value, zero otherwise.
+ */
+int
+roff_regisset(const struct roff *r, enum regs reg)
+{
+	const struct reg	*rg;
+
+	rg = &r->regs[(int)reg];
+	return(rg->set);
+}
+
+/*
+ * Return the stored unsigned value of register "reg".
+ * The "set" flag is not consulted here.
+ */
+unsigned int
+roff_regget(const struct roff *r, enum regs reg)
+{
+	const struct reg	*rg;
+
+	rg = &r->regs[(int)reg];
+	return(rg->u);
+}
+
+/*
+ * Mark register "reg" as unset by clearing its "set" flag.
+ * The stored value itself is left untouched.
+ */
+void
+roff_regunset(struct roff *r, enum regs reg)
+{
+	struct reg	*rg;
+
+	rg = &r->regs[(int)reg];
+	rg->set = 0;
+}
/* ARGSUSED */
static enum rofferr
const char *key;
char *val;
int iv;
- struct reg *rg;
val = *bufp + pos;
key = roff_getname(r, &val, ln, pos);
- rg = r->regs->regs;
if (0 == strcmp(key, "nS")) {
- rg[(int)REG_nS].set = 1;
- if ((iv = mandoc_strntou(val, strlen(val), 10)) >= 0)
- rg[REG_nS].v.u = (unsigned)iv;
+ r->regs[(int)REG_nS].set = 1;
+ if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0)
+ r->regs[(int)REG_nS].u = (unsigned)iv;
else
- rg[(int)REG_nS].v.u = 0u;
+ r->regs[(int)REG_nS].u = 0u;
}
return(ROFF_IGN);
if (NULL == r->tbl)
mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
else
- tbl_end(r->tbl);
+ tbl_end(&r->tbl);
- r->tbl = NULL;
return(ROFF_IGN);
}
return(ROFF_IGN);
}
-/* ARGSUSED */
-static enum rofferr
-roff_EQ(ROFF_ARGS)
+#if 0
+static int
+roff_closeeqn(struct roff *r)
{
- struct eqn_node *e;
+
+ return(r->eqn && ROFF_EQN == eqn_end(&r->eqn) ? 1 : 0);
+}
+#endif
+
+static void
+roff_openeqn(struct roff *r, const char *name, int line,
+ int offs, const char *buf)
+{
+ struct eqn_node *e;
+ int poff;
assert(NULL == r->eqn);
- e = eqn_alloc(ppos, ln);
+ e = eqn_alloc(name, offs, line, r->parse);
if (r->last_eqn)
r->last_eqn->next = e;
r->first_eqn = r->last_eqn = e;
r->eqn = r->last_eqn = e;
+
+ if (buf) {
+ poff = 0;
+ eqn_read(&r->eqn, line, buf, offs, &poff);
+ }
+}
+
+/* ARGSUSED */
+static enum rofferr
+roff_EQ(ROFF_ARGS)
+{
+
+ roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
return(ROFF_IGN);
}
if (r->tbl) {
mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
- tbl_end(r->tbl);
+ tbl_end(&r->tbl);
}
t = tbl_alloc(ppos, ln, r->parse);
return(ROFF_IGN);
}
+/*
+ * Handle the `tr' request: record character translations in r->chrtab.
+ * The argument is read as consecutive pairs; each member of a pair is
+ * either a single byte or a whole escape sequence as delimited by
+ * mandoc_escape().  Every pair is stored with roff_setstrn(), the
+ * first member being the key and the second the value.
+ * An empty argument raises MANDOCERR_ARGCOUNT.  An escape reported as
+ * ESCAPE_ERROR raises MANDOCERR_BADESCAPE and stops processing; pairs
+ * stored before that point are kept.  If the last pair lacks its
+ * second member, MANDOCERR_ARGCOUNT is raised and a single space is
+ * used in its place.
+ * Always returns ROFF_IGN.
+ *
+ * NOTE(review): "first" and "second" point into the input line and
+ * are not nil-terminated at fsz/ssz, so this relies on roff_setstrn()
+ * honouring the lengths it is given -- confirm, since its key lookup
+ * appears to use strcmp().
+ */
+/* ARGSUSED */
+static enum rofferr
+roff_tr(ROFF_ARGS)
+{
+ const char *p, *first, *second;
+ size_t fsz, ssz;
+ enum mandoc_esc esc;
+
+ p = *bufp + pos;
+
+ if ('\0' == *p) {
+ mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
+ return(ROFF_IGN);
+ }
+
+ while ('\0' != *p) {
+ /* Unless an escape is found, each member is one byte long. */
+ fsz = ssz = 1;
+
+ first = p++;
+ if ('\\' == *first) {
+ esc = mandoc_escape(&p, NULL, NULL);
+ if (ESCAPE_ERROR == esc) {
+ mandoc_msg
+ (MANDOCERR_BADESCAPE, r->parse,
+ ln, (int)(p - *bufp), NULL);
+ return(ROFF_IGN);
+ }
+ fsz = (size_t)(p - first);
+ }
+
+ second = p++;
+ if ('\\' == *second) {
+ esc = mandoc_escape(&p, NULL, NULL);
+ if (ESCAPE_ERROR == esc) {
+ mandoc_msg
+ (MANDOCERR_BADESCAPE, r->parse,
+ ln, (int)(p - *bufp), NULL);
+ return(ROFF_IGN);
+ }
+ ssz = (size_t)(p - second);
+ } else if ('\0' == *second) {
+ mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
+ ln, (int)(p - *bufp), NULL);
+ second = " ";
+ /* Step back onto the nil so that the loop terminates. */
+ p--;
+ }
+
+ roff_setstrn(&r->chrtab, first, fsz, second, ssz, 0);
+ }
+
+ return(ROFF_IGN);
+}
+
/* ARGSUSED */
static enum rofferr
roff_so(ROFF_ARGS)
roff_setstr(struct roff *r, const char *name, const char *string,
int multiline)
{
- struct roffstr *n;
- char *c;
- size_t oldch, newch;
+
+ roff_setstrn(&r->strtab, name, strlen(name), string,
+ string ? strlen(string) : 0, multiline);
+}
+
+static void
+roff_setstrn(struct roffstr **r, const char *name, size_t namesz,
+ const char *string, size_t stringsz, int multiline)
+{
+ struct roffstr *n;
+ char *c;
+ int i;
+ size_t oldch, newch;
/* Search for an existing string with the same name. */
- n = r->first_string;
- while (n && strcmp(name, n->name))
+ n = *r;
+
+ while (n && strcmp(name, n->key))
n = n->next;
if (NULL == n) {
/* Create a new string table entry. */
n = mandoc_malloc(sizeof(struct roffstr));
- n->name = mandoc_strdup(name);
- n->string = NULL;
- n->next = r->first_string;
- r->first_string = n;
+ n->key = mandoc_strndup(name, namesz);
+ n->keysz = namesz;
+ n->val = NULL;
+ n->valsz = 0;
+ n->next = *r;
+ *r = n;
} else if (0 == multiline) {
/* In multiline mode, append; else replace. */
- free(n->string);
- n->string = NULL;
+ free(n->val);
+ n->val = NULL;
+ n->valsz = 0;
}
if (NULL == string)
* One additional byte for the '\n' in multiline mode,
* and one for the terminating '\0'.
*/
- newch = strlen(string) + (multiline ? 2u : 1u);
- if (NULL == n->string) {
- n->string = mandoc_malloc(newch);
- *n->string = '\0';
+ newch = stringsz + (multiline ? 2u : 1u);
+
+ if (NULL == n->val) {
+ n->val = mandoc_malloc(newch);
+ *n->val = '\0';
oldch = 0;
} else {
- oldch = strlen(n->string);
- n->string = mandoc_realloc(n->string, oldch + newch);
+ oldch = n->valsz;
+ n->val = mandoc_realloc(n->val, oldch + newch);
}
/* Skip existing content in the destination buffer. */
- c = n->string + (int)oldch;
+ c = n->val + (int)oldch;
/* Append new content to the destination buffer. */
- while (*string) {
+ i = 0;
+ while (i < (int)stringsz) {
/*
* Rudimentary roff copy mode:
* Handle escaped backslashes.
*/
- if ('\\' == *string && '\\' == *(string + 1))
- string++;
- *c++ = *string++;
+ if ('\\' == string[i] && '\\' == string[i + 1])
+ i++;
+ *c++ = string[i++];
}
/* Append terminating bytes. */
if (multiline)
*c++ = '\n';
+
*c = '\0';
+ n->valsz = (int)(c - n->val);
}
static const char *
{
const struct roffstr *n;
- n = r->first_string;
- while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
- n = n->next;
+ for (n = r->strtab; n; n = n->next)
+ if (0 == strncmp(name, n->key, len) &&
+ '\0' == n->key[(int)len])
+ return(n->val);
- return(n ? n->string : NULL);
+ return(NULL);
}
static void
-roff_freestr(struct roff *r)
+roff_freestr(struct roffstr **r)
{
struct roffstr *n, *nn;
- for (n = r->first_string; n; n = nn) {
- free(n->name);
- free(n->string);
+ for (n = *r; n; n = nn) {
+ free(n->key);
+ free(n->val);
nn = n->next;
free(n);
}
- r->first_string = NULL;
+ *r = NULL;
}
const struct tbl_span *
return(r->last_eqn ? &r->last_eqn->eqn : NULL);
}
+
+/*
+ * Presumably meant to report the current eqn delimiter (judging by
+ * the name -- TODO confirm).  As written this is a stub: "r" is not
+ * consulted and the nil character is always returned.
+ */
+char
+roff_eqndelim(const struct roff *r)
+{
+
+ return('\0');
+}
+
+/*
+ * Duplicate an input string, making the appropriate character
+ * conversions (as stipulated by `tr') along the way.
+ * Escape sequences that are not themselves translated are copied
+ * verbatim, so that no replacement happens inside of them.
+ * If a malformed escape is encountered, the remainder of the input,
+ * bogus escape included, is passed along untranslated.
+ * Returns a heap-allocated, nil-terminated string with all the
+ * replacements made.
+ */
+char *
+roff_strdup(const struct roff *r, const char *p)
+{
+	const struct roffstr *cp;
+	char		*res;
+	const char	*pp;
+	size_t		 ssz, sz;
+	enum mandoc_esc	 esc;
+
+	if (NULL == r->chrtab)
+		return(mandoc_strdup(p));
+	else if ('\0' == *p)
+		return(mandoc_strdup(""));
+
+	/*
+	 * Step through each character looking for term matches
+	 * (remember that a `tr' can be invoked with an escape, which is
+	 * a glyph but the escape is multi-character).
+	 * We only do this if the character table is non-empty and the
+	 * string is >0 length, so the loop below runs at least once
+	 * and "res" is always allocated by the time we terminate it.
+	 */
+
+	res = NULL;
+	ssz = 0;
+
+	while ('\0' != *p) {
+		/* Search for term matches. */
+		for (cp = r->chrtab; cp; cp = cp->next)
+			if (0 == strncmp(p, cp->key, cp->keysz))
+				break;
+
+		if (NULL != cp) {
+			/*
+			 * A match has been found.
+			 * Append the match to the array and move
+			 * forward by its keysize.
+			 */
+			res = mandoc_realloc(res, ssz + cp->valsz + 1);
+			memcpy(res + ssz, cp->val, cp->valsz);
+			ssz += cp->valsz;
+			p += (int)cp->keysz;
+			continue;
+		}
+
+		if ('\\' == *p) {
+			/*
+			 * Handle escapes carefully: we need to copy
+			 * over just the escape itself, or else we might
+			 * do replacements within the escape itself.
+			 */
+			pp = p++;
+			esc = mandoc_escape(&p, NULL, NULL);
+			if (ESCAPE_ERROR == esc) {
+				/*
+				 * We bail out on bad escapes, making
+				 * sure to pass along the bogus string.
+				 * The length must be accounted for in
+				 * "ssz", or the terminating nil written
+				 * below would chop the copy off again.
+				 * No need to warn: we already did so
+				 * when roff_res() was called.
+				 */
+				sz = strlen(pp);
+				res = mandoc_realloc(res, ssz + sz + 1);
+				memcpy(res + ssz, pp, sz);
+				ssz += sz;
+				break;
+			}
+			sz = (size_t)(p - pp);
+			res = mandoc_realloc(res, ssz + sz + 1);
+			memcpy(res + ssz, pp, sz);
+			ssz += sz;
+			continue;
+		}
+
+		/* Just append the character. */
+		res = mandoc_realloc(res, ssz + 2);
+		res[ssz++] = *p++;
+	}
+
+	res[(int)ssz] = '\0';
+	return(res);
+}