-/* $Id: roff.c,v 1.153 2011/07/26 14:24:06 kristaps Exp $ */
+/* $Id: roff.c,v 1.165 2011/07/28 14:53:22 kristaps Exp $ */
/*
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
};
struct roffstr {
- char *name; /* key of symbol */
- char *string; /* current value */
+ char *key; /* key of symbol */
+ size_t keysz; /* length of key in bytes, terminating nil not counted */
+ char *val; /* current value */
+ size_t valsz; /* length of val in bytes, terminating nil not counted; 0 while val is NULL */
struct roffstr *next; /* next in list */
};
enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
int rstackpos; /* position in rstack */
struct reg regs[REG__MAX];
- struct roffstr *first_string; /* user-defined strings & macros */
+ struct roffstr *strtab; /* user-defined strings & macros */
+ struct roffstr *chrtab; /* user-defined characters */
const char *current_string; /* value of last called user macro */
struct tbl_node *first_tbl; /* first table parsed */
struct tbl_node *last_tbl; /* last table parsed */
#define PREDEF(__name, __str) \
{ (__name), (__str) },
+static enum rofft roffhash_find(const char *, size_t);
+static void roffhash_init(void);
+static void roffnode_cleanscope(struct roff *);
+static void roffnode_pop(struct roff *);
+static void roffnode_push(struct roff *, enum rofft,
+ const char *, int, int);
static enum rofferr roff_block(ROFF_ARGS);
static enum rofferr roff_block_text(ROFF_ARGS);
static enum rofferr roff_block_sub(ROFF_ARGS);
static enum rofferr roff_cond_sub(ROFF_ARGS);
static enum rofferr roff_ds(ROFF_ARGS);
static enum roffrule roff_evalcond(const char *, int *);
-static void roff_freestr(struct roff *);
+static void roff_free1(struct roff *);
+static void roff_freestr(struct roffstr **);
static char *roff_getname(struct roff *, char **, int, int);
static const char *roff_getstrn(const struct roff *,
const char *, size_t);
static enum rofferr roff_line_ignore(ROFF_ARGS);
static enum rofferr roff_nr(ROFF_ARGS);
-static int roff_res(struct roff *,
+static void roff_openeqn(struct roff *, const char *,
+ int, int, const char *);
+static enum rofft roff_parse(struct roff *, const char *, int *);
+static enum rofferr roff_parsetext(char *);
+static void roff_res(struct roff *,
char **, size_t *, int, int);
static enum rofferr roff_rm(ROFF_ARGS);
static void roff_setstr(struct roff *,
const char *, const char *, int);
+static void roff_setstrn(struct roffstr **, const char *,
+ size_t, const char *, size_t, int);
static enum rofferr roff_so(ROFF_ARGS);
+static enum rofferr roff_tr(ROFF_ARGS);
static enum rofferr roff_TE(ROFF_ARGS);
static enum rofferr roff_TS(ROFF_ARGS);
static enum rofferr roff_EQ(ROFF_ARGS);
static enum rofferr roff_T_(ROFF_ARGS);
static enum rofferr roff_userdef(ROFF_ARGS);
-/* See roff_hash_find() */
+/* See roffhash_find() */
#define ASCII_HI 126
#define ASCII_LO 33
{ "rm", roff_rm, NULL, NULL, 0, NULL },
{ "so", roff_so, NULL, NULL, 0, NULL },
{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
- { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
+ { "tr", roff_tr, NULL, NULL, 0, NULL },
{ "TS", roff_TS, NULL, NULL, 0, NULL },
{ "TE", roff_TE, NULL, NULL, 0, NULL },
{ "T&", roff_T_, NULL, NULL, 0, NULL },
#include "predefs.in"
};
-static void roff_free1(struct roff *);
-static enum rofft roff_hash_find(const char *, size_t);
-static void roff_hash_init(void);
-static void roffnode_cleanscope(struct roff *);
-static void roffnode_push(struct roff *, enum rofft,
- const char *, int, int);
-static void roffnode_pop(struct roff *);
-static enum rofft roff_parse(struct roff *, const char *, int *);
-
-/* See roff_hash_find() */
+/* See roffhash_find() */
#define ROFF_HASH(p) (p[0] - ASCII_LO)
static void
-roff_hash_init(void)
+roffhash_init(void)
{
struct roffmac *n;
int buc, i;
* the nil-terminated string name could be found.
*/
static enum rofft
-roff_hash_find(const char *p, size_t s)
+roffhash_find(const char *p, size_t s)
{
int buc;
struct roffmac *n;
while (r->last)
roffnode_pop(r);
- roff_freestr(r);
+ roff_freestr(&r->strtab);
+ roff_freestr(&r->chrtab);
}
r->parse = parse;
r->rstackpos = -1;
- roff_hash_init();
+ roffhash_init();
for (i = 0; i < PREDEFS_MAX; i++)
roff_setstr(r, predefs[i].name, predefs[i].str, 0);
* `\*', e.g., `\*(ab'). These must be handled before the actual line
* is processed.
* This also checks the syntax of regular escapes.
-*/
-static int
+ */
+static void
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
enum mandoc_esc esc;
size_t nsz;
char *n;
- /* Search for a leading backslash and save a pointer to it. */
-
+again:
cp = *bufp + pos;
while (NULL != (cp = strchr(cp, '\\'))) {
stesc = cp++;
*/
if ('\0' == *cp)
- return(1);
+ return;
if ('*' != *cp) {
res = cp;
mandoc_msg
(MANDOCERR_BADESCAPE, r->parse,
ln, (int)(stesc - *bufp), NULL);
- continue;
+ return;
}
cp++;
switch (*cp) {
case ('\0'):
- return(1);
+ return;
case ('('):
cp++;
maxl = 2;
(MANDOCERR_BADESCAPE,
r->parse, ln,
(int)(stesc - *bufp), NULL);
- return(1);
+ return;
}
if (0 == maxl && ']' == *cp)
break;
/* Replace the escape sequence by the string. */
+ pos = stesc - *bufp;
+
nsz = *szp + strlen(res) + 1;
n = mandoc_malloc(nsz);
*bufp = n;
*szp = nsz;
- return(0);
+ goto again;
+ }
+}
+
+/*
+ * Process text streams: convert all breakable hyphens into ASCII_HYPH.
+ * The buffer at p is modified in place.  Escape sequences are stepped
+ * over with mandoc_escape(); scanning stops at the first escape that
+ * it rejects.
+ * A `-' is left untouched when it is the first byte of the text, when
+ * the byte before it is a backslash, or when the byte on either side
+ * is a tab, a space, another `-' or a digit.
+ * Always returns ROFF_CONT.
+ */
+static enum rofferr
+roff_parsetext(char *p)
+{
+ char l, r;
+ size_t sz;
+ const char *start;
+ enum mandoc_esc esc;
+
+ start = p;
+
+ while ('\0' != *p) {
+ sz = strcspn(p, "-\\"); /* distance to the next hyphen or backslash */
+ p += sz;
+
+ if ('\0' == *p)
+ break;
+
+ if ('\\' == *p) {
+ /* Skip over escapes. */
+ p++;
+ esc = mandoc_escape
+ ((const char **)&p, NULL, NULL);
+ if (ESCAPE_ERROR == esc)
+ break;
+ continue;
+ } else if (p == start) { /* hyphen in the first byte: no left neighbour to inspect */
+ p++;
+ continue;
+ }
+
+ l = *(p - 1);
+ r = *(p + 1); /* at worst the terminating nil, because *p is `-' here */
+ if ('\\' != l &&
+ '\t' != r && '\t' != l &&
+ ' ' != r && ' ' != l &&
+ '-' != r && '-' != l &&
+ ! isdigit((unsigned char)l) &&
+ ! isdigit((unsigned char)r))
+ *p = ASCII_HYPH;
+ p++;
}
- return(1);
+ return(ROFF_CONT);
}
enum rofferr
* words to fill in.
*/
- if ( ! roff_res(r, bufp, szp, ln, pos))
- return(ROFF_REPARSE);
+ roff_res(r, bufp, szp, ln, pos);
ppos = pos;
ctl = mandoc_getcontrol(*bufp, &pos);
return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
if (r->tbl)
return(tbl_read(r->tbl, ln, *bufp, pos));
- return(ROFF_CONT);
+ return(roff_parsetext(*bufp + pos));
} else if ( ! ctl) {
if (r->eqn)
return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
if (r->tbl)
return(tbl_read(r->tbl, ln, *bufp, pos));
- return(ROFF_CONT);
+ return(roff_parsetext(*bufp + pos));
} else if (r->eqn)
return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
maclen = strcspn(mac + 1, " \\\t\0") + 1;
t = (r->current_string = roff_getstrn(r, mac, maclen))
- ? ROFF_USERDEF : roff_hash_find(mac, maclen);
+ ? ROFF_USERDEF : roffhash_find(mac, maclen);
*pos += (int)maclen;
return(ROFF_IGN);
}
-int
+#if 0 /* roff_closeeqn() is compiled out by this change */
+static int
roff_closeeqn(struct roff *r)
{
return(r->eqn && ROFF_EQN == eqn_end(&r->eqn) ? 1 : 0);
}
+#endif /* 0 */
-void
+static void
roff_openeqn(struct roff *r, const char *name, int line,
int offs, const char *buf)
{
return(ROFF_IGN);
}
+/*
+ * Handle the `tr' request: read the argument as consecutive pairs of
+ * glyphs, each glyph being either a single byte or a backslash escape
+ * as delimited by mandoc_escape(), and record every pair in r->chrtab
+ * through roff_setstrn() (key = first glyph, value = second glyph).
+ * An empty argument is reported as MANDOCERR_ARGCOUNT.  A glyph that
+ * mandoc_escape() rejects is reported as MANDOCERR_BADESCAPE and ends
+ * processing of the request.  A trailing glyph without a partner is
+ * reported as MANDOCERR_ARGCOUNT and paired with a single space.
+ * Always returns ROFF_IGN.
+ */
+/* ARGSUSED */
+static enum rofferr
+roff_tr(ROFF_ARGS)
+{
+ const char *p, *first, *second;
+ size_t fsz, ssz;
+ enum mandoc_esc esc;
+
+ p = *bufp + pos;
+
+ if ('\0' == *p) {
+ mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
+ return(ROFF_IGN);
+ }
+
+ while ('\0' != *p) {
+ fsz = ssz = 1; /* default: each glyph is one byte long */
+
+ first = p++;
+ if ('\\' == *first) {
+ esc = mandoc_escape(&p, NULL, NULL);
+ if (ESCAPE_ERROR == esc) {
+ mandoc_msg
+ (MANDOCERR_BADESCAPE, r->parse,
+ ln, (int)(p - *bufp), NULL);
+ return(ROFF_IGN);
+ }
+ fsz = (size_t)(p - first); /* whole escape, backslash included */
+ }
+
+ second = p++;
+ if ('\\' == *second) {
+ esc = mandoc_escape(&p, NULL, NULL);
+ if (ESCAPE_ERROR == esc) {
+ mandoc_msg
+ (MANDOCERR_BADESCAPE, r->parse,
+ ln, (int)(p - *bufp), NULL);
+ return(ROFF_IGN);
+ }
+ ssz = (size_t)(p - second); /* whole escape, backslash included */
+ } else if ('\0' == *second) {
+ mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
+ ln, (int)(p - *bufp), NULL);
+ second = " ";
+ p--; /* step back onto the nil so that the loop terminates */
+ }
+
+ roff_setstrn(&r->chrtab, first, fsz, second, ssz, 0);
+ }
+
+ return(ROFF_IGN);
+}
+
/* ARGSUSED */
static enum rofferr
roff_so(ROFF_ARGS)
roff_setstr(struct roff *r, const char *name, const char *string,
int multiline)
{
- struct roffstr *n;
- char *c;
- size_t oldch, newch;
+
+ roff_setstrn(&r->strtab, name, strlen(name), string,
+ string ? strlen(string) : 0, multiline);
+}
+
+static void
+roff_setstrn(struct roffstr **r, const char *name, size_t namesz,
+ const char *string, size_t stringsz, int multiline)
+{
+ struct roffstr *n;
+ char *c;
+ int i;
+ size_t oldch, newch;
/* Search for an existing string with the same name. */
- n = r->first_string;
- while (n && strcmp(name, n->name))
+ n = *r;
+
+ while (n && strcmp(name, n->key))
n = n->next;
if (NULL == n) {
/* Create a new string table entry. */
n = mandoc_malloc(sizeof(struct roffstr));
- n->name = mandoc_strdup(name);
- n->string = NULL;
- n->next = r->first_string;
- r->first_string = n;
+ n->key = mandoc_strndup(name, namesz);
+ n->keysz = namesz;
+ n->val = NULL;
+ n->valsz = 0;
+ n->next = *r;
+ *r = n;
} else if (0 == multiline) {
/* In multiline mode, append; else replace. */
- free(n->string);
- n->string = NULL;
+ free(n->val);
+ n->val = NULL;
+ n->valsz = 0;
}
if (NULL == string)
* One additional byte for the '\n' in multiline mode,
* and one for the terminating '\0'.
*/
- newch = strlen(string) + (multiline ? 2u : 1u);
- if (NULL == n->string) {
- n->string = mandoc_malloc(newch);
- *n->string = '\0';
+ newch = stringsz + (multiline ? 2u : 1u);
+
+ if (NULL == n->val) {
+ n->val = mandoc_malloc(newch);
+ *n->val = '\0';
oldch = 0;
} else {
- oldch = strlen(n->string);
- n->string = mandoc_realloc(n->string, oldch + newch);
+ oldch = n->valsz;
+ n->val = mandoc_realloc(n->val, oldch + newch);
}
/* Skip existing content in the destination buffer. */
- c = n->string + (int)oldch;
+ c = n->val + (int)oldch;
/* Append new content to the destination buffer. */
- while (*string) {
+ i = 0;
+ while (i < (int)stringsz) {
/*
* Rudimentary roff copy mode:
* Handle escaped backslashes.
*/
- if ('\\' == *string && '\\' == *(string + 1))
- string++;
- *c++ = *string++;
+ if ('\\' == string[i] && '\\' == string[i + 1])
+ i++;
+ *c++ = string[i++];
}
/* Append terminating bytes. */
if (multiline)
*c++ = '\n';
+
*c = '\0';
+ n->valsz = (int)(c - n->val);
}
static const char *
{
const struct roffstr *n;
- n = r->first_string;
- while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
- n = n->next;
+ for (n = r->strtab; n; n = n->next)
+ if (0 == strncmp(name, n->key, len) &&
+ '\0' == n->key[(int)len])
+ return(n->val);
- return(n ? n->string : NULL);
+ return(NULL);
}
static void
-roff_freestr(struct roff *r)
+roff_freestr(struct roffstr **r) /* r: address of the head pointer of a roffstr list */
{
struct roffstr *n, *nn;
- for (n = r->first_string; n; n = nn) {
- free(n->name);
- free(n->string);
+ for (n = *r; n; n = nn) { /* release every entry together with its key and value */
+ free(n->key);
+ free(n->val);
nn = n->next;
free(n);
}
- r->first_string = NULL;
+ *r = NULL; /* leave the caller's head pointer as an empty list */
}
const struct tbl_span *
return('\0');
}
+
+/*
+ * Duplicate an input string, making the appropriate character
+ * conversions (as stipulated by `tr') along the way.
+ * Returns a heap-allocated, nil-terminated string with all the
+ * replacements made.  When no `tr' mapping is installed in r->chrtab,
+ * or when p is empty, the result is a plain copy of p.
+ */
+char *
+roff_strdup(const struct roff *r, const char *p)
+{
+	const struct roffstr *cp;
+	char		*res;
+	const char	*pp;
+	size_t		 ssz, sz;
+	enum mandoc_esc	 esc;
+
+	if (NULL == r->chrtab)
+		return(mandoc_strdup(p));
+	else if ('\0' == *p)
+		return(mandoc_strdup(""));
+
+	/*
+	 * Step through each character looking for term matches
+	 * (remember that a `tr' can be invoked with an escape, which is
+	 * a glyph but the escape is multi-character).
+	 * We only get here if the character table is non-empty
+	 * and the string is >0 length.
+	 */
+
+	res = NULL;
+	ssz = 0;	/* bytes written to res so far, nil not counted */
+
+	while ('\0' != *p) {
+		/* Search for term matches. */
+		for (cp = r->chrtab; cp; cp = cp->next)
+			if (0 == strncmp(p, cp->key, cp->keysz))
+				break;
+
+		if (NULL != cp) {
+			/*
+			 * A match has been found.
+			 * Append the match to the array and move
+			 * forward by its keysize.
+			 */
+			res = mandoc_realloc(res, ssz + cp->valsz + 1);
+			memcpy(res + ssz, cp->val, cp->valsz);
+			ssz += cp->valsz;
+			p += cp->keysz;
+			continue;
+		}
+
+		if ('\\' == *p) {
+			/*
+			 * Handle escapes carefully: we need to copy
+			 * over just the escape itself, or else we might
+			 * do replacements within the escape itself.
+			 */
+			pp = p++;
+			esc = mandoc_escape(&p, NULL, NULL);
+			if (ESCAPE_ERROR == esc) {
+				/*
+				 * We bail out on bad escapes, but make
+				 * sure to pass along the bogus string:
+				 * ssz must cover it, or the terminating
+				 * nil below would cut it off again.
+				 * No need to warn: we already did so
+				 * when roff_res() was called.
+				 */
+				sz = strlen(pp);
+				res = mandoc_realloc(res, ssz + sz + 1);
+				memcpy(res + ssz, pp, sz);
+				ssz += sz;
+				break;
+			}
+			sz = (size_t)(p - pp);
+			res = mandoc_realloc(res, ssz + sz + 1);
+			memcpy(res + ssz, pp, sz);
+			ssz += sz;
+			continue;
+		}
+
+		/* Just append the character. */
+		res = mandoc_realloc(res, ssz + 2);
+		res[ssz++] = *p++;
+	}
+
+	res[ssz] = '\0';
+	return(res);
+}