aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorKristaps Dzonsons <kristaps@bsd.lv>2011-07-28 14:17:11 +0000
committerKristaps Dzonsons <kristaps@bsd.lv>2011-07-28 14:17:11 +0000
commit005bcc6b1d086c580bddac2fb6d976b3388902be (patch)
tree67613d40079cd746f01ff372fed13ffefd83cfe4
parent596c0de84c5a8a499213ec024d61c49876b11167 (diff)
downloadmandoc-005bcc6b1d086c580bddac2fb6d976b3388902be.tar.gz
mandoc-005bcc6b1d086c580bddac2fb6d976b3388902be.tar.zst
mandoc-005bcc6b1d086c580bddac2fb6d976b3388902be.zip
An implementation of `tr'. This routes allocations of TEXT nodes
through libroff, which does the appropriate translations of `tr'. This is SLOW: it uses the backend of `ds' and `de', which is a simple linear list. However, unlike `ds' and `de', it iterates over EACH CHARACTER of the entire file looking for replacements.
-rw-r--r--libmandoc.h3
-rw-r--r--man.c4
-rw-r--r--mdoc.c4
-rw-r--r--roff.723
-rw-r--r--roff.c199
5 files changed, 200 insertions, 33 deletions
diff --git a/libmandoc.h b/libmandoc.h
index 8ade3a15..bedf0497 100644
--- a/libmandoc.h
+++ b/libmandoc.h
@@ -1,4 +1,4 @@
-/* $Id: libmandoc.h,v 1.27 2011/07/27 12:41:02 kristaps Exp $ */
+/* $Id: libmandoc.h,v 1.28 2011/07/28 14:17:11 kristaps Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -75,6 +75,7 @@ void roff_endparse(struct roff *);
int roff_regisset(const struct roff *, enum regs);
unsigned int roff_regget(const struct roff *, enum regs);
void roff_regunset(struct roff *, enum regs);
+char *roff_strdup(const struct roff *, const char *);
#if 0
char roff_eqndelim(const struct roff *);
void roff_openeqn(struct roff *, const char *,
diff --git a/man.c b/man.c
index 9d480e10..52592d4a 100644
--- a/man.c
+++ b/man.c
@@ -1,4 +1,4 @@
-/* $Id: man.c,v 1.110 2011/07/27 12:43:02 kristaps Exp $ */
+/* $Id: man.c,v 1.111 2011/07/28 14:17:11 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -322,7 +322,7 @@ man_word_alloc(struct man *m, int line, int pos, const char *word)
struct man_node *n;
n = man_node_alloc(m, line, pos, MAN_TEXT, MAN_MAX);
- n->string = mandoc_strdup(word);
+ n->string = roff_strdup(m->roff, word);
if ( ! man_node_append(m, n))
return(0);
diff --git a/mdoc.c b/mdoc.c
index 2808326a..b2018205 100644
--- a/mdoc.c
+++ b/mdoc.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc.c,v 1.194 2011/07/27 12:43:02 kristaps Exp $ */
+/* $Id: mdoc.c,v 1.195 2011/07/28 14:17:11 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -570,7 +570,7 @@ mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
struct mdoc_node *n;
n = node_alloc(m, line, pos, MDOC_MAX, MDOC_TEXT);
- n->string = mandoc_strdup(p);
+ n->string = roff_strdup(m->roff, p);
if ( ! node_append(m, n))
return(0);
diff --git a/roff.7 b/roff.7
index 41837a1d..1dc8c06b 100644
--- a/roff.7
+++ b/roff.7
@@ -1,4 +1,4 @@
-.\" $Id: roff.7,v 1.29 2011/05/24 15:22:14 kristaps Exp $
+.\" $Id: roff.7,v 1.30 2011/07/28 14:17:11 kristaps Exp $
.\"
.\" Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv>
.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -15,7 +15,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: May 24 2011 $
+.Dd $Mdocdate: July 28 2011 $
.Dt ROFF 7
.Os
.Sh NAME
@@ -584,10 +584,21 @@ This line-scoped request can take an arbitrary number of arguments.
Currently, it is ignored including its arguments.
.Ss \&tr
Output character translation.
-This request is intended to have one argument,
-consisting of an even number of characters.
-Currently, it is ignored including its arguments,
-and the number of arguments is not checked.
+Its syntax is as follows:
+.Pp
+.D1 Pf \. Cm \&tr Ar [ab]+
+.Pp
+Pairs of
+.Ar ab
+characters are replaced
+.Ar ( a
+with
+.Ar b ) .
+Replacement (or origin) characters may also be character escapes; thus,
+.Pp
+.Dl tr \e(xx\e(yy
+.Pp
+replaces all occurrences of \e(xx with \e(yy.
.Ss \&T&
Re-start a table layout, retaining the options of the prior table
invocation.
diff --git a/roff.c b/roff.c
index 646f3caa..cfd4c876 100644
--- a/roff.c
+++ b/roff.c
@@ -1,4 +1,4 @@
-/* $Id: roff.c,v 1.163 2011/07/27 20:55:28 kristaps Exp $ */
+/* $Id: roff.c,v 1.164 2011/07/28 14:17:11 kristaps Exp $ */
/*
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -95,7 +95,8 @@ struct roff {
enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */
int rstackpos; /* position in rstack */
struct reg regs[REG__MAX];
- struct roffstr *first_string; /* user-defined strings & macros */
+ struct roffstr *strtab; /* user-defined strings & macros */
+ struct roffstr *chrtab; /* user-defined characters */
const char *current_string; /* value of last called user macro */
struct tbl_node *first_tbl; /* first table parsed */
struct tbl_node *last_tbl; /* last table parsed */
@@ -162,7 +163,7 @@ static enum rofferr roff_cond_sub(ROFF_ARGS);
static enum rofferr roff_ds(ROFF_ARGS);
static enum roffrule roff_evalcond(const char *, int *);
static void roff_free1(struct roff *);
-static void roff_freestr(struct roff *);
+static void roff_freestr(struct roffstr **);
static char *roff_getname(struct roff *, char **, int, int);
static const char *roff_getstrn(const struct roff *,
const char *, size_t);
@@ -177,7 +178,10 @@ static void roff_res(struct roff *,
static enum rofferr roff_rm(ROFF_ARGS);
static void roff_setstr(struct roff *,
const char *, const char *, int);
+static void roff_setstrn(struct roffstr **, const char *,
+ size_t, const char *, size_t, int);
static enum rofferr roff_so(ROFF_ARGS);
+static enum rofferr roff_tr(ROFF_ARGS);
static enum rofferr roff_TE(ROFF_ARGS);
static enum rofferr roff_TS(ROFF_ARGS);
static enum rofferr roff_EQ(ROFF_ARGS);
@@ -216,7 +220,7 @@ static struct roffmac roffs[ROFF_MAX] = {
{ "rm", roff_rm, NULL, NULL, 0, NULL },
{ "so", roff_so, NULL, NULL, 0, NULL },
{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
- { "tr", roff_line_ignore, NULL, NULL, 0, NULL },
+ { "tr", roff_tr, NULL, NULL, 0, NULL },
{ "TS", roff_TS, NULL, NULL, 0, NULL },
{ "TE", roff_TE, NULL, NULL, 0, NULL },
{ "T&", roff_T_, NULL, NULL, 0, NULL },
@@ -354,7 +358,8 @@ roff_free1(struct roff *r)
while (r->last)
roffnode_pop(r);
- roff_freestr(r);
+ roff_freestr(&r->strtab);
+ roff_freestr(&r->chrtab);
}
@@ -1340,6 +1345,58 @@ roff_TS(ROFF_ARGS)
+/*
+ * Handle the `tr' request: record output character translations.
+ * The argument is read as consecutive (first, second) pairs, where
+ * each member is either a single byte or a complete escape sequence
+ * as delimited by mandoc_escape().  Every pair is stored in r->chrtab
+ * with `first' as the key and `second' as the value.
+ * An empty argument, or a trailing `first' with no partner, is
+ * reported with MANDOCERR_ARGCOUNT (the unpaired character is mapped
+ * to a space); a bad escape is reported with MANDOCERR_BADESCAPE and
+ * ends processing of the request.
+ * Always returns ROFF_IGN.
+ */
/* ARGSUSED */
static enum rofferr
+roff_tr(ROFF_ARGS)
+{
+	const char	*p, *first, *second;
+	size_t		 fsz, ssz;
+	enum mandoc_esc	 esc;
+
+	p = *bufp + pos;
+
+	if ('\0' == *p) {
+		mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
+		return(ROFF_IGN);
+	}
+
+	while ('\0' != *p) {
+		fsz = ssz = 1;
+
+		first = p++;
+		if ('\\' == *first) {
+			esc = mandoc_escape(&p, NULL, NULL);
+			if (ESCAPE_ERROR == esc) {
+				mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
+				    ln, (int)(p - *bufp), NULL);
+				return(ROFF_IGN);
+			}
+			fsz = (size_t)(p - first);
+		}
+
+		second = p++;
+		if ('\\' == *second) {
+			esc = mandoc_escape(&p, NULL, NULL);
+			if (ESCAPE_ERROR == esc) {
+				mandoc_msg(MANDOCERR_BADESCAPE, r->parse,
+				    ln, (int)(p - *bufp), NULL);
+				return(ROFF_IGN);
+			}
+			ssz = (size_t)(p - second);
+		} else if ('\0' == *second) {
+			/*
+			 * Odd number of characters: `second' is the
+			 * terminating NUL and `p' already points one
+			 * past it.  Step back onto the terminator so
+			 * that the loop condition does not read beyond
+			 * the buffer, then map the lone character to
+			 * a space.
+			 */
+			p--;
+			mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
+			    ln, (int)(p - *bufp), NULL);
+			second = " ";
+		}
+
+		roff_setstrn(&r->chrtab, first, fsz, second, ssz, 0);
+	}
+
+	return(ROFF_IGN);
+}
+
+/* ARGSUSED */
+static enum rofferr
roff_so(ROFF_ARGS)
{
char *name;
@@ -1461,24 +1518,35 @@ static void
roff_setstr(struct roff *r, const char *name, const char *string,
int multiline)
{
- struct roffstr *n;
- char *c;
- size_t oldch, newch;
+
+ roff_setstrn(&r->strtab, name, strlen(name), string,
+ string ? strlen(string) : 0, multiline);
+}
+
+static void
+roff_setstrn(struct roffstr **r, const char *name, size_t namesz,
+ const char *string, size_t stringsz, int multiline)
+{
+ struct roffstr *n;
+ char *c;
+ int i;
+ size_t oldch, newch;
/* Search for an existing string with the same name. */
- n = r->first_string;
+ n = *r;
+
while (n && strcmp(name, n->key))
n = n->next;
if (NULL == n) {
/* Create a new string table entry. */
n = mandoc_malloc(sizeof(struct roffstr));
- n->key = mandoc_strdup(name);
- n->keysz = strlen(name);
+ n->key = mandoc_strndup(name, namesz);
+ n->keysz = namesz;
n->val = NULL;
n->valsz = 0;
- n->next = r->first_string;
- r->first_string = n;
+ n->next = *r;
+ *r = n;
} else if (0 == multiline) {
/* In multiline mode, append; else replace. */
free(n->val);
@@ -1493,7 +1561,8 @@ roff_setstr(struct roff *r, const char *name, const char *string,
* One additional byte for the '\n' in multiline mode,
* and one for the terminating '\0'.
*/
- newch = strlen(string) + (multiline ? 2u : 1u);
+ newch = stringsz + (multiline ? 2u : 1u);
+
if (NULL == n->val) {
n->val = mandoc_malloc(newch);
*n->val = '\0';
@@ -1507,14 +1576,15 @@ roff_setstr(struct roff *r, const char *name, const char *string,
c = n->val + (int)oldch;
/* Append new content to the destination buffer. */
- while (*string) {
+ i = 0;
+ while (i < (int)stringsz) {
/*
* Rudimentary roff copy mode:
* Handle escaped backslashes.
*/
- if ('\\' == *string && '\\' == *(string + 1))
- string++;
- *c++ = *string++;
+ if ('\\' == string[i] && '\\' == string[i + 1])
+ i++;
+ *c++ = string[i++];
}
/* Append terminating bytes. */
@@ -1530,7 +1600,7 @@ roff_getstrn(const struct roff *r, const char *name, size_t len)
{
const struct roffstr *n;
- for (n = r->first_string; n; n = n->next)
+ for (n = r->strtab; n; n = n->next)
if (0 == strncmp(name, n->key, len) &&
'\0' == n->key[(int)len])
return(n->val);
@@ -1539,18 +1609,18 @@ roff_getstrn(const struct roff *r, const char *name, size_t len)
}
static void
-roff_freestr(struct roff *r)
+roff_freestr(struct roffstr **r)
{
struct roffstr *n, *nn;
- for (n = r->first_string; n; n = nn) {
+ for (n = *r; n; n = nn) {
free(n->key);
free(n->val);
nn = n->next;
free(n);
}
- r->first_string = NULL;
+ *r = NULL;
}
const struct tbl_span *
@@ -1573,3 +1643,88 @@ roff_eqndelim(const struct roff *r)
return('\0');
}
+
+/*
+ * Duplicate an input string, making the appropriate character
+ * conversions (as stipulated by `tr') along the way.
+ * If no translations are registered in r->chrtab, this is a plain
+ * mandoc_strdup() of the input.
+ * Returns a heap-allocated string with all the replacements made.
+ */
+char *
+roff_strdup(const struct roff *r, const char *p)
+{
+	const struct roffstr *cp;
+	char		*res;
+	const char	*pp;
+	size_t		 ssz, sz;
+	enum mandoc_esc	 esc;
+
+	if (NULL == r->chrtab)
+		return(mandoc_strdup(p));
+	else if ('\0' == *p)
+		return(mandoc_strdup(""));
+
+	/*
+	 * Step through each character looking for term matches
+	 * (remember that a `tr' can be invoked with an escape, which is
+	 * a glyph but the escape is multi-character).
+	 * We only do this if the character table is non-empty
+	 * and the string is >0 length.
+	 */
+
+	res = NULL;
+	ssz = 0;
+
+	while ('\0' != *p) {
+		/* Search for term matches. */
+		for (cp = r->chrtab; cp; cp = cp->next)
+			if (0 == strncmp(p, cp->key, cp->keysz))
+				break;
+
+		if (NULL != cp) {
+			/*
+			 * A match has been found.
+			 * Append the replacement to the array and move
+			 * forward by the key's size.
+			 */
+			res = mandoc_realloc(res, ssz + cp->valsz + 1);
+			memcpy(res + ssz, cp->val, cp->valsz);
+			ssz += cp->valsz;
+			p += cp->keysz;
+			continue;
+		}
+
+		if ('\\' == *p) {
+			/*
+			 * Handle escapes carefully: we need to copy
+			 * over just the escape itself, or else we might
+			 * do replacements within the escape itself.
+			 */
+			pp = p++;
+			esc = mandoc_escape(&p, NULL, NULL);
+			if (ESCAPE_ERROR == esc) {
+				/*
+				 * We bail out on bad escapes, making
+				 * sure to pass along the bogus string:
+				 * the length must be accounted for so
+				 * the terminator lands after it.
+				 * No need to warn: we already did so
+				 * when roff_res() was called.
+				 */
+				sz = strlen(pp);
+				res = mandoc_realloc(res, ssz + sz + 1);
+				memcpy(res + ssz, pp, sz);
+				ssz += sz;
+				break;
+			}
+			sz = (size_t)(p - pp);
+			res = mandoc_realloc(res, ssz + sz + 1);
+			memcpy(res + ssz, pp, sz);
+			ssz += sz;
+			continue;
+		}
+
+		/* Just append the character. */
+		res = mandoc_realloc(res, ssz + 2);
+		res[ssz++] = *p++;
+	}
+
+	res[ssz] = '\0';
+	return(res);
+}