aboutsummaryrefslogtreecommitdiffstats
path: root/text_cmds/tr
diff options
context:
space:
mode:
authorCameron Katri <me@cameronkatri.com>2021-05-09 14:20:58 -0400
committerCameron Katri <me@cameronkatri.com>2021-05-09 14:20:58 -0400
commit5fd83771641d15c418f747bd343ba6738d3875f7 (patch)
tree5abf0f78f680d9837dbd93d4d4c3933bb7509599 /text_cmds/tr
downloadapple_cmds-5fd83771641d15c418f747bd343ba6738d3875f7.tar.gz
apple_cmds-5fd83771641d15c418f747bd343ba6738d3875f7.tar.zst
apple_cmds-5fd83771641d15c418f747bd343ba6738d3875f7.zip
Import macOS userland
adv_cmds-176 basic_cmds-55 bootstrap_cmds-116.100.1 developer_cmds-66 diskdev_cmds-667.40.1 doc_cmds-53.60.1 file_cmds-321.40.3 mail_cmds-35 misc_cmds-34 network_cmds-606.40.1 patch_cmds-17 remote_cmds-63 shell_cmds-216.60.1 system_cmds-880.60.2 text_cmds-106
Diffstat (limited to 'text_cmds/tr')
-rw-r--r--text_cmds/tr/cmap.c212
-rw-r--r--text_cmds/tr/cmap.h83
-rw-r--r--text_cmds/tr/cset.c290
-rw-r--r--text_cmds/tr/cset.h74
-rw-r--r--text_cmds/tr/extern.h55
-rw-r--r--text_cmds/tr/str.c450
-rw-r--r--text_cmds/tr/tr.1420
-rw-r--r--text_cmds/tr/tr.c378
8 files changed, 1962 insertions, 0 deletions
diff --git a/text_cmds/tr/cmap.c b/text_cmds/tr/cmap.c
new file mode 100644
index 0000000..1edd9d2
--- /dev/null
+++ b/text_cmds/tr/cmap.c
@@ -0,0 +1,212 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * "Character map" ADT. Stores mappings between pairs of characters in a
+ * splay tree, with a lookup table cache to simplify looking up the first
+ * bunch of characters (which are presumably more common than others).
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/usr.bin/tr/cmap.c,v 1.2 2004/07/14 08:36:09 tjr Exp $");
+
+#include <assert.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include "cmap.h"
+
+static struct cmapnode *cmap_splay(struct cmapnode *, wint_t);
+
+/*
+ * cmap_alloc --
+ * Allocate a character map.
+ */
+struct cmap *
+cmap_alloc(void)
+{
+ struct cmap *cm;
+
+ cm = malloc(sizeof(*cm));
+ if (cm == NULL)
+ return (NULL);
+ cm->cm_root = NULL;
+ cm->cm_def = CM_DEF_SELF;
+ cm->cm_havecache = false;
+ cm->cm_min = cm->cm_max = 0;
+ return (cm);
+}
+
+/*
+ * cmap_add --
+ * Add a mapping from "from" to "to" to the map.
+ */
+bool
+cmap_add(struct cmap *cm, wint_t from, wint_t to)
+{
+ struct cmapnode *cmn, *ncmn;
+
+ cm->cm_havecache = false;
+
+ if (cm->cm_root == NULL) {
+ cmn = malloc(sizeof(*cmn));
+ if (cmn == NULL)
+ return (false);
+ cmn->cmn_from = from;
+ cmn->cmn_to = to;
+ cmn->cmn_left = cmn->cmn_right = NULL;
+ cm->cm_root = cmn;
+ cm->cm_min = cm->cm_max = from;
+ return (true);
+ }
+
+ cmn = cm->cm_root = cmap_splay(cm->cm_root, from);
+
+ if (cmn->cmn_from == from) {
+ cmn->cmn_to = to;
+ return (true);
+ }
+
+ ncmn = malloc(sizeof(*ncmn));
+ if (ncmn == NULL)
+ return (false);
+ ncmn->cmn_from = from;
+ ncmn->cmn_to = to;
+ if (from < cmn->cmn_from) {
+ ncmn->cmn_left = cmn->cmn_left;
+ ncmn->cmn_right = cmn;
+ cmn->cmn_left = NULL;
+ } else {
+ ncmn->cmn_right = cmn->cmn_right;
+ ncmn->cmn_left = cmn;
+ cmn->cmn_right = NULL;
+ }
+ if (from < cm->cm_min)
+ cm->cm_min = from;
+ if (from > cm->cm_max)
+ cm->cm_max = from;
+ cm->cm_root = ncmn;
+
+ return (true);
+}
+
+/*
+ * cmap_lookup_hard --
+ * Look up the mapping for a character without using the cache.
+ */
+wint_t
+cmap_lookup_hard(struct cmap *cm, wint_t ch)
+{
+
+ if (cm->cm_root != NULL) {
+ cm->cm_root = cmap_splay(cm->cm_root, ch);
+ if (cm->cm_root->cmn_from == ch)
+ return (cm->cm_root->cmn_to);
+ }
+ return (cm->cm_def == CM_DEF_SELF ? ch : cm->cm_def);
+}
+
+/*
+ * cmap_cache --
+ * Update the cache.
+ */
+void
+cmap_cache(struct cmap *cm)
+{
+ wint_t ch;
+
+ for (ch = 0; ch < CM_CACHE_SIZE; ch++)
+ cm->cm_cache[ch] = cmap_lookup_hard(cm, ch);
+
+ cm->cm_havecache = true;
+}
+
+/*
+ * cmap_default --
+ * Change the value that characters without mappings map to, and
+ * return the old value. The special character value CM_MAP_SELF
+ * means characters map to themselves.
+ */
+wint_t
+cmap_default(struct cmap *cm, wint_t def)
+{
+ wint_t old;
+
+ old = cm->cm_def;
+ cm->cm_def = def;
+ cm->cm_havecache = false;
+ return (old);
+}
+
+static struct cmapnode *
+cmap_splay(struct cmapnode *t, wint_t ch)
+{
+ struct cmapnode N, *l, *r, *y;
+
+ /*
+ * Based on public domain code from Sleator.
+ */
+
+ assert(t != NULL);
+
+ N.cmn_left = N.cmn_right = NULL;
+ l = r = &N;
+ for (;;) {
+ if (ch < t->cmn_from) {
+ if (t->cmn_left != NULL &&
+ ch < t->cmn_left->cmn_from) {
+ y = t->cmn_left;
+ t->cmn_left = y->cmn_right;
+ y->cmn_right = t;
+ t = y;
+ }
+ if (t->cmn_left == NULL)
+ break;
+ r->cmn_left = t;
+ r = t;
+ t = t->cmn_left;
+ } else if (ch > t->cmn_from) {
+ if (t->cmn_right != NULL &&
+ ch > t->cmn_right->cmn_from) {
+ y = t->cmn_right;
+ t->cmn_right = y->cmn_left;
+ y->cmn_left = t;
+ t = y;
+ }
+ if (t->cmn_right == NULL)
+ break;
+ l->cmn_right = t;
+ l = t;
+ t = t->cmn_right;
+ } else
+ break;
+ }
+ l->cmn_right = t->cmn_left;
+ r->cmn_left = t->cmn_right;
+ t->cmn_left = N.cmn_right;
+ t->cmn_right = N.cmn_left;
+ return (t);
+}
diff --git a/text_cmds/tr/cmap.h b/text_cmds/tr/cmap.h
new file mode 100644
index 0000000..38e3d0e
--- /dev/null
+++ b/text_cmds/tr/cmap.h
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/usr.bin/tr/cmap.h,v 1.1 2004/07/09 02:08:07 tjr Exp $
+ */
+
+#ifndef CMAP_H
+#define CMAP_H
+
+#include <limits.h>
+#include <stdbool.h>
+#include <wchar.h>
+
+struct cmapnode {
+ wint_t cmn_from;
+ wint_t cmn_to;
+ struct cmapnode *cmn_left;
+ struct cmapnode *cmn_right;
+};
+
+struct cmap {
+#define CM_CACHE_SIZE 128
+ wint_t cm_cache[CM_CACHE_SIZE];
+ bool cm_havecache;
+ struct cmapnode *cm_root;
+#define CM_DEF_SELF -2
+ wint_t cm_def;
+ wint_t cm_min;
+ wint_t cm_max;
+};
+
+struct cmap * cmap_alloc(void);
+bool cmap_add(struct cmap *, wint_t, wint_t);
+wint_t cmap_lookup_hard(struct cmap *, wint_t);
+void cmap_cache(struct cmap *);
+wint_t cmap_default(struct cmap *, wint_t);
+
+static __inline wint_t
+cmap_lookup(struct cmap *cm, wint_t from)
+{
+
+ if (from < CM_CACHE_SIZE && cm->cm_havecache)
+ return (cm->cm_cache[from]);
+ return (cmap_lookup_hard(cm, from));
+}
+
+static __inline wint_t
+cmap_min(struct cmap *cm)
+{
+
+ return (cm->cm_min);
+}
+
+static __inline wint_t
+cmap_max(struct cmap *cm)
+{
+
+ return (cm->cm_max);
+}
+
+#endif
diff --git a/text_cmds/tr/cset.c b/text_cmds/tr/cset.c
new file mode 100644
index 0000000..6e7c217
--- /dev/null
+++ b/text_cmds/tr/cset.c
@@ -0,0 +1,290 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * "Set of characters" ADT implemented as a splay tree of extents, with
+ * a lookup table cache to simplify looking up the first bunch of
+ * characters (which are presumably more common than others).
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/usr.bin/tr/cset.c,v 1.3 2004/07/14 08:33:14 tjr Exp $");
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <wctype.h>
+#include "cset.h"
+
+static struct csnode * cset_delete(struct csnode *, wchar_t);
+static __inline int cset_rangecmp(struct csnode *, wchar_t);
+static struct csnode * cset_splay(struct csnode *, wchar_t);
+
+/*
+ * cset_alloc --
+ * Allocate a set of characters.
+ */
+struct cset *
+cset_alloc(void)
+{
+ struct cset *cs;
+
+ if ((cs = malloc(sizeof(*cs))) == NULL)
+ return (NULL);
+ cs->cs_root = NULL;
+ cs->cs_classes = NULL;
+ cs->cs_havecache = false;
+ cs->cs_invert = false;
+ return (cs);
+}
+
+/*
+ * cset_add --
+ * Add a character to the set.
+ */
+bool
+cset_add(struct cset *cs, wchar_t ch)
+{
+ struct csnode *csn, *ncsn;
+ wchar_t oval;
+
+ cs->cs_havecache = false;
+
+ /*
+ * Inserting into empty tree; new item becomes the root.
+ */
+ if (cs->cs_root == NULL) {
+ csn = malloc(sizeof(*cs->cs_root));
+ if (csn == NULL)
+ return (false);
+ csn->csn_left = csn->csn_right = NULL;
+ csn->csn_min = csn->csn_max = ch;
+ cs->cs_root = csn;
+ return (true);
+ }
+
+ /*
+ * Splay to check whether the item already exists, and otherwise,
+ * where we should put it.
+ */
+ csn = cs->cs_root = cset_splay(cs->cs_root, ch);
+
+ /*
+ * Avoid adding duplicate nodes.
+ */
+ if (cset_rangecmp(csn, ch) == 0)
+ return (true);
+
+ /*
+ * Allocate a new node and make it the new root.
+ */
+ ncsn = malloc(sizeof(*ncsn));
+ if (ncsn == NULL)
+ return (false);
+ ncsn->csn_min = ncsn->csn_max = ch;
+ if (cset_rangecmp(csn, ch) < 0) {
+ ncsn->csn_left = csn->csn_left;
+ ncsn->csn_right = csn;
+ csn->csn_left = NULL;
+ } else {
+ ncsn->csn_right = csn->csn_right;
+ ncsn->csn_left = csn;
+ csn->csn_right = NULL;
+ }
+ cs->cs_root = ncsn;
+
+ /*
+ * Coalesce with left and right neighbours if possible.
+ */
+ if (ncsn->csn_left != NULL) {
+ ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1);
+ if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) {
+ oval = ncsn->csn_left->csn_min;
+ ncsn->csn_left = cset_delete(ncsn->csn_left,
+ ncsn->csn_left->csn_min);
+ ncsn->csn_min = oval;
+ }
+ }
+ if (ncsn->csn_right != NULL) {
+ ncsn->csn_right = cset_splay(ncsn->csn_right,
+ ncsn->csn_max + 1);
+ if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) {
+ oval = ncsn->csn_right->csn_max;
+ ncsn->csn_right = cset_delete(ncsn->csn_right,
+ ncsn->csn_right->csn_min);
+ ncsn->csn_max = oval;
+ }
+ }
+
+ return (true);
+}
+
+/*
+ * cset_in_hard --
+ * Determine whether a character is in the set without using
+ * the cache.
+ */
+bool
+cset_in_hard(struct cset *cs, wchar_t ch)
+{
+ struct csclass *csc;
+
+ for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next)
+ if (csc->csc_invert ^ iswctype(ch, csc->csc_type) != 0)
+ return (cs->cs_invert ^ true);
+ if (cs->cs_root != NULL) {
+ cs->cs_root = cset_splay(cs->cs_root, ch);
+ return (cs->cs_invert ^ cset_rangecmp(cs->cs_root, ch) == 0);
+ }
+ return (cs->cs_invert ^ false);
+}
+
+/*
+ * cset_cache --
+ * Update the cache.
+ */
+void
+cset_cache(struct cset *cs)
+{
+ wchar_t i;
+
+ for (i = 0; i < CS_CACHE_SIZE; i++)
+ cs->cs_cache[i] = cset_in_hard(cs, i);
+
+ cs->cs_havecache = true;
+}
+
+/*
+ * cset_invert --
+ * Invert the character set.
+ */
+void
+cset_invert(struct cset *cs)
+{
+
+ cs->cs_invert ^= true;
+ cs->cs_havecache = false;
+}
+
+/*
+ * cset_addclass --
+ * Add a wctype()-style character class to the set, optionally
+ * inverting it.
+ */
+bool
+cset_addclass(struct cset *cs, wctype_t type, bool invert)
+{
+ struct csclass *csc;
+
+ csc = malloc(sizeof(*csc));
+ if (csc == NULL)
+ return (false);
+ csc->csc_type = type;
+ csc->csc_invert = invert;
+ csc->csc_next = cs->cs_classes;
+ cs->cs_classes = csc;
+ cs->cs_havecache = false;
+ return (true);
+}
+
+static __inline int
+cset_rangecmp(struct csnode *t, wchar_t ch)
+{
+
+ if (ch < t->csn_min)
+ return (-1);
+ if (ch > t->csn_max)
+ return (1);
+ return (0);
+}
+
+static struct csnode *
+cset_splay(struct csnode *t, wchar_t ch)
+{
+ struct csnode N, *l, *r, *y;
+
+ /*
+ * Based on public domain code from Sleator.
+ */
+
+ assert(t != NULL);
+
+ N.csn_left = N.csn_right = NULL;
+ l = r = &N;
+ for (;;) {
+ if (cset_rangecmp(t, ch) < 0) {
+ if (t->csn_left != NULL &&
+ cset_rangecmp(t->csn_left, ch) < 0) {
+ y = t->csn_left;
+ t->csn_left = y->csn_right;
+ y->csn_right = t;
+ t = y;
+ }
+ if (t->csn_left == NULL)
+ break;
+ r->csn_left = t;
+ r = t;
+ t = t->csn_left;
+ } else if (cset_rangecmp(t, ch) > 0) {
+ if (t->csn_right != NULL &&
+ cset_rangecmp(t->csn_right, ch) > 0) {
+ y = t->csn_right;
+ t->csn_right = y->csn_left;
+ y->csn_left = t;
+ t = y;
+ }
+ if (t->csn_right == NULL)
+ break;
+ l->csn_right = t;
+ l = t;
+ t = t->csn_right;
+ } else
+ break;
+ }
+ l->csn_right = t->csn_left;
+ r->csn_left = t->csn_right;
+ t->csn_left = N.csn_right;
+ t->csn_right = N.csn_left;
+ return (t);
+}
+
+static struct csnode *
+cset_delete(struct csnode *t, wchar_t ch)
+{
+ struct csnode *x;
+
+ assert(t != NULL);
+ t = cset_splay(t, ch);
+ assert(cset_rangecmp(t, ch) == 0);
+ if (t->csn_left == NULL)
+ x = t->csn_right;
+ else {
+ x = cset_splay(t->csn_left, ch);
+ x->csn_right = t->csn_right;
+ }
+ free(t);
+ return x;
+}
diff --git a/text_cmds/tr/cset.h b/text_cmds/tr/cset.h
new file mode 100644
index 0000000..ae62d1d
--- /dev/null
+++ b/text_cmds/tr/cset.h
@@ -0,0 +1,74 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/usr.bin/tr/cset.h,v 1.2 2004/07/14 08:35:11 tjr Exp $
+ */
+
+#ifndef CSET_H
+#define CSET_H
+
+#include <stdbool.h>
+#include <wchar.h>
+#include <wctype.h>
+
+struct csnode {
+ wchar_t csn_min;
+ wchar_t csn_max;
+ struct csnode *csn_left;
+ struct csnode *csn_right;
+};
+
+struct csclass {
+ wctype_t csc_type;
+ bool csc_invert;
+ struct csclass *csc_next;
+};
+
+struct cset {
+#define CS_CACHE_SIZE 256
+ bool cs_cache[CS_CACHE_SIZE];
+ bool cs_havecache;
+ struct csclass *cs_classes;
+ struct csnode *cs_root;
+ bool cs_invert;
+};
+
+bool cset_addclass(struct cset *, wctype_t, bool);
+struct cset * cset_alloc(void);
+bool cset_add(struct cset *, wchar_t);
+void cset_invert(struct cset *);
+bool cset_in_hard(struct cset *, wchar_t);
+void cset_cache(struct cset *);
+
+static __inline bool
+cset_in(struct cset *cs, wchar_t ch)
+{
+
+ if (ch < CS_CACHE_SIZE && cs->cs_havecache)
+ return (cs->cs_cache[ch]);
+ return (cset_in_hard(cs, ch));
+}
+
+#endif /* CSET_H */
diff --git a/text_cmds/tr/extern.h b/text_cmds/tr/extern.h
new file mode 100644
index 0000000..adc9ec3
--- /dev/null
+++ b/text_cmds/tr/extern.h
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)extern.h 8.1 (Berkeley) 6/6/93
+ * $FreeBSD: src/usr.bin/tr/extern.h,v 1.9 2004/07/09 02:08:07 tjr Exp $
+ */
+
+#include <limits.h>
+
+#define NCHARS_SB (UCHAR_MAX + 1) /* Number of single-byte characters. */
+#define OOBCH -1 /* Out of band character value. */
+
+typedef struct {
+ enum { STRING1, STRING2 } which;
+ enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE,
+ CCLASS, CCLASS_UPPER, CCLASS_LOWER, SET } state;
+ int cnt; /* character count */
+ wint_t lastch; /* last character */
+ wctype_t cclass; /* character class from wctype() */
+ wint_t equiv[NCHARS_SB]; /* equivalence set */
+ wint_t *set; /* set of characters */
+ char *str; /* user's string */
+} STR;
+
+wint_t next(STR *);
+int charcoll(const void *, const void *);
diff --git a/text_cmds/tr/str.c b/text_cmds/tr/str.c
new file mode 100644
index 0000000..e7e6757
--- /dev/null
+++ b/text_cmds/tr/str.c
@@ -0,0 +1,450 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+__FBSDID("$FreeBSD: src/usr.bin/tr/str.c,v 1.24 2004/11/14 05:15:25 jkh Exp $");
+
+#ifndef lint
+static const char sccsid[] = "@(#)str.c 8.2 (Berkeley) 4/28/95";
+#endif
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <xlocale.h>
+
+#include "extern.h"
+
+static int backslash(STR *, int *);
+static int bracket(STR *);
+static void genclass(STR *);
+static void genequiv(STR *);
+static int genrange(STR *, int);
+static void genseq(STR *);
+
+/*
+ * Using libc internal function __collate_lookup_l for character
+ * equivalence
+ */
+void __collate_lookup_l(const __darwin_wchar_t *, int *, int *, int *,
+locale_t);
+/*
+ * Cache for primary collation weight of each single byte character
+ * used in static void genequiv(s)
+ */
+int collation_weight_cache[NCHARS_SB];
+int is_weight_cached = 0;
+
+wint_t
+next(s)
+ STR *s;
+{
+ int is_octal;
+ wint_t ch;
+ wchar_t wch;
+ size_t clen;
+
+ switch (s->state) {
+ case EOS:
+ return (0);
+ case INFINITE:
+#ifdef __APPLE__
+ switch (ch = (u_char)*s->str) {
+ case '\0':
+ /*
+ * force at least one postive return so setup() will
+ * process lastch of a sequence like [a*]; but change
+ * state so it won't get stuck in a while(next(s)) loop
+ */
+ s->state = NORMAL;
+ }
+#endif /* __APPLE__ */
+ return (1);
+ case NORMAL:
+ switch (*s->str) {
+ case '\0':
+ s->state = EOS;
+ return (0);
+ case '\\':
+ s->lastch = backslash(s, &is_octal);
+ break;
+ case '[':
+ if (bracket(s))
+ return (next(s));
+ /* FALLTHROUGH */
+ default:
+ clen = mbrtowc(&wch, s->str, MB_LEN_MAX, NULL);
+ if (clen == (size_t)-1 || clen == (size_t)-2 ||
+ clen == 0)
+ errc(1, EILSEQ, NULL);
+ is_octal = 0;
+ s->lastch = wch;
+ s->str += clen;
+ break;
+ }
+
+ /* We can start a range at any time. */
+ if (s->str[0] == '-' && genrange(s, is_octal))
+ return (next(s));
+ return (1);
+ case RANGE:
+ if (s->cnt-- == 0) {
+ s->state = NORMAL;
+ return (next(s));
+ }
+ ++s->lastch;
+ return (1);
+ case SEQUENCE:
+ if (s->cnt-- == 0) {
+ s->state = NORMAL;
+ return (next(s));
+ }
+ return (1);
+ case CCLASS:
+ case CCLASS_UPPER:
+ case CCLASS_LOWER:
+ s->cnt++;
+ ch = nextwctype(s->lastch, s->cclass);
+ if (ch == -1) {
+ s->state = NORMAL;
+ return (next(s));
+ }
+ s->lastch = ch;
+ return (1);
+ case SET:
+ if ((ch = s->set[s->cnt++]) == OOBCH) {
+ s->state = NORMAL;
+ return (next(s));
+ }
+ s->lastch = ch;
+ return (1);
+ default:
+ return (0);
+ }
+ /* NOTREACHED */
+}
+
+static int
+bracket(s)
+ STR *s;
+{
+ char *p;
+
+ switch (s->str[1]) {
+ case ':': /* "[:class:]" */
+ if ((p = strchr(s->str + 2, ']')) == NULL)
+ return (0);
+ if (*(p - 1) != ':' || p - s->str < 4)
+ goto repeat;
+ *(p - 1) = '\0';
+ s->str += 2;
+ genclass(s);
+ s->str = p + 1;
+ return (1);
+ case '=': /* "[=equiv=]" */
+ if ((p = strchr(s->str + 2, ']')) == NULL)
+ return (0);
+ if (*(p - 1) != '=' || p - s->str < 4)
+ goto repeat;
+ s->str += 2;
+ genequiv(s);
+ return (1);
+ default: /* "[\###*n]" or "[#*n]" */
+ repeat:
+ if ((p = strpbrk(s->str + 2, "*]")) == NULL)
+ return (0);
+ if (p[0] != '*' || index(p, ']') == NULL)
+ return (0);
+ s->str += 1;
+ genseq(s);
+ return (1);
+ }
+ /* NOTREACHED */
+}
+
+static void
+genclass(s)
+ STR *s;
+{
+
+ if ((s->cclass = wctype(s->str)) == 0)
+ errx(1, "unknown class %s", s->str);
+ s->cnt = 0;
+ s->lastch = -1; /* incremented before check in next() */
+ if (strcmp(s->str, "upper") == 0)
+ s->state = CCLASS_UPPER;
+ else if (strcmp(s->str, "lower") == 0)
+ s->state = CCLASS_LOWER;
+ else
+ s->state = CCLASS;
+}
+
+static void
+genequiv(s)
+ STR *s;
+{
+ int i, p;
+ size_t clen;
+ wchar_t wc;
+
+ if (*s->str == '\\') {
+ s->equiv[0] = backslash(s, NULL);
+ if (*s->str != '=')
+ errx(1, "misplaced equivalence equals sign");
+ s->str += 2;
+ } else {
+ clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL);
+ if (clen == (size_t)-1 || clen == (size_t)-2 || clen == 0)
+ errc(1, EILSEQ, NULL);
+ s->equiv[0] = wc;
+ if (s->str[clen] != '=')
+ errx(1, "misplaced equivalence equals sign");
+ s->str += clen + 2;
+ }
+
+ /*
+ * Partially supporting multi-byte locales; only finds equivalent
+ * characters within the first NCHARS_SB entries of the
+ * collation table
+ */
+ int tprim, tsec;
+ int len;
+ __collate_lookup_l(s->equiv, &len, &tprim, &tsec, LC_GLOBAL_LOCALE);
+
+ if (tprim != -1) {
+ for (p = 1, i = 1; i < NCHARS_SB; i++) {
+ int cprim;
+ if (is_weight_cached) {
+ /*
+ * retrieve primary weight from cache
+ */
+ cprim = collation_weight_cache[i];
+ } else {
+ /*
+ * perform lookup of primary weight and fill cache
+ */
+ int csec;
+ __collate_lookup_l((__darwin_wchar_t *)&i, &len, &cprim, &csec, LC_GLOBAL_LOCALE);
+ collation_weight_cache[i] = cprim;
+ }
+
+ /*
+ * If a character does not exist in the collation
+ * table, just skip it
+ */
+ if (cprim == -1) {
+ continue;
+ }
+
+ /*
+ * Only compare primary weights to determine multi-byte
+ * character equivalence
+ */
+ if (cprim == tprim) {
+ s->equiv[p++] = i;
+ }
+ }
+ s->equiv[p] = OOBCH;
+
+ if (!is_weight_cached) {
+ is_weight_cached = 1;
+ }
+ }
+
+ s->cnt = 0;
+ s->state = SET;
+ s->set = s->equiv;
+}
+
+static int
+genrange(STR *s, int was_octal)
+{
+ int stopval, octal;
+ char *savestart;
+ int n, cnt, *p;
+ size_t clen;
+ wchar_t wc;
+
+ octal = 0;
+ savestart = s->str;
+ if (*++s->str == '\\')
+ stopval = backslash(s, &octal);
+ else {
+ clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL);
+ if (clen == (size_t)-1 || clen == (size_t)-2)
+ errc(1, EILSEQ, NULL);
+ stopval = wc;
+ s->str += clen;
+ }
+ /*
+ * XXX Characters are not ordered according to collating sequence in
+ * multibyte locales.
+ */
+ if (octal || was_octal || MB_CUR_MAX > 1) {
+ if (stopval < s->lastch) {
+ s->str = savestart;
+ return (0);
+ }
+ s->cnt = stopval - s->lastch + 1;
+ s->state = RANGE;
+ --s->lastch;
+ return (1);
+ }
+ if (charcoll((const void *)&stopval, (const void *)&(s->lastch)) < 0) {
+ s->str = savestart;
+ return (0);
+ }
+ if ((s->set = p = malloc((NCHARS_SB + 1) * sizeof(int))) == NULL)
+ err(1, "genrange() malloc");
+ for (cnt = 0; cnt < NCHARS_SB; cnt++)
+ if (charcoll((const void *)&cnt, (const void *)&(s->lastch)) >= 0 &&
+ charcoll((const void *)&cnt, (const void *)&stopval) <= 0)
+ *p++ = cnt;
+ *p = OOBCH;
+ n = p - s->set;
+
+ s->cnt = 0;
+ s->state = SET;
+ if (n > 1)
+ mergesort(s->set, n, sizeof(*(s->set)), charcoll);
+ return (1);
+}
+
+static void
+genseq(s)
+ STR *s;
+{
+ char *ep;
+ wchar_t wc;
+ size_t clen;
+
+#ifndef __APPLE__
+ if (s->which == STRING1)
+ errx(1, "sequences only valid in string2");
+#endif /* !__APPLE__ */
+
+ if (*s->str == '\\')
+ s->lastch = backslash(s, NULL);
+ else {
+ clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL);
+ if (clen == (size_t)-1 || clen == (size_t)-2)
+ errc(1, EILSEQ, NULL);
+ s->lastch = wc;
+ s->str += clen;
+ }
+ if (*s->str != '*')
+ errx(1, "misplaced sequence asterisk");
+
+ switch (*++s->str) {
+ case '\\':
+ s->cnt = backslash(s, NULL);
+ break;
+ case ']':
+ s->cnt = 0;
+ ++s->str;
+ break;
+ default:
+ if (isdigit((u_char)*s->str)) {
+ s->cnt = strtol(s->str, &ep, 0);
+ if (*ep == ']') {
+ s->str = ep + 1;
+ break;
+ }
+ }
+ errx(1, "illegal sequence count");
+ /* NOTREACHED */
+ }
+
+ s->state = s->cnt ? SEQUENCE : INFINITE;
+}
+
+/*
+ * Translate \??? into a character. Up to 3 octal digits, if no digits either
+ * an escape code or a literal character.
+ */
+static int
+backslash(STR *s, int *is_octal)
+{
+ int ch, cnt, val;
+
+ if (is_octal != NULL)
+ *is_octal = 0;
+ for (cnt = val = 0;;) {
+ ch = (u_char)*++s->str;
+ if (!isdigit(ch) || ch > '7')
+ break;
+ val = val * 8 + ch - '0';
+ if (++cnt == 3) {
+ ++s->str;
+ break;
+ }
+ }
+ if (cnt) {
+ if (is_octal != NULL)
+ *is_octal = 1;
+ return (val);
+ }
+ if (ch != '\0')
+ ++s->str;
+ switch (ch) {
+ case 'a': /* escape characters */
+ return ('\7');
+ case 'b':
+ return ('\b');
+ case 'f':
+ return ('\f');
+ case 'n':
+ return ('\n');
+ case 'r':
+ return ('\r');
+ case 't':
+ return ('\t');
+ case 'v':
+ return ('\13');
+ case '\0': /* \" -> \ */
+ s->state = EOS;
+ return ('\\');
+ default: /* \x" -> x */
+ return (ch);
+ }
+}
diff --git a/text_cmds/tr/tr.1 b/text_cmds/tr/tr.1
new file mode 100644
index 0000000..6513674
--- /dev/null
+++ b/text_cmds/tr/tr.1
@@ -0,0 +1,420 @@
+.\" Copyright (c) 1991, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" the Institute of Electrical and Electronics Engineers, Inc.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)tr.1 8.1 (Berkeley) 6/6/93
+.\" $FreeBSD: src/usr.bin/tr/tr.1,v 1.33 2005/02/13 23:45:51 ru Exp $
+.\"
+.Dd July 23, 2004
+.Dt TR 1
+.Os
+.Sh NAME
+.Nm tr
+.Nd translate characters
+.Sh SYNOPSIS
+.Nm
+.Op Fl Ccsu
+.Ar string1 string2
+.Nm
+.Op Fl Ccu
+.Fl d
+.Ar string1
+.Nm
+.Op Fl Ccu
+.Fl s
+.Ar string1
+.Nm
+.Op Fl Ccu
+.Fl ds
+.Ar string1 string2
+.Sh DESCRIPTION
+The
+.Nm
+utility copies the standard input to the standard output with substitution
+or deletion of selected characters.
+.Pp
+The following options are available:
+.Bl -tag -width Ds
+.It Fl C
+Complement the set of characters in
+.Ar string1 ,
+that is
+.Dq Fl C Li ab
+includes every character except for
+.Ql a
+and
+.Ql b .
+.It Fl c
+Same as
+.Fl C
+but complement the set of values in
+.Ar string1 .
+.It Fl d
+Delete characters in
+.Ar string1
+from the input.
+.It Fl s
+Squeeze multiple occurrences of the characters listed in the last
+operand (either
+.Ar string1
+or
+.Ar string2 )
+in the input into a single instance of the character.
+This occurs after all deletion and translation is completed.
+.It Fl u
+Guarantee that any output is unbuffered.
+.El
+.Pp
+In the first synopsis form, the characters in
+.Ar string1
+are translated into the characters in
+.Ar string2
+where the first character in
+.Ar string1
+is translated into the first character in
+.Ar string2
+and so on.
+If
+.Ar string1
+is longer than
+.Ar string2 ,
+the last character found in
+.Ar string2
+is duplicated until
+.Ar string1
+is exhausted.
+.Pp
+In the second synopsis form, the characters in
+.Ar string1
+are deleted from the input.
+.Pp
+In the third synopsis form, the characters in
+.Ar string1
+are compressed as described for the
+.Fl s
+option.
+.Pp
+In the fourth synopsis form, the characters in
+.Ar string1
+are deleted from the input, and the characters in
+.Ar string2
+are compressed as described for the
+.Fl s
+option.
+.Pp
+The following conventions can be used in
+.Ar string1
+and
+.Ar string2
+to specify sets of characters:
+.Bl -tag -width [:equiv:]
+.It character
+Any character not described by one of the following conventions
+represents itself.
+.It \eoctal
+A backslash followed by 1, 2 or 3 octal digits represents a character
+with that encoded value.
+To follow an octal sequence with a digit as a character, left zero-pad
+the octal sequence to the full 3 octal digits.
+.It \echaracter
+A backslash followed by certain special characters maps to special
+values.
+.Pp
+.Bl -column "\ea"
+.It "\ea <alert character>
+.It "\eb <backspace>
+.It "\ef <form-feed>
+.It "\en <newline>
+.It "\er <carriage return>
+.It "\et <tab>
+.It "\ev <vertical tab>
+.El
+.Pp
+A backslash followed by any other character maps to that character.
+.It c-c
+For non-octal range endpoints
+represents the range of characters between the range endpoints, inclusive,
+in ascending order,
+as defined by the collation sequence.
+If either or both of the range endpoints are octal sequences, it
+represents the range of specific coded values between the
+range endpoints, inclusive.
+.Pp
+.Bf Em
+See the
+.Sx COMPATIBILITY
+section below for an important note regarding
+differences in the way the current
+implementation interprets range expressions differently from
+previous implementations.
+.Ef
+.It [:class:]
+Represents all characters belonging to the defined character class.
+Class names are:
+.Pp
+.Bl -column "phonogram"
+.It "alnum <alphanumeric characters>
+.It "alpha <alphabetic characters>
+.It "blank <whitespace characters>
+.It "cntrl <control characters>
+.It "digit <numeric characters>
+.It "graph <graphic characters>
+.It "ideogram <ideographic characters>
+.It "lower <lower-case alphabetic characters>
+.It "phonogram <phonographic characters>
+.It "print <printable characters>
+.It "punct <punctuation characters>
+.It "rune <valid characters>
+.It "space <space characters>
+.It "special <special characters>
+.It "upper <upper-case characters>
+.It "xdigit <hexadecimal characters>
+.El
+.Pp
+.\" All classes may be used in
+.\" .Ar string1 ,
+.\" and in
+.\" .Ar string2
+.\" when both the
+.\" .Fl d
+.\" and
+.\" .Fl s
+.\" options are specified.
+.\" Otherwise, only the classes ``upper'' and ``lower'' may be used in
+.\" .Ar string2
+.\" and then only when the corresponding class (``upper'' for ``lower''
+.\" and vice-versa) is specified in the same relative position in
+.\" .Ar string1 .
+.\" .Pp
+When
+.Dq Li [:lower:]
+appears in
+.Ar string1
+and
+.Dq Li [:upper:]
+appears in the same relative position in
+.Ar string2 ,
+it represents the characters pairs from the
+.Dv toupper
+mapping in the
+.Ev LC_CTYPE
+category of the current locale.
+When
+.Dq Li [:upper:]
+appears in
+.Ar string1
+and
+.Dq Li [:lower:]
+appears in the same relative position in
+.Ar string2 ,
+it represents the characters pairs from the
+.Dv tolower
+mapping in the
+.Ev LC_CTYPE
+category of the current locale.
+.Pp
+With the exception of case conversion,
+characters in the classes are in unspecified order.
+.Pp
+For specific information as to which
+.Tn ASCII
+characters are included
+in these classes, see
+.Xr ctype 3
+and related manual pages.
+.It [=equiv=]
+Represents all characters belonging to the same equivalence class as
+.Ar equiv ,
+ordered by their encoded values.
+.It [#*n]
+Represents
+.Ar n
+repeated occurrences of the character represented by
+.Ar # .
+This
+expression is only valid when it occurs in
+.Ar string2 .
+If
+.Ar n
+is omitted or is zero, it is be interpreted as large enough to extend
+.Ar string2
+sequence to the length of
+.Ar string1 .
+If
+.Ar n
+has a leading zero, it is interpreted as an octal value, otherwise,
+it is interpreted as a decimal value.
+.El
+.Sh ENVIRONMENT
+The
+.Ev LANG , LC_ALL , LC_CTYPE
+and
+.Ev LC_COLLATE
+environment variables affect the execution of
+.Nm
+as described in
+.Xr environ 7 .
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+The following examples are shown as given to the shell:
+.Pp
+Create a list of the words in file1, one per line, where a word is taken to
+be a maximal string of letters.
+.Pp
+.D1 Li "tr -cs \*q[:alpha:]\*q \*q\en\*q < file1"
+.Pp
+Translate the contents of file1 to upper-case.
+.Pp
+.D1 Li "tr \*q[:lower:]\*q \*q[:upper:]\*q < file1"
+.Pp
+(This should be preferred over the traditional
+.Ux
+idiom of
+.Dq Li "tr a-z A-Z" ,
+since it works correctly in all locales.)
+.Pp
+Strip out non-printable characters from file1.
+.Pp
+.D1 Li "tr -cd \*q[:print:]\*q < file1"
+.Pp
+Remove diacritical marks from all accented variants of the letter
+.Ql e :
+.Pp
+.Dl "tr \*q[=e=]\*q \*qe\*q"
+.Sh COMPATIBILITY
+Previous
+.Fx
+implementations of
+.Nm
+did not order characters in range expressions according to the current
+locale's collation order, making it possible to convert unaccented Latin
+characters (esp.\& as found in English text) from upper to lower case using
+the traditional
+.Ux
+idiom of
+.Dq Li "tr A-Z a-z" .
+Since
+.Nm
+now obeys the locale's collation order, this idiom may not produce
+correct results when there is not a 1:1 mapping between lower and
+upper case, or when the order of characters within the two cases differs.
+As noted in the
+.Sx EXAMPLES
+section above, the character class expressions
+.Dq Li [:lower:]
+and
+.Dq Li [:upper:]
+should be used instead of explicit character ranges like
+.Dq Li a-z
+and
+.Dq Li A-Z .
+.Pp
+System V has historically implemented character ranges using the syntax
+.Dq Li [c-c]
+instead of the
+.Dq Li c-c
+used by historic
+.Bx
+implementations and
+standardized by POSIX.
+System V shell scripts should work under this implementation as long as
+the range is intended to map in another range, i.e., the command
+.Dq Li "tr [a-z] [A-Z]"
+will work as it will map the
+.Ql \&[
+character in
+.Ar string1
+to the
+.Ql \&[
+character in
+.Ar string2 .
+However, if the shell script is deleting or squeezing characters as in
+the command
+.Dq Li "tr -d [a-z]" ,
+the characters
+.Ql \&[
+and
+.Ql \&]
+will be
+included in the deletion or compression list which would not have happened
+under a historic System V implementation.
+Additionally, any scripts that depended on the sequence
+.Dq Li a-z
+to
+represent the three characters
+.Ql a ,
+.Ql \-
+and
+.Ql z
+will have to be
+rewritten as
+.Dq Li a\e-z .
+.Pp
+The
+.Nm
+utility has historically not permitted the manipulation of NUL bytes in
+its input and, additionally, stripped NUL's from its input stream.
+This implementation has removed this behavior as a bug.
+.Pp
+The
+.Nm
+utility has historically been extremely forgiving of syntax errors,
+for example, the
+.Fl c
+and
+.Fl s
+options were ignored unless two strings were specified.
+This implementation will not permit illegal syntax.
+.Sh STANDARDS
+The
+.Nm
+utility conforms to
+.St -p1003.1-2001 .
+.Pp
+It should be noted that the feature wherein the last character of
+.Ar string2
+is duplicated if
+.Ar string2
+has less characters than
+.Ar string1
+is permitted by POSIX but is not required.
+Shell scripts attempting to be portable to other POSIX systems should use
+the
+.Dq Li [#*]
+convention instead of relying on this behavior.
+The
+.Fl u
+option is an extension to the
+.St -p1003.1-2001
+standard.
diff --git a/text_cmds/tr/tr.c b/text_cmds/tr/tr.c
new file mode 100644
index 0000000..7bd2ca7
--- /dev/null
+++ b/text_cmds/tr/tr.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+__FBSDID("$FreeBSD: src/usr.bin/tr/tr.c,v 1.24 2005/04/09 14:31:41 stefanf Exp $");
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1988, 1993\n\
+ The Regents of the University of California. All rights reserved.\n";
+#endif
+
+#ifndef lint
+static const char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95";
+#endif
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "cmap.h"
+#include "cset.h"
+#include "extern.h"
+
+STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL };
+STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL };
+
+static struct cset *setup(char *, STR *, int, int);
+static void usage(void);
+
+int
+main(int argc, char **argv)
+{
+ static int carray[NCHARS_SB];
+ struct cmap *map;
+ struct cset *delete, *squeeze;
+ int n, *p;
+ int Cflag, cflag, dflag, sflag, isstring2;
+ wint_t ch, cnt, lastch;
+
+ (void)setlocale(LC_ALL, "");
+
+ Cflag = cflag = dflag = sflag = 0;
+ while ((ch = getopt(argc, argv, "Ccdsu")) != -1)
+ switch((char)ch) {
+ case 'C':
+ Cflag = 1;
+ cflag = 0;
+ break;
+ case 'c':
+ cflag = 1;
+ Cflag = 0;
+ break;
+ case 'd':
+ dflag = 1;
+ break;
+ case 's':
+ sflag = 1;
+ break;
+ case 'u':
+ setbuf(stdout, (char *)NULL);
+ break;
+ case '?':
+ default:
+ usage();
+ }
+ argc -= optind;
+ argv += optind;
+
+ switch(argc) {
+ case 0:
+ default:
+ usage();
+ /* NOTREACHED */
+ case 1:
+ isstring2 = 0;
+ if(!argv[0]) usage();
+ break;
+ case 2:
+ isstring2 = 1;
+ if(!argv[0] || !argv[1]) usage();
+ break;
+ }
+
+ /*
+ * tr -ds [-Cc] string1 string2
+ * Delete all characters (or complemented characters) in string1.
+ * Squeeze all characters in string2.
+ */
+ if (dflag && sflag) {
+ if (!isstring2)
+ usage();
+
+ delete = setup(argv[0], &s1, cflag, Cflag);
+ squeeze = setup(argv[1], &s2, 0, 0);
+
+ for (lastch = OOBCH; (ch = getwchar()) != WEOF;)
+ if (!cset_in(delete, ch) &&
+ (lastch != ch || !cset_in(squeeze, ch))) {
+ lastch = ch;
+ (void)putwchar(ch);
+ }
+ if (ferror(stdin))
+ err(1, NULL);
+ exit(0);
+ }
+
+ /*
+ * tr -d [-Cc] string1
+ * Delete all characters (or complemented characters) in string1.
+ */
+ if (dflag) {
+ if (isstring2)
+ usage();
+
+ delete = setup(argv[0], &s1, cflag, Cflag);
+
+ while ((ch = getwchar()) != WEOF)
+ if (!cset_in(delete, ch))
+ (void)putwchar(ch);
+ if (ferror(stdin))
+ err(1, NULL);
+ exit(0);
+ }
+
+ /*
+ * tr -s [-Cc] string1
+ * Squeeze all characters (or complemented characters) in string1.
+ */
+ if (sflag && !isstring2) {
+ squeeze = setup(argv[0], &s1, cflag, Cflag);
+
+ for (lastch = OOBCH; (ch = getwchar()) != WEOF;)
+ if (lastch != ch || !cset_in(squeeze, ch)) {
+ lastch = ch;
+ (void)putwchar(ch);
+ }
+ if (ferror(stdin))
+ err(1, NULL);
+ exit(0);
+ }
+
+ /*
+ * tr [-Ccs] string1 string2
+ * Replace all characters (or complemented characters) in string1 with
+ * the character in the same position in string2. If the -s option is
+ * specified, squeeze all the characters in string2.
+ */
+ if (!isstring2)
+ usage();
+
+ map = cmap_alloc();
+ if (map == NULL)
+ err(1, NULL);
+ squeeze = cset_alloc();
+ if (squeeze == NULL)
+ err(1, NULL);
+
+ s1.str = argv[0];
+
+ if (Cflag || cflag) {
+ cmap_default(map, OOBCH);
+ if ((s2.str = strdup(argv[1])) == NULL)
+ errx(1, "strdup(argv[1])");
+ } else
+ s2.str = argv[1];
+
+ if (!next(&s2))
+ errx(1, "empty string2");
+
+ /*
+ * For -s result will contain only those characters defined
+ * as the second characters in each of the toupper or tolower
+ * pairs.
+ */
+
+ /* If string2 runs out of characters, use the last one specified. */
+ while (next(&s1)) {
+ again:
+ if (s1.state == CCLASS_LOWER &&
+ s2.state == CCLASS_UPPER &&
+ s1.cnt == 1 && s2.cnt == 1) {
+ do {
+ ch = towupper(s1.lastch);
+ cmap_add(map, s1.lastch, ch);
+ if (sflag && iswupper(ch))
+ cset_add(squeeze, ch);
+ if (!next(&s1))
+ goto endloop;
+ } while (s1.state == CCLASS_LOWER && s1.cnt > 1);
+ /* skip upper set */
+ do {
+ if (!next(&s2))
+ break;
+ } while (s2.state == CCLASS_UPPER && s2.cnt > 1);
+ goto again;
+ } else if (s1.state == CCLASS_UPPER &&
+ s2.state == CCLASS_LOWER &&
+ s1.cnt == 1 && s2.cnt == 1) {
+ do {
+ ch = towlower(s1.lastch);
+ cmap_add(map, s1.lastch, ch);
+ if (sflag && iswlower(ch))
+ cset_add(squeeze, ch);
+ if (!next(&s1))
+ goto endloop;
+ } while (s1.state == CCLASS_UPPER && s1.cnt > 1);
+ /* skip lower set */
+ do {
+ if (!next(&s2))
+ break;
+ } while (s2.state == CCLASS_LOWER && s2.cnt > 1);
+ goto again;
+ } else {
+ cmap_add(map, s1.lastch, s2.lastch);
+ if (sflag)
+ cset_add(squeeze, s2.lastch);
+ }
+ (void)next(&s2);
+ }
+endloop:
+ if (cflag || (Cflag && MB_CUR_MAX > 1)) {
+ /*
+ * This is somewhat tricky: since the character set is
+ * potentially huge, we need to avoid allocating a map
+ * entry for every character. Our strategy is to set the
+ * default mapping to the last character of string #2
+ * (= the one that gets automatically repeated), then to
+ * add back identity mappings for characters that should
+ * remain unchanged. We don't waste space on identity mappings
+ * for non-characters with the -C option; those are simulated
+ * in the I/O loop.
+ */
+ s2.str = argv[1];
+ s2.state = NORMAL;
+ for (cnt = 0; cnt < WCHAR_MAX; cnt++) {
+ if (Cflag && !iswrune(cnt))
+ continue;
+ if (cmap_lookup(map, cnt) == OOBCH) {
+ if (next(&s2))
+ cmap_add(map, cnt, s2.lastch);
+ if (sflag)
+ cset_add(squeeze, s2.lastch);
+ } else
+ cmap_add(map, cnt, cnt);
+ if ((s2.state == EOS || s2.state == INFINITE) &&
+ cnt >= cmap_max(map))
+ break;
+ }
+ cmap_default(map, s2.lastch);
+ } else if (Cflag) {
+ for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) {
+ if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt))
+ *p++ = cnt;
+ else
+ cmap_add(map, cnt, cnt);
+ }
+ n = p - carray;
+ if (Cflag && n > 1)
+ (void)mergesort(carray, n, sizeof(*carray), charcoll);
+
+ s2.str = argv[1];
+ s2.state = NORMAL;
+ for (cnt = 0; cnt < n; cnt++) {
+ (void)next(&s2);
+ cmap_add(map, carray[cnt], s2.lastch);
+ /*
+ * Chars taken from s2 can be different this time
+ * due to lack of complex upper/lower processing,
+ * so fill string2 again to not miss some.
+ */
+ if (sflag)
+ cset_add(squeeze, s2.lastch);
+ }
+ }
+
+ cset_cache(squeeze);
+ cmap_cache(map);
+
+ if (sflag)
+ for (lastch = OOBCH; (ch = getwchar()) != WEOF;) {
+ if (!Cflag || iswrune(ch))
+ ch = cmap_lookup(map, ch);
+ if (lastch != ch || !cset_in(squeeze, ch)) {
+ lastch = ch;
+ (void)putwchar(ch);
+ }
+ }
+ else
+ while ((ch = getwchar()) != WEOF) {
+ if (!Cflag || iswrune(ch))
+ ch = cmap_lookup(map, ch);
+ (void)putwchar(ch);
+ }
+ if (ferror(stdin))
+ err(1, NULL);
+ exit (0);
+}
+
+static struct cset *
+setup(char *arg, STR *str, int cflag, int Cflag)
+{
+ struct cset *cs;
+
+ cs = cset_alloc();
+ if (cs == NULL)
+ err(1, NULL);
+ str->str = arg;
+ while (next(str))
+ cset_add(cs, str->lastch);
+ if (Cflag)
+ cset_addclass(cs, wctype("rune"), true);
+ if (cflag || Cflag)
+ cset_invert(cs);
+ cset_cache(cs);
+ return (cs);
+}
+
+int
+charcoll(const void *a, const void *b)
+{
+ static char sa[2], sb[2];
+
+ sa[0] = *(const int *)a;
+ sb[0] = *(const int *)b;
+ return (strcoll(sa, sb));
+}
+
+static void
+usage(void)
+{
+ (void)fprintf(stderr, "%s\n%s\n%s\n%s\n",
+ "usage: tr [-Ccsu] string1 string2",
+ " tr [-Ccu] -d string1",
+ " tr [-Ccu] -s string1",
+ " tr [-Ccu] -ds string1 string2");
+ exit(1);
+}