summary refs log tree commit diff stats
path: root/text_cmds/tr/cset.c
diff options
context:
space:
mode:
Diffstat (limited to 'text_cmds/tr/cset.c')
-rw-r--r-- text_cmds/tr/cset.c 290
1 files changed, 290 insertions, 0 deletions
diff --git a/text_cmds/tr/cset.c b/text_cmds/tr/cset.c
new file mode 100644
index 0000000..6e7c217
--- /dev/null
+++ b/text_cmds/tr/cset.c
@@ -0,0 +1,290 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * "Set of characters" ADT implemented as a splay tree of extents, with
+ * a lookup table cache to simplify looking up the first bunch of
+ * characters (which are presumably more common than others).
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/usr.bin/tr/cset.c,v 1.3 2004/07/14 08:33:14 tjr Exp $");
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <wctype.h>
+#include "cset.h"
+
+static struct csnode * cset_delete(struct csnode *, wchar_t); /* free the extent holding a character; returns the remaining tree */
+static __inline int cset_rangecmp(struct csnode *, wchar_t); /* <0, 0, >0: character is below, inside, above a node's extent */
+static struct csnode * cset_splay(struct csnode *, wchar_t); /* splay a (non-NULL) tree around a character; returns the new root */
+
+/*
+ * cset_alloc --
+ *	Create a new, empty character set: no extents in the tree, no
+ *	character classes, not inverted, and with the lookup cache marked
+ *	as not yet built.  Returns NULL if the allocation fails.
+ */
+struct cset *
+cset_alloc(void)
+{
+	struct cset *set;
+
+	set = malloc(sizeof(*set));
+	if (set != NULL) {
+		set->cs_invert = false;
+		set->cs_havecache = false;
+		set->cs_classes = NULL;
+		set->cs_root = NULL;
+	}
+	return (set);
+}
+
+/*
+ * cset_add --
+ *	Insert a single character into the set's tree of extents, merging
+ *	the new one-character extent with a directly adjacent extent on
+ *	either side.  Returns false only when a node cannot be allocated;
+ *	adding a character that an extent already covers succeeds without
+ *	creating a node.  The lookup cache is invalidated in every case.
+ */
+bool
+cset_add(struct cset *cs, wchar_t ch)
+{
+	struct csnode *root, *node, *nbr;
+	wchar_t edge;
+	int cmp;
+
+	/* Whatever happens below, the cached answers may no longer hold. */
+	cs->cs_havecache = false;
+
+	/* Empty tree: the new extent simply becomes the root. */
+	if (cs->cs_root == NULL) {
+		node = malloc(sizeof(*node));
+		if (node == NULL)
+			return (false);
+		node->csn_min = ch;
+		node->csn_max = ch;
+		node->csn_left = NULL;
+		node->csn_right = NULL;
+		cs->cs_root = node;
+		return (true);
+	}
+
+	/*
+	 * Splay around ch.  The resulting root either already contains
+	 * ch (nothing to do) or tells us on which side of it the new
+	 * node belongs.
+	 */
+	root = cset_splay(cs->cs_root, ch);
+	cs->cs_root = root;
+	cmp = cset_rangecmp(root, ch);
+	if (cmp == 0)
+		return (true);
+
+	node = malloc(sizeof(*node));
+	if (node == NULL)
+		return (false);
+	node->csn_min = ch;
+	node->csn_max = ch;
+
+	/* Split the tree at the old root and hang both halves off node. */
+	if (cmp < 0) {
+		/* ch sorts before the old root's extent. */
+		node->csn_left = root->csn_left;
+		node->csn_right = root;
+		root->csn_left = NULL;
+	} else {
+		/* ch sorts after the old root's extent. */
+		node->csn_right = root->csn_right;
+		node->csn_left = root;
+		root->csn_right = NULL;
+	}
+	cs->cs_root = node;
+
+	/* Absorb a left neighbour that ends exactly at ch - 1. */
+	if (node->csn_left != NULL) {
+		nbr = cset_splay(node->csn_left, node->csn_min - 1);
+		node->csn_left = nbr;
+		if (nbr->csn_max == node->csn_min - 1) {
+			edge = nbr->csn_min;
+			node->csn_left = cset_delete(nbr, edge);
+			node->csn_min = edge;
+		}
+	}
+
+	/* Absorb a right neighbour that starts exactly at ch + 1. */
+	if (node->csn_right != NULL) {
+		nbr = cset_splay(node->csn_right, node->csn_max + 1);
+		node->csn_right = nbr;
+		if (nbr->csn_min == node->csn_max + 1) {
+			edge = nbr->csn_max;
+			node->csn_right = cset_delete(nbr, nbr->csn_min);
+			node->csn_max = edge;
+		}
+	}
+
+	return (true);
+}
+
+/*
+ * cset_in_hard --
+ *	Membership test that bypasses the lookup cache.  A character is
+ *	a member when any attached character class (taking that class's
+ *	own invert flag into account) matches it, or else when the extent
+ *	tree contains it; the set-wide invert flag is then applied to the
+ *	answer.  The tree is splayed as a side effect of the lookup.
+ */
+bool
+cset_in_hard(struct cset *cs, wchar_t ch)
+{
+	struct csclass *cls;
+	bool member;
+
+	member = false;
+	for (cls = cs->cs_classes; cls != NULL; cls = cls->csc_next) {
+		if ((cls->csc_invert ^ (iswctype(ch, cls->csc_type) != 0)) != 0) {
+			member = true;
+			break;
+		}
+	}
+
+	/* Only consult (and splay) the tree when no class matched. */
+	if (!member && cs->cs_root != NULL) {
+		cs->cs_root = cset_splay(cs->cs_root, ch);
+		member = (cset_rangecmp(cs->cs_root, ch) == 0);
+	}
+
+	return (cs->cs_invert ^ member);
+}
+
+/*
+ * cset_cache --
+ *	Rebuild the lookup table: record the uncached membership answer
+ *	for every character below CS_CACHE_SIZE, then mark the cache as
+ *	valid.
+ */
+void
+cset_cache(struct cset *cs)
+{
+	wchar_t wc;
+
+	wc = 0;
+	while (wc < CS_CACHE_SIZE) {
+		cs->cs_cache[wc] = cset_in_hard(cs, wc);
+		wc++;
+	}
+
+	cs->cs_havecache = true;
+}
+
+/*
+ * cset_invert --
+ *	Toggle the set-wide invert flag, so that membership answers are
+ *	complemented, and invalidate the lookup cache.
+ */
+void
+cset_invert(struct cset *cs)
+{
+
+	cs->cs_havecache = false;
+	cs->cs_invert = cs->cs_invert ^ true;
+}
+
+/*
+ * cset_addclass --
+ *	Attach a wctype()-style character class to the set.  When invert
+ *	is true the class contributes the characters it does NOT match.
+ *	Returns false if the list entry cannot be allocated, true
+ *	otherwise; on success the lookup cache is invalidated.
+ */
+bool
+cset_addclass(struct cset *cs, wctype_t type, bool invert)
+{
+	struct csclass *entry = malloc(sizeof(*entry));
+
+	if (entry == NULL)
+		return (false);
+
+	entry->csc_invert = invert;
+	entry->csc_type = type;
+
+	/* Push the entry onto the front of the set's class list. */
+	entry->csc_next = cs->cs_classes;
+	cs->cs_classes = entry;
+
+	cs->cs_havecache = false;
+	return (true);
+}
+
+/*
+ * cset_rangecmp --
+ *	Compare a character with the extent [csn_min, csn_max] stored in
+ *	node t: -1 if ch lies below the extent, 1 if it lies above, and
+ *	0 if the extent contains it.
+ */
+static __inline int
+cset_rangecmp(struct csnode *t, wchar_t ch)
+{
+
+	return (ch < t->csn_min ? -1 : (ch > t->csn_max ? 1 : 0));
+}
+
+static struct csnode *
+cset_splay(struct csnode *t, wchar_t ch)
+{
+ struct csnode N, *l, *r, *y;
+
+ /*
+ * Based on public domain code from Sleator.
+ *
+ * Top-down splay of the tree rooted at t around ch.  Returns the
+ * new root: in a correctly ordered tree this is the node whose
+ * extent contains ch when there is one, and otherwise the node at
+ * which the downward search for ch ran out of children.  t must
+ * not be NULL (asserted).
+ *
+ * N is a scratch node that collects the two side trees built during
+ * the descent.  Nodes lying below ch are appended through l, chained
+ * by csn_right, so that tree's root ends up in N.csn_right.  Nodes
+ * lying above ch are appended through r, chained by csn_left, so
+ * that tree's root ends up in N.csn_left.  When the descent stops,
+ * the final node's own children are handed to the side trees and
+ * the side trees become its children.
+ */
+
+ assert(t != NULL);
+
+ N.csn_left = N.csn_right = NULL;
+ l = r = &N; /* both side trees start out empty */
+ for (;;) {
+ if (cset_rangecmp(t, ch) < 0) { /* ch is below t's extent: head left */
+ if (t->csn_left != NULL &&
+ cset_rangecmp(t->csn_left, ch) < 0) { /* left child is above ch too */
+ y = t->csn_left; /* rotate right: y takes t's place */
+ t->csn_left = y->csn_right;
+ y->csn_right = t;
+ t = y;
+ }
+ if (t->csn_left == NULL)
+ break; /* nothing further to the left */
+ r->csn_left = t; /* link right: t joins the "above ch" tree */
+ r = t;
+ t = t->csn_left;
+ } else if (cset_rangecmp(t, ch) > 0) { /* ch is above t's extent: head right */
+ if (t->csn_right != NULL &&
+ cset_rangecmp(t->csn_right, ch) > 0) { /* right child is below ch too */
+ y = t->csn_right; /* rotate left: y takes t's place */
+ t->csn_right = y->csn_left;
+ y->csn_left = t;
+ t = y;
+ }
+ if (t->csn_right == NULL)
+ break; /* nothing further to the right */
+ l->csn_right = t; /* link left: t joins the "below ch" tree */
+ l = t;
+ t = t->csn_right;
+ } else
+ break; /* t's extent contains ch */
+ }
+ l->csn_right = t->csn_left; /* t's left subtree completes the "below" tree */
+ r->csn_left = t->csn_right; /* t's right subtree completes the "above" tree */
+ t->csn_left = N.csn_right; /* root of the "below" tree */
+ t->csn_right = N.csn_left; /* root of the "above" tree */
+ return (t);
+}
+
+/*
+ * cset_delete --
+ *	Remove and free the extent containing ch from the tree rooted at
+ *	t, returning the root of what is left (NULL when the removed node
+ *	had no children).  t must be non-NULL and ch must fall inside
+ *	some extent of the tree; both conditions are asserted.
+ */
+static struct csnode *
+cset_delete(struct csnode *t, wchar_t ch)
+{
+	struct csnode *victim, *rest;
+
+	assert(t != NULL);
+	victim = cset_splay(t, ch);
+	assert(cset_rangecmp(victim, ch) == 0);
+
+	if (victim->csn_left != NULL) {
+		/*
+		 * Everything in the left subtree sorts below ch, so the
+		 * node splayed to its root has no right child; the
+		 * victim's right subtree is attached there.
+		 */
+		rest = cset_splay(victim->csn_left, ch);
+		rest->csn_right = victim->csn_right;
+	} else
+		rest = victim->csn_right;
+
+	free(victim);
+	return (rest);
+}