From 5fd83771641d15c418f747bd343ba6738d3875f7 Mon Sep 17 00:00:00 2001 From: Cameron Katri Date: Sun, 9 May 2021 14:20:58 -0400 Subject: Import macOS userland adv_cmds-176 basic_cmds-55 bootstrap_cmds-116.100.1 developer_cmds-66 diskdev_cmds-667.40.1 doc_cmds-53.60.1 file_cmds-321.40.3 mail_cmds-35 misc_cmds-34 network_cmds-606.40.1 patch_cmds-17 remote_cmds-63 shell_cmds-216.60.1 system_cmds-880.60.2 text_cmds-106 --- text_cmds/tr/cset.c | 290 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 290 insertions(+) create mode 100644 text_cmds/tr/cset.c (limited to 'text_cmds/tr/cset.c') diff --git a/text_cmds/tr/cset.c b/text_cmds/tr/cset.c new file mode 100644 index 0000000..6e7c217 --- /dev/null +++ b/text_cmds/tr/cset.c @@ -0,0 +1,290 @@ +/*- + * Copyright (c) 2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * "Set of characters" ADT implemented as a splay tree of extents, with + * a lookup table cache to simplify looking up the first bunch of + * characters (which are presumably more common than others). + */ + +#include +__FBSDID("$FreeBSD: src/usr.bin/tr/cset.c,v 1.3 2004/07/14 08:33:14 tjr Exp $"); + +#include +#include +#include +#include +#include +#include "cset.h" + +static struct csnode * cset_delete(struct csnode *, wchar_t); +static __inline int cset_rangecmp(struct csnode *, wchar_t); +static struct csnode * cset_splay(struct csnode *, wchar_t); + +/* + * cset_alloc -- + * Allocate a set of characters. + */ +struct cset * +cset_alloc(void) +{ + struct cset *cs; + + if ((cs = malloc(sizeof(*cs))) == NULL) + return (NULL); + cs->cs_root = NULL; + cs->cs_classes = NULL; + cs->cs_havecache = false; + cs->cs_invert = false; + return (cs); +} + +/* + * cset_add -- + * Add a character to the set. + */ +bool +cset_add(struct cset *cs, wchar_t ch) +{ + struct csnode *csn, *ncsn; + wchar_t oval; + + cs->cs_havecache = false; + + /* + * Inserting into empty tree; new item becomes the root. + */ + if (cs->cs_root == NULL) { + csn = malloc(sizeof(*cs->cs_root)); + if (csn == NULL) + return (false); + csn->csn_left = csn->csn_right = NULL; + csn->csn_min = csn->csn_max = ch; + cs->cs_root = csn; + return (true); + } + + /* + * Splay to check whether the item already exists, and otherwise, + * where we should put it. + */ + csn = cs->cs_root = cset_splay(cs->cs_root, ch); + + /* + * Avoid adding duplicate nodes. + */ + if (cset_rangecmp(csn, ch) == 0) + return (true); + + /* + * Allocate a new node and make it the new root. + */ + ncsn = malloc(sizeof(*ncsn)); + if (ncsn == NULL) + return (false); + ncsn->csn_min = ncsn->csn_max = ch; + if (cset_rangecmp(csn, ch) < 0) { + ncsn->csn_left = csn->csn_left; + ncsn->csn_right = csn; + csn->csn_left = NULL; + } else { + ncsn->csn_right = csn->csn_right; + ncsn->csn_left = csn; + csn->csn_right = NULL; + } + cs->cs_root = ncsn; + + /* + * Coalesce with left and right neighbours if possible. + */ + if (ncsn->csn_left != NULL) { + ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1); + if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) { + oval = ncsn->csn_left->csn_min; + ncsn->csn_left = cset_delete(ncsn->csn_left, + ncsn->csn_left->csn_min); + ncsn->csn_min = oval; + } + } + if (ncsn->csn_right != NULL) { + ncsn->csn_right = cset_splay(ncsn->csn_right, + ncsn->csn_max + 1); + if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) { + oval = ncsn->csn_right->csn_max; + ncsn->csn_right = cset_delete(ncsn->csn_right, + ncsn->csn_right->csn_min); + ncsn->csn_max = oval; + } + } + + return (true); +} + +/* + * cset_in_hard -- + * Determine whether a character is in the set without using + * the cache. + */ +bool +cset_in_hard(struct cset *cs, wchar_t ch) +{ + struct csclass *csc; + + for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next) + if (csc->csc_invert ^ iswctype(ch, csc->csc_type) != 0) + return (cs->cs_invert ^ true); + if (cs->cs_root != NULL) { + cs->cs_root = cset_splay(cs->cs_root, ch); + return (cs->cs_invert ^ cset_rangecmp(cs->cs_root, ch) == 0); + } + return (cs->cs_invert ^ false); +} + +/* + * cset_cache -- + * Update the cache. + */ +void +cset_cache(struct cset *cs) +{ + wchar_t i; + + for (i = 0; i < CS_CACHE_SIZE; i++) + cs->cs_cache[i] = cset_in_hard(cs, i); + + cs->cs_havecache = true; +} + +/* + * cset_invert -- + * Invert the character set. + */ +void +cset_invert(struct cset *cs) +{ + + cs->cs_invert ^= true; + cs->cs_havecache = false; +} + +/* + * cset_addclass -- + * Add a wctype()-style character class to the set, optionally + * inverting it. + */ +bool +cset_addclass(struct cset *cs, wctype_t type, bool invert) +{ + struct csclass *csc; + + csc = malloc(sizeof(*csc)); + if (csc == NULL) + return (false); + csc->csc_type = type; + csc->csc_invert = invert; + csc->csc_next = cs->cs_classes; + cs->cs_classes = csc; + cs->cs_havecache = false; + return (true); +} + +static __inline int +cset_rangecmp(struct csnode *t, wchar_t ch) +{ + + if (ch < t->csn_min) + return (-1); + if (ch > t->csn_max) + return (1); + return (0); +} + +static struct csnode * +cset_splay(struct csnode *t, wchar_t ch) +{ + struct csnode N, *l, *r, *y; + + /* + * Based on public domain code from Sleator. + */ + + assert(t != NULL); + + N.csn_left = N.csn_right = NULL; + l = r = &N; + for (;;) { + if (cset_rangecmp(t, ch) < 0) { + if (t->csn_left != NULL && + cset_rangecmp(t->csn_left, ch) < 0) { + y = t->csn_left; + t->csn_left = y->csn_right; + y->csn_right = t; + t = y; + } + if (t->csn_left == NULL) + break; + r->csn_left = t; + r = t; + t = t->csn_left; + } else if (cset_rangecmp(t, ch) > 0) { + if (t->csn_right != NULL && + cset_rangecmp(t->csn_right, ch) > 0) { + y = t->csn_right; + t->csn_right = y->csn_left; + y->csn_left = t; + t = y; + } + if (t->csn_right == NULL) + break; + l->csn_right = t; + l = t; + t = t->csn_right; + } else + break; + } + l->csn_right = t->csn_left; + r->csn_left = t->csn_right; + t->csn_left = N.csn_right; + t->csn_right = N.csn_left; + return (t); +} + +static struct csnode * +cset_delete(struct csnode *t, wchar_t ch) +{ + struct csnode *x; + + assert(t != NULL); + t = cset_splay(t, ch); + assert(cset_rangecmp(t, ch) == 0); + if (t->csn_left == NULL) + x = t->csn_right; + else { + x = cset_splay(t->csn_left, ch); + x->csn_right = t->csn_right; + } + free(t); + return x; +} -- cgit v1.2.3-56-ge451