]>
git.cameronkatri.com Git - apple_cmds.git/blob - text_cmds/tr/cset.c
2 * Copyright (c) 2004 Tim J. Robbins.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * "Set of characters" ADT implemented as a splay tree of extents, with
28 * a lookup table cache to simplify looking up the first bunch of
29 * characters (which are presumably more common than others).
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD: src/usr.bin/tr/cset.c,v 1.3 2004/07/14 08:33:14 tjr Exp $");
42 static struct csnode
* cset_delete(struct csnode
*, wchar_t);
43 static __inline
int cset_rangecmp(struct csnode
*, wchar_t);
44 static struct csnode
* cset_splay(struct csnode
*, wchar_t);
48 * Allocate a set of characters.
55 if ((cs
= malloc(sizeof(*cs
))) == NULL
)
58 cs
->cs_classes
= NULL
;
59 cs
->cs_havecache
= false;
60 cs
->cs_invert
= false;
66 * Add a character to the set.
69 cset_add(struct cset
*cs
, wchar_t ch
)
71 struct csnode
*csn
, *ncsn
;
74 cs
->cs_havecache
= false;
77 * Inserting into empty tree; new item becomes the root.
79 if (cs
->cs_root
== NULL
) {
80 csn
= malloc(sizeof(*cs
->cs_root
));
83 csn
->csn_left
= csn
->csn_right
= NULL
;
84 csn
->csn_min
= csn
->csn_max
= ch
;
90 * Splay to check whether the item already exists, and otherwise,
91 * where we should put it.
93 csn
= cs
->cs_root
= cset_splay(cs
->cs_root
, ch
);
96 * Avoid adding duplicate nodes.
98 if (cset_rangecmp(csn
, ch
) == 0)
102 * Allocate a new node and make it the new root.
104 ncsn
= malloc(sizeof(*ncsn
));
107 ncsn
->csn_min
= ncsn
->csn_max
= ch
;
108 if (cset_rangecmp(csn
, ch
) < 0) {
109 ncsn
->csn_left
= csn
->csn_left
;
110 ncsn
->csn_right
= csn
;
111 csn
->csn_left
= NULL
;
113 ncsn
->csn_right
= csn
->csn_right
;
114 ncsn
->csn_left
= csn
;
115 csn
->csn_right
= NULL
;
120 * Coalesce with left and right neighbours if possible.
122 if (ncsn
->csn_left
!= NULL
) {
123 ncsn
->csn_left
= cset_splay(ncsn
->csn_left
, ncsn
->csn_min
- 1);
124 if (ncsn
->csn_left
->csn_max
== ncsn
->csn_min
- 1) {
125 oval
= ncsn
->csn_left
->csn_min
;
126 ncsn
->csn_left
= cset_delete(ncsn
->csn_left
,
127 ncsn
->csn_left
->csn_min
);
128 ncsn
->csn_min
= oval
;
131 if (ncsn
->csn_right
!= NULL
) {
132 ncsn
->csn_right
= cset_splay(ncsn
->csn_right
,
134 if (ncsn
->csn_right
->csn_min
== ncsn
->csn_max
+ 1) {
135 oval
= ncsn
->csn_right
->csn_max
;
136 ncsn
->csn_right
= cset_delete(ncsn
->csn_right
,
137 ncsn
->csn_right
->csn_min
);
138 ncsn
->csn_max
= oval
;
147 * Determine whether a character is in the set without using
151 cset_in_hard(struct cset
*cs
, wchar_t ch
)
155 for (csc
= cs
->cs_classes
; csc
!= NULL
; csc
= csc
->csc_next
)
156 if (csc
->csc_invert
^ iswctype(ch
, csc
->csc_type
) != 0)
157 return (cs
->cs_invert
^ true);
158 if (cs
->cs_root
!= NULL
) {
159 cs
->cs_root
= cset_splay(cs
->cs_root
, ch
);
160 return (cs
->cs_invert
^ cset_rangecmp(cs
->cs_root
, ch
) == 0);
162 return (cs
->cs_invert
^ false);
170 cset_cache(struct cset
*cs
)
174 for (i
= 0; i
< CS_CACHE_SIZE
; i
++)
175 cs
->cs_cache
[i
] = cset_in_hard(cs
, i
);
177 cs
->cs_havecache
= true;
182 * Invert the character set.
185 cset_invert(struct cset
*cs
)
188 cs
->cs_invert
^= true;
189 cs
->cs_havecache
= false;
194 * Add a wctype()-style character class to the set, optionally
198 cset_addclass(struct cset
*cs
, wctype_t type
, bool invert
)
202 csc
= malloc(sizeof(*csc
));
205 csc
->csc_type
= type
;
206 csc
->csc_invert
= invert
;
207 csc
->csc_next
= cs
->cs_classes
;
208 cs
->cs_classes
= csc
;
209 cs
->cs_havecache
= false;
214 cset_rangecmp(struct csnode
*t
, wchar_t ch
)
224 static struct csnode
*
225 cset_splay(struct csnode
*t
, wchar_t ch
)
227 struct csnode N
, *l
, *r
, *y
;
230 * Based on public domain code from Sleator.
235 N
.csn_left
= N
.csn_right
= NULL
;
238 if (cset_rangecmp(t
, ch
) < 0) {
239 if (t
->csn_left
!= NULL
&&
240 cset_rangecmp(t
->csn_left
, ch
) < 0) {
242 t
->csn_left
= y
->csn_right
;
246 if (t
->csn_left
== NULL
)
251 } else if (cset_rangecmp(t
, ch
) > 0) {
252 if (t
->csn_right
!= NULL
&&
253 cset_rangecmp(t
->csn_right
, ch
) > 0) {
255 t
->csn_right
= y
->csn_left
;
259 if (t
->csn_right
== NULL
)
267 l
->csn_right
= t
->csn_left
;
268 r
->csn_left
= t
->csn_right
;
269 t
->csn_left
= N
.csn_right
;
270 t
->csn_right
= N
.csn_left
;
274 static struct csnode
*
275 cset_delete(struct csnode
*t
, wchar_t ch
)
280 t
= cset_splay(t
, ch
);
281 assert(cset_rangecmp(t
, ch
) == 0);
282 if (t
->csn_left
== NULL
)
285 x
= cset_splay(t
->csn_left
, ch
);
286 x
->csn_right
= t
->csn_right
;