]> git.cameronkatri.com Git - apple_cmds.git/blob - text_cmds/tr/str.c
system_cmds: Fix compilation for lower targets, downgrade lsmp
[apple_cmds.git] / text_cmds / tr / str.c
1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34 #include <sys/cdefs.h>
35
36 __FBSDID("$FreeBSD: src/usr.bin/tr/str.c,v 1.24 2004/11/14 05:15:25 jkh Exp $");
37
38 #ifndef lint
39 static const char sccsid[] = "@(#)str.c 8.2 (Berkeley) 4/28/95";
40 #endif
41
42 #include <sys/cdefs.h>
43 #include <sys/types.h>
44
45 #include <ctype.h>
46 #include <err.h>
47 #include <errno.h>
48 #include <stddef.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <wchar.h>
53 #include <wctype.h>
54 #include <xlocale.h>
55
56 #include "extern.h"
57
58 static int backslash(STR *, int *);
59 static int bracket(STR *);
60 static void genclass(STR *);
61 static void genequiv(STR *);
62 static int genrange(STR *, int);
63 static void genseq(STR *);
64
65 /*
66 * Using libc internal function __collate_lookup_l for character
67 * equivalence
68 */
69 void __collate_lookup_l(const __darwin_wchar_t *, int *, int *, int *,
70 locale_t);
71 /*
72 * Cache for primary collation weight of each single byte character
73 * used in static void genequiv(s)
74 */
75 int collation_weight_cache[NCHARS_SB];
76 int is_weight_cached = 0;
77
78 wint_t
79 next(s)
80 STR *s;
81 {
82 int is_octal;
83 wint_t ch;
84 wchar_t wch;
85 size_t clen;
86
87 switch (s->state) {
88 case EOS:
89 return (0);
90 case INFINITE:
91 #ifdef __APPLE__
92 switch (ch = (u_char)*s->str) {
93 case '\0':
94 /*
95 * force at least one postive return so setup() will
96 * process lastch of a sequence like [a*]; but change
97 * state so it won't get stuck in a while(next(s)) loop
98 */
99 s->state = NORMAL;
100 }
101 #endif /* __APPLE__ */
102 return (1);
103 case NORMAL:
104 switch (*s->str) {
105 case '\0':
106 s->state = EOS;
107 return (0);
108 case '\\':
109 s->lastch = backslash(s, &is_octal);
110 break;
111 case '[':
112 if (bracket(s))
113 return (next(s));
114 /* FALLTHROUGH */
115 default:
116 clen = mbrtowc(&wch, s->str, MB_LEN_MAX, NULL);
117 if (clen == (size_t)-1 || clen == (size_t)-2 ||
118 clen == 0)
119 errc(1, EILSEQ, NULL);
120 is_octal = 0;
121 s->lastch = wch;
122 s->str += clen;
123 break;
124 }
125
126 /* We can start a range at any time. */
127 if (s->str[0] == '-' && genrange(s, is_octal))
128 return (next(s));
129 return (1);
130 case RANGE:
131 if (s->cnt-- == 0) {
132 s->state = NORMAL;
133 return (next(s));
134 }
135 ++s->lastch;
136 return (1);
137 case SEQUENCE:
138 if (s->cnt-- == 0) {
139 s->state = NORMAL;
140 return (next(s));
141 }
142 return (1);
143 case CCLASS:
144 case CCLASS_UPPER:
145 case CCLASS_LOWER:
146 s->cnt++;
147 ch = nextwctype(s->lastch, s->cclass);
148 if (ch == -1) {
149 s->state = NORMAL;
150 return (next(s));
151 }
152 s->lastch = ch;
153 return (1);
154 case SET:
155 if ((ch = s->set[s->cnt++]) == OOBCH) {
156 s->state = NORMAL;
157 return (next(s));
158 }
159 s->lastch = ch;
160 return (1);
161 default:
162 return (0);
163 }
164 /* NOTREACHED */
165 }
166
167 static int
168 bracket(s)
169 STR *s;
170 {
171 char *p;
172
173 switch (s->str[1]) {
174 case ':': /* "[:class:]" */
175 if ((p = strchr(s->str + 2, ']')) == NULL)
176 return (0);
177 if (*(p - 1) != ':' || p - s->str < 4)
178 goto repeat;
179 *(p - 1) = '\0';
180 s->str += 2;
181 genclass(s);
182 s->str = p + 1;
183 return (1);
184 case '=': /* "[=equiv=]" */
185 if ((p = strchr(s->str + 2, ']')) == NULL)
186 return (0);
187 if (*(p - 1) != '=' || p - s->str < 4)
188 goto repeat;
189 s->str += 2;
190 genequiv(s);
191 return (1);
192 default: /* "[\###*n]" or "[#*n]" */
193 repeat:
194 if ((p = strpbrk(s->str + 2, "*]")) == NULL)
195 return (0);
196 if (p[0] != '*' || index(p, ']') == NULL)
197 return (0);
198 s->str += 1;
199 genseq(s);
200 return (1);
201 }
202 /* NOTREACHED */
203 }
204
205 static void
206 genclass(s)
207 STR *s;
208 {
209
210 if ((s->cclass = wctype(s->str)) == 0)
211 errx(1, "unknown class %s", s->str);
212 s->cnt = 0;
213 s->lastch = -1; /* incremented before check in next() */
214 if (strcmp(s->str, "upper") == 0)
215 s->state = CCLASS_UPPER;
216 else if (strcmp(s->str, "lower") == 0)
217 s->state = CCLASS_LOWER;
218 else
219 s->state = CCLASS;
220 }
221
222 static void
223 genequiv(s)
224 STR *s;
225 {
226 int i, p;
227 size_t clen;
228 wchar_t wc;
229
230 if (*s->str == '\\') {
231 s->equiv[0] = backslash(s, NULL);
232 if (*s->str != '=')
233 errx(1, "misplaced equivalence equals sign");
234 s->str += 2;
235 } else {
236 clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL);
237 if (clen == (size_t)-1 || clen == (size_t)-2 || clen == 0)
238 errc(1, EILSEQ, NULL);
239 s->equiv[0] = wc;
240 if (s->str[clen] != '=')
241 errx(1, "misplaced equivalence equals sign");
242 s->str += clen + 2;
243 }
244
245 /*
246 * Partially supporting multi-byte locales; only finds equivalent
247 * characters within the first NCHARS_SB entries of the
248 * collation table
249 */
250 int tprim, tsec;
251 int len;
252 __collate_lookup_l(s->equiv, &len, &tprim, &tsec, LC_GLOBAL_LOCALE);
253
254 if (tprim != -1) {
255 for (p = 1, i = 1; i < NCHARS_SB; i++) {
256 int cprim;
257 if (is_weight_cached) {
258 /*
259 * retrieve primary weight from cache
260 */
261 cprim = collation_weight_cache[i];
262 } else {
263 /*
264 * perform lookup of primary weight and fill cache
265 */
266 int csec;
267 __collate_lookup_l((__darwin_wchar_t *)&i, &len, &cprim, &csec, LC_GLOBAL_LOCALE);
268 collation_weight_cache[i] = cprim;
269 }
270
271 /*
272 * If a character does not exist in the collation
273 * table, just skip it
274 */
275 if (cprim == -1) {
276 continue;
277 }
278
279 /*
280 * Only compare primary weights to determine multi-byte
281 * character equivalence
282 */
283 if (cprim == tprim) {
284 s->equiv[p++] = i;
285 }
286 }
287 s->equiv[p] = OOBCH;
288
289 if (!is_weight_cached) {
290 is_weight_cached = 1;
291 }
292 }
293
294 s->cnt = 0;
295 s->state = SET;
296 s->set = s->equiv;
297 }
298
299 static int
300 genrange(STR *s, int was_octal)
301 {
302 int stopval, octal;
303 char *savestart;
304 int n, cnt, *p;
305 size_t clen;
306 wchar_t wc;
307
308 octal = 0;
309 savestart = s->str;
310 if (*++s->str == '\\')
311 stopval = backslash(s, &octal);
312 else {
313 clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL);
314 if (clen == (size_t)-1 || clen == (size_t)-2)
315 errc(1, EILSEQ, NULL);
316 stopval = wc;
317 s->str += clen;
318 }
319 /*
320 * XXX Characters are not ordered according to collating sequence in
321 * multibyte locales.
322 */
323 if (octal || was_octal || MB_CUR_MAX > 1) {
324 if (stopval < s->lastch) {
325 s->str = savestart;
326 return (0);
327 }
328 s->cnt = stopval - s->lastch + 1;
329 s->state = RANGE;
330 --s->lastch;
331 return (1);
332 }
333 if (charcoll((const void *)&stopval, (const void *)&(s->lastch)) < 0) {
334 s->str = savestart;
335 return (0);
336 }
337 if ((s->set = p = malloc((NCHARS_SB + 1) * sizeof(int))) == NULL)
338 err(1, "genrange() malloc");
339 for (cnt = 0; cnt < NCHARS_SB; cnt++)
340 if (charcoll((const void *)&cnt, (const void *)&(s->lastch)) >= 0 &&
341 charcoll((const void *)&cnt, (const void *)&stopval) <= 0)
342 *p++ = cnt;
343 *p = OOBCH;
344 n = p - s->set;
345
346 s->cnt = 0;
347 s->state = SET;
348 if (n > 1)
349 mergesort(s->set, n, sizeof(*(s->set)), charcoll);
350 return (1);
351 }
352
353 static void
354 genseq(s)
355 STR *s;
356 {
357 char *ep;
358 wchar_t wc;
359 size_t clen;
360
361 #ifndef __APPLE__
362 if (s->which == STRING1)
363 errx(1, "sequences only valid in string2");
364 #endif /* !__APPLE__ */
365
366 if (*s->str == '\\')
367 s->lastch = backslash(s, NULL);
368 else {
369 clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL);
370 if (clen == (size_t)-1 || clen == (size_t)-2)
371 errc(1, EILSEQ, NULL);
372 s->lastch = wc;
373 s->str += clen;
374 }
375 if (*s->str != '*')
376 errx(1, "misplaced sequence asterisk");
377
378 switch (*++s->str) {
379 case '\\':
380 s->cnt = backslash(s, NULL);
381 break;
382 case ']':
383 s->cnt = 0;
384 ++s->str;
385 break;
386 default:
387 if (isdigit((u_char)*s->str)) {
388 s->cnt = strtol(s->str, &ep, 0);
389 if (*ep == ']') {
390 s->str = ep + 1;
391 break;
392 }
393 }
394 errx(1, "illegal sequence count");
395 /* NOTREACHED */
396 }
397
398 s->state = s->cnt ? SEQUENCE : INFINITE;
399 }
400
401 /*
402 * Translate \??? into a character. Up to 3 octal digits, if no digits either
403 * an escape code or a literal character.
404 */
405 static int
406 backslash(STR *s, int *is_octal)
407 {
408 int ch, cnt, val;
409
410 if (is_octal != NULL)
411 *is_octal = 0;
412 for (cnt = val = 0;;) {
413 ch = (u_char)*++s->str;
414 if (!isdigit(ch) || ch > '7')
415 break;
416 val = val * 8 + ch - '0';
417 if (++cnt == 3) {
418 ++s->str;
419 break;
420 }
421 }
422 if (cnt) {
423 if (is_octal != NULL)
424 *is_octal = 1;
425 return (val);
426 }
427 if (ch != '\0')
428 ++s->str;
429 switch (ch) {
430 case 'a': /* escape characters */
431 return ('\7');
432 case 'b':
433 return ('\b');
434 case 'f':
435 return ('\f');
436 case 'n':
437 return ('\n');
438 case 'r':
439 return ('\r');
440 case 't':
441 return ('\t');
442 case 'v':
443 return ('\13');
444 case '\0': /* \" -> \ */
445 s->state = EOS;
446 return ('\\');
447 default: /* \x" -> x */
448 return (ch);
449 }
450 }