]> git.cameronkatri.com Git - apple_cmds.git/blob - text_cmds/tr/tr.c
file_cmds: Fix install and COLORLS
[apple_cmds.git] / text_cmds / tr / tr.c
1 /*
2 * Copyright (c) 1988, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34 #include <sys/cdefs.h>
35
36 __FBSDID("$FreeBSD: src/usr.bin/tr/tr.c,v 1.24 2005/04/09 14:31:41 stefanf Exp $");
37
38 #ifndef lint
39 static const char copyright[] =
40 "@(#) Copyright (c) 1988, 1993\n\
41 The Regents of the University of California. All rights reserved.\n";
42 #endif
43
44 #ifndef lint
45 static const char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95";
46 #endif
47
48 #include <sys/types.h>
49
50 #include <ctype.h>
51 #include <err.h>
52 #include <limits.h>
53 #include <locale.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <unistd.h>
58 #include <wchar.h>
59 #include <wctype.h>
60
61 #include "cmap.h"
62 #include "cset.h"
63 #include "extern.h"
64
/*
 * Parser state for the two operand strings.  Both objects are handed to
 * next() by address (directly in main(), or through setup()), which
 * advances them one element at a time and leaves the current element in
 * .lastch.  Both start in the NORMAL state with OOBCH as placeholder
 * character values.
 * NOTE(review): the meaning of the remaining initializer fields is defined
 * by the STR declaration in extern.h, which is not visible here.
 */
65 STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL };
66 STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL };
67
/* Forward declarations of the helpers that are private to this file. */
68 static struct cset *setup(char *, STR *, int, int);
69 static void usage(void);
70
/*
 * tr entry point: parse the options, build character sets and/or a
 * character map from the operand strings, then copy stdin to stdout one
 * wide character at a time while deleting, squeezing or translating.
 *
 * Modes, selected by -d / -s and by the number of operands:
 *   -d -s string1 string2   delete members of string1, squeeze string2
 *   -d    string1           delete members of string1
 *   -s    string1           squeeze members of string1
 *   [-s]  string1 string2   translate string1 -> string2, optionally squeeze
 *
 * Every path ends in exit(), err()/errx() or usage(); the function never
 * returns to its caller.
 *
 * NOTE(review): only read errors on stdin are checked (ferror(stdin)); the
 * results of putwchar() are discarded, so a failed write is not reported.
 */
71 int
72 main(int argc, char **argv)
73 {
74 static int carray[NCHARS_SB];
75 struct cmap *map;
76 struct cset *delete, *squeeze;
77 int n, *p;
78 int Cflag, cflag, dflag, sflag, isstring2;
79 wint_t ch, cnt, lastch;
80
81 (void)setlocale(LC_ALL, "");
82
83 Cflag = cflag = dflag = sflag = 0;
/* -C and -c cancel each other: whichever appears last wins. */
84 while ((ch = getopt(argc, argv, "Ccdsu")) != -1)
85 switch((char)ch) {
86 case 'C':
87 Cflag = 1;
88 cflag = 0;
89 break;
90 case 'c':
91 cflag = 1;
92 Cflag = 0;
93 break;
94 case 'd':
95 dflag = 1;
96 break;
97 case 's':
98 sflag = 1;
99 break;
100 case 'u':
/* -u: switch stdout to unbuffered output. */
101 setbuf(stdout, (char *)NULL);
102 break;
103 case '?':
104 default:
105 usage();
106 }
107 argc -= optind;
108 argv += optind;
109
/*
 * Exactly one or two operands are accepted; isstring2 records whether
 * string2 was supplied.  usage() exits, so the 0/default arm never falls
 * through into case 1.
 */
110 switch(argc) {
111 case 0:
112 default:
113 usage();
114 /* NOTREACHED */
115 case 1:
116 isstring2 = 0;
117 if(!argv[0]) usage();
118 break;
119 case 2:
120 isstring2 = 1;
121 if(!argv[0] || !argv[1]) usage();
122 break;
123 }
124
125 /*
126 * tr -ds [-Cc] string1 string2
127 * Delete all characters (or complemented characters) in string1.
128 * Squeeze all characters in string2.
129 */
130 if (dflag && sflag) {
131 if (!isstring2)
132 usage();
133
134 delete = setup(argv[0], &s1, cflag, Cflag);
135 squeeze = setup(argv[1], &s2, 0, 0);
136
/*
 * lastch tracks the most recently written character (OOBCH before any
 * output).  A character is written unless it is in the delete set, or it
 * repeats lastch and is in the squeeze set.
 */
137 for (lastch = OOBCH; (ch = getwchar()) != WEOF;)
138 if (!cset_in(delete, ch) &&
139 (lastch != ch || !cset_in(squeeze, ch))) {
140 lastch = ch;
141 (void)putwchar(ch);
142 }
143 if (ferror(stdin))
144 err(1, NULL);
145 exit(0);
146 }
147
148 /*
149 * tr -d [-Cc] string1
150 * Delete all characters (or complemented characters) in string1.
151 */
152 if (dflag) {
153 if (isstring2)
154 usage();
155
156 delete = setup(argv[0], &s1, cflag, Cflag);
157
158 while ((ch = getwchar()) != WEOF)
159 if (!cset_in(delete, ch))
160 (void)putwchar(ch);
161 if (ferror(stdin))
162 err(1, NULL);
163 exit(0);
164 }
165
166 /*
167 * tr -s [-Cc] string1
168 * Squeeze all characters (or complemented characters) in string1.
169 */
170 if (sflag && !isstring2) {
171 squeeze = setup(argv[0], &s1, cflag, Cflag);
172
/* Same repeat-suppression rule as the -ds loop, without the delete set. */
173 for (lastch = OOBCH; (ch = getwchar()) != WEOF;)
174 if (lastch != ch || !cset_in(squeeze, ch)) {
175 lastch = ch;
176 (void)putwchar(ch);
177 }
178 if (ferror(stdin))
179 err(1, NULL);
180 exit(0);
181 }
182
183 /*
184 * tr [-Ccs] string1 string2
185 * Replace all characters (or complemented characters) in string1 with
186 * the character in the same position in string2. If the -s option is
187 * specified, squeeze all the characters in string2.
188 */
189 if (!isstring2)
190 usage();
191
192 map = cmap_alloc();
193 if (map == NULL)
194 err(1, NULL);
195 squeeze = cset_alloc();
196 if (squeeze == NULL)
197 err(1, NULL);
198
199 s1.str = argv[0];
200
/*
 * With -C/-c the map default is set to OOBCH so that the complement pass
 * after endloop can recognise characters string1 did not mention
 * (cmap_lookup() == OOBCH).  string2 is scanned from a private copy here
 * and scanned again from argv[1] in that later pass.
 * NOTE(review): presumably the copy exists because next() may modify the
 * buffer it walks -- confirm against the next() implementation.
 */
201 if (Cflag || cflag) {
202 cmap_default(map, OOBCH);
203 if ((s2.str = strdup(argv[1])) == NULL)
204 errx(1, "strdup(argv[1])");
205 } else
206 s2.str = argv[1];
207
/* Load the first element of string2; having none at all is an error. */
208 if (!next(&s2))
209 errx(1, "empty string2");
210
211 /*
212 * For -s result will contain only those characters defined
213 * as the second characters in each of the toupper or tolower
214 * pairs.
215 */
216
217 /* If string2 runs out of characters, use the last one specified. */
218 while (next(&s1)) {
219 again:
/*
 * string1 is in a lower-case class while string2 is in an upper-case
 * class: map each string1 element through towupper() instead of pairing
 * positions, then step string2 past its class and re-examine the new
 * current elements via "again" (s1 has already been advanced).
 * NOTE(review): cnt == 1 presumably identifies the first element of a
 * class expansion -- confirm against next().
 */
220 if (s1.state == CCLASS_LOWER &&
221 s2.state == CCLASS_UPPER &&
222 s1.cnt == 1 && s2.cnt == 1) {
223 do {
224 ch = towupper(s1.lastch);
225 cmap_add(map, s1.lastch, ch);
226 if (sflag && iswupper(ch))
227 cset_add(squeeze, ch);
228 if (!next(&s1))
229 goto endloop;
230 } while (s1.state == CCLASS_LOWER && s1.cnt > 1);
231 /* skip upper set */
232 do {
233 if (!next(&s2))
234 break;
235 } while (s2.state == CCLASS_UPPER && s2.cnt > 1);
236 goto again;
/* Mirror image of the branch above: upper-case class onto lower-case. */
237 } else if (s1.state == CCLASS_UPPER &&
238 s2.state == CCLASS_LOWER &&
239 s1.cnt == 1 && s2.cnt == 1) {
240 do {
241 ch = towlower(s1.lastch);
242 cmap_add(map, s1.lastch, ch);
243 if (sflag && iswlower(ch))
244 cset_add(squeeze, ch);
245 if (!next(&s1))
246 goto endloop;
247 } while (s1.state == CCLASS_UPPER && s1.cnt > 1);
248 /* skip lower set */
249 do {
250 if (!next(&s2))
251 break;
252 } while (s2.state == CCLASS_LOWER && s2.cnt > 1);
253 goto again;
254 } else {
/* Ordinary case: pair the current elements of string1 and string2. */
255 cmap_add(map, s1.lastch, s2.lastch);
256 if (sflag)
257 cset_add(squeeze, s2.lastch);
258 }
/* Result ignored on purpose: s2.lastch is simply used again afterwards. */
259 (void)next(&s2);
260 }
261 endloop:
/* Complement handling: -c always, or -C when MB_CUR_MAX > 1. */
262 if (cflag || (Cflag && MB_CUR_MAX > 1)) {
263 /*
264 * This is somewhat tricky: since the character set is
265 * potentially huge, we need to avoid allocating a map
266 * entry for every character. Our strategy is to set the
267 * default mapping to the last character of string #2
268 * (= the one that gets automatically repeated), then to
269 * add back identity mappings for characters that should
270 * remain unchanged. We don't waste space on identity mappings
271 * for non-characters with the -C option; those are simulated
272 * in the I/O loop.
273 */
/* Restart the scan of string2 from the untouched argv[1]. */
274 s2.str = argv[1];
275 s2.state = NORMAL;
276 for (cnt = 0; cnt < WCHAR_MAX; cnt++) {
277 if (Cflag && !iswrune(cnt))
278 continue;
/*
 * OOBCH means string1 did not mention cnt, so it belongs to the
 * complement and takes the next element of string2; characters that
 * string1 did mention are mapped to themselves.
 */
279 if (cmap_lookup(map, cnt) == OOBCH) {
280 if (next(&s2))
281 cmap_add(map, cnt, s2.lastch);
282 if (sflag)
283 cset_add(squeeze, s2.lastch);
284 } else
285 cmap_add(map, cnt, cnt);
/*
 * Stop once string2 is used up (or repeats forever) and cnt has reached
 * cmap_max(map); the default set below covers everything after that.
 */
286 if ((s2.state == EOS || s2.state == INFINITE) &&
287 cnt >= cmap_max(map))
288 break;
289 }
290 cmap_default(map, s2.lastch);
/* -C when MB_CUR_MAX is 1: only the NCHARS_SB codes need considering. */
291 } else if (Cflag) {
/*
 * Gather into carray the codes that string1 left unmapped and that
 * iswrune() accepts; every other code is mapped to itself.
 */
292 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) {
293 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt))
294 *p++ = cnt;
295 else
296 cmap_add(map, cnt, cnt);
297 }
298 n = p - carray;
/*
 * Order the gathered codes with the charcoll() comparator.
 * NOTE(review): Cflag is always true in this branch, so the test is
 * redundant; the mergesort() result is discarded.
 */
299 if (Cflag && n > 1)
300 (void)mergesort(carray, n, sizeof(*carray), charcoll);
301
302 s2.str = argv[1];
303 s2.state = NORMAL;
304 for (cnt = 0; cnt < n; cnt++) {
305 (void)next(&s2);
306 cmap_add(map, carray[cnt], s2.lastch);
307 /*
308 * Chars taken from s2 can be different this time
309 * due to lack of complex upper/lower processing,
310 * so fill string2 again to not miss some.
311 */
312 if (sflag)
313 cset_add(squeeze, s2.lastch);
314 }
315 }
316
317 cset_cache(squeeze);
318 cmap_cache(map);
319
/*
 * Translation loop.  With -C, input for which iswrune() is false bypasses
 * the map and is written unchanged.  With -s, a translated character that
 * repeats the previous output and is in the squeeze set is dropped.
 */
320 if (sflag)
321 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) {
322 if (!Cflag || iswrune(ch))
323 ch = cmap_lookup(map, ch);
324 if (lastch != ch || !cset_in(squeeze, ch)) {
325 lastch = ch;
326 (void)putwchar(ch);
327 }
328 }
329 else
330 while ((ch = getwchar()) != WEOF) {
331 if (!Cflag || iswrune(ch))
332 ch = cmap_lookup(map, ch);
333 (void)putwchar(ch);
334 }
335 if (ferror(stdin))
336 err(1, NULL);
337 exit (0);
338 }
339
340 static struct cset *
341 setup(char *arg, STR *str, int cflag, int Cflag)
342 {
343 struct cset *cs;
344
345 cs = cset_alloc();
346 if (cs == NULL)
347 err(1, NULL);
348 str->str = arg;
349 while (next(str))
350 cset_add(cs, str->lastch);
351 if (Cflag)
352 cset_addclass(cs, wctype("rune"), true);
353 if (cflag || Cflag)
354 cset_invert(cs);
355 cset_cache(cs);
356 return (cs);
357 }
358
/*
 * Comparison callback for sorting an array of int character codes by the
 * collation order of the current locale.
 *
 * a, b  each points to an int holding one character code; the value is
 *       converted to char before it is compared.
 *
 * Returns the strcoll() result for the two one-character strings: negative,
 * zero or positive as *a collates before, equal to or after *b.
 */
int
charcoll(const void *a, const void *b)
{
	/*
	 * Automatic buffers instead of function-static ones: nothing has to
	 * outlive the call, and the comparator stays free of shared state.
	 */
	char sa[2] = { (char)*(const int *)a, '\0' };
	char sb[2] = { (char)*(const int *)b, '\0' };

	return (strcoll(sa, sb));
}
368
/*
 * Write the four-line synopsis to stderr and terminate the process with
 * exit status 1.  Does not return.
 */
static void
usage(void)
{
	const char *translate = "usage: tr [-Ccsu] string1 string2";
	const char *delete_only = " tr [-Ccu] -d string1";
	const char *squeeze_only = " tr [-Ccu] -s string1";
	const char *delete_squeeze = " tr [-Ccu] -ds string1 string2";

	(void)fprintf(stderr, "%s\n%s\n%s\n%s\n",
	    translate, delete_only, squeeze_only, delete_squeeze);
	exit(1);
}