]> git.cameronkatri.com Git - apple_cmds.git/blob - text_cmds/look/look.c
Merge branch 'apple'
[apple_cmds.git] / text_cmds / look / look.c
1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * David Hitz of Auspex Systems, Inc.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #ifndef lint
38 static const char copyright[] =
39 "@(#) Copyright (c) 1991, 1993\n\
40 The Regents of the University of California. All rights reserved.\n";
41 #endif /* not lint */
42
43 #ifndef lint
44 #if 0
45 static char sccsid[] = "@(#)look.c 8.2 (Berkeley) 5/4/95";
46 #endif
47 #endif /* not lint */
48 #include <sys/cdefs.h>
49 __FBSDID("$FreeBSD: src/usr.bin/look/look.c,v 1.18.10.2.4.1 2010/06/14 02:09:06 kensmith Exp $");
50
51 /*
52 * look -- find lines in a sorted list.
53 *
54 * The man page said that TABs and SPACEs participate in -d comparisons.
55 * In fact, they were ignored. This implements historic practice, not
56 * the manual page.
57 */
58
59 #include <sys/types.h>
60 #include <sys/mman.h>
61 #include <sys/stat.h>
62
63 #include <err.h>
64 #include <errno.h>
65 #include <fcntl.h>
66 #include <limits.h>
67 #include <locale.h>
68 #include <stdio.h>
69 #include <stdlib.h>
70 #include <string.h>
71 #include <unistd.h>
72 #include <wchar.h>
73 #include <wctype.h>
74
75 #include "pathnames.h"
76
77 static char _path_words[] = _PATH_WORDS;
78
79 #define EQUAL 0
80 #define GREATER 1
81 #define LESS (-1)
82
83 int dflag, fflag;
84
85 char *binary_search(wchar_t *, unsigned char *, unsigned char *);
86 int compare(wchar_t *, unsigned char *, unsigned char *);
87 char *linear_search(wchar_t *, unsigned char *, unsigned char *);
88 int look(wchar_t *, unsigned char *, unsigned char *);
89 wchar_t *prepkey(const char *, wchar_t);
90 void print_from(wchar_t *, unsigned char *, unsigned char *);
91
92 static void usage(void);
93
94 int
95 main(int argc, char *argv[])
96 {
97 struct stat sb;
98 int ch, fd, match;
99 wchar_t termchar;
100 unsigned char *back, *front;
101 unsigned const char *file;
102 wchar_t *key;
103
104 (void) setlocale(LC_CTYPE, "");
105
106 file = _path_words;
107 termchar = L'\0';
108 while ((ch = getopt(argc, argv, "dft:")) != -1)
109 switch(ch) {
110 case 'd':
111 dflag = 1;
112 break;
113 case 'f':
114 fflag = 1;
115 break;
116 case 't':
117 if (mbrtowc(&termchar, optarg, MB_LEN_MAX, NULL) !=
118 strlen(optarg))
119 errx(2, "invalid termination character");
120 break;
121 case '?':
122 default:
123 usage();
124 }
125 argc -= optind;
126 argv += optind;
127
128 /* 4384130 */
129 #ifdef __APPLE__
130 if (argc <= 0)
131 #else
132 if (argc == 0)
133 #endif
134 usage();
135 if (argc == 1) /* But set -df by default. */
136 dflag = fflag = 1;
137 key = prepkey(*argv++, termchar);
138 if (argc >= 2)
139 file = *argv++;
140
141 match = 1;
142
143 do {
144 if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb))
145 err(2, "%s", file);
146 if (sb.st_size > SIZE_T_MAX)
147 errx(2, "%s: %s", file, strerror(EFBIG));
148 if (sb.st_size == 0) {
149 close(fd);
150 continue;
151 }
152 if ((front = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_SHARED, fd, (off_t)0)) == MAP_FAILED)
153 err(2, "%s", file);
154 back = front + sb.st_size;
155 match *= (look(key, front, back));
156 close(fd);
157 } while (argc-- > 2 && (file = *argv++));
158
159 exit(match);
160 }
161
162 wchar_t *
163 prepkey(const char *string, wchar_t termchar)
164 {
165 const char *readp;
166 wchar_t *key, *writep;
167 wchar_t ch;
168 size_t clen;
169
170 /*
171 * Reformat search string and convert to wide character representation
172 * to avoid doing it multiple times later.
173 */
174 if ((key = malloc(sizeof(wchar_t) * (strlen(string) + 1))) == NULL)
175 err(2, NULL);
176 readp = string;
177 writep = key;
178 while ((clen = mbrtowc(&ch, readp, MB_LEN_MAX, NULL)) != 0) {
179 if (clen == (size_t)-1 || clen == (size_t)-2)
180 errc(2, EILSEQ, NULL);
181 if (fflag)
182 ch = towlower(ch);
183 if (!dflag || iswalnum(ch))
184 *writep++ = ch;
185 readp += clen;
186 }
187 *writep = L'\0';
188 if (termchar != L'\0' && (writep = wcschr(key, termchar)) != NULL)
189 *++writep = L'\0';
190 return (key);
191 }
192
193 int
194 look(wchar_t *string, unsigned char *front, unsigned char *back)
195 {
196
197 front = binary_search(string, front, back);
198 front = linear_search(string, front, back);
199
200 if (front)
201 print_from(string, front, back);
202 return (front ? 0 : 1);
203 }
204
205
206 /*
207 * Binary search for "string" in memory between "front" and "back".
208 *
209 * This routine is expected to return a pointer to the start of a line at
210 * *or before* the first word matching "string". Relaxing the constraint
211 * this way simplifies the algorithm.
212 *
213 * Invariants:
214 * front points to the beginning of a line at or before the first
215 * matching string.
216 *
217 * back points to the beginning of a line at or after the first
218 * matching line.
219 *
220 * Base of the Invariants.
221 * front = NULL;
222 * back = EOF;
223 *
224 * Advancing the Invariants:
225 *
226 * p = first newline after halfway point from front to back.
227 *
228 * If the string at "p" is not greater than the string to match,
229 * p is the new front. Otherwise it is the new back.
230 *
231 * Termination:
232 *
233 * The definition of the routine allows it return at any point,
234 * since front is always at or before the line to print.
235 *
236 * In fact, it returns when the chosen "p" equals "back". This
237 * implies that there exists a string is least half as long as
238 * (back - front), which in turn implies that a linear search will
239 * be no more expensive than the cost of simply printing a string or two.
240 *
241 * Trying to continue with binary search at this point would be
242 * more trouble than it's worth.
243 */
244 #define SKIP_PAST_NEWLINE(p, back) \
245 while (p < back && *p++ != '\n');
246
247 char *
248 binary_search(wchar_t *string, unsigned char *front, unsigned char *back)
249 {
250 unsigned char *p;
251
252 p = front + (back - front) / 2;
253 SKIP_PAST_NEWLINE(p, back);
254
255 /*
256 * If the file changes underneath us, make sure we don't
257 * infinitely loop.
258 */
259 while (p < back && back > front) {
260 if (compare(string, p, back) == GREATER)
261 front = p;
262 else
263 back = p;
264 p = front + (back - front) / 2;
265 SKIP_PAST_NEWLINE(p, back);
266 }
267 return (front);
268 }
269
270 /*
271 * Find the first line that starts with string, linearly searching from front
272 * to back.
273 *
274 * Return NULL for no such line.
275 *
276 * This routine assumes:
277 *
278 * o front points at the first character in a line.
279 * o front is before or at the first line to be printed.
280 */
281 char *
282 linear_search(wchar_t *string, unsigned char *front, unsigned char *back)
283 {
284 while (front < back) {
285 switch (compare(string, front, back)) {
286 case EQUAL: /* Found it. */
287 return (front);
288 case LESS: /* No such string. */
289 return (NULL);
290 case GREATER: /* Keep going. */
291 break;
292 }
293 SKIP_PAST_NEWLINE(front, back);
294 }
295 return (NULL);
296 }
297
298 /*
299 * Print as many lines as match string, starting at front.
300 */
301 void
302 print_from(wchar_t *string, unsigned char *front, unsigned char *back)
303 {
304 for (; front < back && compare(string, front, back) == EQUAL; ++front) {
305 for (; front < back && *front != '\n'; ++front)
306 if (putchar(*front) == EOF)
307 err(2, "stdout");
308 if (putchar('\n') == EOF)
309 err(2, "stdout");
310 }
311 }
312
313 /*
314 * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
315 * string2 (s1 ??? s2).
316 *
317 * o Matches up to len(s1) are EQUAL.
318 * o Matches up to len(s2) are GREATER.
319 *
320 * Compare understands about the -f and -d flags, and treats comparisons
321 * appropriately.
322 *
323 * The string "s1" is null terminated. The string s2 is '\n' terminated (or
324 * "back" terminated).
325 */
326 int
327 compare(wchar_t *s1, unsigned char *s2, unsigned char *back)
328 {
329 wchar_t ch1, ch2;
330 size_t len2;
331
332 for (; *s1 && s2 < back && *s2 != '\n'; ++s1, s2 += len2) {
333 ch1 = *s1;
334 len2 = mbrtowc(&ch2, s2, back - s2, NULL);
335 if (len2 == (size_t)-1 || len2 == (size_t)-2) {
336 ch2 = *s2;
337 len2 = 1;
338 }
339 if (fflag)
340 ch2 = towlower(ch2);
341 if (dflag && !iswalnum(ch2)) {
342 /* Ignore character in comparison. */
343 --s1;
344 continue;
345 }
346 if (ch1 != ch2)
347 return (ch1 < ch2 ? LESS : GREATER);
348 }
349 return (*s1 ? GREATER : EQUAL);
350 }
351
352 static void
353 usage(void)
354 {
355 (void)fprintf(stderr, "usage: look [-df] [-t char] string [file ...]\n");
356 exit(2);
357 }