]> git.cameronkatri.com Git - bsdgames-darwin.git/blob - quiz/rxp.c
Grammar and punctuation fixes, from jmc@openbsd.
[bsdgames-darwin.git] / quiz / rxp.c
1 /* $NetBSD: rxp.c,v 1.10 2002/12/06 01:54:55 thorpej Exp $ */
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
9 * Commodore Business Machines.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 #ifndef lint
42 #if 0
43 static char sccsid[] = "@(#)rxp.c 8.1 (Berkeley) 5/31/93";
44 #else
45 __RCSID("$NetBSD: rxp.c,v 1.10 2002/12/06 01:54:55 thorpej Exp $");
46 #endif
47 #endif /* not lint */
48
49 /*
50 * regular expression parser
51 *
52 * external functions and return values are:
53 * rxp_compile(s)
54 * TRUE success
55 * FALSE parse failure; error message will be in char rxperr[]
56 * metas are:
57 * {...} optional pattern, equialent to [...|]
58 * | alternate pattern
59 * [...] pattern delimiters
60 *
61 * rxp_match(s)
62 * TRUE string s matches compiled pattern
63 * FALSE match failure or regexp error
64 *
65 * rxp_expand()
66 * char * reverse-engineered regular expression string
67 * NULL regexp error
68 */
69
70 #include <stdio.h>
71 #include <stdlib.h>
72 #include <ctype.h>
73 #include "quiz.h"
74 /* regexp tokens, arg */
75 #define LIT (-1) /* literal character, char */
76 #define SOT (-2) /* start text anchor, - */
77 #define EOT (-3) /* end text anchor, - */
78 #define GRP_S (-4) /* start alternate grp, ptr_to_end */
79 #define GRP_E (-5) /* end group, - */
80 #define ALT_S (-6) /* alternate starts, ptr_to_next */
81 #define ALT_E (-7) /* alternate ends, - */
82 #define END (-8) /* end of regexp, - */
83
84 typedef short Rxp_t; /* type for regexp tokens */
85
86 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */
87 char rxperr[128]; /* parser error message */
88
89 static int rxp__compile __P((const char *, int));
90 static char *rxp__expand __P((int));
91 static int rxp__match __P((const char *, int, Rxp_t *, Rxp_t *, const char *));
92
93 int
94 rxp_compile(s)
95 const char * s;
96 {
97 return (rxp__compile(s, TRUE));
98 }
99
100 static int
101 rxp__compile(s, first)
102 const char *s;
103 int first;
104 {
105 static Rxp_t *rp;
106 static const char *sp;
107 Rxp_t *grp_ptr;
108 Rxp_t *alt_ptr;
109 int esc, err;
110
111 esc = 0;
112 if (first) {
113 rp = rxpbuf;
114 sp = s;
115 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */
116 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */
117 *rp++ = 0;
118 }
119 *rp++ = ALT_S;
120 alt_ptr = rp;
121 *rp++ = 0;
122 for (; *sp; ++sp) {
123 if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
124 (void)snprintf(rxperr, sizeof(rxperr),
125 "regular expression too long %s", s);
126 return (FALSE);
127 }
128 if (*sp == ':' && !esc)
129 break;
130 if (esc) {
131 *rp++ = LIT;
132 *rp++ = *sp;
133 esc = 0;
134 }
135 else switch (*sp) {
136 case '\\':
137 esc = 1;
138 break;
139 case '{':
140 case '[':
141 *rp++ = GRP_S;
142 grp_ptr = rp;
143 *rp++ = 0;
144 sp++;
145 if ((err = rxp__compile(s, FALSE)) != TRUE)
146 return (err);
147 *rp++ = GRP_E;
148 *grp_ptr = rp - rxpbuf;
149 break;
150 case '}':
151 case ']':
152 case '|':
153 *rp++ = ALT_E;
154 *alt_ptr = rp - rxpbuf;
155 if (*sp != ']') {
156 *rp++ = ALT_S;
157 alt_ptr = rp;
158 *rp++ = 0;
159 }
160 if (*sp != '|') {
161 if (*sp != ']') {
162 *rp++ = ALT_E;
163 *alt_ptr = rp - rxpbuf;
164 }
165 if (first) {
166 (void)snprintf(rxperr, sizeof(rxperr),
167 "unmatched alternator in regexp %s",
168 s);
169 return (FALSE);
170 }
171 return (TRUE);
172 }
173 break;
174 default:
175 *rp++ = LIT;
176 *rp++ = *sp;
177 esc = 0;
178 break;
179 }
180 }
181 if (!first) {
182 (void)snprintf(rxperr, sizeof(rxperr),
183 "unmatched alternator in regexp %s", s);
184 return (FALSE);
185 }
186 *rp++ = ALT_E;
187 *alt_ptr = rp - rxpbuf;
188 *rp++ = GRP_E;
189 *(rxpbuf + 2) = rp - rxpbuf;
190 *rp++ = EOT;
191 *rp = END;
192 return (TRUE);
193 }
194
195 /*
196 * match string against compiled regular expression
197 */
198 int
199 rxp_match(s)
200 const char * s;
201 {
202 return (rxp__match(s, TRUE, NULL, NULL, NULL));
203 }
204
205 static int
206 rxp__match(s, first, j_succ, j_fail, sp_fail)
207 const char *s;
208 int first;
209 Rxp_t *j_succ; /* jump here on successful alt match */
210 Rxp_t *j_fail; /* jump here on failed match */
211 const char *sp_fail; /* reset sp to here on failed match */
212 {
213 static Rxp_t *rp;
214 static const char *sp;
215 int ch;
216 Rxp_t *grp_end = NULL;
217
218 if (first) {
219 rp = rxpbuf;
220 sp = s;
221 }
222 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
223 switch(*rp) {
224 case LIT:
225 rp++;
226 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
227 if (ch != *sp++) {
228 rp = j_fail;
229 sp = sp_fail;
230 return (FALSE);
231 }
232 rp++;
233 break;
234 case SOT:
235 if (sp != s)
236 return (FALSE);
237 rp++;
238 break;
239 case EOT:
240 if (*sp != 0)
241 return (FALSE);
242 rp++;
243 break;
244 case GRP_S:
245 rp++;
246 grp_end = rxpbuf + *rp++;
247 break;
248 case ALT_S:
249 rp++;
250 rxp__match(sp, FALSE, grp_end, rxpbuf + *rp++, sp);
251 break;
252 case ALT_E:
253 rp = j_succ;
254 return (TRUE);
255 case GRP_E:
256 rp = j_fail;
257 sp = sp_fail;
258 return (FALSE);
259 default:
260 abort();
261 }
262 return (*rp != END ? FALSE : TRUE);
263 }
264
265 /*
266 * Reverse engineer the regular expression, by picking first of all alternates.
267 */
268 char *
269 rxp_expand()
270 {
271 return (rxp__expand(TRUE));
272 }
273
274 static char *
275 rxp__expand(first)
276 int first;
277 {
278 static char buf[RXP_LINE_SZ/2];
279 static Rxp_t *rp;
280 static char *bp;
281 Rxp_t *grp_ptr;
282 char *err;
283
284 if (first) {
285 rp = rxpbuf;
286 bp = buf;
287 }
288 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
289 switch(*rp) {
290 case LIT:
291 rp++;
292 *bp++ = *rp++;
293 break;
294 case GRP_S:
295 rp++;
296 grp_ptr = rxpbuf + *rp;
297 rp++;
298 if ((err = rxp__expand(FALSE)) == NULL)
299 return (err);
300 rp = grp_ptr;
301 break;
302 case ALT_E:
303 return (buf);
304 case ALT_S:
305 rp++;
306 /* FALLTHROUGH */
307 case SOT:
308 case EOT:
309 case GRP_E:
310 rp++;
311 break;
312 default:
313 return (NULL);
314 }
315 if (first) {
316 if (*rp != END)
317 return (NULL);
318 *bp = '\0';
319 }
320 return (buf);
321 }