]> git.cameronkatri.com Git - bsdgames-darwin.git/blob - quiz/rxp.c
fix typo in Napoleon's name
[bsdgames-darwin.git] / quiz / rxp.c
1 /* $NetBSD: rxp.c,v 1.5 1995/04/22 10:17:00 cgd Exp $ */
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
9 * Commodore Business Machines.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)rxp.c 8.1 (Berkeley) 5/31/93";
43 #else
44 static char rcsid[] = "$NetBSD: rxp.c,v 1.5 1995/04/22 10:17:00 cgd Exp $";
45 #endif
46 #endif /* not lint */
47
48 /*
49 * regular expression parser
50 *
51 * external functions and return values are:
52 * rxp_compile(s)
53 * TRUE success
54 * FALSE parse failure; error message will be in char rxperr[]
55 * metas are:
56 * {...} optional pattern, equialent to [...|]
57 * | alternate pattern
58 * [...] pattern delimiters
59 *
60 * rxp_match(s)
61 * TRUE string s matches compiled pattern
62 * FALSE match failure or regexp error
63 *
64 * rxp_expand()
65 * char * reverse-engineered regular expression string
66 * NULL regexp error
67 */
68
69 #include <stdio.h>
70 #include <ctype.h>
71 #include "quiz.h"
72 /* regexp tokens, arg */
73 #define LIT (-1) /* literal character, char */
74 #define SOT (-2) /* start text anchor, - */
75 #define EOT (-3) /* end text anchor, - */
76 #define GRP_S (-4) /* start alternate grp, ptr_to_end */
77 #define GRP_E (-5) /* end group, - */
78 #define ALT_S (-6) /* alternate starts, ptr_to_next */
79 #define ALT_E (-7) /* alternate ends, - */
80 #define END (-8) /* end of regexp, - */
81
82 typedef short Rxp_t; /* type for regexp tokens */
83
84 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */
85 char rxperr[128]; /* parser error message */
86
87 static int rxp__compile __P((char *, int));
88 static char *rxp__expand __P((int));
89 static int rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *));
90
91 int
92 rxp_compile(s)
93 register char * s;
94 {
95 return (rxp__compile(s, TRUE));
96 }
97
98 static int
99 rxp__compile(s, first)
100 register char *s;
101 int first;
102 {
103 static Rxp_t *rp;
104 static char *sp;
105 Rxp_t *grp_ptr;
106 Rxp_t *alt_ptr;
107 int esc, err;
108
109 esc = 0;
110 if (first) {
111 rp = rxpbuf;
112 sp = s;
113 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */
114 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */
115 *rp++ = 0;
116 }
117 *rp++ = ALT_S;
118 alt_ptr = rp;
119 *rp++ = 0;
120 for (; *sp; ++sp) {
121 if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
122 (void)snprintf(rxperr, sizeof(rxperr),
123 "regular expression too long %s", s);
124 return (FALSE);
125 }
126 if (*sp == ':' && !esc)
127 break;
128 if (esc) {
129 *rp++ = LIT;
130 *rp++ = *sp;
131 esc = 0;
132 }
133 else switch (*sp) {
134 case '\\':
135 esc = 1;
136 break;
137 case '{':
138 case '[':
139 *rp++ = GRP_S;
140 grp_ptr = rp;
141 *rp++ = 0;
142 sp++;
143 if ((err = rxp__compile(s, FALSE)) != TRUE)
144 return (err);
145 *rp++ = GRP_E;
146 *grp_ptr = rp - rxpbuf;
147 break;
148 case '}':
149 case ']':
150 case '|':
151 *rp++ = ALT_E;
152 *alt_ptr = rp - rxpbuf;
153 if (*sp != ']') {
154 *rp++ = ALT_S;
155 alt_ptr = rp;
156 *rp++ = 0;
157 }
158 if (*sp != '|') {
159 if (*sp != ']') {
160 *rp++ = ALT_E;
161 *alt_ptr = rp - rxpbuf;
162 }
163 if (first) {
164 (void)snprintf(rxperr, sizeof(rxperr),
165 "unmatched alternator in regexp %s",
166 s);
167 return (FALSE);
168 }
169 return (TRUE);
170 }
171 break;
172 default:
173 *rp++ = LIT;
174 *rp++ = *sp;
175 esc = 0;
176 break;
177 }
178 }
179 if (!first) {
180 (void)snprintf(rxperr, sizeof(rxperr),
181 "unmatched alternator in regexp %s", s);
182 return (FALSE);
183 }
184 *rp++ = ALT_E;
185 *alt_ptr = rp - rxpbuf;
186 *rp++ = GRP_E;
187 *(rxpbuf + 2) = rp - rxpbuf;
188 *rp++ = EOT;
189 *rp = END;
190 return (TRUE);
191 }
192
193 /*
194 * match string against compiled regular expression
195 */
196 int
197 rxp_match(s)
198 register char * s;
199 {
200 return (rxp__match(s, TRUE, NULL, NULL, NULL));
201 }
202
203 static int
204 rxp__match(s, first, j_succ, j_fail, sp_fail)
205 char *s;
206 int first;
207 Rxp_t *j_succ; /* jump here on successful alt match */
208 Rxp_t *j_fail; /* jump here on failed match */
209 char *sp_fail; /* reset sp to here on failed match */
210 {
211 static Rxp_t *rp;
212 static char *sp;
213 register int ch;
214 Rxp_t *grp_end;
215 int err;
216
217 if (first) {
218 rp = rxpbuf;
219 sp = s;
220 }
221 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
222 switch(*rp) {
223 case LIT:
224 rp++;
225 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
226 if (ch != *sp++) {
227 rp = j_fail;
228 sp = sp_fail;
229 return (TRUE);
230 }
231 rp++;
232 break;
233 case SOT:
234 if (sp != s)
235 return (FALSE);
236 rp++;
237 break;
238 case EOT:
239 if (*sp != 0)
240 return (FALSE);
241 rp++;
242 break;
243 case GRP_S:
244 rp++;
245 grp_end = rxpbuf + *rp++;
246 break;
247 case ALT_S:
248 rp++;
249 if ((err = rxp__match(sp,
250 FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
251 return (err);
252 break;
253 case ALT_E:
254 rp = j_succ;
255 return (TRUE);
256 case GRP_E:
257 default:
258 return (FALSE);
259 }
260 return (*rp != END ? FALSE : TRUE);
261 }
262
263 /*
264 * Reverse engineer the regular expression, by picking first of all alternates.
265 */
266 char *
267 rxp_expand()
268 {
269 return (rxp__expand(TRUE));
270 }
271
272 static char *
273 rxp__expand(first)
274 int first;
275 {
276 static char buf[RXP_LINE_SZ/2];
277 static Rxp_t *rp;
278 static char *bp;
279 Rxp_t *grp_ptr;
280 char *err;
281
282 if (first) {
283 rp = rxpbuf;
284 bp = buf;
285 }
286 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
287 switch(*rp) {
288 case LIT:
289 rp++;
290 *bp++ = *rp++;
291 break;
292 case GRP_S:
293 rp++;
294 grp_ptr = rxpbuf + *rp;
295 rp++;
296 if ((err = rxp__expand(FALSE)) == NULL)
297 return (err);
298 rp = grp_ptr;
299 break;
300 case ALT_E:
301 return (buf);
302 case ALT_S:
303 rp++;
304 /* FALLTHROUGH */
305 case SOT:
306 case EOT:
307 case GRP_E:
308 rp++;
309 break;
310 default:
311 return (NULL);
312 }
313 if (first) {
314 if (*rp != END)
315 return (NULL);
316 *bp = '\0';
317 }
318 return (buf);
319 }