]> git.cameronkatri.com Git - apple_cmds.git/blob - adv_cmds/colldef/parse.y
adv_cmds: All but pkill compiling
[apple_cmds.git] / adv_cmds / colldef / parse.y
1 %{
2 /*-
3 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
4 * at Electronni Visti IA, Kiev, Ukraine.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD: src/usr.bin/colldef/parse.y,v 1.31 2002/10/16 12:56:22 charnier Exp $");
31
32 #include <arpa/inet.h>
33 #include <err.h>
34 #include <stdarg.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <wchar.h>
39 #include <unistd.h>
40 #include <sysexits.h>
41 #include <limits.h>
42 #include "collate.h"
43 #include "common.h"
44
45 #define PRI_UNDEFINED (-1)
46 #define PRI_IGNORE 0
47 #define LINE_NONE (-1)
48 #define LINE_NORMAL 0
49 #define LINE_ELLIPSIS 1
50 #define LINE_UNDEFINED 2
51 /* If UNDEFINED is specified with ellipses, we reposition prim_pri to
52 * UNDEFINED_PRI, leaving gap for undefined characters. */
53 #define UNDEFINED_PRI (COLLATE_MAX_PRIORITY - (COLLATE_MAX_PRIORITY >> 2))
54
55 extern FILE *yyin;
56 void yyerror(const char *fmt, ...) __printflike(1, 2);
57 int yyparse(void);
58 int yylex(void);
59 static void usage(void);
60 static void collate_print_tables(void);
61 static struct __collate_st_char_pri *getpri(int32_t);
62 static struct __collate_st_char_pri *haspri(int32_t);
63 static struct __collate_st_chain_pri *getchain(const wchar_t *, int);
64 static struct symbol *getsymbolbychar(wchar_t);
65 static struct symbol *hassymbolbychar(wchar_t);
66 static void setsymbolbychar(struct symbol *);
67 struct symbol *getstring(const wchar_t *);
68 static void makeforwardref(int, const struct symbol *, const struct symbol *);
69 static int charpricompar(const void *, const void *);
70 static int substcompar(const void *, const void *);
71 static int chainpricompar(const void *, const void *);
72 static void putsubst(int32_t, int, const wchar_t *);
73 static int hassubst(int32_t, int);
74 static const wchar_t *__collate_wcsnchr(const wchar_t *, wchar_t, int);
75 static int __collate_wcsnlen(const wchar_t *, int);
76 char *showwcs(const wchar_t *, int);
77 static char *charname(wchar_t);
78 static char *charname2(wchar_t);
79
80 char map_name[FILENAME_MAX] = ".";
81 wchar_t curr_chain[STR_LEN + 1];
82
83 char __collate_version[STR_LEN];
84 DB *charmapdb;
85 static DB *charmapdb2;
86 static DB *largemapdb;
87 static int nlargemap = 0;
88 static DB *substdb[COLL_WEIGHTS_MAX];
89 static int nsubst[COLL_WEIGHTS_MAX];
90 static DB *chaindb;
91 static int nchain = 0;
92 static DB *stringdb;
93 static DB *forward_ref[COLL_WEIGHTS_MAX];
94 static struct symbol *prev_weight_table[COLL_WEIGHTS_MAX];
95 static struct symbol *prev2_weight_table[COLL_WEIGHTS_MAX];
96 static struct symbol *weight_table[COLL_WEIGHTS_MAX];
97 static int prev_line = LINE_NONE;
98 static struct symbol *prev_elem;
99 static int weight_index = 0;
100 static int allow_ellipsis = 0;
101 static struct symbol sym_ellipsis = {SYMBOL_ELLIPSIS, PRI_UNDEFINED};
102 static struct symbol sym_ignore = {SYMBOL_IGNORE, PRI_IGNORE};
103 static struct symbol sym_undefined = {SYMBOL_CHAR, PRI_UNDEFINED};
104 static int order_pass = 0;
105
106 #undef __collate_char_pri_table
107 struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1];
108 struct __collate_st_chain_pri *__collate_chain_pri_table;
109 struct __collate_st_subst *__collate_substitute_table[COLL_WEIGHTS_MAX];
110 struct __collate_st_large_char_pri *__collate_large_char_pri_table;
111
112 int prim_pri = 2, sec_pri = 2;
113 #ifdef COLLATE_DEBUG
114 int debug;
115 #endif
116 struct __collate_st_info info = {{DIRECTIVE_FORWARD, DIRECTIVE_FORWARD}, 0, 0, 0, {PRI_UNDEFINED, PRI_UNDEFINED}};
117
118 /* Some of the code expects COLL_WEIGHTS_MAX == 2 */
119 int directive_count = COLL_WEIGHTS_MAX;
120
121 const char *out_file = "LC_COLLATE";
122 %}
123 %union {
124 int32_t ch;
125 wchar_t str[BUFSIZE];
126 }
127 %token SUBSTITUTE WITH
128 %token START_LC_COLLATE END_LC_COLLATE COLLATING_ELEMENT FROM COLLATING_SYMBOL
129 %token ELLIPSIS IGNORE UNDEFINED
130 %token ORDER RANGE ORDER_START ORDER_END ORDER_SECOND_PASS
131 %token <str> STRING
132 %token <str> DEFN
133 %token <str> ELEM
134 %token <ch> CHAR
135 %token <ch> ORDER_DIRECTIVE
136 %%
/*
 * collate: start symbol.  Once the whole input has been parsed, flatten
 * the in-memory databases (chains, substitutions, large-character
 * priorities) into sorted arrays, resolve priorities that are still
 * undefined, and write everything to out_file in this order: version
 * string, info header, 8-bit character table, substitution table(s),
 * chain table, large-character table.  Multi-byte values are converted
 * with htonl() before being written.  The action never returns: it ends
 * with exit(EX_OK), and any failure goes through err()/exit().
 */
collate : datafile {
	FILE *fp;
	/* stringdb is only opened by the LC_COLLATE (localedef) syntax */
	int localedef = (stringdb != NULL);
	int z;

	/* Copy every chain out of chaindb into a contiguous, sorted table. */
	if (nchain > 0) {
		DBT key, val;
		struct __collate_st_chain_pri *t, *v;
		int flags, i, len;

		if ((__collate_chain_pri_table = (struct __collate_st_chain_pri *)malloc(nchain * sizeof(struct __collate_st_chain_pri))) == NULL)
			err(1, "chain malloc");
		flags = R_FIRST;
		t = __collate_chain_pri_table;
		for(i = 0; i < nchain; i++) {
			if (chaindb->seq(chaindb, &key, &val, flags) != 0)
				err(1, "Can't retrieve chaindb %d", i);
			/* the db value is a pointer to the chain record */
			memcpy(&v, val.data, sizeof(struct __collate_st_chain_pri *));
			*t++ = *v;
			if ((len = __collate_wcsnlen(v->str, STR_LEN)) > info.chain_max_len)
				info.chain_max_len = len;
			flags = R_NEXT;
		}
		/* sanity check: the db must hold exactly nchain records */
		if (chaindb->seq(chaindb, &key, &val, flags) == 0)
			err(1, "More in chaindb after retrieving %d", nchain);
		qsort(__collate_chain_pri_table, nchain, sizeof(struct __collate_st_chain_pri), chainpricompar);
	}

	/* Same for each per-weight substitution database. */
	for(z = 0; z < directive_count; z++) {
		if (nsubst[z] > 0) {
			DBT key, val;
			struct __collate_st_subst *t;
			wchar_t *wp, *tp;
			int flags, i, j;
			int32_t cval;

			if ((__collate_substitute_table[z] = (struct __collate_st_subst *)calloc(nsubst[z], sizeof(struct __collate_st_subst))) == NULL)
				err(1, "__collate_substitute_table[%d] calloc", z);
			flags = R_FIRST;
			t = __collate_substitute_table[z];
			for(i = 0; i < nsubst[z]; i++) {
				if (substdb[z]->seq(substdb[z], &key, &val, flags) != 0)
					err(1, "Can't retrieve substdb[%d]", z);
				memcpy(&cval, key.data, sizeof(int32_t));
				/* we don't set the byte order of t->val, since we
				 * need it for sorting */
				t->val = cval;
				/* Check the length bound before dereferencing so a
				 * full-length string is never read past STR_LEN. */
				for(wp = (wchar_t *)val.data, tp = t->str, j = STR_LEN; j-- > 0 && *wp;)
					*tp++ = htonl(*wp++);
				t++;
				flags = R_NEXT;
			}
			if (substdb[z]->seq(substdb[z], &key, &val, flags) == 0)
				err(1, "More in substdb[%d] after retrieving %d", z, nsubst[z]);
			qsort(__collate_substitute_table[z], nsubst[z], sizeof(struct __collate_st_subst), substcompar);
		}
	}

	/* Characters kept in largemapdb get their own sorted table. */
	if (nlargemap > 0) {
		DBT key, val;
		struct __collate_st_large_char_pri *t;
		struct __collate_st_char_pri *p;
		int flags, i;
		int32_t cval;

		if ((__collate_large_char_pri_table = (struct __collate_st_large_char_pri *)malloc(nlargemap * sizeof(struct __collate_st_large_char_pri))) == NULL)
			err(1, "nlargemap malloc");
		flags = R_FIRST;
		t = __collate_large_char_pri_table;
		for(i = 0; i < nlargemap; i++) {
			if (largemapdb->seq(largemapdb, &key, &val, flags) != 0)
				err(1, "Can't retrieve largemapdb %d", i);
			memcpy(&cval, key.data, sizeof(int32_t));
			memcpy(&p, val.data, sizeof(struct __collate_st_char_pri *));
			/* we don't set the byte order of t->val, since we
			 * need it for sorting */
			t->val = cval;
			/* priorities are stored in network order from here on */
			for(z = 0; z < directive_count; z++)
				t->pri.pri[z] = htonl(p->pri[z]);
			t++;
			flags = R_NEXT;
		}
		if (largemapdb->seq(largemapdb, &key, &val, flags) == 0)
			err(1, "More in largemapdb after retrieving %d", nlargemap);
		qsort(__collate_large_char_pri_table, nlargemap, sizeof(struct __collate_st_large_char_pri), charpricompar);
	}

	/* No UNDEFINED line was seen: undefined characters sort after
	 * everything defined (primary), and relative to their code point
	 * for the remaining weights (negative value). */
	if (info.undef_pri[0] == PRI_UNDEFINED) {
		int i;
		info.undef_pri[0] = prim_pri;
		for(i = 1; i < directive_count; i++)
			info.undef_pri[i] = -prim_pri;
	}

	if (localedef) {
		int ch, ret;
		if (sym_undefined.val == PRI_UNDEFINED) {
			/* Without an UNDEFINED line, warn about every charmap
			 * symbol that never received a priority. */
			int flags = R_FIRST;
			DBT key, val;
			struct symbol *v;
			while((ret = charmapdb->seq(charmapdb, &key, &val, flags)) == 0) {
				memcpy(&v, val.data, sizeof(struct symbol *));
				switch(v->type) {
				case SYMBOL_CHAR: {
					struct __collate_st_char_pri *p = haspri(v->u.wc);
					if (!p || p->pri[0] == PRI_UNDEFINED)
						warnx("<%s> was not defined", showwcs((const wchar_t *)key.data, key.size / sizeof(wchar_t)));
					break;
				}
				case SYMBOL_CHAIN: {
					struct __collate_st_chain_pri *p = getchain(v->u.str, EXISTS);
					if (p->pri[0] == PRI_UNDEFINED)
						warnx("<%s> was not defined", showwcs((const wchar_t *)key.data, key.size / sizeof(wchar_t)));
					break;
				}
				}
				flags = R_NEXT;
			}
			if (ret < 0)
				err(1, "Error retrieving from charmapdb");
		}
		/* Fill in still-undefined 8-bit priorities (host order; the
		 * whole table is byte-swapped later, just before writing). */
		for (ch = 1; ch < UCHAR_MAX + 1; ch++) {
			for(z = 0; z < directive_count; z++)
				if (__collate_char_pri_table[ch].pri[z] == PRI_UNDEFINED)
					__collate_char_pri_table[ch].pri[z] = (info.undef_pri[z] >= 0) ? info.undef_pri[z] : (ch - info.undef_pri[z]);
		}
		/* The large table's priorities were already converted with
		 * htonl() above (PRI_UNDEFINED, -1, is unchanged by that), so
		 * the fill-in value must be converted as well; .val is still
		 * in host order at this point. */
		for (ch = 0; ch < nlargemap; ch++) {
			for(z = 0; z < directive_count; z++)
				if (__collate_large_char_pri_table[ch].pri.pri[z] == PRI_UNDEFINED)
					__collate_large_char_pri_table[ch].pri.pri[z] = htonl((info.undef_pri[z] >= 0) ? info.undef_pri[z] : (__collate_large_char_pri_table[ch].val - info.undef_pri[z]));
		}
	} else {
		/* Old-style syntax: every non-NUL 8-bit character must be
		 * either ordered or substituted, but not both. */
		int ch, substed, ordered;
		int fatal = 0;
		for (ch = 1; ch < UCHAR_MAX + 1; ch++) {
			substed = hassubst(ch, 0);
			ordered = (__collate_char_pri_table[ch].pri[0] != PRI_UNDEFINED);
			if (!ordered && !substed) {
				fatal = 1;
				warnx("%s not found", charname(ch));
			}
			if (substed && ordered) {
				fatal = 1;
				warnx("%s can't be ordered since substituted", charname(ch));
			}
		}
		if (fatal)
			exit(1);
	}

	/* COLLATE_SUBST_DUP depends on COLL_WEIGHTS_MAX == 2 */
	if (localedef) {
		/* identical tables: only the first one is written */
		if (nsubst[0] == nsubst[1] && (nsubst[0] == 0 ||
		    memcmp(__collate_substitute_table[0], __collate_substitute_table[1], nsubst[0] * sizeof(struct __collate_st_subst)) == 0)) {
			info.flags |= COLLATE_SUBST_DUP;
			nsubst[1] = 0;
		}
	} else {
		info.flags |= COLLATE_SUBST_DUP;
		nsubst[1] = 0;
	}

	for(z = 0; z < directive_count; z++)
		info.subst_count[z] = nsubst[z];

	info.directive_count = directive_count;
	info.chain_count = nchain;
	info.large_pri_count = nlargemap;

	if ((fp = fopen(out_file, "w")) == NULL)
		err(EX_UNAVAILABLE, "can't open destination file %s",
		    out_file);

	strcpy(__collate_version, COLLATE_VERSION1_1A);
	if (fwrite(__collate_version, sizeof(__collate_version), 1, fp) != 1)
		err(EX_IOERR,
		    "IO error writting collate version to destination file %s",
		    out_file);
#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
	for(z = 0; z < directive_count; z++) {
		info.undef_pri[z] = htonl(info.undef_pri[z]);
		info.subst_count[z] = htonl(info.subst_count[z]);
	}
	info.chain_count = htonl(info.chain_count);
	info.large_pri_count = htonl(info.large_pri_count);
#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
	if (fwrite(&info, sizeof(info), 1, fp) != 1)
		err(EX_IOERR,
		    "IO error writting collate info to destination file %s",
		    out_file);
#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
	{
		int i;
		struct __collate_st_char_pri *p = __collate_char_pri_table;

		for(i = UCHAR_MAX + 1; i-- > 0; p++) {
			for(z = 0; z < directive_count; z++)
				p->pri[z] = htonl(p->pri[z]);
		}
	}
#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
	if (fwrite(__collate_char_pri_table,
		   sizeof(__collate_char_pri_table), 1, fp) != 1)
		err(EX_IOERR,
		    "IO error writting char table to destination file %s",
		    out_file);
	for(z = 0; z < directive_count; z++) {
		if (nsubst[z] > 0) {
#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
			/* .val was left in host order for qsort(); swap now */
			struct __collate_st_subst *t = __collate_substitute_table[z];
			int i;
			for(i = nsubst[z]; i > 0; i--) {
				t->val = htonl(t->val);
				t++;
			}
#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
			if (fwrite(__collate_substitute_table[z], sizeof(struct __collate_st_subst), nsubst[z], fp) != (size_t)nsubst[z])
				err(EX_IOERR,
				    "IO error writting large substprim table %d to destination file %s",
				    z, out_file);
		}
	}
	if (nchain > 0) {
#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
		int i, j;
		struct __collate_st_chain_pri *p = __collate_chain_pri_table;
		wchar_t *w;

		for(i = nchain; i-- > 0; p++) {
			/* bound check first: a STR_LEN-long chain has no NUL */
			for(j = STR_LEN, w = p->str; j-- > 0 && *w; w++)
				*w = htonl(*w);
			for(z = 0; z < directive_count; z++)
				p->pri[z] = htonl(p->pri[z]);
		}
#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
		if (fwrite(__collate_chain_pri_table,
			   sizeof(*__collate_chain_pri_table), nchain, fp) !=
			   (size_t)nchain)
			err(EX_IOERR,
			    "IO error writting chain table to destination file %s",
			    out_file);
	}

	if (nlargemap > 0) {
#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
		/* priorities are already in network order; only .val is left */
		struct __collate_st_large_char_pri *t = __collate_large_char_pri_table;
		int i;
		for(i = 0; i < nlargemap; i++) {
			t->val = htonl(t->val);
			t++;
		}
#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
		if (fwrite(__collate_large_char_pri_table, sizeof(struct __collate_st_large_char_pri), nlargemap, fp) != (size_t)nlargemap)
			err(EX_IOERR,
			    "IO error writting large pri tables to destination file %s",
			    out_file);
	}

	if (fclose(fp) != 0)
		err(EX_IOERR, "IO error closing destination file %s",
		    out_file);

#ifdef COLLATE_DEBUG
	if (debug)
		collate_print_tables();
#endif
	exit(EX_OK);
}
;
405 datafile : statment_list
406 | blank_lines start_localedef localedef_sections blank_lines end_localedef blank_lines
407 ;
408 statment_list : statment
409 | statment_list '\n' statment
410 ;
411 statment :
412 | charmap
413 | substitute
414 | order
415 ;
416 blank_lines :
417 | '\n'
418 | blank_lines '\n'
419 ;
/*
 * start_localedef: LC_COLLATE header line.  Opens the string database
 * (its existence is what later marks the input as localedef syntax) and
 * resets the directive state so order_start can fill it in.
 */
start_localedef : START_LC_COLLATE '\n' {
	int w;

	stringdb = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL);
	if (stringdb == NULL)
		err(1, "dbopen stringdb");
	directive_count = 0;
	for (w = 0; w < COLL_WEIGHTS_MAX; w++)
		info.directive[w] = DIRECTIVE_UNDEF;
}
;
429 end_localedef : END_LC_COLLATE '\n'
430 ;
431 localedef_sections : localedef_preface localedef_order
432 ;
433 localedef_preface : localedef_statment '\n'
434 | localedef_preface localedef_statment '\n'
435 ;
436 localedef_statment :
437 | charmap
438 | collating_element
439 | collating_symbol
440 ;
/*
 * collating_element: "collating-element <name> from <string>".
 * Registers <name> as a new chain symbol with no priority yet and
 * creates the matching chain record.
 */
collating_element : COLLATING_ELEMENT ELEM FROM STRING {
	struct symbol *sym;
	int slen;

	if (wcslen($2) > CHARMAP_SYMBOL_LEN)
		yyerror("collating-element symbol name '%s' is too long", showwcs($2, CHARMAP_SYMBOL_LEN));
	slen = wcslen($4);
	if (slen > STR_LEN)
		yyerror("collating-element string '%s' is too long", showwcs($4, STR_LEN));
	if (slen < 2)
		yyerror("collating-element string '%s' must be at least two characters", showwcs($4, STR_LEN));

	/* the symbol must not exist yet */
	sym = getsymbol($2, NOTEXISTS);
	sym->type = SYMBOL_CHAIN;
	sym->val = PRI_UNDEFINED;
	wcsncpy(sym->u.str, $4, STR_LEN);
	getchain($4, NOTEXISTS);
}
;
/*
 * collating_symbol: "collating-symbol <name>".  Registers <name> as a
 * new symbol of type SYMBOL_SYMBOL with no priority assigned yet.
 */
collating_symbol : COLLATING_SYMBOL ELEM {
	struct symbol *s;

	/* Report the right statement kind; the message used to be a
	 * copy of the collating-element one. */
	if (wcslen($2) > CHARMAP_SYMBOL_LEN)
		yyerror("collating-symbol name '%s' is too long", showwcs($2, CHARMAP_SYMBOL_LEN));
	s = getsymbol($2, NOTEXISTS);
	s->val = PRI_UNDEFINED;
	s->type = SYMBOL_SYMBOL;
}
;
466 localedef_order : order_start order_lines1 order_second_pass order_lines2 order_end
467 ;
468 order_start: ORDER_START order_start_list '\n'
469 ;
/*
 * order_second_pass: marker between the two passes over the order
 * lines.  Bumps the pass counter and forgets the previous-line state.
 */
order_second_pass: ORDER_SECOND_PASS {
	order_pass++;
	prev_elem = NULL;
	prev_line = LINE_NONE;
}
;
/*
 * order_start_list: the ';'-separated directive groups of order_start.
 * Each completed group defaults to DIRECTIVE_FORWARD when no direction
 * bit was given, then advances directive_count.
 */
order_start_list : order_start_list_directives {
	if (directive_count > 0)
		yyerror("Multiple order_start lines not allowed");
	if (!(info.directive[0] & DIRECTIVE_DIRECTION_MASK))
		info.directive[0] |= DIRECTIVE_FORWARD;
	directive_count++;
}
	| order_start_list ';' order_start_list_directives {
	if (directive_count >= COLL_WEIGHTS_MAX)
		yyerror("only COLL_WEIGHTS_MAX weights allowed");
	if (!(info.directive[directive_count] & DIRECTIVE_DIRECTION_MASK))
		info.directive[directive_count] |= DIRECTIVE_FORWARD;
	directive_count++;
}
;
/*
 * order_start_list_directives: one ','-separated group of directives
 * (e.g. forward,position) for the weight level directive_count.
 * The flags are OR-ed together into info.directive[directive_count].
 */
order_start_list_directives : ORDER_DIRECTIVE {
	/* This action runs before the enclosing order_start_list action
	 * gets a chance to reject an extra group, so the bound must be
	 * checked here, before the array is written.
	 * NOTE(review): assumes yyerror() does not return, as the rest of
	 * this grammar already does. */
	if (directive_count >= COLL_WEIGHTS_MAX)
		yyerror("only COLL_WEIGHTS_MAX weights allowed");
	info.directive[directive_count] = $1;
}
	| order_start_list_directives ',' ORDER_DIRECTIVE {
	int direction = ($3 & DIRECTIVE_DIRECTION_MASK);
	int prev = (info.directive[directive_count] & DIRECTIVE_DIRECTION_MASK);

	/* a group may name at most one direction */
	if (direction && prev && direction != prev)
		yyerror("The forward and backward directives are mutually exclusive");
	info.directive[directive_count] |= $3;
}
;
502 order_lines1 : order_line1 '\n'
503 | order_lines1 order_line1 '\n'
504 ;
/*
 * order_line1: one line of the first pass over the order section.
 * This pass only assigns primary values (symbol->val) to the collating
 * identifiers; the weights are parsed and remembered so that ellipsis
 * lines can be validated, but no priority tables are filled in yet.
 */
order_line1 :
	| ELEM {
	struct symbol *elem = getsymbol($1, EXISTS);

	if (elem->val != PRI_UNDEFINED)
		yyerror("<%s> redefined", showwcs($1, CHARMAP_SYMBOL_LEN));
	if (prev_line != LINE_ELLIPSIS) {
		elem->val = prim_pri;
	} else {
		/* This line closes an ellipsis: number every character
		 * strictly between the two endpoints consecutively. */
		struct symbol *fill;
		wchar_t wc;
		int pri;

		/* NOTE(review): the cases fall through; this presumably
		 * relies on yyerror() not returning. */
		switch (elem->type) {
		case SYMBOL_CHAIN:
			yyerror("Chain <%s> can't be endpoints of ellipsis", showwcs($1, CHARMAP_SYMBOL_LEN));
		case SYMBOL_SYMBOL:
			yyerror("Collating symbol <%s> can't be endpoints of ellipsis", showwcs($1, CHARMAP_SYMBOL_LEN));
		}
		if (elem->u.wc <= prev_elem->u.wc)
			yyerror("<%s> is before starting point of ellipsis", showwcs($1, CHARMAP_SYMBOL_LEN));
		wc = prev_elem->u.wc + 1;
		pri = prev_elem->val + 1;
		while (wc < elem->u.wc) {
			fill = getsymbolbychar(wc);
			if (fill->val != PRI_UNDEFINED)
				yyerror("<%s> was previously defined while filling ellipsis symbols", showwcs(fill->name, CHARMAP_SYMBOL_LEN));
			fill->val = pri;
			wc++;
			pri++;
		}
		elem->val = pri;
	}
	prim_pri = elem->val + 1;
	weight_index = 0;
} weights {
	int w;
	struct symbol *elem = getsymbol($1, EXISTS);

	if (elem->type == SYMBOL_SYMBOL) {
		if (weight_index != 0)
			yyerror("Can't specify weights for collating symbol <%s>", showwcs($1, CHARMAP_SYMBOL_LEN));
	} else if (weight_index == 0) {
		/* no explicit weights: the element weighs as itself */
		for (w = 0; w < directive_count; w++)
			weight_table[w] = elem;
	} else if (weight_index != directive_count) {
		yyerror("Not enough weights specified");
	}
	memcpy(prev_weight_table, weight_table, sizeof(weight_table));
	prev_elem = elem;
	prev_line = LINE_NORMAL;
}
	| ELLIPSIS { weight_index = 0; allow_ellipsis = 1; } weights {
	int w;

	if (prev_line == LINE_ELLIPSIS)
		yyerror("Illegal sequential ellipsis lines");
	if (prev_line == LINE_UNDEFINED)
		yyerror("Ellipsis line can not follow UNDEFINED line");
	if (prev_line == LINE_NONE)
		yyerror("Ellipsis line must follow a collating identifier lines");
	if (weight_index == 0) {
		for (w = 0; w < directive_count; w++)
			weight_table[w] = &sym_ellipsis;
	} else if (weight_index != directive_count) {
		yyerror("Not enough weights specified");
	}
	/* For every weight given as an ellipsis, the same weight on the
	 * previous line must be something a range can start from. */
	for (w = 0; w < directive_count; w++) {
		if (weight_table[w]->type == SYMBOL_ELLIPSIS) {
			switch (prev_weight_table[w]->type) {
			case SYMBOL_CHAIN:
				yyerror("Startpoint of ellipsis can't be a collating element");
			case SYMBOL_IGNORE:
				yyerror("Startpoint of ellipsis can't be IGNORE");
			case SYMBOL_SYMBOL:
				yyerror("Startpoint of ellipsis can't be a collating symbol");
			case SYMBOL_STRING:
				yyerror("Startpoint of ellipsis can't be a string");
			}
		}
	}
	/* keep the start-point weights around for the closing line */
	memcpy(prev2_weight_table, prev_weight_table, sizeof(prev_weight_table));
	memcpy(prev_weight_table, weight_table, sizeof(weight_table));
	allow_ellipsis = 0;
	prev_line = LINE_ELLIPSIS;
}
	| UNDEFINED {
	if (sym_undefined.val != PRI_UNDEFINED)
		yyerror("Multiple UNDEFINED lines not allowed");
	sym_undefined.val = prim_pri;
	prim_pri++;
	allow_ellipsis = 1;
	weight_index = 0;
} weights {
	int w;

	if (weight_index == 0) {
		/* default: UNDEFINED itself, then ellipsis for the rest */
		weight_table[0] = &sym_undefined;
		for (w = 1; w < directive_count; w++)
			weight_table[w] = &sym_ellipsis;
	} else if (weight_index != directive_count) {
		yyerror("Not enough weights specified");
	}
	memcpy(prev_weight_table, weight_table, sizeof(weight_table));
	prev_line = LINE_UNDEFINED;
}
;
598 order_lines2 : order_line2 '\n'
599 | order_lines2 order_line2 '\n'
600 ;
/*
 * order_line2: one line of the second pass over the order section.
 * The primary values were assigned in the first pass; this pass stores
 * the per-weight priorities into the character / chain tables, expands
 * ellipsis ranges, records string substitutions, and picks up the
 * priorities to use for UNDEFINED.
 */
order_line2 :
	| ELEM { weight_index = 0; } weights {
	int i;
	struct symbol *s = getsymbol($1, EXISTS);

	if (s->val == PRI_UNDEFINED)
		yyerror("<%s> undefined", showwcs($1, CHARMAP_SYMBOL_LEN));
	if (s->type == SYMBOL_SYMBOL) {
		if (weight_index != 0)
			yyerror("Can't specify weights for collating symbol <%s>", showwcs($1, CHARMAP_SYMBOL_LEN));
	} else if (weight_index == 0) {
		/* no explicit weights: the element weighs as itself */
		for(i = 0; i < directive_count; i++)
			weight_table[i] = s;
	} else if (weight_index != directive_count)
		yyerror("Not enough weights specified");

	if (prev_line == LINE_ELLIPSIS) {
		/* This line closes an ellipsis: fill in every character
		 * strictly between prev_elem and s, weight by weight,
		 * according to what the ellipsis line specified. */
		int w, x;
		for(i = 0; i < directive_count; i++) {
			switch (prev_weight_table[i]->type) {
			case SYMBOL_CHAR:
			case SYMBOL_CHAIN:
			case SYMBOL_IGNORE:
			case SYMBOL_SYMBOL:
				/* fixed weight for the whole range */
				for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) {
					struct __collate_st_char_pri *p = getpri(w);
					if (p->pri[i] != PRI_UNDEFINED)
						yyerror("Char 0x%02x previously defined", (unsigned int)w);
					p->pri[i] = prev_weight_table[i]->val;
				}
				break;
			case SYMBOL_ELLIPSIS:
				/* NOTE(review): the cases fall through; this
				 * presumably relies on yyerror() not returning. */
				switch (weight_table[i]->type) {
				case SYMBOL_STRING:
					yyerror("Strings can't be endpoints of ellipsis");
				case SYMBOL_CHAIN:
					yyerror("Chains can't be endpoints of ellipsis");
				case SYMBOL_IGNORE:
					yyerror("IGNORE can't be endpoints of ellipsis");
				case SYMBOL_SYMBOL:
					yyerror("Collation symbols can't be endpoints of ellipsis");
				}
				/* the weight range must be as long as the
				 * element range */
				if (s->val - prev_elem->val != weight_table[i]->val - prev2_weight_table[i]->val)
					yyerror("Range mismatch in weight %d", i);
				x = prev2_weight_table[i]->val + 1;
				for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) {
					struct __collate_st_char_pri *p = getpri(w);
					if (p->pri[i] != PRI_UNDEFINED)
						yyerror("Char 0x%02x previously defined", (unsigned int)w);
					p->pri[i] = x++;
				}
				break;
			case SYMBOL_STRING:
				/* same substitution string for the whole range */
				for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) {
					struct __collate_st_char_pri *p = getpri(w);
					if (p->pri[i] != PRI_UNDEFINED)
						yyerror("Char 0x%02x previously defined", (unsigned int)w);
					putsubst(w, i, prev_weight_table[i]->u.str);
					p->pri[i] = prev_weight_table[i]->val;
				}
				break;
			}
		}
	}

	/* Now store the weights of the element named on this line. */
	switch(s->type) {
	case SYMBOL_CHAR: {
		struct __collate_st_char_pri *p = getpri(s->u.wc);
		for(i = 0; i < directive_count; i++) {
			switch (weight_table[i]->type) {
			case SYMBOL_CHAR:
			case SYMBOL_CHAIN:
			case SYMBOL_IGNORE:
			case SYMBOL_SYMBOL:
				if (p->pri[i] != PRI_UNDEFINED)
					yyerror("Char 0x%02x previously defined", (unsigned int)s->u.wc);
				p->pri[i] = weight_table[i]->val;
				break;
			case SYMBOL_STRING:
				if (p->pri[i] != PRI_UNDEFINED)
					yyerror("Char 0x%02x previously defined", (unsigned int)s->u.wc);
				putsubst(s->u.wc, i, weight_table[i]->u.str);
				p->pri[i] = weight_table[i]->val;
				break;
			}
		}
		break;
	}
	case SYMBOL_CHAIN: {
		struct __collate_st_chain_pri *p = getchain(s->u.str, EXISTS);
		for(i = 0; i < directive_count; i++) {
			switch (weight_table[i]->type) {
			case SYMBOL_CHAR:
			case SYMBOL_CHAIN:
			case SYMBOL_IGNORE:
			case SYMBOL_SYMBOL:
				if (p->pri[i] != PRI_UNDEFINED)
					yyerror("Chain %s previously defined", showwcs(s->u.str, STR_LEN));
				p->pri[i] = weight_table[i]->val;
				break;
			case SYMBOL_STRING :
				if (wcsncmp(s->u.str, weight_table[i]->u.str, STR_LEN) != 0)
					yyerror("Chain/string mismatch");
				if (p->pri[i] != PRI_UNDEFINED)
					yyerror("Chain %s previously defined", showwcs(s->u.str, STR_LEN));
				/* negative value mean don't substitute
				 * the chain, but it is in an
				 * equivalence class */
				p->pri[i] = -weight_table[i]->val;
				break;
			}
		}
		break;
	}
	}
	memcpy(prev_weight_table, weight_table, sizeof(weight_table));
	prev_line = LINE_NORMAL;
	prev_elem = s;
}
	| ELLIPSIS { weight_index = 0; allow_ellipsis = 1; } weights {
	int i;

	if (prev_line == LINE_ELLIPSIS)
		yyerror("Illegal sequential ellipsis lines");
	if (prev_line == LINE_UNDEFINED)
		yyerror("Ellipsis line can not follow UNDEFINED line");
	if (prev_line == LINE_NONE)
		yyerror("Ellipsis line must follow a collating identifier lines");
	if (weight_index == 0) {
		for(i = 0; i < directive_count; i++)
			weight_table[i] = &sym_ellipsis;
	} else if (weight_index != directive_count)
		yyerror("Not enough weights specified");
	/* For every weight given as an ellipsis, the same weight on the
	 * previous line must be something a range can start from. */
	for(i = 0; i < directive_count; i++) {
		if (weight_table[i]->type != SYMBOL_ELLIPSIS)
			continue;
		switch (prev_weight_table[i]->type) {
		case SYMBOL_CHAIN:
			yyerror("Startpoint of ellipsis can't be a collating element");
		case SYMBOL_IGNORE:
			yyerror("Startpoint of ellipsis can't be IGNORE");
		case SYMBOL_SYMBOL:
			yyerror("Startpoint of ellipsis can't be a collating symbol");
		case SYMBOL_STRING:
			yyerror("Startpoint of ellipsis can't be a string");
		}
	}
	/* keep the start-point weights around for the closing line */
	memcpy(prev2_weight_table, prev_weight_table, sizeof(prev_weight_table));
	memcpy(prev_weight_table, weight_table, sizeof(weight_table));
	prev_line = LINE_ELLIPSIS;
	allow_ellipsis = 0;
}
	| UNDEFINED { weight_index = 0; allow_ellipsis = 1; } weights {
	int i;

	if (weight_index == 0) {
		/* default: UNDEFINED itself, then ellipsis for the rest */
		weight_table[0] = &sym_undefined;
		for(i = 1; i < directive_count; i++)
			weight_table[i] = &sym_ellipsis;
	} else if (weight_index != directive_count)
		yyerror("Not enough weights specified");
	for(i = 0; i < directive_count; i++) {
		switch (weight_table[i]->type) {
		case SYMBOL_CHAR:
		case SYMBOL_CHAIN:
		case SYMBOL_IGNORE:
		case SYMBOL_SYMBOL:
			info.undef_pri[i] = weight_table[i]->val;
			break;
		case SYMBOL_ELLIPSIS :
			/* Negative values mean that the priority is
			 * relative to the lexical value */
			info.undef_pri[i] = -sym_undefined.val;
			prim_pri = UNDEFINED_PRI;
			break;
		case SYMBOL_STRING :
			yyerror("Strings can't be used with UNDEFINED");
		}
	}
	memcpy(prev_weight_table, weight_table, sizeof(weight_table));
	prev_line = LINE_UNDEFINED;
}
;
780 weights :
781 | weight
782 | weights ';' weight
783 ;
/*
 * weight: a single entry of a ';'-separated weight list.  Each
 * alternative appends one symbol to weight_table[] after checking that
 * the list is not longer than the number of order_start directives.
 */
weight : ELEM {
	struct symbol *sym;

	if (weight_index >= directive_count)
		yyerror("More weights than specified by order_start");
	sym = getsymbol($1, EXISTS);
	/* the value is only required once order_pass is non-zero */
	if (order_pass != 0 && sym->val == PRI_UNDEFINED)
		yyerror("<%s> is undefined", showwcs($1, CHARMAP_SYMBOL_LEN));
	weight_table[weight_index] = sym;
	weight_index++;
}
	| ELLIPSIS {
	if (weight_index >= directive_count)
		yyerror("More weights than specified by order_start");
	if (allow_ellipsis == 0)
		yyerror("Ellipsis weight not allowed");
	weight_table[weight_index] = &sym_ellipsis;
	weight_index++;
}
	| IGNORE {
	if (weight_index >= directive_count)
		yyerror("More weights than specified by order_start");
	weight_table[weight_index] = &sym_ignore;
	weight_index++;
}
	| STRING {
	if (weight_index >= directive_count)
		yyerror("More weights than specified by order_start");
	if (wcslen($1) > STR_LEN)
		yyerror("String '%s' is too long", showwcs($1, STR_LEN));
	weight_table[weight_index] = getstring($1);
	weight_index++;
}
;
813 order_end : ORDER_END '\n'
814 ;
/*
 * charmap: "<name> <char>" definition.  Creates a new character symbol
 * with no priority yet and indexes it by its character value as well.
 */
charmap : DEFN CHAR {
	struct symbol *sym;
	int namelen;

	namelen = wcslen($1);
	if (namelen > CHARMAP_SYMBOL_LEN)
		yyerror("Charmap symbol name '%s' is too long", showwcs($1, CHARMAP_SYMBOL_LEN));
	sym = getsymbol($1, NOTEXISTS);
	sym->u.wc = $2;
	sym->val = PRI_UNDEFINED;
	sym->type = SYMBOL_CHAR;
	setsymbolbychar(sym);
}
;
/*
 * substitute: "substitute <char> with <string>".  Records the
 * substitution for weight level 0 after checking the string length.
 */
substitute : SUBSTITUTE CHAR WITH STRING {
	size_t needed = wcslen($4) + 1;

	if (needed > STR_LEN)
		yyerror("%s substitution is too long", charname($2));
	putsubst($2, 0, $4);
}
;
833 order : ORDER order_list
834 ;
835 order_list : item
836 | order_list ';' item
837 ;
/*
 * chain: two or more adjacent CHAR tokens.  The characters are
 * accumulated in the global curr_chain[] as a NUL-terminated wide
 * string; NUL itself cannot be part of a chain.
 */
chain : CHAR CHAR {
	curr_chain[0] = $1;
	curr_chain[1] = $2;
	if (curr_chain[0] == '\0' || curr_chain[1] == '\0')
		yyerror("\\0 can't be chained");
	curr_chain[2] = '\0';
}
	| chain CHAR {
	/* static storage: tb[1] is always the terminating NUL */
	static wchar_t tb[2];

	tb[0] = $2;
	if (tb[0] == '\0')
		yyerror("\\0 can't be chained");
	/* curr_chain is a wide string, so it cannot be handed to %s
	 * directly; convert it with showwcs() like the other messages. */
	if (wcslen(curr_chain) + 1 > STR_LEN)
		yyerror("Chain '%s' grows too long", showwcs(curr_chain, STR_LEN));
	(void)wcscat(curr_chain, tb);
}
;
/*
 * item: one ';'-separated entry of an old-style "order" list.
 * Plain characters, chains and ranges each consume primary priorities
 * and keep sec_pri in step with prim_pri; the bracketed forms restore
 * prim_pri from sec_pri after their sub-lists have been processed.
 */
item : CHAR {
	struct __collate_st_char_pri *entry = getpri($1);

	if (entry->pri[0] >= 0)
		yyerror("%s duplicated", charname($1));
	entry->pri[1] = prim_pri;
	entry->pri[0] = prim_pri;
	prim_pri++;
	sec_pri = prim_pri;
}
	| chain {
	struct __collate_st_chain_pri *entry = getchain(curr_chain, NOTEXISTS);

	entry->pri[1] = prim_pri;
	entry->pri[0] = prim_pri;
	prim_pri++;
	sec_pri = prim_pri;
}
	| CHAR RANGE CHAR {
	struct __collate_st_char_pri *entry;
	u_int code;

	if ($3 <= $1)
		yyerror("Illegal range %s -- %s", charname($1), charname2($3));

	/* every character of the range gets its own primary priority */
	code = $1;
	while (code <= $3) {
		entry = getpri(code);
		if (entry->pri[0] >= 0)
			yyerror("%s duplicated", charname(code));
		entry->pri[1] = prim_pri;
		entry->pri[0] = prim_pri;
		prim_pri++;
		code++;
	}
	sec_pri = prim_pri;
}
	| '{' mixed_order_list '}' {
	prim_pri = sec_pri;
}
	| '(' sec_order_list ')' {
	prim_pri = sec_pri;
}
;
889 mixed_order_list : mixed_sub_list {
890 sec_pri++;
891 }
892 | mixed_order_list ';' mixed_sub_list {
893 sec_pri++;
894 }
895 ;
896 mixed_sub_list : mixed_sub_item
897 | mixed_sub_list ',' mixed_sub_item
898 ;
899 sec_order_list : sec_sub_item
900 | sec_order_list ',' sec_sub_item
901 ;
/*
 * mixed_sub_item: member of a ','-separated group inside '{' ... '}'.
 * Every member receives the current primary and secondary priority;
 * neither counter is advanced here.
 */
mixed_sub_item : CHAR {
	struct __collate_st_char_pri *entry = getpri($1);

	if (entry->pri[0] >= 0)
		yyerror("%s duplicated", charname($1));
	entry->pri[1] = sec_pri;
	entry->pri[0] = prim_pri;
}
	| CHAR RANGE CHAR {
	struct __collate_st_char_pri *entry;
	u_int code;

	if ($3 <= $1)
		yyerror("Illegal range %s -- %s",
		    charname($1), charname2($3));

	code = $1;
	while (code <= $3) {
		entry = getpri(code);
		if (entry->pri[0] >= 0)
			yyerror("%s duplicated", charname(code));
		entry->pri[1] = sec_pri;
		entry->pri[0] = prim_pri;
		code++;
	}
}
	| chain {
	struct __collate_st_chain_pri *entry = getchain(curr_chain, NOTEXISTS);

	entry->pri[1] = sec_pri;
	entry->pri[0] = prim_pri;
}
/*
 * sec_sub_item: member of a ','-separated group inside '(' ... ')'.
 * All members share the current primary priority, and each one takes
 * the next secondary priority.
 */
sec_sub_item : CHAR {
	struct __collate_st_char_pri *entry = getpri($1);

	if (entry->pri[0] >= 0)
		yyerror("%s duplicated", charname($1));
	entry->pri[0] = prim_pri;
	entry->pri[1] = sec_pri;
	sec_pri++;
}
	| CHAR RANGE CHAR {
	struct __collate_st_char_pri *entry;
	u_int code;

	if ($3 <= $1)
		yyerror("Illegal range %s -- %s",
		    charname($1), charname2($3));

	code = $1;
	while (code <= $3) {
		entry = getpri(code);
		if (entry->pri[0] >= 0)
			yyerror("%s duplicated", charname(code));
		entry->pri[0] = prim_pri;
		entry->pri[1] = sec_pri;
		sec_pri++;
		code++;
	}
}
	| chain {
	struct __collate_st_chain_pri *entry = getchain(curr_chain, NOTEXISTS);

	entry->pri[0] = prim_pri;
	entry->pri[1] = sec_pri;
	sec_pri++;
}
;
959 %%
960 int
961 main(int ac, char **av)
962 {
963 int ch, z;
964
965 if ((charmapdb = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL)) == NULL)
966 err(1, "dbopen charmapdb");
967 if ((charmapdb2 = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL)) == NULL)
968 err(1, "dbopen charmapdb");
969 if ((largemapdb = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL)) == NULL)
970 err(1, "dbopen largemapdb");
971 if ((substdb[0] = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL)) == NULL)
972 err(1, "dbopen substdb[0]");
973 if ((chaindb = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL)) == NULL)
974 err(1, "dbopen chaindb");
975 /* -1 means an undefined priority, which we adjust after parsing */
976 for (ch = 0; ch <= UCHAR_MAX; ch++)
977 for(z = 0; z < COLL_WEIGHTS_MAX; z++)
978 __collate_char_pri_table[ch].pri[z] = PRI_UNDEFINED;
979 #ifdef COLLATE_DEBUG
980 while((ch = getopt(ac, av, ":do:I:")) != -1) {
981 #else
982 while((ch = getopt(ac, av, ":o:I:")) != -1) {
983 #endif
984 switch (ch)
985 {
986 #ifdef COLLATE_DEBUG
987 case 'd':
988 debug++;
989 break;
990 #endif
991 case 'o':
992 out_file = optarg;
993 break;
994
995 case 'I':
996 strlcpy(map_name, optarg, sizeof(map_name));
997 break;
998
999 default:
1000 usage();
1001 }
1002 }
1003 ac -= optind;
1004 av += optind;
1005 if (ac > 0) {
1006 if ((yyin = fopen(*av, "r")) == NULL)
1007 err(EX_UNAVAILABLE, "can't open source file %s", *av);
1008 }
1009 yyparse();
1010 return 0;
1011 }
1012
1013 static struct __collate_st_char_pri *
1014 getpri(int32_t c)
1015 {
1016 DBT key, val;
1017 struct __collate_st_char_pri *p;
1018 int ret;
1019
1020 if (c <= UCHAR_MAX)
1021 return &__collate_char_pri_table[c];
1022 key.data = &c;
1023 key.size = sizeof(int32_t);
1024 if ((ret = largemapdb->get(largemapdb, &key, &val, 0)) < 0)
1025 err(1, "getpri: Error getting %s", charname(c));
1026 if (ret != 0) {
1027 struct __collate_st_char_pri *pn;
1028 int z;
1029 if ((pn = (struct __collate_st_char_pri *)malloc(sizeof(struct __collate_st_char_pri))) == NULL)
1030 err(1, "getpri: malloc");
1031 for(z = 0; z < COLL_WEIGHTS_MAX; z++)
1032 pn->pri[z] = PRI_UNDEFINED;
1033 val.data = &pn;
1034 val.size = sizeof(struct __collate_st_char_pri *);
1035 if (largemapdb->put(largemapdb, &key, &val, 0) < 0)
1036 err(1, "getpri: Error storing %s", charname(c));
1037 nlargemap++;
1038 }
1039 memcpy(&p, val.data, sizeof(struct __collate_st_char_pri *));
1040 return p;
1041 }
1042
1043 static struct __collate_st_char_pri *
1044 haspri(int32_t c)
1045 {
1046 DBT key, val;
1047 struct __collate_st_char_pri *p;
1048 int ret;
1049
1050 if (c <= UCHAR_MAX)
1051 return &__collate_char_pri_table[c];
1052 key.data = &c;
1053 key.size = sizeof(int32_t);
1054 if ((ret = largemapdb->get(largemapdb, &key, &val, 0)) < 0)
1055 err(1, "haspri: Error getting %s", charname(c));
1056 if (ret != 0)
1057 return NULL;
1058 memcpy(&p, val.data, sizeof(struct __collate_st_char_pri *));
1059 return p;
1060 }
1061
1062 static struct __collate_st_chain_pri *
1063 getchain(const wchar_t *wcs, int exists)
1064 {
1065 DBT key, val;
1066 struct __collate_st_chain_pri *p;
1067 int ret;
1068
1069 key.data = (void *)wcs;
1070 key.size = __collate_wcsnlen(wcs, STR_LEN) * sizeof(wchar_t);
1071 if ((ret = chaindb->get(chaindb, &key, &val, 0)) < 0)
1072 err(1, "getchain: Error getting \"%s\"", showwcs(wcs, STR_LEN));
1073 if (ret != 0) {
1074 struct __collate_st_chain_pri *pn;
1075 int z;
1076 if (exists > 0)
1077 errx(1, "getchain: \"%s\" is not defined", showwcs(wcs, STR_LEN));
1078 if ((pn = (struct __collate_st_chain_pri *)malloc(sizeof(struct __collate_st_chain_pri))) == NULL)
1079 err(1, "getchain: malloc");
1080 for(z = 0; z < COLL_WEIGHTS_MAX; z++)
1081 pn->pri[z] = PRI_UNDEFINED;
1082 bzero(pn->str, sizeof(pn->str));
1083 wcsncpy(pn->str, wcs, STR_LEN);
1084 val.data = &pn;
1085 val.size = sizeof(struct __collate_st_chain_pri *);
1086 if (chaindb->put(chaindb, &key, &val, 0) < 0)
1087 err(1, "getchain: Error storing \"%s\"", showwcs(wcs, STR_LEN));
1088 nchain++;
1089 } else if (exists == 0)
1090 errx(1, "getchain: \"%s\" already exists", showwcs(wcs, STR_LEN));
1091 memcpy(&p, val.data, sizeof(struct __collate_st_chain_pri *));
1092 return p;
1093 }
1094
1095 struct symbol *
1096 getsymbol(const wchar_t *wcs, int exists)
1097 {
1098 DBT key, val;
1099 struct symbol *p;
1100 int ret;
1101
1102 key.data = (void *)wcs;
1103 key.size = wcslen(wcs) * sizeof(wchar_t);
1104 if ((ret = charmapdb->get(charmapdb, &key, &val, 0)) < 0)
1105 err(1, "getsymbol: Error getting \"%s\"", showwcs(wcs, CHARMAP_SYMBOL_LEN));
1106 if (ret != 0) {
1107 struct symbol *pn;
1108 if (exists > 0)
1109 errx(1, "getsymbol: \"%s\" is not defined", showwcs(wcs, CHARMAP_SYMBOL_LEN));
1110 if ((pn = (struct symbol *)malloc(sizeof(struct symbol))) == NULL)
1111 err(1, "getsymbol: malloc");
1112 pn->val = PRI_UNDEFINED;
1113 wcsncpy(pn->name, wcs, CHARMAP_SYMBOL_LEN);
1114 val.data = &pn;
1115 val.size = sizeof(struct symbol *);
1116 if (charmapdb->put(charmapdb, &key, &val, 0) < 0)
1117 err(1, "getsymbol: Error storing \"%s\"", showwcs(wcs, CHARMAP_SYMBOL_LEN));
1118 } else if (exists == 0)
1119 errx(1, "getsymbol: \"%s\" already exists", showwcs(wcs, CHARMAP_SYMBOL_LEN));
1120 memcpy(&p, val.data, sizeof(struct symbol *));
1121 return p;
1122 }
1123
1124 static struct symbol *
1125 getsymbolbychar(wchar_t wc)
1126 {
1127 DBT key, val;
1128 struct symbol *p;
1129 int ret;
1130
1131 key.data = &wc;
1132 key.size = sizeof(wchar_t);
1133 if ((ret = charmapdb2->get(charmapdb2, &key, &val, 0)) < 0)
1134 err(1, "getsymbolbychar: Error getting Char 0x%02x", wc);
1135 if (ret != 0)
1136 errx(1, "getsymbolbychar: Char 0x%02x is not defined", wc);
1137 memcpy(&p, val.data, sizeof(struct symbol *));
1138 return p;
1139 }
1140
1141 static struct symbol *
1142 hassymbolbychar(wchar_t wc)
1143 {
1144 DBT key, val;
1145 struct symbol *p;
1146 int ret;
1147
1148 key.data = &wc;
1149 key.size = sizeof(wchar_t);
1150 if ((ret = charmapdb2->get(charmapdb2, &key, &val, 0)) < 0)
1151 err(1, "hassymbolbychar: Error getting Char 0x%02x", wc);
1152 if (ret != 0)
1153 return NULL;
1154 memcpy(&p, val.data, sizeof(struct symbol *));
1155 return p;
1156 }
1157
1158 static void
1159 setsymbolbychar(struct symbol *s)
1160 {
1161 DBT key, val;
1162 struct symbol *p;
1163 int ret;
1164
1165 key.data = &s->u.wc;
1166 key.size = sizeof(wchar_t);
1167 val.data = &s;
1168 val.size = sizeof(struct symbol *);
1169 if (charmapdb2->put(charmapdb2, &key, &val, 0) < 0)
1170 err(1, "setsymbolbychar: Error storing <%s>", showwcs(s->name, CHARMAP_SYMBOL_LEN));
1171 }
1172
1173 struct symbol *
1174 getstring(const wchar_t *wcs)
1175 {
1176 DBT key, val;
1177 struct symbol *p;
1178 int ret;
1179
1180 key.data = (void *)wcs;
1181 key.size = wcslen(wcs) * sizeof(wchar_t);
1182 if ((ret = stringdb->get(stringdb, &key, &val, 0)) < 0)
1183 err(1, "getstring: Error getting \"%s\"", showwcs(wcs, STR_LEN));
1184 if (ret != 0) {
1185 struct symbol *pn;
1186 if ((pn = (struct symbol *)malloc(sizeof(struct symbol))) == NULL)
1187 err(1, "getstring: malloc");
1188 pn->type = SYMBOL_STRING;
1189 pn->val = prim_pri++;
1190 wcsncpy(pn->u.str, wcs, STR_LEN);
1191 val.data = &pn;
1192 val.size = sizeof(struct symbol *);
1193 if (stringdb->put(stringdb, &key, &val, 0) < 0)
1194 err(1, "getstring: Error storing \"%s\"", showwcs(wcs, STR_LEN));
1195 }
1196 memcpy(&p, val.data, sizeof(struct symbol *));
1197 return p;
1198 }
1199
/*
 * Currently a no-op: the function has no body and ignores all three
 * arguments.
 */
static void
makeforwardref(int i, const struct symbol *from, const struct symbol * to)
{
	(void)i;
	(void)from;
	(void)to;
}
1204
1205 static void
1206 putsubst(int32_t c, int i, const wchar_t *str)
1207 {
1208 DBT key, val;
1209 int ret;
1210 wchar_t clean[STR_LEN];
1211
1212 if (!substdb[i])
1213 if ((substdb[i] = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL)) == NULL)
1214 err(1, "dbopen substdb[%d]", i);
1215 key.data = &c;
1216 key.size = sizeof(int32_t);
1217 bzero(clean, sizeof(clean));
1218 wcsncpy(clean, str, STR_LEN);
1219 val.data = clean;
1220 val.size = sizeof(clean);
1221 if ((ret = substdb[i]->put(substdb[i], &key, &val, R_NOOVERWRITE)) < 0)
1222 err(1, "putsubst: Error on %s", charname(c));
1223 if (ret != 0)
1224 errx(1, "putsubst: Duplicate substitution of %s", charname(c));
1225 nsubst[i]++;
1226 }
1227
1228 static int
1229 hassubst(int32_t c, int i)
1230 {
1231 DBT key, val;
1232 int ret;
1233
1234 if (!substdb[i])
1235 return 0;
1236 key.data = &c;
1237 key.size = sizeof(int32_t);
1238 if ((ret = substdb[i]->get(substdb[i], &key, &val, 0)) < 0)
1239 err(1, "hassubst: Error getting %s", charname(c));
1240 return (ret == 0);
1241 }
1242
1243 static int
1244 chainpricompar(const void *a, const void *b)
1245 {
1246 return wcsncmp(((struct __collate_st_chain_pri *)a)->str, ((struct __collate_st_chain_pri *)b)->str, STR_LEN);
1247 }
1248
1249 static int
1250 charpricompar(const void *a, const void *b)
1251 {
1252 return ((struct __collate_st_large_char_pri *)a)->val - ((struct __collate_st_large_char_pri *)b)->val;
1253 }
1254
1255 static int
1256 substcompar(const void *a, const void *b)
1257 {
1258 return ((struct __collate_st_subst *)a)->val - ((struct __collate_st_subst *)b)->val;
1259 }
1260
/*
 * Find the first occurrence of c within the first len wide characters of
 * s, stopping early at a terminating NUL.
 *
 * Returns a pointer to the matching element, or NULL when c does not
 * occur (searching for NUL itself always yields NULL).  The length bound
 * is tested before each element is read, so an unterminated buffer of
 * exactly len elements is never read past its end.
 */
static const wchar_t *
__collate_wcsnchr(const wchar_t *s, wchar_t c, int len)
{
	for (; len > 0 && *s; s++, len--) {
		if (*s == c)
			return s;
	}
	return NULL;
}
1272
/*
 * Return the number of wide characters in s before the terminating NUL,
 * but never more than len.
 *
 * The length bound is tested before each element is read, so a
 * fixed-width buffer of exactly len elements with no terminator is never
 * read past its end.
 */
static int
__collate_wcsnlen(const wchar_t *s, int len)
{
	int n = 0;

	while (n < len && s[n])
		n++;
	return n;
}
1283
/*
 * Print the command synopsis on stderr and exit with EX_USAGE.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: colldef [-o out_file] [-I map_dir] [filename]\n";

	fputs(synopsis, stderr);
	exit(EX_USAGE);
}
1290
1291 void
1292 yyerror(const char *fmt, ...)
1293 {
1294 va_list ap;
1295 char msg[128];
1296
1297 va_start(ap, fmt);
1298 vsnprintf(msg, sizeof(msg), fmt, ap);
1299 va_end(ap);
1300 errx(EX_UNAVAILABLE, "%s, near line %d", msg, line_no);
1301 }
1302
1303 char *
1304 showwcs(const wchar_t *t, int len)
1305 {
1306 static char buf[8* CHARMAP_SYMBOL_LEN];
1307 char *cp = buf;
1308
1309 for(; *t && len > 0; len--, t++) {
1310 if (*t >=32 && *t <= 126)
1311 *cp++ = *t;
1312 else {
1313 sprintf(cp, "\\x{%02x}", *t);
1314 cp += strlen(cp);
1315 }
1316 }
1317 *cp = 0;
1318 return buf;
1319 }
1320
1321 static char *
1322 charname(wchar_t wc)
1323 {
1324 static char buf[CHARMAP_SYMBOL_LEN + 1];
1325 struct symbol *s = hassymbolbychar(wc);
1326
1327 if (s)
1328 strcpy(buf, showwcs(s->name, CHARMAP_SYMBOL_LEN));
1329 else
1330 sprintf(buf, "Char 0x%02x", wc);
1331 return buf;
1332 }
1333
1334 static char *
1335 charname2(wchar_t wc)
1336 {
1337 static char buf[CHARMAP_SYMBOL_LEN + 1];
1338 struct symbol *s = hassymbolbychar(wc);
1339
1340 if (s)
1341 strcpy(buf, showwcs(s->name, CHARMAP_SYMBOL_LEN));
1342 else
1343 sprintf(buf, "Char 0x%02x", wc);
1344 return buf;
1345 }
1346
1347 #ifdef COLLATE_DEBUG
/*
 * Render a single character code for the debug dump: printable values
 * (32..126) as 'c' followed by a space, everything else as \x{HEX} with
 * at least two hex digits.
 *
 * The result lives in a static buffer that is overwritten by the next
 * call.  The buffer is sized for the longest escape (eight hex digits)
 * and written with snprintf(), so it cannot be overrun.
 */
static char *
show(int c)
{
	static char buf[16];	/* "\x{" + 8 hex digits + "}" + NUL fits */

	if (c >=32 && c <= 126)
		snprintf(buf, sizeof(buf), "'%c' ", c);
	else
		snprintf(buf, sizeof(buf), "\\x{%02x}", (unsigned int)c);
	return buf;
}
1359
1360 static void
1361 collate_print_tables(void)
1362 {
1363 int i, z;
1364
1365 printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n",
1366 info.directive[0], info.directive[1],
1367 info.flags, info.chain_max_len,
1368 info.directive_count,
1369 info.undef_pri[0], info.undef_pri[1],
1370 info.subst_count[0], info.subst_count[1],
1371 info.chain_count, info.large_pri_count);
1372 for(z = 0; z < info.directive_count; z++) {
1373 if (info.subst_count[z] > 0) {
1374 struct __collate_st_subst *p2 = __collate_substitute_table[z];
1375 if (z == 0 && (info.flags & COLLATE_SUBST_DUP))
1376 printf("Both substitute tables:\n");
1377 else
1378 printf("Substitute table %d:\n", z);
1379 for (i = info.subst_count[z]; i-- > 0; p2++)
1380 printf("\t%s --> \"%s\"\n",
1381 show(p2->val),
1382 showwcs(p2->str, STR_LEN));
1383 }
1384 }
1385 if (info.chain_count > 0) {
1386 printf("Chain priority table:\n");
1387 struct __collate_st_chain_pri *p2 = __collate_chain_pri_table;
1388 for (i = info.chain_count; i-- > 0; p2++) {
1389 printf("\t\"%s\" :", showwcs(p2->str, STR_LEN));
1390 for(z = 0; z < info.directive_count; z++)
1391 printf(" %d", p2->pri[z]);
1392 putchar('\n');
1393 }
1394 }
1395 printf("Char priority table:\n");
1396 {
1397 struct __collate_st_char_pri *p2 = __collate_char_pri_table;
1398 for (i = 0; i < UCHAR_MAX + 1; i++, p2++) {
1399 printf("\t%s :", show(i));
1400 for(z = 0; z < info.directive_count; z++)
1401 printf(" %d", p2->pri[z]);
1402 putchar('\n');
1403 }
1404 }
1405 if (info.large_pri_count > 0) {
1406 struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table;
1407 printf("Large priority table:\n");
1408 for (i = info.large_pri_count; i-- > 0; p2++) {
1409 printf("\t%s :", show(p2->val));
1410 for(z = 0; z < info.directive_count; z++)
1411 printf(" %d", p2->pri.pri[z]);
1412 putchar('\n');
1413 }
1414 }
1415 }
1416 #endif