4 * The Regents of the University of California. All rights reserved.
6 * This code is derived from software contributed to Berkeley by
7 * Paul Borman at Krystal Technologies.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 static char sccsid[] = "@(#)yacc.y 8.1 (Berkeley) 6/6/93";
44 #include <sys/cdefs.h>
45 __FBSDID("$FreeBSD: src/usr.bin/mklocale/yacc.y,v 1.28 2008/01/22 00:04:50 ache Exp $");
47 #include <arpa/inet.h>
/* Upper bound on named character classes; the CHARCLASS action aborts once charclass_index reaches it. */
61 #define MAX_CHARCLASS 4
/* Bit offset of the first class mask: class number k is given the mask 1 << (k + CHARCLASSBIT). */
62 #define CHARCLASSBIT 4
/* Helpers declared here for use by the grammar actions and the functions that follow. */
64 static void *xmalloc(unsigned int sz);
65 static uint32_t *xlalloc(unsigned int sz);
66 void yyerror(const char *s);
67 static uint32_t *xrelalloc(uint32_t *old, unsigned int sz);
68 static void dump_tables(void);
69 static void cleanout(void);
/* Output file name used in diagnostics; main() replaces it with the option argument before calling fopen(). */
71 const char *locale_file = "<stdout>";
/* Rune maps filled in by the grammar actions through set_map() and set_digitmap(). */
73 rune_map maplower = { { 0 }, NULL };
74 rune_map mapupper = { { 0 }, NULL };
75 rune_map types = { { 0 }, NULL };
/* Header structure of the generated locale; it is the first item written to the output file. */
77 _FileRuneLocale new_locale = { "", "", 0, 0, 0, {}, {}, {}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
/* Heap copy of the VARIABLE string; its size (terminator included) is kept in new_locale.variable_len. */
78 char *variable = NULL;
/* Character classes registered so far, and how many of the slots are in use. */
80 rune_charclass charclasses[MAX_CHARCLASS];
81 int charclass_index = 0;
83 void set_map(rune_map *, rune_list *, uint32_t);
84 void set_digitmap(rune_map *, rune_list *);
85 void add_map(rune_map *, rune_list *, uint32_t);
86 static void usage(void);
106 %token <str> VARIABLE
/*
 * One statement of the locale source.  The actions record their result in
 * the file-scope tables: new_locale, variable, types, maplower, mapupper
 * and charclasses.
 */
126 entry : ENCODING STRING
/* An encoding name outside the list below only produces a warning; the name is still stored. */
127 { if (strcmp($2, "NONE") &&
128 strcmp($2, "ASCII") &&
129 strcmp($2, "UTF-8") &&
132 strcmp($2, "GB18030") &&
133 strcmp($2, "GB2312") &&
134 strcmp($2, "BIG5") &&
135 strcmp($2, "MSKanji") &&
137 warnx("ENCODING %s is not supported by libc", $2);
/* NOTE(review): strncpy with the full field size leaves no NUL terminator when the name fills the field. */
138 strncpy(new_locale.encoding, $2,
139 sizeof(new_locale.encoding)); }
/* Keep a private copy of the string; variable_len counts the terminating NUL as well. */
141 { new_locale.variable_len = strlen($1) + 1;
142 variable = xmalloc(new_locale.variable_len);
143 strcpy(variable, $1);
146 { warnx("the INVALID keyword is deprecated"); }
/* The type map receives $1 as its flag; both case maps are filled with flag 0. */
148 { set_map(&types, $2, $1); }
150 { set_map(&maplower, $2, 0); }
152 { set_map(&mapupper, $2, 0); }
155 if (($2->map >= 0) && ($2->map <= 255)) { /* Data corruption otherwise */
156 set_digitmap(&types, $2);
/* Look the class name up among the registered classes, register it when new, then flag the listed runes with its mask. */
159 | CHARCLASS STRING list
162 if (strlen($2) > CHARCLASS_NAME_MAX)
163 errx(1, "Exceeded maximum charclass name size (%d) \"%s\"", CHARCLASS_NAME_MAX, $2);
164 for(i = 0; i < charclass_index; i++)
165 if (strncmp(charclasses[i].name, $2, CHARCLASS_NAME_MAX) == 0)
167 if (i >= charclass_index) {
168 if (charclass_index >= MAX_CHARCLASS)
169 errx(1, "Exceeded maximum number of charclasses (%d)", MAX_CHARCLASS);
170 strncpy(charclasses[charclass_index].name, $2, CHARCLASS_NAME_MAX);
171 charclasses[charclass_index].mask = (1 << (charclass_index + CHARCLASSBIT));
174 set_map(&types, $3, charclasses[i].mask);
180 $$ = (rune_list *)xmalloc(sizeof(rune_list));
187 $$ = (rune_list *)xmalloc(sizeof(rune_list));
194 $$ = (rune_list *)xmalloc(sizeof(rune_list));
199 | list RUNE THRU RUNE
201 $$ = (rune_list *)xmalloc(sizeof(rune_list));
/*
 * A sequence of bracketed mapping entries.  An entry is either a pair of
 * runes or a THRU range followed by a colon and a third rune.  Every
 * alternative allocates a new rune_list node with xmalloc() as its value.
 */
208 map : LBRK RUNE RUNE RBRK
210 $$ = (rune_list *)xmalloc(sizeof(rune_list));
216 | map LBRK RUNE RUNE RBRK
218 $$ = (rune_list *)xmalloc(sizeof(rune_list));
224 | LBRK RUNE THRU RUNE ':' RUNE RBRK
226 $$ = (rune_list *)xmalloc(sizeof(rune_list));
232 | map LBRK RUNE THRU RUNE ':' RUNE RBRK
234 $$ = (rune_list *)xmalloc(sizeof(rune_list));
/*
 * Program entry point.  Accepts the options d and o (o takes an argument),
 * opens the output file, optionally redirects stdin to a source file named
 * on the command line and stamps the magic into new_locale.
 */
254 main(int ac, char *av[])
260 while ((x = getopt(ac, av, "do:")) != -1) {
/* The option argument becomes the output file; failing to open it for writing is fatal. */
266 locale_file = optarg;
267 if ((fp = fopen(locale_file, "w")) == NULL)
268 err(1, "%s: fopen", locale_file);
/* Dispatch on the number of operands left after option parsing. */
276 switch (ac - optind) {
/* Reopen stdin on the named source file; failure is fatal. */
280 if (freopen(av[optind], "r", stdin) == 0)
281 err(1, "%s: freopen", av[optind]);
286 for (x = 0; x < _CACHED_RUNES; ++x) {
/* Copy the magic constant into the header, filling the whole magic field. */
290 memcpy(new_locale.magic, _RUNE_MAGIC_A, sizeof(new_locale.magic));
300 fprintf(stderr, "usage: mklocale [-d] [-o output] [source]\n");
308 fprintf(stderr, "%s\n", s);
315 void *r = malloc(sz);
325 uint32_t *r = (uint32_t *)malloc(sz * sizeof(uint32_t));
336 uint32_t *r = (uint32_t *)realloc((char *)old,
337 sz * sizeof(uint32_t));
339 errx(1, "xrelalloc");
/*
 * Hands list nodes to add_map() together with flag.  The successor is read
 * into nlist before the call because add_map() can overwrite list->next
 * while linking the node into the map.
 */
344 set_map(map, list, flag)
350 rune_list *nlist = list->next;
351 add_map(map, list, flag);
/*
 * Walks every rune i from list->min to list->max and computes the value
 * list->map + (i - list->min).  When that value is non-zero, a newly
 * allocated node tmp is passed to add_map() with the value as its flag.
 * The grammar calls this only when list->map lies in the range 0..255.
 */
357 set_digitmap(map, list)
364 rune_list *nlist = list->next;
365 for (i = list->min; i <= list->max; ++i) {
366 if (list->map + (i - list->min)) {
367 rune_list *tmp = (rune_list *)xmalloc(sizeof(rune_list));
370 add_map(map, tmp, list->map + (i - list->min));
/*
 * Merges the range [list->min, list->max] described by list into map.
 *
 * Runes below _CACHED_RUNES go directly into map->map[], either by OR-ing
 * flag into the slot or by storing successive list->map values.  Whatever
 * is left is placed in the chain that starts at map->root: linked in as a
 * new node when it touches no existing range, otherwise merged with the
 * range or ranges it overlaps, with one flag word per rune kept in the
 * types arrays.  A case map entry (flag equal to 0) whose offset differs
 * from that of an overlapping range is a fatal error.
 *
 * NOTE(review): the comparisons against r->min and r->max suggest the
 * chain is kept in ascending rune order; confirm against the full file.
 */
379 add_map(map, list, flag)
/* Consume the part of the range that fits in the directly indexed table. */
389 while (list->min < _CACHED_RUNES && list->min <= list->max) {
391 map->map[list->min++] |= flag;
393 map->map[list->min++] = list->map++;
396 if (list->min > list->max) {
/* Number of runes still to be placed in the chain. */
401 run = list->max - list->min + 1;
/* Empty chain, or the range ends before the first node begins: the new node becomes the head. */
403 if (!(r = map->root) || (list->max < r->min - 1)
404 || (!flag && list->max == r->min - 1)) {
406 list->types = xlalloc(run);
407 for (i = 0; i < run; ++i)
408 list->types[i] = flag;
410 list->next = map->root;
415 for (r = map->root; r && r->max + 1 < list->min; r = r->next)
420 * We are off the end.
423 list->types = xlalloc(run);
424 for (i = 0; i < run; ++i)
425 list->types[i] = flag;
432 if (list->max < r->min - 1) {
434 * We come before this range and we do not intersect it.
435 * We are not before the root node, it was checked before the loop
438 list->types = xlalloc(run);
439 for (i = 0; i < run; ++i)
440 list->types[i] = flag;
442 list->next = lr->next;
448 * At this point we have found that we at least intersect with
449 * the range pointed to by `r', we might intersect with one or
450 * more ranges beyond `r' as well.
453 if (!flag && list->map - list->min != r->map - r->min) {
455 * There are only two cases when we are doing case maps and
456 * our maps needn't have the same offset. When we are adjoining
457 * but not intersecting.
459 if (list->max + 1 == r->min) {
464 if (list->min - 1 == r->max) {
465 list->next = r->next;
469 errx(1, "error: conflicting map entries");
472 if (list->min >= r->min && list->max <= r->max) {
478 for (i = list->min; i <= list->max; ++i)
479 r->types[i - r->min] |= flag;
484 if (list->min <= r->min && list->max >= r->max) {
486 * Superset case. Make him big enough to hold us.
487 * We might need to merge with the guy after him.
490 list->types = xlalloc(list->max - list->min + 1);
492 for (i = list->min; i <= list->max; ++i)
493 list->types[i - list->min] = flag;
495 for (i = r->min; i <= r->max; ++i)
496 list->types[i - list->min] |= r->types[i - r->min];
499 r->types = list->types;
506 } else if (list->min < r->min) {
508 * Our tail intersects his head.
511 list->types = xlalloc(r->max - list->min + 1);
513 for (i = r->min; i <= r->max; ++i)
514 list->types[i - list->min] = r->types[i - r->min];
516 for (i = list->min; i < r->min; ++i)
517 list->types[i - list->min] = flag;
519 for (i = r->min; i <= list->max; ++i)
520 list->types[i - list->min] |= flag;
523 r->types = list->types;
532 * Our head intersects his tail.
533 * We might need to merge with the guy after him.
536 r->types = xrelalloc(r->types, list->max - r->min + 1);
538 for (i = list->min; i <= r->max; ++i)
539 r->types[i - r->min] |= flag;
541 for (i = r->max+1; i <= list->max; ++i)
542 r->types[i - r->min] = flag;
549 * Okay, check to see if we grew into the next guy(s)
551 while ((lr = r->next) && r->max >= lr->min) {
553 if (r->max >= lr->max) {
555 * Good, we consumed all of him.
557 for (i = lr->min; i <= lr->max; ++i)
558 r->types[i - r->min] |= lr->types[i - lr->min];
561 * "append" him on to the end of us.
563 r->types = xrelalloc(r->types, lr->max - r->min + 1);
565 for (i = lr->min; i <= r->max; ++i)
566 r->types[i - r->min] |= lr->types[i - lr->min];
568 for (i = r->max+1; i <= lr->max; ++i)
569 r->types[i - r->min] = lr->types[i - lr->min];
574 if (lr->max > r->max)
589 int x, first_d, curr_d;
593 * See if we can compress some of the istype arrays
595 for(list = types.root; list; list = list->next) {
596 list->map = list->types[0];
597 for (x = 1; x < list->max - list->min + 1; ++x) {
598 if ((int32_t)list->types[x] != list->map) {
605 first_d = curr_d = -1;
606 for (x = 0; x < _CACHED_RUNES; ++x) {
607 uint32_t r = types.map[x];
611 first_d = curr_d = x;
612 else if (x != curr_d + 1)
613 errx(1, "error: DIGIT range is not contiguous");
614 else if (x - first_d > 9)
615 errx(1, "error: DIGIT range is too big");
620 "error: DIGIT range is not a subset of XDIGIT range");
624 errx(1, "error: no DIGIT range defined in the single byte area");
625 else if (curr_d - first_d < 9)
626 errx(1, "error: DIGIT range is too small in the single byte area");
628 new_locale.ncharclasses = htonl(charclass_index);
631 * Fill in our tables. Do this in network order so that
632 * diverse machines have a chance of sharing data.
633 * (Machines like Crays cannot share with little machines due to
634 * word size. Sigh. We tried.)
636 for (x = 0; x < _CACHED_RUNES; ++x) {
637 new_locale.runetype[x] = htonl(types.map[x]);
638 new_locale.maplower[x] = htonl(maplower.map[x]);
639 new_locale.mapupper[x] = htonl(mapupper.map[x]);
643 * Count up how many ranges we will need for each of the extents.
648 new_locale.runetype_ext_nranges++;
651 new_locale.runetype_ext_nranges =
652 htonl(new_locale.runetype_ext_nranges);
654 list = maplower.root;
657 new_locale.maplower_ext_nranges++;
660 new_locale.maplower_ext_nranges =
661 htonl(new_locale.maplower_ext_nranges);
663 list = mapupper.root;
666 new_locale.mapupper_ext_nranges++;
669 new_locale.mapupper_ext_nranges =
670 htonl(new_locale.mapupper_ext_nranges);
672 new_locale.variable_len = htonl(new_locale.variable_len);
675 * Okay, we are now ready to write the new locale file.
679 * PART 1: The _FileRuneLocale structure
681 if (fwrite((char *)&new_locale, sizeof(new_locale), 1, fp) != 1) {
682 err(1, "%s: _FileRuneLocale structure", locale_file);
685 * PART 2: The runetype_ext structures (not the actual tables)
692 re.min = htonl(list->min);
693 re.max = htonl(list->max);
694 re.map = htonl(list->map);
699 if (fwrite((char *)&re, sizeof(re), 1, fp) != 1) {
700 err(1, "%s: runetype_ext structures", locale_file);
706 * PART 3: The maplower_ext structures
708 list = maplower.root;
713 re.min = htonl(list->min);
714 re.max = htonl(list->max);
715 re.map = htonl(list->map);
720 if (fwrite((char *)&re, sizeof(re), 1, fp) != 1) {
721 err(1, "%s: maplower_ext structures", locale_file);
727 * PART 4: The mapupper_ext structures
729 list = mapupper.root;
734 re.min = htonl(list->min);
735 re.max = htonl(list->max);
736 re.map = htonl(list->map);
741 if (fwrite((char *)&re, sizeof(re), 1, fp) != 1) {
742 err(1, "%s: mapupper_ext structures", locale_file);
748 * PART 5: The runetype_ext tables
753 for (x = 0; x < list->max - list->min + 1; ++x)
754 list->types[x] = htonl(list->types[x]);
757 if (fwrite((char *)list->types,
758 (list->max - list->min + 1) * sizeof(uint32_t),
760 err(1, "%s: runetype_ext tables", locale_file);
766 * PART 6: The charclass names table
768 for (x = 0; x < charclass_index; ++x) {
769 charclasses[x].mask = ntohl(charclasses[x].mask);
770 if (fwrite((char *)&charclasses[x], sizeof(rune_charclass), 1, fp) != 1) {
771 err(1, "%s: charclass names tables", locale_file);
775 * PART 7: And finally the variable data
776 * SUSv3 says fwrite returns zero when either size or nitems is zero.
778 if (ntohl(new_locale.variable_len) > 0 && fwrite(variable,
779 ntohl(new_locale.variable_len), 1, fp) != 1) {
780 err(1, "%s: variable data", locale_file);
782 if (fclose(fp) != 0) {
783 err(1, "%s: fclose", locale_file);
790 if (new_locale.encoding[0])
791 fprintf(stderr, "ENCODING %s\n", new_locale.encoding);
793 fprintf(stderr, "VARIABLE %s\n", variable);
795 fprintf(stderr, "\nMAPLOWER:\n\n");
797 for (x = 0; x < _CACHED_RUNES; ++x) {
798 if (isprint(maplower.map[x]))
799 fprintf(stderr, " '%c'", (int)maplower.map[x]);
800 else if (maplower.map[x])
801 fprintf(stderr, "%04x", maplower.map[x]);
803 fprintf(stderr, "%4x", 0);
804 if ((x & 0xf) == 0xf)
805 fprintf(stderr, "\n");
807 fprintf(stderr, " ");
809 fprintf(stderr, "\n");
811 for (list = maplower.root; list; list = list->next)
812 fprintf(stderr, "\t%04x - %04x : %04x\n", list->min, list->max, list->map);
814 fprintf(stderr, "\nMAPUPPER:\n\n");
816 for (x = 0; x < _CACHED_RUNES; ++x) {
817 if (isprint(mapupper.map[x]))
818 fprintf(stderr, " '%c'", (int)mapupper.map[x]);
819 else if (mapupper.map[x])
820 fprintf(stderr, "%04x", mapupper.map[x]);
822 fprintf(stderr, "%4x", 0);
823 if ((x & 0xf) == 0xf)
824 fprintf(stderr, "\n");
826 fprintf(stderr, " ");
828 fprintf(stderr, "\n");
830 for (list = mapupper.root; list; list = list->next)
831 fprintf(stderr, "\t%04x - %04x : %04x\n", list->min, list->max, list->map);
834 fprintf(stderr, "\nTYPES:\n\n");
836 for (x = 0; x < _CACHED_RUNES; ++x) {
837 uint32_t r = types.map[x];
841 fprintf(stderr, " '%c': %2d", x, (int)(r & 0xff));
843 fprintf(stderr, "%04x: %2d", x, (int)(r & 0xff));
845 fprintf(stderr, " %4s", (r & _CTYPE_A) ? "alph" : "");
846 fprintf(stderr, " %4s", (r & _CTYPE_C) ? "ctrl" : "");
847 fprintf(stderr, " %4s", (r & _CTYPE_D) ? "dig" : "");
848 fprintf(stderr, " %4s", (r & _CTYPE_G) ? "graf" : "");
849 fprintf(stderr, " %4s", (r & _CTYPE_L) ? "low" : "");
850 fprintf(stderr, " %4s", (r & _CTYPE_P) ? "punc" : "");
851 fprintf(stderr, " %4s", (r & _CTYPE_S) ? "spac" : "");
852 fprintf(stderr, " %4s", (r & _CTYPE_U) ? "upp" : "");
853 fprintf(stderr, " %4s", (r & _CTYPE_X) ? "xdig" : "");
854 fprintf(stderr, " %4s", (r & _CTYPE_B) ? "blnk" : "");
855 fprintf(stderr, " %4s", (r & _CTYPE_R) ? "prnt" : "");
856 fprintf(stderr, " %4s", (r & _CTYPE_I) ? "ideo" : "");
857 fprintf(stderr, " %4s", (r & _CTYPE_T) ? "spec" : "");
858 fprintf(stderr, " %4s", (r & _CTYPE_Q) ? "phon" : "");
859 fprintf(stderr, "\n");
863 for (list = types.root; list; list = list->next) {
864 if (list->map && list->min + 3 < list->max) {
865 uint32_t r = list->map;
867 fprintf(stderr, "%04x: %2d",
868 (uint32_t)list->min, (int)(r & 0xff));
870 fprintf(stderr, " %4s", (r & _CTYPE_A) ? "alph" : "");
871 fprintf(stderr, " %4s", (r & _CTYPE_C) ? "ctrl" : "");
872 fprintf(stderr, " %4s", (r & _CTYPE_D) ? "dig" : "");
873 fprintf(stderr, " %4s", (r & _CTYPE_G) ? "graf" : "");
874 fprintf(stderr, " %4s", (r & _CTYPE_L) ? "low" : "");
875 fprintf(stderr, " %4s", (r & _CTYPE_P) ? "punc" : "");
876 fprintf(stderr, " %4s", (r & _CTYPE_S) ? "spac" : "");
877 fprintf(stderr, " %4s", (r & _CTYPE_U) ? "upp" : "");
878 fprintf(stderr, " %4s", (r & _CTYPE_X) ? "xdig" : "");
879 fprintf(stderr, " %4s", (r & _CTYPE_B) ? "blnk" : "");
880 fprintf(stderr, " %4s", (r & _CTYPE_R) ? "prnt" : "");
881 fprintf(stderr, " %4s", (r & _CTYPE_I) ? "ideo" : "");
882 fprintf(stderr, " %4s", (r & _CTYPE_T) ? "spec" : "");
883 fprintf(stderr, " %4s", (r & _CTYPE_Q) ? "phon" : "");
884 fprintf(stderr, "\n...\n");
886 fprintf(stderr, "%04x: %2d",
887 (uint32_t)list->max, (int)(r & 0xff));
889 fprintf(stderr, " %4s", (r & _CTYPE_A) ? "alph" : "");
890 fprintf(stderr, " %4s", (r & _CTYPE_C) ? "ctrl" : "");
891 fprintf(stderr, " %4s", (r & _CTYPE_D) ? "dig" : "");
892 fprintf(stderr, " %4s", (r & _CTYPE_G) ? "graf" : "");
893 fprintf(stderr, " %4s", (r & _CTYPE_L) ? "low" : "");
894 fprintf(stderr, " %4s", (r & _CTYPE_P) ? "punc" : "");
895 fprintf(stderr, " %4s", (r & _CTYPE_S) ? "spac" : "");
896 fprintf(stderr, " %4s", (r & _CTYPE_U) ? "upp" : "");
897 fprintf(stderr, " %4s", (r & _CTYPE_X) ? "xdig" : "");
898 fprintf(stderr, " %4s", (r & _CTYPE_B) ? "blnk" : "");
899 fprintf(stderr, " %4s", (r & _CTYPE_R) ? "prnt" : "");
900 fprintf(stderr, " %4s", (r & _CTYPE_I) ? "ideo" : "");
901 fprintf(stderr, " %4s", (r & _CTYPE_T) ? "spec" : "");
902 fprintf(stderr, " %4s", (r & _CTYPE_Q) ? "phon" : "");
903 fprintf(stderr, "\n");
905 for (x = list->min; x <= list->max; ++x) {
906 uint32_t r = ntohl(list->types[x - list->min]);
909 fprintf(stderr, "%04x: %2d", x, (int)(r & 0xff));
911 fprintf(stderr, " %4s", (r & _CTYPE_A) ? "alph" : "");
912 fprintf(stderr, " %4s", (r & _CTYPE_C) ? "ctrl" : "");
913 fprintf(stderr, " %4s", (r & _CTYPE_D) ? "dig" : "");
914 fprintf(stderr, " %4s", (r & _CTYPE_G) ? "graf" : "");
915 fprintf(stderr, " %4s", (r & _CTYPE_L) ? "low" : "");
916 fprintf(stderr, " %4s", (r & _CTYPE_P) ? "punc" : "");
917 fprintf(stderr, " %4s", (r & _CTYPE_S) ? "spac" : "");
918 fprintf(stderr, " %4s", (r & _CTYPE_U) ? "upp" : "");
919 fprintf(stderr, " %4s", (r & _CTYPE_X) ? "xdig" : "");
920 fprintf(stderr, " %4s", (r & _CTYPE_B) ? "blnk" : "");
921 fprintf(stderr, " %4s", (r & _CTYPE_R) ? "prnt" : "");
922 fprintf(stderr, " %4s", (r & _CTYPE_I) ? "ideo" : "");
923 fprintf(stderr, " %4s", (r & _CTYPE_T) ? "spec" : "");
924 fprintf(stderr, " %4s", (r & _CTYPE_Q) ? "phon" : "");
925 fprintf(stderr, "\n");