From 5fd83771641d15c418f747bd343ba6738d3875f7 Mon Sep 17 00:00:00 2001 From: Cameron Katri Date: Sun, 9 May 2021 14:20:58 -0400 Subject: Import macOS userland adv_cmds-176 basic_cmds-55 bootstrap_cmds-116.100.1 developer_cmds-66 diskdev_cmds-667.40.1 doc_cmds-53.60.1 file_cmds-321.40.3 mail_cmds-35 misc_cmds-34 network_cmds-606.40.1 patch_cmds-17 remote_cmds-63 shell_cmds-216.60.1 system_cmds-880.60.2 text_cmds-106 --- adv_cmds/colldef/colldef.1 | 274 +++++++ adv_cmds/colldef/common.h | 36 + adv_cmds/colldef/locale/collate.h | 121 ++++ adv_cmds/colldef/parse.y | 1416 +++++++++++++++++++++++++++++++++++++ adv_cmds/colldef/scan.l | 398 +++++++++++ 5 files changed, 2245 insertions(+) create mode 100644 adv_cmds/colldef/colldef.1 create mode 100644 adv_cmds/colldef/common.h create mode 100644 adv_cmds/colldef/locale/collate.h create mode 100644 adv_cmds/colldef/parse.y create mode 100644 adv_cmds/colldef/scan.l (limited to 'adv_cmds/colldef') diff --git a/adv_cmds/colldef/colldef.1 b/adv_cmds/colldef/colldef.1 new file mode 100644 index 0000000..f4f875b --- /dev/null +++ b/adv_cmds/colldef/colldef.1 @@ -0,0 +1,274 @@ +.\" Copyright (c) 1995 Alex Tatmanjants +.\" at Electronni Visti IA, Kiev, Ukraine. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: src/usr.bin/colldef/colldef.1,v 1.21 2004/05/19 09:45:46 ru Exp $ +.\" +.Dd January 27, 1995 +.Dt COLLDEF 1 +.Os +.Sh NAME +.Nm colldef +.Nd convert collation sequence source definition +.Sh SYNOPSIS +.Nm +.Op Fl I Ar map_dir +.Op Fl o Ar out_file +.Op Ar filename +.Sh DESCRIPTION +The +.Nm +utility converts a collation sequence source definition +into a format usable by the +.Fn strxfrm +and +.Fn strcoll +functions. +It is used to define the many ways in which +strings can be ordered and collated. +The +.Fn strxfrm +function transforms +its first argument and places the result in its second +argument. +The transformed string is such that it can be +correctly ordered with other transformed strings by using +.Fn strcmp , +.Fn strncmp , +or +.Fn memcmp . +The +.Fn strcoll +function transforms its arguments and does a +comparison. +.Pp +The +.Nm +utility reads the collation sequence source definition +from the standard input and stores the converted definition in filename. +The output file produced contains the +database with collating sequence information in a form +usable by system commands and routines. 
+.Pp +The following options are available: +.Bl -tag -width indent +.It Fl I Ar map_dir +Set directory name where +.Ar charmap +files can be found, current directory by default. +.It Fl o Ar out_file +Set output file name, +.Ar LC_COLLATE +by default. +.El +.Pp +The collation sequence definition specifies a set of collating elements and +the rules defining how strings containing these should be ordered. +This is most useful for different language definitions. +.Pp +The specification file can consist of three statements: +.Ar charmap , +.Ar substitute +and +.Ar order . +.Pp +Of these, only the +.Ar order +statement is required. +When +.Ar charmap +or +.Ar substitute +is +supplied, these statements must be ordered as above. +Any +statements after the order statement are ignored. +.Pp +Lines in the specification file beginning with a +.Ql # +are +treated as comments and are ignored. +Blank lines are also +ignored. +.Pp +.Dl "charmap charmapfile" +.Pp +.Ar Charmap +defines where a mapping of the character +and collating element symbols to the actual +character encoding can be found. +.Pp +The format of +.Ar charmapfile +is shown below. +Symbol +names are separated from their values by TAB or +SPACE characters. +Symbol-value can be specified in +a hexadecimal (\ex\fI??\fR) or octal (\e\fI???\fR) +representation, and can be only one character in length. +.Pp +.Bd -literal -offset indent +symbol-name1 symbol-value1 +symbol-name2 symbol-value2 +\&... +.Ed +.Pp +Symbol names cannot be specified in +.Ar substitute +fields. +.Pp +The +.Ar charmap +statement is optional. +.Pp +.Bd -literal -offset indent +substitute "symbol" with "repl_string" +.Ed +.Pp +The +.Ar substitute +statement substitutes the character +.Ar symbol +with the string +.Ar repl_string . +Symbol names cannot be specified in +.Ar repl_string +field. +The +.Ar substitute +statement is optional. 
+.Pp +.Dl "order order_list" +.Pp +.Ar Order_list +is a list of symbols, separated by semicolons, that defines the +collating sequence. +The +special symbol +.Ar ... +specifies, in a short-hand +form, symbols that are sequential in machine code +order. +.Pp +An order list element +can be represented in any one of the following +ways: +.Bl -bullet +.It +The symbol itself (for example, +.Ar a +for the lower-case letter +.Ar a ) . +.It +The symbol in octal representation (for example, +.Ar \e141 +for the letter +.Ar a ) . +.It +The symbol in hexadecimal representation (for example, +.Ar \ex61 +for the letter +.Ar a ) . +.It +The symbol name as defined in the +.Ar charmap +file (for example, +.Ar <letterA> +for +.Ar letterA \e023 +record in +.Ar charmapfile ) . +If a character map name contains a +.Ar > +character, it must be escaped as +.Ar /> , +and a single +.Ar / +must be escaped as +.Ar // . +.It +Symbols +.Ar \ea , +.Ar \eb , +.Ar \ef , +.Ar \en , +.Ar \er , +.Ar \ev +are permitted with their usual C-language meanings. +.It +The symbol chain (for example: +.Ar abc , +.Ar <letterA><letterB>c , +.Ar \exf1b\exf2 ) . +.It +The symbol range (for example, +.Ar a;...;z ) . +.It +Comma-separated symbols, ranges and chains enclosed in parentheses (for example +.Ar \&( +.Ar sym1 , +.Ar sym2 , +.Ar ... +.Ar \&) ) +are assigned the +same primary ordering but different secondary +ordering. +.It +Comma-separated symbols, ranges and chains enclosed in curly brackets (for example +.Ar \&{ +.Ar sym1 , +.Ar sym2 , +.Ar ... +.Ar \&} ) +are assigned the same primary ordering only. +.El +.Pp +The backslash character +.Ar \e +is used for continuation. +In this case, no characters are permitted +after the backslash character. +.Sh DIAGNOSTICS +The +.Nm +utility exits with the following values: +.Bl -tag -width indent +.It Li 0 +No errors were found and the output was successfully created. +.It Li !=0 +Errors were found. 
+.El
+.Sh FILES
+.Bl -tag -width indent
+.It Pa /usr/share/locale/ Ns Ao Ar language Ac Ns Pa /LC_COLLATE
+The standard shared location for collation orders
+under the locale
+.Aq Ar language .
+.El
+.Sh SEE ALSO
+.Xr mklocale 1 ,
+.Xr setlocale 3 ,
+.Xr strcoll 3 ,
+.Xr strxfrm 3
diff --git a/adv_cmds/colldef/common.h b/adv_cmds/colldef/common.h
new file mode 100644
index 0000000..b59c125
--- /dev/null
+++ b/adv_cmds/colldef/common.h
@@ -0,0 +1,36 @@
+/*
+ * $FreeBSD: src/usr.bin/colldef/common.h,v 1.2 2001/11/28 09:50:24 ache Exp $
+ */
+
+#include /* NOTE(review): the header names of these three includes are missing from this copy - restore from upstream */
+#include
+#include
+
+#define CHARMAP_SYMBOL_LEN 64 /* capacity, in wide chars, of struct symbol.name; parse.y rejects longer symbol names */
+#define BUFSIZE 80 /* capacity of the wchar_t str[] member of the parser's %union */
+
+#define NOTEXISTS 0 /* second argument of getsymbol()/getchain(): parse.y passes it when declaring a new symbol or chain */
+#define EXISTS 1 /* second argument of getsymbol()/getchain(): parse.y passes it when referring to an already declared one */
+
+#define SYMBOL_CHAR 0 /* SYMBOL_* are the values stored in struct symbol.type */
+#define SYMBOL_CHAIN 1 /* collating element (multi-character chain); u.str is used */
+#define SYMBOL_SYMBOL 2 /* collating symbol; parse.y does not allow weights on it */
+#define SYMBOL_STRING 3
+#define SYMBOL_IGNORE 4
+#define SYMBOL_ELLIPSIS 5
+struct symbol { /* one named entry of the charmap / collating-symbol tables used by the parser */
+    int type; /* one of the SYMBOL_* constants above */
+    int val; /* priority assigned in the order section; parse.y initialises it to PRI_UNDEFINED */
+    wchar_t name[CHARMAP_SYMBOL_LEN]; /* symbol name as written between <...> */
+    union {
+        wchar_t wc; /* the character, read by parse.y for SYMBOL_CHAR entries */
+        wchar_t str[STR_LEN]; /* the chain text for SYMBOL_CHAIN entries; STR_LEN comes from collate.h, which must be included first */
+    } u;
+};
+
+extern int line_no;
+
+struct symbol *getsymbol(const wchar_t *, int); /* int argument is EXISTS or NOTEXISTS */
+extern char *showwcs(const wchar_t *, int); /* defined in parse.y; used there to format wide strings for diagnostics */
+
+extern char map_name[FILENAME_MAX]; /* defined in parse.y, initialised to "." */
diff --git a/adv_cmds/colldef/locale/collate.h b/adv_cmds/colldef/locale/collate.h
new file mode 100644
index 0000000..494e231
--- /dev/null
+++ b/adv_cmds/colldef/locale/collate.h
@@ -0,0 +1,121 @@
+/*-
+ * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
+ * at Electronni Visti IA, Kiev, Ukraine.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/lib/libc/locale/collate.h,v 1.14 2002/08/30 20:26:02 ache Exp $
+ */
+
+#ifndef _COLLATE_H_
+#define _COLLATE_H_
+
+#include /* NOTE(review): the header names of the includes in this file are missing from this copy - restore from upstream */
+#ifndef __LIBC__
+#include
+#endif /* !__LIBC__ */
+#include
+
+#define STR_LEN 10 /* capacity, in wide chars, of chain and substitution strings (and of the version record colldef writes) */
+#define TABLE_SIZE 100
+#define COLLATE_VERSION "1.0\n"
+#define COLLATE_VERSION1_1 "1.1\n"
+#define COLLATE_VERSION1_1A "1.1A\n" /* the version string colldef's parse.y writes at the start of its output file */
+/* see discussion in string/FreeBSD/strxfrm for this value */
+#define COLLATE_MAX_PRIORITY ((1 << 24) - 1)
+
+#define DIRECTIVE_UNDEF 0x00 /* DIRECTIVE_* are bit flags stored per weight level in __collate_st_info.directive[] */
+#define DIRECTIVE_FORWARD 0x01
+#define DIRECTIVE_BACKWARD 0x02 /* parse.y rejects forward and backward on the same level */
+#define DIRECTIVE_POSITION 0x04
+
+#define DIRECTIVE_DIRECTION_MASK (DIRECTIVE_FORWARD | DIRECTIVE_BACKWARD)
+
+#define COLLATE_SUBST_DUP 0x0001 /* __collate_st_info.flags bit: colldef sets it and writes no second substitution table */
+
+#define IGNORE_EQUIV_CLASS 1
+
+struct __collate_st_info { /* header record colldef writes right after the version string */
+    __uint8_t directive[COLL_WEIGHTS_MAX]; /* DIRECTIVE_* flags, one entry per weight level */
+    __uint8_t flags; /* COLLATE_SUBST_DUP */
+#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+    __uint8_t directive_count:4; /* number of weight levels in use; the two 4-bit fields swap order with the byte order */
+    __uint8_t chain_max_len:4; /* length of the longest chain string in the chain table */
+#else
+    __uint8_t chain_max_len:4;
+    __uint8_t directive_count:4;
+#endif
+    __int32_t undef_pri[COLL_WEIGHTS_MAX]; /* priority given to characters with no explicit weight; colldef stores the 32-bit fields via htonl() */
+    __int32_t subst_count[COLL_WEIGHTS_MAX]; /* number of __collate_st_subst records per weight level */
+    __int32_t chain_count; /* number of __collate_st_chain_pri records */
+    __int32_t large_pri_count; /* number of __collate_st_large_char_pri records */
+};
+
+struct __collate_st_char_pri { /* priorities of one character, one per weight level */
+    __int32_t pri[COLL_WEIGHTS_MAX];
+};
+struct __collate_st_chain_pri { /* priorities of one multi-character collating element */
+    __darwin_wchar_t str[STR_LEN]; /* the chain text; not NUL-terminated when it is exactly STR_LEN long */
+    __int32_t pri[COLL_WEIGHTS_MAX];
+};
+struct __collate_st_large_char_pri { /* priorities of a character kept outside the UCHAR_MAX + 1 entry direct table */
+    __int32_t val; /* the character code; colldef sorts the table before writing it */
+    struct __collate_st_char_pri pri;
+};
+struct __collate_st_subst { /* one substitution: character -> replacement string */
+    __int32_t val; /* the character being substituted */
+    __darwin_wchar_t str[STR_LEN]; /* the replacement text */
+};
+
+#ifndef __LIBC__
+extern int __collate_load_error;
+extern int __collate_substitute_nontrivial;
+#define __collate_char_pri_table (*__collate_char_pri_table_ptr) /* colldef's parse.y #undefs this before defining its own table */
+extern struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1];
+extern struct __collate_st_chain_pri *__collate_chain_pri_table;
+extern __int32_t *__collate_chain_equiv_table;
+extern struct __collate_st_info __collate_info;
+#endif /* !__LIBC__ */
+
+__BEGIN_DECLS
+#ifdef __LIBC__
+__darwin_wchar_t *__collate_mbstowcs(const char *, locale_t);
+__darwin_wchar_t *__collate_wcsdup(const __darwin_wchar_t *);
+__darwin_wchar_t *__collate_substitute(const __darwin_wchar_t *, int, locale_t);
+int __collate_load_tables(const char *, locale_t);
+void __collate_lookup_l(const __darwin_wchar_t *, int *, int *, int *, locale_t);
+void __collate_lookup_which(const __darwin_wchar_t *, int *, int *, int, locale_t);
+void __collate_xfrm(const __darwin_wchar_t *, __darwin_wchar_t **, locale_t);
+int __collate_range_cmp(__darwin_wchar_t, __darwin_wchar_t, locale_t);
+size_t __collate_collating_symbol(__darwin_wchar_t *, size_t, const char *, size_t, __darwin_mbstate_t *, locale_t);
+int __collate_equiv_class(const char *, size_t, __darwin_mbstate_t *, locale_t);
+size_t __collate_equiv_match(int, __darwin_wchar_t *, size_t, __darwin_wchar_t, const char *, size_t, __darwin_mbstate_t *, size_t *, locale_t);
+#else /* !__LIBC__ */
+void __collate_lookup(const unsigned char *, int *, int *, int *);
+#endif /* __LIBC__ */
+#ifdef COLLATE_DEBUG
+void __collate_print_tables(void);
+#endif
+__END_DECLS
+
+#endif /* !_COLLATE_H_ */
diff --git a/adv_cmds/colldef/parse.y b/adv_cmds/colldef/parse.y
new file mode 100644
index 0000000..495c2f3
---
/dev/null +++ b/adv_cmds/colldef/parse.y @@ -0,0 +1,1416 @@ +%{ +/*- + * Copyright (c) 1995 Alex Tatmanjants + * at Electronni Visti IA, Kiev, Ukraine. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: src/usr.bin/colldef/parse.y,v 1.31 2002/10/16 12:56:22 charnier Exp $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "collate.h" +#include "common.h" + +#define PRI_UNDEFINED (-1) +#define PRI_IGNORE 0 +#define LINE_NONE (-1) +#define LINE_NORMAL 0 +#define LINE_ELLIPSIS 1 +#define LINE_UNDEFINED 2 +/* If UNDEFINED is specified with ellipses, we reposition prim_pri to + * UNDEFINED_PRI, leaving gap for undefined characters. 
*/ +#define UNDEFINED_PRI (COLLATE_MAX_PRIORITY - (COLLATE_MAX_PRIORITY >> 2)) + +extern FILE *yyin; +void yyerror(const char *fmt, ...) __printflike(1, 2); +int yyparse(void); +int yylex(void); +static void usage(void); +static void collate_print_tables(void); +static struct __collate_st_char_pri *getpri(int32_t); +static struct __collate_st_char_pri *haspri(int32_t); +static struct __collate_st_chain_pri *getchain(const wchar_t *, int); +static struct symbol *getsymbolbychar(wchar_t); +static struct symbol *hassymbolbychar(wchar_t); +static void setsymbolbychar(struct symbol *); +struct symbol *getstring(const wchar_t *); +static void makeforwardref(int, const struct symbol *, const struct symbol *); +static int charpricompar(const void *, const void *); +static int substcompar(const void *, const void *); +static int chainpricompar(const void *, const void *); +static void putsubst(int32_t, int, const wchar_t *); +static int hassubst(int32_t, int); +static const wchar_t *__collate_wcsnchr(const wchar_t *, wchar_t, int); +static int __collate_wcsnlen(const wchar_t *, int); +char *showwcs(const wchar_t *, int); +static char *charname(wchar_t); +static char *charname2(wchar_t); + +char map_name[FILENAME_MAX] = "."; +wchar_t curr_chain[STR_LEN + 1]; + +char __collate_version[STR_LEN]; +DB *charmapdb; +static DB *charmapdb2; +static DB *largemapdb; +static int nlargemap = 0; +static DB *substdb[COLL_WEIGHTS_MAX]; +static int nsubst[COLL_WEIGHTS_MAX]; +static DB *chaindb; +static int nchain = 0; +static DB *stringdb; +static DB *forward_ref[COLL_WEIGHTS_MAX]; +static struct symbol *prev_weight_table[COLL_WEIGHTS_MAX]; +static struct symbol *prev2_weight_table[COLL_WEIGHTS_MAX]; +static struct symbol *weight_table[COLL_WEIGHTS_MAX]; +static int prev_line = LINE_NONE; +static struct symbol *prev_elem; +static int weight_index = 0; +static int allow_ellipsis = 0; +static struct symbol sym_ellipsis = {SYMBOL_ELLIPSIS, PRI_UNDEFINED}; +static struct symbol sym_ignore = 
{SYMBOL_IGNORE, PRI_IGNORE}; +static struct symbol sym_undefined = {SYMBOL_CHAR, PRI_UNDEFINED}; +static int order_pass = 0; + +#undef __collate_char_pri_table +struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1]; +struct __collate_st_chain_pri *__collate_chain_pri_table; +struct __collate_st_subst *__collate_substitute_table[COLL_WEIGHTS_MAX]; +struct __collate_st_large_char_pri *__collate_large_char_pri_table; + +int prim_pri = 2, sec_pri = 2; +#ifdef COLLATE_DEBUG +int debug; +#endif +struct __collate_st_info info = {{DIRECTIVE_FORWARD, DIRECTIVE_FORWARD}, 0, 0, 0, {PRI_UNDEFINED, PRI_UNDEFINED}}; + +/* Some of the code expects COLL_WEIGHTS_MAX == 2 */ +int directive_count = COLL_WEIGHTS_MAX; + +const char *out_file = "LC_COLLATE"; +%} +%union { + int32_t ch; + wchar_t str[BUFSIZE]; +} +%token SUBSTITUTE WITH +%token START_LC_COLLATE END_LC_COLLATE COLLATING_ELEMENT FROM COLLATING_SYMBOL +%token ELLIPSIS IGNORE UNDEFINED +%token ORDER RANGE ORDER_START ORDER_END ORDER_SECOND_PASS +%token STRING +%token DEFN +%token ELEM +%token CHAR +%token ORDER_DIRECTIVE +%% +collate : datafile { + FILE *fp; + int localedef = (stringdb != NULL); + int z; + + if (nchain > 0) { + DBT key, val; + struct __collate_st_chain_pri *t, *v; + wchar_t *wp, *tp; + int flags, i, len; + + if ((__collate_chain_pri_table = (struct __collate_st_chain_pri *)malloc(nchain * sizeof(struct __collate_st_chain_pri))) == NULL) + err(1, "chain malloc"); + flags = R_FIRST; + t = __collate_chain_pri_table; + for(i = 0; i < nchain; i++) { + if (chaindb->seq(chaindb, &key, &val, flags) != 0) + err(1, "Can't retrieve chaindb %d", i); + memcpy(&v, val.data, sizeof(struct __collate_st_chain_pri *)); + *t++ = *v; + if ((len = __collate_wcsnlen(v->str, STR_LEN)) > info.chain_max_len) + info.chain_max_len = len; + flags = R_NEXT; + } + if (chaindb->seq(chaindb, &key, &val, flags) == 0) + err(1, "More in chaindb after retrieving %d", nchain); + qsort(__collate_chain_pri_table, nchain, 
sizeof(struct __collate_st_chain_pri), chainpricompar); + } + for(z = 0; z < directive_count; z++) { + if (nsubst[z] > 0) { + DBT key, val; + struct __collate_st_subst *t; + wchar_t *wp, *tp; + int flags, i, j; + int32_t cval; + + if ((__collate_substitute_table[z] = (struct __collate_st_subst *)calloc(nsubst[z], sizeof(struct __collate_st_subst))) == NULL) + err(1, "__collate_substitute_table[%d] calloc", z); + flags = R_FIRST; + t = __collate_substitute_table[z]; + for(i = 0; i < nsubst[z]; i++) { + if (substdb[z]->seq(substdb[z], &key, &val, flags) != 0) + err(1, "Can't retrieve substdb[%d]", z); + memcpy(&cval, key.data, sizeof(int32_t)); + /* we don't set the byte order of t->val, since we + * need it for sorting */ + t->val = cval; + for(wp = (wchar_t *)val.data, tp = t->str, j = STR_LEN; *wp && j-- > 0;) + *tp++ = htonl(*wp++); + t++; + flags = R_NEXT; + } + if (substdb[z]->seq(substdb[z], &key, &val, flags) == 0) + err(1, "More in substdb[%d] after retrieving %d", z, nsubst[z]); + qsort(__collate_substitute_table[z], nsubst[z], sizeof(struct __collate_st_subst), substcompar); + } + } + if (nlargemap > 0) { + DBT key, val; + struct __collate_st_large_char_pri *t; + struct __collate_st_char_pri *p; + int flags, i, z; + int32_t cval; + + if ((__collate_large_char_pri_table = (struct __collate_st_large_char_pri *)malloc(nlargemap * sizeof(struct __collate_st_large_char_pri))) == NULL) + err(1, "nlargemap malloc"); + flags = R_FIRST; + t = __collate_large_char_pri_table; + for(i = 0; i < nlargemap; i++) { + if (largemapdb->seq(largemapdb, &key, &val, flags) != 0) + err(1, "Can't retrieve largemapdb %d", i); + memcpy(&cval, key.data, sizeof(int32_t)); + memcpy(&p, val.data, sizeof(struct __collate_st_char_pri *)); + /* we don't set the byte order of t->val, since we + * need it for sorting */ + t->val = cval; + for(z = 0; z < directive_count; z++) + t->pri.pri[z] = htonl(p->pri[z]); + t++; + flags = R_NEXT; + } + if (largemapdb->seq(largemapdb, &key, &val, flags) 
== 0) + err(1, "More in largemapdb after retrieving %d", nlargemap); + qsort(__collate_large_char_pri_table, nlargemap, sizeof(struct __collate_st_large_char_pri), charpricompar); + } + + if (info.undef_pri[0] == PRI_UNDEFINED) { + int i; + info.undef_pri[0] = prim_pri; + for(i = 1; i < directive_count; i++) + info.undef_pri[i] = -prim_pri; + } + + if (localedef) { + int ch, z, ret; + if (sym_undefined.val == PRI_UNDEFINED) { + int flags = R_FIRST; + DBT key, val; + struct symbol *v; + while((ret = charmapdb->seq(charmapdb, &key, &val, flags)) == 0) { + memcpy(&v, val.data, sizeof(struct symbol *)); + switch(v->type) { + case SYMBOL_CHAR: { + struct __collate_st_char_pri *p = haspri(v->u.wc); + if (!p || p->pri[0] == PRI_UNDEFINED) + warnx("<%s> was not defined", showwcs((const wchar_t *)key.data, key.size / sizeof(wchar_t))); + break; + } + case SYMBOL_CHAIN: { + struct __collate_st_chain_pri *p = getchain(v->u.str, EXISTS); + if (p->pri[0] == PRI_UNDEFINED) + warnx("<%s> was not defined", showwcs((const wchar_t *)key.data, key.size / sizeof(wchar_t))); + break; + } + } + flags = R_NEXT; + } + if (ret < 0) + err(1, "Error retrieving from charmapdb"); + } + for (ch = 1; ch < UCHAR_MAX + 1; ch++) { + for(z = 0; z < directive_count; z++) + if (__collate_char_pri_table[ch].pri[z] == PRI_UNDEFINED) + __collate_char_pri_table[ch].pri[z] = (info.undef_pri[z] >= 0) ? info.undef_pri[z] : (ch - info.undef_pri[z]); + } + for (ch = 0; ch < nlargemap; ch++) { + for(z = 0; z < directive_count; z++) + if (__collate_large_char_pri_table[ch].pri.pri[z] == PRI_UNDEFINED) + __collate_large_char_pri_table[ch].pri.pri[z] = (info.undef_pri[z] >= 0) ? 
info.undef_pri[z] : (__collate_large_char_pri_table[ch].val - info.undef_pri[z]); + } + } else { + int ch, substed, ordered; + int fatal = 0; + for (ch = 1; ch < UCHAR_MAX + 1; ch++) { + substed = hassubst(ch, 0); + ordered = (__collate_char_pri_table[ch].pri[0] != PRI_UNDEFINED); + if (!ordered && !substed) { + fatal = 1; + warnx("%s not found", charname(ch)); + } + if (substed && ordered) { + fatal = 1; + warnx("%s can't be ordered since substituted", charname(ch)); + } + } + if (fatal) + exit(1); + } + + /* COLLATE_SUBST_DUP depends on COLL_WEIGHTS_MAX == 2 */ + if (localedef) { + if (nsubst[0] == nsubst[1] && (nsubst[0] == 0 || + memcmp(__collate_substitute_table[0], __collate_substitute_table[1], nsubst[0] * sizeof(struct __collate_st_subst)) == 0)) { + info.flags |= COLLATE_SUBST_DUP; + nsubst[1] = 0; + } + } else { + info.flags |= COLLATE_SUBST_DUP; + nsubst[1] = 0; + } + + for(z = 0; z < directive_count; z++) + info.subst_count[z] = nsubst[z]; + + info.directive_count = directive_count; + info.chain_count = nchain; + info.large_pri_count = nlargemap; + + if ((fp = fopen(out_file, "w")) == NULL) + err(EX_UNAVAILABLE, "can't open destination file %s", + out_file); + + strcpy(__collate_version, COLLATE_VERSION1_1A); + if (fwrite(__collate_version, sizeof(__collate_version), 1, fp) != 1) + err(EX_IOERR, + "IO error writting collate version to destination file %s", + out_file); +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN + for(z = 0; z < directive_count; z++) { + info.undef_pri[z] = htonl(info.undef_pri[z]); + info.subst_count[z] = htonl(info.subst_count[z]); + } + info.chain_count = htonl(info.chain_count); + info.large_pri_count = htonl(info.large_pri_count); +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ + if (fwrite(&info, sizeof(info), 1, fp) != 1) + err(EX_IOERR, + "IO error writting collate info to destination file %s", + out_file); +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN + { + int i, z; + struct __collate_st_char_pri *p = 
__collate_char_pri_table; + + for(i = UCHAR_MAX + 1; i-- > 0; p++) { + for(z = 0; z < directive_count; z++) + p->pri[z] = htonl(p->pri[z]); + } + } +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ + if (fwrite(__collate_char_pri_table, + sizeof(__collate_char_pri_table), 1, fp) != 1) + err(EX_IOERR, + "IO error writting char table to destination file %s", + out_file); + for(z = 0; z < directive_count; z++) { + if (nsubst[z] > 0) { +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN + struct __collate_st_subst *t = __collate_substitute_table[z]; + int i; + for(i = nsubst[z]; i > 0; i--) { + t->val = htonl(t->val); + t++; + } +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ + if (fwrite(__collate_substitute_table[z], sizeof(struct __collate_st_subst), nsubst[z], fp) != nsubst[z]) + err(EX_IOERR, + "IO error writting large substprim table %d to destination file %s", + z, out_file); + } + } + if (nchain > 0) { +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN + int i, j, z; + struct __collate_st_chain_pri *p = __collate_chain_pri_table; + wchar_t *w; + + for(i = nchain; i-- > 0; p++) { + for(j = STR_LEN, w = p->str; *w && j-- > 0; w++) + *w = htonl(*w); + for(z = 0; z < directive_count; z++) + p->pri[z] = htonl(p->pri[z]); + } +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ + if (fwrite(__collate_chain_pri_table, + sizeof(*__collate_chain_pri_table), nchain, fp) != + (size_t)nchain) + err(EX_IOERR, + "IO error writting chain table to destination file %s", + out_file); + } + + if (nlargemap > 0) { +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN + struct __collate_st_large_char_pri *t = __collate_large_char_pri_table; + int i; + for(i = 0; i < nlargemap; i++) { + t->val = htonl(t->val); + t++; + } +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ + if (fwrite(__collate_large_char_pri_table, sizeof(struct __collate_st_large_char_pri), nlargemap, fp) != nlargemap) + err(EX_IOERR, + "IO error writting large pri tables to 
destination file %s", + out_file); + } + + if (fclose(fp) != 0) + err(EX_IOERR, "IO error closing destination file %s", + out_file); + +#ifdef COLLATE_DEBUG + if (debug) + collate_print_tables(); +#endif + exit(EX_OK); +} +; +datafile : statment_list + | blank_lines start_localedef localedef_sections blank_lines end_localedef blank_lines +; +statment_list : statment + | statment_list '\n' statment +; +statment : + | charmap + | substitute + | order +; +blank_lines : + | '\n' + | blank_lines '\n' +; +start_localedef : START_LC_COLLATE '\n' { + int i; + if ((stringdb = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL)) == NULL) + err(1, "dbopen stringdb"); + directive_count = 0; + for(i = 0; i < COLL_WEIGHTS_MAX; i++) + info.directive[i] = DIRECTIVE_UNDEF; +} +; +end_localedef : END_LC_COLLATE '\n' +; +localedef_sections : localedef_preface localedef_order +; +localedef_preface : localedef_statment '\n' + | localedef_preface localedef_statment '\n' +; +localedef_statment : + | charmap + | collating_element + | collating_symbol +; +collating_element : COLLATING_ELEMENT ELEM FROM STRING { + int len; + struct symbol *s; + if (wcslen($2) > CHARMAP_SYMBOL_LEN) + yyerror("collating-element symbol name '%s' is too long", showwcs($2, CHARMAP_SYMBOL_LEN)); + if ((len = wcslen($4)) > STR_LEN) + yyerror("collating-element string '%s' is too long", showwcs($4, STR_LEN)); + if (len < 2) + yyerror("collating-element string '%s' must be at least two characters", showwcs($4, STR_LEN)); + s = getsymbol($2, NOTEXISTS); + s->val = PRI_UNDEFINED; + s->type = SYMBOL_CHAIN; + wcsncpy(s->u.str, $4, STR_LEN); + getchain($4, NOTEXISTS); +} +; +collating_symbol : COLLATING_SYMBOL ELEM { + struct symbol *s; + if (wcslen($2) > CHARMAP_SYMBOL_LEN) + yyerror("collating-element symbol name '%s' is too long", showwcs($2, CHARMAP_SYMBOL_LEN)); + s = getsymbol($2, NOTEXISTS); + s->val = PRI_UNDEFINED; + s->type = SYMBOL_SYMBOL; +} +; +localedef_order : order_start order_lines1 order_second_pass 
order_lines2 order_end +; +order_start: ORDER_START order_start_list '\n' +; +order_second_pass: ORDER_SECOND_PASS { + prev_line = LINE_NONE; + prev_elem = NULL; + order_pass++; +} +; +order_start_list : order_start_list_directives { + if (directive_count > 0) + yyerror("Multiple order_start lines not allowed"); + if ((info.directive[0] & DIRECTIVE_DIRECTION_MASK) == 0) + info.directive[0] |= DIRECTIVE_FORWARD; + directive_count++; +} + | order_start_list ';' order_start_list_directives { + if (directive_count >= COLL_WEIGHTS_MAX) + yyerror("only COLL_WEIGHTS_MAX weights allowed"); + if ((info.directive[directive_count] & DIRECTIVE_DIRECTION_MASK) == 0) + info.directive[directive_count] |= DIRECTIVE_FORWARD; + directive_count++; +} +; +order_start_list_directives : ORDER_DIRECTIVE { + info.directive[directive_count] = $1; +} + | order_start_list_directives ',' ORDER_DIRECTIVE { + int direction = ($3 & DIRECTIVE_DIRECTION_MASK); + int prev = (info.directive[directive_count] & DIRECTIVE_DIRECTION_MASK); + if (direction && prev && direction != prev) + yyerror("The forward and backward directives are mutually exclusive"); + info.directive[directive_count] |= $3; +} +; +order_lines1 : order_line1 '\n' + | order_lines1 order_line1 '\n' +; +order_line1 : + | ELEM { + struct symbol *s = getsymbol($1, EXISTS); + if (s->val != PRI_UNDEFINED) + yyerror("<%s> redefined", showwcs($1, CHARMAP_SYMBOL_LEN)); + if (prev_line == LINE_ELLIPSIS) { + struct symbol *m; + wchar_t i; + int v; + switch (s->type) { + case SYMBOL_CHAIN: + yyerror("Chain <%s> can't be endpoints of ellipsis", showwcs($1, CHARMAP_SYMBOL_LEN)); + case SYMBOL_SYMBOL: + yyerror("Collating symbol <%s> can't be endpoints of ellipsis", showwcs($1, CHARMAP_SYMBOL_LEN)); + } + if (s->u.wc <= prev_elem->u.wc) + yyerror("<%s> is before starting point of ellipsis", showwcs($1, CHARMAP_SYMBOL_LEN)); + for(i = prev_elem->u.wc + 1, v = prev_elem->val + 1; i < s->u.wc; i++, v++) { + m = getsymbolbychar(i); + if (m->val != 
PRI_UNDEFINED) + yyerror("<%s> was previously defined while filling ellipsis symbols", showwcs(m->name, CHARMAP_SYMBOL_LEN)); + m->val = v; + } + s->val = v; + } else + s->val = prim_pri; + prim_pri = s->val + 1; + weight_index = 0; +} weights { + int i; + struct symbol *s = getsymbol($1, EXISTS); + if (s->type == SYMBOL_SYMBOL) { + if (weight_index != 0) + yyerror("Can't specify weights for collating symbol <%s>", showwcs($1, CHARMAP_SYMBOL_LEN)); + } else if (weight_index == 0) { + for(i = 0; i < directive_count; i++) + weight_table[i] = s; + } else if (weight_index != directive_count) + yyerror("Not enough weights specified"); + memcpy(prev_weight_table, weight_table, sizeof(weight_table)); + prev_line = LINE_NORMAL; + prev_elem = s; +} + | ELLIPSIS { weight_index = 0; allow_ellipsis = 1; } weights { + int i; + if (prev_line == LINE_ELLIPSIS) + yyerror("Illegal sequential ellipsis lines"); + if (prev_line == LINE_UNDEFINED) + yyerror("Ellipsis line can not follow UNDEFINED line"); + if (prev_line == LINE_NONE) + yyerror("Ellipsis line must follow a collating identifier lines"); + if (weight_index == 0) { + for(i = 0; i < directive_count; i++) + weight_table[i] = &sym_ellipsis; + } else if (weight_index != directive_count) + yyerror("Not enough weights specified"); + for(i = 0; i < directive_count; i++) { + if (weight_table[i]->type != SYMBOL_ELLIPSIS) + continue; + switch (prev_weight_table[i]->type) { + case SYMBOL_CHAIN: + yyerror("Startpoint of ellipsis can't be a collating element"); + case SYMBOL_IGNORE: + yyerror("Startpoint of ellipsis can't be IGNORE"); + case SYMBOL_SYMBOL: + yyerror("Startpoint of ellipsis can't be a collating symbol"); + case SYMBOL_STRING: + yyerror("Startpoint of ellipsis can't be a string"); + } + } + memcpy(prev2_weight_table, prev_weight_table, sizeof(prev_weight_table)); + memcpy(prev_weight_table, weight_table, sizeof(weight_table)); + prev_line = LINE_ELLIPSIS; + allow_ellipsis = 0; +} + | UNDEFINED { + if (sym_undefined.val 
!= PRI_UNDEFINED)
		yyerror("Multiple UNDEFINED lines not allowed");
	sym_undefined.val = prim_pri++;
	weight_index = 0;
	allow_ellipsis = 1;
} weights {
	int i;

	/* With no explicit weights, UNDEFINED is its own first-level weight
	 * and every remaining level is an ellipsis. */
	if (weight_index == 0) {
		weight_table[0] = &sym_undefined;
		for(i = 1; i < directive_count; i++)
			weight_table[i] = &sym_ellipsis;
	} else if (weight_index != directive_count)
		yyerror("Not enough weights specified");
	memcpy(prev_weight_table, weight_table, sizeof(weight_table));
	prev_line = LINE_UNDEFINED;
}
;
/*
 * order_lines2 / order_line2: the order lines as parsed by the "2" rules.
 * An element line here requires that the element already has a priority
 * (s->val != PRI_UNDEFINED) and stores one weight per directive into the
 * character or chain priority tables.
 */
order_lines2 : order_line2 '\n'
	| order_lines2 order_line2 '\n'
;
order_line2 :
	| ELEM { weight_index = 0; } weights {
	int i;
	struct symbol *s = getsymbol($1, EXISTS);

	if (s->val == PRI_UNDEFINED)
		yyerror("<%s> undefined", showwcs($1, CHARMAP_SYMBOL_LEN));
	if (s->type == SYMBOL_SYMBOL) {
		if (weight_index != 0)
			yyerror("Can't specify weights for collating symbol <%s>", showwcs($1, CHARMAP_SYMBOL_LEN));
	} else if (weight_index == 0) {
		/* no weights given: the element is its own weight at every level */
		for(i = 0; i < directive_count; i++)
			weight_table[i] = s;
	} else if (weight_index != directive_count)
		yyerror("Not enough weights specified");
	if (prev_line == LINE_ELLIPSIS) {
		int w, x;

		/* The previous line was an ellipsis: give a weight to every
		 * character strictly between prev_elem and this element, one
		 * directive level at a time. */
		for(i = 0; i < directive_count; i++) {
			switch (prev_weight_table[i]->type) {
			case SYMBOL_CHAR:
			case SYMBOL_CHAIN:
			case SYMBOL_IGNORE:
			case SYMBOL_SYMBOL:
				/* the ellipsis line named a fixed weight: all
				 * characters in the gap share it */
				for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) {
					struct __collate_st_char_pri *p = getpri(w);
					if (p->pri[i] != PRI_UNDEFINED)
						yyerror("Char 0x%02x previously defined", (unsigned int)w);
					p->pri[i] = prev_weight_table[i]->val;
				}
				break;
			case SYMBOL_ELLIPSIS:

				/* yyerror() exits through errx(), so the
				 * cases below never fall through at run time */
				switch (weight_table[i]->type) {
				case SYMBOL_STRING:
					yyerror("Strings can't be endpoints of ellipsis");
				case SYMBOL_CHAIN:
					yyerror("Chains can't be endpoints of ellipsis");
				case SYMBOL_IGNORE:
					yyerror("IGNORE can't be endpoints of ellipsis");
				case SYMBOL_SYMBOL:
					yyerror("Collation symbols can't be endpoints of ellipsis");
				}
				if (s->val - prev_elem->val != weight_table[i]->val -
prev2_weight_table[i]->val)
					yyerror("Range mismatch in weight %d", i);
				/* weights in the gap count up from the weight that
				 * preceded the ellipsis line */
				x = prev2_weight_table[i]->val + 1;
				for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) {
					struct __collate_st_char_pri *p = getpri(w);
					if (p->pri[i] != PRI_UNDEFINED)
						yyerror("Char 0x%02x previously defined", (unsigned int)w);
					p->pri[i] = x++;
				}
				break;
			case SYMBOL_STRING:
				/* every character in the gap gets the same
				 * substitution string and its weight */
				for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) {
					struct __collate_st_char_pri *p = getpri(w);
					if (p->pri[i] != PRI_UNDEFINED)
						yyerror("Char 0x%02x previously defined", (unsigned int)w);
					putsubst(w, i, prev_weight_table[i]->u.str);
					p->pri[i] = prev_weight_table[i]->val;
				}
				break;
			}
		}
	}
	/* Now record the weights of the element named on this line. */
	switch(s->type) {
	case SYMBOL_CHAR: {
		struct __collate_st_char_pri *p = getpri(s->u.wc);
		for(i = 0; i < directive_count; i++) {
			switch (weight_table[i]->type) {
			case SYMBOL_CHAR:
			case SYMBOL_CHAIN:
			case SYMBOL_IGNORE:
			case SYMBOL_SYMBOL:
				if (p->pri[i] != PRI_UNDEFINED)
					yyerror("Char 0x%02x previously defined", (unsigned int)s->u.wc);
				p->pri[i] = weight_table[i]->val;
				break;
			case SYMBOL_STRING:
				if (p->pri[i] != PRI_UNDEFINED)
					yyerror("Char 0x%02x previously defined", (unsigned int)s->u.wc);
				putsubst(s->u.wc, i, weight_table[i]->u.str);
				p->pri[i] = weight_table[i]->val;
				break;
			}
		}
		break;
	}
	case SYMBOL_CHAIN: {
		struct __collate_st_chain_pri *p = getchain(s->u.str, EXISTS);
		for(i = 0; i < directive_count; i++) {
			switch (weight_table[i]->type) {
			case SYMBOL_CHAR:
			case SYMBOL_CHAIN:
			case SYMBOL_IGNORE:
			case SYMBOL_SYMBOL:
				if (p->pri[i] != PRI_UNDEFINED)
					yyerror("Chain %s previously defined", showwcs(s->u.str, STR_LEN));
				p->pri[i] = weight_table[i]->val;
				break;
			case SYMBOL_STRING :
				/* a string weight on a chain must spell the chain itself */
				if (wcsncmp(s->u.str, weight_table[i]->u.str, STR_LEN) != 0)
					yyerror("Chain/string mismatch");
				if (p->pri[i] != PRI_UNDEFINED)
					yyerror("Chain %s previously defined", showwcs(s->u.str, STR_LEN));
				/* A negative value means: do not substitute
				 * the chain, but it is in an
				 * equivalence class. */
				p->pri[i] =
-weight_table[i]->val; + } + } + break; + } + } + memcpy(prev_weight_table, weight_table, sizeof(weight_table)); + prev_line = LINE_NORMAL; + prev_elem = s; +} + | ELLIPSIS { weight_index = 0; allow_ellipsis = 1; } weights { + int i; + if (prev_line == LINE_ELLIPSIS) + yyerror("Illegal sequential ellipsis lines"); + if (prev_line == LINE_UNDEFINED) + yyerror("Ellipsis line can not follow UNDEFINED line"); + if (prev_line == LINE_NONE) + yyerror("Ellipsis line must follow a collating identifier lines"); + if (weight_index == 0) { + for(i = 0; i < directive_count; i++) + weight_table[i] = &sym_ellipsis; + } else if (weight_index != directive_count) + yyerror("Not enough weights specified"); + for(i = 0; i < directive_count; i++) { + if (weight_table[i]->type != SYMBOL_ELLIPSIS) + continue; + switch (prev_weight_table[i]->type) { + case SYMBOL_CHAIN: + yyerror("Startpoint of ellipsis can't be a collating element"); + case SYMBOL_IGNORE: + yyerror("Startpoint of ellipsis can't be IGNORE"); + case SYMBOL_SYMBOL: + yyerror("Startpoint of ellipsis can't be a collating symbol"); + case SYMBOL_STRING: + yyerror("Startpoint of ellipsis can't be a string"); + } + } + memcpy(prev2_weight_table, prev_weight_table, sizeof(prev_weight_table)); + memcpy(prev_weight_table, weight_table, sizeof(weight_table)); + prev_line = LINE_ELLIPSIS; + allow_ellipsis = 0; +} + | UNDEFINED { weight_index = 0; allow_ellipsis = 1; } weights { + int i; + + if (weight_index == 0) { + weight_table[0] = &sym_undefined; + for(i = 1; i < directive_count; i++) + weight_table[i] = &sym_ellipsis; + } else if (weight_index != directive_count) + yyerror("Not enough weights specified"); + for(i = 0; i < directive_count; i++) { + switch (weight_table[i]->type) { + case SYMBOL_CHAR: + case SYMBOL_CHAIN: + case SYMBOL_IGNORE: + case SYMBOL_SYMBOL: + info.undef_pri[i] = weight_table[i]->val; + break; + case SYMBOL_ELLIPSIS : + /* Negative values mean that the priority is + * relative to the lexical value */ + 
info.undef_pri[i] = -sym_undefined.val; + prim_pri = UNDEFINED_PRI; + break; + case SYMBOL_STRING : + yyerror("Strings can't be used with UNDEFINED"); + } + } + memcpy(prev_weight_table, weight_table, sizeof(weight_table)); + prev_line = LINE_UNDEFINED; +} +; +weights : + | weight + | weights ';' weight +; +weight : ELEM { + struct symbol *s; + if (weight_index >= directive_count) + yyerror("More weights than specified by order_start"); + s = getsymbol($1, EXISTS); + if (order_pass && s->val == PRI_UNDEFINED) + yyerror("<%s> is undefined", showwcs($1, CHARMAP_SYMBOL_LEN)); + weight_table[weight_index++] = s; +} + | ELLIPSIS { + if (weight_index >= directive_count) + yyerror("More weights than specified by order_start"); + if (!allow_ellipsis) + yyerror("Ellipsis weight not allowed"); + weight_table[weight_index++] = &sym_ellipsis; +} + | IGNORE { + if (weight_index >= directive_count) + yyerror("More weights than specified by order_start"); + weight_table[weight_index++] = &sym_ignore; +} + | STRING { + if (weight_index >= directive_count) + yyerror("More weights than specified by order_start"); + if (wcslen($1) > STR_LEN) + yyerror("String '%s' is too long", showwcs($1, STR_LEN)); + weight_table[weight_index++] = getstring($1); +} +; +order_end : ORDER_END '\n' +; +charmap : DEFN CHAR { + int len = wcslen($1); + struct symbol *s; + if (len > CHARMAP_SYMBOL_LEN) + yyerror("Charmap symbol name '%s' is too long", showwcs($1, CHARMAP_SYMBOL_LEN)); + s = getsymbol($1, NOTEXISTS); + s->type = SYMBOL_CHAR; + s->val = PRI_UNDEFINED; + s->u.wc = $2; + setsymbolbychar(s); +} +; +substitute : SUBSTITUTE CHAR WITH STRING { + if (wcslen($4) + 1 > STR_LEN) + yyerror("%s substitution is too long", charname($2)); + putsubst($2, 0, $4); +} +; +order : ORDER order_list +; +order_list : item + | order_list ';' item +; +chain : CHAR CHAR { + curr_chain[0] = $1; + curr_chain[1] = $2; + if (curr_chain[0] == '\0' || curr_chain[1] == '\0') + yyerror("\\0 can't be chained"); + 
curr_chain[2] = '\0';
}
	| chain CHAR {
	/* tb[1] stays 0 (static storage), so tb is a one-character string */
	static wchar_t tb[2];
	tb[0] = $2;
	if (tb[0] == '\0')
		yyerror("\\0 can't be chained");
	/* curr_chain holds wide characters, so it has to go through
	 * showwcs() before it can be printed with %s */
	if (wcslen(curr_chain) + 1 > STR_LEN)
		yyerror("Chain '%s' grows too long", showwcs(curr_chain, STR_LEN));
	(void)wcscat(curr_chain, tb);
}
;
/*
 * item: one entry of an "order" list.  A plain character, chain or range
 * takes the next primary priority; '{...}' and '(...)' groups share the
 * current primary priority and advance prim_pri to sec_pri afterwards.
 */
item : CHAR {
	struct __collate_st_char_pri *p = getpri($1);
	if (p->pri[0] >= 0)
		yyerror("%s duplicated", charname($1));
	p->pri[0] = p->pri[1] = prim_pri;
	sec_pri = ++prim_pri;
}
	| chain {
	struct __collate_st_chain_pri *c = getchain(curr_chain, NOTEXISTS);
	c->pri[0] = c->pri[1] = prim_pri;
	sec_pri = ++prim_pri;
}
	| CHAR RANGE CHAR {
	u_int i;
	struct __collate_st_char_pri *p;

	/* charname() returns a static buffer, hence charname2() for the
	 * second name in the same message */
	if ($3 <= $1)
		yyerror("Illegal range %s -- %s", charname($1), charname2($3));

	for (i = $1; i <= $3; i++) {
		p = getpri(i);
		if (p->pri[0] >= 0)
			yyerror("%s duplicated", charname(i));
		p->pri[0] = p->pri[1] = prim_pri++;
	}
	sec_pri = prim_pri;
}
	| '{' mixed_order_list '}' {
	prim_pri = sec_pri;
}
	| '(' sec_order_list ')' {
	prim_pri = sec_pri;
}
;
/* Each ';'-separated sub-list inside '{...}' gets its own secondary priority. */
mixed_order_list : mixed_sub_list {
	sec_pri++;
}
	| mixed_order_list ';' mixed_sub_list {
	sec_pri++;
}
;
mixed_sub_list : mixed_sub_item
	| mixed_sub_list ',' mixed_sub_item
;
sec_order_list : sec_sub_item
	| sec_order_list ',' sec_sub_item
;
/* mixed_sub_item: members of one sub-list share both prim_pri and sec_pri. */
mixed_sub_item : CHAR {
	struct __collate_st_char_pri *p = getpri($1);
	if (p->pri[0] >= 0)
		yyerror("%s duplicated", charname($1));
	p->pri[0] = prim_pri;
	p->pri[1] = sec_pri;
}
	| CHAR RANGE CHAR {
	u_int i;
	struct __collate_st_char_pri *p;

	if ($3 <= $1)
		yyerror("Illegal range %s -- %s",
		    charname($1), charname2($3));

	for (i = $1; i <= $3; i++) {
		p = getpri(i);
		if (p->pri[0] >= 0)
			yyerror("%s duplicated", charname(i));
		p->pri[0] = prim_pri;
		p->pri[1] = sec_pri;
	}
}
	| chain {
	struct __collate_st_chain_pri *c = getchain(curr_chain, NOTEXISTS);
	c->pri[0] = prim_pri;
	c->pri[1] = sec_pri;
}
sec_sub_item : CHAR {
	struct __collate_st_char_pri
*p = getpri($1);
	if (p->pri[0] >= 0)
		yyerror("%s duplicated", charname($1));
	p->pri[0] = prim_pri;
	/* inside '(...)' every member takes the next secondary priority */
	p->pri[1] = sec_pri++;
}
	| CHAR RANGE CHAR {
	u_int i;
	struct __collate_st_char_pri *p;

	if ($3 <= $1)
		yyerror("Illegal range %s -- %s",
		    charname($1), charname2($3));

	for (i = $1; i <= $3; i++) {
		p = getpri(i);
		if (p->pri[0] >= 0)
			yyerror("%s duplicated", charname(i));
		p->pri[0] = prim_pri;
		p->pri[1] = sec_pri++;
	}
}
	| chain {
	struct __collate_st_chain_pri *c = getchain(curr_chain, NOTEXISTS);
	c->pri[0] = prim_pri;
	c->pri[1] = sec_pri++;
}
;
%%
/*
 * Program entry point.
 *
 * Opens the hash databases used during parsing (each with a NULL file
 * name), marks every weight of the single-byte priority table as
 * PRI_UNDEFINED, handles the command-line options (-o out_file,
 * -I map_dir, and -d when built with COLLATE_DEBUG), optionally redirects
 * yyin to the named source file and runs the parser.
 *
 * Errors are reported with err()/errx() or usage(), which exit the process.
 */
int
main(int ac, char **av)
{
#ifdef COLLATE_DEBUG
	static const char optstr[] = ":do:I:";
#else
	static const char optstr[] = ":o:I:";
#endif
	int ch, z;

	if ((charmapdb = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL)) == NULL)
		err(1, "dbopen charmapdb");
	if ((charmapdb2 = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL)) == NULL)
		err(1, "dbopen charmapdb2");
	if ((largemapdb = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL)) == NULL)
		err(1, "dbopen largemapdb");
	if ((substdb[0] = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL)) == NULL)
		err(1, "dbopen substdb[0]");
	if ((chaindb = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL)) == NULL)
		err(1, "dbopen chaindb");
	/* -1 means an undefined priority, which we adjust after parsing */
	for (ch = 0; ch <= UCHAR_MAX; ch++)
		for(z = 0; z < COLL_WEIGHTS_MAX; z++)
			__collate_char_pri_table[ch].pri[z] = PRI_UNDEFINED;

	while ((ch = getopt(ac, av, optstr)) != -1) {
		switch (ch) {
#ifdef COLLATE_DEBUG
		case 'd':
			debug++;
			break;
#endif
		case 'o':
			out_file = optarg;
			break;

		case 'I':
			/* refuse a directory name that would be silently cut short */
			if (strlcpy(map_name, optarg, sizeof(map_name)) >= sizeof(map_name))
				errx(EX_USAGE, "map directory name too long: %s", optarg);
			break;

		default:
			/* unknown option or missing option argument */
			usage();
		}
	}
	ac -= optind;
	av += optind;
	if (ac > 0) {
		if ((yyin = fopen(*av, "r")) == NULL)
			err(EX_UNAVAILABLE, "can't open source file %s", *av);
	}
	/* syntax errors are reported through yyerror(), which exits */
	yyparse();
	return 0;
}

static struct __collate_st_char_pri *
getpri(int32_t c)
+{ + DBT key, val; + struct __collate_st_char_pri *p; + int ret; + + if (c <= UCHAR_MAX) + return &__collate_char_pri_table[c]; + key.data = &c; + key.size = sizeof(int32_t); + if ((ret = largemapdb->get(largemapdb, &key, &val, 0)) < 0) + err(1, "getpri: Error getting %s", charname(c)); + if (ret != 0) { + struct __collate_st_char_pri *pn; + int z; + if ((pn = (struct __collate_st_char_pri *)malloc(sizeof(struct __collate_st_char_pri))) == NULL) + err(1, "getpri: malloc"); + for(z = 0; z < COLL_WEIGHTS_MAX; z++) + pn->pri[z] = PRI_UNDEFINED; + val.data = &pn; + val.size = sizeof(struct __collate_st_char_pri *); + if (largemapdb->put(largemapdb, &key, &val, 0) < 0) + err(1, "getpri: Error storing %s", charname(c)); + nlargemap++; + } + memcpy(&p, val.data, sizeof(struct __collate_st_char_pri *)); + return p; +} + +static struct __collate_st_char_pri * +haspri(int32_t c) +{ + DBT key, val; + struct __collate_st_char_pri *p; + int ret; + + if (c <= UCHAR_MAX) + return &__collate_char_pri_table[c]; + key.data = &c; + key.size = sizeof(int32_t); + if ((ret = largemapdb->get(largemapdb, &key, &val, 0)) < 0) + err(1, "haspri: Error getting %s", charname(c)); + if (ret != 0) + return NULL; + memcpy(&p, val.data, sizeof(struct __collate_st_char_pri *)); + return p; +} + +static struct __collate_st_chain_pri * +getchain(const wchar_t *wcs, int exists) +{ + DBT key, val; + struct __collate_st_chain_pri *p; + int ret; + + key.data = (void *)wcs; + key.size = __collate_wcsnlen(wcs, STR_LEN) * sizeof(wchar_t); + if ((ret = chaindb->get(chaindb, &key, &val, 0)) < 0) + err(1, "getchain: Error getting \"%s\"", showwcs(wcs, STR_LEN)); + if (ret != 0) { + struct __collate_st_chain_pri *pn; + int z; + if (exists > 0) + errx(1, "getchain: \"%s\" is not defined", showwcs(wcs, STR_LEN)); + if ((pn = (struct __collate_st_chain_pri *)malloc(sizeof(struct __collate_st_chain_pri))) == NULL) + err(1, "getchain: malloc"); + for(z = 0; z < COLL_WEIGHTS_MAX; z++) + pn->pri[z] = PRI_UNDEFINED; + 
bzero(pn->str, sizeof(pn->str)); + wcsncpy(pn->str, wcs, STR_LEN); + val.data = &pn; + val.size = sizeof(struct __collate_st_chain_pri *); + if (chaindb->put(chaindb, &key, &val, 0) < 0) + err(1, "getchain: Error storing \"%s\"", showwcs(wcs, STR_LEN)); + nchain++; + } else if (exists == 0) + errx(1, "getchain: \"%s\" already exists", showwcs(wcs, STR_LEN)); + memcpy(&p, val.data, sizeof(struct __collate_st_chain_pri *)); + return p; +} + +struct symbol * +getsymbol(const wchar_t *wcs, int exists) +{ + DBT key, val; + struct symbol *p; + int ret; + + key.data = (void *)wcs; + key.size = wcslen(wcs) * sizeof(wchar_t); + if ((ret = charmapdb->get(charmapdb, &key, &val, 0)) < 0) + err(1, "getsymbol: Error getting \"%s\"", showwcs(wcs, CHARMAP_SYMBOL_LEN)); + if (ret != 0) { + struct symbol *pn; + if (exists > 0) + errx(1, "getsymbol: \"%s\" is not defined", showwcs(wcs, CHARMAP_SYMBOL_LEN)); + if ((pn = (struct symbol *)malloc(sizeof(struct symbol))) == NULL) + err(1, "getsymbol: malloc"); + pn->val = PRI_UNDEFINED; + wcsncpy(pn->name, wcs, CHARMAP_SYMBOL_LEN); + val.data = &pn; + val.size = sizeof(struct symbol *); + if (charmapdb->put(charmapdb, &key, &val, 0) < 0) + err(1, "getsymbol: Error storing \"%s\"", showwcs(wcs, CHARMAP_SYMBOL_LEN)); + } else if (exists == 0) + errx(1, "getsymbol: \"%s\" already exists", showwcs(wcs, CHARMAP_SYMBOL_LEN)); + memcpy(&p, val.data, sizeof(struct symbol *)); + return p; +} + +static struct symbol * +getsymbolbychar(wchar_t wc) +{ + DBT key, val; + struct symbol *p; + int ret; + + key.data = &wc; + key.size = sizeof(wchar_t); + if ((ret = charmapdb2->get(charmapdb2, &key, &val, 0)) < 0) + err(1, "getsymbolbychar: Error getting Char 0x%02x", wc); + if (ret != 0) + errx(1, "getsymbolbychar: Char 0x%02x is not defined", wc); + memcpy(&p, val.data, sizeof(struct symbol *)); + return p; +} + +static struct symbol * +hassymbolbychar(wchar_t wc) +{ + DBT key, val; + struct symbol *p; + int ret; + + key.data = &wc; + key.size = 
sizeof(wchar_t); + if ((ret = charmapdb2->get(charmapdb2, &key, &val, 0)) < 0) + err(1, "hassymbolbychar: Error getting Char 0x%02x", wc); + if (ret != 0) + return NULL; + memcpy(&p, val.data, sizeof(struct symbol *)); + return p; +} + +static void +setsymbolbychar(struct symbol *s) +{ + DBT key, val; + struct symbol *p; + int ret; + + key.data = &s->u.wc; + key.size = sizeof(wchar_t); + val.data = &s; + val.size = sizeof(struct symbol *); + if (charmapdb2->put(charmapdb2, &key, &val, 0) < 0) + err(1, "setsymbolbychar: Error storing <%s>", showwcs(s->name, CHARMAP_SYMBOL_LEN)); +} + +struct symbol * +getstring(const wchar_t *wcs) +{ + DBT key, val; + struct symbol *p; + int ret; + + key.data = (void *)wcs; + key.size = wcslen(wcs) * sizeof(wchar_t); + if ((ret = stringdb->get(stringdb, &key, &val, 0)) < 0) + err(1, "getstring: Error getting \"%s\"", showwcs(wcs, STR_LEN)); + if (ret != 0) { + struct symbol *pn; + if ((pn = (struct symbol *)malloc(sizeof(struct symbol))) == NULL) + err(1, "getstring: malloc"); + pn->type = SYMBOL_STRING; + pn->val = prim_pri++; + wcsncpy(pn->u.str, wcs, STR_LEN); + val.data = &pn; + val.size = sizeof(struct symbol *); + if (stringdb->put(stringdb, &key, &val, 0) < 0) + err(1, "getstring: Error storing \"%s\"", showwcs(wcs, STR_LEN)); + } + memcpy(&p, val.data, sizeof(struct symbol *)); + return p; +} + +static void +makeforwardref(int i, const struct symbol *from, const struct symbol * to) +{ +} + +static void +putsubst(int32_t c, int i, const wchar_t *str) +{ + DBT key, val; + int ret; + wchar_t clean[STR_LEN]; + + if (!substdb[i]) + if ((substdb[i] = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL)) == NULL) + err(1, "dbopen substdb[%d]", i); + key.data = &c; + key.size = sizeof(int32_t); + bzero(clean, sizeof(clean)); + wcsncpy(clean, str, STR_LEN); + val.data = clean; + val.size = sizeof(clean); + if ((ret = substdb[i]->put(substdb[i], &key, &val, R_NOOVERWRITE)) < 0) + err(1, "putsubst: Error on %s", charname(c)); + if (ret 
!= 0) + errx(1, "putsubst: Duplicate substitution of %s", charname(c)); + nsubst[i]++; +} + +static int +hassubst(int32_t c, int i) +{ + DBT key, val; + int ret; + + if (!substdb[i]) + return 0; + key.data = &c; + key.size = sizeof(int32_t); + if ((ret = substdb[i]->get(substdb[i], &key, &val, 0)) < 0) + err(1, "hassubst: Error getting %s", charname(c)); + return (ret == 0); +} + +static int +chainpricompar(const void *a, const void *b) +{ + return wcsncmp(((struct __collate_st_chain_pri *)a)->str, ((struct __collate_st_chain_pri *)b)->str, STR_LEN); +} + +static int +charpricompar(const void *a, const void *b) +{ + return ((struct __collate_st_large_char_pri *)a)->val - ((struct __collate_st_large_char_pri *)b)->val; +} + +static int +substcompar(const void *a, const void *b) +{ + return ((struct __collate_st_subst *)a)->val - ((struct __collate_st_subst *)b)->val; +} + +static const wchar_t * +__collate_wcsnchr(const wchar_t *s, wchar_t c, int len) +{ + while (*s && len > 0) { + if (*s == c) + return s; + s++; + len--; + } + return NULL; +} + +static int +__collate_wcsnlen(const wchar_t *s, int len) +{ + int n = 0; + while (*s && n < len) { + s++; + n++; + } + return n; +} + +static void +usage(void) +{ + fprintf(stderr, "usage: colldef [-o out_file] [-I map_dir] [filename]\n"); + exit(EX_USAGE); +} + +void +yyerror(const char *fmt, ...) 
+{ + va_list ap; + char msg[128]; + + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + errx(EX_UNAVAILABLE, "%s, near line %d", msg, line_no); +} + +char * +showwcs(const wchar_t *t, int len) +{ + static char buf[8* CHARMAP_SYMBOL_LEN]; + char *cp = buf; + + for(; *t && len > 0; len--, t++) { + if (*t >=32 && *t <= 126) + *cp++ = *t; + else { + sprintf(cp, "\\x{%02x}", *t); + cp += strlen(cp); + } + } + *cp = 0; + return buf; +} + +static char * +charname(wchar_t wc) +{ + static char buf[CHARMAP_SYMBOL_LEN + 1]; + struct symbol *s = hassymbolbychar(wc); + + if (s) + strcpy(buf, showwcs(s->name, CHARMAP_SYMBOL_LEN)); + else + sprintf(buf, "Char 0x%02x", wc); + return buf; +} + +static char * +charname2(wchar_t wc) +{ + static char buf[CHARMAP_SYMBOL_LEN + 1]; + struct symbol *s = hassymbolbychar(wc); + + if (s) + strcpy(buf, showwcs(s->name, CHARMAP_SYMBOL_LEN)); + else + sprintf(buf, "Char 0x%02x", wc); + return buf; +} + +#ifdef COLLATE_DEBUG +static char * +show(int c) +{ + static char buf[5]; + + if (c >=32 && c <= 126) + sprintf(buf, "'%c' ", c); + else + sprintf(buf, "\\x{%02x}", c); + return buf; +} + +static void +collate_print_tables(void) +{ + int i, z; + + printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n", + info.directive[0], info.directive[1], + info.flags, info.chain_max_len, + info.directive_count, + info.undef_pri[0], info.undef_pri[1], + info.subst_count[0], info.subst_count[1], + info.chain_count, info.large_pri_count); + for(z = 0; z < info.directive_count; z++) { + if (info.subst_count[z] > 0) { + struct __collate_st_subst *p2 = __collate_substitute_table[z]; + if (z == 0 && (info.flags & COLLATE_SUBST_DUP)) + printf("Both substitute tables:\n"); + else + printf("Substitute table %d:\n", z); + for (i = info.subst_count[z]; i-- > 0; p2++) + printf("\t%s --> \"%s\"\n", + show(p2->val), + showwcs(p2->str, STR_LEN)); + } + } + if (info.chain_count > 0) { + printf("Chain priority table:\n"); + 
struct __collate_st_chain_pri *p2 = __collate_chain_pri_table; + for (i = info.chain_count; i-- > 0; p2++) { + printf("\t\"%s\" :", showwcs(p2->str, STR_LEN)); + for(z = 0; z < info.directive_count; z++) + printf(" %d", p2->pri[z]); + putchar('\n'); + } + } + printf("Char priority table:\n"); + { + struct __collate_st_char_pri *p2 = __collate_char_pri_table; + for (i = 0; i < UCHAR_MAX + 1; i++, p2++) { + printf("\t%s :", show(i)); + for(z = 0; z < info.directive_count; z++) + printf(" %d", p2->pri[z]); + putchar('\n'); + } + } + if (info.large_pri_count > 0) { + struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table; + printf("Large priority table:\n"); + for (i = info.large_pri_count; i-- > 0; p2++) { + printf("\t%s :", show(p2->val)); + for(z = 0; z < info.directive_count; z++) + printf(" %d", p2->pri.pri[z]); + putchar('\n'); + } + } +} +#endif diff --git a/adv_cmds/colldef/scan.l b/adv_cmds/colldef/scan.l new file mode 100644 index 0000000..ce14492 --- /dev/null +++ b/adv_cmds/colldef/scan.l @@ -0,0 +1,398 @@ +%x string s_name charmap defn nchar subs subs2 ldef elem +%{ +/*- + * Copyright (c) 1995 Alex Tatmanjants + * at Electronni Visti IA, Kiev, Ukraine. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: src/usr.bin/colldef/scan.l,v 1.19 2002/08/23 04:18:26 ache Exp $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include "collate.h" +#include "common.h" +#include "y.tab.h" + +int line_no = 1, save_no, fromsubs; +wchar_t buf0[BUFSIZE], *ptr; +wchar_t *buf = buf0; +wchar_t bufstr[BUFSIZE], *ptrsave; +FILE *map_fp; +YY_BUFFER_STATE main_buf, map_buf; +#ifdef FLEX_DEBUG +YYSTYPE yylval; +#endif /* FLEX_DEBUG */ +int yylex(void); +static int localedefmode = 0; +static orderpass = 0; +%} +%% +[ \t]+ ; +\" { ptr = buf; BEGIN(string); } +\< { + if(localedefmode) { + ptrsave = ptr; + ptr = buf = bufstr; + BEGIN(s_name); + } else { + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '<'", + line_no); + *ptr++ = '<'; + } +} +\< { ptr = buf; fromsubs = 1; BEGIN(s_name); } +[,;] return *yytext; +forward { yylval.ch = DIRECTIVE_FORWARD; return ORDER_DIRECTIVE; } +backward { yylval.ch = DIRECTIVE_BACKWARD; return ORDER_DIRECTIVE; } +position { yylval.ch = DIRECTIVE_POSITION; return ORDER_DIRECTIVE; } +collating[-_]element return COLLATING_ELEMENT; +collating[-_]symbol return COLLATING_SYMBOL; +from return FROM; +\.\.\. 
return ELLIPSIS; +IGNORE return IGNORE; +UNDEFINED return UNDEFINED; +order[-_]start return ORDER_START; +order[-_]end { + char line[YY_BUF_SIZE]; + if (orderpass) + return ORDER_END; + /* The first pass only defined the left-most symbol. We reread the + * order lines, and forward references should now be resolved. */ + orderpass++; + YY_FLUSH_BUFFER; + rewind(yyin); + for(;;) { + if (fgets(line, sizeof(line), yyin) == NULL) + errx(EX_UNAVAILABLE, "EOF rescanning for order_start"); + if (*line == '#') + continue; + if (strstr(line, "order_start") != NULL) + break; + } + return ORDER_SECOND_PASS; +} +END[ \t]+LC_COLLATE return END_LC_COLLATE; +\n { + line_no++; + return '\n'; +} +\< { ptr = buf; BEGIN(elem); } +\< { ptr = buf; fromsubs = 0; BEGIN(s_name); } +<*>^#.*\n line_no++; +^\n line_no++; +\\\n line_no++; +\\t { yylval.ch = '\t'; return CHAR; } +\\n { yylval.ch = '\n'; return CHAR; } +\\b { yylval.ch = '\b'; return CHAR; } +\\f { yylval.ch = '\f'; return CHAR; } +\\v { yylval.ch = '\v'; return CHAR; } +\\r { yylval.ch = '\r'; return CHAR; } +\\a { yylval.ch = '\a'; return CHAR; } +\n { + line_no++; + BEGIN(INITIAL); + return '\n'; +} +\n { + line_no++; + if (map_fp != NULL) { + ptr = buf; + BEGIN(defn); + } + return '\n'; +} +[;,{}()] return *yytext; +substitute { BEGIN(subs); return SUBSTITUTE; } +LC_COLLATE { BEGIN(ldef); localedefmode++; return START_LC_COLLATE; } +with { BEGIN(subs2); return WITH; } +order return ORDER; +charmap BEGIN(charmap); +;[ \t]*\.\.\.[ \t]*; return RANGE; +\\([0-7]{3}) { + u_int v; + + sscanf(&yytext[1], "%o", &v); + yylval.ch = v; + return CHAR; +} +\\x\{([0-9a-fA-F]{2,8})\} { + u_int v; + + sscanf(&yytext[3], "%x", &v); + yylval.ch = v; + return CHAR; +} +\\x([0-9a-fA-F]{2}) { + u_int v; + + sscanf(&yytext[2], "%x", &v); + yylval.ch = v; + return CHAR; +} +\\. { yylval.ch = yytext[1]; return CHAR; } +. 
{ yylval.ch = *(u_char *)yytext; return CHAR; } +^\n line_no++; +[ \t]+ { + if (ptr == buf) + errx(EX_UNAVAILABLE, "map expected near line %u of %s", + line_no, map_name); + *ptr = 0; + if (localedefmode && *buf == '<' && ptr[-1] == '>') { + if (ptr == buf + 2) + errx(EX_UNAVAILABLE, "map expected near line %u of %s", + line_no, map_name); + *--ptr = 0; + wcscpy(yylval.str, buf + 1); + } else + wcscpy(yylval.str, buf); + BEGIN(nchar); + return DEFN; +} +\/\/ { + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "name buffer overflow near line %u, character '/'", + line_no); + *ptr++ = '/'; +} +\/\> { + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "name buffer overflow near line %u, character '>'", + line_no); + *ptr++ = '>'; +} +\\\" { + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\"'", + line_no); + *ptr++ = '"'; +} +\> { + if (ptr == buf) + errx(EX_UNAVAILABLE, "non-empty name expected near line %u", + line_no); + *ptr = 0; + wcscpy(yylval.str, buf); + BEGIN(ldef); + return ELEM; +} +\> { + struct symbol *s; + + if (ptr == buf) + errx(EX_UNAVAILABLE, "non-empty name expected near line %u", + line_no); + *ptr = 0; + s = getsymbol(buf, EXISTS); + switch (s->type) { + case SYMBOL_CHAR: + break; + case SYMBOL_CHAIN: + errx(EX_UNAVAILABLE, "name <%s> is chain type near line %u", + showwcs(buf, CHARMAP_SYMBOL_LEN), line_no); + case SYMBOL_SYMBOL: + errx(EX_UNAVAILABLE, "name <%s> is symbol type near line %u", + showwcs(buf, CHARMAP_SYMBOL_LEN), line_no); + default: + errx(EX_UNAVAILABLE, "name <%s>: unknown symbol type (%d) near line %u", + showwcs(buf, CHARMAP_SYMBOL_LEN), s->type, line_no); + } + if (localedefmode) { + ptr = ptrsave; + buf = buf0; + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character <%s>", + line_no, showwcs(bufstr, CHARMAP_SYMBOL_LEN)); + *ptr++ = s->u.wc; + BEGIN(string); + } else { + yylval.ch = s->u.wc; + if (fromsubs) + BEGIN(subs); 
+ else + BEGIN(INITIAL); + return CHAR; + } +} +\" { + *ptr = 0; + wcscpy(yylval.str, buf); + if (localedefmode) + BEGIN(ldef); + else + BEGIN(subs2); + return STRING; +} +. { + const char *s = (map_fp != NULL) ? map_name : "input"; + + if (!isascii(*yytext) || !isprint(*yytext)) + errx(EX_UNAVAILABLE, "non-ASCII or non-printable character 0x%02x not allowed in the map/name near line %u of %s", + *yytext, line_no, s); + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "map/name buffer overflow near line %u of %s, character '%c'", + line_no, s, *yytext); + *ptr++ = *yytext; +} +\\t { + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\t'", + line_no); + *ptr++ = '\t'; +} +\\b { + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\b'", + line_no); + *ptr++ = '\b'; +} +\\f { + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\f'", + line_no); + *ptr++ = '\f'; +} +\\v { + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\v'", + line_no); + *ptr++ = '\v'; +} +\\n { + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\n'", + line_no); + *ptr++ = '\n'; +} +\\r { + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\r'", + line_no); + *ptr++ = '\r'; +} +\\a { + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\a'", + line_no); + *ptr++ = '\a'; +} +\n { + const char *s = (map_fp != NULL) ? map_name : "input"; + + errx(EX_UNAVAILABLE, "unterminated map/name/string near line %u of %s", line_no, s); +} +<> { + const char *s = (map_fp != NULL) ? 
map_name : "input"; + + errx(EX_UNAVAILABLE, "premature EOF in the name/string/char near line %u of %s", line_no, s); +} +\\x\{([0-9a-f]{2,8})\} { + u_int v; + + sscanf(&yytext[3], "%x", &v); + *ptr++ = v; +} +\\x([0-9a-f]{2}) { + u_int v; + + sscanf(&yytext[2], "%x", &v); + *ptr++ = v; +} +\\([0-7]{3}) { + u_int v; + + sscanf(&yytext[1], "%o", &v); + *ptr++ = v; +} +\\. { + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '%c'", + line_no, yytext[1]); + *ptr++ = yytext[1]; +} +. { + if(ptr >= buf + BUFSIZE - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '%c'", + line_no, *yytext); + *ptr++ = *yytext; +} +[^ \t\n]+ { + if(*yytext == '/') + strcpy(map_name, yytext); + else { + strcat(map_name, "/"); + strcat(map_name, yytext); + } + if((map_fp = fopen(map_name, "r")) == NULL) + err(EX_UNAVAILABLE, "can't open 'charmap' file %s", + map_name); + save_no = line_no; + line_no = 1; + map_buf = yy_new_buffer(map_fp, YY_BUF_SIZE); + main_buf = YY_CURRENT_BUFFER; + yy_switch_to_buffer(map_buf); + ptr = buf; + BEGIN(defn); +} +\n { + errx(EX_UNAVAILABLE, "'charmap' file name expected near line %u", + line_no); +} +<> { + errx(EX_UNAVAILABLE, "'charmap' file name expected near line %u", + line_no); +} +<> { + if(map_fp != NULL) { + if (ptr != buf) + errx(EX_UNAVAILABLE, "premature EOF in the map near line %u of %s", line_no, map_name); + yy_switch_to_buffer(main_buf); + yy_delete_buffer(map_buf); + fclose(map_fp); + map_fp = NULL; + line_no = save_no; + if (localedefmode) + BEGIN(ldef); + else + BEGIN(INITIAL); + } else + yyterminate(); +} +%% +#ifdef FLEX_DEBUG +main() +{ + while(yylex()) + ; + return 0; +} +#endif /* FLEX_DEBUG */ -- cgit v1.2.3-56-ge451