From 5fd83771641d15c418f747bd343ba6738d3875f7 Mon Sep 17 00:00:00 2001 From: Cameron Katri Date: Sun, 9 May 2021 14:20:58 -0400 Subject: Import macOS userland adv_cmds-176 basic_cmds-55 bootstrap_cmds-116.100.1 developer_cmds-66 diskdev_cmds-667.40.1 doc_cmds-53.60.1 file_cmds-321.40.3 mail_cmds-35 misc_cmds-34 network_cmds-606.40.1 patch_cmds-17 remote_cmds-63 shell_cmds-216.60.1 system_cmds-880.60.2 text_cmds-106 --- text_cmds/sort/bwstring.c | 1142 +++++++++++++++++++ text_cmds/sort/bwstring.h | 142 +++ text_cmds/sort/coll.c | 1324 +++++++++++++++++++++++ text_cmds/sort/coll.h | 168 +++ text_cmds/sort/commoncrypto.c | 98 ++ text_cmds/sort/commoncrypto.h | 8 + text_cmds/sort/file.c | 1684 +++++++++++++++++++++++++++++ text_cmds/sort/file.h | 126 +++ text_cmds/sort/mem.c | 81 ++ text_cmds/sort/mem.h | 45 + text_cmds/sort/nls/C.msg | 16 + text_cmds/sort/nls/hu_HU.ISO8859-2.msg | 16 + text_cmds/sort/radixsort.c | 746 +++++++++++++ text_cmds/sort/radixsort.h | 38 + text_cmds/sort/sort.1.in | 639 +++++++++++ text_cmds/sort/sort.c | 1325 +++++++++++++++++++++++ text_cmds/sort/sort.h | 139 +++ text_cmds/sort/testsuite/README.txt | 19 + text_cmds/sort/testsuite/bigsample.txt.xz | Bin 0 -> 50904 bytes text_cmds/sort/testsuite/run.sh | 436 ++++++++ text_cmds/sort/testsuite/sample.txt | Bin 0 -> 1672 bytes text_cmds/sort/vsort.c | 265 +++++ text_cmds/sort/vsort.h | 37 + 23 files changed, 8494 insertions(+) create mode 100644 text_cmds/sort/bwstring.c create mode 100644 text_cmds/sort/bwstring.h create mode 100644 text_cmds/sort/coll.c create mode 100644 text_cmds/sort/coll.h create mode 100644 text_cmds/sort/commoncrypto.c create mode 100644 text_cmds/sort/commoncrypto.h create mode 100644 text_cmds/sort/file.c create mode 100644 text_cmds/sort/file.h create mode 100644 text_cmds/sort/mem.c create mode 100644 text_cmds/sort/mem.h create mode 100644 text_cmds/sort/nls/C.msg create mode 100644 text_cmds/sort/nls/hu_HU.ISO8859-2.msg create mode 100644 text_cmds/sort/radixsort.c create mode 100644 text_cmds/sort/radixsort.h create mode 100644 text_cmds/sort/sort.1.in create mode 100644 text_cmds/sort/sort.c create mode 100644 text_cmds/sort/sort.h create mode 100644 text_cmds/sort/testsuite/README.txt create mode 100644 text_cmds/sort/testsuite/bigsample.txt.xz create mode 100755 text_cmds/sort/testsuite/run.sh create mode 100644 text_cmds/sort/testsuite/sample.txt create mode 100644 text_cmds/sort/vsort.c create mode 100644 text_cmds/sort/vsort.h (limited to 'text_cmds/sort') diff --git a/text_cmds/sort/bwstring.c b/text_cmds/sort/bwstring.c new file mode 100644 index 0000000..3b7b233 --- /dev/null +++ b/text_cmds/sort/bwstring.c @@ -0,0 +1,1142 @@ +/*- + * Copyright (C) 2009 Gabor Kovesdan + * Copyright (C) 2012 Oleg Moskalenko + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bwstring.h" +#include "sort.h" + +bool byte_sort; + +static wchar_t **wmonths; +static unsigned char **cmonths; + +/* initialise months */ + +void +initialise_months(void) +{ + const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, + ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, + ABMON_11, ABMON_12 }; + unsigned char *tmp; + size_t len; + + if (MB_CUR_MAX == 1) { + if (cmonths == NULL) { + unsigned char *m; + + cmonths = sort_malloc(sizeof(unsigned char*) * 12); + for (int i = 0; i < 12; i++) { + cmonths[i] = NULL; + tmp = (unsigned char *) nl_langinfo(item[i]); + if (debug_sort) + printf("month[%d]=%s\n", i, tmp); + if (*tmp == '\0') + continue; + m = sort_strdup(tmp); + len = strlen(tmp); + for (unsigned int j = 0; j < len; j++) + m[j] = toupper(m[j]); + cmonths[i] = m; + } + } + + } else { + if (wmonths == NULL) { + wchar_t *m; + + wmonths = sort_malloc(sizeof(wchar_t *) * 12); + for (int i = 0; i < 12; i++) { + wmonths[i] = NULL; + tmp = (unsigned char *) nl_langinfo(item[i]); + if (debug_sort) + printf("month[%d]=%s\n", i, tmp); + if (*tmp == '\0') + continue; + len = strlen(tmp); + m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1)); + if (mbstowcs(m, (char*)tmp, len) == + ((size_t) - 1)) { + sort_free(m); + continue; + } + m[len] = L'\0'; + for (unsigned int j = 0; j < len; j++) + m[j] = towupper(m[j]); + wmonths[i] = m; + } + } + } +} + +/* + * Compare two wide-character strings + */ +static int +wide_str_coll(const wchar_t *s1, const wchar_t *s2) +{ + int ret = 0; + + errno = 0; + ret = wcscoll(s1, s2); + if (errno == EILSEQ) { + errno = 0; + ret = wcscmp(s1, s2); + if (errno != 0) { + for (size_t i = 0; ; ++i) { + wchar_t c1 = s1[i]; + wchar_t c2 = s2[i]; + if (c1 == L'\0') + return ((c2 == L'\0') ? 0 : -1); + if (c2 == L'\0') + return (+1); + if (c1 == c2) + continue; + return ((int)(c1 - c2)); + } + } + } + return (ret); +} + +/* counterparts of wcs functions */ + +void +bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) +{ + + if (MB_CUR_MAX == 1) + fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix); + else + fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix); +} + +const void* bwsrawdata(const struct bwstring *bws) +{ + + return (&(bws->data)); +} + +size_t bwsrawlen(const struct bwstring *bws) +{ + + return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len)); +} + +size_t +bws_memsize(const struct bwstring *bws) +{ + + return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) : + (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring))); +} + +void +bws_setlen(struct bwstring *bws, size_t newlen) +{ + + if (bws && newlen != bws->len && newlen <= bws->len) { + bws->len = newlen; + if (MB_CUR_MAX == 1) + bws->data.cstr[newlen] = '\0'; + else + bws->data.wstr[newlen] = L'\0'; + } +} + +/* + * Allocate a new binary string of specified size + */ +struct bwstring * +bwsalloc(size_t sz) +{ + struct bwstring *ret; + + if (MB_CUR_MAX == 1) + ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); + else + ret = sort_malloc(sizeof(struct bwstring) + + SIZEOF_WCHAR_STRING(sz + 1)); + ret->len = sz; + + if (MB_CUR_MAX == 1) + ret->data.cstr[ret->len] = '\0'; + else + ret->data.wstr[ret->len] = L'\0'; + + return (ret); +} + +/* + * Create a copy of binary string. + * New string size equals the length of the old string. + */ +struct bwstring * +bwsdup(const struct bwstring *s) +{ + + if (s == NULL) + return (NULL); + else { + struct bwstring *ret = bwsalloc(s->len); + + if (MB_CUR_MAX == 1) + memcpy(ret->data.cstr, s->data.cstr, (s->len)); + else + memcpy(ret->data.wstr, s->data.wstr, + SIZEOF_WCHAR_STRING(s->len)); + + return (ret); + } +} + +/* + * Create a new binary string from a wide character buffer. + */ +struct bwstring * +bwssbdup(const wchar_t *str, size_t len) +{ + + if (str == NULL) + return ((len == 0) ? bwsalloc(0) : NULL); + else { + struct bwstring *ret; + + ret = bwsalloc(len); + + if (MB_CUR_MAX == 1) + for (size_t i = 0; i < len; ++i) + ret->data.cstr[i] = (unsigned char) str[i]; + else + memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len)); + + return (ret); + } +} + +/* + * Create a new binary string from a raw binary buffer. + */ +struct bwstring * +bwscsbdup(const unsigned char *str, size_t len) +{ + struct bwstring *ret; + + ret = bwsalloc(len); + + if (str) { + if (MB_CUR_MAX == 1) + memcpy(ret->data.cstr, str, len); + else { + mbstate_t mbs; + const char *s; + size_t charlen, chars, cptr; + + chars = 0; + cptr = 0; + s = (const char *) str; + + memset(&mbs, 0, sizeof(mbs)); + + while (cptr < len) { + size_t n = MB_CUR_MAX; + + if (n > len - cptr) + n = len - cptr; + charlen = mbrlen(s + cptr, n, &mbs); + switch (charlen) { + case 0: + /* FALLTHROUGH */ + case (size_t) -1: + /* FALLTHROUGH */ + case (size_t) -2: + ret->data.wstr[chars++] = + (unsigned char) s[cptr]; + ++cptr; + break; + default: + n = mbrtowc(ret->data.wstr + (chars++), + s + cptr, charlen, &mbs); + if ((n == (size_t)-1) || (n == (size_t)-2)) + /* NOTREACHED */ + err(2, "mbrtowc error"); + cptr += charlen; + } + } + + ret->len = chars; + ret->data.wstr[ret->len] = L'\0'; + } + } + return (ret); +} + +/* + * De-allocate object memory + */ +void +bwsfree(const struct bwstring *s) +{ + + if (s) + sort_free(s); +} + +/* + * Copy content of src binary string to dst. + * If the capacity of the dst string is not sufficient, + * then the data is truncated. + */ +size_t +bwscpy(struct bwstring *dst, const struct bwstring *src) +{ + size_t nums = src->len; + + if (nums > dst->len) + nums = dst->len; + dst->len = nums; + + if (MB_CUR_MAX == 1) { + memcpy(dst->data.cstr, src->data.cstr, nums); + dst->data.cstr[dst->len] = '\0'; + } else { + memcpy(dst->data.wstr, src->data.wstr, + SIZEOF_WCHAR_STRING(nums + 1)); + dst->data.wstr[dst->len] = L'\0'; + } + + return (nums); +} + +/* + * Copy content of src binary string to dst, + * with specified number of symbols to be copied. + * If the capacity of the dst string is not sufficient, + * then the data is truncated. + */ +struct bwstring * +bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size) +{ + size_t nums = src->len; + + if (nums > dst->len) + nums = dst->len; + if (nums > size) + nums = size; + dst->len = nums; + + if (MB_CUR_MAX == 1) { + memcpy(dst->data.cstr, src->data.cstr, nums); + dst->data.cstr[dst->len] = '\0'; + } else { + memcpy(dst->data.wstr, src->data.wstr, + SIZEOF_WCHAR_STRING(nums + 1)); + dst->data.wstr[dst->len] = L'\0'; + } + + return (dst); +} + +/* + * Copy content of src binary string to dst, + * with specified number of symbols to be copied. + * An offset value can be specified, from the start of src string. + * If the capacity of the dst string is not sufficient, + * then the data is truncated. + */ +struct bwstring * +bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, + size_t size) +{ + + if (offset >= src->len) { + dst->data.wstr[0] = 0; + dst->len = 0; + } else { + size_t nums = src->len - offset; + + if (nums > dst->len) + nums = dst->len; + if (nums > size) + nums = size; + dst->len = nums; + if (MB_CUR_MAX == 1) { + memcpy(dst->data.cstr, src->data.cstr + offset, + (nums)); + dst->data.cstr[dst->len] = '\0'; + } else { + memcpy(dst->data.wstr, src->data.wstr + offset, + SIZEOF_WCHAR_STRING(nums)); + dst->data.wstr[dst->len] = L'\0'; + } + } + return (dst); +} + +/* + * Write binary string to the file. + * The output is ended either with '\n' (nl == true) + * or '\0' (nl == false). + */ +size_t +bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) +{ + + if (MB_CUR_MAX == 1) { + size_t len = bws->len; + + if (!zero_ended) { + bws->data.cstr[len] = '\n'; + + if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) + err(2, NULL); + + bws->data.cstr[len] = '\0'; + } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) + err(2, NULL); + + return (len + 1); + + } else { + wchar_t eols; + size_t printed = 0; + + eols = zero_ended ? btowc('\0') : btowc('\n'); + + while (printed < BWSLEN(bws)) { + const wchar_t *s = bws->data.wstr + printed; + + if (*s == L'\0') { + int nums; + + nums = fwprintf(f, L"%lc", *s); + + if (nums != 1) + err(2, NULL); + ++printed; + } else { + int nums; + + nums = fwprintf(f, L"%ls", s); + + if (nums < 1) + err(2, NULL); + printed += nums; + } + } + fwprintf(f, L"%lc", eols); + return (printed + 1); + } +} + +/* + * Allocate and read a binary string from file. + * The strings are nl-ended or zero-ended, depending on the sort setting. + */ +struct bwstring * +bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb) +{ + wint_t eols; + + eols = zero_ended ? btowc('\0') : btowc('\n'); + + if (!zero_ended && (MB_CUR_MAX > 1)) { + wchar_t *ret; + + ret = fgetwln(f, len); + + if (ret == NULL) { + if (!feof(f)) + err(2, NULL); + return (NULL); + } + if (*len > 0) { + if (ret[*len - 1] == (wchar_t)eols) + --(*len); + } + return (bwssbdup(ret, *len)); + + } else if (!zero_ended && (MB_CUR_MAX == 1)) { + char *ret; + + ret = fgetln(f, len); + + if (ret == NULL) { + if (!feof(f)) + err(2, NULL); + return (NULL); + } + if (*len > 0) { + if (ret[*len - 1] == '\n') + --(*len); + } + return (bwscsbdup((unsigned char*)ret, *len)); + + } else { + *len = 0; + + if (feof(f)) + return (NULL); + + if (2 >= rb->fgetwln_z_buffer_size) { + rb->fgetwln_z_buffer_size += 256; + rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, + sizeof(wchar_t) * rb->fgetwln_z_buffer_size); + } + rb->fgetwln_z_buffer[*len] = 0; + + if (MB_CUR_MAX == 1) + while (!feof(f)) { + int c; + + c = fgetc(f); + + if (c == EOF) { + if (*len == 0) + return (NULL); + goto line_read_done; + } + if (c == eols) + goto line_read_done; + + if (*len + 1 >= rb->fgetwln_z_buffer_size) { + rb->fgetwln_z_buffer_size += 256; + rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, + SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); + } + + rb->fgetwln_z_buffer[*len] = c; + rb->fgetwln_z_buffer[++(*len)] = 0; + } + else + while (!feof(f)) { + wint_t c = 0; + + c = fgetwc(f); + + if (c == WEOF) { + if (*len == 0) + return (NULL); + goto line_read_done; + } + if (c == eols) + goto line_read_done; + + if (*len + 1 >= rb->fgetwln_z_buffer_size) { + rb->fgetwln_z_buffer_size += 256; + rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer, + SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); + } + + rb->fgetwln_z_buffer[*len] = c; + rb->fgetwln_z_buffer[++(*len)] = 0; + } + +line_read_done: + /* we do not count the last 0 */ + return (bwssbdup(rb->fgetwln_z_buffer, *len)); + } +} + +int +bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, + size_t offset, size_t len) +{ + size_t cmp_len, len1, len2; + int res = 0; + + len1 = bws1->len; + len2 = bws2->len; + + if (len1 <= offset) { + return ((len2 <= offset) ? 0 : -1); + } else { + if (len2 <= offset) + return (+1); + else { + len1 -= offset; + len2 -= offset; + + cmp_len = len1; + + if (len2 < cmp_len) + cmp_len = len2; + + if (len < cmp_len) + cmp_len = len; + + if (MB_CUR_MAX == 1) { + const unsigned char *s1, *s2; + + s1 = bws1->data.cstr + offset; + s2 = bws2->data.cstr + offset; + + res = memcmp(s1, s2, cmp_len); + + } else { + const wchar_t *s1, *s2; + + s1 = bws1->data.wstr + offset; + s2 = bws2->data.wstr + offset; + + res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); + } + } + } + + if (res == 0) { + if (len1 < cmp_len && len1 < len2) + res = -1; + else if (len2 < cmp_len && len2 < len1) + res = +1; + } + + return (res); +} + +int +bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) +{ + size_t len1, len2, cmp_len; + int res; + + len1 = bws1->len; + len2 = bws2->len; + + len1 -= offset; + len2 -= offset; + + cmp_len = len1; + + if (len2 < cmp_len) + cmp_len = len2; + + res = bwsncmp(bws1, bws2, offset, cmp_len); + + if (res == 0) { + if( len1 < len2) + res = -1; + else if (len2 < len1) + res = +1; + } + + return (res); +} + +int +bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) +{ + wchar_t c1, c2; + size_t i = 0; + + for (i = 0; i < len; ++i) { + c1 = bws_get_iter_value(iter1); + c2 = bws_get_iter_value(iter2); + if (c1 != c2) + return (c1 - c2); + iter1 = bws_iterator_inc(iter1, 1); + iter2 = bws_iterator_inc(iter2, 1); + } + + return (0); +} + +int +bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) +{ + size_t len1, len2; + + len1 = bws1->len; + len2 = bws2->len; + + if (len1 <= offset) + return ((len2 <= offset) ? 0 : -1); + else { + if (len2 <= offset) + return (+1); + else { + len1 -= offset; + len2 -= offset; + + if (MB_CUR_MAX == 1) { + const unsigned char *s1, *s2; + + s1 = bws1->data.cstr + offset; + s2 = bws2->data.cstr + offset; + + if (byte_sort) { + int res = 0; + + if (len1 > len2) { + res = memcmp(s1, s2, len2); + if (!res) + res = +1; + } else if (len1 < len2) { + res = memcmp(s1, s2, len1); + if (!res) + res = -1; + } else + res = memcmp(s1, s2, len1); + + return (res); + + } else { + int res = 0; + size_t i, maxlen; + + i = 0; + maxlen = len1; + + if (maxlen > len2) + maxlen = len2; + + while (i < maxlen) { + /* goto next non-zero part: */ + while ((i < maxlen) && + !s1[i] && !s2[i]) + ++i; + + if (i >= maxlen) + break; + + if (s1[i] == 0) { + if (s2[i] == 0) + /* NOTREACHED */ + err(2, "bwscoll error 01"); + else + return (-1); + } else if (s2[i] == 0) + return (+1); + + res = strcoll((const char*)(s1 + i), (const char*)(s2 + i)); + if (res) + return (res); + + while ((i < maxlen) && + s1[i] && s2[i]) + ++i; + + if (i >= maxlen) + break; + + if (s1[i] == 0) { + if (s2[i] == 0) { + ++i; + continue; + } else + return (-1); + } else if (s2[i] == 0) + return (+1); + else + /* NOTREACHED */ + err(2, "bwscoll error 02"); + } + + if (len1 < len2) + return (-1); + else if (len1 > len2) + return (+1); + + return (0); + } + } else { + const wchar_t *s1, *s2; + size_t i, maxlen; + int res = 0; + + s1 = bws1->data.wstr + offset; + s2 = bws2->data.wstr + offset; + + i = 0; + maxlen = len1; + + if (maxlen > len2) + maxlen = len2; + + while (i < maxlen) { + + /* goto next non-zero part: */ + while ((i < maxlen) && + !s1[i] && !s2[i]) + ++i; + + if (i >= maxlen) + break; + + if (s1[i] == 0) { + if (s2[i] == 0) + /* NOTREACHED */ + err(2, "bwscoll error 1"); + else + return (-1); + } else if (s2[i] == 0) + return (+1); + + res = wide_str_coll(s1 + i, s2 + i); + if (res) + return (res); + + while ((i < maxlen) && s1[i] && s2[i]) + ++i; + + if (i >= maxlen) + break; + + if (s1[i] == 0) { + if (s2[i] == 0) { + ++i; + continue; + } else + return (-1); + } else if (s2[i] == 0) + return (+1); + else + /* NOTREACHED */ + err(2, "bwscoll error 2"); + } + + if (len1 < len2) + return (-1); + else if (len1 > len2) + return (+1); + + return (0); + } + } + } +} + +/* + * Correction of the system API + */ +double +bwstod(struct bwstring *s0, bool *empty) +{ + double ret = 0; + + if (MB_CUR_MAX == 1) { + unsigned char *end, *s; + char *ep; + + s = s0->data.cstr; + end = s + s0->len; + ep = NULL; + + while (isblank_f(*s) && s < end) + ++s; + + if (!isprint(*s)) { + *empty = true; + return (0); + } + + ret = strtod((char*)s, &ep); + if ((unsigned char*) ep == s) { + *empty = true; + return (0); + } + } else { + wchar_t *end, *ep, *s; + + s = s0->data.wstr; + end = s + s0->len; + ep = NULL; + + while (iswblank_f(*s) && s < end) + ++s; + + if (!iswprint(*s)) { + *empty = true; + return (0); + } + + ret = wcstod(s, &ep); + if (ep == s) { + *empty = true; + return (0); + } + } + + *empty = false; + return (ret); +} + +/* + * A helper function for monthcoll. If a line matches + * a month name, it returns (number of the month - 1), + * while if there is no match, it just return -1. + */ + +int +bws_month_score(const struct bwstring *s0) +{ + + if (MB_CUR_MAX == 1) { + const unsigned char *end, *s; + + s = s0->data.cstr; + end = s + s0->len; + + while (isblank_f(*s) && s < end) + ++s; + + for (int i = 11; i >= 0; --i) { + if (cmonths[i] && + (s == (unsigned char*)strstr((const char*)s, (char*)(cmonths[i])))) + return (i); + } + + } else { + const wchar_t *end, *s; + + s = s0->data.wstr; + end = s + s0->len; + + while (iswblank_f(*s) && s < end) + ++s; + + for (int i = 11; i >= 0; --i) { + if (wmonths[i] && (s == wcsstr(s, wmonths[i]))) + return (i); + } + } + + return (-1); +} + +/* + * Rips out leading blanks (-b). + */ +struct bwstring * +ignore_leading_blanks(struct bwstring *str) +{ + + if (MB_CUR_MAX == 1) { + unsigned char *dst, *end, *src; + + src = str->data.cstr; + dst = src; + end = src + str->len; + + while (src < end && isblank_f(*src)) + ++src; + + if (src != dst) { + size_t newlen; + + newlen = BWSLEN(str) - (src - dst); + + while (src < end) { + *dst = *src; + ++dst; + ++src; + } + bws_setlen(str, newlen); + } + } else { + wchar_t *dst, *end, *src; + + src = str->data.wstr; + dst = src; + end = src + str->len; + + while (src < end && iswblank_f(*src)) + ++src; + + if (src != dst) { + + size_t newlen = BWSLEN(str) - (src - dst); + + while (src < end) { + *dst = *src; + ++dst; + ++src; + } + bws_setlen(str, newlen); + + } + } + return (str); +} + +/* + * Rips out nonprinting characters (-i). + */ +struct bwstring * +ignore_nonprinting(struct bwstring *str) +{ + size_t newlen = str->len; + + if (MB_CUR_MAX == 1) { + unsigned char *dst, *end, *src; + unsigned char c; + + src = str->data.cstr; + dst = src; + end = src + str->len; + + while (src < end) { + c = *src; + if (isprint(c)) { + *dst = c; + ++dst; + ++src; + } else { + ++src; + --newlen; + } + } + } else { + wchar_t *dst, *end, *src; + wchar_t c; + + src = str->data.wstr; + dst = src; + end = src + str->len; + + while (src < end) { + c = *src; + if (iswprint(c)) { + *dst = c; + ++dst; + ++src; + } else { + ++src; + --newlen; + } + } + } + bws_setlen(str, newlen); + + return (str); +} + +/* + * Rips out any characters that are not alphanumeric characters + * nor blanks (-d). + */ +struct bwstring * +dictionary_order(struct bwstring *str) +{ + size_t newlen = str->len; + + if (MB_CUR_MAX == 1) { + unsigned char *dst, *end, *src; + unsigned char c; + + src = str->data.cstr; + dst = src; + end = src + str->len; + + while (src < end) { + c = *src; + if (isalnum(c) || isblank_f(c)) { + *dst = c; + ++dst; + ++src; + } else { + ++src; + --newlen; + } + } + } else { + wchar_t *dst, *end, *src; + wchar_t c; + + src = str->data.wstr; + dst = src; + end = src + str->len; + + while (src < end) { + c = *src; + if (iswalnum(c) || iswblank_f(c)) { + *dst = c; + ++dst; + ++src; + } else { + ++src; + --newlen; + } + } + } + bws_setlen(str, newlen); + + return (str); +} + +/* + * Converts string to lower case(-f). + */ +struct bwstring * +ignore_case(struct bwstring *str) +{ + + if (MB_CUR_MAX == 1) { + unsigned char *end, *s; + + s = str->data.cstr; + end = s + str->len; + + while (s < end) { + *s = toupper(*s); + ++s; + } + } else { + wchar_t *end, *s; + + s = str->data.wstr; + end = s + str->len; + + while (s < end) { + *s = towupper(*s); + ++s; + } + } + return (str); +} + +void +bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) +{ + + if (MB_CUR_MAX == 1) + warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr); + else + warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr); +} diff --git a/text_cmds/sort/bwstring.h b/text_cmds/sort/bwstring.h new file mode 100644 index 0000000..857de13 --- /dev/null +++ b/text_cmds/sort/bwstring.h @@ -0,0 +1,142 @@ +/* $FreeBSD: head/usr.bin/sort/bwstring.h 264744 2014-04-21 22:52:18Z pfg $ */ + +/*- + * Copyright (C) 2009 Gabor Kovesdan + * Copyright (C) 2012 Oleg Moskalenko + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if !defined(__BWSTRING_H__) +#define __BWSTRING_H__ + +#include +#include +#include +#include +#include + +#include "mem.h" + +extern bool byte_sort; + +/* wchar_t is of 4 bytes: */ +#define SIZEOF_WCHAR_STRING(LEN) ((LEN)*sizeof(wchar_t)) + +/* + * Binary "wide" string + */ +struct bwstring +{ + size_t len; + union + { + wchar_t wstr[0]; + unsigned char cstr[0]; + } data; +}; + +struct reader_buffer +{ + wchar_t *fgetwln_z_buffer; + size_t fgetwln_z_buffer_size; +}; + +typedef void *bwstring_iterator; + +#define BWSLEN(s) ((s)->len) + +struct bwstring *bwsalloc(size_t sz); + +size_t bwsrawlen(const struct bwstring *bws); +const void* bwsrawdata(const struct bwstring *bws); +void bws_setlen(struct bwstring *bws, size_t newlen); +size_t bws_memsize(const struct bwstring *bws); +double bwstod(struct bwstring *s0, bool *empty); +int bws_month_score(const struct bwstring *s0); + +struct bwstring *ignore_leading_blanks(struct bwstring *str); +struct bwstring *ignore_nonprinting(struct bwstring *str); +struct bwstring *dictionary_order(struct bwstring *str); +struct bwstring *ignore_case(struct bwstring *str); + +void bwsprintf(FILE*, struct bwstring*, const char *prefix, const char *suffix); +void bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos); + +struct bwstring *bwsdup(const struct bwstring *s); +struct bwstring *bwssbdup(const wchar_t *str, size_t size); +struct bwstring *bwscsbdup(const unsigned char *str, size_t size); +void bwsfree(const struct bwstring *s); +size_t bwscpy(struct bwstring *dst, const struct bwstring *src); +struct bwstring *bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size); +struct bwstring *bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, size_t size); +int bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset); +int bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset, size_t len); +int bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset); +size_t bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended); +struct bwstring *bwsfgetln(FILE *file, size_t *len, bool zero_ended, struct reader_buffer *rb); + +static inline bwstring_iterator +bws_begin(struct bwstring *bws) +{ + + return (bwstring_iterator) (&(bws->data)); +} + +static inline bwstring_iterator +bws_end(struct bwstring *bws) +{ + + return ((MB_CUR_MAX == 1) ? + (bwstring_iterator) (bws->data.cstr + bws->len) : + (bwstring_iterator) (bws->data.wstr + bws->len)); +} + +static inline bwstring_iterator +bws_iterator_inc(bwstring_iterator iter, size_t pos) +{ + + if (MB_CUR_MAX == 1) + return ((unsigned char *) iter) + pos; + else + return ((wchar_t*) iter) + pos; +} + +static inline wchar_t +bws_get_iter_value(bwstring_iterator iter) +{ + + if (MB_CUR_MAX == 1) + return *((unsigned char *) iter); + else + return *((wchar_t*) iter); +} + +int +bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len); + +#define BWS_GET(bws, pos) ((MB_CUR_MAX == 1) ? ((bws)->data.cstr[(pos)]) : (bws)->data.wstr[(pos)]) + +void initialise_months(void); + +#endif /* __BWSTRING_H__ */ diff --git a/text_cmds/sort/coll.c b/text_cmds/sort/coll.c new file mode 100644 index 0000000..f87ae0c --- /dev/null +++ b/text_cmds/sort/coll.c @@ -0,0 +1,1324 @@ +/*- + * Copyright (C) 2009 Gabor Kovesdan + * Copyright (C) 2012 Oleg Moskalenko + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include +#include +#include +#include +#ifndef __APPLE__ +#include +#endif +#include +#include +#include +#include + +#include "coll.h" +#include "vsort.h" + +struct key_specs *keys; +size_t keys_num = 0; + +wint_t symbol_decimal_point = L'.'; +/* there is no default thousands separator in collate rules: */ +wint_t symbol_thousands_sep = 0; +wint_t symbol_negative_sign = L'-'; +wint_t symbol_positive_sign = L'+'; + +static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); +static int gnumcoll(struct key_value*, struct key_value *, size_t offset); +static int monthcoll(struct key_value*, struct key_value *, size_t offset); +static int numcoll(struct key_value*, struct key_value *, size_t offset); +static int hnumcoll(struct key_value*, struct key_value *, size_t offset); +static int randomcoll(struct key_value*, struct key_value *, size_t offset); +static int versioncoll(struct key_value*, struct key_value *, size_t offset); + +/* + * Allocate keys array + */ +struct keys_array * +keys_array_alloc(void) +{ + struct keys_array *ka; + size_t sz; + + sz = keys_array_size(); + ka = sort_malloc(sz); + memset(ka, 0, sz); + + return (ka); +} + +/* + * Calculate whether we need key hint space + */ +static size_t +key_hint_size(void) +{ + + return (need_hint ? sizeof(struct key_hint) : 0); +} + +/* + * Calculate keys array size + */ +size_t +keys_array_size(void) +{ + + return (keys_num * (sizeof(struct key_value) + key_hint_size())); +} + +/* + * Clean data of keys array + */ +void +clean_keys_array(const struct bwstring *s, struct keys_array *ka) +{ + + if (ka) { + for (size_t i = 0; i < keys_num; ++i) { + const struct key_value *kv; + + kv = get_key_from_keys_array(ka, i); + if (kv->k && kv->k != s) + bwsfree(kv->k); + } + memset(ka, 0, keys_array_size()); + } +} + +/* + * Get pointer to a key value in the keys set + */ +struct key_value * +get_key_from_keys_array(struct keys_array *ka, size_t ind) +{ + + return ((struct key_value *)((caddr_t)ka->key + + ind * (sizeof(struct key_value) + key_hint_size()))); +} + +/* + * Set value of a key in the keys set + */ +void +set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) +{ + + if (ka && keys_num > ind) { + struct key_value *kv; + + kv = get_key_from_keys_array(ka, ind); + + if (kv->k && kv->k != s) + bwsfree(kv->k); + kv->k = s; + } +} + +/* + * Initialize a sort list item + */ +struct sort_list_item * +sort_list_item_alloc(void) +{ + struct sort_list_item *si; + size_t sz; + + sz = sizeof(struct sort_list_item) + keys_array_size(); + si = sort_malloc(sz); + memset(si, 0, sz); + + return (si); +} + +size_t +sort_list_item_size(struct sort_list_item *si) +{ + size_t ret = 0; + + if (si) { + ret = sizeof(struct sort_list_item) + keys_array_size(); + if (si->str) + ret += bws_memsize(si->str); + for (size_t i = 0; i < keys_num; ++i) { + const struct key_value *kv; + + kv = get_key_from_keys_array(&si->ka, i); + + if (kv->k != si->str) + ret += bws_memsize(kv->k); + } + } + return (ret); +} + +/* + * Calculate key for a sort list item + */ +static void +sort_list_item_make_key(struct sort_list_item *si) +{ + + preproc(si->str, &(si->ka)); +} + +/* + * Set value of a sort list item. + * Return combined string and keys memory size. + */ +void +sort_list_item_set(struct sort_list_item *si, struct bwstring *str) +{ + + if (si) { + clean_keys_array(si->str, &(si->ka)); + if (si->str) { + if (si->str == str) { + /* we are trying to reset the same string */ + return; + } else { + bwsfree(si->str); + si->str = NULL; + } + } + si->str = str; + sort_list_item_make_key(si); + } +} + +/* + * De-allocate a sort list item object memory + */ +void +sort_list_item_clean(struct sort_list_item *si) +{ + + if (si) { + clean_keys_array(si->str, &(si->ka)); + if (si->str) { + bwsfree(si->str); + si->str = NULL; + } + } +} + +/* + * Skip columns according to specs + */ +static size_t +skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, + bool skip_blanks, bool *empty_key) +{ + if (cols < 1) + return (BWSLEN(s) + 1); + + if (skip_blanks) + while (start < BWSLEN(s) && iswblank_f(BWS_GET(s,start))) + ++start; + + while (start < BWSLEN(s) && cols > 1) { + --cols; + ++start; + } + + if (start >= BWSLEN(s)) + *empty_key = true; + + return (start); +} + +/* + * Skip fields according to specs + */ +static size_t +skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) +{ + + if (fields < 2) { + if (BWSLEN(s) == 0) + *empty_field = true; + return (0); + } else if (!(sort_opts_vals.tflag)) { + size_t cpos = 0; + bool pb = true; + + while (cpos < BWSLEN(s)) { + bool isblank; + + isblank = iswblank_f(BWS_GET(s, cpos)); + + if (isblank && !pb) { + --fields; + if (fields <= 1) + return (cpos); + } + pb = isblank; + ++cpos; + } + if (fields > 1) + *empty_field = true; + return (cpos); + } else { + size_t cpos = 0; + + while (cpos < BWSLEN(s)) { + if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { + --fields; + if (fields <= 1) + return (cpos + 1); + } + ++cpos; + } + if (fields > 1) + *empty_field = true; + return (cpos); + } +} + +/* + * Find fields start + */ +static void +find_field_start(const struct bwstring *s, struct key_specs *ks, + size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) +{ + + *field_start = skip_fields_to_start(s, ks->f1, empty_field); + if (!*empty_field) + *key_start = skip_cols_to_start(s, ks->c1, *field_start, + ks->pos1b, empty_key); + else + *empty_key = true; +} + +/* + * Find end key position + */ +static size_t +find_field_end(const struct bwstring *s, struct key_specs *ks) +{ + size_t f2, next_field_start, pos_end; + bool empty_field, empty_key; + + empty_field = false; + empty_key = false; + f2 = ks->f2; + + if (f2 == 0) + return (BWSLEN(s) + 1); + else { + if (ks->c2 == 0) { + next_field_start = skip_fields_to_start(s, f2 + 1, + &empty_field); + if ((next_field_start > 0) && sort_opts_vals.tflag && + ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, + next_field_start - 1))) + --next_field_start; + } else + next_field_start = skip_fields_to_start(s, f2, + &empty_field); + } + + if (empty_field || (next_field_start >= BWSLEN(s))) + return (BWSLEN(s) + 1); + + if (ks->c2) { + pos_end = skip_cols_to_start(s, ks->c2, next_field_start, + ks->pos2b, &empty_key); + if (pos_end < BWSLEN(s)) + ++pos_end; + } else + pos_end = next_field_start; + + return (pos_end); +} + +/* + * Cut a field according to the key specs + */ +static struct bwstring * +cut_field(const struct bwstring *s, struct key_specs *ks) +{ + struct bwstring *ret = NULL; + + if (s && ks) { + size_t field_start, key_end, key_start, sz; + bool empty_field, empty_key; + + field_start = 0; + key_start = 0; + empty_field = false; + empty_key = false; + + find_field_start(s, ks, &field_start, &key_start, + &empty_field, &empty_key); + + if (empty_key) + sz = 0; + else { + key_end = find_field_end(s, ks); + sz = (key_end < key_start) ? 0 : (key_end - key_start); + } + + ret = bwsalloc(sz); + if (sz) + bwsnocpy(ret, s, key_start, sz); + } else + ret = bwsalloc(0); + + return (ret); +} + +/* + * Preprocesses a line applying the necessary transformations + * specified by command line options and returns the preprocessed + * string, which can be used to compare. + */ +int +preproc(struct bwstring *s, struct keys_array *ka) +{ + + if (sort_opts_vals.kflag) + for (size_t i = 0; i < keys_num; i++) { + struct bwstring *key; + struct key_specs *kspecs; + struct sort_mods *sm; + + kspecs = &(keys[i]); + key = cut_field(s, kspecs); + + sm = &(kspecs->sm); + if (sm->dflag) + key = dictionary_order(key); + else if (sm->iflag) + key = ignore_nonprinting(key); + if (sm->fflag || sm->Mflag) + key = ignore_case(key); + + set_key_on_keys_array(ka, key, i); + } + else { + struct bwstring *ret = NULL; + struct sort_mods *sm = default_sort_mods; + + if (sm->bflag) { + if (ret == NULL) + ret = bwsdup(s); + ret = ignore_leading_blanks(ret); + } + if (sm->dflag) { + if (ret == NULL) + ret = bwsdup(s); + ret = dictionary_order(ret); + } else if (sm->iflag) { + if (ret == NULL) + ret = bwsdup(s); + ret = ignore_nonprinting(ret); + } + if (sm->fflag || sm->Mflag) { + if (ret == NULL) + ret = bwsdup(s); + ret = ignore_case(ret); + } + if (ret == NULL) + set_key_on_keys_array(ka, s, 0); + else + set_key_on_keys_array(ka, ret, 0); + } + + return 0; +} + +cmpcoll_t +get_sort_func(struct sort_mods *sm) +{ + + if (sm->nflag) + return (numcoll); + else if (sm->hflag) + return (hnumcoll); + else if (sm->gflag) + return (gnumcoll); + else if (sm->Mflag) + return (monthcoll); + else if (sm->Rflag) + return (randomcoll); + else if (sm->Vflag) + return (versioncoll); + else + return (wstrcoll); +} + +/* + * Compares the given strings. Returns a positive number if + * the first precedes the second, a negative number if the second is + * the preceding one, and zero if they are equal. This function calls + * the underlying collate functions, which done the actual comparison. + */ +int +key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) +{ + struct key_value *kv1, *kv2; + struct sort_mods *sm; + int res = 0; + + for (size_t i = 0; i < keys_num; ++i) { + kv1 = get_key_from_keys_array(ps1, i); + kv2 = get_key_from_keys_array(ps2, i); + sm = &(keys[i].sm); + + if (sm->rflag) + res = sm->func(kv2, kv1, offset); + else + res = sm->func(kv1, kv2, offset); + + if (res) + break; + + /* offset applies to only the first key */ + offset = 0; + } + return (res); +} + +/* + * Compare two strings. + * Plain symbol-by-symbol comparison. + */ +int +top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) +{ + + if (default_sort_mods->rflag) { + const struct bwstring *tmp; + + tmp = s1; + s1 = s2; + s2 = tmp; + } + + return (bwscoll(s1, s2, 0)); +} + +/* + * Compare a string and a sort list item, according to the sort specs. + */ +int +str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) +{ + struct keys_array *ka1; + int ret = 0; + + ka1 = keys_array_alloc(); + + preproc(str1, ka1); + + sort_list_item_make_key(*ss2); + + if (debug_sort) { + bwsprintf(stdout, str1, "; s1=<", ">"); + bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); + } + + ret = key_coll(ka1, &((*ss2)->ka), 0); + + if (debug_sort) + printf("; cmp1=%d", ret); + + clean_keys_array(str1, ka1); + sort_free(ka1); + + if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { + ret = top_level_str_coll(str1, ((*ss2)->str)); + if (debug_sort) + printf("; cmp2=%d", ret); + } + + if (debug_sort) + printf("\n"); + + return (ret); +} + +/* + * Compare two sort list items, according to the sort specs. + */ +int +list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, + size_t offset) +{ + int ret; + + ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); + + if (debug_sort) { + if (offset) + printf("; offset=%d", (int) offset); + bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); + bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); + printf("; cmp1=%d\n", ret); + } + + if (ret) + return (ret); + + if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { + ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); + if (debug_sort) + printf("; cmp2=%d\n", ret); + } + + return (ret); +} + +/* + * Compare two sort list items, according to the sort specs. + */ +int +list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) +{ + + return (list_coll_offset(ss1, ss2, 0)); +} + +#define LSCDEF(N) \ +static int \ +list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ +{ \ + \ + return (list_coll_offset(ss1, ss2, N)); \ +} + +LSCDEF(1) +LSCDEF(2) +LSCDEF(3) +LSCDEF(4) +LSCDEF(5) +LSCDEF(6) +LSCDEF(7) +LSCDEF(8) +LSCDEF(9) +LSCDEF(10) +LSCDEF(11) +LSCDEF(12) +LSCDEF(13) +LSCDEF(14) +LSCDEF(15) +LSCDEF(16) +LSCDEF(17) +LSCDEF(18) +LSCDEF(19) +LSCDEF(20) + +listcoll_t +get_list_call_func(size_t offset) +{ + static const listcoll_t lsarray[] = { list_coll, list_coll_1, + list_coll_2, list_coll_3, list_coll_4, list_coll_5, + list_coll_6, list_coll_7, list_coll_8, list_coll_9, + list_coll_10, list_coll_11, list_coll_12, list_coll_13, + list_coll_14, list_coll_15, list_coll_16, list_coll_17, + list_coll_18, list_coll_19, list_coll_20 }; + + if (offset <= 20) + return (lsarray[offset]); + + return (list_coll); +} + +/* + * Compare two sort list items, only by their original string. + */ +int +list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) +{ + + return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); +} + +/* + * Maximum size of a number in the string (before or after decimal point) + */ +#define MAX_NUM_SIZE (128) + +/* + * Set suffix value + */ +static void setsuffix(wchar_t c, unsigned char *si) +{ + switch (c){ + case L'k': + case L'K': + *si = 1; + break; + case L'M': + *si = 2; + break; + case L'G': + *si = 3; + break; + case L'T': + *si = 4; + break; + case L'P': + *si = 5; + break; + case L'E': + *si = 6; + break; + case L'Z': + *si = 7; + break; + case L'Y': + *si = 8; + break; + default: + *si = 0; + } +} + +/* + * Read string s and parse the string into a fixed-decimal-point number. + * sign equals -1 if the number is negative (explicit plus is not allowed, + * according to GNU sort's "info sort". + * The number part before decimal point is in the smain, after the decimal + * point is in sfrac, tail is the pointer to the remainder of the string. + */ +static int +read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) +{ + bwstring_iterator s; + bool prev_thousand_sep = false; + + s = bws_begin(s0); + + /* always end the fraction with zero, even if we have no fraction */ + sfrac[0] = 0; + + while (iswblank_f(bws_get_iter_value(s))) + s = bws_iterator_inc(s, 1); + + if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { + *sign = -1; + s = bws_iterator_inc(s, 1); + } + + // This is '0', not '\0', do not change this + while (iswdigit(bws_get_iter_value(s)) && + (bws_get_iter_value(s) == L'0')) + s = bws_iterator_inc(s, 1); + + while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { + if (iswdigit(bws_get_iter_value(s))) { + smain[*main_len] = bws_get_iter_value(s); + s = bws_iterator_inc(s, 1); + *main_len += 1; + prev_thousand_sep = false; + } else if (symbol_thousands_sep + && (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep) + && (!prev_thousand_sep) + ) { + s = bws_iterator_inc(s, 1); + prev_thousand_sep = true; + } else + break; + } + + smain[*main_len] = 0; + + if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { + s = bws_iterator_inc(s, 1); + while (iswdigit(bws_get_iter_value(s)) && + *frac_len < MAX_NUM_SIZE) { + sfrac[*frac_len] = bws_get_iter_value(s); + s = bws_iterator_inc(s, 1); + *frac_len += 1; + } + sfrac[*frac_len] = 0; + + while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { + --(*frac_len); + sfrac[*frac_len] = L'\0'; + } + } + + setsuffix(bws_get_iter_value(s),si); + + if ((*main_len + *frac_len) == 0) + *sign = 0; + + return (0); +} + +/* + * Implements string sort. + */ +static int +wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) +{ + + if (debug_sort) { + if (offset) + printf("; offset=%d\n", (int) offset); + bwsprintf(stdout, kv1->k, "; k1=<", ">"); + printf("(%zu)", BWSLEN(kv1->k)); + bwsprintf(stdout, kv2->k, ", k2=<", ">"); + printf("(%zu)", BWSLEN(kv2->k)); + } + + return (bwscoll(kv1->k, kv2->k, offset)); +} + +/* + * Compare two suffixes + */ +static inline int +cmpsuffix(unsigned char si1, unsigned char si2) +{ + + return ((char)si1 - (char)si2); +} + +/* + * Implements numeric sort for -n and -h. + */ +static int +numcoll_impl(struct key_value *kv1, struct key_value *kv2, + size_t offset __unused, bool use_suffix) +{ + struct bwstring *s1, *s2; + wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; + wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; + int cmp_res, sign1, sign2; + size_t frac1, frac2, main1, main2; + unsigned char SI1, SI2; + bool e1, e2, key1_read, key2_read; + + s1 = kv1->k; + s2 = kv2->k; + sign1 = sign2 = 0; + main1 = main2 = 0; + frac1 = frac2 = 0; + + key1_read = key2_read = false; + + if (debug_sort) { + bwsprintf(stdout, s1, "; k1=<", ">"); + bwsprintf(stdout, s2, ", k2=<", ">"); + } + + if (s1 == s2) + return (0); + + if (kv1->hint->status == HS_UNINITIALIZED) { + /* read the number from the string */ + read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); + key1_read = true; + kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); + if(main1 < 1 && frac1 < 1) + kv1->hint->v.nh.empty=true; + kv1->hint->v.nh.si = SI1; + kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? + HS_INITIALIZED : HS_ERROR; + kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; + } + + if (kv2->hint->status == HS_UNINITIALIZED) { + /* read the number from the string */ + read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); + key2_read = true; + kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); + if(main2 < 1 && frac2 < 1) + kv2->hint->v.nh.empty=true; + kv2->hint->v.nh.si = SI2; + kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? + HS_INITIALIZED : HS_ERROR; + kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; + } + + if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == + HS_INITIALIZED) { + unsigned long long n1, n2; + bool neg1, neg2; + + e1 = kv1->hint->v.nh.empty; + e2 = kv2->hint->v.nh.empty; + + if (e1 && e2) + return (0); + + neg1 = kv1->hint->v.nh.neg; + neg2 = kv2->hint->v.nh.neg; + + if (neg1 && !neg2) + return (-1); + if (neg2 && !neg1) + return (+1); + + if (e1) + return (neg2 ? +1 : -1); + else if (e2) + return (neg1 ? -1 : +1); + + + if (use_suffix) { + cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); + if (cmp_res) + return (neg1 ? -cmp_res : cmp_res); + } + + n1 = kv1->hint->v.nh.n1; + n2 = kv2->hint->v.nh.n1; + if (n1 < n2) + return (neg1 ? +1 : -1); + else if (n1 > n2) + return (neg1 ? -1 : +1); + } + + /* read the numbers from the strings */ + if (!key1_read) + read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); + if (!key2_read) + read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); + + e1 = ((main1 + frac1) == 0); + e2 = ((main2 + frac2) == 0); + + if (e1 && e2) + return (0); + + /* we know the result if the signs are different */ + if (sign1 < 0 && sign2 >= 0) + return (-1); + if (sign1 >= 0 && sign2 < 0) + return (+1); + + if (e1) + return ((sign2 < 0) ? +1 : -1); + else if (e2) + return ((sign1 < 0) ? -1 : +1); + + if (use_suffix) { + cmp_res = cmpsuffix(SI1, SI2); + if (cmp_res) + return ((sign1 < 0) ? -cmp_res : cmp_res); + } + + /* if both numbers are empty assume that the strings are equal */ + if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) + return (0); + + /* + * if the main part is of different size, we know the result + * (because the leading zeros are removed) + */ + if (main1 < main2) + cmp_res = -1; + else if (main1 > main2) + cmp_res = +1; + /* if the sizes are equal then simple non-collate string compare gives the correct result */ + else + cmp_res = wcscmp(smain1, smain2); + + /* check fraction */ + if (!cmp_res) + cmp_res = wcscmp(sfrac1, sfrac2); + + if (!cmp_res) + return (0); + + /* reverse result if the signs are negative */ + if (sign1 < 0 && sign2 < 0) + cmp_res = -cmp_res; + + return (cmp_res); +} + +/* + * Implements numeric sort (-n). + */ +static int +numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) +{ + + return (numcoll_impl(kv1, kv2, offset, false)); +} + +/* + * Implements 'human' numeric sort (-h). + */ +static int +hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) +{ + + return (numcoll_impl(kv1, kv2, offset, true)); +} + +/* + * Implements random sort (-R). + */ +static int +randomcoll(struct key_value *kv1, struct key_value *kv2, + size_t offset __unused) +{ + struct bwstring *s1, *s2; + MD5_CTX ctx1, ctx2; + char *b1, *b2; + + s1 = kv1->k; + s2 = kv2->k; + + if (debug_sort) { + bwsprintf(stdout, s1, "; k1=<", ">"); + bwsprintf(stdout, s2, ", k2=<", ">"); + } + + if (s1 == s2) + return (0); + + memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX)); + memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX)); + + MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); + MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); + b1 = MD5End(&ctx1, NULL); + b2 = MD5End(&ctx2, NULL); + if (b1 == NULL) { + if (b2 == NULL) + return (0); + else { + sort_free(b2); + return (-1); + } + } else if (b2 == NULL) { + sort_free(b1); + return (+1); + } else { + int cmp_res; + + cmp_res = strcmp(b1,b2); + sort_free(b1); + sort_free(b2); + + if (!cmp_res) + cmp_res = bwscoll(s1, s2, 0); + + return (cmp_res); + } +} + +/* + * Implements version sort (-V). + */ +static int +versioncoll(struct key_value *kv1, struct key_value *kv2, + size_t offset __unused) +{ + struct bwstring *s1, *s2; + + s1 = kv1->k; + s2 = kv2->k; + + if (debug_sort) { + bwsprintf(stdout, s1, "; k1=<", ">"); + bwsprintf(stdout, s2, ", k2=<", ">"); + } + + if (s1 == s2) + return (0); + + return (vcmp(s1, s2)); +} + +/* + * Check for minus infinity + */ +static inline bool +huge_minus(double d, int err1) +{ + + if (err1 == ERANGE) + if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) + return (+1); + + return (0); +} + +/* + * Check for plus infinity + */ +static inline bool +huge_plus(double d, int err1) +{ + + if (err1 == ERANGE) + if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) + return (+1); + + return (0); +} + +/* + * Check whether a function is a NAN + */ +static bool +is_nan(double d) +{ + + return ((d == NAN) || (isnan(d))); +} + +/* + * Compare two NANs + */ +static int +cmp_nans(double d1, double d2) +{ + + if (d1 < d2) + return (-1); + if (d1 > d2) + return (+1); + return (0); +} + +/* + * Implements general numeric sort (-g). + */ +static int +gnumcoll(struct key_value *kv1, struct key_value *kv2, + size_t offset __unused) +{ + double d1, d2; + int err1, err2; + bool empty1, empty2, key1_read, key2_read; + + d1 = d2 = 0; + err1 = err2 = 0; + key1_read = key2_read = false; + + if (debug_sort) { + bwsprintf(stdout, kv1->k, "; k1=<", ">"); + bwsprintf(stdout, kv2->k, "; k2=<", ">"); + } + + if (kv1->hint->status == HS_UNINITIALIZED) { + errno = 0; + d1 = bwstod(kv1->k, &empty1); + err1 = errno; + + if (empty1) + kv1->hint->v.gh.notnum = true; + else if (err1 == 0) { + kv1->hint->v.gh.d = d1; + kv1->hint->v.gh.nan = is_nan(d1); + kv1->hint->status = HS_INITIALIZED; + } else + kv1->hint->status = HS_ERROR; + + key1_read = true; + } + + if (kv2->hint->status == HS_UNINITIALIZED) { + errno = 0; + d2 = bwstod(kv2->k, &empty2); + err2 = errno; + + if (empty2) + kv2->hint->v.gh.notnum = true; + else if (err2 == 0) { + kv2->hint->v.gh.d = d2; + kv2->hint->v.gh.nan = is_nan(d2); + kv2->hint->status = HS_INITIALIZED; + } else + kv2->hint->status = HS_ERROR; + + key2_read = true; + } + + if (kv1->hint->status == HS_INITIALIZED && + kv2->hint->status == HS_INITIALIZED) { + if (kv1->hint->v.gh.notnum) + return ((kv2->hint->v.gh.notnum) ? 0 : -1); + else if (kv2->hint->v.gh.notnum) + return (+1); + + if (kv1->hint->v.gh.nan) + return ((kv2->hint->v.gh.nan) ? + cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : + -1); + else if (kv2->hint->v.gh.nan) + return (+1); + + d1 = kv1->hint->v.gh.d; + d2 = kv2->hint->v.gh.d; + + if (d1 < d2) + return (-1); + else if (d1 > d2) + return (+1); + else + return (0); + } + + if (!key1_read) { + errno = 0; + d1 = bwstod(kv1->k, &empty1); + err1 = errno; + } + + if (!key2_read) { + errno = 0; + d2 = bwstod(kv2->k, &empty2); + err2 = errno; + } + + /* Non-value case: */ + if (empty1) + return (empty2 ? 0 : -1); + else if (empty2) + return (+1); + + /* NAN case */ + if (is_nan(d1)) + return (is_nan(d2) ? cmp_nans(d1, d2) : -1); + else if (is_nan(d2)) + return (+1); + + /* Infinities */ + if (err1 == ERANGE || err2 == ERANGE) { + /* Minus infinity case */ + if (huge_minus(d1, err1)) { + if (huge_minus(d2, err2)) { + if (d1 < d2) + return (-1); + if (d1 > d2) + return (+1); + return (0); + } else + return (-1); + + } else if (huge_minus(d2, err2)) { + if (huge_minus(d1, err1)) { + if (d1 < d2) + return (-1); + if (d1 > d2) + return (+1); + return (0); + } else + return (+1); + } + + /* Plus infinity case */ + if (huge_plus(d1, err1)) { + if (huge_plus(d2, err2)) { + if (d1 < d2) + return (-1); + if (d1 > d2) + return (+1); + return (0); + } else + return (+1); + } else if (huge_plus(d2, err2)) { + if (huge_plus(d1, err1)) { + if (d1 < d2) + return (-1); + if (d1 > d2) + return (+1); + return (0); + } else + return (-1); + } + } + + if (d1 < d2) + return (-1); + if (d1 > d2) + return (+1); + + return (0); +} + +/* + * Implements month sort (-M). + */ +static int +monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) +{ + int val1, val2; + bool key1_read, key2_read; + + val1 = val2 = 0; + key1_read = key2_read = false; + + if (debug_sort) { + bwsprintf(stdout, kv1->k, "; k1=<", ">"); + bwsprintf(stdout, kv2->k, "; k2=<", ">"); + } + + if (kv1->hint->status == HS_UNINITIALIZED) { + kv1->hint->v.Mh.m = bws_month_score(kv1->k); + key1_read = true; + kv1->hint->status = HS_INITIALIZED; + } + + if (kv2->hint->status == HS_UNINITIALIZED) { + kv2->hint->v.Mh.m = bws_month_score(kv2->k); + key2_read = true; + kv2->hint->status = HS_INITIALIZED; + } + + if (kv1->hint->status == HS_INITIALIZED) { + val1 = kv1->hint->v.Mh.m; + key1_read = true; + } + + if (kv2->hint->status == HS_INITIALIZED) { + val2 = kv2->hint->v.Mh.m; + key2_read = true; + } + + if (!key1_read) + val1 = bws_month_score(kv1->k); + if (!key2_read) + val2 = bws_month_score(kv2->k); + + if (val1 == val2) { + return (0); + } + if (val1 < val2) + return (-1); + return (+1); +} diff --git a/text_cmds/sort/coll.h b/text_cmds/sort/coll.h new file mode 100644 index 0000000..6e3f9b4 --- /dev/null +++ b/text_cmds/sort/coll.h @@ -0,0 +1,168 @@ +/* $FreeBSD$ */ + +/*- + * Copyright (C) 2009 Gabor Kovesdan + * Copyright (C) 2012 Oleg Moskalenko + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if !defined(__COLL_H__) +#define __COLL_H__ + +#include "bwstring.h" +#include "sort.h" + +/* + * Sort hint data for -n + */ +struct n_hint +{ + unsigned long long n1; + unsigned char si; + bool empty; + bool neg; +}; + +/* + * Sort hint data for -g + */ +struct g_hint +{ + double d; + bool nan; + bool notnum; +}; + +/* + * Sort hint data for -M + */ +struct M_hint +{ + int m; +}; + +/* + * Status of a sort hint object + */ +typedef enum +{ + HS_ERROR = -1, HS_UNINITIALIZED = 0, HS_INITIALIZED = 1 +} hint_status; + +/* + * Sort hint object + */ +struct key_hint +{ + hint_status status; + union + { + struct n_hint nh; + struct g_hint gh; + struct M_hint Mh; + } v; +}; + +/* + * Key value + */ +struct key_value +{ + struct bwstring *k; /* key string */ + struct key_hint hint[0]; /* key sort hint */ +} __packed; + +/* + * Set of keys container object. + */ +struct keys_array +{ + struct key_value key[0]; +}; + +/* + * Parsed -k option data + */ +struct key_specs +{ + struct sort_mods sm; + size_t c1; + size_t c2; + size_t f1; + size_t f2; + bool pos1b; + bool pos2b; +}; + +/* + * Single entry in sort list. + */ +struct sort_list_item +{ + struct bwstring *str; + struct keys_array ka; +}; + +/* + * Function type, used to compare two list objects + */ +typedef int (*listcoll_t)(struct sort_list_item **ss1, struct sort_list_item **ss2); + +extern struct key_specs *keys; +extern size_t keys_num; + +/* + * Main localised symbols. These must be wint_t as they may hold WEOF. + */ +extern wint_t symbol_decimal_point; +extern wint_t symbol_thousands_sep; +extern wint_t symbol_negative_sign; +extern wint_t symbol_positive_sign; + +/* funcs */ + +cmpcoll_t get_sort_func(struct sort_mods *sm); + +struct keys_array *keys_array_alloc(void); +size_t keys_array_size(void); +struct key_value *get_key_from_keys_array(struct keys_array *ka, size_t ind); +void set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind); +void clean_keys_array(const struct bwstring *s, struct keys_array *ka); + +struct sort_list_item *sort_list_item_alloc(void); +void sort_list_item_set(struct sort_list_item *si, struct bwstring *str); +void sort_list_item_clean(struct sort_list_item *si); +size_t sort_list_item_size(struct sort_list_item *si); + +int preproc(struct bwstring *s, struct keys_array *ka); +int top_level_str_coll(const struct bwstring *, const struct bwstring *); +int key_coll(struct keys_array *ks1, struct keys_array *ks2, size_t offset); +int str_list_coll(struct bwstring *str1, struct sort_list_item **ss2); +int list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2); +int list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2); +int list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, size_t offset); + +listcoll_t get_list_call_func(size_t offset); + +#endif /* __COLL_H__ */ diff --git a/text_cmds/sort/commoncrypto.c b/text_cmds/sort/commoncrypto.c new file mode 100644 index 0000000..c7b3b8c --- /dev/null +++ b/text_cmds/sort/commoncrypto.c @@ -0,0 +1,98 @@ +/* mdXhl.c + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * wrote this file. As long as you retain this notice you + * can do whatever you want with this stuff. If we meet some day, and you think + * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp + * ---------------------------------------------------------------------------- + */ + +#include +__FBSDID("$FreeBSD: head/lib/libmd/mdXhl.c 294037 2016-01-14 21:08:23Z jtl $"); + +#include +#include +#include +#include + +#include +#include +#include + +#include "commoncrypto.h" + +#define LENGTH CC_MD5_DIGEST_LENGTH + +char *MD5FileChunk(const char *, char *, off_t, off_t); + +char * +MD5End(CC_MD5_CTX *ctx, char *buf) +{ + int i; + unsigned char digest[LENGTH]; + static const char hex[]="0123456789abcdef"; + + if (!buf) + buf = malloc(2*LENGTH + 1); + if (!buf) + return 0; + CC_MD5_Final(digest, ctx); + for (i = 0; i < LENGTH; i++) { + buf[i+i] = hex[digest[i] >> 4]; + buf[i+i+1] = hex[digest[i] & 0x0f]; + } + buf[i+i] = '\0'; + return buf; +} + +char * +MD5File(const char *filename, char *buf) +{ + return (MD5FileChunk(filename, buf, 0, 0)); +} + +char * +MD5FileChunk(const char *filename, char *buf, off_t ofs, off_t len) +{ + unsigned char buffer[16*1024]; + CC_MD5_CTX ctx; + int fd, readrv, e; + off_t remain; + + if (len < 0) { + errno = EINVAL; + return NULL; + } + + CC_MD5_Init(&ctx); + fd = open(filename, O_RDONLY); + if (fd < 0) + return NULL; + if (ofs != 0) { + errno = 0; + if (lseek(fd, ofs, SEEK_SET) != ofs || + (ofs == -1 && errno != 0)) { + readrv = -1; + goto error; + } + } + remain = len; + readrv = 0; + while (len == 0 || remain > 0) { + if (len == 0 || remain > (off_t)sizeof(buffer)) + readrv = read(fd, buffer, sizeof(buffer)); + else + readrv = read(fd, buffer, remain); + if (readrv <= 0) + break; + CC_MD5_Update(&ctx, buffer, readrv); + remain -= readrv; + } +error: + e = errno; + close(fd); + errno = e; + if (readrv < 0) + return NULL; + return (MD5End(&ctx, buf)); +} diff --git a/text_cmds/sort/commoncrypto.h b/text_cmds/sort/commoncrypto.h new file mode 100644 index 0000000..5170b51 --- /dev/null +++ b/text_cmds/sort/commoncrypto.h @@ -0,0 +1,8 @@ +#include + +#define MD5_CTX CC_MD5_CTX +#define MD5Init CC_MD5_Init +#define MD5Update CC_MD5_Update + +char *MD5End(CC_MD5_CTX *, char *); +char *MD5File(const char *, char *); diff --git a/text_cmds/sort/file.c b/text_cmds/sort/file.c new file mode 100644 index 0000000..346819b --- /dev/null +++ b/text_cmds/sort/file.c @@ -0,0 +1,1684 @@ +/*- + * Copyright (C) 2009 Gabor Kovesdan + * Copyright (C) 2012 Oleg Moskalenko + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#if defined(SORT_THREADS) +#include +#endif +#ifndef __APPLE__ +#include +#else +#include +#include +#include +#include +#endif +#include +#include +#include +#include +#include +#include + +#include "coll.h" +#include "file.h" +#include "radixsort.h" + +unsigned long long free_memory = 1000000; +unsigned long long available_free_memory = 1000000; + +bool use_mmap; + +const char *tmpdir = "/var/tmp"; +const char *compress_program; + +size_t max_open_files = 16; + +/* + * How much space we read from file at once + */ +#define READ_CHUNK (4096) + +/* + * File reader structure + */ +struct file_reader +{ + struct reader_buffer rb; + FILE *file; + char *fname; + unsigned char *buffer; + unsigned char *mmapaddr; + unsigned char *mmapptr; + size_t bsz; + size_t cbsz; + size_t mmapsize; + size_t strbeg; + int fd; + char elsymb; +}; + +/* + * Structure to be used in file merge process. + */ +struct file_header +{ + struct file_reader *fr; + struct sort_list_item *si; /* current top line */ + size_t file_pos; +}; + +/* + * List elements of "cleanable" files list. + */ +struct CLEANABLE_FILE +{ + char *fn; + LIST_ENTRY(CLEANABLE_FILE) files; +}; + +/* + * List header of "cleanable" files list. + */ +static LIST_HEAD(CLEANABLE_FILES,CLEANABLE_FILE) tmp_files; + +/* + * Semaphore to protect the tmp file list. + * We use semaphore here because it is signal-safe, according to POSIX. + * And semaphore does not require pthread library. + */ +#ifndef __APPLE__ +static sem_t tmp_files_sem; +#else +static semaphore_t tmp_files_sem; +#endif + +static void mt_sort(struct sort_list *list, + int (*sort_func)(void *, size_t, size_t, + int (*)(const void *, const void *)), const char* fn); + +/* + * Init tmp files list + */ +void +init_tmp_files(void) +{ + + LIST_INIT(&tmp_files); +#ifndef __APPLE__ + sem_init(&tmp_files_sem, 0, 1); +#else + { + mach_port_t self = mach_task_self(); + kern_return_t ret = semaphore_create(self, &tmp_files_sem, SYNC_POLICY_FIFO, 1); + if (ret != KERN_SUCCESS) { + err(2,NULL); + } + } +#endif +} + +/* + * Save name of a tmp file for signal cleanup + */ +void +tmp_file_atexit(const char *tmp_file) +{ + + if (tmp_file) { +#ifndef __APPLE__ + sem_wait(&tmp_files_sem); +#else + semaphore_wait(tmp_files_sem); +#endif + struct CLEANABLE_FILE *item = + sort_malloc(sizeof(struct CLEANABLE_FILE)); + item->fn = sort_strdup(tmp_file); + LIST_INSERT_HEAD(&tmp_files, item, files); +#ifndef __APPLE__ + sem_post(&tmp_files_sem); +#else + semaphore_signal(tmp_files_sem); +#endif + } +} + +/* + * Clear tmp files + */ +void +clear_tmp_files(void) +{ + struct CLEANABLE_FILE *item; + +#ifndef __APPLE__ + sem_wait(&tmp_files_sem); +#else + semaphore_wait(tmp_files_sem); +#endif + LIST_FOREACH(item,&tmp_files,files) { + if ((item) && (item->fn)) + unlink(item->fn); + } +#ifndef __APPLE__ + sem_post(&tmp_files_sem); +#else + semaphore_signal(tmp_files_sem); +#endif +} + +/* + * Check whether a file is a temporary file + */ +static bool +file_is_tmp(const char* fn) +{ + struct CLEANABLE_FILE *item; + bool ret = false; + + if (fn) { +#ifndef __APPLE__ + sem_wait(&tmp_files_sem); +#else + semaphore_wait(tmp_files_sem); +#endif + LIST_FOREACH(item,&tmp_files,files) { + if ((item) && (item->fn)) + if (strcmp(item->fn, fn) == 0) { + ret = true; + break; + } + } +#ifndef __APPLE__ + sem_post(&tmp_files_sem); +#else + semaphore_signal(tmp_files_sem); +#endif + } + + return (ret); +} + +/* + * Generate new temporary file name + */ +char * +new_tmp_file_name(void) +{ + static size_t tfcounter = 0; + static const char *fn = ".bsdsort."; + char *ret; + size_t sz; + + sz = strlen(tmpdir) + 1 + strlen(fn) + 32 + 1; + ret = sort_malloc(sz); + + sprintf(ret, "%s/%s%d.%lu", tmpdir, fn, (int) getpid(), (unsigned long)(tfcounter++)); + tmp_file_atexit(ret); + return (ret); +} + +/* + * Initialize file list + */ +void +file_list_init(struct file_list *fl, bool tmp) +{ + + if (fl) { + fl->count = 0; + fl->sz = 0; + fl->fns = NULL; + fl->tmp = tmp; + } +} + +/* + * Add a file name to the list + */ +void +file_list_add(struct file_list *fl, char *fn, bool allocate) +{ + + if (fl && fn) { + if (fl->count >= fl->sz || (fl->fns == NULL)) { + fl->sz = (fl->sz) * 2 + 1; + fl->fns = sort_realloc(fl->fns, fl->sz * + sizeof(char *)); + } + fl->fns[fl->count] = allocate ? sort_strdup(fn) : fn; + fl->count += 1; + } +} + +/* + * Populate file list from array of file names + */ +void +file_list_populate(struct file_list *fl, int argc, char **argv, bool allocate) +{ + + if (fl && argv) { + int i; + + for (i = 0; i < argc; i++) + file_list_add(fl, argv[i], allocate); + } +} + +/* + * Clean file list data and delete the files, + * if this is a list of temporary files + */ +void +file_list_clean(struct file_list *fl) +{ + + if (fl) { + if (fl->fns) { + size_t i; + + for (i = 0; i < fl->count; i++) { + if (fl->fns[i]) { + if (fl->tmp) + unlink(fl->fns[i]); + sort_free(fl->fns[i]); + fl->fns[i] = 0; + } + } + sort_free(fl->fns); + fl->fns = NULL; + } + fl->sz = 0; + fl->count = 0; + fl->tmp = false; + } +} + +/* + * Init sort list + */ +void +sort_list_init(struct sort_list *l) +{ + + if (l) { + l->count = 0; + l->size = 0; + l->memsize = sizeof(struct sort_list); + l->list = NULL; + } +} + +/* + * Add string to sort list + */ +void +sort_list_add(struct sort_list *l, struct bwstring *str) +{ + + if (l && str) { + size_t indx = l->count; + + if ((l->list == NULL) || (indx >= l->size)) { + size_t newsize = (l->size + 1) + 1024; + + l->list = sort_realloc(l->list, + sizeof(struct sort_list_item*) * newsize); + l->memsize += (newsize - l->size) * + sizeof(struct sort_list_item*); + l->size = newsize; + } + l->list[indx] = sort_list_item_alloc(); + sort_list_item_set(l->list[indx], str); + l->memsize += sort_list_item_size(l->list[indx]); + l->count += 1; + } +} + +/* + * Clean sort list data + */ +void +sort_list_clean(struct sort_list *l) +{ + + if (l) { + if (l->list) { + size_t i; + + for (i = 0; i < l->count; i++) { + struct sort_list_item *item; + + item = l->list[i]; + + if (item) { + sort_list_item_clean(item); + sort_free(item); + l->list[i] = NULL; + } + } + sort_free(l->list); + l->list = NULL; + } + l->count = 0; + l->size = 0; + l->memsize = sizeof(struct sort_list); + } +} + +/* + * Write sort list to file + */ +void +sort_list_dump(struct sort_list *l, const char *fn) +{ + + if (l && fn) { + FILE *f; + + f = openfile(fn, "w"); + if (f == NULL) + err(2, NULL); + + if (l->list) { + size_t i; + if (!(sort_opts_vals.uflag)) { + for (i = 0; i < l->count; ++i) + bwsfwrite(l->list[i]->str, f, + sort_opts_vals.zflag); + } else { + struct sort_list_item *last_printed_item = NULL; + struct sort_list_item *item; + for (i = 0; i < l->count; ++i) { + item = l->list[i]; + if ((last_printed_item == NULL) || + list_coll(&last_printed_item, &item)) { + bwsfwrite(item->str, f, sort_opts_vals.zflag); + last_printed_item = item; + } + } + } + } + + closefile(f, fn); + } +} + +/* + * Checks if the given file is sorted. Stops at the first disorder, + * prints the disordered line and returns 1. + */ +int +check(const char *fn) +{ + struct bwstring *s1, *s2, *s1disorder, *s2disorder; + struct file_reader *fr; + struct keys_array *ka1, *ka2; + int res; + size_t pos, posdisorder; + + s1 = s2 = s1disorder = s2disorder = NULL; + ka1 = ka2 = NULL; + + fr = file_reader_init(fn); + + res = 0; + pos = 1; + posdisorder = 1; + + if (fr == NULL) { + err(2, NULL); +#ifndef __APPLE__ + goto end; +#endif + } + + s1 = file_reader_readline(fr); + if (s1 == NULL) + goto end; + + ka1 = keys_array_alloc(); + preproc(s1, ka1); + + s2 = file_reader_readline(fr); + if (s2 == NULL) + goto end; + + ka2 = keys_array_alloc(); + preproc(s2, ka2); + + for (;;) { + + if (debug_sort) { + bwsprintf(stdout, s2, "s1=<", ">"); + bwsprintf(stdout, s1, "s2=<", ">"); + } + int cmp = key_coll(ka2, ka1, 0); + if (debug_sort) + printf("; cmp1=%d", cmp); + + if (!cmp && sort_opts_vals.complex_sort && + !(sort_opts_vals.uflag) && !(sort_opts_vals.sflag)) { + cmp = top_level_str_coll(s2, s1); + if (debug_sort) + printf("; cmp2=%d", cmp); + } + if (debug_sort) + printf("\n"); + + if ((sort_opts_vals.uflag && (cmp <= 0)) || (cmp < 0)) { + if (!(sort_opts_vals.csilentflag)) { + s2disorder = bwsdup(s2); + posdisorder = pos; + if (debug_sort) + s1disorder = bwsdup(s1); + } + res = 1; + goto end; + } + + pos++; + + clean_keys_array(s1, ka1); + sort_free(ka1); + ka1 = ka2; + ka2 = NULL; + + bwsfree(s1); + s1 = s2; + + s2 = file_reader_readline(fr); + if (s2 == NULL) + goto end; + + ka2 = keys_array_alloc(); + preproc(s2, ka2); + } + +end: + if (ka1) { + clean_keys_array(s1, ka1); + sort_free(ka1); + } + + if (s1) + bwsfree(s1); + + if (ka2) { + clean_keys_array(s2, ka2); + sort_free(ka2); + } + + if (s2) + bwsfree(s2); + + if ((fn == NULL) || (*fn == 0) || (strcmp(fn, "-") == 0)) { + for (;;) { + s2 = file_reader_readline(fr); + if (s2 == NULL) + break; + bwsfree(s2); + } + } + + file_reader_free(fr); + + if (s2disorder) { + bws_disorder_warnx(s2disorder, fn, posdisorder); + if (s1disorder) { + bws_disorder_warnx(s1disorder, fn, posdisorder); + if (s1disorder != s2disorder) + bwsfree(s1disorder); + } + bwsfree(s2disorder); + s1disorder = NULL; + s2disorder = NULL; + } + + if (res) + exit(res); + + return (0); +} + +/* + * Opens a file. If the given filename is "-", stdout will be + * opened. + */ +FILE * +openfile(const char *fn, const char *mode) +{ + FILE *file; + + if (strcmp(fn, "-") == 0) { + return ((mode && mode[0] == 'r') ? stdin : stdout); + } else { + mode_t orig_file_mask = 0; + int is_tmp = file_is_tmp(fn); + + if (is_tmp && (mode[0] == 'w')) + orig_file_mask = umask(S_IWGRP | S_IWOTH | + S_IRGRP | S_IROTH); + + if (is_tmp && (compress_program != NULL)) { + char *cmd; + size_t cmdsz; + + cmdsz = strlen(fn) + 128; + cmd = sort_malloc(cmdsz); + + fflush(stdout); + + if (mode[0] == 'r') + snprintf(cmd, cmdsz - 1, "cat %s | %s -d", + fn, compress_program); + else if (mode[0] == 'w') + snprintf(cmd, cmdsz - 1, "%s > %s", + compress_program, fn); + else + err(2, "%s", getstr(7)); + + if ((file = popen(cmd, mode)) == NULL) + err(2, NULL); + + sort_free(cmd); + + } else + if ((file = fopen(fn, mode)) == NULL) + err(2, NULL); + + if (is_tmp && (mode[0] == 'w')) + umask(orig_file_mask); + } + + return (file); +} + +/* + * Close file + */ +void +closefile(FILE *f, const char *fn) +{ + if (f == NULL) { + ; + } else if (f == stdin) { + ; + } else if (f == stdout) { + fflush(f); + } else { + if (file_is_tmp(fn) && compress_program != NULL) { + if(pclose(f)<0) + err(2,NULL); + } else + fclose(f); + } +} + +/* + * Reads a file into the internal buffer. + */ +struct file_reader * +file_reader_init(const char *fsrc) +{ + struct file_reader *ret; + + if (fsrc == NULL) + fsrc = "-"; + + ret = sort_malloc(sizeof(struct file_reader)); + memset(ret, 0, sizeof(struct file_reader)); + + ret->elsymb = '\n'; + if (sort_opts_vals.zflag) + ret->elsymb = 0; + + ret->fname = sort_strdup(fsrc); + + if (strcmp(fsrc, "-") && (compress_program == NULL) && use_mmap) { + + do { + struct stat stat_buf; + void *addr; + size_t sz = 0; + int fd, flags; + +#if defined(__APPLE__) + flags = 0; +#else + flags = MAP_NOCORE | MAP_NOSYNC; +#endif + + fd = open(fsrc, O_RDONLY); + if (fd < 0) + err(2, NULL); + + if (fstat(fd, &stat_buf) < 0) { + close(fd); + break; + } + + sz = stat_buf.st_size; + +#if defined(MAP_PREFAULT_READ) + flags |= MAP_PREFAULT_READ; +#endif + + addr = mmap(NULL, sz, PROT_READ, flags, fd, 0); + if (addr == MAP_FAILED) { + close(fd); + break; + } + + ret->fd = fd; + ret->mmapaddr = addr; + ret->mmapsize = sz; + ret->mmapptr = ret->mmapaddr; + + } while (0); + } + + if (ret->mmapaddr == NULL) { + ret->file = openfile(fsrc, "r"); + if (ret->file == NULL) + err(2, NULL); + + if (strcmp(fsrc, "-")) { + ret->cbsz = READ_CHUNK; + ret->buffer = sort_malloc(ret->cbsz); + ret->bsz = 0; + ret->strbeg = 0; + + ret->bsz = fread(ret->buffer, 1, ret->cbsz, ret->file); + if (ret->bsz == 0) { + if (ferror(ret->file)) + err(2, NULL); + } + } + } + + return (ret); +} + +struct bwstring * +file_reader_readline(struct file_reader *fr) +{ + struct bwstring *ret = NULL; + + if (fr->mmapaddr) { + unsigned char *mmapend; + + mmapend = fr->mmapaddr + fr->mmapsize; + if (fr->mmapptr >= mmapend) + return (NULL); + else { + unsigned char *strend; + size_t sz; + + sz = mmapend - fr->mmapptr; + strend = memchr(fr->mmapptr, fr->elsymb, sz); + + if (strend == NULL) { + ret = bwscsbdup(fr->mmapptr, sz); + fr->mmapptr = mmapend; + } else { + ret = bwscsbdup(fr->mmapptr, strend - + fr->mmapptr); + fr->mmapptr = strend + 1; + } + } + + } else if (fr->file != stdin) { + unsigned char *strend; + size_t bsz1, remsz, search_start; + + search_start = 0; + remsz = 0; + strend = NULL; + + if (fr->bsz > fr->strbeg) + remsz = fr->bsz - fr->strbeg; + + /* line read cycle */ + for (;;) { + if (remsz > search_start) + strend = memchr(fr->buffer + fr->strbeg + + search_start, fr->elsymb, remsz - + search_start); + else + strend = NULL; + + if (strend) + break; + if (feof(fr->file)) + break; + + if (fr->bsz != fr->cbsz) + /* NOTREACHED */ + err(2, "File read software error 1"); + + if (remsz > (READ_CHUNK >> 1)) { + search_start = fr->cbsz - fr->strbeg; + fr->cbsz += READ_CHUNK; + fr->buffer = sort_realloc(fr->buffer, + fr->cbsz); + bsz1 = fread(fr->buffer + fr->bsz, 1, + READ_CHUNK, fr->file); + if (bsz1 == 0) { + if (ferror(fr->file)) + err(2, NULL); + break; + } + fr->bsz += bsz1; + remsz += bsz1; + } else { + if (remsz > 0 && fr->strbeg>0) + bcopy(fr->buffer + fr->strbeg, + fr->buffer, remsz); + + fr->strbeg = 0; + search_start = remsz; + bsz1 = fread(fr->buffer + remsz, 1, + fr->cbsz - remsz, fr->file); + if (bsz1 == 0) { + if (ferror(fr->file)) + err(2, NULL); + break; + } + fr->bsz = remsz + bsz1; + remsz = fr->bsz; + } + } + + if (strend == NULL) + strend = fr->buffer + fr->bsz; + + if ((fr->buffer + fr->strbeg <= strend) && + (fr->strbeg < fr->bsz) && (remsz>0)) + ret = bwscsbdup(fr->buffer + fr->strbeg, strend - + fr->buffer - fr->strbeg); + + fr->strbeg = (strend - fr->buffer) + 1; + + } else { + size_t len = 0; + + ret = bwsfgetln(fr->file, &len, sort_opts_vals.zflag, + &(fr->rb)); + } + + return (ret); +} + +static void +file_reader_clean(struct file_reader *fr) +{ + + if (fr) { + if (fr->mmapaddr) + munmap(fr->mmapaddr, fr->mmapsize); + + if (fr->fd) + close(fr->fd); + + if (fr->buffer) + sort_free(fr->buffer); + + if (fr->file) + if (fr->file != stdin) + closefile(fr->file, fr->fname); + + if(fr->fname) + sort_free(fr->fname); + + memset(fr, 0, sizeof(struct file_reader)); + } +} + +void +file_reader_free(struct file_reader *fr) +{ + + if (fr) { + file_reader_clean(fr); + sort_free(fr); + } +} + +int +procfile(const char *fsrc, struct sort_list *list, struct file_list *fl) +{ + struct file_reader *fr; + + fr = file_reader_init(fsrc); + if (fr == NULL) + err(2, NULL); + + /* file browse cycle */ + for (;;) { + struct bwstring *bws; + + bws = file_reader_readline(fr); + + if (bws == NULL) + break; + + sort_list_add(list, bws); + + if (list->memsize >= available_free_memory) { + char *fn; + + fn = new_tmp_file_name(); + sort_list_to_file(list, fn); + file_list_add(fl, fn, false); + sort_list_clean(list); + } + } + + file_reader_free(fr); + + return (0); +} + +/* + * Compare file headers. Files with EOF always go to the end of the list. + */ +static int +file_header_cmp(struct file_header *f1, struct file_header *f2) +{ + + if (f1 == f2) + return (0); + else { + if (f1->fr == NULL) { + return ((f2->fr == NULL) ? 0 : +1); + } else if (f2->fr == NULL) + return (-1); + else { + int ret; + + ret = list_coll(&(f1->si), &(f2->si)); + if (!ret) + return ((f1->file_pos < f2->file_pos) ? -1 : +1); + return (ret); + } + } +} + +/* + * Allocate and init file header structure + */ +static void +file_header_init(struct file_header **fh, const char *fn, size_t file_pos) +{ + + if (fh && fn) { + struct bwstring *line; + + *fh = sort_malloc(sizeof(struct file_header)); + (*fh)->file_pos = file_pos; + (*fh)->fr = file_reader_init(fn); + if ((*fh)->fr == NULL) { + perror(fn); + err(2, "%s", getstr(8)); + } + line = file_reader_readline((*fh)->fr); + if (line == NULL) { + file_reader_free((*fh)->fr); + (*fh)->fr = NULL; + (*fh)->si = NULL; + } else { + (*fh)->si = sort_list_item_alloc(); + sort_list_item_set((*fh)->si, line); + } + } +} + +/* + * Close file + */ +static void +file_header_close(struct file_header **fh) +{ + + if (fh && *fh) { + if ((*fh)->fr) { + file_reader_free((*fh)->fr); + (*fh)->fr = NULL; + } + if ((*fh)->si) { + sort_list_item_clean((*fh)->si); + sort_free((*fh)->si); + (*fh)->si = NULL; + } + sort_free(*fh); + *fh = NULL; + } +} + +/* + * Swap two array elements + */ +static void +file_header_swap(struct file_header **fh, size_t i1, size_t i2) +{ + struct file_header *tmp; + + tmp = fh[i1]; + fh[i1] = fh[i2]; + fh[i2] = tmp; +} + +/* heap algorithm ==>> */ + +/* + * See heap sort algorithm + * "Raises" last element to its right place + */ +static void +file_header_heap_swim(struct file_header **fh, size_t indx) +{ + + if (indx > 0) { + size_t parent_index; + + parent_index = (indx - 1) >> 1; + + if (file_header_cmp(fh[indx], fh[parent_index]) < 0) { + /* swap child and parent and continue */ + file_header_swap(fh, indx, parent_index); + file_header_heap_swim(fh, parent_index); + } + } +} + +/* + * Sink the top element to its correct position + */ +static void +file_header_heap_sink(struct file_header **fh, size_t indx, size_t size) +{ + size_t left_child_index; + size_t right_child_index; + + left_child_index = indx + indx + 1; + right_child_index = left_child_index + 1; + + if (left_child_index < size) { + size_t min_child_index; + + min_child_index = left_child_index; + + if ((right_child_index < size) && + (file_header_cmp(fh[left_child_index], + fh[right_child_index]) > 0)) + min_child_index = right_child_index; + if (file_header_cmp(fh[indx], fh[min_child_index]) > 0) { + file_header_swap(fh, indx, min_child_index); + file_header_heap_sink(fh, min_child_index, size); + } + } +} + +/* <<== heap algorithm */ + +/* + * Adds element to the "left" end + */ +static void +file_header_list_rearrange_from_header(struct file_header **fh, size_t size) +{ + + file_header_heap_sink(fh, 0, size); +} + +/* + * Adds element to the "right" end + */ +static void +file_header_list_push(struct file_header *f, struct file_header **fh, size_t size) +{ + + fh[size++] = f; + file_header_heap_swim(fh, size - 1); +} + +struct last_printed +{ + struct bwstring *str; +}; + +/* + * Prints the current line of the file + */ +static void +file_header_print(struct file_header *fh, FILE *f_out, struct last_printed *lp) +{ + + if (fh && fh->fr && f_out && fh->si && fh->si->str) { + if (sort_opts_vals.uflag) { + if ((lp->str == NULL) || (str_list_coll(lp->str, &(fh->si)))) { + bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag); + if (lp->str) + bwsfree(lp->str); + lp->str = bwsdup(fh->si->str); + } + } else + bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag); + } +} + +/* + * Read next line + */ +static void +file_header_read_next(struct file_header *fh) +{ + + if (fh && fh->fr) { + struct bwstring *tmp; + + tmp = file_reader_readline(fh->fr); + if (tmp == NULL) { + file_reader_free(fh->fr); + fh->fr = NULL; + if (fh->si) { + sort_list_item_clean(fh->si); + sort_free(fh->si); + fh->si = NULL; + } + } else { + if (fh->si == NULL) + fh->si = sort_list_item_alloc(); + sort_list_item_set(fh->si, tmp); + } + } +} + +/* + * Merge array of "files headers" + */ +static void +file_headers_merge(size_t fnum, struct file_header **fh, FILE *f_out) +{ + struct last_printed lp; + size_t i; + + memset(&lp, 0, sizeof(lp)); + + /* + * construct the initial sort structure + */ + for (i = 0; i < fnum; i++) + file_header_list_push(fh[i], fh, i); + + while (fh[0]->fr) { /* unfinished files are always in front */ + /* output the smallest line: */ + file_header_print(fh[0], f_out, &lp); + /* read a new line, if possible: */ + file_header_read_next(fh[0]); + /* re-arrange the list: */ + file_header_list_rearrange_from_header(fh, fnum); + } + + if (lp.str) + bwsfree(lp.str); +} + +/* + * Merges the given files into the output file, which can be + * stdout. + */ +static void +merge_files_array(size_t argc, char **argv, const char *fn_out) +{ + + if (argv && fn_out) { + struct file_header **fh; + FILE *f_out; + size_t i; + + f_out = openfile(fn_out, "w"); + + if (f_out == NULL) + err(2, NULL); + + fh = sort_malloc((argc + 1) * sizeof(struct file_header *)); + + for (i = 0; i < argc; i++) + file_header_init(fh + i, argv[i], (size_t) i); + + file_headers_merge(argc, fh, f_out); + + for (i = 0; i < argc; i++) + file_header_close(fh + i); + + sort_free(fh); + + closefile(f_out, fn_out); + } +} + +/* + * Shrinks the file list until its size smaller than max number of opened files + */ +static int +shrink_file_list(struct file_list *fl) +{ + + if ((fl == NULL) || (size_t) (fl->count) < max_open_files) + return (0); + else { + struct file_list new_fl; + size_t indx = 0; + + file_list_init(&new_fl, true); + while (indx < fl->count) { + char *fnew; + size_t num; + + num = fl->count - indx; + fnew = new_tmp_file_name(); + + if ((size_t) num >= max_open_files) + num = max_open_files - 1; + merge_files_array(num, fl->fns + indx, fnew); + if (fl->tmp) { + size_t i; + + for (i = 0; i < num; i++) + unlink(fl->fns[indx + i]); + } + file_list_add(&new_fl, fnew, false); + indx += num; + } + fl->tmp = false; /* already taken care of */ + file_list_clean(fl); + + fl->count = new_fl.count; + fl->fns = new_fl.fns; + fl->sz = new_fl.sz; + fl->tmp = new_fl.tmp; + + return (1); + } +} + +/* + * Merge list of files + */ +void +merge_files(struct file_list *fl, const char *fn_out) +{ + + if (fl && fn_out) { + while (shrink_file_list(fl)); + + merge_files_array(fl->count, fl->fns, fn_out); + } +} + +static const char * +get_sort_method_name(int sm) +{ + + if (sm == SORT_MERGESORT) + return "mergesort"; + else if (sort_opts_vals.sort_method == SORT_RADIXSORT) + return "radixsort"; + else if (sort_opts_vals.sort_method == SORT_HEAPSORT) + return "heapsort"; + else + return "quicksort"; +} + +/* + * Wrapper for qsort + */ +static int sort_qsort(void *list, size_t count, size_t elem_size, + int (*cmp_func)(const void *, const void *)) +{ + + qsort(list, count, elem_size, cmp_func); + return (0); +} + +/* + * Sort list of lines and writes it to the file + */ +void +sort_list_to_file(struct sort_list *list, const char *outfile) +{ + struct sort_mods *sm = &(keys[0].sm); + + if (!(sm->Mflag) && !(sm->Rflag) && !(sm->Vflag) && !(sm->Vflag) && + !(sm->gflag) && !(sm->hflag) && !(sm->nflag)) { + if ((sort_opts_vals.sort_method == SORT_DEFAULT) && byte_sort) + sort_opts_vals.sort_method = SORT_RADIXSORT; + + } else if (sort_opts_vals.sort_method == SORT_RADIXSORT) + err(2, "%s", getstr(9)); + + /* + * to handle stable sort and the unique cases in the + * right order, we need stable basic algorithm + */ + if (sort_opts_vals.sflag) { + switch (sort_opts_vals.sort_method){ + case SORT_MERGESORT: + break; + case SORT_RADIXSORT: + break; + case SORT_DEFAULT: + sort_opts_vals.sort_method = SORT_MERGESORT; + break; + default: + errx(2, "%s", getstr(10)); + } + } + + if (sort_opts_vals.sort_method == SORT_DEFAULT) + sort_opts_vals.sort_method = DEFAULT_SORT_ALGORITHM; + + if (debug_sort) + printf("sort_method=%s\n", + get_sort_method_name(sort_opts_vals.sort_method)); + + switch (sort_opts_vals.sort_method){ + case SORT_RADIXSORT: + rxsort(list->list, list->count); + sort_list_dump(list, outfile); + break; + case SORT_MERGESORT: + mt_sort(list, mergesort, outfile); + break; + case SORT_HEAPSORT: + mt_sort(list, heapsort, outfile); + break; + case SORT_QSORT: + mt_sort(list, sort_qsort, outfile); + break; + default: + mt_sort(list, DEFAULT_SORT_FUNC, outfile); + break; + } +} + +/******************* MT SORT ************************/ + +#if defined(SORT_THREADS) +/* semaphore to count threads */ +#ifndef __APPLE__ +static sem_t mtsem; +#else +static semaphore_t mtsem; +#endif + +/* current system sort function */ +static int (*g_sort_func)(void *, size_t, size_t, + int(*)(const void *, const void *)); + +/* + * Sort cycle thread (in multi-threaded mode) + */ +static void* +mt_sort_thread(void* arg) +{ + struct sort_list *list = arg; + + g_sort_func(list->list, list->count, sizeof(struct sort_list_item *), + (int(*)(const void *, const void *)) list_coll); + +#ifndef __APPLE__ + sem_post(&mtsem); +#else + semaphore_signal(mtsem); +#endif + + return (arg); +} + +/* + * Compare sub-lists. Empty sub-lists always go to the end of the list. + */ +static int +sub_list_cmp(struct sort_list *l1, struct sort_list *l2) +{ + + if (l1 == l2) + return (0); + else { + if (l1->count == 0) { + return ((l2->count == 0) ? 0 : +1); + } else if (l2->count == 0) { + return (-1); + } else { + int ret; + + ret = list_coll(&(l1->list[0]), &(l2->list[0])); + if (!ret) + return ((l1->sub_list_pos < l2->sub_list_pos) ? + -1 : +1); + return (ret); + } + } +} + +/* + * Swap two array elements + */ +static void +sub_list_swap(struct sort_list **sl, size_t i1, size_t i2) +{ + struct sort_list *tmp; + + tmp = sl[i1]; + sl[i1] = sl[i2]; + sl[i2] = tmp; +} + +/* heap algorithm ==>> */ + +/* + * See heap sort algorithm + * "Raises" last element to its right place + */ +static void +sub_list_swim(struct sort_list **sl, size_t indx) +{ + + if (indx > 0) { + size_t parent_index; + + parent_index = (indx - 1) >> 1; + + if (sub_list_cmp(sl[indx], sl[parent_index]) < 0) { + /* swap child and parent and continue */ + sub_list_swap(sl, indx, parent_index); + sub_list_swim(sl, parent_index); + } + } +} + +/* + * Sink the top element to its correct position + */ +static void +sub_list_sink(struct sort_list **sl, size_t indx, size_t size) +{ + size_t left_child_index; + size_t right_child_index; + + left_child_index = indx + indx + 1; + right_child_index = left_child_index + 1; + + if (left_child_index < size) { + size_t min_child_index; + + min_child_index = left_child_index; + + if ((right_child_index < size) && + (sub_list_cmp(sl[left_child_index], + sl[right_child_index]) > 0)) + min_child_index = right_child_index; + if (sub_list_cmp(sl[indx], sl[min_child_index]) > 0) { + sub_list_swap(sl, indx, min_child_index); + sub_list_sink(sl, min_child_index, size); + } + } +} + +/* <<== heap algorithm */ + +/* + * Adds element to the "right" end + */ +static void +sub_list_push(struct sort_list *s, struct sort_list **sl, size_t size) +{ + + sl[size++] = s; + sub_list_swim(sl, size - 1); +} + +struct last_printed_item +{ + struct sort_list_item *item; +}; + +/* + * Prints the current line of the file + */ +static void +sub_list_header_print(struct sort_list *sl, FILE *f_out, + struct last_printed_item *lp) +{ + + if (sl && sl->count && f_out && sl->list[0]->str) { + if (sort_opts_vals.uflag) { + if ((lp->item == NULL) || (list_coll(&(lp->item), + &(sl->list[0])))) { + bwsfwrite(sl->list[0]->str, f_out, + sort_opts_vals.zflag); + lp->item = sl->list[0]; + } + } else + bwsfwrite(sl->list[0]->str, f_out, + sort_opts_vals.zflag); + } +} + +/* + * Read next line + */ +static void +sub_list_next(struct sort_list *sl) +{ + + if (sl && sl->count) { + sl->list += 1; + sl->count -= 1; + } +} + +/* + * Merge sub-lists to a file + */ +static void +merge_sub_lists(struct sort_list **sl, size_t n, FILE* f_out) +{ + struct last_printed_item lp; + size_t i; + + memset(&lp,0,sizeof(lp)); + + /* construct the initial list: */ + for (i = 0; i < n; i++) + sub_list_push(sl[i], sl, i); + + while (sl[0]->count) { /* unfinished lists are always in front */ + /* output the smallest line: */ + sub_list_header_print(sl[0], f_out, &lp); + /* move to a new line, if possible: */ + sub_list_next(sl[0]); + /* re-arrange the list: */ + sub_list_sink(sl, 0, n); + } +} + +/* + * Merge sub-lists to a file + */ +static void +merge_list_parts(struct sort_list **parts, size_t n, const char *fn) +{ + FILE* f_out; + + f_out = openfile(fn,"w"); + + merge_sub_lists(parts, n, f_out); + + closefile(f_out, fn); +} + +#endif /* defined(SORT_THREADS) */ +/* + * Multi-threaded sort algorithm "driver" + */ +static void +mt_sort(struct sort_list *list, + int(*sort_func)(void *, size_t, size_t, int(*)(const void *, const void *)), + const char* fn) +{ +#if defined(SORT_THREADS) + if (nthreads < 2 || list->count < MT_SORT_THRESHOLD) { + size_t nthreads_save = nthreads; + nthreads = 1; +#endif + /* if single thread or small data, do simple sort */ + sort_func(list->list, list->count, + sizeof(struct sort_list_item *), + (int(*)(const void *, const void *)) list_coll); + sort_list_dump(list, fn); +#if defined(SORT_THREADS) + nthreads = nthreads_save; + } else { + /* multi-threaded sort */ + struct sort_list **parts; + size_t avgsize, cstart, i; + + /* array of sub-lists */ + parts = sort_malloc(sizeof(struct sort_list*) * nthreads); + cstart = 0; + avgsize = list->count / nthreads; + + /* set global system sort function */ + g_sort_func = sort_func; + + /* set sublists */ + for (i = 0; i < nthreads; ++i) { + size_t sz = 0; + + parts[i] = sort_malloc(sizeof(struct sort_list)); + parts[i]->list = list->list + cstart; + parts[i]->memsize = 0; + parts[i]->sub_list_pos = i; + + sz = (i == nthreads - 1) ? list->count - cstart : + avgsize; + + parts[i]->count = sz; + + parts[i]->size = parts[i]->count; + + cstart += sz; + } + + /* init threads counting semaphore */ +#ifndef __APPLE__ + sem_init(&mtsem, 0, 0); +#else + { + mach_port_t self = mach_task_self(); + kern_return_t ret = semaphore_create(self, &mtsem, SYNC_POLICY_FIFO, 0); + if (ret != KERN_SUCCESS) { + err(2,NULL); + } + } +#endif + + /* start threads */ + for (i = 0; i < nthreads; ++i) { + pthread_t pth; + pthread_attr_t attr; + + pthread_attr_init(&attr); +#ifndef __APPLE__ + pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED); +#else + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); +#endif + + for (;;) { + int res = pthread_create(&pth, &attr, + mt_sort_thread, parts[i]); + + if (res >= 0) + break; + if (errno == EAGAIN) { +#ifndef __APPLE__ + pthread_yield(); +#else + sched_yield(); +#endif + continue; + } + err(2, NULL); + } + + pthread_attr_destroy(&attr); + } + + /* wait for threads completion */ + for (i = 0; i < nthreads; ++i) { +#ifndef __APPLE__ + sem_wait(&mtsem); +#else + semaphore_wait(mtsem); +#endif + } + /* destroy the semaphore - we do not need it anymore */ +#ifndef __APPLE__ + sem_destroy(&mtsem); +#else + { + mach_port_t self = mach_task_self(); + semaphore_destroy(self,mtsem); + } +#endif + + /* merge sorted sub-lists to the file */ + merge_list_parts(parts, nthreads, fn); + + /* free sub-lists data */ + for (i = 0; i < nthreads; ++i) { + sort_free(parts[i]); + } + sort_free(parts); + } +#endif /* defined(SORT_THREADS) */ +} diff --git a/text_cmds/sort/file.h b/text_cmds/sort/file.h new file mode 100644 index 0000000..e7a8c94 --- /dev/null +++ b/text_cmds/sort/file.h @@ -0,0 +1,126 @@ +/* $FreeBSD$ */ + +/*- + * Copyright (C) 2009 Gabor Kovesdan + * Copyright (C) 2012 Oleg Moskalenko + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if !defined(__SORT_FILE_H__) +#define __SORT_FILE_H__ + +#include "coll.h" +#include "sort.h" + +#define SORT_DEFAULT 0 +#define SORT_QSORT 1 +#define SORT_MERGESORT 2 +#define SORT_HEAPSORT 3 +#define SORT_RADIXSORT 4 + +#define DEFAULT_SORT_ALGORITHM SORT_MERGESORT +#define DEFAULT_SORT_FUNC mergesort + +/* + * List of data to be sorted. + */ +struct sort_list +{ + struct sort_list_item **list; + unsigned long long memsize; + size_t count; + size_t size; + size_t sub_list_pos; +}; + +/* + * File reader object + */ +struct file_reader; + +/* + * List of files to be sorted + */ +struct file_list +{ + char **fns; + size_t count; + size_t sz; + bool tmp; +}; + +/* memory */ + +/**/ + +extern unsigned long long free_memory; +extern unsigned long long available_free_memory; + +/* Are we using mmap ? */ +extern bool use_mmap; + +/* temporary file dir */ + +extern const char *tmpdir; + +/* + * Max number of simultaneously open files (including the output file). + */ +extern size_t max_open_files; + +/* + * Compress program + */ +extern const char* compress_program; + +/* funcs */ + +struct file_reader *file_reader_init(const char *fsrc); +struct bwstring *file_reader_readline(struct file_reader *fr); +void file_reader_free(struct file_reader *fr); + +void init_tmp_files(void); +void clear_tmp_files(void); +char *new_tmp_file_name(void); +void tmp_file_atexit(const char *tmp_file); + +void file_list_init(struct file_list *fl, bool tmp); +void file_list_add(struct file_list *fl, char *fn, bool allocate); +void file_list_populate(struct file_list *fl, int argc, char **argv, bool allocate); +void file_list_clean(struct file_list *fl); + +int check(const char *); +void merge_files(struct file_list *fl, const char *fn_out); +FILE *openfile(const char *, const char *); +void closefile(FILE *, const char *); +int procfile(const char *fn, struct sort_list *list, struct file_list *fl); + +void sort_list_init(struct sort_list *l); +void sort_list_add(struct sort_list *l, struct bwstring *str); +void sort_list_clean(struct sort_list *l); +void sort_list_dump(struct sort_list *l, const char *fn); + +void sort_list_to_file(struct sort_list *list, const char *outfile); + +#endif /* __SORT_FILE_H__ */ diff --git a/text_cmds/sort/mem.c b/text_cmds/sort/mem.c new file mode 100644 index 0000000..f59f751 --- /dev/null +++ b/text_cmds/sort/mem.c @@ -0,0 +1,81 @@ +/*- + * Copyright (C) 2009 Gabor Kovesdan + * Copyright (C) 2012 Oleg Moskalenko + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: head/usr.bin/sort/mem.c 281132 2015-04-06 02:35:55Z pfg $"); + +#include +#include +#include + +#include "mem.h" + +/* + * malloc() wrapper. + */ +void * +sort_malloc(size_t size) +{ + void *ptr; + + if ((ptr = malloc(size)) == NULL) + err(2, NULL); + return (ptr); +} + +/* + * free() wrapper. + */ +void +sort_free(const void *ptr) +{ + + if (ptr) + free(__DECONST(void *, ptr)); +} + +/* + * realloc() wrapper. + */ +void * +sort_realloc(void *ptr, size_t size) +{ + + if ((ptr = realloc(ptr, size)) == NULL) + err(2, NULL); + return (ptr); +} + +char * +sort_strdup(const char *str) +{ + char *dup; + + if ((dup = strdup(str)) == NULL) + err(2, NULL); + return (dup); +} diff --git a/text_cmds/sort/mem.h b/text_cmds/sort/mem.h new file mode 100644 index 0000000..95ab1ac --- /dev/null +++ b/text_cmds/sort/mem.h @@ -0,0 +1,45 @@ +/* $FreeBSD: head/usr.bin/sort/mem.h 264744 2014-04-21 22:52:18Z pfg $ */ + +/*- + * Copyright (C) 2009 Gabor Kovesdan + * Copyright (C) 2012 Oleg Moskalenko + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if !defined(__SORT_MEM_H__) +#define __SORT_MEM_H__ + +#include +#include +#include + +/* + * mem.c + */ +void *sort_malloc(size_t); +void sort_free(const void *ptr); +void *sort_realloc(void *, size_t); +char *sort_strdup(const char *); + +#endif /* __SORT_MEM_H__ */ diff --git a/text_cmds/sort/nls/C.msg b/text_cmds/sort/nls/C.msg new file mode 100644 index 0000000..81300ba --- /dev/null +++ b/text_cmds/sort/nls/C.msg @@ -0,0 +1,16 @@ +$ $FreeBSD: head/usr.bin/sort/nls/C.msg 235434 2012-05-14 09:55:23Z gabor $ +$ +$set 1 +$quote " +1 "mutually exclusive flags" +2 "extra argument not allowed with -c" +3 "Unknown feature" +4 "Wrong memory buffer specification" +5 "0 field in key specs" +6 "0 column in key specs" +7 "Wrong file mode" +8 "Cannot open file for reading" +9 "Radix sort cannot be used with these sort options" +10 "The chosen sort method cannot be used with stable and/or unique sort" +11 "Invalid key position" +12 "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] [+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] [-o outfile] [--batch-size size] [--files0-from file] [--heapsort] [--mergesort] [--radixsort] [--qsort] [--nthreads thread_no] [--human-numeric-sort] [--version-sort] [--random-sort [--random-source file]] [--compress-program program] [file ...]\n" diff --git a/text_cmds/sort/nls/hu_HU.ISO8859-2.msg b/text_cmds/sort/nls/hu_HU.ISO8859-2.msg new file mode 100644 index 0000000..c45862b --- /dev/null +++ b/text_cmds/sort/nls/hu_HU.ISO8859-2.msg @@ -0,0 +1,16 @@ +$ $FreeBSD: head/usr.bin/sort/nls/hu_HU.ISO8859-2.msg 235434 2012-05-14 09:55:23Z gabor $ +$ +$set 1 +$quote " +1 "egymást kizáró opciók" +2 "extra argumentum a -%c opcióval" +3 "Ismeretlen funkció\n" +4 "Rossz memória puffer érték" +5 "0 mezõ a kulcsspecifikációban\n" +6 "0 oszlop a kulcsspecifikációban\n" +7 "Helytelen fájl mód" +8 "A fájl nem nyitható meg olvasásra" +9 "A radix rendezés nem használható a megadott rendezési opciókkal" +10 "A választott rendezési mód nem használható a --stable és --unique opciókkal" +11 "Érvénytelen kulcs pozíció" +12 "Használat: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] [+POS1 [-POS2]] [-S memóriaméret] [-T ideiglenes_könyvtár] [-t elválasztó] [-o kimeneti_fájl] [--batch-size méret] [--files0-from fájl] [--heapsort] [--mergesort] [--radixsort] [--qsort] [--nthreads szálak_száma] [--human-numeric-sort] [--version-sort] [--random-sort [--random-source fájl]] [--compress-program program] [fájl ...]\n" diff --git a/text_cmds/sort/radixsort.c b/text_cmds/sort/radixsort.c new file mode 100644 index 0000000..8288a19 --- /dev/null +++ b/text_cmds/sort/radixsort.c @@ -0,0 +1,746 @@ +/*- + * Copyright (C) 2012 Oleg Moskalenko + * Copyright (C) 2012 Gabor Kovesdan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#if defined(SORT_THREADS) +#include +#ifndef __APPLE__ +#include +#else +#include +#include +#include +#include +#endif +#endif +#include +#include +#include +#include +#include + +#include "coll.h" +#include "radixsort.h" + +#define DEFAULT_SORT_FUNC_RADIXSORT mergesort + +#define TINY_NODE(sl) ((sl)->tosort_num < 65) +#define SMALL_NODE(sl) ((sl)->tosort_num < 5) + +/* are we sorting in reverse order ? */ +static bool reverse_sort; + +/* sort sub-levels array size */ +static const size_t slsz = 256 * sizeof(struct sort_level*); + +/* one sort level structure */ +struct sort_level +{ + struct sort_level **sublevels; + struct sort_list_item **leaves; + struct sort_list_item **sorted; + struct sort_list_item **tosort; + size_t leaves_num; + size_t leaves_sz; + size_t level; + size_t real_sln; + size_t start_position; + size_t sln; + size_t tosort_num; + size_t tosort_sz; +}; + +/* stack of sort levels ready to be sorted */ +struct level_stack { + struct level_stack *next; + struct sort_level *sl; +}; + +static struct level_stack *g_ls; + +#if defined(SORT_THREADS) +/* stack guarding mutex */ +static pthread_cond_t g_ls_cond; +static pthread_mutex_t g_ls_mutex; + +/* counter: how many items are left */ +static size_t sort_left; +/* guarding mutex */ + +/* semaphore to count threads */ +#ifndef __APPLE__ +static sem_t mtsem; +#else +semaphore_t rmtsem; +#endif + +/* + * Decrement items counter + */ +static inline void +sort_left_dec(size_t n) +{ + pthread_mutex_lock(&g_ls_mutex); + sort_left -= n; + if (sort_left == 0 && nthreads > 1) + pthread_cond_broadcast(&g_ls_cond); + pthread_mutex_unlock(&g_ls_mutex); +} + +/* + * Do we have something to sort ? + * + * This routine does not need to be locked. + */ +static inline bool +have_sort_left(void) +{ + bool ret; + + ret = (sort_left > 0); + + return (ret); +} + +#else + +#define sort_left_dec(n) + +#endif /* SORT_THREADS */ + +/* + * Push sort level to the stack + */ +static inline void +push_ls(struct sort_level *sl) +{ + struct level_stack *new_ls; + + new_ls = sort_malloc(sizeof(struct level_stack)); + new_ls->sl = sl; + +#if defined(SORT_THREADS) + if (nthreads > 1) + pthread_mutex_lock(&g_ls_mutex); +#endif + + new_ls->next = g_ls; + g_ls = new_ls; + +#if defined(SORT_THREADS) + if (nthreads > 1) + pthread_cond_signal(&g_ls_cond); +#endif + +#if defined(SORT_THREADS) + if (nthreads > 1) + pthread_mutex_unlock(&g_ls_mutex); +#endif +} + +/* + * Pop sort level from the stack (single-threaded style) + */ +static inline struct sort_level* +pop_ls_st(void) +{ + struct sort_level *sl; + + if (g_ls) { + struct level_stack *saved_ls; + + sl = g_ls->sl; + saved_ls = g_ls; + g_ls = g_ls->next; + sort_free(saved_ls); + } else + sl = NULL; + + return (sl); +} + +#if defined(SORT_THREADS) + +/* + * Pop sort level from the stack (multi-threaded style) + */ +static inline struct sort_level* +pop_ls_mt(void) +{ + struct level_stack *saved_ls; + struct sort_level *sl; + + pthread_mutex_lock(&g_ls_mutex); + + for (;;) { + if (g_ls) { + sl = g_ls->sl; + saved_ls = g_ls; + g_ls = g_ls->next; + break; + } + sl = NULL; + saved_ls = NULL; + + if (have_sort_left() == 0) + break; + pthread_cond_wait(&g_ls_cond, &g_ls_mutex); + } + + pthread_mutex_unlock(&g_ls_mutex); + + sort_free(saved_ls); + + return (sl); +} + +#endif /* defined(SORT_THREADS) */ + +static void +add_to_sublevel(struct sort_level *sl, struct sort_list_item *item, size_t indx) +{ + struct sort_level *ssl; + + ssl = sl->sublevels[indx]; + + if (ssl == NULL) { + ssl = sort_malloc(sizeof(struct sort_level)); + memset(ssl, 0, sizeof(struct sort_level)); + + ssl->level = sl->level + 1; + sl->sublevels[indx] = ssl; + + ++(sl->real_sln); + } + + if (++(ssl->tosort_num) > ssl->tosort_sz) { + ssl->tosort_sz = ssl->tosort_num + 128; + ssl->tosort = sort_realloc(ssl->tosort, + sizeof(struct sort_list_item*) * (ssl->tosort_sz)); + } + + ssl->tosort[ssl->tosort_num - 1] = item; +} + +static inline void +add_leaf(struct sort_level *sl, struct sort_list_item *item) +{ + + if (++(sl->leaves_num) > sl->leaves_sz) { + sl->leaves_sz = sl->leaves_num + 128; + sl->leaves = sort_realloc(sl->leaves, + (sizeof(struct sort_list_item*) * (sl->leaves_sz))); + } + sl->leaves[sl->leaves_num - 1] = item; +} + +static inline int +get_wc_index(struct sort_list_item *sli, size_t level) +{ + const struct key_value *kv; + const struct bwstring *bws; + + kv = get_key_from_keys_array(&sli->ka, 0); + bws = kv->k; + + if ((BWSLEN(bws) > level)) + return (unsigned char) BWS_GET(bws,level); + return (-1); +} + +static void +place_item(struct sort_level *sl, size_t item) +{ + struct sort_list_item *sli; + int c; + + sli = sl->tosort[item]; + c = get_wc_index(sli, sl->level); + + if (c == -1) + add_leaf(sl, sli); + else + add_to_sublevel(sl, sli, c); +} + +static void +free_sort_level(struct sort_level *sl) +{ + + if (sl) { + if (sl->leaves) + sort_free(sl->leaves); + + if (sl->level > 0) + sort_free(sl->tosort); + + if (sl->sublevels) { + struct sort_level *slc; + size_t sln; + + sln = sl->sln; + + for (size_t i = 0; i < sln; ++i) { + slc = sl->sublevels[i]; + if (slc) + free_sort_level(slc); + } + + sort_free(sl->sublevels); + } + + sort_free(sl); + } +} + +static void +run_sort_level_next(struct sort_level *sl) +{ + struct sort_level *slc; + size_t i, sln, tosort_num; + + if (sl->sublevels) { + sort_free(sl->sublevels); + sl->sublevels = NULL; + } + + switch (sl->tosort_num) { + case 0: + goto end; + case (1): + sl->sorted[sl->start_position] = sl->tosort[0]; + sort_left_dec(1); + goto end; + case (2): + if (list_coll_offset(&(sl->tosort[0]), &(sl->tosort[1]), + sl->level) > 0) { + sl->sorted[sl->start_position++] = sl->tosort[1]; + sl->sorted[sl->start_position] = sl->tosort[0]; + } else { + sl->sorted[sl->start_position++] = sl->tosort[0]; + sl->sorted[sl->start_position] = sl->tosort[1]; + } + sort_left_dec(2); + + goto end; + default: + if (TINY_NODE(sl) || (sl->level > 15)) { + listcoll_t func; + + func = get_list_call_func(sl->level); + + sl->leaves = sl->tosort; + sl->leaves_num = sl->tosort_num; + sl->leaves_sz = sl->leaves_num; + sl->leaves = sort_realloc(sl->leaves, + (sizeof(struct sort_list_item *) * + (sl->leaves_sz))); + sl->tosort = NULL; + sl->tosort_num = 0; + sl->tosort_sz = 0; + sl->sln = 0; + sl->real_sln = 0; + if (sort_opts_vals.sflag) { + if (mergesort(sl->leaves, sl->leaves_num, + sizeof(struct sort_list_item *), + (int(*)(const void *, const void *)) func) == -1) + /* NOTREACHED */ + err(2, "Radix sort error 3"); + } else + DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num, + sizeof(struct sort_list_item *), + (int(*)(const void *, const void *)) func); + + memcpy(sl->sorted + sl->start_position, + sl->leaves, sl->leaves_num * + sizeof(struct sort_list_item*)); + + sort_left_dec(sl->leaves_num); + + goto end; + } else { + sl->tosort_sz = sl->tosort_num; + sl->tosort = sort_realloc(sl->tosort, + sizeof(struct sort_list_item*) * (sl->tosort_sz)); + } + } + + sl->sln = 256; + sl->sublevels = sort_malloc(slsz); + memset(sl->sublevels, 0, slsz); + + sl->real_sln = 0; + + tosort_num = sl->tosort_num; + for (i = 0; i < tosort_num; ++i) + place_item(sl, i); + + sort_free(sl->tosort); + sl->tosort = NULL; + sl->tosort_num = 0; + sl->tosort_sz = 0; + + if (sl->leaves_num > 1) { + if (keys_num > 1) { + if (sort_opts_vals.sflag) { + mergesort(sl->leaves, sl->leaves_num, + sizeof(struct sort_list_item *), + (int(*)(const void *, const void *)) list_coll); + } else { + DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num, + sizeof(struct sort_list_item *), + (int(*)(const void *, const void *)) list_coll); + } + } else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) { + DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num, + sizeof(struct sort_list_item *), + (int(*)(const void *, const void *)) list_coll_by_str_only); + } + } + + sl->leaves_sz = sl->leaves_num; + sl->leaves = sort_realloc(sl->leaves, (sizeof(struct sort_list_item *) * + (sl->leaves_sz))); + + if (!reverse_sort) { + memcpy(sl->sorted + sl->start_position, sl->leaves, + sl->leaves_num * sizeof(struct sort_list_item*)); + sl->start_position += sl->leaves_num; + sort_left_dec(sl->leaves_num); + + sort_free(sl->leaves); + sl->leaves = NULL; + sl->leaves_num = 0; + sl->leaves_sz = 0; + + sln = sl->sln; + + for (i = 0; i < sln; ++i) { + slc = sl->sublevels[i]; + + if (slc) { + slc->sorted = sl->sorted; + slc->start_position = sl->start_position; + sl->start_position += slc->tosort_num; + if (SMALL_NODE(slc)) + run_sort_level_next(slc); + else + push_ls(slc); + sl->sublevels[i] = NULL; + } + } + + } else { + size_t n; + + sln = sl->sln; + + for (i = 0; i < sln; ++i) { + n = sln - i - 1; + slc = sl->sublevels[n]; + + if (slc) { + slc->sorted = sl->sorted; + slc->start_position = sl->start_position; + sl->start_position += slc->tosort_num; + if (SMALL_NODE(slc)) + run_sort_level_next(slc); + else + push_ls(slc); + sl->sublevels[n] = NULL; + } + } + + memcpy(sl->sorted + sl->start_position, sl->leaves, + sl->leaves_num * sizeof(struct sort_list_item*)); + sort_left_dec(sl->leaves_num); + } + +end: + free_sort_level(sl); +} + +/* + * Single-threaded sort cycle + */ +static void +run_sort_cycle_st(void) +{ + struct sort_level *slc; + + for (;;) { + slc = pop_ls_st(); + if (slc == NULL) { + break; + } + run_sort_level_next(slc); + } +} + +#if defined(SORT_THREADS) + +/* + * Multi-threaded sort cycle + */ +static void +run_sort_cycle_mt(void) +{ + struct sort_level *slc; + + for (;;) { + slc = pop_ls_mt(); + if (slc == NULL) + break; + run_sort_level_next(slc); + } +} + +/* + * Sort cycle thread (in multi-threaded mode) + */ +static void* +sort_thread(void* arg) +{ + + run_sort_cycle_mt(); + +#ifndef __APPLE__ + sem_post(&mtsem); +#else + semaphore_signal(rmtsem); +#endif + + return (arg); +} + +#endif /* defined(SORT_THREADS) */ + +static void +run_top_sort_level(struct sort_level *sl) +{ + struct sort_level *slc; + + reverse_sort = sort_opts_vals.kflag ? keys[0].sm.rflag : + default_sort_mods->rflag; + + sl->start_position = 0; + sl->sln = 256; + sl->sublevels = sort_malloc(slsz); + memset(sl->sublevels, 0, slsz); + + for (size_t i = 0; i < sl->tosort_num; ++i) + place_item(sl, i); + + if (sl->leaves_num > 1) { + if (keys_num > 1) { + if (sort_opts_vals.sflag) { + mergesort(sl->leaves, sl->leaves_num, + sizeof(struct sort_list_item *), + (int(*)(const void *, const void *)) list_coll); + } else { + DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num, + sizeof(struct sort_list_item *), + (int(*)(const void *, const void *)) list_coll); + } + } else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) { + DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num, + sizeof(struct sort_list_item *), + (int(*)(const void *, const void *)) list_coll_by_str_only); + } + } + + if (!reverse_sort) { + memcpy(sl->tosort + sl->start_position, sl->leaves, + sl->leaves_num * sizeof(struct sort_list_item*)); + sl->start_position += sl->leaves_num; + sort_left_dec(sl->leaves_num); + + for (size_t i = 0; i < sl->sln; ++i) { + slc = sl->sublevels[i]; + + if (slc) { + slc->sorted = sl->tosort; + slc->start_position = sl->start_position; + sl->start_position += slc->tosort_num; + push_ls(slc); + sl->sublevels[i] = NULL; + } + } + + } else { + size_t n; + + for (size_t i = 0; i < sl->sln; ++i) { + + n = sl->sln - i - 1; + slc = sl->sublevels[n]; + + if (slc) { + slc->sorted = sl->tosort; + slc->start_position = sl->start_position; + sl->start_position += slc->tosort_num; + push_ls(slc); + sl->sublevels[n] = NULL; + } + } + + memcpy(sl->tosort + sl->start_position, sl->leaves, + sl->leaves_num * sizeof(struct sort_list_item*)); + + sort_left_dec(sl->leaves_num); + } + +#if defined(SORT_THREADS) + if (nthreads < 2) { +#endif + run_sort_cycle_st(); +#if defined(SORT_THREADS) + } else { + size_t i; + + for(i = 0; i < nthreads; ++i) { + pthread_attr_t attr; + pthread_t pth; + + pthread_attr_init(&attr); +#ifndef __APPLE__ + pthread_attr_setdetachstate(&attr, + PTHREAD_DETACHED); +#else + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); +#endif + + for (;;) { + int res = pthread_create(&pth, &attr, + sort_thread, NULL); + if (res >= 0) + break; + if (errno == EAGAIN) { +#ifndef __APPLE__ + pthread_yield(); +#else + sched_yield(); +#endif + continue; + } + err(2, NULL); + } + + pthread_attr_destroy(&attr); + } + + for(i = 0; i < nthreads; ++i) +#ifndef __APPLE__ + sem_wait(&mtsem); +#else + semaphore_wait(rmtsem); +#endif + } +#endif /* defined(SORT_THREADS) */ +} + +static void +run_sort(struct sort_list_item **base, size_t nmemb) +{ + struct sort_level *sl; + +#if defined(SORT_THREADS) + size_t nthreads_save = nthreads; + if (nmemb < MT_SORT_THRESHOLD) + nthreads = 1; + + if (nthreads > 1) { + pthread_mutexattr_t mattr; + + pthread_mutexattr_init(&mattr); +#ifndef __APPLE__ + pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_ADAPTIVE_NP); +#endif + + pthread_mutex_init(&g_ls_mutex, &mattr); + pthread_cond_init(&g_ls_cond, NULL); + + pthread_mutexattr_destroy(&mattr); + +#ifndef __APPLE__ + sem_init(&mtsem, 0, 0); +#else + { + mach_port_t self = mach_task_self(); + kern_return_t ret = semaphore_create(self, &rmtsem, SYNC_POLICY_FIFO, 0); + if (ret != KERN_SUCCESS) { + err(2,NULL); + } + } +#endif + + } +#endif + + sl = sort_malloc(sizeof(struct sort_level)); + memset(sl, 0, sizeof(struct sort_level)); + + sl->tosort = base; + sl->tosort_num = nmemb; + sl->tosort_sz = nmemb; + +#if defined(SORT_THREADS) + sort_left = nmemb; +#endif + + run_top_sort_level(sl); + + free_sort_level(sl); + +#if defined(SORT_THREADS) + if (nthreads > 1) { +#ifndef __APPLE__ + sem_destroy(&mtsem); +#else + { + mach_port_t self = mach_task_self(); + semaphore_destroy(self,rmtsem); + } +#endif + pthread_mutex_destroy(&g_ls_mutex); + } + nthreads = nthreads_save; +#endif +} + +void +rxsort(struct sort_list_item **base, size_t nmemb) +{ + + run_sort(base, nmemb); +} diff --git a/text_cmds/sort/radixsort.h b/text_cmds/sort/radixsort.h new file mode 100644 index 0000000..8743728 --- /dev/null +++ b/text_cmds/sort/radixsort.h @@ -0,0 +1,38 @@ +/* $FreeBSD$ */ + +/*- + * Copyright (C) 2012 Oleg Moskalenko + * Copyright (C) 2012 Gabor Kovesdan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if !defined(__SORT_RADIX_H__) +#define __SORT_RADIX_H__ + +#include "coll.h" +#include "sort.h" + +void rxsort(struct sort_list_item **base, size_t nmemb); + +#endif /* __SORT_RADIX_H__ */ diff --git a/text_cmds/sort/sort.1.in b/text_cmds/sort/sort.1.in new file mode 100644 index 0000000..a2234e8 --- /dev/null +++ b/text_cmds/sort/sort.1.in @@ -0,0 +1,639 @@ +.\" $OpenBSD: sort.1,v 1.45 2015/03/19 13:51:10 jmc Exp $ +.\" $FreeBSD: head/usr.bin/sort/sort.1.in 305613 2016-09-08 14:50:23Z gabor $ +.\" +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)sort.1 8.1 (Berkeley) 6/6/93 +.\" +.Dd March 19, 2015 +.Dt SORT 1 +.Os +.Sh NAME +.Nm sort +.Nd sort or merge records (lines) of text and binary files +.Sh SYNOPSIS +.Nm +.Bk -words +.Op Fl bcCdfghiRMmnrsuVz +.Sm off +.Op Fl k\ \& Ar field1 Op , Ar field2 +.Sm on +.Op Fl S Ar memsize +.Ek +.Op Fl T Ar dir +.Op Fl t Ar char +.Op Fl o Ar output +.Op Ar file ... +.Nm +.Fl Fl help +.Nm +.Fl Fl version +.Sh DESCRIPTION +The +.Nm +utility sorts text and binary files by lines. +A line is a record separated from the subsequent record by a +newline (default) or NUL \'\\0\' character (-z option). +A record can contain any printable or unprintable characters. +Comparisons are based on one or more sort keys extracted from +each line of input, and are performed lexicographically, +according to the current locale's collating rules and the +specified command-line options that can tune the actual +sorting behavior. +By default, if keys are not given, +.Nm +uses entire lines for comparison. +.Pp +The command line options are as follows: +.Bl -tag -width Ds +.It Fl c , Fl Fl check , Fl C , Fl Fl check=silent|quiet +Check that the single input file is sorted. +If the file is not sorted, +.Nm +produces the appropriate error messages and exits with code 1, +otherwise returns 0. +If +.Fl C +or +.Fl Fl check=silent +is specified, +.Nm +produces no output. +This is a "silent" version of +.Fl c . +.It Fl m , Fl Fl merge +Merge only. +The input files are assumed to be pre-sorted. +If they are not sorted the output order is undefined. +.It Fl o Ar output , Fl Fl output Ns = Ns Ar output +Print the output to the +.Ar output +file instead of the standard output. +.It Fl S Ar size , Fl Fl buffer-size Ns = Ns Ar size +Use +.Ar size +for the maximum size of the memory buffer. +Size modifiers %,b,K,M,G,T,P,E,Z,Y can be used. +If a memory limit is not explicitly specified, +.Nm +takes up to about 90% of available memory. +If the file size is too big to fit into the memory buffer, +the temporary disk files are used to perform the sorting. +.It Fl T Ar dir , Fl Fl temporary-directory Ns = Ns Ar dir +Store temporary files in the directory +.Ar dir . +The default path is the value of the environment variable +.Ev TMPDIR +or +.Pa /var/tmp +if +.Ev TMPDIR +is not defined. +.It Fl u , Fl Fl unique +Unique keys. +Suppress all lines that have a key that is equal to an already +processed one. +This option, similarly to +.Fl s , +implies a stable sort. +If used with +.Fl c +or +.Fl C , +.Nm +also checks that there are no lines with duplicate keys. +.It Fl s +Stable sort. +This option maintains the original record order of records that have +an equal key. +This is a non-standard feature, but it is widely accepted and used. +.It Fl Fl version +Print the version and silently exits. +.It Fl Fl help +Print the help text and silently exits. +.El +.Pp +The following options override the default ordering rules. +When ordering options appear independently of key field +specifications, they apply globally to all sort keys. +When attached to a specific key (see +.Fl k ) , +the ordering options override all global ordering options for +the key they are attached to. +.Bl -tag -width indent +.It Fl b , Fl Fl ignore-leading-blanks +Ignore leading blank characters when comparing lines. +.It Fl d , Fl Fl dictionary-order +Consider only blank spaces and alphanumeric characters in comparisons. +.It Fl f , Fl Fl ignore-case +Convert all lowercase characters to their uppercase equivalent +before comparison, that is, perform case-independent sorting. +.It Fl g , Fl Fl general-numeric-sort , Fl Fl sort=general-numeric +Sort by general numerical value. +As opposed to +.Fl n , +this option handles general floating points. +It has a more +permissive format than that allowed by +.Fl n +but it has a significant performance drawback. +.It Fl h , Fl Fl human-numeric-sort , Fl Fl sort=human-numeric +Sort by numerical value, but take into account the SI suffix, +if present. +Sort first by numeric sign (negative, zero, or +positive); then by SI suffix (either empty, or `k' or `K', or one +of `MGTPEZY', in that order); and finally by numeric value. +The SI suffix must immediately follow the number. +For example, '12345K' sorts before '1M', because M is "larger" than K. +This sort option is useful for sorting the output of a single invocation +of 'df' command with +.Fl h +or +.Fl H +options (human-readable). +.It Fl i , Fl Fl ignore-nonprinting +Ignore all non-printable characters. +.It Fl M , Fl Fl month-sort , Fl Fl sort=month +Sort by month abbreviations. +Unknown strings are considered smaller than the month names. +.It Fl n , Fl Fl numeric-sort , Fl Fl sort=numeric +Sort fields numerically by arithmetic value. +Fields are supposed to have optional blanks in the beginning, an +optional minus sign, zero or more digits (including decimal point and +possible thousand separators). +.It Fl R , Fl Fl random-sort , Fl Fl sort=random +Sort by a random order. +This is a random permutation of the inputs except that +the equal keys sort together. +It is implemented by hashing the input keys and sorting +the hash values. +The hash function is chosen randomly. +The hash function is randomized by +.Cm /dev/random +content, or by file content if it is specified by +.Fl Fl random-source . +Even if multiple sort fields are specified, +the same random hash function is used for all of them. +.It Fl r , Fl Fl reverse +Sort in reverse order. +.It Fl V , Fl Fl version-sort +Sort version numbers. +The input lines are treated as file names in form +PREFIX VERSION SUFFIX, where SUFFIX matches the regular expression +"(\.([A-Za-z~][A-Za-z0-9~]*)?)*". +The files are compared by their prefixes and versions (leading +zeros are ignored in version numbers, see example below). +If an input string does not match the pattern, then it is compared +using the byte compare function. +All string comparisons are performed in C locale, the locale +environment setting is ignored. +.Bl -tag -width indent +.It Example: +.It $ ls sort* | sort -V +.It sort-1.022.tgz +.It sort-1.23.tgz +.It sort-1.23.1.tgz +.It sort-1.024.tgz +.It sort-1.024.003. +.It sort-1.024.003.tgz +.It sort-1.024.07.tgz +.It sort-1.024.009.tgz +.El +.El +.Pp +The treatment of field separators can be altered using these options: +.Bl -tag -width indent +.It Fl b , Fl Fl ignore-leading-blanks +Ignore leading blank space when determining the start +and end of a restricted sort key (see +.Fl k ) . +If +.Fl b +is specified before the first +.Fl k +option, it applies globally to all key specifications. +Otherwise, +.Fl b +can be attached independently to each +.Ar field +argument of the key specifications. +.Fl b . +.It Xo +.Fl k Ar field1 Ns Op , Ns Ar field2 , +.Fl Fl key Ns = Ns Ar field1 Ns Op , Ns Ar field2 +.Xc +Define a restricted sort key that has the starting position +.Ar field1 , +and optional ending position +.Ar field2 +of a key field. +The +.Fl k +option may be specified multiple times, +in which case subsequent keys are compared when earlier keys compare equal. +The +.Fl k +option replaces the obsolete options +.Cm \(pl Ns Ar pos1 +and +.Fl Ns Ar pos2 , +but the old notation is also supported. +.It Fl t Ar char , Fl Fl field-separator Ns = Ns Ar char +Use +.Ar char +as a field separator character. +The initial +.Ar char +is not considered to be part of a field when determining key offsets. +Each occurrence of +.Ar char +is significant (for example, +.Dq Ar charchar +delimits an empty field). +If +.Fl t +is not specified, the default field separator is a sequence of +blank space characters, and consecutive blank spaces do +.Em not +delimit an empty field, however, the initial blank space +.Em is +considered part of a field when determining key offsets. +To use NUL as field separator, use +.Fl t +\'\\0\'. +.It Fl z , Fl Fl zero-terminated +Use NUL as record separator. +By default, records in the files are supposed to be separated by +the newline characters. +With this option, NUL (\'\\0\') is used as a record separator character. +.El +.Pp +Other options: +.Bl -tag -width indent +.It Fl Fl batch-size Ns = Ns Ar num +Specify maximum number of files that can be opened by +.Nm +at once. +This option affects behavior when having many input files or using +temporary files. +The default value is 16. +.It Fl Fl compress-program Ns = Ns Ar PROGRAM +Use PROGRAM to compress temporary files. +PROGRAM must compress standard input to standard output, when called +without arguments. +When called with argument +.Fl d +it must decompress standard input to standard output. +If PROGRAM fails, +.Nm +must exit with error. +An example of PROGRAM that can be used here is bzip2. +.It Fl Fl random-source Ns = Ns Ar filename +In random sort, the file content is used as the source of the 'seed' data +for the hash function choice. +Two invocations of random sort with the same seed data will use +the same hash function and will produce the same result if the input is +also identical. +By default, file +.Cm /dev/random +is used. +.It Fl Fl debug +Print some extra information about the sorting process to the +standard output. +%%THREADS%%.It Fl Fl parallel +%%THREADS%%Set the maximum number of execution threads. +%%THREADS%%Default number equals to the number of CPUs. +.It Fl Fl files0-from Ns = Ns Ar filename +Take the input file list from the file +.Ar filename . +The file names must be separated by NUL +(like the output produced by the command "find ... -print0"). +.It Fl Fl radixsort +Try to use radix sort, if the sort specifications allow. +The radix sort can only be used for trivial locales (C and POSIX), +and it cannot be used for numeric or month sort. +Radix sort is very fast and stable. +.It Fl Fl mergesort +Use mergesort. +This is a universal algorithm that can always be used, +but it is not always the fastest. +.It Fl Fl qsort +Try to use quick sort, if the sort specifications allow. +This sort algorithm cannot be used with +.Fl u +and +.Fl s . +.It Fl Fl heapsort +Try to use heap sort, if the sort specifications allow. +This sort algorithm cannot be used with +.Fl u +and +.Fl s . +.It Fl Fl mmap +Try to use file memory mapping system call. +It may increase speed in some cases. +.El +.Pp +The following operands are available: +.Bl -tag -width indent +.It Ar file +The pathname of a file to be sorted, merged, or checked. +If no +.Ar file +operands are specified, or if a +.Ar file +operand is +.Fl , +the standard input is used. +.El +.Pp +A field is defined as a maximal sequence of characters other than the +field separator and record separator (newline by default). +Initial blank spaces are included in the field unless +.Fl b +has been specified; +the first blank space of a sequence of blank spaces acts as the field +separator and is included in the field (unless +.Fl t +is specified). +For example, all blank spaces at the beginning of a line are +considered to be part of the first field. +.Pp +Fields are specified by the +.Sm off +.Fl k\ \& Ar field1 Op , Ar field2 +.Sm on +command-line option. +If +.Ar field2 +is missing, the end of the key defaults to the end of the line. +.Pp +The arguments +.Ar field1 +and +.Ar field2 +have the form +.Em m.n +.Em (m,n > 0) +and can be followed by one or more of the modifiers +.Cm b , d , f , i , +.Cm n , g , M +and +.Cm r , +which correspond to the options discussed above. +When +.Cm b +is specified it applies only to +.Ar field1 +or +.Ar field2 +where it is specified while the rest of the modifiers +apply to the whole key field regardless if they are +specified only with +.Ar field1 +or +.Ar field2 +or both. +A +.Ar field1 +position specified by +.Em m.n +is interpreted as the +.Em n Ns th +character from the beginning of the +.Em m Ns th +field. +A missing +.Em \&.n +in +.Ar field1 +means +.Ql \&.1 , +indicating the first character of the +.Em m Ns th +field; if the +.Fl b +option is in effect, +.Em n +is counted from the first non-blank character in the +.Em m Ns th +field; +.Em m Ns \&.1b +refers to the first non-blank character in the +.Em m Ns th +field. +.No 1\&. Ns Em n +refers to the +.Em n Ns th +character from the beginning of the line; +if +.Em n +is greater than the length of the line, the field is taken to be empty. +.Pp +.Em n Ns th +positions are always counted from the field beginning, even if the field +is shorter than the number of specified positions. +Thus, the key can really start from a position in a subsequent field. +.Pp +A +.Ar field2 +position specified by +.Em m.n +is interpreted as the +.Em n Ns th +character (including separators) from the beginning of the +.Em m Ns th +field. +A missing +.Em \&.n +indicates the last character of the +.Em m Ns th +field; +.Em m += \&0 +designates the end of a line. +Thus the option +.Fl k Ar v.x,w.y +is synonymous with the obsolete option +.Cm \(pl Ns Ar v-\&1.x-\&1 +.Fl Ns Ar w-\&1.y ; +when +.Em y +is omitted, +.Fl k Ar v.x,w +is synonymous with +.Cm \(pl Ns Ar v-\&1.x-\&1 +.Fl Ns Ar w\&.0 . +The obsolete +.Cm \(pl Ns Ar pos1 +.Fl Ns Ar pos2 +option is still supported, except for +.Fl Ns Ar w\&.0b , +which has no +.Fl k +equivalent. +.Sh ENVIRONMENT +.Bl -tag -width Fl +.It Ev LC_COLLATE +Locale settings to be used to determine the collation for +sorting records. +.It Ev LC_CTYPE +Locale settings to be used to case conversion and classification +of characters, that is, which characters are considered +whitespaces, etc. +.It Ev LC_MESSAGES +Locale settings that determine the language of output messages +that +.Nm +prints out. +.It Ev LC_NUMERIC +Locale settings that determine the number format used in numeric sort. +.It Ev LC_TIME +Locale settings that determine the month format used in month sort. +.It Ev LC_ALL +Locale settings that override all of the above locale settings. +This environment variable can be used to set all these settings +to the same value at once. +.It Ev LANG +Used as a last resort to determine different kinds of locale-specific +behavior if neither the respective environment variable, nor +.Ev LC_ALL +are set. +%%NLS%%.It Ev NLSPATH +%%NLS%%Path to NLS catalogs. +.It Ev TMPDIR +Path to the directory in which temporary files will be stored. +Note that +.Ev TMPDIR +may be overridden by the +.Fl T +option. +.It Ev GNUSORT_NUMERIC_COMPATIBILITY +If defined +.Fl t +will not override the locale numeric symbols, that is, thousand +separators and decimal separators. +By default, if we specify +.Fl t +with the same symbol as the thousand separator or decimal point, +the symbol will be treated as the field separator. +Older behavior was less definite; the symbol was treated as both field +separator and numeric separator, simultaneously. +This environment variable enables the old behavior. +.It Ev GNUSORT_COMPATIBLE_BLANKS +Use 'space' symbols as field separators (as modern GNU sort does). +.El +.Sh FILES +.Bl -tag -width Pa -compact +.It Pa /var/tmp/.bsdsort.PID.* +Temporary files. +.It Pa /dev/random +Default seed file for the random sort. +.El +.Sh EXIT STATUS +The +.Nm +utility shall exit with one of the following values: +.Pp +.Bl -tag -width flag -compact +.It 0 +Successfully sorted the input files or if used with +.Fl c +or +.Fl C , +the input file already met the sorting criteria. +.It 1 +On disorder (or non-uniqueness) with the +.Fl c +or +.Fl C +options. +.It 2 +An error occurred. +.El +.Sh SEE ALSO +.Xr comm 1 , +.Xr join 1 , +.Xr uniq 1 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. +.Pp +The flags +.Op Fl ghRMSsTVz +are extensions to the POSIX specification. +.Pp +All long options are extensions to the specification, some of them are +provided for compatibility with GNU versions and some of them are +own extensions. +.Pp +The old key notations +.Cm \(pl Ns Ar pos1 +and +.Fl Ns Ar pos2 +come from older versions of +.Nm +and are still supported but their use is highly discouraged. +.Sh HISTORY +A +.Nm +command first appeared in +.At v3 . +.Sh AUTHORS +.An Gabor Kovesdan Aq Mt gabor@FreeBSD.org , +.Pp +.An Oleg Moskalenko Aq Mt mom040267@gmail.com +.Sh NOTES +This implementation of +.Nm +has no limits on input line length (other than imposed by available +memory) or any restrictions on bytes allowed within lines. +.Pp +The performance depends highly on locale settings, +efficient choice of sort keys and key complexity. +The fastest sort is with locale C, on whole lines, +with option +.Fl s . +In general, locale C is the fastest, then single-byte +locales follow and multi-byte locales as the slowest but +the correct collation order is always respected. +As for the key specification, the simpler to process the +lines the faster the search will be. +.Pp +When sorting by arithmetic value, using +.Fl n +results in much better performance than +.Fl g +so its use is encouraged +whenever possible. diff --git a/text_cmds/sort/sort.c b/text_cmds/sort/sort.c new file mode 100644 index 0000000..9200606 --- /dev/null +++ b/text_cmds/sort/sort.c @@ -0,0 +1,1325 @@ +/*- + * Copyright (C) 2009 Gabor Kovesdan + * Copyright (C) 2012 Oleg Moskalenko + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include +#include +#include +#include +#ifndef __APPLE__ +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "coll.h" +#include "file.h" +#include "sort.h" + +#ifndef WITHOUT_NLS +#include +nl_catd catalog; +#endif + +#define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz" + +#define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random") +#define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024) + +static bool need_random; +static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE; +static const void *random_seed; +static size_t random_seed_size; + +MD5_CTX md5_ctx; + +/* + * Default messages to use when NLS is disabled or no catalogue + * is found. + */ +const char *nlsstr[] = { "", +/* 1*/"mutually exclusive flags", +/* 2*/"extra argument not allowed with -c", +/* 3*/"Unknown feature", +/* 4*/"Wrong memory buffer specification", +/* 5*/"0 field in key specs", +/* 6*/"0 column in key specs", +/* 7*/"Wrong file mode", +/* 8*/"Cannot open file for reading", +/* 9*/"Radix sort cannot be used with these sort options", +/*10*/"The chosen sort method cannot be used with stable and/or unique sort", +/*11*/"Invalid key position", +/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] " + "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " + "[-o outfile] [--batch-size size] [--files0-from file] " + "[--heapsort] [--mergesort] [--radixsort] [--qsort] " + "[--mmap] " +#if defined(SORT_THREADS) + "[--parallel thread_no] " +#endif + "[--human-numeric-sort] " + "[--version-sort] [--random-sort [--random-source file]] " + "[--compress-program program] [file ...]\n" }; + +struct sort_opts sort_opts_vals; + +bool debug_sort; +bool need_hint; + +int (*isblank_f)(int c) = isblank; +int (*iswblank_f)(wint_t c) = iswblank; + +#if defined(SORT_THREADS) +unsigned int ncpu = 1; +size_t nthreads = 1; +#endif + +static bool gnusort_numeric_compatibility; + +static struct sort_mods default_sort_mods_object; +struct sort_mods * const default_sort_mods = &default_sort_mods_object; + +static bool print_symbols_on_debug; + +/* + * Arguments from file (when file0-from option is used: + */ +static size_t argc_from_file0 = (size_t)-1; +static char **argv_from_file0; + +/* + * Placeholder symbols for options which have no single-character equivalent + */ +enum +{ + SORT_OPT = CHAR_MAX + 1, + HELP_OPT, + FF_OPT, + BS_OPT, + VERSION_OPT, + DEBUG_OPT, +#if defined(SORT_THREADS) + PARALLEL_OPT, +#endif + RANDOMSOURCE_OPT, + COMPRESSPROGRAM_OPT, + QSORT_OPT, + MERGESORT_OPT, + HEAPSORT_OPT, + RADIXSORT_OPT, + MMAP_OPT +}; + +#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 +static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; + +static struct option long_options[] = { + { "batch-size", required_argument, NULL, BS_OPT }, + { "buffer-size", required_argument, NULL, 'S' }, + { "check", optional_argument, NULL, 'c' }, + { "check=silent|quiet", optional_argument, NULL, 'C' }, + { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT }, + { "debug", no_argument, NULL, DEBUG_OPT }, + { "dictionary-order", no_argument, NULL, 'd' }, + { "field-separator", required_argument, NULL, 't' }, + { "files0-from", required_argument, NULL, FF_OPT }, + { "general-numeric-sort", no_argument, NULL, 'g' }, + { "heapsort", no_argument, NULL, HEAPSORT_OPT }, + { "help",no_argument, NULL, HELP_OPT }, + { "human-numeric-sort", no_argument, NULL, 'h' }, + { "ignore-leading-blanks", no_argument, NULL, 'b' }, + { "ignore-case", no_argument, NULL, 'f' }, + { "ignore-nonprinting", no_argument, NULL, 'i' }, + { "key", required_argument, NULL, 'k' }, + { "merge", no_argument, NULL, 'm' }, + { "mergesort", no_argument, NULL, MERGESORT_OPT }, + { "mmap", no_argument, NULL, MMAP_OPT }, + { "month-sort", no_argument, NULL, 'M' }, + { "numeric-sort", no_argument, NULL, 'n' }, + { "output", required_argument, NULL, 'o' }, +#if defined(SORT_THREADS) + { "parallel", required_argument, NULL, PARALLEL_OPT }, +#endif + { "qsort", no_argument, NULL, QSORT_OPT }, + { "radixsort", no_argument, NULL, RADIXSORT_OPT }, + { "random-sort", no_argument, NULL, 'R' }, + { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, + { "reverse", no_argument, NULL, 'r' }, + { "sort", required_argument, NULL, SORT_OPT }, + { "stable", no_argument, NULL, 's' }, + { "temporary-directory",required_argument, NULL, 'T' }, + { "unique", no_argument, NULL, 'u' }, + { "version", no_argument, NULL, VERSION_OPT }, + { "version-sort",no_argument, NULL, 'V' }, + { "zero-terminated", no_argument, NULL, 'z' }, + { NULL, no_argument, NULL, 0 } +}; + +void fix_obsolete_keys(int *argc, char **argv); + +/* + * Check where sort modifier is present + */ +static bool +sort_modifier_empty(struct sort_mods *sm) +{ + + if (sm == NULL) + return (true); + return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || + sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag || sm->iflag)); +} + +/* + * Print out usage text. + */ +static void +usage(bool opt_err) +{ + FILE *out; + + out = opt_err ? stderr : stdout; + + fprintf(out, getstr(12), getprogname()); + if (opt_err) + exit(2); + exit(0); +} + +/* + * Read input file names from a file (file0-from option). + */ +static void +read_fns_from_file0(const char *fn) +{ + FILE *f; + char *line = NULL; + size_t linesize = 0; + ssize_t linelen; + + if (fn == NULL) + return; + + f = fopen(fn, "r"); + if (f == NULL) + err(2, "%s", fn); + + while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { + if (*line != '\0') { + if (argc_from_file0 == (size_t) - 1) + argc_from_file0 = 0; + ++argc_from_file0; + argv_from_file0 = sort_realloc(argv_from_file0, + argc_from_file0 * sizeof(char *)); + if (argv_from_file0 == NULL) + err(2, NULL); + argv_from_file0[argc_from_file0 - 1] = line; + } else { + free(line); + } + line = NULL; + linesize = 0; + } + if (ferror(f)) + err(2, "%s: getdelim", fn); + + closefile(f, fn); +} + +/* + * Check how much RAM is available for the sort. + */ +static void +set_hw_params(void) +{ + long pages, psize; + +#if defined(SORT_THREADS) + ncpu = 1; +#endif + + pages = sysconf(_SC_PHYS_PAGES); + if (pages < 1) { + perror("sysconf pages"); + pages = 1; + } + psize = sysconf(_SC_PAGESIZE); + if (psize < 1) { + perror("sysconf psize"); + psize = 4096; + } +#if defined(SORT_THREADS) + ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN); + if (ncpu < 1) + ncpu = 1; + else if(ncpu > 32) + ncpu = 32; + + nthreads = ncpu; +#endif + + free_memory = (unsigned long long) pages * (unsigned long long) psize; + available_free_memory = free_memory / 2; + + if (available_free_memory < 1024) + available_free_memory = 1024; +} + +/* + * Convert "plain" symbol to wide symbol, with default value. + */ +static void +conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) +{ + + if (wc && c) { + int res; + + res = mbtowc(wc, c, MB_CUR_MAX); + if (res < 1) + *wc = def; + } +} + +/* + * Set current locale symbols. + */ +static void +set_locale(void) +{ + struct lconv *lc; + const char *locale; + + setlocale(LC_ALL, ""); + + lc = localeconv(); + + if (lc) { + /* obtain LC_NUMERIC info */ + /* Convert to wide char form */ + conv_mbtowc(&symbol_decimal_point, lc->decimal_point, + symbol_decimal_point); + conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, + symbol_thousands_sep); + conv_mbtowc(&symbol_positive_sign, lc->positive_sign, + symbol_positive_sign); + conv_mbtowc(&symbol_negative_sign, lc->negative_sign, + symbol_negative_sign); + } + + if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) + gnusort_numeric_compatibility = true; + + locale = setlocale(LC_COLLATE, NULL); + + if (locale) { + char *tmpl; + const char *cclocale; + + tmpl = sort_strdup(locale); + cclocale = setlocale(LC_COLLATE, "C"); + if (cclocale && !strcmp(cclocale, tmpl)) + byte_sort = true; + else { + const char *pclocale; + + pclocale = setlocale(LC_COLLATE, "POSIX"); + if (pclocale && !strcmp(pclocale, tmpl)) + byte_sort = true; + } + setlocale(LC_COLLATE, tmpl); + sort_free(tmpl); + } +} + +/* + * Set directory temporary files. + */ +static void +set_tmpdir(void) +{ + char *td; + + td = getenv("TMPDIR"); + if (td != NULL) + tmpdir = sort_strdup(td); +} + +/* + * Parse -S option. + */ +static unsigned long long +parse_memory_buffer_value(const char *value) +{ + + if (value == NULL) + return (available_free_memory); + else { + char *endptr; + unsigned long long membuf; + + endptr = NULL; + errno = 0; + membuf = strtoll(value, &endptr, 10); + + if (errno != 0) { + warn("%s",getstr(4)); + membuf = available_free_memory; + } else { + switch (*endptr){ + case 'Y': + membuf *= 1024; + /* FALLTHROUGH */ + case 'Z': + membuf *= 1024; + /* FALLTHROUGH */ + case 'E': + membuf *= 1024; + /* FALLTHROUGH */ + case 'P': + membuf *= 1024; + /* FALLTHROUGH */ + case 'T': + membuf *= 1024; + /* FALLTHROUGH */ + case 'G': + membuf *= 1024; + /* FALLTHROUGH */ + case 'M': + membuf *= 1024; + /* FALLTHROUGH */ + case '\0': + case 'K': + membuf *= 1024; + /* FALLTHROUGH */ + case 'b': + break; + case '%': + membuf = (available_free_memory * membuf) / + 100; + break; + default: + warnc(EINVAL, "%s", optarg); + membuf = available_free_memory; + } + } + return (membuf); + } +} + +/* + * Signal handler that clears the temporary files. + */ +static void +sig_handler(int sig __unused, siginfo_t *siginfo __unused, + void *context __unused) +{ + + clear_tmp_files(); + exit(-1); +} + +/* + * Set signal handler on panic signals. + */ +static void +set_signal_handler(void) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = &sig_handler; + sa.sa_flags = SA_SIGINFO; + + if (sigaction(SIGTERM, &sa, NULL) < 0) { + perror("sigaction"); + return; + } + if (sigaction(SIGHUP, &sa, NULL) < 0) { + perror("sigaction"); + return; + } + if (sigaction(SIGINT, &sa, NULL) < 0) { + perror("sigaction"); + return; + } + if (sigaction(SIGQUIT, &sa, NULL) < 0) { + perror("sigaction"); + return; + } + if (sigaction(SIGABRT, &sa, NULL) < 0) { + perror("sigaction"); + return; + } + if (sigaction(SIGBUS, &sa, NULL) < 0) { + perror("sigaction"); + return; + } + if (sigaction(SIGSEGV, &sa, NULL) < 0) { + perror("sigaction"); + return; + } + if (sigaction(SIGUSR1, &sa, NULL) < 0) { + perror("sigaction"); + return; + } + if (sigaction(SIGUSR2, &sa, NULL) < 0) { + perror("sigaction"); + return; + } +} + +/* + * Print "unknown" message and exit with status 2. + */ +static void +unknown(const char *what) +{ + + errx(2, "%s: %s", getstr(3), what); +} + +/* + * Check whether contradictory input options are used. + */ +static void +check_mutually_exclusive_flags(char c, bool *mef_flags) +{ + int fo_index, mec; + bool found_others, found_this; + + found_others = found_this = false; + fo_index = 0; + + for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { + mec = mutually_exclusive_flags[i]; + + if (mec != c) { + if (mef_flags[i]) { + if (found_this) + errx(1, "%c:%c: %s", c, mec, getstr(1)); + found_others = true; + fo_index = i; + } + } else { + if (found_others) + errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); + mef_flags[i] = true; + found_this = true; + } + } +} + +/* + * Initialise sort opts data. + */ +static void +set_sort_opts(void) +{ + + memset(&default_sort_mods_object, 0, + sizeof(default_sort_mods_object)); + memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); + default_sort_mods_object.func = + get_sort_func(&default_sort_mods_object); +} + +/* + * Set a sort modifier on a sort modifiers object. + */ +static bool +set_sort_modifier(struct sort_mods *sm, int c) +{ + + if (sm) { + switch (c){ + case 'b': + sm->bflag = true; + break; + case 'd': + sm->dflag = true; + break; + case 'f': + sm->fflag = true; + break; + case 'g': + sm->gflag = true; + need_hint = true; + break; + case 'i': + sm->iflag = true; + break; + case 'R': + sm->Rflag = true; + need_random = true; + break; + case 'M': + initialise_months(); + sm->Mflag = true; + need_hint = true; + break; + case 'n': + sm->nflag = true; + need_hint = true; + print_symbols_on_debug = true; + break; + case 'r': + sm->rflag = true; + break; + case 'V': + sm->Vflag = true; + break; + case 'h': + sm->hflag = true; + need_hint = true; + print_symbols_on_debug = true; + break; + default: + return false; + } + sort_opts_vals.complex_sort = true; + sm->func = get_sort_func(sm); + } + return (true); +} + +/* + * Parse POS in -k option. + */ +static int +parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) +{ + regmatch_t pmatch[4]; + regex_t re; + char *c, *f; + const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; + size_t len, nmatch; + int ret; + + ret = -1; + nmatch = 4; + c = f = NULL; + + if (regcomp(&re, sregexp, REG_EXTENDED) != 0) + return (-1); + + if (regexec(&re, s, nmatch, pmatch, 0) != 0) + goto end; + + if (pmatch[0].rm_eo <= pmatch[0].rm_so) + goto end; + + if (pmatch[1].rm_eo <= pmatch[1].rm_so) + goto end; + + len = pmatch[1].rm_eo - pmatch[1].rm_so; + f = sort_malloc((len + 1) * sizeof(char)); + + strncpy(f, s + pmatch[1].rm_so, len); + f[len] = '\0'; + + if (second) { + errno = 0; + ks->f2 = (size_t) strtoul(f, NULL, 10); + if (errno != 0) + err(2, "-k"); + if (ks->f2 == 0) { + warn("%s",getstr(5)); + goto end; + } + } else { + errno = 0; + ks->f1 = (size_t) strtoul(f, NULL, 10); + if (errno != 0) + err(2, "-k"); + if (ks->f1 == 0) { + warn("%s",getstr(5)); + goto end; + } + } + + if (pmatch[2].rm_eo > pmatch[2].rm_so) { + len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; + c = sort_malloc((len + 1) * sizeof(char)); + + strncpy(c, s + pmatch[2].rm_so + 1, len); + c[len] = '\0'; + + if (second) { + errno = 0; + ks->c2 = (size_t) strtoul(c, NULL, 10); + if (errno != 0) + err(2, "-k"); + } else { + errno = 0; + ks->c1 = (size_t) strtoul(c, NULL, 10); + if (errno != 0) + err(2, "-k"); + if (ks->c1 == 0) { + warn("%s",getstr(6)); + goto end; + } + } + } else { + if (second) + ks->c2 = 0; + else + ks->c1 = 1; + } + + if (pmatch[3].rm_eo > pmatch[3].rm_so) { + regoff_t i = 0; + + for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { + check_mutually_exclusive_flags(s[i], mef_flags); + if (s[i] == 'b') { + if (second) + ks->pos2b = true; + else + ks->pos1b = true; + } else if (!set_sort_modifier(&(ks->sm), s[i])) + goto end; + } + } + + ret = 0; + +end: + + if (c) + sort_free(c); + if (f) + sort_free(f); + regfree(&re); + + return (ret); +} + +/* + * Parse -k option value. + */ +static int +parse_k(const char *s, struct key_specs *ks) +{ + int ret = -1; + bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = + { false, false, false, false, false, false }; + + if (s && *s) { + char *sptr; + + sptr = strchr(s, ','); + if (sptr) { + size_t size1; + char *pos1, *pos2; + + size1 = sptr - s; + + if (size1 < 1) + return (-1); + pos1 = sort_malloc((size1 + 1) * sizeof(char)); + + strncpy(pos1, s, size1); + pos1[size1] = '\0'; + + ret = parse_pos(pos1, ks, mef_flags, false); + + sort_free(pos1); + if (ret < 0) + return (ret); + + pos2 = sort_strdup(sptr + 1); + ret = parse_pos(pos2, ks, mef_flags, true); + sort_free(pos2); + } else + ret = parse_pos(s, ks, mef_flags, false); + } + + return (ret); +} + +/* + * Parse POS in +POS -POS option. + */ +static int +parse_pos_obs(const char *s, int *nf, int *nc, char* sopts) +{ + regex_t re; + regmatch_t pmatch[4]; + char *c, *f; + const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; + int ret; + size_t len, nmatch; + + ret = -1; + nmatch = 4; + c = f = NULL; + *nc = *nf = 0; + + if (regcomp(&re, sregexp, REG_EXTENDED) != 0) + return (-1); + + if (regexec(&re, s, nmatch, pmatch, 0) != 0) + goto end; + + if (pmatch[0].rm_eo <= pmatch[0].rm_so) + goto end; + + if (pmatch[1].rm_eo <= pmatch[1].rm_so) + goto end; + + len = pmatch[1].rm_eo - pmatch[1].rm_so; + f = sort_malloc((len + 1) * sizeof(char)); + + strncpy(f, s + pmatch[1].rm_so, len); + f[len] = '\0'; + + errno = 0; + *nf = (size_t) strtoul(f, NULL, 10); + if (errno != 0) + errx(2, "%s", getstr(11)); + + if (pmatch[2].rm_eo > pmatch[2].rm_so) { + len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; + c = sort_malloc((len + 1) * sizeof(char)); + + strncpy(c, s + pmatch[2].rm_so + 1, len); + c[len] = '\0'; + + errno = 0; + *nc = (size_t) strtoul(c, NULL, 10); + if (errno != 0) + errx(2, "%s", getstr(11)); + } + + if (pmatch[3].rm_eo > pmatch[3].rm_so) { + + len = pmatch[3].rm_eo - pmatch[3].rm_so; + + strncpy(sopts, s + pmatch[3].rm_so, len); + sopts[len] = '\0'; + } + + ret = 0; + +end: + if (c) + sort_free(c); + if (f) + sort_free(f); + regfree(&re); + + return (ret); +} + +/* + * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax + */ +void +fix_obsolete_keys(int *argc, char **argv) +{ + char sopt[129]; + + for (int i = 1; i < *argc; i++) { + char *arg1; + + arg1 = argv[i]; + + if (strlen(arg1) > 1 && arg1[0] == '+') { + int c1, f1; + char sopts1[128]; + + sopts1[0] = 0; + c1 = f1 = 0; + + if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0) + continue; + else { + f1 += 1; + c1 += 1; + if (i + 1 < *argc) { + char *arg2 = argv[i + 1]; + + if (strlen(arg2) > 1 && + arg2[0] == '-') { + int c2, f2; + char sopts2[128]; + + sopts2[0] = 0; + c2 = f2 = 0; + + if (parse_pos_obs(arg2 + 1, + &f2, &c2, sopts2) >= 0) { + if (c2 > 0) + f2 += 1; + sprintf(sopt, "-k%d.%d%s,%d.%d%s", + f1, c1, sopts1, f2, c2, sopts2); + argv[i] = sort_strdup(sopt); + for (int j = i + 1; j + 1 < *argc; j++) + argv[j] = argv[j + 1]; + *argc -= 1; + continue; + } + } + } + sprintf(sopt, "-k%d.%d%s", f1, c1, sopts1); + argv[i] = sort_strdup(sopt); + } + } + } +} + +/* + * Set random seed + */ +static void +set_random_seed(void) +{ + if (need_random) { + + if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) { + FILE* fseed; + MD5_CTX ctx; + char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE]; + size_t sz = 0; + + fseed = openfile(random_source, "r"); + while (!feof(fseed)) { + int cr; + + cr = fgetc(fseed); + if (cr == EOF) + break; + + rsd[sz++] = (char) cr; + + if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE) + break; + } + + closefile(fseed, random_source); + + MD5Init(&ctx); + MD5Update(&ctx, rsd, sz); + + random_seed = MD5End(&ctx, NULL); + random_seed_size = strlen(random_seed); + + } else { + MD5_CTX ctx; + char *b; + + MD5Init(&ctx); + b = MD5File(random_source, NULL); + if (b == NULL) + err(2, NULL); + + random_seed = b; + random_seed_size = strlen(b); + } + + MD5Init(&md5_ctx); + if(random_seed_size>0) { + MD5Update(&md5_ctx, random_seed, random_seed_size); + } + } +} + +/* + * Main function. + */ +int +main(int argc, char **argv) +{ + char *outfile, *real_outfile; + int c, result; + bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = + { false, false, false, false, false, false }; + + result = 0; + outfile = sort_strdup("-"); + real_outfile = NULL; + + if(getenv("GNUSORT_COMPATIBLE_BLANKS")) { + isblank_f = isspace; + iswblank_f = iswspace; + } + + struct sort_mods *sm = &default_sort_mods_object; + + init_tmp_files(); + + set_signal_handler(); + + set_hw_params(); + set_locale(); + set_tmpdir(); + set_sort_opts(); + + fix_obsolete_keys(&argc, argv); + + while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) + != -1)) { + + check_mutually_exclusive_flags(c, mef_flags); + + if (!set_sort_modifier(sm, c)) { + + switch (c) { + case 'c': + sort_opts_vals.cflag = true; + if (optarg) { + if (!strcmp(optarg, "diagnose-first")) + ; + else if (!strcmp(optarg, "silent") || + !strcmp(optarg, "quiet")) + sort_opts_vals.csilentflag = true; + else if (*optarg) + unknown(optarg); + } + break; + case 'C': + sort_opts_vals.cflag = true; + sort_opts_vals.csilentflag = true; + break; + case 'k': + { + sort_opts_vals.complex_sort = true; + sort_opts_vals.kflag = true; + + keys_num++; + keys = sort_realloc(keys, keys_num * + sizeof(struct key_specs)); + memset(&(keys[keys_num - 1]), 0, + sizeof(struct key_specs)); + + if (parse_k(optarg, &(keys[keys_num - 1])) + < 0) { + errc(2, EINVAL, "-k %s", optarg); + } + + break; + } + case 'm': + sort_opts_vals.mflag = true; + break; + case 'o': + outfile = sort_realloc(outfile, (strlen(optarg) + 1)); + strcpy(outfile, optarg); + break; + case 's': + sort_opts_vals.sflag = true; + break; + case 'S': + available_free_memory = + parse_memory_buffer_value(optarg); + break; + case 'T': + tmpdir = sort_strdup(optarg); + break; + case 't': + while (strlen(optarg) > 1) { + if (optarg[0] != '\\') { + errc(2, EINVAL, "%s", optarg); + } + optarg += 1; + if (*optarg == '0') { + *optarg = 0; + break; + } + } + sort_opts_vals.tflag = true; + sort_opts_vals.field_sep = btowc(optarg[0]); + if (sort_opts_vals.field_sep == WEOF) { + errno = EINVAL; + err(2, NULL); + } + if (!gnusort_numeric_compatibility) { + if (symbol_decimal_point == sort_opts_vals.field_sep) + symbol_decimal_point = WEOF; + if (symbol_thousands_sep == sort_opts_vals.field_sep) + symbol_thousands_sep = WEOF; + if (symbol_negative_sign == sort_opts_vals.field_sep) + symbol_negative_sign = WEOF; + if (symbol_positive_sign == sort_opts_vals.field_sep) + symbol_positive_sign = WEOF; + } + break; + case 'u': + sort_opts_vals.uflag = true; + /* stable sort for the correct unique val */ + sort_opts_vals.sflag = true; + break; + case 'z': + sort_opts_vals.zflag = true; + break; + case SORT_OPT: + if (optarg) { + if (!strcmp(optarg, "general-numeric")) + set_sort_modifier(sm, 'g'); + else if (!strcmp(optarg, "human-numeric")) + set_sort_modifier(sm, 'h'); + else if (!strcmp(optarg, "numeric")) + set_sort_modifier(sm, 'n'); + else if (!strcmp(optarg, "month")) + set_sort_modifier(sm, 'M'); + else if (!strcmp(optarg, "random")) + set_sort_modifier(sm, 'R'); + else + unknown(optarg); + } + break; +#if defined(SORT_THREADS) + case PARALLEL_OPT: + nthreads = (size_t)(atoi(optarg)); + if (nthreads < 1) + nthreads = 1; + if (nthreads > 1024) + nthreads = 1024; + break; +#endif + case QSORT_OPT: + sort_opts_vals.sort_method = SORT_QSORT; + break; + case MERGESORT_OPT: + sort_opts_vals.sort_method = SORT_MERGESORT; + break; + case MMAP_OPT: + use_mmap = true; + break; + case HEAPSORT_OPT: + sort_opts_vals.sort_method = SORT_HEAPSORT; + break; + case RADIXSORT_OPT: + sort_opts_vals.sort_method = SORT_RADIXSORT; + break; + case RANDOMSOURCE_OPT: + random_source = strdup(optarg); + break; + case COMPRESSPROGRAM_OPT: + compress_program = strdup(optarg); + break; + case FF_OPT: + read_fns_from_file0(optarg); + break; + case BS_OPT: + { + errno = 0; + long mof = strtol(optarg, NULL, 10); + if (errno != 0) + err(2, "--batch-size"); + if (mof >= 2) + max_open_files = (size_t) mof + 1; + } + break; + case VERSION_OPT: + printf("%s\n", VERSION); + exit(EXIT_SUCCESS); + /* NOTREACHED */ + break; + case DEBUG_OPT: + debug_sort = true; + break; + case HELP_OPT: + usage(false); + /* NOTREACHED */ + break; + default: + usage(true); + /* NOTREACHED */ + } + } + } + + argc -= optind; + argv += optind; + + if (argv_from_file0) { + argc = argc_from_file0; + argv = argv_from_file0; + } + +#ifndef WITHOUT_NLS + catalog = catopen("sort", NL_CAT_LOCALE); +#endif + + if (sort_opts_vals.cflag && sort_opts_vals.mflag) + errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); + +#ifndef WITHOUT_NLS + catclose(catalog); +#endif + + if (keys_num == 0) { + keys_num = 1; + keys = sort_realloc(keys, sizeof(struct key_specs)); + memset(&(keys[0]), 0, sizeof(struct key_specs)); + keys[0].c1 = 1; + keys[0].pos1b = default_sort_mods->bflag; + keys[0].pos2b = default_sort_mods->bflag; + memcpy(&(keys[0].sm), default_sort_mods, + sizeof(struct sort_mods)); + } + + for (size_t i = 0; i < keys_num; i++) { + struct key_specs *ks; + + ks = &(keys[i]); + + if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && + !(ks->pos2b)) { + ks->pos1b = sm->bflag; + ks->pos2b = sm->bflag; + memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); + } + + ks->sm.func = get_sort_func(&(ks->sm)); + } + + if (debug_sort) { + printf("Memory to be used for sorting: %llu\n",available_free_memory); +#if defined(SORT_THREADS) + printf("Number of CPUs: %d\n",(int)ncpu); + nthreads = 1; +#endif + printf("Using collate rules of %s locale\n", + setlocale(LC_COLLATE, NULL)); + if (byte_sort) + printf("Byte sort is used\n"); + if (print_symbols_on_debug) { + printf("Decimal Point: <%lc>\n", symbol_decimal_point); + if (symbol_thousands_sep) + printf("Thousands separator: <%lc>\n", + symbol_thousands_sep); + printf("Positive sign: <%lc>\n", symbol_positive_sign); + printf("Negative sign: <%lc>\n", symbol_negative_sign); + } + } + + set_random_seed(); + + /* Case when the outfile equals one of the input files: */ + if (strcmp(outfile, "-")) { + + for(int i = 0; i < argc; ++i) { + if (strcmp(argv[i], outfile) == 0) { + real_outfile = sort_strdup(outfile); + for(;;) { + char* tmp = sort_malloc(strlen(outfile) + + strlen(".tmp") + 1); + + strcpy(tmp, outfile); + strcpy(tmp + strlen(tmp), ".tmp"); + sort_free(outfile); + outfile = tmp; + if (access(outfile, F_OK) < 0) + break; + } + tmp_file_atexit(outfile); + } + } + } + +#if defined(SORT_THREADS) + if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) + nthreads = 1; +#endif + + if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { + struct file_list fl; + struct sort_list list; + + sort_list_init(&list); + file_list_init(&fl, true); + + if (argc < 1) + procfile("-", &list, &fl); + else { + while (argc > 0) { + procfile(*argv, &list, &fl); + --argc; + ++argv; + } + } + + if (fl.count < 1) + sort_list_to_file(&list, outfile); + else { + if (list.count > 0) { + char *flast = new_tmp_file_name(); + + sort_list_to_file(&list, flast); + file_list_add(&fl, flast, false); + } + merge_files(&fl, outfile); + } + + file_list_clean(&fl); + + /* + * We are about to exit the program, so we can ignore + * the clean-up for speed + * + * sort_list_clean(&list); + */ + + } else if (sort_opts_vals.cflag) { + result = (argc == 0) ? (check("-")) : (check(*argv)); + } else if (sort_opts_vals.mflag) { + struct file_list fl; + + file_list_init(&fl, false); + file_list_populate(&fl, argc, argv, true); + merge_files(&fl, outfile); + file_list_clean(&fl); + } + + if (real_outfile) { + unlink(real_outfile); + if (rename(outfile, real_outfile) < 0) + err(2, NULL); + sort_free(real_outfile); + } + + sort_free(outfile); + + return (result); +} diff --git a/text_cmds/sort/sort.h b/text_cmds/sort/sort.h new file mode 100644 index 0000000..d31b7cb --- /dev/null +++ b/text_cmds/sort/sort.h @@ -0,0 +1,139 @@ +/* $FreeBSD$ */ + +/*- + * Copyright (C) 2009 Gabor Kovesdan + * Copyright (C) 2012 Oleg Moskalenko + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if !defined(__BSD_SORT_H__) +#define __BSD_SORT_H__ + +#include +#include +#include +#include +#include + +#include +#ifdef __APPLE__ +#include "commoncrypto.h" +#else +#include +#endif + +#ifdef __APPLE__ +#define VERSION "2.3-Apple (" SORT_VERSION ")" +#else +#define VERSION "2.3-FreeBSD" +#endif + +#ifdef WITHOUT_NLS +#define getstr(n) nlsstr[n] +#else +#include + +extern nl_catd catalog; +#define getstr(n) catgets(catalog, 1, n, nlsstr[n]) +#endif + +extern const char *nlsstr[]; + +#if defined(SORT_THREADS) +#define MT_SORT_THRESHOLD (10000) +extern unsigned int ncpu; +extern size_t nthreads; +#endif + +/* + * If true, we output some debug information. + */ +extern bool debug_sort; + +/* + * MD5 context for random hash function + */ +extern MD5_CTX md5_ctx; + +/* + * sort.c + */ + +/* + * This structure holds main sort options which are NOT affecting the sort ordering. + */ +struct sort_opts +{ + wint_t field_sep; + int sort_method; + bool cflag; + bool csilentflag; + bool kflag; + bool mflag; + bool sflag; + bool uflag; + bool zflag; + bool tflag; + bool complex_sort; +}; + +/* + * Key value structure forward declaration + */ +struct key_value; + +/* + * Cmp function + */ +typedef int (*cmpcoll_t)(struct key_value *kv1, struct key_value *kv2, size_t offset); + +/* + * This structure holds "sort modifiers" - options which are affecting the sort ordering. + */ +struct sort_mods +{ + cmpcoll_t func; + bool bflag; + bool dflag; + bool fflag; + bool gflag; + bool iflag; + bool Mflag; + bool nflag; + bool rflag; + bool Rflag; + bool Vflag; + bool hflag; +}; + +extern bool need_hint; + +extern struct sort_opts sort_opts_vals; + +extern struct sort_mods * const default_sort_mods; + +extern int (*isblank_f)(int c); +extern int (*iswblank_f)(wint_t c); + +#endif /* __BSD_SORT_H__ */ diff --git a/text_cmds/sort/testsuite/README.txt b/text_cmds/sort/testsuite/README.txt new file mode 100644 index 0000000..865094f --- /dev/null +++ b/text_cmds/sort/testsuite/README.txt @@ -0,0 +1,19 @@ +To run the tests: + +1) Adjust the variable TESTED_SORT in the file run.sh - the value must point +to the binary that is to be tested. + +2) Adjust the value ORIG_SORT in the file run.sh - the value must point to the binary that is assumed +to be working correctly. The tested sort binary will be checked against this program. + +3) Run: + +$ cd <...>/testsuite/ +$ ./run.sh + +4) Wait for many hours, it is running about 23 hours on my laptop. + +5) Check the output and check the existence of the file errors.log in the current directory. +If the test run has been successful, then there must be no file errors.log. + + diff --git a/text_cmds/sort/testsuite/bigsample.txt.xz b/text_cmds/sort/testsuite/bigsample.txt.xz new file mode 100644 index 0000000..ca120b7 Binary files /dev/null and b/text_cmds/sort/testsuite/bigsample.txt.xz differ diff --git a/text_cmds/sort/testsuite/run.sh b/text_cmds/sort/testsuite/run.sh new file mode 100755 index 0000000..db13a34 --- /dev/null +++ b/text_cmds/sort/testsuite/run.sh @@ -0,0 +1,436 @@ +#!/bin/sh + +#export GNUSORT_NUMERIC_COMPATIBILITY=x +#export GNUSORT_COMPATIBLE_BLANKS=x + +TESTED_SORT=../../text_cmds/sort/sort +ORIG_SORT=../../text_cmds_orig/sort/sort + +FILECMP=cmp + +INPUT_FILE=sample.txt +BIG_INPUT_FILE=bigsample.txt + +ERRORS_FILE=errors.log + +OUT_DIR=tmp + +# clean + +rm -rf ${OUT_DIR} +mkdir -p ${OUT_DIR} +rm -rf ${ERRORS_FILE} + +# ru_RU.KOI8-R C ru_RU.ISO-8859-5 en_US.ISO8859-15 zh_HK.Big5HKSCS +# +# ru KOI-8 is an "irregular" locale with non-trivial ordering. +# zh* is a 2-bytes locale. + +for lang in en_US.UTF-8 C en_US.ISO8859-15 zh_HK.Big5HKSCS ru_RU.KOI8-R ru_RU.ISO-8859-5 +do + + export LANG=${lang} + + for KEYS in -srh -sfrudb -Vs -sM -siz + do + + echo ${LANG} ${KEYS} + + if [ ${LANG} = "ru_RU.KOI8-R" ] && [ ${KEYS} = "-srh" ] ; then + + # numeric sorting in ru_RU.KOI8-R incompatible because the thousands separator bug fixed, + # for better compatibility with the new GNU sort. + # (ru_RU.KOI8-R uses space as thousands separator) + + continue + fi + + time ${ORIG_SORT} ${KEYS} ${BIG_INPUT_FILE} -o ${OUT_DIR}/big_orig + + for PARALLEL in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 + do + + echo --parallel ${PARALLEL} + + time ${TESTED_SORT} --parallel ${PARALLEL} ${KEYS} ${BIG_INPUT_FILE} -o ${OUT_DIR}/big_new + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} big crash --parallel ${PARALLEL} >> ${ERRORS_FILE} + exit + fi + + if ! ${FILECMP} ${OUT_DIR}/big_new ${OUT_DIR}/big_orig >${OUT_DIR}/res.0.0.big 2>&1 ; then + echo ${LANG} ${KEYS} big error --parallel ${PARALLEL} >> ${ERRORS_FILE} + fi + time ${TESTED_SORT} --parallel ${PARALLEL} -c ${KEYS} ${OUT_DIR}/big_new + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -c big error --parallel ${PARALLEL} >> ${ERRORS_FILE} + fi + rm -rf ${OUT_DIR}/res.0.0.big + rm -rf ${OUT_DIR}/big_new + done + + rm -rf ${OUT_DIR}/big_orig + + ${TESTED_SORT} ${KEYS} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.0.0 2>&1 ; then + echo ${LANG} ${KEYS} error >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c ${KEYS} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -c error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.0.0 + + ${TESTED_SORT} ${KEYS} -t " " ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -t " " crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -t " " ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.0.0 2>&1 ; then + echo ${LANG} ${KEYS} error -t " " >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c -t " " ${KEYS} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo $? ${LANG} ${KEYS} -t " " -c error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.0.0 + + ${TESTED_SORT} ${KEYS} -t "|" ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -t "|" crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -t "|" ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.0.0 2>&1 ; then + echo ${LANG} ${KEYS} error -t "|" >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c -t "|" ${KEYS} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -c -t "|" error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.0.0 + + ${TESTED_SORT} ${KEYS} -t '\0' ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -t 0 crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -t '\0' ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.0.0 2>&1 ; then + echo ${LANG} ${KEYS} error -t '\0' >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c -t '\0' ${KEYS} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -c -t '\0' error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.0.0 + + for f1 in 1 2 3 4 5 6 7 8 9 + do + for c1 in 1 2 3 4 5 10 15 20 25 30 + do + echo ${LANG} ${KEYS} ${f1} ${c1} + + ${TESTED_SORT} ${KEYS} +${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} +${f1}.${c1} crash +- >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} +${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1} 2>&1 ; then + echo ${LANG} ${KEYS} +${f1}.${c1} error +- >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c ${KEYS} +${f1}.${c1} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} +${f1}.${c1} -c error +- >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1} + + ${TESTED_SORT} ${KEYS} -k${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1} crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -k${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1} 2>&1 ; then + echo ${LANG} ${KEYS} -k${f1}.${c1} error >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c ${KEYS} -k${f1}.${c1} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1} -c error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1} + + ${TESTED_SORT} ${KEYS} -k${f1}.${c1}b ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}b crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -k${f1}.${c1}b ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1} 2>&1 ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}b error >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c ${KEYS} -k${f1}.${c1}b ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}b -c error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1} + + ${TESTED_SORT} ${KEYS} -t " " -k${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -t -k${f1}.${c1} crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -t " " -k${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1} 2>&1 ; then + echo ${LANG} ${KEYS} -k${f1}.${c1} error -t " " >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c -t " " ${KEYS} -k${f1}.${c1} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1} -t " " -c error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1} + + if [ ${LANG} != "ru_RU.KOI8-R" ] ; then + + # numeric sorting in ru_RU.KOI8-R incompatible because the thousands separator bug fixed, + # for better compatibility with the new GNU sort. + # (ru_RU.KOI8-R uses space as thousands separator) + + ${TESTED_SORT} ${KEYS} -t " " -k${f1}.${c1}n ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}n crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -t " " -k${f1}.${c1}n ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1} 2>&1 ; then + echo ${LANG} ${KEYS} -k${f1}.${c1} error -t " " n >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c -t " " ${KEYS} -k${f1}.${c1}n ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1} -c -t " " n error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1} + fi + + ${TESTED_SORT} ${KEYS} -t "|" -k${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -t "|" -k${f1}.${c1} crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -t "|" -k${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1} 2>&1 ; then + echo ${LANG} ${KEYS} -k${f1}.${c1} error -t "|" >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c -t "|" ${KEYS} -k${f1}.${c1} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1} -c -t "|" error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1} + + for f2 in 1 2 3 4 5 6 7 8 9 10 + do + for c2 in 0 1 2 3 4 5 10 15 20 25 30 + do + echo ${LANG} ${KEYS} ${f1} ${c1} ${f2} ${c2} + + ${TESTED_SORT} ${KEYS} +${f1}.${c1} -${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} +${f1}.${c1} -${f2}.${c2} crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} +${f1}.${c1} -${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then + echo ${LANG} ${KEYS} +${f1}.${c1} -${f2}.${c2} error +- >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c ${KEYS} +${f1}.${c1} -${f2}.${c2} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} +${f1}.${c1} -${f2}.${c2} -c error +- >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} + + ${TESTED_SORT} ${KEYS} -k${f1}.${c1},${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -k${f1}.${c1},${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}.${f2}.${c2} error >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c ${KEYS} -k${f1}.${c1},${f2}.${c2} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} -c error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} + + ${TESTED_SORT} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}.b.${f2}.${c2} error >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c ${KEYS} -k${f1}.${c1}b,${f2}.${c2} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} -c error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} + + ${TESTED_SORT} ${KEYS} -t " " -k${f1}.${c1},${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -t " " -k${f1}.${c1},${f2}.${c2} crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -t " " -k${f1}.${c1},${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}.${f2}.${c2} error -t " " >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c -t " " ${KEYS} -k${f1}.${c1},${f2}.${c2} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} -c -t " " error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} + + if [ ${LANG} != "ru_RU.KOI8-R" ] ; then + + # numeric sorting in ru_RU.KOI8-R incompatible because the thousands separator bug fixed, + # for better compatibility with the new GNU sort. + # (ru_RU.KOI8-R uses space as thousands separator) + + ${TESTED_SORT} ${KEYS} -t " " -k${f1}.${c1}n,${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -t " " -k${f1}.${c1}n,${f2}.${c2} crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -t " " -k${f1}.${c1}n,${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then + echo ${LANG} ${KEYS} -t " " -k${f1}.${c1}.${f2}.${c2} error n >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c -t " " ${KEYS} -k${f1}.${c1}n,${f2}.${c2} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} -c -t " " n error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} + + ${TESTED_SORT} ${KEYS} -t '\0' -k${f1}.${c1}n,${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -t '\0' -k${f1}.${c1}n,${f2}.${c2} crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -t '\0' -k${f1}.${c1}n,${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}.${f2}.${c2} error -t '\0' n >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c -t '\0' ${KEYS} -k${f1}.${c1}n,${f2}.${c2} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} -c -t '\0' n error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} + fi + + ${TESTED_SORT} ${KEYS} -t "|" -k${f1}.${c1},${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -t "|" -k${f1}.${c1},${f2}.${c2} crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -t "|" -k${f1}.${c1},${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}.${f2}.${c2} error -t "|" >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c -t "|" ${KEYS} -k${f1}.${c1},${f2}.${c2} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} -c -t "|" error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} + + ${TESTED_SORT} ${KEYS} -t "|" -k${f1}.${c1},${f2}.${c2} -k${f2}.${c1},${f1}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -t "|" -k${f1}.${c1},${f2}.${c2} -k${f2}.${c1},${f1}.${c2} crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -t "|" -k${f1}.${c1},${f2}.${c2} -k${f2}.${c1},${f1}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}.${f2}.${c2} error -t "|" 2k >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c -t "|" ${KEYS} -k${f1}.${c1},${f2}.${c2} -k${f2}.${c1},${f1}.${c2} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} -c -t "|" 2k error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} + + ${TESTED_SORT} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} -k${f2}.${c1},${f1}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} -k${f2}.${c1},${f1}.${c2} crash >> ${ERRORS_FILE} + exit + fi + ${ORIG_SORT} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} -k${f2}.${c1},${f1}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2 + if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}.b.${f2}.${c2} error 2k >> ${ERRORS_FILE} + fi + ${TESTED_SORT} -c ${KEYS} -k${f1}.${c1}b,${f2}.${c2} -k${f2}.${c1},${f1}.${c2} ${OUT_DIR}/sik1 + ER=$? + if ! [ ${ER} -eq 0 ] ; then + echo ${LANG} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} -c 2k error >> ${ERRORS_FILE} + fi + rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} + + done + done + done + done + done +done + +if [ -f ${ERRORS_FILE} ] ; then + echo TEST FAILED +else + echo TEST SUCCEEDED +fi diff --git a/text_cmds/sort/testsuite/sample.txt b/text_cmds/sort/testsuite/sample.txt new file mode 100644 index 0000000..74d1477 Binary files /dev/null and b/text_cmds/sort/testsuite/sample.txt differ diff --git a/text_cmds/sort/vsort.c b/text_cmds/sort/vsort.c new file mode 100644 index 0000000..abc8647 --- /dev/null +++ b/text_cmds/sort/vsort.c @@ -0,0 +1,265 @@ +/*- + * Copyright (C) 2012 Oleg Moskalenko + * Copyright (C) 2012 Gabor Kovesdan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: head/usr.bin/sort/vsort.c 281132 2015-04-06 02:35:55Z pfg $"); + +#include + +#include +#include +#include + +#include "sort.h" +#include "vsort.h" + +static inline bool +isdigit_clocale(wchar_t c) +{ + + return (c >= L'0' && c <= L'9'); +} + +static inline bool +isalpha_clocale(wchar_t c) +{ + + return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z')); +} + +static inline bool +isalnum_clocale(wchar_t c) +{ + + return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') || + (c >= L'0' && c <= L'9')); +} + +/* + * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$ + * Set length of string before suffix. + */ +static void +find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len) +{ + wchar_t c; + size_t clen; + bool expect_alpha, sfx; + + sfx = false; + expect_alpha = false; + *len = 0; + clen = 0; + + while ((si < se) && (c = bws_get_iter_value(si))) { + if (expect_alpha) { + expect_alpha = false; + if (!isalpha_clocale(c) && (c != L'~')) + sfx = false; + } else if (c == L'.') { + expect_alpha = true; + if (!sfx) { + sfx = true; + *len = clen; + } + } else if (!isalnum_clocale(c) && (c != L'~')) + sfx = false; + + si = bws_iterator_inc(si, 1); + ++clen; + } + + /* This code must be here to make the implementation compatible + * with WORDING of GNU sort documentation. + * But the GNU sort implementation is not following its own + * documentation. GNU sort allows empty file extensions + * (just dot with nothing after); but the regular expression in + * their documentation does not allow empty file extensions. + * We chose to make our implementation compatible with GNU sort + * implementation. If they will ever fix their bug, this code + * must be uncommented. Or they may choose to fix the info page, + * then the code stays commented. + * + if (expect_alpha) + sfx = false; + */ + + if (!sfx) + *len = clen; +} + +static inline int +cmp_chars(wchar_t c1, wchar_t c2) +{ + + if (c1 == c2) + return (0); + + if (c1 == L'~') + return (-1); + if (c2 == L'~') + return (+1); + + if (isdigit_clocale(c1) || !c1) + return ((isdigit_clocale(c2) || !c2) ? 0 : -1); + + if (isdigit_clocale(c2) || !c2) + return (+1); + + if (isalpha_clocale(c1)) + return ((isalpha_clocale(c2)) ? ((int) c1 - (int) c2) : -1); + + if (isalpha_clocale(c2)) + return (+1); + + return ((int) c1 - (int) c2); +} + +static int +cmpversions(bwstring_iterator si1, bwstring_iterator se1, + bwstring_iterator si2, bwstring_iterator se2) +{ + int cmp, diff; + + while ((si1 < se1) || (si2 < se2)) { + diff = 0; + + while (((si1 < se1) && + !isdigit_clocale(bws_get_iter_value(si1))) || + ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) { + wchar_t c1, c2; + + c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0; + c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0; + + cmp = cmp_chars(c1, c2); + if (cmp) + return (cmp); + + if (si1 < se1) + si1 = bws_iterator_inc(si1, 1); + if (si2 < se2) + si2 = bws_iterator_inc(si2, 1); + } + + while (bws_get_iter_value(si1) == L'0') + si1 = bws_iterator_inc(si1, 1); + + while (bws_get_iter_value(si2) == L'0') + si2 = bws_iterator_inc(si2, 1); + + while (isdigit_clocale(bws_get_iter_value(si1)) && + isdigit_clocale(bws_get_iter_value(si2))) { + if (!diff) + diff = ((int)bws_get_iter_value(si1) - + (int)bws_get_iter_value(si2)); + si1 = bws_iterator_inc(si1, 1); + si2 = bws_iterator_inc(si2, 1); + } + + if (isdigit_clocale(bws_get_iter_value(si1))) + return (1); + + if (isdigit_clocale(bws_get_iter_value(si2))) + return (-1); + + if (diff) + return (diff); + } + + return (0); +} + +/* + * Compare two version strings + */ +int +vcmp(struct bwstring *s1, struct bwstring *s2) +{ + bwstring_iterator si1, si2; + wchar_t c1, c2; + size_t len1, len2, slen1, slen2; + int cmp_bytes, cmp_res; + + if (s1 == s2) + return (0); + + cmp_bytes = bwscmp(s1, s2, 0); + if (cmp_bytes == 0) + return (0); + + len1 = slen1 = BWSLEN(s1); + len2 = slen2 = BWSLEN(s2); + + if (slen1 < 1) + return (-1); + if (slen2 < 1) + return (+1); + + si1 = bws_begin(s1); + si2 = bws_begin(s2); + + c1 = bws_get_iter_value(si1); + c2 = bws_get_iter_value(si2); + + if (c1 == L'.' && (slen1 == 1)) + return (-1); + + if (c2 == L'.' && (slen2 == 1)) + return (+1); + + if (slen1 == 2 && c1 == L'.' && + bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.') + return (-1); + if (slen2 == 2 && c2 == L'.' && + bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.') + return (+1); + + if (c1 == L'.' && c2 != L'.') + return (-1); + if (c1 != L'.' && c2 == L'.') + return (+1); + + if (c1 == L'.' && c2 == L'.') { + si1 = bws_iterator_inc(si1, 1); + si2 = bws_iterator_inc(si2, 1); + } + + find_suffix(si1, bws_end(s1), &len1); + find_suffix(si2, bws_end(s2), &len2); + + if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0)) + return (cmp_bytes); + + cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2, + bws_iterator_inc(si2, len2)); + + if (cmp_res == 0) + cmp_res = cmp_bytes; + + return (cmp_res); +} diff --git a/text_cmds/sort/vsort.h b/text_cmds/sort/vsort.h new file mode 100644 index 0000000..5ef45d5 --- /dev/null +++ b/text_cmds/sort/vsort.h @@ -0,0 +1,37 @@ +/* $FreeBSD: head/usr.bin/sort/vsort.h 264744 2014-04-21 22:52:18Z pfg $ */ + +/*- + * Copyright (C) 2012 Oleg Moskalenko + * Copyright (C) 2012 Gabor Kovesdan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VSORT_H_ +#define _VSORT_H_ + +#include "bwstring.h" + +int vcmp(struct bwstring *s1, struct bwstring *s2); + +#endif -- cgit v1.2.3-56-ge451