summaryrefslogtreecommitdiffstats
path: root/text_cmds/sort
diff options
context:
space:
mode:
Diffstat (limited to 'text_cmds/sort')
-rw-r--r--text_cmds/sort/bwstring.c1142
-rw-r--r--text_cmds/sort/bwstring.h142
-rw-r--r--text_cmds/sort/coll.c1324
-rw-r--r--text_cmds/sort/coll.h168
-rw-r--r--text_cmds/sort/commoncrypto.c98
-rw-r--r--text_cmds/sort/commoncrypto.h8
-rw-r--r--text_cmds/sort/file.c1684
-rw-r--r--text_cmds/sort/file.h126
-rw-r--r--text_cmds/sort/mem.c81
-rw-r--r--text_cmds/sort/mem.h45
-rw-r--r--text_cmds/sort/nls/C.msg16
-rw-r--r--text_cmds/sort/nls/hu_HU.ISO8859-2.msg16
-rw-r--r--text_cmds/sort/radixsort.c746
-rw-r--r--text_cmds/sort/radixsort.h38
-rw-r--r--text_cmds/sort/sort.1.in639
-rw-r--r--text_cmds/sort/sort.c1325
-rw-r--r--text_cmds/sort/sort.h139
-rw-r--r--text_cmds/sort/testsuite/README.txt19
-rw-r--r--text_cmds/sort/testsuite/bigsample.txt.xzbin0 -> 50904 bytes
-rwxr-xr-xtext_cmds/sort/testsuite/run.sh436
-rw-r--r--text_cmds/sort/testsuite/sample.txtbin0 -> 1672 bytes
-rw-r--r--text_cmds/sort/vsort.c265
-rw-r--r--text_cmds/sort/vsort.h37
23 files changed, 8494 insertions, 0 deletions
diff --git a/text_cmds/sort/bwstring.c b/text_cmds/sort/bwstring.c
new file mode 100644
index 0000000..3b7b233
--- /dev/null
+++ b/text_cmds/sort/bwstring.c
@@ -0,0 +1,1142 @@
+/*-
+ * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <ctype.h>
+#include <errno.h>
+#include <err.h>
+#include <langinfo.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "bwstring.h"
+#include "sort.h"
+
+bool byte_sort;
+
+static wchar_t **wmonths;
+static unsigned char **cmonths;
+
+/* initialise months */
+
+void
+initialise_months(void)
+{
+ const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
+ ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
+ ABMON_11, ABMON_12 };
+ unsigned char *tmp;
+ size_t len;
+
+ if (MB_CUR_MAX == 1) {
+ if (cmonths == NULL) {
+ unsigned char *m;
+
+ cmonths = sort_malloc(sizeof(unsigned char*) * 12);
+ for (int i = 0; i < 12; i++) {
+ cmonths[i] = NULL;
+ tmp = (unsigned char *) nl_langinfo(item[i]);
+ if (debug_sort)
+ printf("month[%d]=%s\n", i, tmp);
+ if (*tmp == '\0')
+ continue;
+ m = sort_strdup(tmp);
+ len = strlen(tmp);
+ for (unsigned int j = 0; j < len; j++)
+ m[j] = toupper(m[j]);
+ cmonths[i] = m;
+ }
+ }
+
+ } else {
+ if (wmonths == NULL) {
+ wchar_t *m;
+
+ wmonths = sort_malloc(sizeof(wchar_t *) * 12);
+ for (int i = 0; i < 12; i++) {
+ wmonths[i] = NULL;
+ tmp = (unsigned char *) nl_langinfo(item[i]);
+ if (debug_sort)
+ printf("month[%d]=%s\n", i, tmp);
+ if (*tmp == '\0')
+ continue;
+ len = strlen(tmp);
+ m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
+ if (mbstowcs(m, (char*)tmp, len) ==
+ ((size_t) - 1)) {
+ sort_free(m);
+ continue;
+ }
+ m[len] = L'\0';
+ for (unsigned int j = 0; j < len; j++)
+ m[j] = towupper(m[j]);
+ wmonths[i] = m;
+ }
+ }
+ }
+}
+
+/*
+ * Compare two wide-character strings
+ */
+static int
+wide_str_coll(const wchar_t *s1, const wchar_t *s2)
+{
+ int ret = 0;
+
+ errno = 0;
+ ret = wcscoll(s1, s2);
+ if (errno == EILSEQ) {
+ errno = 0;
+ ret = wcscmp(s1, s2);
+ if (errno != 0) {
+ for (size_t i = 0; ; ++i) {
+ wchar_t c1 = s1[i];
+ wchar_t c2 = s2[i];
+ if (c1 == L'\0')
+ return ((c2 == L'\0') ? 0 : -1);
+ if (c2 == L'\0')
+ return (+1);
+ if (c1 == c2)
+ continue;
+ return ((int)(c1 - c2));
+ }
+ }
+ }
+ return (ret);
+}
+
+/* counterparts of wcs functions */
+
+void
+bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
+{
+
+ if (MB_CUR_MAX == 1)
+ fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix);
+ else
+ fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix);
+}
+
+const void* bwsrawdata(const struct bwstring *bws)
+{
+
+ return (&(bws->data));
+}
+
+size_t bwsrawlen(const struct bwstring *bws)
+{
+
+ return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len));
+}
+
+size_t
+bws_memsize(const struct bwstring *bws)
+{
+
+ return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
+ (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring)));
+}
+
+void
+bws_setlen(struct bwstring *bws, size_t newlen)
+{
+
+ if (bws && newlen != bws->len && newlen <= bws->len) {
+ bws->len = newlen;
+ if (MB_CUR_MAX == 1)
+ bws->data.cstr[newlen] = '\0';
+ else
+ bws->data.wstr[newlen] = L'\0';
+ }
+}
+
+/*
+ * Allocate a new binary string of specified size
+ */
+struct bwstring *
+bwsalloc(size_t sz)
+{
+ struct bwstring *ret;
+
+ if (MB_CUR_MAX == 1)
+ ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
+ else
+ ret = sort_malloc(sizeof(struct bwstring) +
+ SIZEOF_WCHAR_STRING(sz + 1));
+ ret->len = sz;
+
+ if (MB_CUR_MAX == 1)
+ ret->data.cstr[ret->len] = '\0';
+ else
+ ret->data.wstr[ret->len] = L'\0';
+
+ return (ret);
+}
+
+/*
+ * Create a copy of binary string.
+ * New string size equals the length of the old string.
+ */
+struct bwstring *
+bwsdup(const struct bwstring *s)
+{
+
+ if (s == NULL)
+ return (NULL);
+ else {
+ struct bwstring *ret = bwsalloc(s->len);
+
+ if (MB_CUR_MAX == 1)
+ memcpy(ret->data.cstr, s->data.cstr, (s->len));
+ else
+ memcpy(ret->data.wstr, s->data.wstr,
+ SIZEOF_WCHAR_STRING(s->len));
+
+ return (ret);
+ }
+}
+
+/*
+ * Create a new binary string from a wide character buffer.
+ */
+struct bwstring *
+bwssbdup(const wchar_t *str, size_t len)
+{
+
+ if (str == NULL)
+ return ((len == 0) ? bwsalloc(0) : NULL);
+ else {
+ struct bwstring *ret;
+
+ ret = bwsalloc(len);
+
+ if (MB_CUR_MAX == 1)
+ for (size_t i = 0; i < len; ++i)
+ ret->data.cstr[i] = (unsigned char) str[i];
+ else
+ memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len));
+
+ return (ret);
+ }
+}
+
+/*
+ * Create a new binary string from a raw binary buffer.
+ */
+struct bwstring *
+bwscsbdup(const unsigned char *str, size_t len)
+{
+ struct bwstring *ret;
+
+ ret = bwsalloc(len);
+
+ if (str) {
+ if (MB_CUR_MAX == 1)
+ memcpy(ret->data.cstr, str, len);
+ else {
+ mbstate_t mbs;
+ const char *s;
+ size_t charlen, chars, cptr;
+
+ chars = 0;
+ cptr = 0;
+ s = (const char *) str;
+
+ memset(&mbs, 0, sizeof(mbs));
+
+ while (cptr < len) {
+ size_t n = MB_CUR_MAX;
+
+ if (n > len - cptr)
+ n = len - cptr;
+ charlen = mbrlen(s + cptr, n, &mbs);
+ switch (charlen) {
+ case 0:
+ /* FALLTHROUGH */
+ case (size_t) -1:
+ /* FALLTHROUGH */
+ case (size_t) -2:
+ ret->data.wstr[chars++] =
+ (unsigned char) s[cptr];
+ ++cptr;
+ break;
+ default:
+ n = mbrtowc(ret->data.wstr + (chars++),
+ s + cptr, charlen, &mbs);
+ if ((n == (size_t)-1) || (n == (size_t)-2))
+ /* NOTREACHED */
+ err(2, "mbrtowc error");
+ cptr += charlen;
+ }
+ }
+
+ ret->len = chars;
+ ret->data.wstr[ret->len] = L'\0';
+ }
+ }
+ return (ret);
+}
+
+/*
+ * De-allocate object memory
+ */
+void
+bwsfree(const struct bwstring *s)
+{
+
+ if (s)
+ sort_free(s);
+}
+
+/*
+ * Copy content of src binary string to dst.
+ * If the capacity of the dst string is not sufficient,
+ * then the data is truncated.
+ */
+size_t
+bwscpy(struct bwstring *dst, const struct bwstring *src)
+{
+ size_t nums = src->len;
+
+ if (nums > dst->len)
+ nums = dst->len;
+ dst->len = nums;
+
+ if (MB_CUR_MAX == 1) {
+ memcpy(dst->data.cstr, src->data.cstr, nums);
+ dst->data.cstr[dst->len] = '\0';
+ } else {
+ memcpy(dst->data.wstr, src->data.wstr,
+ SIZEOF_WCHAR_STRING(nums + 1));
+ dst->data.wstr[dst->len] = L'\0';
+ }
+
+ return (nums);
+}
+
+/*
+ * Copy content of src binary string to dst,
+ * with specified number of symbols to be copied.
+ * If the capacity of the dst string is not sufficient,
+ * then the data is truncated.
+ */
+struct bwstring *
+bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
+{
+ size_t nums = src->len;
+
+ if (nums > dst->len)
+ nums = dst->len;
+ if (nums > size)
+ nums = size;
+ dst->len = nums;
+
+ if (MB_CUR_MAX == 1) {
+ memcpy(dst->data.cstr, src->data.cstr, nums);
+ dst->data.cstr[dst->len] = '\0';
+ } else {
+ memcpy(dst->data.wstr, src->data.wstr,
+ SIZEOF_WCHAR_STRING(nums + 1));
+ dst->data.wstr[dst->len] = L'\0';
+ }
+
+ return (dst);
+}
+
+/*
+ * Copy content of src binary string to dst,
+ * with specified number of symbols to be copied.
+ * An offset value can be specified, from the start of src string.
+ * If the capacity of the dst string is not sufficient,
+ * then the data is truncated.
+ */
+struct bwstring *
+bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
+ size_t size)
+{
+
+ if (offset >= src->len) {
+ dst->data.wstr[0] = 0;
+ dst->len = 0;
+ } else {
+ size_t nums = src->len - offset;
+
+ if (nums > dst->len)
+ nums = dst->len;
+ if (nums > size)
+ nums = size;
+ dst->len = nums;
+ if (MB_CUR_MAX == 1) {
+ memcpy(dst->data.cstr, src->data.cstr + offset,
+ (nums));
+ dst->data.cstr[dst->len] = '\0';
+ } else {
+ memcpy(dst->data.wstr, src->data.wstr + offset,
+ SIZEOF_WCHAR_STRING(nums));
+ dst->data.wstr[dst->len] = L'\0';
+ }
+ }
+ return (dst);
+}
+
+/*
+ * Write binary string to the file.
+ * The output is ended either with '\n' (nl == true)
+ * or '\0' (nl == false).
+ */
+size_t
+bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
+{
+
+ if (MB_CUR_MAX == 1) {
+ size_t len = bws->len;
+
+ if (!zero_ended) {
+ bws->data.cstr[len] = '\n';
+
+ if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
+ err(2, NULL);
+
+ bws->data.cstr[len] = '\0';
+ } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
+ err(2, NULL);
+
+ return (len + 1);
+
+ } else {
+ wchar_t eols;
+ size_t printed = 0;
+
+ eols = zero_ended ? btowc('\0') : btowc('\n');
+
+ while (printed < BWSLEN(bws)) {
+ const wchar_t *s = bws->data.wstr + printed;
+
+ if (*s == L'\0') {
+ int nums;
+
+ nums = fwprintf(f, L"%lc", *s);
+
+ if (nums != 1)
+ err(2, NULL);
+ ++printed;
+ } else {
+ int nums;
+
+ nums = fwprintf(f, L"%ls", s);
+
+ if (nums < 1)
+ err(2, NULL);
+ printed += nums;
+ }
+ }
+ fwprintf(f, L"%lc", eols);
+ return (printed + 1);
+ }
+}
+
+/*
+ * Allocate and read a binary string from file.
+ * The strings are nl-ended or zero-ended, depending on the sort setting.
+ */
+struct bwstring *
+bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
+{
+ wint_t eols;
+
+ eols = zero_ended ? btowc('\0') : btowc('\n');
+
+ if (!zero_ended && (MB_CUR_MAX > 1)) {
+ wchar_t *ret;
+
+ ret = fgetwln(f, len);
+
+ if (ret == NULL) {
+ if (!feof(f))
+ err(2, NULL);
+ return (NULL);
+ }
+ if (*len > 0) {
+ if (ret[*len - 1] == (wchar_t)eols)
+ --(*len);
+ }
+ return (bwssbdup(ret, *len));
+
+ } else if (!zero_ended && (MB_CUR_MAX == 1)) {
+ char *ret;
+
+ ret = fgetln(f, len);
+
+ if (ret == NULL) {
+ if (!feof(f))
+ err(2, NULL);
+ return (NULL);
+ }
+ if (*len > 0) {
+ if (ret[*len - 1] == '\n')
+ --(*len);
+ }
+ return (bwscsbdup((unsigned char*)ret, *len));
+
+ } else {
+ *len = 0;
+
+ if (feof(f))
+ return (NULL);
+
+ if (2 >= rb->fgetwln_z_buffer_size) {
+ rb->fgetwln_z_buffer_size += 256;
+ rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
+ sizeof(wchar_t) * rb->fgetwln_z_buffer_size);
+ }
+ rb->fgetwln_z_buffer[*len] = 0;
+
+ if (MB_CUR_MAX == 1)
+ while (!feof(f)) {
+ int c;
+
+ c = fgetc(f);
+
+ if (c == EOF) {
+ if (*len == 0)
+ return (NULL);
+ goto line_read_done;
+ }
+ if (c == eols)
+ goto line_read_done;
+
+ if (*len + 1 >= rb->fgetwln_z_buffer_size) {
+ rb->fgetwln_z_buffer_size += 256;
+ rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
+ SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
+ }
+
+ rb->fgetwln_z_buffer[*len] = c;
+ rb->fgetwln_z_buffer[++(*len)] = 0;
+ }
+ else
+ while (!feof(f)) {
+ wint_t c = 0;
+
+ c = fgetwc(f);
+
+ if (c == WEOF) {
+ if (*len == 0)
+ return (NULL);
+ goto line_read_done;
+ }
+ if (c == eols)
+ goto line_read_done;
+
+ if (*len + 1 >= rb->fgetwln_z_buffer_size) {
+ rb->fgetwln_z_buffer_size += 256;
+ rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
+ SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
+ }
+
+ rb->fgetwln_z_buffer[*len] = c;
+ rb->fgetwln_z_buffer[++(*len)] = 0;
+ }
+
+line_read_done:
+ /* we do not count the last 0 */
+ return (bwssbdup(rb->fgetwln_z_buffer, *len));
+ }
+}
+
+int
+bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
+ size_t offset, size_t len)
+{
+ size_t cmp_len, len1, len2;
+ int res = 0;
+
+ len1 = bws1->len;
+ len2 = bws2->len;
+
+ if (len1 <= offset) {
+ return ((len2 <= offset) ? 0 : -1);
+ } else {
+ if (len2 <= offset)
+ return (+1);
+ else {
+ len1 -= offset;
+ len2 -= offset;
+
+ cmp_len = len1;
+
+ if (len2 < cmp_len)
+ cmp_len = len2;
+
+ if (len < cmp_len)
+ cmp_len = len;
+
+ if (MB_CUR_MAX == 1) {
+ const unsigned char *s1, *s2;
+
+ s1 = bws1->data.cstr + offset;
+ s2 = bws2->data.cstr + offset;
+
+ res = memcmp(s1, s2, cmp_len);
+
+ } else {
+ const wchar_t *s1, *s2;
+
+ s1 = bws1->data.wstr + offset;
+ s2 = bws2->data.wstr + offset;
+
+ res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
+ }
+ }
+ }
+
+ if (res == 0) {
+ if (len1 < cmp_len && len1 < len2)
+ res = -1;
+ else if (len2 < cmp_len && len2 < len1)
+ res = +1;
+ }
+
+ return (res);
+}
+
+int
+bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
+{
+ size_t len1, len2, cmp_len;
+ int res;
+
+ len1 = bws1->len;
+ len2 = bws2->len;
+
+ len1 -= offset;
+ len2 -= offset;
+
+ cmp_len = len1;
+
+ if (len2 < cmp_len)
+ cmp_len = len2;
+
+ res = bwsncmp(bws1, bws2, offset, cmp_len);
+
+ if (res == 0) {
+ if( len1 < len2)
+ res = -1;
+ else if (len2 < len1)
+ res = +1;
+ }
+
+ return (res);
+}
+
+int
+bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
+{
+ wchar_t c1, c2;
+ size_t i = 0;
+
+ for (i = 0; i < len; ++i) {
+ c1 = bws_get_iter_value(iter1);
+ c2 = bws_get_iter_value(iter2);
+ if (c1 != c2)
+ return (c1 - c2);
+ iter1 = bws_iterator_inc(iter1, 1);
+ iter2 = bws_iterator_inc(iter2, 1);
+ }
+
+ return (0);
+}
+
+int
+bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
+{
+ size_t len1, len2;
+
+ len1 = bws1->len;
+ len2 = bws2->len;
+
+ if (len1 <= offset)
+ return ((len2 <= offset) ? 0 : -1);
+ else {
+ if (len2 <= offset)
+ return (+1);
+ else {
+ len1 -= offset;
+ len2 -= offset;
+
+ if (MB_CUR_MAX == 1) {
+ const unsigned char *s1, *s2;
+
+ s1 = bws1->data.cstr + offset;
+ s2 = bws2->data.cstr + offset;
+
+ if (byte_sort) {
+ int res = 0;
+
+ if (len1 > len2) {
+ res = memcmp(s1, s2, len2);
+ if (!res)
+ res = +1;
+ } else if (len1 < len2) {
+ res = memcmp(s1, s2, len1);
+ if (!res)
+ res = -1;
+ } else
+ res = memcmp(s1, s2, len1);
+
+ return (res);
+
+ } else {
+ int res = 0;
+ size_t i, maxlen;
+
+ i = 0;
+ maxlen = len1;
+
+ if (maxlen > len2)
+ maxlen = len2;
+
+ while (i < maxlen) {
+ /* goto next non-zero part: */
+ while ((i < maxlen) &&
+ !s1[i] && !s2[i])
+ ++i;
+
+ if (i >= maxlen)
+ break;
+
+ if (s1[i] == 0) {
+ if (s2[i] == 0)
+ /* NOTREACHED */
+ err(2, "bwscoll error 01");
+ else
+ return (-1);
+ } else if (s2[i] == 0)
+ return (+1);
+
+ res = strcoll((const char*)(s1 + i), (const char*)(s2 + i));
+ if (res)
+ return (res);
+
+ while ((i < maxlen) &&
+ s1[i] && s2[i])
+ ++i;
+
+ if (i >= maxlen)
+ break;
+
+ if (s1[i] == 0) {
+ if (s2[i] == 0) {
+ ++i;
+ continue;
+ } else
+ return (-1);
+ } else if (s2[i] == 0)
+ return (+1);
+ else
+ /* NOTREACHED */
+ err(2, "bwscoll error 02");
+ }
+
+ if (len1 < len2)
+ return (-1);
+ else if (len1 > len2)
+ return (+1);
+
+ return (0);
+ }
+ } else {
+ const wchar_t *s1, *s2;
+ size_t i, maxlen;
+ int res = 0;
+
+ s1 = bws1->data.wstr + offset;
+ s2 = bws2->data.wstr + offset;
+
+ i = 0;
+ maxlen = len1;
+
+ if (maxlen > len2)
+ maxlen = len2;
+
+ while (i < maxlen) {
+
+ /* goto next non-zero part: */
+ while ((i < maxlen) &&
+ !s1[i] && !s2[i])
+ ++i;
+
+ if (i >= maxlen)
+ break;
+
+ if (s1[i] == 0) {
+ if (s2[i] == 0)
+ /* NOTREACHED */
+ err(2, "bwscoll error 1");
+ else
+ return (-1);
+ } else if (s2[i] == 0)
+ return (+1);
+
+ res = wide_str_coll(s1 + i, s2 + i);
+ if (res)
+ return (res);
+
+ while ((i < maxlen) && s1[i] && s2[i])
+ ++i;
+
+ if (i >= maxlen)
+ break;
+
+ if (s1[i] == 0) {
+ if (s2[i] == 0) {
+ ++i;
+ continue;
+ } else
+ return (-1);
+ } else if (s2[i] == 0)
+ return (+1);
+ else
+ /* NOTREACHED */
+ err(2, "bwscoll error 2");
+ }
+
+ if (len1 < len2)
+ return (-1);
+ else if (len1 > len2)
+ return (+1);
+
+ return (0);
+ }
+ }
+ }
+}
+
+/*
+ * Correction of the system API
+ */
+double
+bwstod(struct bwstring *s0, bool *empty)
+{
+ double ret = 0;
+
+ if (MB_CUR_MAX == 1) {
+ unsigned char *end, *s;
+ char *ep;
+
+ s = s0->data.cstr;
+ end = s + s0->len;
+ ep = NULL;
+
+ while (isblank_f(*s) && s < end)
+ ++s;
+
+ if (!isprint(*s)) {
+ *empty = true;
+ return (0);
+ }
+
+ ret = strtod((char*)s, &ep);
+ if ((unsigned char*) ep == s) {
+ *empty = true;
+ return (0);
+ }
+ } else {
+ wchar_t *end, *ep, *s;
+
+ s = s0->data.wstr;
+ end = s + s0->len;
+ ep = NULL;
+
+ while (iswblank_f(*s) && s < end)
+ ++s;
+
+ if (!iswprint(*s)) {
+ *empty = true;
+ return (0);
+ }
+
+ ret = wcstod(s, &ep);
+ if (ep == s) {
+ *empty = true;
+ return (0);
+ }
+ }
+
+ *empty = false;
+ return (ret);
+}
+
+/*
+ * A helper function for monthcoll. If a line matches
+ * a month name, it returns (number of the month - 1),
+ * while if there is no match, it just return -1.
+ */
+
+int
+bws_month_score(const struct bwstring *s0)
+{
+
+ if (MB_CUR_MAX == 1) {
+ const unsigned char *end, *s;
+
+ s = s0->data.cstr;
+ end = s + s0->len;
+
+ while (isblank_f(*s) && s < end)
+ ++s;
+
+ for (int i = 11; i >= 0; --i) {
+ if (cmonths[i] &&
+ (s == (unsigned char*)strstr((const char*)s, (char*)(cmonths[i]))))
+ return (i);
+ }
+
+ } else {
+ const wchar_t *end, *s;
+
+ s = s0->data.wstr;
+ end = s + s0->len;
+
+ while (iswblank_f(*s) && s < end)
+ ++s;
+
+ for (int i = 11; i >= 0; --i) {
+ if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
+ return (i);
+ }
+ }
+
+ return (-1);
+}
+
+/*
+ * Rips out leading blanks (-b).
+ */
+struct bwstring *
+ignore_leading_blanks(struct bwstring *str)
+{
+
+ if (MB_CUR_MAX == 1) {
+ unsigned char *dst, *end, *src;
+
+ src = str->data.cstr;
+ dst = src;
+ end = src + str->len;
+
+ while (src < end && isblank_f(*src))
+ ++src;
+
+ if (src != dst) {
+ size_t newlen;
+
+ newlen = BWSLEN(str) - (src - dst);
+
+ while (src < end) {
+ *dst = *src;
+ ++dst;
+ ++src;
+ }
+ bws_setlen(str, newlen);
+ }
+ } else {
+ wchar_t *dst, *end, *src;
+
+ src = str->data.wstr;
+ dst = src;
+ end = src + str->len;
+
+ while (src < end && iswblank_f(*src))
+ ++src;
+
+ if (src != dst) {
+
+ size_t newlen = BWSLEN(str) - (src - dst);
+
+ while (src < end) {
+ *dst = *src;
+ ++dst;
+ ++src;
+ }
+ bws_setlen(str, newlen);
+
+ }
+ }
+ return (str);
+}
+
+/*
+ * Rips out nonprinting characters (-i).
+ */
+struct bwstring *
+ignore_nonprinting(struct bwstring *str)
+{
+ size_t newlen = str->len;
+
+ if (MB_CUR_MAX == 1) {
+ unsigned char *dst, *end, *src;
+ unsigned char c;
+
+ src = str->data.cstr;
+ dst = src;
+ end = src + str->len;
+
+ while (src < end) {
+ c = *src;
+ if (isprint(c)) {
+ *dst = c;
+ ++dst;
+ ++src;
+ } else {
+ ++src;
+ --newlen;
+ }
+ }
+ } else {
+ wchar_t *dst, *end, *src;
+ wchar_t c;
+
+ src = str->data.wstr;
+ dst = src;
+ end = src + str->len;
+
+ while (src < end) {
+ c = *src;
+ if (iswprint(c)) {
+ *dst = c;
+ ++dst;
+ ++src;
+ } else {
+ ++src;
+ --newlen;
+ }
+ }
+ }
+ bws_setlen(str, newlen);
+
+ return (str);
+}
+
+/*
+ * Rips out any characters that are not alphanumeric characters
+ * nor blanks (-d).
+ */
+struct bwstring *
+dictionary_order(struct bwstring *str)
+{
+ size_t newlen = str->len;
+
+ if (MB_CUR_MAX == 1) {
+ unsigned char *dst, *end, *src;
+ unsigned char c;
+
+ src = str->data.cstr;
+ dst = src;
+ end = src + str->len;
+
+ while (src < end) {
+ c = *src;
+ if (isalnum(c) || isblank_f(c)) {
+ *dst = c;
+ ++dst;
+ ++src;
+ } else {
+ ++src;
+ --newlen;
+ }
+ }
+ } else {
+ wchar_t *dst, *end, *src;
+ wchar_t c;
+
+ src = str->data.wstr;
+ dst = src;
+ end = src + str->len;
+
+ while (src < end) {
+ c = *src;
+ if (iswalnum(c) || iswblank_f(c)) {
+ *dst = c;
+ ++dst;
+ ++src;
+ } else {
+ ++src;
+ --newlen;
+ }
+ }
+ }
+ bws_setlen(str, newlen);
+
+ return (str);
+}
+
+/*
+ * Converts string to lower case(-f).
+ */
+struct bwstring *
+ignore_case(struct bwstring *str)
+{
+
+ if (MB_CUR_MAX == 1) {
+ unsigned char *end, *s;
+
+ s = str->data.cstr;
+ end = s + str->len;
+
+ while (s < end) {
+ *s = toupper(*s);
+ ++s;
+ }
+ } else {
+ wchar_t *end, *s;
+
+ s = str->data.wstr;
+ end = s + str->len;
+
+ while (s < end) {
+ *s = towupper(*s);
+ ++s;
+ }
+ }
+ return (str);
+}
+
+void
+bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
+{
+
+ if (MB_CUR_MAX == 1)
+ warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr);
+ else
+ warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr);
+}
diff --git a/text_cmds/sort/bwstring.h b/text_cmds/sort/bwstring.h
new file mode 100644
index 0000000..857de13
--- /dev/null
+++ b/text_cmds/sort/bwstring.h
@@ -0,0 +1,142 @@
+/* $FreeBSD: head/usr.bin/sort/bwstring.h 264744 2014-04-21 22:52:18Z pfg $ */
+
+/*-
+ * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if !defined(__BWSTRING_H__)
+#define __BWSTRING_H__
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sysexits.h>
+#include <wchar.h>
+
+#include "mem.h"
+
+extern bool byte_sort;
+
+/* wchar_t is of 4 bytes: */
+#define SIZEOF_WCHAR_STRING(LEN) ((LEN)*sizeof(wchar_t))
+
+/*
+ * Binary "wide" string
+ */
+struct bwstring
+{
+ size_t len;
+ union
+ {
+ wchar_t wstr[0];
+ unsigned char cstr[0];
+ } data;
+};
+
+struct reader_buffer
+{
+ wchar_t *fgetwln_z_buffer;
+ size_t fgetwln_z_buffer_size;
+};
+
+typedef void *bwstring_iterator;
+
+#define BWSLEN(s) ((s)->len)
+
+struct bwstring *bwsalloc(size_t sz);
+
+size_t bwsrawlen(const struct bwstring *bws);
+const void* bwsrawdata(const struct bwstring *bws);
+void bws_setlen(struct bwstring *bws, size_t newlen);
+size_t bws_memsize(const struct bwstring *bws);
+double bwstod(struct bwstring *s0, bool *empty);
+int bws_month_score(const struct bwstring *s0);
+
+struct bwstring *ignore_leading_blanks(struct bwstring *str);
+struct bwstring *ignore_nonprinting(struct bwstring *str);
+struct bwstring *dictionary_order(struct bwstring *str);
+struct bwstring *ignore_case(struct bwstring *str);
+
+void bwsprintf(FILE*, struct bwstring*, const char *prefix, const char *suffix);
+void bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos);
+
+struct bwstring *bwsdup(const struct bwstring *s);
+struct bwstring *bwssbdup(const wchar_t *str, size_t size);
+struct bwstring *bwscsbdup(const unsigned char *str, size_t size);
+void bwsfree(const struct bwstring *s);
+size_t bwscpy(struct bwstring *dst, const struct bwstring *src);
+struct bwstring *bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size);
+struct bwstring *bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, size_t size);
+int bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset);
+int bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset, size_t len);
+int bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset);
+size_t bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended);
+struct bwstring *bwsfgetln(FILE *file, size_t *len, bool zero_ended, struct reader_buffer *rb);
+
+static inline bwstring_iterator
+bws_begin(struct bwstring *bws)
+{
+
+ return (bwstring_iterator) (&(bws->data));
+}
+
+static inline bwstring_iterator
+bws_end(struct bwstring *bws)
+{
+
+ return ((MB_CUR_MAX == 1) ?
+ (bwstring_iterator) (bws->data.cstr + bws->len) :
+ (bwstring_iterator) (bws->data.wstr + bws->len));
+}
+
+static inline bwstring_iterator
+bws_iterator_inc(bwstring_iterator iter, size_t pos)
+{
+
+ if (MB_CUR_MAX == 1)
+ return ((unsigned char *) iter) + pos;
+ else
+ return ((wchar_t*) iter) + pos;
+}
+
+static inline wchar_t
+bws_get_iter_value(bwstring_iterator iter)
+{
+
+ if (MB_CUR_MAX == 1)
+ return *((unsigned char *) iter);
+ else
+ return *((wchar_t*) iter);
+}
+
+int
+bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len);
+
+#define BWS_GET(bws, pos) ((MB_CUR_MAX == 1) ? ((bws)->data.cstr[(pos)]) : (bws)->data.wstr[(pos)])
+
+void initialise_months(void);
+
+#endif /* __BWSTRING_H__ */
diff --git a/text_cmds/sort/coll.c b/text_cmds/sort/coll.c
new file mode 100644
index 0000000..f87ae0c
--- /dev/null
+++ b/text_cmds/sort/coll.c
@@ -0,0 +1,1324 @@
+/*-
+ * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <err.h>
+#include <langinfo.h>
+#include <limits.h>
+#include <math.h>
+#ifndef __APPLE__
+#include <md5.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "coll.h"
+#include "vsort.h"
+
+struct key_specs *keys;
+size_t keys_num = 0;
+
+wint_t symbol_decimal_point = L'.';
+/* there is no default thousands separator in collate rules: */
+wint_t symbol_thousands_sep = 0;
+wint_t symbol_negative_sign = L'-';
+wint_t symbol_positive_sign = L'+';
+
+static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
+static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
+static int monthcoll(struct key_value*, struct key_value *, size_t offset);
+static int numcoll(struct key_value*, struct key_value *, size_t offset);
+static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
+static int randomcoll(struct key_value*, struct key_value *, size_t offset);
+static int versioncoll(struct key_value*, struct key_value *, size_t offset);
+
+/*
+ * Allocate keys array
+ */
+struct keys_array *
+keys_array_alloc(void)
+{
+ struct keys_array *ka;
+ size_t sz;
+
+ sz = keys_array_size();
+ ka = sort_malloc(sz);
+ memset(ka, 0, sz);
+
+ return (ka);
+}
+
+/*
+ * Calculate whether we need key hint space
+ */
+static size_t
+key_hint_size(void)
+{
+
+ return (need_hint ? sizeof(struct key_hint) : 0);
+}
+
+/*
+ * Calculate keys array size
+ */
+size_t
+keys_array_size(void)
+{
+
+ return (keys_num * (sizeof(struct key_value) + key_hint_size()));
+}
+
+/*
+ * Clean data of keys array
+ */
+void
+clean_keys_array(const struct bwstring *s, struct keys_array *ka)
+{
+
+ if (ka) {
+ for (size_t i = 0; i < keys_num; ++i) {
+ const struct key_value *kv;
+
+ kv = get_key_from_keys_array(ka, i);
+ if (kv->k && kv->k != s)
+ bwsfree(kv->k);
+ }
+ memset(ka, 0, keys_array_size());
+ }
+}
+
+/*
+ * Get pointer to a key value in the keys set
+ */
+struct key_value *
+get_key_from_keys_array(struct keys_array *ka, size_t ind)
+{
+
+ return ((struct key_value *)((caddr_t)ka->key +
+ ind * (sizeof(struct key_value) + key_hint_size())));
+}
+
+/*
+ * Set value of a key in the keys set
+ */
+void
+set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
+{
+
+ if (ka && keys_num > ind) {
+ struct key_value *kv;
+
+ kv = get_key_from_keys_array(ka, ind);
+
+ if (kv->k && kv->k != s)
+ bwsfree(kv->k);
+ kv->k = s;
+ }
+}
+
+/*
+ * Initialize a sort list item
+ */
+struct sort_list_item *
+sort_list_item_alloc(void)
+{
+ struct sort_list_item *si;
+ size_t sz;
+
+ sz = sizeof(struct sort_list_item) + keys_array_size();
+ si = sort_malloc(sz);
+ memset(si, 0, sz);
+
+ return (si);
+}
+
+size_t
+sort_list_item_size(struct sort_list_item *si)
+{
+ size_t ret = 0;
+
+ if (si) {
+ ret = sizeof(struct sort_list_item) + keys_array_size();
+ if (si->str)
+ ret += bws_memsize(si->str);
+ for (size_t i = 0; i < keys_num; ++i) {
+ const struct key_value *kv;
+
+ kv = get_key_from_keys_array(&si->ka, i);
+
+ if (kv->k != si->str)
+ ret += bws_memsize(kv->k);
+ }
+ }
+ return (ret);
+}
+
+/*
+ * Calculate key for a sort list item
+ */
+static void
+sort_list_item_make_key(struct sort_list_item *si)
+{
+
+ preproc(si->str, &(si->ka));
+}
+
+/*
+ * Set value of a sort list item.
+ * Return combined string and keys memory size.
+ */
+void
+sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
+{
+
+ if (si) {
+ clean_keys_array(si->str, &(si->ka));
+ if (si->str) {
+ if (si->str == str) {
+ /* we are trying to reset the same string */
+ return;
+ } else {
+ bwsfree(si->str);
+ si->str = NULL;
+ }
+ }
+ si->str = str;
+ sort_list_item_make_key(si);
+ }
+}
+
+/*
+ * De-allocate a sort list item object memory
+ */
+void
+sort_list_item_clean(struct sort_list_item *si)
+{
+
+ if (si) {
+ clean_keys_array(si->str, &(si->ka));
+ if (si->str) {
+ bwsfree(si->str);
+ si->str = NULL;
+ }
+ }
+}
+
+/*
+ * Skip columns according to specs
+ */
+static size_t
+skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
+ bool skip_blanks, bool *empty_key)
+{
+ if (cols < 1)
+ return (BWSLEN(s) + 1);
+
+ if (skip_blanks)
+ while (start < BWSLEN(s) && iswblank_f(BWS_GET(s,start)))
+ ++start;
+
+ while (start < BWSLEN(s) && cols > 1) {
+ --cols;
+ ++start;
+ }
+
+ if (start >= BWSLEN(s))
+ *empty_key = true;
+
+ return (start);
+}
+
+/*
+ * Skip fields according to specs
+ */
+static size_t
+skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
+{
+
+ if (fields < 2) {
+ if (BWSLEN(s) == 0)
+ *empty_field = true;
+ return (0);
+ } else if (!(sort_opts_vals.tflag)) {
+ size_t cpos = 0;
+ bool pb = true;
+
+ while (cpos < BWSLEN(s)) {
+ bool isblank;
+
+ isblank = iswblank_f(BWS_GET(s, cpos));
+
+ if (isblank && !pb) {
+ --fields;
+ if (fields <= 1)
+ return (cpos);
+ }
+ pb = isblank;
+ ++cpos;
+ }
+ if (fields > 1)
+ *empty_field = true;
+ return (cpos);
+ } else {
+ size_t cpos = 0;
+
+ while (cpos < BWSLEN(s)) {
+ if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
+ --fields;
+ if (fields <= 1)
+ return (cpos + 1);
+ }
+ ++cpos;
+ }
+ if (fields > 1)
+ *empty_field = true;
+ return (cpos);
+ }
+}
+
+/*
+ * Find fields start
+ */
+static void
+find_field_start(const struct bwstring *s, struct key_specs *ks,
+ size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
+{
+
+ *field_start = skip_fields_to_start(s, ks->f1, empty_field);
+ if (!*empty_field)
+ *key_start = skip_cols_to_start(s, ks->c1, *field_start,
+ ks->pos1b, empty_key);
+ else
+ *empty_key = true;
+}
+
+/*
+ * Find end key position
+ */
+static size_t
+find_field_end(const struct bwstring *s, struct key_specs *ks)
+{
+ size_t f2, next_field_start, pos_end;
+ bool empty_field, empty_key;
+
+ empty_field = false;
+ empty_key = false;
+ f2 = ks->f2;
+
+ if (f2 == 0)
+ return (BWSLEN(s) + 1);
+ else {
+ if (ks->c2 == 0) {
+ next_field_start = skip_fields_to_start(s, f2 + 1,
+ &empty_field);
+ if ((next_field_start > 0) && sort_opts_vals.tflag &&
+ ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
+ next_field_start - 1)))
+ --next_field_start;
+ } else
+ next_field_start = skip_fields_to_start(s, f2,
+ &empty_field);
+ }
+
+ if (empty_field || (next_field_start >= BWSLEN(s)))
+ return (BWSLEN(s) + 1);
+
+ if (ks->c2) {
+ pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
+ ks->pos2b, &empty_key);
+ if (pos_end < BWSLEN(s))
+ ++pos_end;
+ } else
+ pos_end = next_field_start;
+
+ return (pos_end);
+}
+
+/*
+ * Cut a field according to the key specs
+ */
+static struct bwstring *
+cut_field(const struct bwstring *s, struct key_specs *ks)
+{
+ struct bwstring *ret = NULL;
+
+ if (s && ks) {
+ size_t field_start, key_end, key_start, sz;
+ bool empty_field, empty_key;
+
+ field_start = 0;
+ key_start = 0;
+ empty_field = false;
+ empty_key = false;
+
+ find_field_start(s, ks, &field_start, &key_start,
+ &empty_field, &empty_key);
+
+ if (empty_key)
+ sz = 0;
+ else {
+ key_end = find_field_end(s, ks);
+ sz = (key_end < key_start) ? 0 : (key_end - key_start);
+ }
+
+ ret = bwsalloc(sz);
+ if (sz)
+ bwsnocpy(ret, s, key_start, sz);
+ } else
+ ret = bwsalloc(0);
+
+ return (ret);
+}
+
+/*
+ * Preprocesses a line applying the necessary transformations
+ * specified by command line options and returns the preprocessed
+ * string, which can be used to compare.
+ */
+int
+preproc(struct bwstring *s, struct keys_array *ka)
+{
+
+ if (sort_opts_vals.kflag)
+ for (size_t i = 0; i < keys_num; i++) {
+ struct bwstring *key;
+ struct key_specs *kspecs;
+ struct sort_mods *sm;
+
+ kspecs = &(keys[i]);
+ key = cut_field(s, kspecs);
+
+ sm = &(kspecs->sm);
+ if (sm->dflag)
+ key = dictionary_order(key);
+ else if (sm->iflag)
+ key = ignore_nonprinting(key);
+ if (sm->fflag || sm->Mflag)
+ key = ignore_case(key);
+
+ set_key_on_keys_array(ka, key, i);
+ }
+ else {
+ struct bwstring *ret = NULL;
+ struct sort_mods *sm = default_sort_mods;
+
+ if (sm->bflag) {
+ if (ret == NULL)
+ ret = bwsdup(s);
+ ret = ignore_leading_blanks(ret);
+ }
+ if (sm->dflag) {
+ if (ret == NULL)
+ ret = bwsdup(s);
+ ret = dictionary_order(ret);
+ } else if (sm->iflag) {
+ if (ret == NULL)
+ ret = bwsdup(s);
+ ret = ignore_nonprinting(ret);
+ }
+ if (sm->fflag || sm->Mflag) {
+ if (ret == NULL)
+ ret = bwsdup(s);
+ ret = ignore_case(ret);
+ }
+ if (ret == NULL)
+ set_key_on_keys_array(ka, s, 0);
+ else
+ set_key_on_keys_array(ka, ret, 0);
+ }
+
+ return 0;
+}
+
+cmpcoll_t
+get_sort_func(struct sort_mods *sm)
+{
+
+ if (sm->nflag)
+ return (numcoll);
+ else if (sm->hflag)
+ return (hnumcoll);
+ else if (sm->gflag)
+ return (gnumcoll);
+ else if (sm->Mflag)
+ return (monthcoll);
+ else if (sm->Rflag)
+ return (randomcoll);
+ else if (sm->Vflag)
+ return (versioncoll);
+ else
+ return (wstrcoll);
+}
+
+/*
+ * Compares the given strings. Returns a positive number if
+ * the first precedes the second, a negative number if the second is
+ * the preceding one, and zero if they are equal. This function calls
+ * the underlying collate functions, which done the actual comparison.
+ */
+int
+key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
+{
+ struct key_value *kv1, *kv2;
+ struct sort_mods *sm;
+ int res = 0;
+
+ for (size_t i = 0; i < keys_num; ++i) {
+ kv1 = get_key_from_keys_array(ps1, i);
+ kv2 = get_key_from_keys_array(ps2, i);
+ sm = &(keys[i].sm);
+
+ if (sm->rflag)
+ res = sm->func(kv2, kv1, offset);
+ else
+ res = sm->func(kv1, kv2, offset);
+
+ if (res)
+ break;
+
+ /* offset applies to only the first key */
+ offset = 0;
+ }
+ return (res);
+}
+
+/*
+ * Compare two strings.
+ * Plain symbol-by-symbol comparison.
+ */
+int
+top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
+{
+
+ if (default_sort_mods->rflag) {
+ const struct bwstring *tmp;
+
+ tmp = s1;
+ s1 = s2;
+ s2 = tmp;
+ }
+
+ return (bwscoll(s1, s2, 0));
+}
+
+/*
+ * Compare a string and a sort list item, according to the sort specs.
+ */
+int
+str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
+{
+ struct keys_array *ka1;
+ int ret = 0;
+
+ ka1 = keys_array_alloc();
+
+ preproc(str1, ka1);
+
+ sort_list_item_make_key(*ss2);
+
+ if (debug_sort) {
+ bwsprintf(stdout, str1, "; s1=<", ">");
+ bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
+ }
+
+ ret = key_coll(ka1, &((*ss2)->ka), 0);
+
+ if (debug_sort)
+ printf("; cmp1=%d", ret);
+
+ clean_keys_array(str1, ka1);
+ sort_free(ka1);
+
+ if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
+ ret = top_level_str_coll(str1, ((*ss2)->str));
+ if (debug_sort)
+ printf("; cmp2=%d", ret);
+ }
+
+ if (debug_sort)
+ printf("\n");
+
+ return (ret);
+}
+
+/*
+ * Compare two sort list items, according to the sort specs.
+ */
+int
+list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
+ size_t offset)
+{
+ int ret;
+
+ ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
+
+ if (debug_sort) {
+ if (offset)
+ printf("; offset=%d", (int) offset);
+ bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
+ bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
+ printf("; cmp1=%d\n", ret);
+ }
+
+ if (ret)
+ return (ret);
+
+ if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
+ ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
+ if (debug_sort)
+ printf("; cmp2=%d\n", ret);
+ }
+
+ return (ret);
+}
+
+/*
+ * Compare two sort list items, according to the sort specs.
+ */
+int
+list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
+{
+
+ return (list_coll_offset(ss1, ss2, 0));
+}
+
+#define LSCDEF(N) \
+static int \
+list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
+{ \
+ \
+ return (list_coll_offset(ss1, ss2, N)); \
+}
+
+LSCDEF(1)
+LSCDEF(2)
+LSCDEF(3)
+LSCDEF(4)
+LSCDEF(5)
+LSCDEF(6)
+LSCDEF(7)
+LSCDEF(8)
+LSCDEF(9)
+LSCDEF(10)
+LSCDEF(11)
+LSCDEF(12)
+LSCDEF(13)
+LSCDEF(14)
+LSCDEF(15)
+LSCDEF(16)
+LSCDEF(17)
+LSCDEF(18)
+LSCDEF(19)
+LSCDEF(20)
+
+listcoll_t
+get_list_call_func(size_t offset)
+{
+ static const listcoll_t lsarray[] = { list_coll, list_coll_1,
+ list_coll_2, list_coll_3, list_coll_4, list_coll_5,
+ list_coll_6, list_coll_7, list_coll_8, list_coll_9,
+ list_coll_10, list_coll_11, list_coll_12, list_coll_13,
+ list_coll_14, list_coll_15, list_coll_16, list_coll_17,
+ list_coll_18, list_coll_19, list_coll_20 };
+
+ if (offset <= 20)
+ return (lsarray[offset]);
+
+ return (list_coll);
+}
+
+/*
+ * Compare two sort list items, only by their original string.
+ */
+int
+list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
+{
+
+ return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
+}
+
+/*
+ * Maximum size of a number in the string (before or after decimal point)
+ */
+#define MAX_NUM_SIZE (128)
+
+/*
+ * Set suffix value
+ */
+static void setsuffix(wchar_t c, unsigned char *si)
+{
+ switch (c){
+ case L'k':
+ case L'K':
+ *si = 1;
+ break;
+ case L'M':
+ *si = 2;
+ break;
+ case L'G':
+ *si = 3;
+ break;
+ case L'T':
+ *si = 4;
+ break;
+ case L'P':
+ *si = 5;
+ break;
+ case L'E':
+ *si = 6;
+ break;
+ case L'Z':
+ *si = 7;
+ break;
+ case L'Y':
+ *si = 8;
+ break;
+ default:
+ *si = 0;
+ }
+}
+
+/*
+ * Read string s and parse the string into a fixed-decimal-point number.
+ * sign equals -1 if the number is negative (explicit plus is not allowed,
+ * according to GNU sort's "info sort".
+ * The number part before decimal point is in the smain, after the decimal
+ * point is in sfrac, tail is the pointer to the remainder of the string.
+ */
+static int
+read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
+{
+ bwstring_iterator s;
+ bool prev_thousand_sep = false;
+
+ s = bws_begin(s0);
+
+ /* always end the fraction with zero, even if we have no fraction */
+ sfrac[0] = 0;
+
+ while (iswblank_f(bws_get_iter_value(s)))
+ s = bws_iterator_inc(s, 1);
+
+ if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
+ *sign = -1;
+ s = bws_iterator_inc(s, 1);
+ }
+
+ // This is '0', not '\0', do not change this
+ while (iswdigit(bws_get_iter_value(s)) &&
+ (bws_get_iter_value(s) == L'0'))
+ s = bws_iterator_inc(s, 1);
+
+ while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
+ if (iswdigit(bws_get_iter_value(s))) {
+ smain[*main_len] = bws_get_iter_value(s);
+ s = bws_iterator_inc(s, 1);
+ *main_len += 1;
+ prev_thousand_sep = false;
+ } else if (symbol_thousands_sep
+ && (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)
+ && (!prev_thousand_sep)
+ ) {
+ s = bws_iterator_inc(s, 1);
+ prev_thousand_sep = true;
+ } else
+ break;
+ }
+
+ smain[*main_len] = 0;
+
+ if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
+ s = bws_iterator_inc(s, 1);
+ while (iswdigit(bws_get_iter_value(s)) &&
+ *frac_len < MAX_NUM_SIZE) {
+ sfrac[*frac_len] = bws_get_iter_value(s);
+ s = bws_iterator_inc(s, 1);
+ *frac_len += 1;
+ }
+ sfrac[*frac_len] = 0;
+
+ while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
+ --(*frac_len);
+ sfrac[*frac_len] = L'\0';
+ }
+ }
+
+ setsuffix(bws_get_iter_value(s),si);
+
+ if ((*main_len + *frac_len) == 0)
+ *sign = 0;
+
+ return (0);
+}
+
+/*
+ * Implements string sort.
+ */
+static int
+wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
+{
+
+ if (debug_sort) {
+ if (offset)
+ printf("; offset=%d\n", (int) offset);
+ bwsprintf(stdout, kv1->k, "; k1=<", ">");
+ printf("(%zu)", BWSLEN(kv1->k));
+ bwsprintf(stdout, kv2->k, ", k2=<", ">");
+ printf("(%zu)", BWSLEN(kv2->k));
+ }
+
+ return (bwscoll(kv1->k, kv2->k, offset));
+}
+
+/*
+ * Compare two suffixes
+ */
+static inline int
+cmpsuffix(unsigned char si1, unsigned char si2)
+{
+
+ return ((char)si1 - (char)si2);
+}
+
+/*
+ * Implements numeric sort for -n and -h.
+ */
+static int
+numcoll_impl(struct key_value *kv1, struct key_value *kv2,
+ size_t offset __unused, bool use_suffix)
+{
+ struct bwstring *s1, *s2;
+ wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
+ wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
+ int cmp_res, sign1, sign2;
+ size_t frac1, frac2, main1, main2;
+ unsigned char SI1, SI2;
+ bool e1, e2, key1_read, key2_read;
+
+ s1 = kv1->k;
+ s2 = kv2->k;
+ sign1 = sign2 = 0;
+ main1 = main2 = 0;
+ frac1 = frac2 = 0;
+
+ key1_read = key2_read = false;
+
+ if (debug_sort) {
+ bwsprintf(stdout, s1, "; k1=<", ">");
+ bwsprintf(stdout, s2, ", k2=<", ">");
+ }
+
+ if (s1 == s2)
+ return (0);
+
+ if (kv1->hint->status == HS_UNINITIALIZED) {
+ /* read the number from the string */
+ read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
+ key1_read = true;
+ kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
+ if(main1 < 1 && frac1 < 1)
+ kv1->hint->v.nh.empty=true;
+ kv1->hint->v.nh.si = SI1;
+ kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
+ HS_INITIALIZED : HS_ERROR;
+ kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
+ }
+
+ if (kv2->hint->status == HS_UNINITIALIZED) {
+ /* read the number from the string */
+ read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
+ key2_read = true;
+ kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
+ if(main2 < 1 && frac2 < 1)
+ kv2->hint->v.nh.empty=true;
+ kv2->hint->v.nh.si = SI2;
+ kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
+ HS_INITIALIZED : HS_ERROR;
+ kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
+ }
+
+ if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
+ HS_INITIALIZED) {
+ unsigned long long n1, n2;
+ bool neg1, neg2;
+
+ e1 = kv1->hint->v.nh.empty;
+ e2 = kv2->hint->v.nh.empty;
+
+ if (e1 && e2)
+ return (0);
+
+ neg1 = kv1->hint->v.nh.neg;
+ neg2 = kv2->hint->v.nh.neg;
+
+ if (neg1 && !neg2)
+ return (-1);
+ if (neg2 && !neg1)
+ return (+1);
+
+ if (e1)
+ return (neg2 ? +1 : -1);
+ else if (e2)
+ return (neg1 ? -1 : +1);
+
+
+ if (use_suffix) {
+ cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
+ if (cmp_res)
+ return (neg1 ? -cmp_res : cmp_res);
+ }
+
+ n1 = kv1->hint->v.nh.n1;
+ n2 = kv2->hint->v.nh.n1;
+ if (n1 < n2)
+ return (neg1 ? +1 : -1);
+ else if (n1 > n2)
+ return (neg1 ? -1 : +1);
+ }
+
+ /* read the numbers from the strings */
+ if (!key1_read)
+ read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
+ if (!key2_read)
+ read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
+
+ e1 = ((main1 + frac1) == 0);
+ e2 = ((main2 + frac2) == 0);
+
+ if (e1 && e2)
+ return (0);
+
+ /* we know the result if the signs are different */
+ if (sign1 < 0 && sign2 >= 0)
+ return (-1);
+ if (sign1 >= 0 && sign2 < 0)
+ return (+1);
+
+ if (e1)
+ return ((sign2 < 0) ? +1 : -1);
+ else if (e2)
+ return ((sign1 < 0) ? -1 : +1);
+
+ if (use_suffix) {
+ cmp_res = cmpsuffix(SI1, SI2);
+ if (cmp_res)
+ return ((sign1 < 0) ? -cmp_res : cmp_res);
+ }
+
+ /* if both numbers are empty assume that the strings are equal */
+ if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
+ return (0);
+
+ /*
+ * if the main part is of different size, we know the result
+ * (because the leading zeros are removed)
+ */
+ if (main1 < main2)
+ cmp_res = -1;
+ else if (main1 > main2)
+ cmp_res = +1;
+ /* if the sizes are equal then simple non-collate string compare gives the correct result */
+ else
+ cmp_res = wcscmp(smain1, smain2);
+
+ /* check fraction */
+ if (!cmp_res)
+ cmp_res = wcscmp(sfrac1, sfrac2);
+
+ if (!cmp_res)
+ return (0);
+
+ /* reverse result if the signs are negative */
+ if (sign1 < 0 && sign2 < 0)
+ cmp_res = -cmp_res;
+
+ return (cmp_res);
+}
+
+/*
+ * Implements numeric sort (-n).
+ */
+static int
+numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
+{
+
+ return (numcoll_impl(kv1, kv2, offset, false));
+}
+
+/*
+ * Implements 'human' numeric sort (-h).
+ */
+static int
+hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
+{
+
+ return (numcoll_impl(kv1, kv2, offset, true));
+}
+
+/*
+ * Implements random sort (-R).
+ */
+static int
+randomcoll(struct key_value *kv1, struct key_value *kv2,
+ size_t offset __unused)
+{
+ struct bwstring *s1, *s2;
+ MD5_CTX ctx1, ctx2;
+ char *b1, *b2;
+
+ s1 = kv1->k;
+ s2 = kv2->k;
+
+ if (debug_sort) {
+ bwsprintf(stdout, s1, "; k1=<", ">");
+ bwsprintf(stdout, s2, ", k2=<", ">");
+ }
+
+ if (s1 == s2)
+ return (0);
+
+ memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
+ memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
+
+ MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
+ MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
+ b1 = MD5End(&ctx1, NULL);
+ b2 = MD5End(&ctx2, NULL);
+ if (b1 == NULL) {
+ if (b2 == NULL)
+ return (0);
+ else {
+ sort_free(b2);
+ return (-1);
+ }
+ } else if (b2 == NULL) {
+ sort_free(b1);
+ return (+1);
+ } else {
+ int cmp_res;
+
+ cmp_res = strcmp(b1,b2);
+ sort_free(b1);
+ sort_free(b2);
+
+ if (!cmp_res)
+ cmp_res = bwscoll(s1, s2, 0);
+
+ return (cmp_res);
+ }
+}
+
+/*
+ * Implements version sort (-V).
+ */
+static int
+versioncoll(struct key_value *kv1, struct key_value *kv2,
+ size_t offset __unused)
+{
+ struct bwstring *s1, *s2;
+
+ s1 = kv1->k;
+ s2 = kv2->k;
+
+ if (debug_sort) {
+ bwsprintf(stdout, s1, "; k1=<", ">");
+ bwsprintf(stdout, s2, ", k2=<", ">");
+ }
+
+ if (s1 == s2)
+ return (0);
+
+ return (vcmp(s1, s2));
+}
+
+/*
+ * Check for minus infinity
+ */
+static inline bool
+huge_minus(double d, int err1)
+{
+
+ if (err1 == ERANGE)
+ if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
+ return (+1);
+
+ return (0);
+}
+
+/*
+ * Check for plus infinity
+ */
+static inline bool
+huge_plus(double d, int err1)
+{
+
+ if (err1 == ERANGE)
+ if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
+ return (+1);
+
+ return (0);
+}
+
+/*
+ * Check whether a function is a NAN
+ */
+static bool
+is_nan(double d)
+{
+
+ return ((d == NAN) || (isnan(d)));
+}
+
+/*
+ * Compare two NANs
+ */
+static int
+cmp_nans(double d1, double d2)
+{
+
+ if (d1 < d2)
+ return (-1);
+ if (d1 > d2)
+ return (+1);
+ return (0);
+}
+
+/*
+ * Implements general numeric sort (-g).
+ */
+static int
+gnumcoll(struct key_value *kv1, struct key_value *kv2,
+ size_t offset __unused)
+{
+ double d1, d2;
+ int err1, err2;
+ bool empty1, empty2, key1_read, key2_read;
+
+ d1 = d2 = 0;
+ err1 = err2 = 0;
+ key1_read = key2_read = false;
+
+ if (debug_sort) {
+ bwsprintf(stdout, kv1->k, "; k1=<", ">");
+ bwsprintf(stdout, kv2->k, "; k2=<", ">");
+ }
+
+ if (kv1->hint->status == HS_UNINITIALIZED) {
+ errno = 0;
+ d1 = bwstod(kv1->k, &empty1);
+ err1 = errno;
+
+ if (empty1)
+ kv1->hint->v.gh.notnum = true;
+ else if (err1 == 0) {
+ kv1->hint->v.gh.d = d1;
+ kv1->hint->v.gh.nan = is_nan(d1);
+ kv1->hint->status = HS_INITIALIZED;
+ } else
+ kv1->hint->status = HS_ERROR;
+
+ key1_read = true;
+ }
+
+ if (kv2->hint->status == HS_UNINITIALIZED) {
+ errno = 0;
+ d2 = bwstod(kv2->k, &empty2);
+ err2 = errno;
+
+ if (empty2)
+ kv2->hint->v.gh.notnum = true;
+ else if (err2 == 0) {
+ kv2->hint->v.gh.d = d2;
+ kv2->hint->v.gh.nan = is_nan(d2);
+ kv2->hint->status = HS_INITIALIZED;
+ } else
+ kv2->hint->status = HS_ERROR;
+
+ key2_read = true;
+ }
+
+ if (kv1->hint->status == HS_INITIALIZED &&
+ kv2->hint->status == HS_INITIALIZED) {
+ if (kv1->hint->v.gh.notnum)
+ return ((kv2->hint->v.gh.notnum) ? 0 : -1);
+ else if (kv2->hint->v.gh.notnum)
+ return (+1);
+
+ if (kv1->hint->v.gh.nan)
+ return ((kv2->hint->v.gh.nan) ?
+ cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
+ -1);
+ else if (kv2->hint->v.gh.nan)
+ return (+1);
+
+ d1 = kv1->hint->v.gh.d;
+ d2 = kv2->hint->v.gh.d;
+
+ if (d1 < d2)
+ return (-1);
+ else if (d1 > d2)
+ return (+1);
+ else
+ return (0);
+ }
+
+ if (!key1_read) {
+ errno = 0;
+ d1 = bwstod(kv1->k, &empty1);
+ err1 = errno;
+ }
+
+ if (!key2_read) {
+ errno = 0;
+ d2 = bwstod(kv2->k, &empty2);
+ err2 = errno;
+ }
+
+ /* Non-value case: */
+ if (empty1)
+ return (empty2 ? 0 : -1);
+ else if (empty2)
+ return (+1);
+
+ /* NAN case */
+ if (is_nan(d1))
+ return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
+ else if (is_nan(d2))
+ return (+1);
+
+ /* Infinities */
+ if (err1 == ERANGE || err2 == ERANGE) {
+ /* Minus infinity case */
+ if (huge_minus(d1, err1)) {
+ if (huge_minus(d2, err2)) {
+ if (d1 < d2)
+ return (-1);
+ if (d1 > d2)
+ return (+1);
+ return (0);
+ } else
+ return (-1);
+
+ } else if (huge_minus(d2, err2)) {
+ if (huge_minus(d1, err1)) {
+ if (d1 < d2)
+ return (-1);
+ if (d1 > d2)
+ return (+1);
+ return (0);
+ } else
+ return (+1);
+ }
+
+ /* Plus infinity case */
+ if (huge_plus(d1, err1)) {
+ if (huge_plus(d2, err2)) {
+ if (d1 < d2)
+ return (-1);
+ if (d1 > d2)
+ return (+1);
+ return (0);
+ } else
+ return (+1);
+ } else if (huge_plus(d2, err2)) {
+ if (huge_plus(d1, err1)) {
+ if (d1 < d2)
+ return (-1);
+ if (d1 > d2)
+ return (+1);
+ return (0);
+ } else
+ return (-1);
+ }
+ }
+
+ if (d1 < d2)
+ return (-1);
+ if (d1 > d2)
+ return (+1);
+
+ return (0);
+}
+
+/*
+ * Implements month sort (-M).
+ */
+static int
+monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
+{
+ int val1, val2;
+ bool key1_read, key2_read;
+
+ val1 = val2 = 0;
+ key1_read = key2_read = false;
+
+ if (debug_sort) {
+ bwsprintf(stdout, kv1->k, "; k1=<", ">");
+ bwsprintf(stdout, kv2->k, "; k2=<", ">");
+ }
+
+ if (kv1->hint->status == HS_UNINITIALIZED) {
+ kv1->hint->v.Mh.m = bws_month_score(kv1->k);
+ key1_read = true;
+ kv1->hint->status = HS_INITIALIZED;
+ }
+
+ if (kv2->hint->status == HS_UNINITIALIZED) {
+ kv2->hint->v.Mh.m = bws_month_score(kv2->k);
+ key2_read = true;
+ kv2->hint->status = HS_INITIALIZED;
+ }
+
+ if (kv1->hint->status == HS_INITIALIZED) {
+ val1 = kv1->hint->v.Mh.m;
+ key1_read = true;
+ }
+
+ if (kv2->hint->status == HS_INITIALIZED) {
+ val2 = kv2->hint->v.Mh.m;
+ key2_read = true;
+ }
+
+ if (!key1_read)
+ val1 = bws_month_score(kv1->k);
+ if (!key2_read)
+ val2 = bws_month_score(kv2->k);
+
+ if (val1 == val2) {
+ return (0);
+ }
+ if (val1 < val2)
+ return (-1);
+ return (+1);
+}
diff --git a/text_cmds/sort/coll.h b/text_cmds/sort/coll.h
new file mode 100644
index 0000000..6e3f9b4
--- /dev/null
+++ b/text_cmds/sort/coll.h
@@ -0,0 +1,168 @@
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if !defined(__COLL_H__)
+#define __COLL_H__
+
+#include "bwstring.h"
+#include "sort.h"
+
+/*
+ * Sort hint data for -n
+ */
+struct n_hint
+{
+ unsigned long long n1;
+ unsigned char si;
+ bool empty;
+ bool neg;
+};
+
+/*
+ * Sort hint data for -g
+ */
+struct g_hint
+{
+ double d;
+ bool nan;
+ bool notnum;
+};
+
+/*
+ * Sort hint data for -M
+ */
+struct M_hint
+{
+ int m;
+};
+
+/*
+ * Status of a sort hint object
+ */
+typedef enum
+{
+ HS_ERROR = -1, HS_UNINITIALIZED = 0, HS_INITIALIZED = 1
+} hint_status;
+
+/*
+ * Sort hint object
+ */
+struct key_hint
+{
+ hint_status status;
+ union
+ {
+ struct n_hint nh;
+ struct g_hint gh;
+ struct M_hint Mh;
+ } v;
+};
+
+/*
+ * Key value
+ */
+struct key_value
+{
+ struct bwstring *k; /* key string */
+ struct key_hint hint[0]; /* key sort hint */
+} __packed;
+
+/*
+ * Set of keys container object.
+ */
+struct keys_array
+{
+ struct key_value key[0];
+};
+
+/*
+ * Parsed -k option data
+ */
+struct key_specs
+{
+ struct sort_mods sm;
+ size_t c1;
+ size_t c2;
+ size_t f1;
+ size_t f2;
+ bool pos1b;
+ bool pos2b;
+};
+
+/*
+ * Single entry in sort list.
+ */
+struct sort_list_item
+{
+ struct bwstring *str;
+ struct keys_array ka;
+};
+
+/*
+ * Function type, used to compare two list objects
+ */
+typedef int (*listcoll_t)(struct sort_list_item **ss1, struct sort_list_item **ss2);
+
+extern struct key_specs *keys;
+extern size_t keys_num;
+
+/*
+ * Main localised symbols. These must be wint_t as they may hold WEOF.
+ */
+extern wint_t symbol_decimal_point;
+extern wint_t symbol_thousands_sep;
+extern wint_t symbol_negative_sign;
+extern wint_t symbol_positive_sign;
+
+/* funcs */
+
+cmpcoll_t get_sort_func(struct sort_mods *sm);
+
+struct keys_array *keys_array_alloc(void);
+size_t keys_array_size(void);
+struct key_value *get_key_from_keys_array(struct keys_array *ka, size_t ind);
+void set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind);
+void clean_keys_array(const struct bwstring *s, struct keys_array *ka);
+
+struct sort_list_item *sort_list_item_alloc(void);
+void sort_list_item_set(struct sort_list_item *si, struct bwstring *str);
+void sort_list_item_clean(struct sort_list_item *si);
+size_t sort_list_item_size(struct sort_list_item *si);
+
+int preproc(struct bwstring *s, struct keys_array *ka);
+int top_level_str_coll(const struct bwstring *, const struct bwstring *);
+int key_coll(struct keys_array *ks1, struct keys_array *ks2, size_t offset);
+int str_list_coll(struct bwstring *str1, struct sort_list_item **ss2);
+int list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2);
+int list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2);
+int list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, size_t offset);
+
+listcoll_t get_list_call_func(size_t offset);
+
+#endif /* __COLL_H__ */
diff --git a/text_cmds/sort/commoncrypto.c b/text_cmds/sort/commoncrypto.c
new file mode 100644
index 0000000..c7b3b8c
--- /dev/null
+++ b/text_cmds/sort/commoncrypto.c
@@ -0,0 +1,98 @@
+/* mdXhl.c
+ * ----------------------------------------------------------------------------
+ * "THE BEER-WARE LICENSE" (Revision 42):
+ * <phk@FreeBSD.org> wrote this file. As long as you retain this notice you
+ * can do whatever you want with this stuff. If we meet some day, and you think
+ * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
+ * ----------------------------------------------------------------------------
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/lib/libmd/mdXhl.c 294037 2016-01-14 21:08:23Z jtl $");
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "commoncrypto.h"
+
+#define LENGTH CC_MD5_DIGEST_LENGTH
+
+char *MD5FileChunk(const char *, char *, off_t, off_t);
+
+char *
+MD5End(CC_MD5_CTX *ctx, char *buf)
+{
+ int i;
+ unsigned char digest[LENGTH];
+ static const char hex[]="0123456789abcdef";
+
+ if (!buf)
+ buf = malloc(2*LENGTH + 1);
+ if (!buf)
+ return 0;
+ CC_MD5_Final(digest, ctx);
+ for (i = 0; i < LENGTH; i++) {
+ buf[i+i] = hex[digest[i] >> 4];
+ buf[i+i+1] = hex[digest[i] & 0x0f];
+ }
+ buf[i+i] = '\0';
+ return buf;
+}
+
+char *
+MD5File(const char *filename, char *buf)
+{
+ return (MD5FileChunk(filename, buf, 0, 0));
+}
+
+char *
+MD5FileChunk(const char *filename, char *buf, off_t ofs, off_t len)
+{
+ unsigned char buffer[16*1024];
+ CC_MD5_CTX ctx;
+ int fd, readrv, e;
+ off_t remain;
+
+ if (len < 0) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ CC_MD5_Init(&ctx);
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ return NULL;
+ if (ofs != 0) {
+ errno = 0;
+ if (lseek(fd, ofs, SEEK_SET) != ofs ||
+ (ofs == -1 && errno != 0)) {
+ readrv = -1;
+ goto error;
+ }
+ }
+ remain = len;
+ readrv = 0;
+ while (len == 0 || remain > 0) {
+ if (len == 0 || remain > (off_t)sizeof(buffer))
+ readrv = read(fd, buffer, sizeof(buffer));
+ else
+ readrv = read(fd, buffer, remain);
+ if (readrv <= 0)
+ break;
+ CC_MD5_Update(&ctx, buffer, readrv);
+ remain -= readrv;
+ }
+error:
+ e = errno;
+ close(fd);
+ errno = e;
+ if (readrv < 0)
+ return NULL;
+ return (MD5End(&ctx, buf));
+}
diff --git a/text_cmds/sort/commoncrypto.h b/text_cmds/sort/commoncrypto.h
new file mode 100644
index 0000000..5170b51
--- /dev/null
+++ b/text_cmds/sort/commoncrypto.h
@@ -0,0 +1,8 @@
+#include <CommonCrypto/CommonDigest.h>
+
+#define MD5_CTX CC_MD5_CTX
+#define MD5Init CC_MD5_Init
+#define MD5Update CC_MD5_Update
+
+char *MD5End(CC_MD5_CTX *, char *);
+char *MD5File(const char *, char *);
diff --git a/text_cmds/sort/file.c b/text_cmds/sort/file.c
new file mode 100644
index 0000000..346819b
--- /dev/null
+++ b/text_cmds/sort/file.c
@@ -0,0 +1,1684 @@
+/*-
+ * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+
+#include <err.h>
+#include <fcntl.h>
+#if defined(SORT_THREADS)
+#include <pthread.h>
+#endif
+#ifndef __APPLE__
+#include <semaphore.h>
+#else
+#include <mach/mach_init.h>
+#include <mach/mach_error.h>
+#include <mach/semaphore.h>
+#include <mach/task.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "coll.h"
+#include "file.h"
+#include "radixsort.h"
+
+unsigned long long free_memory = 1000000;
+unsigned long long available_free_memory = 1000000;
+
+bool use_mmap;
+
+const char *tmpdir = "/var/tmp";
+const char *compress_program;
+
+size_t max_open_files = 16;
+
+/*
+ * How much space we read from file at once
+ */
+#define READ_CHUNK (4096)
+
+/*
+ * File reader structure
+ */
+struct file_reader
+{
+ struct reader_buffer rb;
+ FILE *file;
+ char *fname;
+ unsigned char *buffer;
+ unsigned char *mmapaddr;
+ unsigned char *mmapptr;
+ size_t bsz;
+ size_t cbsz;
+ size_t mmapsize;
+ size_t strbeg;
+ int fd;
+ char elsymb;
+};
+
+/*
+ * Structure to be used in file merge process.
+ */
+struct file_header
+{
+ struct file_reader *fr;
+ struct sort_list_item *si; /* current top line */
+ size_t file_pos;
+};
+
+/*
+ * List elements of "cleanable" files list.
+ */
+struct CLEANABLE_FILE
+{
+ char *fn;
+ LIST_ENTRY(CLEANABLE_FILE) files;
+};
+
+/*
+ * List header of "cleanable" files list.
+ */
+static LIST_HEAD(CLEANABLE_FILES,CLEANABLE_FILE) tmp_files;
+
+/*
+ * Semaphore to protect the tmp file list.
+ * We use semaphore here because it is signal-safe, according to POSIX.
+ * And semaphore does not require pthread library.
+ */
+#ifndef __APPLE__
+static sem_t tmp_files_sem;
+#else
+static semaphore_t tmp_files_sem;
+#endif
+
+static void mt_sort(struct sort_list *list,
+ int (*sort_func)(void *, size_t, size_t,
+ int (*)(const void *, const void *)), const char* fn);
+
+/*
+ * Init tmp files list
+ */
+void
+init_tmp_files(void)
+{
+
+ LIST_INIT(&tmp_files);
+#ifndef __APPLE__
+ sem_init(&tmp_files_sem, 0, 1);
+#else
+ {
+ mach_port_t self = mach_task_self();
+ kern_return_t ret = semaphore_create(self, &tmp_files_sem, SYNC_POLICY_FIFO, 1);
+ if (ret != KERN_SUCCESS) {
+ err(2,NULL);
+ }
+ }
+#endif
+}
+
+/*
+ * Save name of a tmp file for signal cleanup
+ */
+void
+tmp_file_atexit(const char *tmp_file)
+{
+
+ if (tmp_file) {
+#ifndef __APPLE__
+ sem_wait(&tmp_files_sem);
+#else
+ semaphore_wait(tmp_files_sem);
+#endif
+ struct CLEANABLE_FILE *item =
+ sort_malloc(sizeof(struct CLEANABLE_FILE));
+ item->fn = sort_strdup(tmp_file);
+ LIST_INSERT_HEAD(&tmp_files, item, files);
+#ifndef __APPLE__
+ sem_post(&tmp_files_sem);
+#else
+ semaphore_signal(tmp_files_sem);
+#endif
+ }
+}
+
+/*
+ * Clear tmp files
+ */
+void
+clear_tmp_files(void)
+{
+ struct CLEANABLE_FILE *item;
+
+#ifndef __APPLE__
+ sem_wait(&tmp_files_sem);
+#else
+ semaphore_wait(tmp_files_sem);
+#endif
+ LIST_FOREACH(item,&tmp_files,files) {
+ if ((item) && (item->fn))
+ unlink(item->fn);
+ }
+#ifndef __APPLE__
+ sem_post(&tmp_files_sem);
+#else
+ semaphore_signal(tmp_files_sem);
+#endif
+}
+
+/*
+ * Check whether a file is a temporary file
+ */
+static bool
+file_is_tmp(const char* fn)
+{
+ struct CLEANABLE_FILE *item;
+ bool ret = false;
+
+ if (fn) {
+#ifndef __APPLE__
+ sem_wait(&tmp_files_sem);
+#else
+ semaphore_wait(tmp_files_sem);
+#endif
+ LIST_FOREACH(item,&tmp_files,files) {
+ if ((item) && (item->fn))
+ if (strcmp(item->fn, fn) == 0) {
+ ret = true;
+ break;
+ }
+ }
+#ifndef __APPLE__
+ sem_post(&tmp_files_sem);
+#else
+ semaphore_signal(tmp_files_sem);
+#endif
+ }
+
+ return (ret);
+}
+
+/*
+ * Generate new temporary file name
+ */
+char *
+new_tmp_file_name(void)
+{
+ static size_t tfcounter = 0;
+ static const char *fn = ".bsdsort.";
+ char *ret;
+ size_t sz;
+
+ sz = strlen(tmpdir) + 1 + strlen(fn) + 32 + 1;
+ ret = sort_malloc(sz);
+
+ sprintf(ret, "%s/%s%d.%lu", tmpdir, fn, (int) getpid(), (unsigned long)(tfcounter++));
+ tmp_file_atexit(ret);
+ return (ret);
+}
+
+/*
+ * Initialize file list
+ */
+void
+file_list_init(struct file_list *fl, bool tmp)
+{
+
+ if (fl) {
+ fl->count = 0;
+ fl->sz = 0;
+ fl->fns = NULL;
+ fl->tmp = tmp;
+ }
+}
+
+/*
+ * Add a file name to the list
+ */
+void
+file_list_add(struct file_list *fl, char *fn, bool allocate)
+{
+
+ if (fl && fn) {
+ if (fl->count >= fl->sz || (fl->fns == NULL)) {
+ fl->sz = (fl->sz) * 2 + 1;
+ fl->fns = sort_realloc(fl->fns, fl->sz *
+ sizeof(char *));
+ }
+ fl->fns[fl->count] = allocate ? sort_strdup(fn) : fn;
+ fl->count += 1;
+ }
+}
+
+/*
+ * Populate file list from array of file names
+ */
+void
+file_list_populate(struct file_list *fl, int argc, char **argv, bool allocate)
+{
+
+ if (fl && argv) {
+ int i;
+
+ for (i = 0; i < argc; i++)
+ file_list_add(fl, argv[i], allocate);
+ }
+}
+
+/*
+ * Clean file list data and delete the files,
+ * if this is a list of temporary files
+ */
+void
+file_list_clean(struct file_list *fl)
+{
+
+ if (fl) {
+ if (fl->fns) {
+ size_t i;
+
+ for (i = 0; i < fl->count; i++) {
+ if (fl->fns[i]) {
+ if (fl->tmp)
+ unlink(fl->fns[i]);
+ sort_free(fl->fns[i]);
+ fl->fns[i] = 0;
+ }
+ }
+ sort_free(fl->fns);
+ fl->fns = NULL;
+ }
+ fl->sz = 0;
+ fl->count = 0;
+ fl->tmp = false;
+ }
+}
+
+/*
+ * Init sort list
+ */
+void
+sort_list_init(struct sort_list *l)
+{
+
+ if (l) {
+ l->count = 0;
+ l->size = 0;
+ l->memsize = sizeof(struct sort_list);
+ l->list = NULL;
+ }
+}
+
+/*
+ * Add string to sort list
+ */
+void
+sort_list_add(struct sort_list *l, struct bwstring *str)
+{
+
+ if (l && str) {
+ size_t indx = l->count;
+
+ if ((l->list == NULL) || (indx >= l->size)) {
+ size_t newsize = (l->size + 1) + 1024;
+
+ l->list = sort_realloc(l->list,
+ sizeof(struct sort_list_item*) * newsize);
+ l->memsize += (newsize - l->size) *
+ sizeof(struct sort_list_item*);
+ l->size = newsize;
+ }
+ l->list[indx] = sort_list_item_alloc();
+ sort_list_item_set(l->list[indx], str);
+ l->memsize += sort_list_item_size(l->list[indx]);
+ l->count += 1;
+ }
+}
+
+/*
+ * Clean sort list data
+ */
+void
+sort_list_clean(struct sort_list *l)
+{
+
+ if (l) {
+ if (l->list) {
+ size_t i;
+
+ for (i = 0; i < l->count; i++) {
+ struct sort_list_item *item;
+
+ item = l->list[i];
+
+ if (item) {
+ sort_list_item_clean(item);
+ sort_free(item);
+ l->list[i] = NULL;
+ }
+ }
+ sort_free(l->list);
+ l->list = NULL;
+ }
+ l->count = 0;
+ l->size = 0;
+ l->memsize = sizeof(struct sort_list);
+ }
+}
+
+/*
+ * Write sort list to file
+ */
+void
+sort_list_dump(struct sort_list *l, const char *fn)
+{
+
+ if (l && fn) {
+ FILE *f;
+
+ f = openfile(fn, "w");
+ if (f == NULL)
+ err(2, NULL);
+
+ if (l->list) {
+ size_t i;
+ if (!(sort_opts_vals.uflag)) {
+ for (i = 0; i < l->count; ++i)
+ bwsfwrite(l->list[i]->str, f,
+ sort_opts_vals.zflag);
+ } else {
+ struct sort_list_item *last_printed_item = NULL;
+ struct sort_list_item *item;
+ for (i = 0; i < l->count; ++i) {
+ item = l->list[i];
+ if ((last_printed_item == NULL) ||
+ list_coll(&last_printed_item, &item)) {
+ bwsfwrite(item->str, f, sort_opts_vals.zflag);
+ last_printed_item = item;
+ }
+ }
+ }
+ }
+
+ closefile(f, fn);
+ }
+}
+
+/*
+ * Checks if the given file is sorted. Stops at the first disorder,
+ * prints the disordered line and returns 1.
+ */
+int
+check(const char *fn)
+{
+ struct bwstring *s1, *s2, *s1disorder, *s2disorder;
+ struct file_reader *fr;
+ struct keys_array *ka1, *ka2;
+ int res;
+ size_t pos, posdisorder;
+
+ s1 = s2 = s1disorder = s2disorder = NULL;
+ ka1 = ka2 = NULL;
+
+ fr = file_reader_init(fn);
+
+ res = 0;
+ pos = 1;
+ posdisorder = 1;
+
+ if (fr == NULL) {
+ err(2, NULL);
+#ifndef __APPLE__
+ goto end;
+#endif
+ }
+
+ s1 = file_reader_readline(fr);
+ if (s1 == NULL)
+ goto end;
+
+ ka1 = keys_array_alloc();
+ preproc(s1, ka1);
+
+ s2 = file_reader_readline(fr);
+ if (s2 == NULL)
+ goto end;
+
+ ka2 = keys_array_alloc();
+ preproc(s2, ka2);
+
+ for (;;) {
+
+ if (debug_sort) {
+ bwsprintf(stdout, s2, "s1=<", ">");
+ bwsprintf(stdout, s1, "s2=<", ">");
+ }
+ int cmp = key_coll(ka2, ka1, 0);
+ if (debug_sort)
+ printf("; cmp1=%d", cmp);
+
+ if (!cmp && sort_opts_vals.complex_sort &&
+ !(sort_opts_vals.uflag) && !(sort_opts_vals.sflag)) {
+ cmp = top_level_str_coll(s2, s1);
+ if (debug_sort)
+ printf("; cmp2=%d", cmp);
+ }
+ if (debug_sort)
+ printf("\n");
+
+ if ((sort_opts_vals.uflag && (cmp <= 0)) || (cmp < 0)) {
+ if (!(sort_opts_vals.csilentflag)) {
+ s2disorder = bwsdup(s2);
+ posdisorder = pos;
+ if (debug_sort)
+ s1disorder = bwsdup(s1);
+ }
+ res = 1;
+ goto end;
+ }
+
+ pos++;
+
+ clean_keys_array(s1, ka1);
+ sort_free(ka1);
+ ka1 = ka2;
+ ka2 = NULL;
+
+ bwsfree(s1);
+ s1 = s2;
+
+ s2 = file_reader_readline(fr);
+ if (s2 == NULL)
+ goto end;
+
+ ka2 = keys_array_alloc();
+ preproc(s2, ka2);
+ }
+
+end:
+ if (ka1) {
+ clean_keys_array(s1, ka1);
+ sort_free(ka1);
+ }
+
+ if (s1)
+ bwsfree(s1);
+
+ if (ka2) {
+ clean_keys_array(s2, ka2);
+ sort_free(ka2);
+ }
+
+ if (s2)
+ bwsfree(s2);
+
+ if ((fn == NULL) || (*fn == 0) || (strcmp(fn, "-") == 0)) {
+ for (;;) {
+ s2 = file_reader_readline(fr);
+ if (s2 == NULL)
+ break;
+ bwsfree(s2);
+ }
+ }
+
+ file_reader_free(fr);
+
+ if (s2disorder) {
+ bws_disorder_warnx(s2disorder, fn, posdisorder);
+ if (s1disorder) {
+ bws_disorder_warnx(s1disorder, fn, posdisorder);
+ if (s1disorder != s2disorder)
+ bwsfree(s1disorder);
+ }
+ bwsfree(s2disorder);
+ s1disorder = NULL;
+ s2disorder = NULL;
+ }
+
+ if (res)
+ exit(res);
+
+ return (0);
+}
+
+/*
+ * Opens a file. If the given filename is "-", stdout will be
+ * opened.
+ */
+FILE *
+openfile(const char *fn, const char *mode)
+{
+ FILE *file;
+
+ if (strcmp(fn, "-") == 0) {
+ return ((mode && mode[0] == 'r') ? stdin : stdout);
+ } else {
+ mode_t orig_file_mask = 0;
+ int is_tmp = file_is_tmp(fn);
+
+ if (is_tmp && (mode[0] == 'w'))
+ orig_file_mask = umask(S_IWGRP | S_IWOTH |
+ S_IRGRP | S_IROTH);
+
+ if (is_tmp && (compress_program != NULL)) {
+ char *cmd;
+ size_t cmdsz;
+
+ cmdsz = strlen(fn) + 128;
+ cmd = sort_malloc(cmdsz);
+
+ fflush(stdout);
+
+ if (mode[0] == 'r')
+ snprintf(cmd, cmdsz - 1, "cat %s | %s -d",
+ fn, compress_program);
+ else if (mode[0] == 'w')
+ snprintf(cmd, cmdsz - 1, "%s > %s",
+ compress_program, fn);
+ else
+ err(2, "%s", getstr(7));
+
+ if ((file = popen(cmd, mode)) == NULL)
+ err(2, NULL);
+
+ sort_free(cmd);
+
+ } else
+ if ((file = fopen(fn, mode)) == NULL)
+ err(2, NULL);
+
+ if (is_tmp && (mode[0] == 'w'))
+ umask(orig_file_mask);
+ }
+
+ return (file);
+}
+
+/*
+ * Close file
+ */
+void
+closefile(FILE *f, const char *fn)
+{
+ if (f == NULL) {
+ ;
+ } else if (f == stdin) {
+ ;
+ } else if (f == stdout) {
+ fflush(f);
+ } else {
+ if (file_is_tmp(fn) && compress_program != NULL) {
+ if(pclose(f)<0)
+ err(2,NULL);
+ } else
+ fclose(f);
+ }
+}
+
+/*
+ * Reads a file into the internal buffer.
+ */
+struct file_reader *
+file_reader_init(const char *fsrc)
+{
+ struct file_reader *ret;
+
+ if (fsrc == NULL)
+ fsrc = "-";
+
+ ret = sort_malloc(sizeof(struct file_reader));
+ memset(ret, 0, sizeof(struct file_reader));
+
+ ret->elsymb = '\n';
+ if (sort_opts_vals.zflag)
+ ret->elsymb = 0;
+
+ ret->fname = sort_strdup(fsrc);
+
+ if (strcmp(fsrc, "-") && (compress_program == NULL) && use_mmap) {
+
+ do {
+ struct stat stat_buf;
+ void *addr;
+ size_t sz = 0;
+ int fd, flags;
+
+#if defined(__APPLE__)
+ flags = 0;
+#else
+ flags = MAP_NOCORE | MAP_NOSYNC;
+#endif
+
+ fd = open(fsrc, O_RDONLY);
+ if (fd < 0)
+ err(2, NULL);
+
+ if (fstat(fd, &stat_buf) < 0) {
+ close(fd);
+ break;
+ }
+
+ sz = stat_buf.st_size;
+
+#if defined(MAP_PREFAULT_READ)
+ flags |= MAP_PREFAULT_READ;
+#endif
+
+ addr = mmap(NULL, sz, PROT_READ, flags, fd, 0);
+ if (addr == MAP_FAILED) {
+ close(fd);
+ break;
+ }
+
+ ret->fd = fd;
+ ret->mmapaddr = addr;
+ ret->mmapsize = sz;
+ ret->mmapptr = ret->mmapaddr;
+
+ } while (0);
+ }
+
+ if (ret->mmapaddr == NULL) {
+ ret->file = openfile(fsrc, "r");
+ if (ret->file == NULL)
+ err(2, NULL);
+
+ if (strcmp(fsrc, "-")) {
+ ret->cbsz = READ_CHUNK;
+ ret->buffer = sort_malloc(ret->cbsz);
+ ret->bsz = 0;
+ ret->strbeg = 0;
+
+ ret->bsz = fread(ret->buffer, 1, ret->cbsz, ret->file);
+ if (ret->bsz == 0) {
+ if (ferror(ret->file))
+ err(2, NULL);
+ }
+ }
+ }
+
+ return (ret);
+}
+
+struct bwstring *
+file_reader_readline(struct file_reader *fr)
+{
+ struct bwstring *ret = NULL;
+
+ if (fr->mmapaddr) {
+ unsigned char *mmapend;
+
+ mmapend = fr->mmapaddr + fr->mmapsize;
+ if (fr->mmapptr >= mmapend)
+ return (NULL);
+ else {
+ unsigned char *strend;
+ size_t sz;
+
+ sz = mmapend - fr->mmapptr;
+ strend = memchr(fr->mmapptr, fr->elsymb, sz);
+
+ if (strend == NULL) {
+ ret = bwscsbdup(fr->mmapptr, sz);
+ fr->mmapptr = mmapend;
+ } else {
+ ret = bwscsbdup(fr->mmapptr, strend -
+ fr->mmapptr);
+ fr->mmapptr = strend + 1;
+ }
+ }
+
+ } else if (fr->file != stdin) {
+ unsigned char *strend;
+ size_t bsz1, remsz, search_start;
+
+ search_start = 0;
+ remsz = 0;
+ strend = NULL;
+
+ if (fr->bsz > fr->strbeg)
+ remsz = fr->bsz - fr->strbeg;
+
+ /* line read cycle */
+ for (;;) {
+ if (remsz > search_start)
+ strend = memchr(fr->buffer + fr->strbeg +
+ search_start, fr->elsymb, remsz -
+ search_start);
+ else
+ strend = NULL;
+
+ if (strend)
+ break;
+ if (feof(fr->file))
+ break;
+
+ if (fr->bsz != fr->cbsz)
+ /* NOTREACHED */
+ err(2, "File read software error 1");
+
+ if (remsz > (READ_CHUNK >> 1)) {
+ search_start = fr->cbsz - fr->strbeg;
+ fr->cbsz += READ_CHUNK;
+ fr->buffer = sort_realloc(fr->buffer,
+ fr->cbsz);
+ bsz1 = fread(fr->buffer + fr->bsz, 1,
+ READ_CHUNK, fr->file);
+ if (bsz1 == 0) {
+ if (ferror(fr->file))
+ err(2, NULL);
+ break;
+ }
+ fr->bsz += bsz1;
+ remsz += bsz1;
+ } else {
+ if (remsz > 0 && fr->strbeg>0)
+ bcopy(fr->buffer + fr->strbeg,
+ fr->buffer, remsz);
+
+ fr->strbeg = 0;
+ search_start = remsz;
+ bsz1 = fread(fr->buffer + remsz, 1,
+ fr->cbsz - remsz, fr->file);
+ if (bsz1 == 0) {
+ if (ferror(fr->file))
+ err(2, NULL);
+ break;
+ }
+ fr->bsz = remsz + bsz1;
+ remsz = fr->bsz;
+ }
+ }
+
+ if (strend == NULL)
+ strend = fr->buffer + fr->bsz;
+
+ if ((fr->buffer + fr->strbeg <= strend) &&
+ (fr->strbeg < fr->bsz) && (remsz>0))
+ ret = bwscsbdup(fr->buffer + fr->strbeg, strend -
+ fr->buffer - fr->strbeg);
+
+ fr->strbeg = (strend - fr->buffer) + 1;
+
+ } else {
+ size_t len = 0;
+
+ ret = bwsfgetln(fr->file, &len, sort_opts_vals.zflag,
+ &(fr->rb));
+ }
+
+ return (ret);
+}
+
+static void
+file_reader_clean(struct file_reader *fr)
+{
+
+ if (fr) {
+ if (fr->mmapaddr)
+ munmap(fr->mmapaddr, fr->mmapsize);
+
+ if (fr->fd)
+ close(fr->fd);
+
+ if (fr->buffer)
+ sort_free(fr->buffer);
+
+ if (fr->file)
+ if (fr->file != stdin)
+ closefile(fr->file, fr->fname);
+
+ if(fr->fname)
+ sort_free(fr->fname);
+
+ memset(fr, 0, sizeof(struct file_reader));
+ }
+}
+
+void
+file_reader_free(struct file_reader *fr)
+{
+
+ if (fr) {
+ file_reader_clean(fr);
+ sort_free(fr);
+ }
+}
+
+int
+procfile(const char *fsrc, struct sort_list *list, struct file_list *fl)
+{
+ struct file_reader *fr;
+
+ fr = file_reader_init(fsrc);
+ if (fr == NULL)
+ err(2, NULL);
+
+ /* file browse cycle */
+ for (;;) {
+ struct bwstring *bws;
+
+ bws = file_reader_readline(fr);
+
+ if (bws == NULL)
+ break;
+
+ sort_list_add(list, bws);
+
+ if (list->memsize >= available_free_memory) {
+ char *fn;
+
+ fn = new_tmp_file_name();
+ sort_list_to_file(list, fn);
+ file_list_add(fl, fn, false);
+ sort_list_clean(list);
+ }
+ }
+
+ file_reader_free(fr);
+
+ return (0);
+}
+
+/*
+ * Compare file headers. Files with EOF always go to the end of the list.
+ */
+static int
+file_header_cmp(struct file_header *f1, struct file_header *f2)
+{
+
+ if (f1 == f2)
+ return (0);
+ else {
+ if (f1->fr == NULL) {
+ return ((f2->fr == NULL) ? 0 : +1);
+ } else if (f2->fr == NULL)
+ return (-1);
+ else {
+ int ret;
+
+ ret = list_coll(&(f1->si), &(f2->si));
+ if (!ret)
+ return ((f1->file_pos < f2->file_pos) ? -1 : +1);
+ return (ret);
+ }
+ }
+}
+
+/*
+ * Allocate and init file header structure
+ */
+static void
+file_header_init(struct file_header **fh, const char *fn, size_t file_pos)
+{
+
+ if (fh && fn) {
+ struct bwstring *line;
+
+ *fh = sort_malloc(sizeof(struct file_header));
+ (*fh)->file_pos = file_pos;
+ (*fh)->fr = file_reader_init(fn);
+ if ((*fh)->fr == NULL) {
+ perror(fn);
+ err(2, "%s", getstr(8));
+ }
+ line = file_reader_readline((*fh)->fr);
+ if (line == NULL) {
+ file_reader_free((*fh)->fr);
+ (*fh)->fr = NULL;
+ (*fh)->si = NULL;
+ } else {
+ (*fh)->si = sort_list_item_alloc();
+ sort_list_item_set((*fh)->si, line);
+ }
+ }
+}
+
+/*
+ * Close file
+ */
+static void
+file_header_close(struct file_header **fh)
+{
+
+ if (fh && *fh) {
+ if ((*fh)->fr) {
+ file_reader_free((*fh)->fr);
+ (*fh)->fr = NULL;
+ }
+ if ((*fh)->si) {
+ sort_list_item_clean((*fh)->si);
+ sort_free((*fh)->si);
+ (*fh)->si = NULL;
+ }
+ sort_free(*fh);
+ *fh = NULL;
+ }
+}
+
+/*
+ * Swap two array elements
+ */
+static void
+file_header_swap(struct file_header **fh, size_t i1, size_t i2)
+{
+ struct file_header *tmp;
+
+ tmp = fh[i1];
+ fh[i1] = fh[i2];
+ fh[i2] = tmp;
+}
+
+/* heap algorithm ==>> */
+
+/*
+ * See heap sort algorithm
+ * "Raises" last element to its right place
+ */
+static void
+file_header_heap_swim(struct file_header **fh, size_t indx)
+{
+
+ if (indx > 0) {
+ size_t parent_index;
+
+ parent_index = (indx - 1) >> 1;
+
+ if (file_header_cmp(fh[indx], fh[parent_index]) < 0) {
+ /* swap child and parent and continue */
+ file_header_swap(fh, indx, parent_index);
+ file_header_heap_swim(fh, parent_index);
+ }
+ }
+}
+
+/*
+ * Sink the top element to its correct position
+ */
+static void
+file_header_heap_sink(struct file_header **fh, size_t indx, size_t size)
+{
+ size_t left_child_index;
+ size_t right_child_index;
+
+ left_child_index = indx + indx + 1;
+ right_child_index = left_child_index + 1;
+
+ if (left_child_index < size) {
+ size_t min_child_index;
+
+ min_child_index = left_child_index;
+
+ if ((right_child_index < size) &&
+ (file_header_cmp(fh[left_child_index],
+ fh[right_child_index]) > 0))
+ min_child_index = right_child_index;
+ if (file_header_cmp(fh[indx], fh[min_child_index]) > 0) {
+ file_header_swap(fh, indx, min_child_index);
+ file_header_heap_sink(fh, min_child_index, size);
+ }
+ }
+}
+
+/* <<== heap algorithm */
+
+/*
+ * Adds element to the "left" end
+ */
+static void
+file_header_list_rearrange_from_header(struct file_header **fh, size_t size)
+{
+
+ file_header_heap_sink(fh, 0, size);
+}
+
+/*
+ * Adds element to the "right" end
+ */
+static void
+file_header_list_push(struct file_header *f, struct file_header **fh, size_t size)
+{
+
+ fh[size++] = f;
+ file_header_heap_swim(fh, size - 1);
+}
+
+struct last_printed
+{
+ struct bwstring *str;
+};
+
+/*
+ * Prints the current line of the file
+ */
+static void
+file_header_print(struct file_header *fh, FILE *f_out, struct last_printed *lp)
+{
+
+ if (fh && fh->fr && f_out && fh->si && fh->si->str) {
+ if (sort_opts_vals.uflag) {
+ if ((lp->str == NULL) || (str_list_coll(lp->str, &(fh->si)))) {
+ bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag);
+ if (lp->str)
+ bwsfree(lp->str);
+ lp->str = bwsdup(fh->si->str);
+ }
+ } else
+ bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag);
+ }
+}
+
+/*
+ * Read next line
+ */
+static void
+file_header_read_next(struct file_header *fh)
+{
+
+ if (fh && fh->fr) {
+ struct bwstring *tmp;
+
+ tmp = file_reader_readline(fh->fr);
+ if (tmp == NULL) {
+ file_reader_free(fh->fr);
+ fh->fr = NULL;
+ if (fh->si) {
+ sort_list_item_clean(fh->si);
+ sort_free(fh->si);
+ fh->si = NULL;
+ }
+ } else {
+ if (fh->si == NULL)
+ fh->si = sort_list_item_alloc();
+ sort_list_item_set(fh->si, tmp);
+ }
+ }
+}
+
+/*
+ * Merge array of "files headers"
+ */
+static void
+file_headers_merge(size_t fnum, struct file_header **fh, FILE *f_out)
+{
+ struct last_printed lp;
+ size_t i;
+
+ memset(&lp, 0, sizeof(lp));
+
+ /*
+ * construct the initial sort structure
+ */
+ for (i = 0; i < fnum; i++)
+ file_header_list_push(fh[i], fh, i);
+
+ while (fh[0]->fr) { /* unfinished files are always in front */
+ /* output the smallest line: */
+ file_header_print(fh[0], f_out, &lp);
+ /* read a new line, if possible: */
+ file_header_read_next(fh[0]);
+ /* re-arrange the list: */
+ file_header_list_rearrange_from_header(fh, fnum);
+ }
+
+ if (lp.str)
+ bwsfree(lp.str);
+}
+
+/*
+ * Merges the given files into the output file, which can be
+ * stdout.
+ */
+static void
+merge_files_array(size_t argc, char **argv, const char *fn_out)
+{
+
+ if (argv && fn_out) {
+ struct file_header **fh;
+ FILE *f_out;
+ size_t i;
+
+ f_out = openfile(fn_out, "w");
+
+ if (f_out == NULL)
+ err(2, NULL);
+
+ fh = sort_malloc((argc + 1) * sizeof(struct file_header *));
+
+ for (i = 0; i < argc; i++)
+ file_header_init(fh + i, argv[i], (size_t) i);
+
+ file_headers_merge(argc, fh, f_out);
+
+ for (i = 0; i < argc; i++)
+ file_header_close(fh + i);
+
+ sort_free(fh);
+
+ closefile(f_out, fn_out);
+ }
+}
+
+/*
+ * Shrinks the file list until its size smaller than max number of opened files
+ */
+static int
+shrink_file_list(struct file_list *fl)
+{
+
+ if ((fl == NULL) || (size_t) (fl->count) < max_open_files)
+ return (0);
+ else {
+ struct file_list new_fl;
+ size_t indx = 0;
+
+ file_list_init(&new_fl, true);
+ while (indx < fl->count) {
+ char *fnew;
+ size_t num;
+
+ num = fl->count - indx;
+ fnew = new_tmp_file_name();
+
+ if ((size_t) num >= max_open_files)
+ num = max_open_files - 1;
+ merge_files_array(num, fl->fns + indx, fnew);
+ if (fl->tmp) {
+ size_t i;
+
+ for (i = 0; i < num; i++)
+ unlink(fl->fns[indx + i]);
+ }
+ file_list_add(&new_fl, fnew, false);
+ indx += num;
+ }
+ fl->tmp = false; /* already taken care of */
+ file_list_clean(fl);
+
+ fl->count = new_fl.count;
+ fl->fns = new_fl.fns;
+ fl->sz = new_fl.sz;
+ fl->tmp = new_fl.tmp;
+
+ return (1);
+ }
+}
+
+/*
+ * Merge list of files
+ */
+void
+merge_files(struct file_list *fl, const char *fn_out)
+{
+
+ if (fl && fn_out) {
+ while (shrink_file_list(fl));
+
+ merge_files_array(fl->count, fl->fns, fn_out);
+ }
+}
+
+static const char *
+get_sort_method_name(int sm)
+{
+
+ if (sm == SORT_MERGESORT)
+ return "mergesort";
+ else if (sort_opts_vals.sort_method == SORT_RADIXSORT)
+ return "radixsort";
+ else if (sort_opts_vals.sort_method == SORT_HEAPSORT)
+ return "heapsort";
+ else
+ return "quicksort";
+}
+
+/*
+ * Wrapper for qsort
+ */
+static int sort_qsort(void *list, size_t count, size_t elem_size,
+ int (*cmp_func)(const void *, const void *))
+{
+
+ qsort(list, count, elem_size, cmp_func);
+ return (0);
+}
+
+/*
+ * Sort list of lines and writes it to the file
+ */
+void
+sort_list_to_file(struct sort_list *list, const char *outfile)
+{
+ struct sort_mods *sm = &(keys[0].sm);
+
+ if (!(sm->Mflag) && !(sm->Rflag) && !(sm->Vflag) && !(sm->Vflag) &&
+ !(sm->gflag) && !(sm->hflag) && !(sm->nflag)) {
+ if ((sort_opts_vals.sort_method == SORT_DEFAULT) && byte_sort)
+ sort_opts_vals.sort_method = SORT_RADIXSORT;
+
+ } else if (sort_opts_vals.sort_method == SORT_RADIXSORT)
+ err(2, "%s", getstr(9));
+
+ /*
+ * to handle stable sort and the unique cases in the
+ * right order, we need stable basic algorithm
+ */
+ if (sort_opts_vals.sflag) {
+ switch (sort_opts_vals.sort_method){
+ case SORT_MERGESORT:
+ break;
+ case SORT_RADIXSORT:
+ break;
+ case SORT_DEFAULT:
+ sort_opts_vals.sort_method = SORT_MERGESORT;
+ break;
+ default:
+ errx(2, "%s", getstr(10));
+ }
+ }
+
+ if (sort_opts_vals.sort_method == SORT_DEFAULT)
+ sort_opts_vals.sort_method = DEFAULT_SORT_ALGORITHM;
+
+ if (debug_sort)
+ printf("sort_method=%s\n",
+ get_sort_method_name(sort_opts_vals.sort_method));
+
+ switch (sort_opts_vals.sort_method){
+ case SORT_RADIXSORT:
+ rxsort(list->list, list->count);
+ sort_list_dump(list, outfile);
+ break;
+ case SORT_MERGESORT:
+ mt_sort(list, mergesort, outfile);
+ break;
+ case SORT_HEAPSORT:
+ mt_sort(list, heapsort, outfile);
+ break;
+ case SORT_QSORT:
+ mt_sort(list, sort_qsort, outfile);
+ break;
+ default:
+ mt_sort(list, DEFAULT_SORT_FUNC, outfile);
+ break;
+ }
+}
+
+/******************* MT SORT ************************/
+
+#if defined(SORT_THREADS)
+/* semaphore to count threads */
+#ifndef __APPLE__
+static sem_t mtsem;
+#else
+static semaphore_t mtsem;
+#endif
+
+/* current system sort function */
+static int (*g_sort_func)(void *, size_t, size_t,
+ int(*)(const void *, const void *));
+
+/*
+ * Sort cycle thread (in multi-threaded mode)
+ */
+static void*
+mt_sort_thread(void* arg)
+{
+ struct sort_list *list = arg;
+
+ g_sort_func(list->list, list->count, sizeof(struct sort_list_item *),
+ (int(*)(const void *, const void *)) list_coll);
+
+#ifndef __APPLE__
+ sem_post(&mtsem);
+#else
+ semaphore_signal(mtsem);
+#endif
+
+ return (arg);
+}
+
+/*
+ * Compare sub-lists. Empty sub-lists always go to the end of the list.
+ */
+static int
+sub_list_cmp(struct sort_list *l1, struct sort_list *l2)
+{
+
+ if (l1 == l2)
+ return (0);
+ else {
+ if (l1->count == 0) {
+ return ((l2->count == 0) ? 0 : +1);
+ } else if (l2->count == 0) {
+ return (-1);
+ } else {
+ int ret;
+
+ ret = list_coll(&(l1->list[0]), &(l2->list[0]));
+ if (!ret)
+ return ((l1->sub_list_pos < l2->sub_list_pos) ?
+ -1 : +1);
+ return (ret);
+ }
+ }
+}
+
+/*
+ * Swap two array elements
+ */
+static void
+sub_list_swap(struct sort_list **sl, size_t i1, size_t i2)
+{
+ struct sort_list *tmp;
+
+ tmp = sl[i1];
+ sl[i1] = sl[i2];
+ sl[i2] = tmp;
+}
+
+/* heap algorithm ==>> */
+
+/*
+ * See heap sort algorithm
+ * "Raises" last element to its right place
+ */
+static void
+sub_list_swim(struct sort_list **sl, size_t indx)
+{
+
+ if (indx > 0) {
+ size_t parent_index;
+
+ parent_index = (indx - 1) >> 1;
+
+ if (sub_list_cmp(sl[indx], sl[parent_index]) < 0) {
+ /* swap child and parent and continue */
+ sub_list_swap(sl, indx, parent_index);
+ sub_list_swim(sl, parent_index);
+ }
+ }
+}
+
+/*
+ * Sink the top element to its correct position
+ */
+static void
+sub_list_sink(struct sort_list **sl, size_t indx, size_t size)
+{
+ size_t left_child_index;
+ size_t right_child_index;
+
+ left_child_index = indx + indx + 1;
+ right_child_index = left_child_index + 1;
+
+ if (left_child_index < size) {
+ size_t min_child_index;
+
+ min_child_index = left_child_index;
+
+ if ((right_child_index < size) &&
+ (sub_list_cmp(sl[left_child_index],
+ sl[right_child_index]) > 0))
+ min_child_index = right_child_index;
+ if (sub_list_cmp(sl[indx], sl[min_child_index]) > 0) {
+ sub_list_swap(sl, indx, min_child_index);
+ sub_list_sink(sl, min_child_index, size);
+ }
+ }
+}
+
+/* <<== heap algorithm */
+
+/*
+ * Adds element to the "right" end
+ */
+static void
+sub_list_push(struct sort_list *s, struct sort_list **sl, size_t size)
+{
+
+ sl[size++] = s;
+ sub_list_swim(sl, size - 1);
+}
+
+struct last_printed_item
+{
+ struct sort_list_item *item;
+};
+
+/*
+ * Prints the current line of the file
+ */
+static void
+sub_list_header_print(struct sort_list *sl, FILE *f_out,
+ struct last_printed_item *lp)
+{
+
+ if (sl && sl->count && f_out && sl->list[0]->str) {
+ if (sort_opts_vals.uflag) {
+ if ((lp->item == NULL) || (list_coll(&(lp->item),
+ &(sl->list[0])))) {
+ bwsfwrite(sl->list[0]->str, f_out,
+ sort_opts_vals.zflag);
+ lp->item = sl->list[0];
+ }
+ } else
+ bwsfwrite(sl->list[0]->str, f_out,
+ sort_opts_vals.zflag);
+ }
+}
+
+/*
+ * Read next line
+ */
+static void
+sub_list_next(struct sort_list *sl)
+{
+
+ if (sl && sl->count) {
+ sl->list += 1;
+ sl->count -= 1;
+ }
+}
+
+/*
+ * Merge sub-lists to a file
+ */
+static void
+merge_sub_lists(struct sort_list **sl, size_t n, FILE* f_out)
+{
+ struct last_printed_item lp;
+ size_t i;
+
+ memset(&lp,0,sizeof(lp));
+
+ /* construct the initial list: */
+ for (i = 0; i < n; i++)
+ sub_list_push(sl[i], sl, i);
+
+ while (sl[0]->count) { /* unfinished lists are always in front */
+ /* output the smallest line: */
+ sub_list_header_print(sl[0], f_out, &lp);
+ /* move to a new line, if possible: */
+ sub_list_next(sl[0]);
+ /* re-arrange the list: */
+ sub_list_sink(sl, 0, n);
+ }
+}
+
+/*
+ * Merge sub-lists to a file
+ */
+static void
+merge_list_parts(struct sort_list **parts, size_t n, const char *fn)
+{
+ FILE* f_out;
+
+ f_out = openfile(fn,"w");
+
+ merge_sub_lists(parts, n, f_out);
+
+ closefile(f_out, fn);
+}
+
+#endif /* defined(SORT_THREADS) */
+/*
+ * Multi-threaded sort algorithm "driver"
+ */
+static void
+mt_sort(struct sort_list *list,
+ int(*sort_func)(void *, size_t, size_t, int(*)(const void *, const void *)),
+ const char* fn)
+{
+#if defined(SORT_THREADS)
+ if (nthreads < 2 || list->count < MT_SORT_THRESHOLD) {
+ size_t nthreads_save = nthreads;
+ nthreads = 1;
+#endif
+ /* if single thread or small data, do simple sort */
+ sort_func(list->list, list->count,
+ sizeof(struct sort_list_item *),
+ (int(*)(const void *, const void *)) list_coll);
+ sort_list_dump(list, fn);
+#if defined(SORT_THREADS)
+ nthreads = nthreads_save;
+ } else {
+ /* multi-threaded sort */
+ struct sort_list **parts;
+ size_t avgsize, cstart, i;
+
+ /* array of sub-lists */
+ parts = sort_malloc(sizeof(struct sort_list*) * nthreads);
+ cstart = 0;
+ avgsize = list->count / nthreads;
+
+ /* set global system sort function */
+ g_sort_func = sort_func;
+
+ /* set sublists */
+ for (i = 0; i < nthreads; ++i) {
+ size_t sz = 0;
+
+ parts[i] = sort_malloc(sizeof(struct sort_list));
+ parts[i]->list = list->list + cstart;
+ parts[i]->memsize = 0;
+ parts[i]->sub_list_pos = i;
+
+ sz = (i == nthreads - 1) ? list->count - cstart :
+ avgsize;
+
+ parts[i]->count = sz;
+
+ parts[i]->size = parts[i]->count;
+
+ cstart += sz;
+ }
+
+ /* init threads counting semaphore */
+#ifndef __APPLE__
+ sem_init(&mtsem, 0, 0);
+#else
+ {
+ mach_port_t self = mach_task_self();
+ kern_return_t ret = semaphore_create(self, &mtsem, SYNC_POLICY_FIFO, 0);
+ if (ret != KERN_SUCCESS) {
+ err(2,NULL);
+ }
+ }
+#endif
+
+ /* start threads */
+ for (i = 0; i < nthreads; ++i) {
+ pthread_t pth;
+ pthread_attr_t attr;
+
+ pthread_attr_init(&attr);
+#ifndef __APPLE__
+ pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED);
+#else
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+#endif
+
+ for (;;) {
+ int res = pthread_create(&pth, &attr,
+ mt_sort_thread, parts[i]);
+
+ if (res >= 0)
+ break;
+ if (errno == EAGAIN) {
+#ifndef __APPLE__
+ pthread_yield();
+#else
+ sched_yield();
+#endif
+ continue;
+ }
+ err(2, NULL);
+ }
+
+ pthread_attr_destroy(&attr);
+ }
+
+ /* wait for threads completion */
+ for (i = 0; i < nthreads; ++i) {
+#ifndef __APPLE__
+ sem_wait(&mtsem);
+#else
+ semaphore_wait(mtsem);
+#endif
+ }
+ /* destroy the semaphore - we do not need it anymore */
+#ifndef __APPLE__
+ sem_destroy(&mtsem);
+#else
+ {
+ mach_port_t self = mach_task_self();
+ semaphore_destroy(self,mtsem);
+ }
+#endif
+
+ /* merge sorted sub-lists to the file */
+ merge_list_parts(parts, nthreads, fn);
+
+ /* free sub-lists data */
+ for (i = 0; i < nthreads; ++i) {
+ sort_free(parts[i]);
+ }
+ sort_free(parts);
+ }
+#endif /* defined(SORT_THREADS) */
+}
diff --git a/text_cmds/sort/file.h b/text_cmds/sort/file.h
new file mode 100644
index 0000000..e7a8c94
--- /dev/null
+++ b/text_cmds/sort/file.h
@@ -0,0 +1,126 @@
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if !defined(__SORT_FILE_H__)
+#define __SORT_FILE_H__
+
+#include "coll.h"
+#include "sort.h"
+
+#define SORT_DEFAULT 0
+#define SORT_QSORT 1
+#define SORT_MERGESORT 2
+#define SORT_HEAPSORT 3
+#define SORT_RADIXSORT 4
+
+#define DEFAULT_SORT_ALGORITHM SORT_MERGESORT
+#define DEFAULT_SORT_FUNC mergesort
+
+/*
+ * List of data to be sorted.
+ */
+struct sort_list
+{
+ struct sort_list_item **list;
+ unsigned long long memsize;
+ size_t count;
+ size_t size;
+ size_t sub_list_pos;
+};
+
+/*
+ * File reader object
+ */
+struct file_reader;
+
+/*
+ * List of files to be sorted
+ */
+struct file_list
+{
+ char **fns;
+ size_t count;
+ size_t sz;
+ bool tmp;
+};
+
+/* memory */
+
+/**/
+
+extern unsigned long long free_memory;
+extern unsigned long long available_free_memory;
+
+/* Are we using mmap ? */
+extern bool use_mmap;
+
+/* temporary file dir */
+
+extern const char *tmpdir;
+
+/*
+ * Max number of simultaneously open files (including the output file).
+ */
+extern size_t max_open_files;
+
+/*
+ * Compress program
+ */
+extern const char* compress_program;
+
+/* funcs */
+
+struct file_reader *file_reader_init(const char *fsrc);
+struct bwstring *file_reader_readline(struct file_reader *fr);
+void file_reader_free(struct file_reader *fr);
+
+void init_tmp_files(void);
+void clear_tmp_files(void);
+char *new_tmp_file_name(void);
+void tmp_file_atexit(const char *tmp_file);
+
+void file_list_init(struct file_list *fl, bool tmp);
+void file_list_add(struct file_list *fl, char *fn, bool allocate);
+void file_list_populate(struct file_list *fl, int argc, char **argv, bool allocate);
+void file_list_clean(struct file_list *fl);
+
+int check(const char *);
+void merge_files(struct file_list *fl, const char *fn_out);
+FILE *openfile(const char *, const char *);
+void closefile(FILE *, const char *);
+int procfile(const char *fn, struct sort_list *list, struct file_list *fl);
+
+void sort_list_init(struct sort_list *l);
+void sort_list_add(struct sort_list *l, struct bwstring *str);
+void sort_list_clean(struct sort_list *l);
+void sort_list_dump(struct sort_list *l, const char *fn);
+
+void sort_list_to_file(struct sort_list *list, const char *outfile);
+
+#endif /* __SORT_FILE_H__ */
diff --git a/text_cmds/sort/mem.c b/text_cmds/sort/mem.c
new file mode 100644
index 0000000..f59f751
--- /dev/null
+++ b/text_cmds/sort/mem.c
@@ -0,0 +1,81 @@
+/*-
+ * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.bin/sort/mem.c 281132 2015-04-06 02:35:55Z pfg $");
+
+#include <err.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mem.h"
+
+/*
+ * malloc() wrapper.
+ */
+void *
+sort_malloc(size_t size)
+{
+ void *ptr;
+
+ if ((ptr = malloc(size)) == NULL)
+ err(2, NULL);
+ return (ptr);
+}
+
+/*
+ * free() wrapper.
+ */
+void
+sort_free(const void *ptr)
+{
+
+ if (ptr)
+ free(__DECONST(void *, ptr));
+}
+
+/*
+ * realloc() wrapper.
+ */
+void *
+sort_realloc(void *ptr, size_t size)
+{
+
+ if ((ptr = realloc(ptr, size)) == NULL)
+ err(2, NULL);
+ return (ptr);
+}
+
+char *
+sort_strdup(const char *str)
+{
+ char *dup;
+
+ if ((dup = strdup(str)) == NULL)
+ err(2, NULL);
+ return (dup);
+}
diff --git a/text_cmds/sort/mem.h b/text_cmds/sort/mem.h
new file mode 100644
index 0000000..95ab1ac
--- /dev/null
+++ b/text_cmds/sort/mem.h
@@ -0,0 +1,45 @@
+/* $FreeBSD: head/usr.bin/sort/mem.h 264744 2014-04-21 22:52:18Z pfg $ */
+
+/*-
+ * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if !defined(__SORT_MEM_H__)
+#define __SORT_MEM_H__
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+/*
+ * mem.c
+ */
+void *sort_malloc(size_t);
+void sort_free(const void *ptr);
+void *sort_realloc(void *, size_t);
+char *sort_strdup(const char *);
+
+#endif /* __SORT_MEM_H__ */
diff --git a/text_cmds/sort/nls/C.msg b/text_cmds/sort/nls/C.msg
new file mode 100644
index 0000000..81300ba
--- /dev/null
+++ b/text_cmds/sort/nls/C.msg
@@ -0,0 +1,16 @@
+$ $FreeBSD: head/usr.bin/sort/nls/C.msg 235434 2012-05-14 09:55:23Z gabor $
+$
+$set 1
+$quote "
+1 "mutually exclusive flags"
+2 "extra argument not allowed with -c"
+3 "Unknown feature"
+4 "Wrong memory buffer specification"
+5 "0 field in key specs"
+6 "0 column in key specs"
+7 "Wrong file mode"
+8 "Cannot open file for reading"
+9 "Radix sort cannot be used with these sort options"
+10 "The chosen sort method cannot be used with stable and/or unique sort"
+11 "Invalid key position"
+12 "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] [+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] [-o outfile] [--batch-size size] [--files0-from file] [--heapsort] [--mergesort] [--radixsort] [--qsort] [--nthreads thread_no] [--human-numeric-sort] [--version-sort] [--random-sort [--random-source file]] [--compress-program program] [file ...]\n"
diff --git a/text_cmds/sort/nls/hu_HU.ISO8859-2.msg b/text_cmds/sort/nls/hu_HU.ISO8859-2.msg
new file mode 100644
index 0000000..c45862b
--- /dev/null
+++ b/text_cmds/sort/nls/hu_HU.ISO8859-2.msg
@@ -0,0 +1,16 @@
+$ $FreeBSD: head/usr.bin/sort/nls/hu_HU.ISO8859-2.msg 235434 2012-05-14 09:55:23Z gabor $
+$
+$set 1
+$quote "
+1 "egymást kizáró opciók"
+2 "extra argumentum a -%c opcióval"
+3 "Ismeretlen funkció\n"
+4 "Rossz memória puffer érték"
+5 "0 mezõ a kulcsspecifikációban\n"
+6 "0 oszlop a kulcsspecifikációban\n"
+7 "Helytelen fájl mód"
+8 "A fájl nem nyitható meg olvasásra"
+9 "A radix rendezés nem használható a megadott rendezési opciókkal"
+10 "A választott rendezési mód nem használható a --stable és --unique opciókkal"
+11 "Érvénytelen kulcs pozíció"
+12 "Használat: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] [+POS1 [-POS2]] [-S memóriaméret] [-T ideiglenes_könyvtár] [-t elválasztó] [-o kimeneti_fájl] [--batch-size méret] [--files0-from fájl] [--heapsort] [--mergesort] [--radixsort] [--qsort] [--nthreads szálak_száma] [--human-numeric-sort] [--version-sort] [--random-sort [--random-source fájl]] [--compress-program program] [fájl ...]\n"
diff --git a/text_cmds/sort/radixsort.c b/text_cmds/sort/radixsort.c
new file mode 100644
index 0000000..8288a19
--- /dev/null
+++ b/text_cmds/sort/radixsort.c
@@ -0,0 +1,746 @@
+/*-
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <errno.h>
+#include <err.h>
+#include <langinfo.h>
+#include <math.h>
+#if defined(SORT_THREADS)
+#include <pthread.h>
+#ifndef __APPLE__
+#include <semaphore.h>
+#else
+#include <mach/mach_init.h>
+#include <mach/mach_error.h>
+#include <mach/semaphore.h>
+#include <mach/task.h>
+#endif
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <unistd.h>
+
+#include "coll.h"
+#include "radixsort.h"
+
+#define DEFAULT_SORT_FUNC_RADIXSORT mergesort
+
+#define TINY_NODE(sl) ((sl)->tosort_num < 65)
+#define SMALL_NODE(sl) ((sl)->tosort_num < 5)
+
+/* are we sorting in reverse order ? */
+static bool reverse_sort;
+
+/* sort sub-levels array size */
+static const size_t slsz = 256 * sizeof(struct sort_level*);
+
+/* one sort level structure */
+struct sort_level
+{
+ struct sort_level **sublevels;
+ struct sort_list_item **leaves;
+ struct sort_list_item **sorted;
+ struct sort_list_item **tosort;
+ size_t leaves_num;
+ size_t leaves_sz;
+ size_t level;
+ size_t real_sln;
+ size_t start_position;
+ size_t sln;
+ size_t tosort_num;
+ size_t tosort_sz;
+};
+
+/* stack of sort levels ready to be sorted */
+struct level_stack {
+ struct level_stack *next;
+ struct sort_level *sl;
+};
+
+static struct level_stack *g_ls;
+
+#if defined(SORT_THREADS)
+/* stack guarding mutex */
+static pthread_cond_t g_ls_cond;
+static pthread_mutex_t g_ls_mutex;
+
+/* counter: how many items are left */
+static size_t sort_left;
+/* guarding mutex */
+
+/* semaphore to count threads */
+#ifndef __APPLE__
+static sem_t mtsem;
+#else
+semaphore_t rmtsem;
+#endif
+
+/*
+ * Decrement items counter
+ */
+static inline void
+sort_left_dec(size_t n)
+{
+ pthread_mutex_lock(&g_ls_mutex);
+ sort_left -= n;
+ if (sort_left == 0 && nthreads > 1)
+ pthread_cond_broadcast(&g_ls_cond);
+ pthread_mutex_unlock(&g_ls_mutex);
+}
+
+/*
+ * Do we have something to sort ?
+ *
+ * This routine does not need to be locked.
+ */
+static inline bool
+have_sort_left(void)
+{
+ bool ret;
+
+ ret = (sort_left > 0);
+
+ return (ret);
+}
+
+#else
+
+#define sort_left_dec(n)
+
+#endif /* SORT_THREADS */
+
+/*
+ * Push sort level to the stack
+ */
+static inline void
+push_ls(struct sort_level *sl)
+{
+ struct level_stack *new_ls;
+
+ new_ls = sort_malloc(sizeof(struct level_stack));
+ new_ls->sl = sl;
+
+#if defined(SORT_THREADS)
+ if (nthreads > 1)
+ pthread_mutex_lock(&g_ls_mutex);
+#endif
+
+ new_ls->next = g_ls;
+ g_ls = new_ls;
+
+#if defined(SORT_THREADS)
+ if (nthreads > 1)
+ pthread_cond_signal(&g_ls_cond);
+#endif
+
+#if defined(SORT_THREADS)
+ if (nthreads > 1)
+ pthread_mutex_unlock(&g_ls_mutex);
+#endif
+}
+
+/*
+ * Pop sort level from the stack (single-threaded style)
+ */
+static inline struct sort_level*
+pop_ls_st(void)
+{
+ struct sort_level *sl;
+
+ if (g_ls) {
+ struct level_stack *saved_ls;
+
+ sl = g_ls->sl;
+ saved_ls = g_ls;
+ g_ls = g_ls->next;
+ sort_free(saved_ls);
+ } else
+ sl = NULL;
+
+ return (sl);
+}
+
+#if defined(SORT_THREADS)
+
+/*
+ * Pop sort level from the stack (multi-threaded style)
+ */
+static inline struct sort_level*
+pop_ls_mt(void)
+{
+ struct level_stack *saved_ls;
+ struct sort_level *sl;
+
+ pthread_mutex_lock(&g_ls_mutex);
+
+ for (;;) {
+ if (g_ls) {
+ sl = g_ls->sl;
+ saved_ls = g_ls;
+ g_ls = g_ls->next;
+ break;
+ }
+ sl = NULL;
+ saved_ls = NULL;
+
+ if (have_sort_left() == 0)
+ break;
+ pthread_cond_wait(&g_ls_cond, &g_ls_mutex);
+ }
+
+ pthread_mutex_unlock(&g_ls_mutex);
+
+ sort_free(saved_ls);
+
+ return (sl);
+}
+
+#endif /* defined(SORT_THREADS) */
+
+static void
+add_to_sublevel(struct sort_level *sl, struct sort_list_item *item, size_t indx)
+{
+ struct sort_level *ssl;
+
+ ssl = sl->sublevels[indx];
+
+ if (ssl == NULL) {
+ ssl = sort_malloc(sizeof(struct sort_level));
+ memset(ssl, 0, sizeof(struct sort_level));
+
+ ssl->level = sl->level + 1;
+ sl->sublevels[indx] = ssl;
+
+ ++(sl->real_sln);
+ }
+
+ if (++(ssl->tosort_num) > ssl->tosort_sz) {
+ ssl->tosort_sz = ssl->tosort_num + 128;
+ ssl->tosort = sort_realloc(ssl->tosort,
+ sizeof(struct sort_list_item*) * (ssl->tosort_sz));
+ }
+
+ ssl->tosort[ssl->tosort_num - 1] = item;
+}
+
+static inline void
+add_leaf(struct sort_level *sl, struct sort_list_item *item)
+{
+
+ if (++(sl->leaves_num) > sl->leaves_sz) {
+ sl->leaves_sz = sl->leaves_num + 128;
+ sl->leaves = sort_realloc(sl->leaves,
+ (sizeof(struct sort_list_item*) * (sl->leaves_sz)));
+ }
+ sl->leaves[sl->leaves_num - 1] = item;
+}
+
+static inline int
+get_wc_index(struct sort_list_item *sli, size_t level)
+{
+ const struct key_value *kv;
+ const struct bwstring *bws;
+
+ kv = get_key_from_keys_array(&sli->ka, 0);
+ bws = kv->k;
+
+ if ((BWSLEN(bws) > level))
+ return (unsigned char) BWS_GET(bws,level);
+ return (-1);
+}
+
+static void
+place_item(struct sort_level *sl, size_t item)
+{
+ struct sort_list_item *sli;
+ int c;
+
+ sli = sl->tosort[item];
+ c = get_wc_index(sli, sl->level);
+
+ if (c == -1)
+ add_leaf(sl, sli);
+ else
+ add_to_sublevel(sl, sli, c);
+}
+
+static void
+free_sort_level(struct sort_level *sl)
+{
+
+ if (sl) {
+ if (sl->leaves)
+ sort_free(sl->leaves);
+
+ if (sl->level > 0)
+ sort_free(sl->tosort);
+
+ if (sl->sublevels) {
+ struct sort_level *slc;
+ size_t sln;
+
+ sln = sl->sln;
+
+ for (size_t i = 0; i < sln; ++i) {
+ slc = sl->sublevels[i];
+ if (slc)
+ free_sort_level(slc);
+ }
+
+ sort_free(sl->sublevels);
+ }
+
+ sort_free(sl);
+ }
+}
+
+static void
+run_sort_level_next(struct sort_level *sl)
+{
+ struct sort_level *slc;
+ size_t i, sln, tosort_num;
+
+ if (sl->sublevels) {
+ sort_free(sl->sublevels);
+ sl->sublevels = NULL;
+ }
+
+ switch (sl->tosort_num) {
+ case 0:
+ goto end;
+ case (1):
+ sl->sorted[sl->start_position] = sl->tosort[0];
+ sort_left_dec(1);
+ goto end;
+ case (2):
+ if (list_coll_offset(&(sl->tosort[0]), &(sl->tosort[1]),
+ sl->level) > 0) {
+ sl->sorted[sl->start_position++] = sl->tosort[1];
+ sl->sorted[sl->start_position] = sl->tosort[0];
+ } else {
+ sl->sorted[sl->start_position++] = sl->tosort[0];
+ sl->sorted[sl->start_position] = sl->tosort[1];
+ }
+ sort_left_dec(2);
+
+ goto end;
+ default:
+ if (TINY_NODE(sl) || (sl->level > 15)) {
+ listcoll_t func;
+
+ func = get_list_call_func(sl->level);
+
+ sl->leaves = sl->tosort;
+ sl->leaves_num = sl->tosort_num;
+ sl->leaves_sz = sl->leaves_num;
+ sl->leaves = sort_realloc(sl->leaves,
+ (sizeof(struct sort_list_item *) *
+ (sl->leaves_sz)));
+ sl->tosort = NULL;
+ sl->tosort_num = 0;
+ sl->tosort_sz = 0;
+ sl->sln = 0;
+ sl->real_sln = 0;
+ if (sort_opts_vals.sflag) {
+ if (mergesort(sl->leaves, sl->leaves_num,
+ sizeof(struct sort_list_item *),
+ (int(*)(const void *, const void *)) func) == -1)
+ /* NOTREACHED */
+ err(2, "Radix sort error 3");
+ } else
+ DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
+ sizeof(struct sort_list_item *),
+ (int(*)(const void *, const void *)) func);
+
+ memcpy(sl->sorted + sl->start_position,
+ sl->leaves, sl->leaves_num *
+ sizeof(struct sort_list_item*));
+
+ sort_left_dec(sl->leaves_num);
+
+ goto end;
+ } else {
+ sl->tosort_sz = sl->tosort_num;
+ sl->tosort = sort_realloc(sl->tosort,
+ sizeof(struct sort_list_item*) * (sl->tosort_sz));
+ }
+ }
+
+ sl->sln = 256;
+ sl->sublevels = sort_malloc(slsz);
+ memset(sl->sublevels, 0, slsz);
+
+ sl->real_sln = 0;
+
+ tosort_num = sl->tosort_num;
+ for (i = 0; i < tosort_num; ++i)
+ place_item(sl, i);
+
+ sort_free(sl->tosort);
+ sl->tosort = NULL;
+ sl->tosort_num = 0;
+ sl->tosort_sz = 0;
+
+ if (sl->leaves_num > 1) {
+ if (keys_num > 1) {
+ if (sort_opts_vals.sflag) {
+ mergesort(sl->leaves, sl->leaves_num,
+ sizeof(struct sort_list_item *),
+ (int(*)(const void *, const void *)) list_coll);
+ } else {
+ DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
+ sizeof(struct sort_list_item *),
+ (int(*)(const void *, const void *)) list_coll);
+ }
+ } else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
+ DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
+ sizeof(struct sort_list_item *),
+ (int(*)(const void *, const void *)) list_coll_by_str_only);
+ }
+ }
+
+ sl->leaves_sz = sl->leaves_num;
+ sl->leaves = sort_realloc(sl->leaves, (sizeof(struct sort_list_item *) *
+ (sl->leaves_sz)));
+
+ if (!reverse_sort) {
+ memcpy(sl->sorted + sl->start_position, sl->leaves,
+ sl->leaves_num * sizeof(struct sort_list_item*));
+ sl->start_position += sl->leaves_num;
+ sort_left_dec(sl->leaves_num);
+
+ sort_free(sl->leaves);
+ sl->leaves = NULL;
+ sl->leaves_num = 0;
+ sl->leaves_sz = 0;
+
+ sln = sl->sln;
+
+ for (i = 0; i < sln; ++i) {
+ slc = sl->sublevels[i];
+
+ if (slc) {
+ slc->sorted = sl->sorted;
+ slc->start_position = sl->start_position;
+ sl->start_position += slc->tosort_num;
+ if (SMALL_NODE(slc))
+ run_sort_level_next(slc);
+ else
+ push_ls(slc);
+ sl->sublevels[i] = NULL;
+ }
+ }
+
+ } else {
+ size_t n;
+
+ sln = sl->sln;
+
+ for (i = 0; i < sln; ++i) {
+ n = sln - i - 1;
+ slc = sl->sublevels[n];
+
+ if (slc) {
+ slc->sorted = sl->sorted;
+ slc->start_position = sl->start_position;
+ sl->start_position += slc->tosort_num;
+ if (SMALL_NODE(slc))
+ run_sort_level_next(slc);
+ else
+ push_ls(slc);
+ sl->sublevels[n] = NULL;
+ }
+ }
+
+ memcpy(sl->sorted + sl->start_position, sl->leaves,
+ sl->leaves_num * sizeof(struct sort_list_item*));
+ sort_left_dec(sl->leaves_num);
+ }
+
+end:
+ free_sort_level(sl);
+}
+
+/*
+ * Single-threaded sort cycle
+ */
+static void
+run_sort_cycle_st(void)
+{
+ struct sort_level *slc;
+
+ for (;;) {
+ slc = pop_ls_st();
+ if (slc == NULL) {
+ break;
+ }
+ run_sort_level_next(slc);
+ }
+}
+
+#if defined(SORT_THREADS)
+
+/*
+ * Multi-threaded sort cycle
+ */
+static void
+run_sort_cycle_mt(void)
+{
+ struct sort_level *slc;
+
+ for (;;) {
+ slc = pop_ls_mt();
+ if (slc == NULL)
+ break;
+ run_sort_level_next(slc);
+ }
+}
+
+/*
+ * Sort cycle thread (in multi-threaded mode)
+ */
+static void*
+sort_thread(void* arg)
+{
+
+ run_sort_cycle_mt();
+
+#ifndef __APPLE__
+ sem_post(&mtsem);
+#else
+ semaphore_signal(rmtsem);
+#endif
+
+ return (arg);
+}
+
+#endif /* defined(SORT_THREADS) */
+
+static void
+run_top_sort_level(struct sort_level *sl)
+{
+ struct sort_level *slc;
+
+ reverse_sort = sort_opts_vals.kflag ? keys[0].sm.rflag :
+ default_sort_mods->rflag;
+
+ sl->start_position = 0;
+ sl->sln = 256;
+ sl->sublevels = sort_malloc(slsz);
+ memset(sl->sublevels, 0, slsz);
+
+ for (size_t i = 0; i < sl->tosort_num; ++i)
+ place_item(sl, i);
+
+ if (sl->leaves_num > 1) {
+ if (keys_num > 1) {
+ if (sort_opts_vals.sflag) {
+ mergesort(sl->leaves, sl->leaves_num,
+ sizeof(struct sort_list_item *),
+ (int(*)(const void *, const void *)) list_coll);
+ } else {
+ DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
+ sizeof(struct sort_list_item *),
+ (int(*)(const void *, const void *)) list_coll);
+ }
+ } else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
+ DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
+ sizeof(struct sort_list_item *),
+ (int(*)(const void *, const void *)) list_coll_by_str_only);
+ }
+ }
+
+ if (!reverse_sort) {
+ memcpy(sl->tosort + sl->start_position, sl->leaves,
+ sl->leaves_num * sizeof(struct sort_list_item*));
+ sl->start_position += sl->leaves_num;
+ sort_left_dec(sl->leaves_num);
+
+ for (size_t i = 0; i < sl->sln; ++i) {
+ slc = sl->sublevels[i];
+
+ if (slc) {
+ slc->sorted = sl->tosort;
+ slc->start_position = sl->start_position;
+ sl->start_position += slc->tosort_num;
+ push_ls(slc);
+ sl->sublevels[i] = NULL;
+ }
+ }
+
+ } else {
+ size_t n;
+
+ for (size_t i = 0; i < sl->sln; ++i) {
+
+ n = sl->sln - i - 1;
+ slc = sl->sublevels[n];
+
+ if (slc) {
+ slc->sorted = sl->tosort;
+ slc->start_position = sl->start_position;
+ sl->start_position += slc->tosort_num;
+ push_ls(slc);
+ sl->sublevels[n] = NULL;
+ }
+ }
+
+ memcpy(sl->tosort + sl->start_position, sl->leaves,
+ sl->leaves_num * sizeof(struct sort_list_item*));
+
+ sort_left_dec(sl->leaves_num);
+ }
+
+#if defined(SORT_THREADS)
+ if (nthreads < 2) {
+#endif
+ run_sort_cycle_st();
+#if defined(SORT_THREADS)
+ } else {
+ size_t i;
+
+ for(i = 0; i < nthreads; ++i) {
+ pthread_attr_t attr;
+ pthread_t pth;
+
+ pthread_attr_init(&attr);
+#ifndef __APPLE__
+ pthread_attr_setdetachstate(&attr,
+ PTHREAD_DETACHED);
+#else
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+#endif
+
+ for (;;) {
+ int res = pthread_create(&pth, &attr,
+ sort_thread, NULL);
+ if (res >= 0)
+ break;
+ if (errno == EAGAIN) {
+#ifndef __APPLE__
+ pthread_yield();
+#else
+ sched_yield();
+#endif
+ continue;
+ }
+ err(2, NULL);
+ }
+
+ pthread_attr_destroy(&attr);
+ }
+
+ for(i = 0; i < nthreads; ++i)
+#ifndef __APPLE__
+ sem_wait(&mtsem);
+#else
+ semaphore_wait(rmtsem);
+#endif
+ }
+#endif /* defined(SORT_THREADS) */
+}
+
+static void
+run_sort(struct sort_list_item **base, size_t nmemb)
+{
+ struct sort_level *sl;
+
+#if defined(SORT_THREADS)
+ size_t nthreads_save = nthreads;
+ if (nmemb < MT_SORT_THRESHOLD)
+ nthreads = 1;
+
+ if (nthreads > 1) {
+ pthread_mutexattr_t mattr;
+
+ pthread_mutexattr_init(&mattr);
+#ifndef __APPLE__
+ pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_ADAPTIVE_NP);
+#endif
+
+ pthread_mutex_init(&g_ls_mutex, &mattr);
+ pthread_cond_init(&g_ls_cond, NULL);
+
+ pthread_mutexattr_destroy(&mattr);
+
+#ifndef __APPLE__
+ sem_init(&mtsem, 0, 0);
+#else
+ {
+ mach_port_t self = mach_task_self();
+ kern_return_t ret = semaphore_create(self, &rmtsem, SYNC_POLICY_FIFO, 0);
+ if (ret != KERN_SUCCESS) {
+ err(2,NULL);
+ }
+ }
+#endif
+
+ }
+#endif
+
+ sl = sort_malloc(sizeof(struct sort_level));
+ memset(sl, 0, sizeof(struct sort_level));
+
+ sl->tosort = base;
+ sl->tosort_num = nmemb;
+ sl->tosort_sz = nmemb;
+
+#if defined(SORT_THREADS)
+ sort_left = nmemb;
+#endif
+
+ run_top_sort_level(sl);
+
+ free_sort_level(sl);
+
+#if defined(SORT_THREADS)
+ if (nthreads > 1) {
+#ifndef __APPLE__
+ sem_destroy(&mtsem);
+#else
+ {
+ mach_port_t self = mach_task_self();
+ semaphore_destroy(self,rmtsem);
+ }
+#endif
+ pthread_mutex_destroy(&g_ls_mutex);
+ }
+ nthreads = nthreads_save;
+#endif
+}
+
+void
+rxsort(struct sort_list_item **base, size_t nmemb)
+{
+
+ run_sort(base, nmemb);
+}
diff --git a/text_cmds/sort/radixsort.h b/text_cmds/sort/radixsort.h
new file mode 100644
index 0000000..8743728
--- /dev/null
+++ b/text_cmds/sort/radixsort.h
@@ -0,0 +1,38 @@
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if !defined(__SORT_RADIX_H__)
+#define __SORT_RADIX_H__
+
+#include "coll.h"
+#include "sort.h"
+
+void rxsort(struct sort_list_item **base, size_t nmemb);
+
+#endif /* __SORT_RADIX_H__ */
diff --git a/text_cmds/sort/sort.1.in b/text_cmds/sort/sort.1.in
new file mode 100644
index 0000000..a2234e8
--- /dev/null
+++ b/text_cmds/sort/sort.1.in
@@ -0,0 +1,639 @@
+.\" $OpenBSD: sort.1,v 1.45 2015/03/19 13:51:10 jmc Exp $
+.\" $FreeBSD: head/usr.bin/sort/sort.1.in 305613 2016-09-08 14:50:23Z gabor $
+.\"
+.\" Copyright (c) 1991, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" the Institute of Electrical and Electronics Engineers, Inc.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)sort.1 8.1 (Berkeley) 6/6/93
+.\"
+.Dd March 19, 2015
+.Dt SORT 1
+.Os
+.Sh NAME
+.Nm sort
+.Nd sort or merge records (lines) of text and binary files
+.Sh SYNOPSIS
+.Nm
+.Bk -words
+.Op Fl bcCdfghiRMmnrsuVz
+.Sm off
+.Op Fl k\ \& Ar field1 Op , Ar field2
+.Sm on
+.Op Fl S Ar memsize
+.Ek
+.Op Fl T Ar dir
+.Op Fl t Ar char
+.Op Fl o Ar output
+.Op Ar file ...
+.Nm
+.Fl Fl help
+.Nm
+.Fl Fl version
+.Sh DESCRIPTION
+The
+.Nm
+utility sorts text and binary files by lines.
+A line is a record separated from the subsequent record by a
+newline (default) or NUL \'\\0\' character (-z option).
+A record can contain any printable or unprintable characters.
+Comparisons are based on one or more sort keys extracted from
+each line of input, and are performed lexicographically,
+according to the current locale's collating rules and the
+specified command-line options that can tune the actual
+sorting behavior.
+By default, if keys are not given,
+.Nm
+uses entire lines for comparison.
+.Pp
+The command line options are as follows:
+.Bl -tag -width Ds
+.It Fl c , Fl Fl check , Fl C , Fl Fl check=silent|quiet
+Check that the single input file is sorted.
+If the file is not sorted,
+.Nm
+produces the appropriate error messages and exits with code 1,
+otherwise returns 0.
+If
+.Fl C
+or
+.Fl Fl check=silent
+is specified,
+.Nm
+produces no output.
+This is a "silent" version of
+.Fl c .
+.It Fl m , Fl Fl merge
+Merge only.
+The input files are assumed to be pre-sorted.
+If they are not sorted the output order is undefined.
+.It Fl o Ar output , Fl Fl output Ns = Ns Ar output
+Print the output to the
+.Ar output
+file instead of the standard output.
+.It Fl S Ar size , Fl Fl buffer-size Ns = Ns Ar size
+Use
+.Ar size
+for the maximum size of the memory buffer.
+Size modifiers %,b,K,M,G,T,P,E,Z,Y can be used.
+If a memory limit is not explicitly specified,
+.Nm
+takes up to about 90% of available memory.
+If the file size is too big to fit into the memory buffer,
+the temporary disk files are used to perform the sorting.
+.It Fl T Ar dir , Fl Fl temporary-directory Ns = Ns Ar dir
+Store temporary files in the directory
+.Ar dir .
+The default path is the value of the environment variable
+.Ev TMPDIR
+or
+.Pa /var/tmp
+if
+.Ev TMPDIR
+is not defined.
+.It Fl u , Fl Fl unique
+Unique keys.
+Suppress all lines that have a key that is equal to an already
+processed one.
+This option, similarly to
+.Fl s ,
+implies a stable sort.
+If used with
+.Fl c
+or
+.Fl C ,
+.Nm
+also checks that there are no lines with duplicate keys.
+.It Fl s
+Stable sort.
+This option maintains the original record order of records that have
+an equal key.
+This is a non-standard feature, but it is widely accepted and used.
+.It Fl Fl version
+Print the version and silently exits.
+.It Fl Fl help
+Print the help text and silently exits.
+.El
+.Pp
+The following options override the default ordering rules.
+When ordering options appear independently of key field
+specifications, they apply globally to all sort keys.
+When attached to a specific key (see
+.Fl k ) ,
+the ordering options override all global ordering options for
+the key they are attached to.
+.Bl -tag -width indent
+.It Fl b , Fl Fl ignore-leading-blanks
+Ignore leading blank characters when comparing lines.
+.It Fl d , Fl Fl dictionary-order
+Consider only blank spaces and alphanumeric characters in comparisons.
+.It Fl f , Fl Fl ignore-case
+Convert all lowercase characters to their uppercase equivalent
+before comparison, that is, perform case-independent sorting.
+.It Fl g , Fl Fl general-numeric-sort , Fl Fl sort=general-numeric
+Sort by general numerical value.
+As opposed to
+.Fl n ,
+this option handles general floating points.
+It has a more
+permissive format than that allowed by
+.Fl n
+but it has a significant performance drawback.
+.It Fl h , Fl Fl human-numeric-sort , Fl Fl sort=human-numeric
+Sort by numerical value, but take into account the SI suffix,
+if present.
+Sort first by numeric sign (negative, zero, or
+positive); then by SI suffix (either empty, or `k' or `K', or one
+of `MGTPEZY', in that order); and finally by numeric value.
+The SI suffix must immediately follow the number.
+For example, '12345K' sorts before '1M', because M is "larger" than K.
+This sort option is useful for sorting the output of a single invocation
+of 'df' command with
+.Fl h
+or
+.Fl H
+options (human-readable).
+.It Fl i , Fl Fl ignore-nonprinting
+Ignore all non-printable characters.
+.It Fl M , Fl Fl month-sort , Fl Fl sort=month
+Sort by month abbreviations.
+Unknown strings are considered smaller than the month names.
+.It Fl n , Fl Fl numeric-sort , Fl Fl sort=numeric
+Sort fields numerically by arithmetic value.
+Fields are supposed to have optional blanks in the beginning, an
+optional minus sign, zero or more digits (including decimal point and
+possible thousand separators).
+.It Fl R , Fl Fl random-sort , Fl Fl sort=random
+Sort by a random order.
+This is a random permutation of the inputs except that
+the equal keys sort together.
+It is implemented by hashing the input keys and sorting
+the hash values.
+The hash function is chosen randomly.
+The hash function is randomized by
+.Cm /dev/random
+content, or by file content if it is specified by
+.Fl Fl random-source .
+Even if multiple sort fields are specified,
+the same random hash function is used for all of them.
+.It Fl r , Fl Fl reverse
+Sort in reverse order.
+.It Fl V , Fl Fl version-sort
+Sort version numbers.
+The input lines are treated as file names in form
+PREFIX VERSION SUFFIX, where SUFFIX matches the regular expression
+"(\.([A-Za-z~][A-Za-z0-9~]*)?)*".
+The files are compared by their prefixes and versions (leading
+zeros are ignored in version numbers, see example below).
+If an input string does not match the pattern, then it is compared
+using the byte compare function.
+All string comparisons are performed in C locale, the locale
+environment setting is ignored.
+.Bl -tag -width indent
+.It Example:
+.It $ ls sort* | sort -V
+.It sort-1.022.tgz
+.It sort-1.23.tgz
+.It sort-1.23.1.tgz
+.It sort-1.024.tgz
+.It sort-1.024.003.
+.It sort-1.024.003.tgz
+.It sort-1.024.07.tgz
+.It sort-1.024.009.tgz
+.El
+.El
+.Pp
+The treatment of field separators can be altered using these options:
+.Bl -tag -width indent
+.It Fl b , Fl Fl ignore-leading-blanks
+Ignore leading blank space when determining the start
+and end of a restricted sort key (see
+.Fl k ) .
+If
+.Fl b
+is specified before the first
+.Fl k
+option, it applies globally to all key specifications.
+Otherwise,
+.Fl b
+can be attached independently to each
+.Ar field
+argument of the key specifications.
+.Fl b .
+.It Xo
+.Fl k Ar field1 Ns Op , Ns Ar field2 ,
+.Fl Fl key Ns = Ns Ar field1 Ns Op , Ns Ar field2
+.Xc
+Define a restricted sort key that has the starting position
+.Ar field1 ,
+and optional ending position
+.Ar field2
+of a key field.
+The
+.Fl k
+option may be specified multiple times,
+in which case subsequent keys are compared when earlier keys compare equal.
+The
+.Fl k
+option replaces the obsolete options
+.Cm \(pl Ns Ar pos1
+and
+.Fl Ns Ar pos2 ,
+but the old notation is also supported.
+.It Fl t Ar char , Fl Fl field-separator Ns = Ns Ar char
+Use
+.Ar char
+as a field separator character.
+The initial
+.Ar char
+is not considered to be part of a field when determining key offsets.
+Each occurrence of
+.Ar char
+is significant (for example,
+.Dq Ar charchar
+delimits an empty field).
+If
+.Fl t
+is not specified, the default field separator is a sequence of
+blank space characters, and consecutive blank spaces do
+.Em not
+delimit an empty field, however, the initial blank space
+.Em is
+considered part of a field when determining key offsets.
+To use NUL as field separator, use
+.Fl t
+\'\\0\'.
+.It Fl z , Fl Fl zero-terminated
+Use NUL as record separator.
+By default, records in the files are supposed to be separated by
+the newline characters.
+With this option, NUL (\'\\0\') is used as a record separator character.
+.El
+.Pp
+Other options:
+.Bl -tag -width indent
+.It Fl Fl batch-size Ns = Ns Ar num
+Specify maximum number of files that can be opened by
+.Nm
+at once.
+This option affects behavior when having many input files or using
+temporary files.
+The default value is 16.
+.It Fl Fl compress-program Ns = Ns Ar PROGRAM
+Use PROGRAM to compress temporary files.
+PROGRAM must compress standard input to standard output, when called
+without arguments.
+When called with argument
+.Fl d
+it must decompress standard input to standard output.
+If PROGRAM fails,
+.Nm
+must exit with error.
+An example of PROGRAM that can be used here is bzip2.
+.It Fl Fl random-source Ns = Ns Ar filename
+In random sort, the file content is used as the source of the 'seed' data
+for the hash function choice.
+Two invocations of random sort with the same seed data will use
+the same hash function and will produce the same result if the input is
+also identical.
+By default, file
+.Cm /dev/random
+is used.
+.It Fl Fl debug
+Print some extra information about the sorting process to the
+standard output.
+%%THREADS%%.It Fl Fl parallel
+%%THREADS%%Set the maximum number of execution threads.
+%%THREADS%%Default number equals to the number of CPUs.
+.It Fl Fl files0-from Ns = Ns Ar filename
+Take the input file list from the file
+.Ar filename .
+The file names must be separated by NUL
+(like the output produced by the command "find ... -print0").
+.It Fl Fl radixsort
+Try to use radix sort, if the sort specifications allow.
+The radix sort can only be used for trivial locales (C and POSIX),
+and it cannot be used for numeric or month sort.
+Radix sort is very fast and stable.
+.It Fl Fl mergesort
+Use mergesort.
+This is a universal algorithm that can always be used,
+but it is not always the fastest.
+.It Fl Fl qsort
+Try to use quick sort, if the sort specifications allow.
+This sort algorithm cannot be used with
+.Fl u
+and
+.Fl s .
+.It Fl Fl heapsort
+Try to use heap sort, if the sort specifications allow.
+This sort algorithm cannot be used with
+.Fl u
+and
+.Fl s .
+.It Fl Fl mmap
+Try to use file memory mapping system call.
+It may increase speed in some cases.
+.El
+.Pp
+The following operands are available:
+.Bl -tag -width indent
+.It Ar file
+The pathname of a file to be sorted, merged, or checked.
+If no
+.Ar file
+operands are specified, or if a
+.Ar file
+operand is
+.Fl ,
+the standard input is used.
+.El
+.Pp
+A field is defined as a maximal sequence of characters other than the
+field separator and record separator (newline by default).
+Initial blank spaces are included in the field unless
+.Fl b
+has been specified;
+the first blank space of a sequence of blank spaces acts as the field
+separator and is included in the field (unless
+.Fl t
+is specified).
+For example, all blank spaces at the beginning of a line are
+considered to be part of the first field.
+.Pp
+Fields are specified by the
+.Sm off
+.Fl k\ \& Ar field1 Op , Ar field2
+.Sm on
+command-line option.
+If
+.Ar field2
+is missing, the end of the key defaults to the end of the line.
+.Pp
+The arguments
+.Ar field1
+and
+.Ar field2
+have the form
+.Em m.n
+.Em (m,n > 0)
+and can be followed by one or more of the modifiers
+.Cm b , d , f , i ,
+.Cm n , g , M
+and
+.Cm r ,
+which correspond to the options discussed above.
+When
+.Cm b
+is specified it applies only to
+.Ar field1
+or
+.Ar field2
+where it is specified while the rest of the modifiers
+apply to the whole key field regardless if they are
+specified only with
+.Ar field1
+or
+.Ar field2
+or both.
+A
+.Ar field1
+position specified by
+.Em m.n
+is interpreted as the
+.Em n Ns th
+character from the beginning of the
+.Em m Ns th
+field.
+A missing
+.Em \&.n
+in
+.Ar field1
+means
+.Ql \&.1 ,
+indicating the first character of the
+.Em m Ns th
+field; if the
+.Fl b
+option is in effect,
+.Em n
+is counted from the first non-blank character in the
+.Em m Ns th
+field;
+.Em m Ns \&.1b
+refers to the first non-blank character in the
+.Em m Ns th
+field.
+.No 1\&. Ns Em n
+refers to the
+.Em n Ns th
+character from the beginning of the line;
+if
+.Em n
+is greater than the length of the line, the field is taken to be empty.
+.Pp
+.Em n Ns th
+positions are always counted from the field beginning, even if the field
+is shorter than the number of specified positions.
+Thus, the key can really start from a position in a subsequent field.
+.Pp
+A
+.Ar field2
+position specified by
+.Em m.n
+is interpreted as the
+.Em n Ns th
+character (including separators) from the beginning of the
+.Em m Ns th
+field.
+A missing
+.Em \&.n
+indicates the last character of the
+.Em m Ns th
+field;
+.Em m
+= \&0
+designates the end of a line.
+Thus the option
+.Fl k Ar v.x,w.y
+is synonymous with the obsolete option
+.Cm \(pl Ns Ar v-\&1.x-\&1
+.Fl Ns Ar w-\&1.y ;
+when
+.Em y
+is omitted,
+.Fl k Ar v.x,w
+is synonymous with
+.Cm \(pl Ns Ar v-\&1.x-\&1
+.Fl Ns Ar w\&.0 .
+The obsolete
+.Cm \(pl Ns Ar pos1
+.Fl Ns Ar pos2
+option is still supported, except for
+.Fl Ns Ar w\&.0b ,
+which has no
+.Fl k
+equivalent.
+.Sh ENVIRONMENT
+.Bl -tag -width Fl
+.It Ev LC_COLLATE
+Locale settings to be used to determine the collation for
+sorting records.
+.It Ev LC_CTYPE
+Locale settings to be used to case conversion and classification
+of characters, that is, which characters are considered
+whitespaces, etc.
+.It Ev LC_MESSAGES
+Locale settings that determine the language of output messages
+that
+.Nm
+prints out.
+.It Ev LC_NUMERIC
+Locale settings that determine the number format used in numeric sort.
+.It Ev LC_TIME
+Locale settings that determine the month format used in month sort.
+.It Ev LC_ALL
+Locale settings that override all of the above locale settings.
+This environment variable can be used to set all these settings
+to the same value at once.
+.It Ev LANG
+Used as a last resort to determine different kinds of locale-specific
+behavior if neither the respective environment variable, nor
+.Ev LC_ALL
+are set.
+%%NLS%%.It Ev NLSPATH
+%%NLS%%Path to NLS catalogs.
+.It Ev TMPDIR
+Path to the directory in which temporary files will be stored.
+Note that
+.Ev TMPDIR
+may be overridden by the
+.Fl T
+option.
+.It Ev GNUSORT_NUMERIC_COMPATIBILITY
+If defined
+.Fl t
+will not override the locale numeric symbols, that is, thousand
+separators and decimal separators.
+By default, if we specify
+.Fl t
+with the same symbol as the thousand separator or decimal point,
+the symbol will be treated as the field separator.
+Older behavior was less definite; the symbol was treated as both field
+separator and numeric separator, simultaneously.
+This environment variable enables the old behavior.
+.It Ev GNUSORT_COMPATIBLE_BLANKS
+Use 'space' symbols as field separators (as modern GNU sort does).
+.El
+.Sh FILES
+.Bl -tag -width Pa -compact
+.It Pa /var/tmp/.bsdsort.PID.*
+Temporary files.
+.It Pa /dev/random
+Default seed file for the random sort.
+.El
+.Sh EXIT STATUS
+The
+.Nm
+utility shall exit with one of the following values:
+.Pp
+.Bl -tag -width flag -compact
+.It 0
+Successfully sorted the input files or if used with
+.Fl c
+or
+.Fl C ,
+the input file already met the sorting criteria.
+.It 1
+On disorder (or non-uniqueness) with the
+.Fl c
+or
+.Fl C
+options.
+.It 2
+An error occurred.
+.El
+.Sh SEE ALSO
+.Xr comm 1 ,
+.Xr join 1 ,
+.Xr uniq 1
+.Sh STANDARDS
+The
+.Nm
+utility is compliant with the
+.St -p1003.1-2008
+specification.
+.Pp
+The flags
+.Op Fl ghRMSsTVz
+are extensions to the POSIX specification.
+.Pp
+All long options are extensions to the specification, some of them are
+provided for compatibility with GNU versions and some of them are
+own extensions.
+.Pp
+The old key notations
+.Cm \(pl Ns Ar pos1
+and
+.Fl Ns Ar pos2
+come from older versions of
+.Nm
+and are still supported but their use is highly discouraged.
+.Sh HISTORY
+A
+.Nm
+command first appeared in
+.At v3 .
+.Sh AUTHORS
+.An Gabor Kovesdan Aq Mt gabor@FreeBSD.org ,
+.Pp
+.An Oleg Moskalenko Aq Mt mom040267@gmail.com
+.Sh NOTES
+This implementation of
+.Nm
+has no limits on input line length (other than imposed by available
+memory) or any restrictions on bytes allowed within lines.
+.Pp
+The performance depends highly on locale settings,
+efficient choice of sort keys and key complexity.
+The fastest sort is with locale C, on whole lines,
+with option
+.Fl s .
+In general, locale C is the fastest, then single-byte
+locales follow and multi-byte locales as the slowest but
+the correct collation order is always respected.
+As for the key specification, the simpler to process the
+lines the faster the search will be.
+.Pp
+When sorting by arithmetic value, using
+.Fl n
+results in much better performance than
+.Fl g
+so its use is encouraged
+whenever possible.
diff --git a/text_cmds/sort/sort.c b/text_cmds/sort/sort.c
new file mode 100644
index 0000000..9200606
--- /dev/null
+++ b/text_cmds/sort/sort.c
@@ -0,0 +1,1325 @@
+/*-
+ * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/types.h>
+
+#include <err.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <locale.h>
+#ifndef __APPLE__
+#include <md5.h>
+#endif
+#include <regex.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "coll.h"
+#include "file.h"
+#include "sort.h"
+
+#ifndef WITHOUT_NLS
+#include <nl_types.h>
+nl_catd catalog;
+#endif
+
+#define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz"
+
+#define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
+#define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)
+
+static bool need_random;
+static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
+static const void *random_seed;
+static size_t random_seed_size;
+
+MD5_CTX md5_ctx;
+
+/*
+ * Default messages to use when NLS is disabled or no catalogue
+ * is found.
+ */
+const char *nlsstr[] = { "",
+/* 1*/"mutually exclusive flags",
+/* 2*/"extra argument not allowed with -c",
+/* 3*/"Unknown feature",
+/* 4*/"Wrong memory buffer specification",
+/* 5*/"0 field in key specs",
+/* 6*/"0 column in key specs",
+/* 7*/"Wrong file mode",
+/* 8*/"Cannot open file for reading",
+/* 9*/"Radix sort cannot be used with these sort options",
+/*10*/"The chosen sort method cannot be used with stable and/or unique sort",
+/*11*/"Invalid key position",
+/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
+ "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
+ "[-o outfile] [--batch-size size] [--files0-from file] "
+ "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
+ "[--mmap] "
+#if defined(SORT_THREADS)
+ "[--parallel thread_no] "
+#endif
+ "[--human-numeric-sort] "
+ "[--version-sort] [--random-sort [--random-source file]] "
+ "[--compress-program program] [file ...]\n" };
+
+struct sort_opts sort_opts_vals;
+
+bool debug_sort;
+bool need_hint;
+
+int (*isblank_f)(int c) = isblank;
+int (*iswblank_f)(wint_t c) = iswblank;
+
+#if defined(SORT_THREADS)
+unsigned int ncpu = 1;
+size_t nthreads = 1;
+#endif
+
+static bool gnusort_numeric_compatibility;
+
+static struct sort_mods default_sort_mods_object;
+struct sort_mods * const default_sort_mods = &default_sort_mods_object;
+
+static bool print_symbols_on_debug;
+
+/*
+ * Arguments from file (when file0-from option is used:
+ */
+static size_t argc_from_file0 = (size_t)-1;
+static char **argv_from_file0;
+
+/*
+ * Placeholder symbols for options which have no single-character equivalent
+ */
+enum
+{
+ SORT_OPT = CHAR_MAX + 1,
+ HELP_OPT,
+ FF_OPT,
+ BS_OPT,
+ VERSION_OPT,
+ DEBUG_OPT,
+#if defined(SORT_THREADS)
+ PARALLEL_OPT,
+#endif
+ RANDOMSOURCE_OPT,
+ COMPRESSPROGRAM_OPT,
+ QSORT_OPT,
+ MERGESORT_OPT,
+ HEAPSORT_OPT,
+ RADIXSORT_OPT,
+ MMAP_OPT
+};
+
+#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
+static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
+
+static struct option long_options[] = {
+ { "batch-size", required_argument, NULL, BS_OPT },
+ { "buffer-size", required_argument, NULL, 'S' },
+ { "check", optional_argument, NULL, 'c' },
+ { "check=silent|quiet", optional_argument, NULL, 'C' },
+ { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
+ { "debug", no_argument, NULL, DEBUG_OPT },
+ { "dictionary-order", no_argument, NULL, 'd' },
+ { "field-separator", required_argument, NULL, 't' },
+ { "files0-from", required_argument, NULL, FF_OPT },
+ { "general-numeric-sort", no_argument, NULL, 'g' },
+ { "heapsort", no_argument, NULL, HEAPSORT_OPT },
+ { "help",no_argument, NULL, HELP_OPT },
+ { "human-numeric-sort", no_argument, NULL, 'h' },
+ { "ignore-leading-blanks", no_argument, NULL, 'b' },
+ { "ignore-case", no_argument, NULL, 'f' },
+ { "ignore-nonprinting", no_argument, NULL, 'i' },
+ { "key", required_argument, NULL, 'k' },
+ { "merge", no_argument, NULL, 'm' },
+ { "mergesort", no_argument, NULL, MERGESORT_OPT },
+ { "mmap", no_argument, NULL, MMAP_OPT },
+ { "month-sort", no_argument, NULL, 'M' },
+ { "numeric-sort", no_argument, NULL, 'n' },
+ { "output", required_argument, NULL, 'o' },
+#if defined(SORT_THREADS)
+ { "parallel", required_argument, NULL, PARALLEL_OPT },
+#endif
+ { "qsort", no_argument, NULL, QSORT_OPT },
+ { "radixsort", no_argument, NULL, RADIXSORT_OPT },
+ { "random-sort", no_argument, NULL, 'R' },
+ { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
+ { "reverse", no_argument, NULL, 'r' },
+ { "sort", required_argument, NULL, SORT_OPT },
+ { "stable", no_argument, NULL, 's' },
+ { "temporary-directory",required_argument, NULL, 'T' },
+ { "unique", no_argument, NULL, 'u' },
+ { "version", no_argument, NULL, VERSION_OPT },
+ { "version-sort",no_argument, NULL, 'V' },
+ { "zero-terminated", no_argument, NULL, 'z' },
+ { NULL, no_argument, NULL, 0 }
+};
+
+void fix_obsolete_keys(int *argc, char **argv);
+
+/*
+ * Check where sort modifier is present
+ */
+static bool
+sort_modifier_empty(struct sort_mods *sm)
+{
+
+ if (sm == NULL)
+ return (true);
+ return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
+ sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag || sm->iflag));
+}
+
+/*
+ * Print out usage text.
+ */
+static void
+usage(bool opt_err)
+{
+ FILE *out;
+
+ out = opt_err ? stderr : stdout;
+
+ fprintf(out, getstr(12), getprogname());
+ if (opt_err)
+ exit(2);
+ exit(0);
+}
+
+/*
+ * Read input file names from a file (file0-from option).
+ */
+static void
+read_fns_from_file0(const char *fn)
+{
+ FILE *f;
+ char *line = NULL;
+ size_t linesize = 0;
+ ssize_t linelen;
+
+ if (fn == NULL)
+ return;
+
+ f = fopen(fn, "r");
+ if (f == NULL)
+ err(2, "%s", fn);
+
+ while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
+ if (*line != '\0') {
+ if (argc_from_file0 == (size_t) - 1)
+ argc_from_file0 = 0;
+ ++argc_from_file0;
+ argv_from_file0 = sort_realloc(argv_from_file0,
+ argc_from_file0 * sizeof(char *));
+ if (argv_from_file0 == NULL)
+ err(2, NULL);
+ argv_from_file0[argc_from_file0 - 1] = line;
+ } else {
+ free(line);
+ }
+ line = NULL;
+ linesize = 0;
+ }
+ if (ferror(f))
+ err(2, "%s: getdelim", fn);
+
+ closefile(f, fn);
+}
+
+/*
+ * Check how much RAM is available for the sort.
+ */
+static void
+set_hw_params(void)
+{
+ long pages, psize;
+
+#if defined(SORT_THREADS)
+ ncpu = 1;
+#endif
+
+ pages = sysconf(_SC_PHYS_PAGES);
+ if (pages < 1) {
+ perror("sysconf pages");
+ pages = 1;
+ }
+ psize = sysconf(_SC_PAGESIZE);
+ if (psize < 1) {
+ perror("sysconf psize");
+ psize = 4096;
+ }
+#if defined(SORT_THREADS)
+ ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
+ if (ncpu < 1)
+ ncpu = 1;
+ else if(ncpu > 32)
+ ncpu = 32;
+
+ nthreads = ncpu;
+#endif
+
+ free_memory = (unsigned long long) pages * (unsigned long long) psize;
+ available_free_memory = free_memory / 2;
+
+ if (available_free_memory < 1024)
+ available_free_memory = 1024;
+}
+
+/*
+ * Convert "plain" symbol to wide symbol, with default value.
+ */
+static void
+conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
+{
+
+ if (wc && c) {
+ int res;
+
+ res = mbtowc(wc, c, MB_CUR_MAX);
+ if (res < 1)
+ *wc = def;
+ }
+}
+
+/*
+ * Set current locale symbols.
+ */
+static void
+set_locale(void)
+{
+ struct lconv *lc;
+ const char *locale;
+
+ setlocale(LC_ALL, "");
+
+ lc = localeconv();
+
+ if (lc) {
+ /* obtain LC_NUMERIC info */
+ /* Convert to wide char form */
+ conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
+ symbol_decimal_point);
+ conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
+ symbol_thousands_sep);
+ conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
+ symbol_positive_sign);
+ conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
+ symbol_negative_sign);
+ }
+
+ if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
+ gnusort_numeric_compatibility = true;
+
+ locale = setlocale(LC_COLLATE, NULL);
+
+ if (locale) {
+ char *tmpl;
+ const char *cclocale;
+
+ tmpl = sort_strdup(locale);
+ cclocale = setlocale(LC_COLLATE, "C");
+ if (cclocale && !strcmp(cclocale, tmpl))
+ byte_sort = true;
+ else {
+ const char *pclocale;
+
+ pclocale = setlocale(LC_COLLATE, "POSIX");
+ if (pclocale && !strcmp(pclocale, tmpl))
+ byte_sort = true;
+ }
+ setlocale(LC_COLLATE, tmpl);
+ sort_free(tmpl);
+ }
+}
+
+/*
+ * Set directory temporary files.
+ */
+static void
+set_tmpdir(void)
+{
+ char *td;
+
+ td = getenv("TMPDIR");
+ if (td != NULL)
+ tmpdir = sort_strdup(td);
+}
+
+/*
+ * Parse -S option.
+ */
+static unsigned long long
+parse_memory_buffer_value(const char *value)
+{
+
+ if (value == NULL)
+ return (available_free_memory);
+ else {
+ char *endptr;
+ unsigned long long membuf;
+
+ endptr = NULL;
+ errno = 0;
+ membuf = strtoll(value, &endptr, 10);
+
+ if (errno != 0) {
+ warn("%s",getstr(4));
+ membuf = available_free_memory;
+ } else {
+ switch (*endptr){
+ case 'Y':
+ membuf *= 1024;
+ /* FALLTHROUGH */
+ case 'Z':
+ membuf *= 1024;
+ /* FALLTHROUGH */
+ case 'E':
+ membuf *= 1024;
+ /* FALLTHROUGH */
+ case 'P':
+ membuf *= 1024;
+ /* FALLTHROUGH */
+ case 'T':
+ membuf *= 1024;
+ /* FALLTHROUGH */
+ case 'G':
+ membuf *= 1024;
+ /* FALLTHROUGH */
+ case 'M':
+ membuf *= 1024;
+ /* FALLTHROUGH */
+ case '\0':
+ case 'K':
+ membuf *= 1024;
+ /* FALLTHROUGH */
+ case 'b':
+ break;
+ case '%':
+ membuf = (available_free_memory * membuf) /
+ 100;
+ break;
+ default:
+ warnc(EINVAL, "%s", optarg);
+ membuf = available_free_memory;
+ }
+ }
+ return (membuf);
+ }
+}
+
+/*
+ * Signal handler that clears the temporary files.
+ */
+static void
+sig_handler(int sig __unused, siginfo_t *siginfo __unused,
+ void *context __unused)
+{
+
+ clear_tmp_files();
+ exit(-1);
+}
+
+/*
+ * Set signal handler on panic signals.
+ */
+static void
+set_signal_handler(void)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = &sig_handler;
+ sa.sa_flags = SA_SIGINFO;
+
+ if (sigaction(SIGTERM, &sa, NULL) < 0) {
+ perror("sigaction");
+ return;
+ }
+ if (sigaction(SIGHUP, &sa, NULL) < 0) {
+ perror("sigaction");
+ return;
+ }
+ if (sigaction(SIGINT, &sa, NULL) < 0) {
+ perror("sigaction");
+ return;
+ }
+ if (sigaction(SIGQUIT, &sa, NULL) < 0) {
+ perror("sigaction");
+ return;
+ }
+ if (sigaction(SIGABRT, &sa, NULL) < 0) {
+ perror("sigaction");
+ return;
+ }
+ if (sigaction(SIGBUS, &sa, NULL) < 0) {
+ perror("sigaction");
+ return;
+ }
+ if (sigaction(SIGSEGV, &sa, NULL) < 0) {
+ perror("sigaction");
+ return;
+ }
+ if (sigaction(SIGUSR1, &sa, NULL) < 0) {
+ perror("sigaction");
+ return;
+ }
+ if (sigaction(SIGUSR2, &sa, NULL) < 0) {
+ perror("sigaction");
+ return;
+ }
+}
+
+/*
+ * Print "unknown" message and exit with status 2.
+ */
+static void
+unknown(const char *what)
+{
+
+ errx(2, "%s: %s", getstr(3), what);
+}
+
+/*
+ * Check whether contradictory input options are used.
+ */
+static void
+check_mutually_exclusive_flags(char c, bool *mef_flags)
+{
+ int fo_index, mec;
+ bool found_others, found_this;
+
+ found_others = found_this = false;
+ fo_index = 0;
+
+ for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
+ mec = mutually_exclusive_flags[i];
+
+ if (mec != c) {
+ if (mef_flags[i]) {
+ if (found_this)
+ errx(1, "%c:%c: %s", c, mec, getstr(1));
+ found_others = true;
+ fo_index = i;
+ }
+ } else {
+ if (found_others)
+ errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
+ mef_flags[i] = true;
+ found_this = true;
+ }
+ }
+}
+
+/*
+ * Initialise sort opts data.
+ */
+static void
+set_sort_opts(void)
+{
+
+ memset(&default_sort_mods_object, 0,
+ sizeof(default_sort_mods_object));
+ memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
+ default_sort_mods_object.func =
+ get_sort_func(&default_sort_mods_object);
+}
+
+/*
+ * Set a sort modifier on a sort modifiers object.
+ */
+static bool
+set_sort_modifier(struct sort_mods *sm, int c)
+{
+
+ if (sm) {
+ switch (c){
+ case 'b':
+ sm->bflag = true;
+ break;
+ case 'd':
+ sm->dflag = true;
+ break;
+ case 'f':
+ sm->fflag = true;
+ break;
+ case 'g':
+ sm->gflag = true;
+ need_hint = true;
+ break;
+ case 'i':
+ sm->iflag = true;
+ break;
+ case 'R':
+ sm->Rflag = true;
+ need_random = true;
+ break;
+ case 'M':
+ initialise_months();
+ sm->Mflag = true;
+ need_hint = true;
+ break;
+ case 'n':
+ sm->nflag = true;
+ need_hint = true;
+ print_symbols_on_debug = true;
+ break;
+ case 'r':
+ sm->rflag = true;
+ break;
+ case 'V':
+ sm->Vflag = true;
+ break;
+ case 'h':
+ sm->hflag = true;
+ need_hint = true;
+ print_symbols_on_debug = true;
+ break;
+ default:
+ return false;
+ }
+ sort_opts_vals.complex_sort = true;
+ sm->func = get_sort_func(sm);
+ }
+ return (true);
+}
+
+/*
+ * Parse POS in -k option.
+ */
+static int
+parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
+{
+ regmatch_t pmatch[4];
+ regex_t re;
+ char *c, *f;
+ const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
+ size_t len, nmatch;
+ int ret;
+
+ ret = -1;
+ nmatch = 4;
+ c = f = NULL;
+
+ if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
+ return (-1);
+
+ if (regexec(&re, s, nmatch, pmatch, 0) != 0)
+ goto end;
+
+ if (pmatch[0].rm_eo <= pmatch[0].rm_so)
+ goto end;
+
+ if (pmatch[1].rm_eo <= pmatch[1].rm_so)
+ goto end;
+
+ len = pmatch[1].rm_eo - pmatch[1].rm_so;
+ f = sort_malloc((len + 1) * sizeof(char));
+
+ strncpy(f, s + pmatch[1].rm_so, len);
+ f[len] = '\0';
+
+ if (second) {
+ errno = 0;
+ ks->f2 = (size_t) strtoul(f, NULL, 10);
+ if (errno != 0)
+ err(2, "-k");
+ if (ks->f2 == 0) {
+ warn("%s",getstr(5));
+ goto end;
+ }
+ } else {
+ errno = 0;
+ ks->f1 = (size_t) strtoul(f, NULL, 10);
+ if (errno != 0)
+ err(2, "-k");
+ if (ks->f1 == 0) {
+ warn("%s",getstr(5));
+ goto end;
+ }
+ }
+
+ if (pmatch[2].rm_eo > pmatch[2].rm_so) {
+ len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
+ c = sort_malloc((len + 1) * sizeof(char));
+
+ strncpy(c, s + pmatch[2].rm_so + 1, len);
+ c[len] = '\0';
+
+ if (second) {
+ errno = 0;
+ ks->c2 = (size_t) strtoul(c, NULL, 10);
+ if (errno != 0)
+ err(2, "-k");
+ } else {
+ errno = 0;
+ ks->c1 = (size_t) strtoul(c, NULL, 10);
+ if (errno != 0)
+ err(2, "-k");
+ if (ks->c1 == 0) {
+ warn("%s",getstr(6));
+ goto end;
+ }
+ }
+ } else {
+ if (second)
+ ks->c2 = 0;
+ else
+ ks->c1 = 1;
+ }
+
+ if (pmatch[3].rm_eo > pmatch[3].rm_so) {
+ regoff_t i = 0;
+
+ for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
+ check_mutually_exclusive_flags(s[i], mef_flags);
+ if (s[i] == 'b') {
+ if (second)
+ ks->pos2b = true;
+ else
+ ks->pos1b = true;
+ } else if (!set_sort_modifier(&(ks->sm), s[i]))
+ goto end;
+ }
+ }
+
+ ret = 0;
+
+end:
+
+ if (c)
+ sort_free(c);
+ if (f)
+ sort_free(f);
+ regfree(&re);
+
+ return (ret);
+}
+
+/*
+ * Parse -k option value.
+ */
+static int
+parse_k(const char *s, struct key_specs *ks)
+{
+ int ret = -1;
+ bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
+ { false, false, false, false, false, false };
+
+ if (s && *s) {
+ char *sptr;
+
+ sptr = strchr(s, ',');
+ if (sptr) {
+ size_t size1;
+ char *pos1, *pos2;
+
+ size1 = sptr - s;
+
+ if (size1 < 1)
+ return (-1);
+ pos1 = sort_malloc((size1 + 1) * sizeof(char));
+
+ strncpy(pos1, s, size1);
+ pos1[size1] = '\0';
+
+ ret = parse_pos(pos1, ks, mef_flags, false);
+
+ sort_free(pos1);
+ if (ret < 0)
+ return (ret);
+
+ pos2 = sort_strdup(sptr + 1);
+ ret = parse_pos(pos2, ks, mef_flags, true);
+ sort_free(pos2);
+ } else
+ ret = parse_pos(s, ks, mef_flags, false);
+ }
+
+ return (ret);
+}
+
+/*
+ * Parse POS in +POS -POS option.
+ */
+static int
+parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
+{
+ regex_t re;
+ regmatch_t pmatch[4];
+ char *c, *f;
+ const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
+ int ret;
+ size_t len, nmatch;
+
+ ret = -1;
+ nmatch = 4;
+ c = f = NULL;
+ *nc = *nf = 0;
+
+ if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
+ return (-1);
+
+ if (regexec(&re, s, nmatch, pmatch, 0) != 0)
+ goto end;
+
+ if (pmatch[0].rm_eo <= pmatch[0].rm_so)
+ goto end;
+
+ if (pmatch[1].rm_eo <= pmatch[1].rm_so)
+ goto end;
+
+ len = pmatch[1].rm_eo - pmatch[1].rm_so;
+ f = sort_malloc((len + 1) * sizeof(char));
+
+ strncpy(f, s + pmatch[1].rm_so, len);
+ f[len] = '\0';
+
+ errno = 0;
+ *nf = (size_t) strtoul(f, NULL, 10);
+ if (errno != 0)
+ errx(2, "%s", getstr(11));
+
+ if (pmatch[2].rm_eo > pmatch[2].rm_so) {
+ len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
+ c = sort_malloc((len + 1) * sizeof(char));
+
+ strncpy(c, s + pmatch[2].rm_so + 1, len);
+ c[len] = '\0';
+
+ errno = 0;
+ *nc = (size_t) strtoul(c, NULL, 10);
+ if (errno != 0)
+ errx(2, "%s", getstr(11));
+ }
+
+ if (pmatch[3].rm_eo > pmatch[3].rm_so) {
+
+ len = pmatch[3].rm_eo - pmatch[3].rm_so;
+
+ strncpy(sopts, s + pmatch[3].rm_so, len);
+ sopts[len] = '\0';
+ }
+
+ ret = 0;
+
+end:
+ if (c)
+ sort_free(c);
+ if (f)
+ sort_free(f);
+ regfree(&re);
+
+ return (ret);
+}
+
+/*
+ * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax
+ */
+void
+fix_obsolete_keys(int *argc, char **argv)
+{
+ char sopt[129];
+
+ for (int i = 1; i < *argc; i++) {
+ char *arg1;
+
+ arg1 = argv[i];
+
+ if (strlen(arg1) > 1 && arg1[0] == '+') {
+ int c1, f1;
+ char sopts1[128];
+
+ sopts1[0] = 0;
+ c1 = f1 = 0;
+
+ if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
+ continue;
+ else {
+ f1 += 1;
+ c1 += 1;
+ if (i + 1 < *argc) {
+ char *arg2 = argv[i + 1];
+
+ if (strlen(arg2) > 1 &&
+ arg2[0] == '-') {
+ int c2, f2;
+ char sopts2[128];
+
+ sopts2[0] = 0;
+ c2 = f2 = 0;
+
+ if (parse_pos_obs(arg2 + 1,
+ &f2, &c2, sopts2) >= 0) {
+ if (c2 > 0)
+ f2 += 1;
+ sprintf(sopt, "-k%d.%d%s,%d.%d%s",
+ f1, c1, sopts1, f2, c2, sopts2);
+ argv[i] = sort_strdup(sopt);
+ for (int j = i + 1; j + 1 < *argc; j++)
+ argv[j] = argv[j + 1];
+ *argc -= 1;
+ continue;
+ }
+ }
+ }
+ sprintf(sopt, "-k%d.%d%s", f1, c1, sopts1);
+ argv[i] = sort_strdup(sopt);
+ }
+ }
+ }
+}
+
+/*
+ * Set random seed
+ */
+static void
+set_random_seed(void)
+{
+ if (need_random) {
+
+ if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
+ FILE* fseed;
+ MD5_CTX ctx;
+ char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
+ size_t sz = 0;
+
+ fseed = openfile(random_source, "r");
+ while (!feof(fseed)) {
+ int cr;
+
+ cr = fgetc(fseed);
+ if (cr == EOF)
+ break;
+
+ rsd[sz++] = (char) cr;
+
+ if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE)
+ break;
+ }
+
+ closefile(fseed, random_source);
+
+ MD5Init(&ctx);
+ MD5Update(&ctx, rsd, sz);
+
+ random_seed = MD5End(&ctx, NULL);
+ random_seed_size = strlen(random_seed);
+
+ } else {
+ MD5_CTX ctx;
+ char *b;
+
+ MD5Init(&ctx);
+ b = MD5File(random_source, NULL);
+ if (b == NULL)
+ err(2, NULL);
+
+ random_seed = b;
+ random_seed_size = strlen(b);
+ }
+
+ MD5Init(&md5_ctx);
+ if(random_seed_size>0) {
+ MD5Update(&md5_ctx, random_seed, random_seed_size);
+ }
+ }
+}
+
+/*
+ * Main function.
+ */
+int
+main(int argc, char **argv)
+{
+ char *outfile, *real_outfile;
+ int c, result;
+ bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
+ { false, false, false, false, false, false };
+
+ result = 0;
+ outfile = sort_strdup("-");
+ real_outfile = NULL;
+
+ if(getenv("GNUSORT_COMPATIBLE_BLANKS")) {
+ isblank_f = isspace;
+ iswblank_f = iswspace;
+ }
+
+ struct sort_mods *sm = &default_sort_mods_object;
+
+ init_tmp_files();
+
+ set_signal_handler();
+
+ set_hw_params();
+ set_locale();
+ set_tmpdir();
+ set_sort_opts();
+
+ fix_obsolete_keys(&argc, argv);
+
+ while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
+ != -1)) {
+
+ check_mutually_exclusive_flags(c, mef_flags);
+
+ if (!set_sort_modifier(sm, c)) {
+
+ switch (c) {
+ case 'c':
+ sort_opts_vals.cflag = true;
+ if (optarg) {
+ if (!strcmp(optarg, "diagnose-first"))
+ ;
+ else if (!strcmp(optarg, "silent") ||
+ !strcmp(optarg, "quiet"))
+ sort_opts_vals.csilentflag = true;
+ else if (*optarg)
+ unknown(optarg);
+ }
+ break;
+ case 'C':
+ sort_opts_vals.cflag = true;
+ sort_opts_vals.csilentflag = true;
+ break;
+ case 'k':
+ {
+ sort_opts_vals.complex_sort = true;
+ sort_opts_vals.kflag = true;
+
+ keys_num++;
+ keys = sort_realloc(keys, keys_num *
+ sizeof(struct key_specs));
+ memset(&(keys[keys_num - 1]), 0,
+ sizeof(struct key_specs));
+
+ if (parse_k(optarg, &(keys[keys_num - 1]))
+ < 0) {
+ errc(2, EINVAL, "-k %s", optarg);
+ }
+
+ break;
+ }
+ case 'm':
+ sort_opts_vals.mflag = true;
+ break;
+ case 'o':
+ outfile = sort_realloc(outfile, (strlen(optarg) + 1));
+ strcpy(outfile, optarg);
+ break;
+ case 's':
+ sort_opts_vals.sflag = true;
+ break;
+ case 'S':
+ available_free_memory =
+ parse_memory_buffer_value(optarg);
+ break;
+ case 'T':
+ tmpdir = sort_strdup(optarg);
+ break;
+ case 't':
+ while (strlen(optarg) > 1) {
+ if (optarg[0] != '\\') {
+ errc(2, EINVAL, "%s", optarg);
+ }
+ optarg += 1;
+ if (*optarg == '0') {
+ *optarg = 0;
+ break;
+ }
+ }
+ sort_opts_vals.tflag = true;
+ sort_opts_vals.field_sep = btowc(optarg[0]);
+ if (sort_opts_vals.field_sep == WEOF) {
+ errno = EINVAL;
+ err(2, NULL);
+ }
+ if (!gnusort_numeric_compatibility) {
+ if (symbol_decimal_point == sort_opts_vals.field_sep)
+ symbol_decimal_point = WEOF;
+ if (symbol_thousands_sep == sort_opts_vals.field_sep)
+ symbol_thousands_sep = WEOF;
+ if (symbol_negative_sign == sort_opts_vals.field_sep)
+ symbol_negative_sign = WEOF;
+ if (symbol_positive_sign == sort_opts_vals.field_sep)
+ symbol_positive_sign = WEOF;
+ }
+ break;
+ case 'u':
+ sort_opts_vals.uflag = true;
+ /* stable sort for the correct unique val */
+ sort_opts_vals.sflag = true;
+ break;
+ case 'z':
+ sort_opts_vals.zflag = true;
+ break;
+ case SORT_OPT:
+ if (optarg) {
+ if (!strcmp(optarg, "general-numeric"))
+ set_sort_modifier(sm, 'g');
+ else if (!strcmp(optarg, "human-numeric"))
+ set_sort_modifier(sm, 'h');
+ else if (!strcmp(optarg, "numeric"))
+ set_sort_modifier(sm, 'n');
+ else if (!strcmp(optarg, "month"))
+ set_sort_modifier(sm, 'M');
+ else if (!strcmp(optarg, "random"))
+ set_sort_modifier(sm, 'R');
+ else
+ unknown(optarg);
+ }
+ break;
+#if defined(SORT_THREADS)
+ case PARALLEL_OPT:
+ nthreads = (size_t)(atoi(optarg));
+ if (nthreads < 1)
+ nthreads = 1;
+ if (nthreads > 1024)
+ nthreads = 1024;
+ break;
+#endif
+ case QSORT_OPT:
+ sort_opts_vals.sort_method = SORT_QSORT;
+ break;
+ case MERGESORT_OPT:
+ sort_opts_vals.sort_method = SORT_MERGESORT;
+ break;
+ case MMAP_OPT:
+ use_mmap = true;
+ break;
+ case HEAPSORT_OPT:
+ sort_opts_vals.sort_method = SORT_HEAPSORT;
+ break;
+ case RADIXSORT_OPT:
+ sort_opts_vals.sort_method = SORT_RADIXSORT;
+ break;
+ case RANDOMSOURCE_OPT:
+ random_source = strdup(optarg);
+ break;
+ case COMPRESSPROGRAM_OPT:
+ compress_program = strdup(optarg);
+ break;
+ case FF_OPT:
+ read_fns_from_file0(optarg);
+ break;
+ case BS_OPT:
+ {
+ errno = 0;
+ long mof = strtol(optarg, NULL, 10);
+ if (errno != 0)
+ err(2, "--batch-size");
+ if (mof >= 2)
+ max_open_files = (size_t) mof + 1;
+ }
+ break;
+ case VERSION_OPT:
+ printf("%s\n", VERSION);
+ exit(EXIT_SUCCESS);
+ /* NOTREACHED */
+ break;
+ case DEBUG_OPT:
+ debug_sort = true;
+ break;
+ case HELP_OPT:
+ usage(false);
+ /* NOTREACHED */
+ break;
+ default:
+ usage(true);
+ /* NOTREACHED */
+ }
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argv_from_file0) {
+ argc = argc_from_file0;
+ argv = argv_from_file0;
+ }
+
+#ifndef WITHOUT_NLS
+ catalog = catopen("sort", NL_CAT_LOCALE);
+#endif
+
+ if (sort_opts_vals.cflag && sort_opts_vals.mflag)
+ errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
+
+#ifndef WITHOUT_NLS
+ catclose(catalog);
+#endif
+
+ if (keys_num == 0) {
+ keys_num = 1;
+ keys = sort_realloc(keys, sizeof(struct key_specs));
+ memset(&(keys[0]), 0, sizeof(struct key_specs));
+ keys[0].c1 = 1;
+ keys[0].pos1b = default_sort_mods->bflag;
+ keys[0].pos2b = default_sort_mods->bflag;
+ memcpy(&(keys[0].sm), default_sort_mods,
+ sizeof(struct sort_mods));
+ }
+
+ for (size_t i = 0; i < keys_num; i++) {
+ struct key_specs *ks;
+
+ ks = &(keys[i]);
+
+ if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
+ !(ks->pos2b)) {
+ ks->pos1b = sm->bflag;
+ ks->pos2b = sm->bflag;
+ memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
+ }
+
+ ks->sm.func = get_sort_func(&(ks->sm));
+ }
+
+ if (debug_sort) {
+ printf("Memory to be used for sorting: %llu\n",available_free_memory);
+#if defined(SORT_THREADS)
+ printf("Number of CPUs: %d\n",(int)ncpu);
+ nthreads = 1;
+#endif
+ printf("Using collate rules of %s locale\n",
+ setlocale(LC_COLLATE, NULL));
+ if (byte_sort)
+ printf("Byte sort is used\n");
+ if (print_symbols_on_debug) {
+ printf("Decimal Point: <%lc>\n", symbol_decimal_point);
+ if (symbol_thousands_sep)
+ printf("Thousands separator: <%lc>\n",
+ symbol_thousands_sep);
+ printf("Positive sign: <%lc>\n", symbol_positive_sign);
+ printf("Negative sign: <%lc>\n", symbol_negative_sign);
+ }
+ }
+
+ set_random_seed();
+
+ /* Case when the outfile equals one of the input files: */
+ if (strcmp(outfile, "-")) {
+
+ for(int i = 0; i < argc; ++i) {
+ if (strcmp(argv[i], outfile) == 0) {
+ real_outfile = sort_strdup(outfile);
+ for(;;) {
+ char* tmp = sort_malloc(strlen(outfile) +
+ strlen(".tmp") + 1);
+
+ strcpy(tmp, outfile);
+ strcpy(tmp + strlen(tmp), ".tmp");
+ sort_free(outfile);
+ outfile = tmp;
+ if (access(outfile, F_OK) < 0)
+ break;
+ }
+ tmp_file_atexit(outfile);
+ }
+ }
+ }
+
+#if defined(SORT_THREADS)
+ if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
+ nthreads = 1;
+#endif
+
+ if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
+ struct file_list fl;
+ struct sort_list list;
+
+ sort_list_init(&list);
+ file_list_init(&fl, true);
+
+ if (argc < 1)
+ procfile("-", &list, &fl);
+ else {
+ while (argc > 0) {
+ procfile(*argv, &list, &fl);
+ --argc;
+ ++argv;
+ }
+ }
+
+ if (fl.count < 1)
+ sort_list_to_file(&list, outfile);
+ else {
+ if (list.count > 0) {
+ char *flast = new_tmp_file_name();
+
+ sort_list_to_file(&list, flast);
+ file_list_add(&fl, flast, false);
+ }
+ merge_files(&fl, outfile);
+ }
+
+ file_list_clean(&fl);
+
+ /*
+ * We are about to exit the program, so we can ignore
+ * the clean-up for speed
+ *
+ * sort_list_clean(&list);
+ */
+
+ } else if (sort_opts_vals.cflag) {
+ result = (argc == 0) ? (check("-")) : (check(*argv));
+ } else if (sort_opts_vals.mflag) {
+ struct file_list fl;
+
+ file_list_init(&fl, false);
+ file_list_populate(&fl, argc, argv, true);
+ merge_files(&fl, outfile);
+ file_list_clean(&fl);
+ }
+
+ if (real_outfile) {
+ unlink(real_outfile);
+ if (rename(outfile, real_outfile) < 0)
+ err(2, NULL);
+ sort_free(real_outfile);
+ }
+
+ sort_free(outfile);
+
+ return (result);
+}
diff --git a/text_cmds/sort/sort.h b/text_cmds/sort/sort.h
new file mode 100644
index 0000000..d31b7cb
--- /dev/null
+++ b/text_cmds/sort/sort.h
@@ -0,0 +1,139 @@
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if !defined(__BSD_SORT_H__)
+#define __BSD_SORT_H__
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sysexits.h>
+#include <wchar.h>
+
+#include <sys/types.h>
+#ifdef __APPLE__
+#include "commoncrypto.h"
+#else
+#include <md5.h>
+#endif
+
+#ifdef __APPLE__
+#define VERSION "2.3-Apple (" SORT_VERSION ")"
+#else
+#define VERSION "2.3-FreeBSD"
+#endif
+
+#ifdef WITHOUT_NLS
+#define getstr(n) nlsstr[n]
+#else
+#include <nl_types.h>
+
+extern nl_catd catalog;
+#define getstr(n) catgets(catalog, 1, n, nlsstr[n])
+#endif
+
+extern const char *nlsstr[];
+
+#if defined(SORT_THREADS)
+#define MT_SORT_THRESHOLD (10000)
+extern unsigned int ncpu;
+extern size_t nthreads;
+#endif
+
+/*
+ * If true, we output some debug information.
+ */
+extern bool debug_sort;
+
+/*
+ * MD5 context for random hash function
+ */
+extern MD5_CTX md5_ctx;
+
+/*
+ * sort.c
+ */
+
+/*
+ * This structure holds main sort options which are NOT affecting the sort ordering.
+ */
+struct sort_opts
+{
+ wint_t field_sep;
+ int sort_method;
+ bool cflag;
+ bool csilentflag;
+ bool kflag;
+ bool mflag;
+ bool sflag;
+ bool uflag;
+ bool zflag;
+ bool tflag;
+ bool complex_sort;
+};
+
+/*
+ * Key value structure forward declaration
+ */
+struct key_value;
+
+/*
+ * Cmp function
+ */
+typedef int (*cmpcoll_t)(struct key_value *kv1, struct key_value *kv2, size_t offset);
+
+/*
+ * This structure holds "sort modifiers" - options which are affecting the sort ordering.
+ */
+struct sort_mods
+{
+ cmpcoll_t func;
+ bool bflag;
+ bool dflag;
+ bool fflag;
+ bool gflag;
+ bool iflag;
+ bool Mflag;
+ bool nflag;
+ bool rflag;
+ bool Rflag;
+ bool Vflag;
+ bool hflag;
+};
+
+extern bool need_hint;
+
+extern struct sort_opts sort_opts_vals;
+
+extern struct sort_mods * const default_sort_mods;
+
+extern int (*isblank_f)(int c);
+extern int (*iswblank_f)(wint_t c);
+
+#endif /* __BSD_SORT_H__ */
diff --git a/text_cmds/sort/testsuite/README.txt b/text_cmds/sort/testsuite/README.txt
new file mode 100644
index 0000000..865094f
--- /dev/null
+++ b/text_cmds/sort/testsuite/README.txt
@@ -0,0 +1,19 @@
+To run the tests:
+
+1) Adjust the variable TESTED_SORT in the file run.sh - the value must point
+to the binary that is to be tested.
+
+2) Adjust the value ORIG_SORT in the file run.sh - the value must point to the binary that is assumed
+to be working correctly. The tested sort binary will be checked against this program.
+
+3) Run:
+
+$ cd <...>/testsuite/
+$ ./run.sh
+
+4) Wait for many hours, it is running about 23 hours on my laptop.
+
+5) Check the output and check the existence of the file errors.log in the current directory.
+If the test run has been successful, then there must be no file errors.log.
+
+
diff --git a/text_cmds/sort/testsuite/bigsample.txt.xz b/text_cmds/sort/testsuite/bigsample.txt.xz
new file mode 100644
index 0000000..ca120b7
--- /dev/null
+++ b/text_cmds/sort/testsuite/bigsample.txt.xz
Binary files differ
diff --git a/text_cmds/sort/testsuite/run.sh b/text_cmds/sort/testsuite/run.sh
new file mode 100755
index 0000000..db13a34
--- /dev/null
+++ b/text_cmds/sort/testsuite/run.sh
@@ -0,0 +1,436 @@
+#!/bin/sh
+
+#export GNUSORT_NUMERIC_COMPATIBILITY=x
+#export GNUSORT_COMPATIBLE_BLANKS=x
+
+TESTED_SORT=../../text_cmds/sort/sort
+ORIG_SORT=../../text_cmds_orig/sort/sort
+
+FILECMP=cmp
+
+INPUT_FILE=sample.txt
+BIG_INPUT_FILE=bigsample.txt
+
+ERRORS_FILE=errors.log
+
+OUT_DIR=tmp
+
+# clean
+
+rm -rf ${OUT_DIR}
+mkdir -p ${OUT_DIR}
+rm -rf ${ERRORS_FILE}
+
+# ru_RU.KOI8-R C ru_RU.ISO-8859-5 en_US.ISO8859-15 zh_HK.Big5HKSCS
+#
+# ru KOI-8 is an "irregular" locale with non-trivial ordering.
+# zh* is a 2-bytes locale.
+
+for lang in en_US.UTF-8 C en_US.ISO8859-15 zh_HK.Big5HKSCS ru_RU.KOI8-R ru_RU.ISO-8859-5
+do
+
+ export LANG=${lang}
+
+ for KEYS in -srh -sfrudb -Vs -sM -siz
+ do
+
+ echo ${LANG} ${KEYS}
+
+ if [ ${LANG} = "ru_RU.KOI8-R" ] && [ ${KEYS} = "-srh" ] ; then
+
+ # numeric sorting in ru_RU.KOI8-R incompatible because the thousands separator bug fixed,
+ # for better compatibility with the new GNU sort.
+ # (ru_RU.KOI8-R uses space as thousands separator)
+
+ continue
+ fi
+
+ time ${ORIG_SORT} ${KEYS} ${BIG_INPUT_FILE} -o ${OUT_DIR}/big_orig
+
+ for PARALLEL in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
+ do
+
+ echo --parallel ${PARALLEL}
+
+ time ${TESTED_SORT} --parallel ${PARALLEL} ${KEYS} ${BIG_INPUT_FILE} -o ${OUT_DIR}/big_new
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} big crash --parallel ${PARALLEL} >> ${ERRORS_FILE}
+ exit
+ fi
+
+ if ! ${FILECMP} ${OUT_DIR}/big_new ${OUT_DIR}/big_orig >${OUT_DIR}/res.0.0.big 2>&1 ; then
+ echo ${LANG} ${KEYS} big error --parallel ${PARALLEL} >> ${ERRORS_FILE}
+ fi
+ time ${TESTED_SORT} --parallel ${PARALLEL} -c ${KEYS} ${OUT_DIR}/big_new
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -c big error --parallel ${PARALLEL} >> ${ERRORS_FILE}
+ fi
+ rm -rf ${OUT_DIR}/res.0.0.big
+ rm -rf ${OUT_DIR}/big_new
+ done
+
+ rm -rf ${OUT_DIR}/big_orig
+
+ ${TESTED_SORT} ${KEYS} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.0.0 2>&1 ; then
+ echo ${LANG} ${KEYS} error >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c ${KEYS} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -c error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.0.0
+
+ ${TESTED_SORT} ${KEYS} -t " " ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -t " " crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -t " " ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.0.0 2>&1 ; then
+ echo ${LANG} ${KEYS} error -t " " >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c -t " " ${KEYS} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo $? ${LANG} ${KEYS} -t " " -c error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.0.0
+
+ ${TESTED_SORT} ${KEYS} -t "|" ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -t "|" crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -t "|" ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.0.0 2>&1 ; then
+ echo ${LANG} ${KEYS} error -t "|" >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c -t "|" ${KEYS} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -c -t "|" error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.0.0
+
+ ${TESTED_SORT} ${KEYS} -t '\0' ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -t 0 crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -t '\0' ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.0.0 2>&1 ; then
+ echo ${LANG} ${KEYS} error -t '\0' >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c -t '\0' ${KEYS} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -c -t '\0' error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.0.0
+
+ for f1 in 1 2 3 4 5 6 7 8 9
+ do
+ for c1 in 1 2 3 4 5 10 15 20 25 30
+ do
+ echo ${LANG} ${KEYS} ${f1} ${c1}
+
+ ${TESTED_SORT} ${KEYS} +${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} +${f1}.${c1} crash +- >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} +${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1} 2>&1 ; then
+ echo ${LANG} ${KEYS} +${f1}.${c1} error +- >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c ${KEYS} +${f1}.${c1} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} +${f1}.${c1} -c error +- >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}
+
+ ${TESTED_SORT} ${KEYS} -k${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1} crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -k${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1} 2>&1 ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1} error >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c ${KEYS} -k${f1}.${c1} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1} -c error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}
+
+ ${TESTED_SORT} ${KEYS} -k${f1}.${c1}b ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}b crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -k${f1}.${c1}b ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1} 2>&1 ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}b error >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c ${KEYS} -k${f1}.${c1}b ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}b -c error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}
+
+ ${TESTED_SORT} ${KEYS} -t " " -k${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -t -k${f1}.${c1} crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -t " " -k${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1} 2>&1 ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1} error -t " " >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c -t " " ${KEYS} -k${f1}.${c1} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1} -t " " -c error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}
+
+ if [ ${LANG} != "ru_RU.KOI8-R" ] ; then
+
+ # numeric sorting in ru_RU.KOI8-R incompatible because the thousands separator bug fixed,
+ # for better compatibility with the new GNU sort.
+ # (ru_RU.KOI8-R uses space as thousands separator)
+
+ ${TESTED_SORT} ${KEYS} -t " " -k${f1}.${c1}n ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}n crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -t " " -k${f1}.${c1}n ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1} 2>&1 ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1} error -t " " n >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c -t " " ${KEYS} -k${f1}.${c1}n ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1} -c -t " " n error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}
+ fi
+
+ ${TESTED_SORT} ${KEYS} -t "|" -k${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -t "|" -k${f1}.${c1} crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -t "|" -k${f1}.${c1} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1} 2>&1 ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1} error -t "|" >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c -t "|" ${KEYS} -k${f1}.${c1} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1} -c -t "|" error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}
+
+ for f2 in 1 2 3 4 5 6 7 8 9 10
+ do
+ for c2 in 0 1 2 3 4 5 10 15 20 25 30
+ do
+ echo ${LANG} ${KEYS} ${f1} ${c1} ${f2} ${c2}
+
+ ${TESTED_SORT} ${KEYS} +${f1}.${c1} -${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} +${f1}.${c1} -${f2}.${c2} crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} +${f1}.${c1} -${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then
+ echo ${LANG} ${KEYS} +${f1}.${c1} -${f2}.${c2} error +- >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c ${KEYS} +${f1}.${c1} -${f2}.${c2} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} +${f1}.${c1} -${f2}.${c2} -c error +- >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2}
+
+ ${TESTED_SORT} ${KEYS} -k${f1}.${c1},${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -k${f1}.${c1},${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}.${f2}.${c2} error >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c ${KEYS} -k${f1}.${c1},${f2}.${c2} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} -c error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2}
+
+ ${TESTED_SORT} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}.b.${f2}.${c2} error >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c ${KEYS} -k${f1}.${c1}b,${f2}.${c2} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} -c error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2}
+
+ ${TESTED_SORT} ${KEYS} -t " " -k${f1}.${c1},${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -t " " -k${f1}.${c1},${f2}.${c2} crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -t " " -k${f1}.${c1},${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}.${f2}.${c2} error -t " " >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c -t " " ${KEYS} -k${f1}.${c1},${f2}.${c2} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} -c -t " " error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2}
+
+ if [ ${LANG} != "ru_RU.KOI8-R" ] ; then
+
+ # numeric sorting in ru_RU.KOI8-R incompatible because the thousands separator bug fixed,
+ # for better compatibility with the new GNU sort.
+ # (ru_RU.KOI8-R uses space as thousands separator)
+
+ ${TESTED_SORT} ${KEYS} -t " " -k${f1}.${c1}n,${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -t " " -k${f1}.${c1}n,${f2}.${c2} crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -t " " -k${f1}.${c1}n,${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then
+ echo ${LANG} ${KEYS} -t " " -k${f1}.${c1}.${f2}.${c2} error n >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c -t " " ${KEYS} -k${f1}.${c1}n,${f2}.${c2} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} -c -t " " n error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2}
+
+ ${TESTED_SORT} ${KEYS} -t '\0' -k${f1}.${c1}n,${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -t '\0' -k${f1}.${c1}n,${f2}.${c2} crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -t '\0' -k${f1}.${c1}n,${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}.${f2}.${c2} error -t '\0' n >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c -t '\0' ${KEYS} -k${f1}.${c1}n,${f2}.${c2} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} -c -t '\0' n error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2}
+ fi
+
+ ${TESTED_SORT} ${KEYS} -t "|" -k${f1}.${c1},${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -t "|" -k${f1}.${c1},${f2}.${c2} crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -t "|" -k${f1}.${c1},${f2}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}.${f2}.${c2} error -t "|" >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c -t "|" ${KEYS} -k${f1}.${c1},${f2}.${c2} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} -c -t "|" error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2}
+
+ ${TESTED_SORT} ${KEYS} -t "|" -k${f1}.${c1},${f2}.${c2} -k${f2}.${c1},${f1}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -t "|" -k${f1}.${c1},${f2}.${c2} -k${f2}.${c1},${f1}.${c2} crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -t "|" -k${f1}.${c1},${f2}.${c2} -k${f2}.${c1},${f1}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}.${f2}.${c2} error -t "|" 2k >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c -t "|" ${KEYS} -k${f1}.${c1},${f2}.${c2} -k${f2}.${c1},${f1}.${c2} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1},${f2}.${c2} -c -t "|" 2k error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2}
+
+ ${TESTED_SORT} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} -k${f2}.${c1},${f1}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} -k${f2}.${c1},${f1}.${c2} crash >> ${ERRORS_FILE}
+ exit
+ fi
+ ${ORIG_SORT} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} -k${f2}.${c1},${f1}.${c2} ${INPUT_FILE} -o ${OUT_DIR}/sik2
+ if ! ${FILECMP} ${OUT_DIR}/sik1 ${OUT_DIR}/sik2 >${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2} 2>&1 ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}.b.${f2}.${c2} error 2k >> ${ERRORS_FILE}
+ fi
+ ${TESTED_SORT} -c ${KEYS} -k${f1}.${c1}b,${f2}.${c2} -k${f2}.${c1},${f1}.${c2} ${OUT_DIR}/sik1
+ ER=$?
+ if ! [ ${ER} -eq 0 ] ; then
+ echo ${LANG} ${KEYS} -k${f1}.${c1}b,${f2}.${c2} -c 2k error >> ${ERRORS_FILE}
+ fi
+ rm ${OUT_DIR}/res.${f1}.${c1}.${f2}.${c2}
+
+ done
+ done
+ done
+ done
+ done
+done
+
+if [ -f ${ERRORS_FILE} ] ; then
+ echo TEST FAILED
+else
+ echo TEST SUCCEEDED
+fi
diff --git a/text_cmds/sort/testsuite/sample.txt b/text_cmds/sort/testsuite/sample.txt
new file mode 100644
index 0000000..74d1477
--- /dev/null
+++ b/text_cmds/sort/testsuite/sample.txt
Binary files differ
diff --git a/text_cmds/sort/vsort.c b/text_cmds/sort/vsort.c
new file mode 100644
index 0000000..abc8647
--- /dev/null
+++ b/text_cmds/sort/vsort.c
@@ -0,0 +1,265 @@
+/*-
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.bin/sort/vsort.c 281132 2015-04-06 02:35:55Z pfg $");
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sort.h"
+#include "vsort.h"
+
+static inline bool
+isdigit_clocale(wchar_t c)
+{
+
+ return (c >= L'0' && c <= L'9');
+}
+
+static inline bool
+isalpha_clocale(wchar_t c)
+{
+
+ return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'));
+}
+
+static inline bool
+isalnum_clocale(wchar_t c)
+{
+
+ return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') ||
+ (c >= L'0' && c <= L'9'));
+}
+
+/*
+ * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$
+ * Set length of string before suffix.
+ */
+static void
+find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len)
+{
+ wchar_t c;
+ size_t clen;
+ bool expect_alpha, sfx;
+
+ sfx = false;
+ expect_alpha = false;
+ *len = 0;
+ clen = 0;
+
+ while ((si < se) && (c = bws_get_iter_value(si))) {
+ if (expect_alpha) {
+ expect_alpha = false;
+ if (!isalpha_clocale(c) && (c != L'~'))
+ sfx = false;
+ } else if (c == L'.') {
+ expect_alpha = true;
+ if (!sfx) {
+ sfx = true;
+ *len = clen;
+ }
+ } else if (!isalnum_clocale(c) && (c != L'~'))
+ sfx = false;
+
+ si = bws_iterator_inc(si, 1);
+ ++clen;
+ }
+
+ /* This code must be here to make the implementation compatible
+ * with WORDING of GNU sort documentation.
+ * But the GNU sort implementation is not following its own
+ * documentation. GNU sort allows empty file extensions
+ * (just dot with nothing after); but the regular expression in
+ * their documentation does not allow empty file extensions.
+ * We chose to make our implementation compatible with GNU sort
+ * implementation. If they will ever fix their bug, this code
+ * must be uncommented. Or they may choose to fix the info page,
+ * then the code stays commented.
+ *
+ if (expect_alpha)
+ sfx = false;
+ */
+
+ if (!sfx)
+ *len = clen;
+}
+
+static inline int
+cmp_chars(wchar_t c1, wchar_t c2)
+{
+
+ if (c1 == c2)
+ return (0);
+
+ if (c1 == L'~')
+ return (-1);
+ if (c2 == L'~')
+ return (+1);
+
+ if (isdigit_clocale(c1) || !c1)
+ return ((isdigit_clocale(c2) || !c2) ? 0 : -1);
+
+ if (isdigit_clocale(c2) || !c2)
+ return (+1);
+
+ if (isalpha_clocale(c1))
+ return ((isalpha_clocale(c2)) ? ((int) c1 - (int) c2) : -1);
+
+ if (isalpha_clocale(c2))
+ return (+1);
+
+ return ((int) c1 - (int) c2);
+}
+
+static int
+cmpversions(bwstring_iterator si1, bwstring_iterator se1,
+ bwstring_iterator si2, bwstring_iterator se2)
+{
+ int cmp, diff;
+
+ while ((si1 < se1) || (si2 < se2)) {
+ diff = 0;
+
+ while (((si1 < se1) &&
+ !isdigit_clocale(bws_get_iter_value(si1))) ||
+ ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) {
+ wchar_t c1, c2;
+
+ c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0;
+ c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0;
+
+ cmp = cmp_chars(c1, c2);
+ if (cmp)
+ return (cmp);
+
+ if (si1 < se1)
+ si1 = bws_iterator_inc(si1, 1);
+ if (si2 < se2)
+ si2 = bws_iterator_inc(si2, 1);
+ }
+
+ while (bws_get_iter_value(si1) == L'0')
+ si1 = bws_iterator_inc(si1, 1);
+
+ while (bws_get_iter_value(si2) == L'0')
+ si2 = bws_iterator_inc(si2, 1);
+
+ while (isdigit_clocale(bws_get_iter_value(si1)) &&
+ isdigit_clocale(bws_get_iter_value(si2))) {
+ if (!diff)
+ diff = ((int)bws_get_iter_value(si1) -
+ (int)bws_get_iter_value(si2));
+ si1 = bws_iterator_inc(si1, 1);
+ si2 = bws_iterator_inc(si2, 1);
+ }
+
+ if (isdigit_clocale(bws_get_iter_value(si1)))
+ return (1);
+
+ if (isdigit_clocale(bws_get_iter_value(si2)))
+ return (-1);
+
+ if (diff)
+ return (diff);
+ }
+
+ return (0);
+}
+
+/*
+ * Compare two version strings
+ */
+int
+vcmp(struct bwstring *s1, struct bwstring *s2)
+{
+ bwstring_iterator si1, si2;
+ wchar_t c1, c2;
+ size_t len1, len2, slen1, slen2;
+ int cmp_bytes, cmp_res;
+
+ if (s1 == s2)
+ return (0);
+
+ cmp_bytes = bwscmp(s1, s2, 0);
+ if (cmp_bytes == 0)
+ return (0);
+
+ len1 = slen1 = BWSLEN(s1);
+ len2 = slen2 = BWSLEN(s2);
+
+ if (slen1 < 1)
+ return (-1);
+ if (slen2 < 1)
+ return (+1);
+
+ si1 = bws_begin(s1);
+ si2 = bws_begin(s2);
+
+ c1 = bws_get_iter_value(si1);
+ c2 = bws_get_iter_value(si2);
+
+ if (c1 == L'.' && (slen1 == 1))
+ return (-1);
+
+ if (c2 == L'.' && (slen2 == 1))
+ return (+1);
+
+ if (slen1 == 2 && c1 == L'.' &&
+ bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.')
+ return (-1);
+ if (slen2 == 2 && c2 == L'.' &&
+ bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.')
+ return (+1);
+
+ if (c1 == L'.' && c2 != L'.')
+ return (-1);
+ if (c1 != L'.' && c2 == L'.')
+ return (+1);
+
+ if (c1 == L'.' && c2 == L'.') {
+ si1 = bws_iterator_inc(si1, 1);
+ si2 = bws_iterator_inc(si2, 1);
+ }
+
+ find_suffix(si1, bws_end(s1), &len1);
+ find_suffix(si2, bws_end(s2), &len2);
+
+ if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0))
+ return (cmp_bytes);
+
+ cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2,
+ bws_iterator_inc(si2, len2));
+
+ if (cmp_res == 0)
+ cmp_res = cmp_bytes;
+
+ return (cmp_res);
+}
diff --git a/text_cmds/sort/vsort.h b/text_cmds/sort/vsort.h
new file mode 100644
index 0000000..5ef45d5
--- /dev/null
+++ b/text_cmds/sort/vsort.h
@@ -0,0 +1,37 @@
+/* $FreeBSD: head/usr.bin/sort/vsort.h 264744 2014-04-21 22:52:18Z pfg $ */
+
+/*-
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VSORT_H_
+#define _VSORT_H_
+
+#include "bwstring.h"
+
+int vcmp(struct bwstring *s1, struct bwstring *s2);
+
+#endif