diff options
author | 2021-05-09 14:20:58 -0400 | |
---|---|---|
committer | 2021-05-09 14:20:58 -0400 | |
commit | 5fd83771641d15c418f747bd343ba6738d3875f7 (patch) | |
tree | 5abf0f78f680d9837dbd93d4d4c3933bb7509599 /text_cmds/sort/vsort.c | |
download | apple_cmds-5fd83771641d15c418f747bd343ba6738d3875f7.tar.gz apple_cmds-5fd83771641d15c418f747bd343ba6738d3875f7.tar.zst apple_cmds-5fd83771641d15c418f747bd343ba6738d3875f7.zip |
Import macOS userland
adv_cmds-176
basic_cmds-55
bootstrap_cmds-116.100.1
developer_cmds-66
diskdev_cmds-667.40.1
doc_cmds-53.60.1
file_cmds-321.40.3
mail_cmds-35
misc_cmds-34
network_cmds-606.40.1
patch_cmds-17
remote_cmds-63
shell_cmds-216.60.1
system_cmds-880.60.2
text_cmds-106
Diffstat (limited to 'text_cmds/sort/vsort.c')
-rw-r--r-- | text_cmds/sort/vsort.c | 265 |
1 files changed, 265 insertions, 0 deletions
diff --git a/text_cmds/sort/vsort.c b/text_cmds/sort/vsort.c new file mode 100644 index 0000000..abc8647 --- /dev/null +++ b/text_cmds/sort/vsort.c @@ -0,0 +1,265 @@ +/*- + * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> + * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: head/usr.bin/sort/vsort.c 281132 2015-04-06 02:35:55Z pfg $"); + +#include <sys/types.h> + +#include <ctype.h> +#include <stdlib.h> +#include <string.h> + +#include "sort.h" +#include "vsort.h" + +static inline bool +isdigit_clocale(wchar_t c) +{ + + return (c >= L'0' && c <= L'9'); +} + +static inline bool +isalpha_clocale(wchar_t c) +{ + + return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z')); +} + +static inline bool +isalnum_clocale(wchar_t c) +{ + + return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') || + (c >= L'0' && c <= L'9')); +} + +/* + * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$ + * Set length of string before suffix. + */ +static void +find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len) +{ + wchar_t c; + size_t clen; + bool expect_alpha, sfx; + + sfx = false; + expect_alpha = false; + *len = 0; + clen = 0; + + while ((si < se) && (c = bws_get_iter_value(si))) { + if (expect_alpha) { + expect_alpha = false; + if (!isalpha_clocale(c) && (c != L'~')) + sfx = false; + } else if (c == L'.') { + expect_alpha = true; + if (!sfx) { + sfx = true; + *len = clen; + } + } else if (!isalnum_clocale(c) && (c != L'~')) + sfx = false; + + si = bws_iterator_inc(si, 1); + ++clen; + } + + /* This code must be here to make the implementation compatible + * with WORDING of GNU sort documentation. + * But the GNU sort implementation is not following its own + * documentation. GNU sort allows empty file extensions + * (just dot with nothing after); but the regular expression in + * their documentation does not allow empty file extensions. + * We chose to make our implementation compatible with GNU sort + * implementation. If they will ever fix their bug, this code + * must be uncommented. Or they may choose to fix the info page, + * then the code stays commented. + * + if (expect_alpha) + sfx = false; + */ + + if (!sfx) + *len = clen; +} + +static inline int +cmp_chars(wchar_t c1, wchar_t c2) +{ + + if (c1 == c2) + return (0); + + if (c1 == L'~') + return (-1); + if (c2 == L'~') + return (+1); + + if (isdigit_clocale(c1) || !c1) + return ((isdigit_clocale(c2) || !c2) ? 0 : -1); + + if (isdigit_clocale(c2) || !c2) + return (+1); + + if (isalpha_clocale(c1)) + return ((isalpha_clocale(c2)) ? ((int) c1 - (int) c2) : -1); + + if (isalpha_clocale(c2)) + return (+1); + + return ((int) c1 - (int) c2); +} + +static int +cmpversions(bwstring_iterator si1, bwstring_iterator se1, + bwstring_iterator si2, bwstring_iterator se2) +{ + int cmp, diff; + + while ((si1 < se1) || (si2 < se2)) { + diff = 0; + + while (((si1 < se1) && + !isdigit_clocale(bws_get_iter_value(si1))) || + ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) { + wchar_t c1, c2; + + c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0; + c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0; + + cmp = cmp_chars(c1, c2); + if (cmp) + return (cmp); + + if (si1 < se1) + si1 = bws_iterator_inc(si1, 1); + if (si2 < se2) + si2 = bws_iterator_inc(si2, 1); + } + + while (bws_get_iter_value(si1) == L'0') + si1 = bws_iterator_inc(si1, 1); + + while (bws_get_iter_value(si2) == L'0') + si2 = bws_iterator_inc(si2, 1); + + while (isdigit_clocale(bws_get_iter_value(si1)) && + isdigit_clocale(bws_get_iter_value(si2))) { + if (!diff) + diff = ((int)bws_get_iter_value(si1) - + (int)bws_get_iter_value(si2)); + si1 = bws_iterator_inc(si1, 1); + si2 = bws_iterator_inc(si2, 1); + } + + if (isdigit_clocale(bws_get_iter_value(si1))) + return (1); + + if (isdigit_clocale(bws_get_iter_value(si2))) + return (-1); + + if (diff) + return (diff); + } + + return (0); +} + +/* + * Compare two version strings + */ +int +vcmp(struct bwstring *s1, struct bwstring *s2) +{ + bwstring_iterator si1, si2; + wchar_t c1, c2; + size_t len1, len2, slen1, slen2; + int cmp_bytes, cmp_res; + + if (s1 == s2) + return (0); + + cmp_bytes = bwscmp(s1, s2, 0); + if (cmp_bytes == 0) + return (0); + + len1 = slen1 = BWSLEN(s1); + len2 = slen2 = BWSLEN(s2); + + if (slen1 < 1) + return (-1); + if (slen2 < 1) + return (+1); + + si1 = bws_begin(s1); + si2 = bws_begin(s2); + + c1 = bws_get_iter_value(si1); + c2 = bws_get_iter_value(si2); + + if (c1 == L'.' && (slen1 == 1)) + return (-1); + + if (c2 == L'.' && (slen2 == 1)) + return (+1); + + if (slen1 == 2 && c1 == L'.' && + bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.') + return (-1); + if (slen2 == 2 && c2 == L'.' && + bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.') + return (+1); + + if (c1 == L'.' && c2 != L'.') + return (-1); + if (c1 != L'.' && c2 == L'.') + return (+1); + + if (c1 == L'.' && c2 == L'.') { + si1 = bws_iterator_inc(si1, 1); + si2 = bws_iterator_inc(si2, 1); + } + + find_suffix(si1, bws_end(s1), &len1); + find_suffix(si2, bws_end(s2), &len2); + + if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0)) + return (cmp_bytes); + + cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2, + bws_iterator_inc(si2, len2)); + + if (cmp_res == 0) + cmp_res = cmp_bytes; + + return (cmp_res); +} |