text_cmds/sort/coll.c

   1 /*-
   2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
   3  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  *
  15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25  * SUCH DAMAGE.
  26  */
  27
  28 #include <sys/cdefs.h>
  29 __FBSDID("$FreeBSD$");
  30
  31 #include <sys/types.h>
  32
  33 #include <errno.h>
  34 #include <err.h>
  35 #include <langinfo.h>
  36 #include <limits.h>
  37 #include <math.h>
  38 #ifndef __APPLE__
  39 #include <md5.h>
  40 #endif
  41 #include <stdlib.h>
  42 #include <string.h>
  43 #include <wchar.h>
  44 #include <wctype.h>
  45
  46 #include "coll.h"
  47 #include "vsort.h"
  48
  49 struct key_specs *keys;
  50 size_t keys_num = 0;
  51
  52 wint_t symbol_decimal_point = L'.';
  53 /* there is no default thousands separator in collate rules: */
  54 wint_t symbol_thousands_sep = 0;
  55 wint_t symbol_negative_sign = L'-';
  56 wint_t symbol_positive_sign = L'+';
  57
  58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
  59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
  60 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
  61 static int numcoll(struct key_value*, struct key_value *, size_t offset);
  62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
  63 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
  64 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
  65
  66 /*
  67  * Allocate keys array
  68  */
  69 struct keys_array *
  70 keys_array_alloc(void)
  71 {
  72         struct keys_array *ka;
  73         size_t sz;
  74
  75         sz = keys_array_size();
  76         ka = sort_malloc(sz);
  77         memset(ka, 0, sz);
  78
  79         return (ka);
  80 }
  81
  82 /*
  83  * Calculate whether we need key hint space
  84  */
  85 static size_t
  86 key_hint_size(void)
  87 {
  88
  89         return (need_hint ? sizeof(struct key_hint) : 0);
  90 }
  91
  92 /*
  93  * Calculate keys array size
  94  */
  95 size_t
  96 keys_array_size(void)
  97 {
  98
  99         return (keys_num * (sizeof(struct key_value) + key_hint_size()));
 100 }
 101
 102 /*
 103  * Clean data of keys array
 104  */
 105 void
 106 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
 107 {
 108
 109         if (ka) {
 110                 for (size_t i = 0; i < keys_num; ++i) {
 111                         const struct key_value *kv;
 112
 113                         kv = get_key_from_keys_array(ka, i);
 114                         if (kv->k && kv->k != s)
 115                                 bwsfree(kv->k);
 116                 }
 117                 memset(ka, 0, keys_array_size());
 118         }
 119 }
 120
 121 /*
 122  * Get pointer to a key value in the keys set
 123  */
 124 struct key_value *
 125 get_key_from_keys_array(struct keys_array *ka, size_t ind)
 126 {
 127
 128         return ((struct key_value *)((caddr_t)ka->key +
 129             ind * (sizeof(struct key_value) + key_hint_size())));
 130 }
 131
 132 /*
 133  * Set value of a key in the keys set
 134  */
 135 void
 136 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
 137 {
 138
 139         if (ka && keys_num > ind) {
 140                 struct key_value *kv;
 141
 142                 kv = get_key_from_keys_array(ka, ind);
 143
 144                 if (kv->k && kv->k != s)
 145                         bwsfree(kv->k);
 146                 kv->k = s;
 147         }
 148 }
 149
 150 /*
 151  * Initialize a sort list item
 152  */
 153 struct sort_list_item *
 154 sort_list_item_alloc(void)
 155 {
 156         struct sort_list_item *si;
 157         size_t sz;
 158
 159         sz = sizeof(struct sort_list_item) + keys_array_size();
 160         si = sort_malloc(sz);
 161         memset(si, 0, sz);
 162
 163         return (si);
 164 }
 165
 166 size_t
 167 sort_list_item_size(struct sort_list_item *si)
 168 {
 169         size_t ret = 0;
 170
 171         if (si) {
 172                 ret = sizeof(struct sort_list_item) + keys_array_size();
 173                 if (si->str)
 174                         ret += bws_memsize(si->str);
 175                 for (size_t i = 0; i < keys_num; ++i) {
 176                         const struct key_value *kv;
 177
 178                         kv = get_key_from_keys_array(&si->ka, i);
 179
 180                         if (kv->k != si->str)
 181                                 ret += bws_memsize(kv->k);
 182                 }
 183         }
 184         return (ret);
 185 }
 186
 187 /*
 188  * Calculate key for a sort list item
 189  */
 190 static void
 191 sort_list_item_make_key(struct sort_list_item *si)
 192 {
 193
 194         preproc(si->str, &(si->ka));
 195 }
 196
 197 /*
 198  * Set value of a sort list item.
 199  * Return combined string and keys memory size.
 200  */
 201 void
 202 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
 203 {
 204
 205         if (si) {
 206                 clean_keys_array(si->str, &(si->ka));
 207                 if (si->str) {
 208                         if (si->str == str) {
 209                                 /* we are trying to reset the same string */
 210                                 return;
 211                         } else {
 212                                 bwsfree(si->str);
 213                                 si->str = NULL;
 214                         }
 215                 }
 216                 si->str = str;
 217                 sort_list_item_make_key(si);
 218         }
 219 }
 220
 221 /*
 222  * De-allocate a sort list item object memory
 223  */
 224 void
 225 sort_list_item_clean(struct sort_list_item *si)
 226 {
 227
 228         if (si) {
 229                 clean_keys_array(si->str, &(si->ka));
 230                 if (si->str) {
 231                         bwsfree(si->str);
 232                         si->str = NULL;
 233                 }
 234         }
 235 }
 236
 237 /*
 238  * Skip columns according to specs
 239  */
 240 static size_t
 241 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
 242     bool skip_blanks, bool *empty_key)
 243 {
 244         if (cols < 1)
 245                 return (BWSLEN(s) + 1);
 246
 247         if (skip_blanks)
 248                 while (start < BWSLEN(s) && iswblank_f(BWS_GET(s,start)))
 249                         ++start;
 250
 251         while (start < BWSLEN(s) && cols > 1) {
 252                 --cols;
 253                 ++start;
 254         }
 255
 256         if (start >= BWSLEN(s))
 257                 *empty_key = true;
 258
 259         return (start);
 260 }
 261
 262 /*
 263  * Skip fields according to specs
 264  */
 265 static size_t
 266 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
 267 {
 268
 269         if (fields < 2) {
 270                 if (BWSLEN(s) == 0)
 271                         *empty_field = true;
 272                 return (0);
 273         } else if (!(sort_opts_vals.tflag)) {
 274                 size_t cpos = 0;
 275                 bool pb = true;
 276
 277                 while (cpos < BWSLEN(s)) {
 278                         bool isblank;
 279
 280                         isblank = iswblank_f(BWS_GET(s, cpos));
 281
 282                         if (isblank && !pb) {
 283                                 --fields;
 284                                 if (fields <= 1)
 285                                         return (cpos);
 286                         }
 287                         pb = isblank;
 288                         ++cpos;
 289                 }
 290                 if (fields > 1)
 291                         *empty_field = true;
 292                 return (cpos);
 293         } else {
 294                 size_t cpos = 0;
 295
 296                 while (cpos < BWSLEN(s)) {
 297                         if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
 298                                 --fields;
 299                                 if (fields <= 1)
 300                                         return (cpos + 1);
 301                         }
 302                         ++cpos;
 303                 }
 304                 if (fields > 1)
 305                         *empty_field = true;
 306                 return (cpos);
 307         }
 308 }
 309
 310 /*
 311  * Find fields start
 312  */
 313 static void
 314 find_field_start(const struct bwstring *s, struct key_specs *ks,
 315     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
 316 {
 317
 318         *field_start = skip_fields_to_start(s, ks->f1, empty_field);
 319         if (!*empty_field)
 320                 *key_start = skip_cols_to_start(s, ks->c1, *field_start,
 321                     ks->pos1b, empty_key);
 322         else
 323                 *empty_key = true;
 324 }
 325
 326 /*
 327  * Find end key position
 328  */
 329 static size_t
 330 find_field_end(const struct bwstring *s, struct key_specs *ks)
 331 {
 332         size_t f2, next_field_start, pos_end;
 333         bool empty_field, empty_key;
 334
 335         empty_field = false;
 336         empty_key = false;
 337         f2 = ks->f2;
 338
 339         if (f2 == 0)
 340                 return (BWSLEN(s) + 1);
 341         else {
 342                 if (ks->c2 == 0) {
 343                         next_field_start = skip_fields_to_start(s, f2 + 1,
 344                             &empty_field);
 345                         if ((next_field_start > 0) && sort_opts_vals.tflag &&
 346                             ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
 347                             next_field_start - 1)))
 348                                 --next_field_start;
 349                 } else
 350                         next_field_start = skip_fields_to_start(s, f2,
 351                             &empty_field);
 352         }
 353
 354         if (empty_field || (next_field_start >= BWSLEN(s)))
 355                 return (BWSLEN(s) + 1);
 356
 357         if (ks->c2) {
 358                 pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
 359                     ks->pos2b, &empty_key);
 360                 if (pos_end < BWSLEN(s))
 361                         ++pos_end;
 362         } else
 363                 pos_end = next_field_start;
 364
 365         return (pos_end);
 366 }
 367
 368 /*
 369  * Cut a field according to the key specs
 370  */
 371 static struct bwstring *
 372 cut_field(const struct bwstring *s, struct key_specs *ks)
 373 {
 374         struct bwstring *ret = NULL;
 375
 376         if (s && ks) {
 377                 size_t field_start, key_end, key_start, sz;
 378                 bool empty_field, empty_key;
 379
 380                 field_start = 0;
 381                 key_start = 0;
 382                 empty_field = false;
 383                 empty_key = false;
 384
 385                 find_field_start(s, ks, &field_start, &key_start,
 386                     &empty_field, &empty_key);
 387
 388                 if (empty_key)
 389                         sz = 0;
 390                 else {
 391                         key_end = find_field_end(s, ks);
 392                         sz = (key_end < key_start) ? 0 : (key_end - key_start);
 393                 }
 394
 395                 ret = bwsalloc(sz);
 396                 if (sz)
 397                         bwsnocpy(ret, s, key_start, sz);
 398         } else
 399                 ret = bwsalloc(0);
 400
 401         return (ret);
 402 }
 403
 404 /*
 405  * Preprocesses a line applying the necessary transformations
 406  * specified by command line options and returns the preprocessed
 407  * string, which can be used to compare.
 408  */
 409 int
 410 preproc(struct bwstring *s, struct keys_array *ka)
 411 {
 412
 413         if (sort_opts_vals.kflag)
 414                 for (size_t i = 0; i < keys_num; i++) {
 415                         struct bwstring *key;
 416                         struct key_specs *kspecs;
 417                         struct sort_mods *sm;
 418
 419                         kspecs = &(keys[i]);
 420                         key = cut_field(s, kspecs);
 421
 422                         sm = &(kspecs->sm);
 423                         if (sm->dflag)
 424                                 key = dictionary_order(key);
 425                         else if (sm->iflag)
 426                                 key = ignore_nonprinting(key);
 427                         if (sm->fflag || sm->Mflag)
 428                                 key = ignore_case(key);
 429
 430                         set_key_on_keys_array(ka, key, i);
 431                 }
 432         else {
 433                 struct bwstring *ret = NULL;
 434                 struct sort_mods *sm = default_sort_mods;
 435
 436                 if (sm->bflag) {
 437                         if (ret == NULL)
 438                                 ret = bwsdup(s);
 439                         ret = ignore_leading_blanks(ret);
 440                 }
 441                 if (sm->dflag) {
 442                         if (ret == NULL)
 443                                 ret = bwsdup(s);
 444                         ret = dictionary_order(ret);
 445                 } else if (sm->iflag) {
 446                         if (ret == NULL)
 447                                 ret = bwsdup(s);
 448                         ret = ignore_nonprinting(ret);
 449                 }
 450                 if (sm->fflag || sm->Mflag) {
 451                         if (ret == NULL)
 452                                 ret = bwsdup(s);
 453                         ret = ignore_case(ret);
 454                 }
 455                 if (ret == NULL)
 456                         set_key_on_keys_array(ka, s, 0);
 457                 else
 458                         set_key_on_keys_array(ka, ret, 0);
 459         }
 460
 461         return 0;
 462 }
 463
 464 cmpcoll_t
 465 get_sort_func(struct sort_mods *sm)
 466 {
 467
 468         if (sm->nflag)
 469                 return (numcoll);
 470         else if (sm->hflag)
 471                 return (hnumcoll);
 472         else if (sm->gflag)
 473                 return (gnumcoll);
 474         else if (sm->Mflag)
 475                 return (monthcoll);
 476         else if (sm->Rflag)
 477                 return (randomcoll);
 478         else if (sm->Vflag)
 479                 return (versioncoll);
 480         else
 481                 return (wstrcoll);
 482 }
 483
 484 /*
 485  * Compares the given strings.  Returns a positive number if
 486  * the first precedes the second, a negative number if the second is
 487  * the preceding one, and zero if they are equal.  This function calls
 488  * the underlying collate functions, which done the actual comparison.
 489  */
 490 int
 491 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
 492 {
 493         struct key_value *kv1, *kv2;
 494         struct sort_mods *sm;
 495         int res = 0;
 496
 497         for (size_t i = 0; i < keys_num; ++i) {
 498                 kv1 = get_key_from_keys_array(ps1, i);
 499                 kv2 = get_key_from_keys_array(ps2, i);
 500                 sm = &(keys[i].sm);
 501
 502                 if (sm->rflag)
 503                         res = sm->func(kv2, kv1, offset);
 504                 else
 505                         res = sm->func(kv1, kv2, offset);
 506
 507                 if (res)
 508                         break;
 509
 510                 /* offset applies to only the first key */
 511                 offset = 0;
 512         }
 513         return (res);
 514 }
 515
 516 /*
 517  * Compare two strings.
 518  * Plain symbol-by-symbol comparison.
 519  */
 520 int
 521 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
 522 {
 523
 524         if (default_sort_mods->rflag) {
 525                 const struct bwstring *tmp;
 526
 527                 tmp = s1;
 528                 s1 = s2;
 529                 s2 = tmp;
 530         }
 531
 532         return (bwscoll(s1, s2, 0));
 533 }
 534
 535 /*
 536  * Compare a string and a sort list item, according to the sort specs.
 537  */
 538 int
 539 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
 540 {
 541         struct keys_array *ka1;
 542         int ret = 0;
 543
 544         ka1 = keys_array_alloc();
 545
 546         preproc(str1, ka1);
 547
 548         sort_list_item_make_key(*ss2);
 549
 550         if (debug_sort) {
 551                 bwsprintf(stdout, str1, "; s1=<", ">");
 552                 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
 553         }
 554
 555         ret = key_coll(ka1, &((*ss2)->ka), 0);
 556
 557         if (debug_sort)
 558                 printf("; cmp1=%d", ret);
 559
 560         clean_keys_array(str1, ka1);
 561         sort_free(ka1);
 562
 563         if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
 564                 ret = top_level_str_coll(str1, ((*ss2)->str));
 565                 if (debug_sort)
 566                         printf("; cmp2=%d", ret);
 567         }
 568
 569         if (debug_sort)
 570                 printf("\n");
 571
 572         return (ret);
 573 }
 574
 575 /*
 576  * Compare two sort list items, according to the sort specs.
 577  */
 578 int
 579 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
 580     size_t offset)
 581 {
 582         int ret;
 583
 584         ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
 585
 586         if (debug_sort) {
 587                 if (offset)
 588                         printf("; offset=%d", (int) offset);
 589                 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
 590                 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
 591                 printf("; cmp1=%d\n", ret);
 592         }
 593
 594         if (ret)
 595                 return (ret);
 596
 597         if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
 598                 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
 599                 if (debug_sort)
 600                         printf("; cmp2=%d\n", ret);
 601         }
 602
 603         return (ret);
 604 }
 605
 606 /*
 607  * Compare two sort list items, according to the sort specs.
 608  */
 609 int
 610 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
 611 {
 612
 613         return (list_coll_offset(ss1, ss2, 0));
 614 }
 615
 616 #define LSCDEF(N)                                                       \
 617 static int                                                              \
 618 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
 619 {                                                                       \
 620                                                                         \
 621         return (list_coll_offset(ss1, ss2, N));                         \
 622 }
 623
 624 LSCDEF(1)
 625 LSCDEF(2)
 626 LSCDEF(3)
 627 LSCDEF(4)
 628 LSCDEF(5)
 629 LSCDEF(6)
 630 LSCDEF(7)
 631 LSCDEF(8)
 632 LSCDEF(9)
 633 LSCDEF(10)
 634 LSCDEF(11)
 635 LSCDEF(12)
 636 LSCDEF(13)
 637 LSCDEF(14)
 638 LSCDEF(15)
 639 LSCDEF(16)
 640 LSCDEF(17)
 641 LSCDEF(18)
 642 LSCDEF(19)
 643 LSCDEF(20)
 644
 645 listcoll_t
 646 get_list_call_func(size_t offset)
 647 {
 648         static const listcoll_t lsarray[] = { list_coll, list_coll_1,
 649             list_coll_2, list_coll_3, list_coll_4, list_coll_5,
 650             list_coll_6, list_coll_7, list_coll_8, list_coll_9,
 651             list_coll_10, list_coll_11, list_coll_12, list_coll_13,
 652             list_coll_14, list_coll_15, list_coll_16, list_coll_17,
 653             list_coll_18, list_coll_19, list_coll_20 };
 654
 655         if (offset <= 20)
 656                 return (lsarray[offset]);
 657
 658         return (list_coll);
 659 }
 660
 661 /*
 662  * Compare two sort list items, only by their original string.
 663  */
 664 int
 665 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
 666 {
 667
 668         return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
 669 }
 670
 671 /*
 672  * Maximum size of a number in the string (before or after decimal point)
 673  */
 674 #define MAX_NUM_SIZE (128)
 675
 676 /*
 677  * Set suffix value
 678  */
 679 static void setsuffix(wchar_t c, unsigned char *si)
 680 {
 681         switch (c){
 682         case L'k':
 683         case L'K':
 684                 *si = 1;
 685                 break;
 686         case L'M':
 687                 *si = 2;
 688                 break;
 689         case L'G':
 690                 *si = 3;
 691                 break;
 692         case L'T':
 693                 *si = 4;
 694                 break;
 695         case L'P':
 696                 *si = 5;
 697                 break;
 698         case L'E':
 699                 *si = 6;
 700                 break;
 701         case L'Z':
 702                 *si = 7;
 703                 break;
 704         case L'Y':
 705                 *si = 8;
 706                 break;
 707         default:
 708                 *si = 0;
 709         }
 710 }
 711
 712 /*
 713  * Read string s and parse the string into a fixed-decimal-point number.
 714  * sign equals -1 if the number is negative (explicit plus is not allowed,
 715  * according to GNU sort's "info sort".
 716  * The number part before decimal point is in the smain, after the decimal
 717  * point is in sfrac, tail is the pointer to the remainder of the string.
 718  */
 719 static int
 720 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
 721 {
 722         bwstring_iterator s;
 723         bool prev_thousand_sep = false;
 724
 725         s = bws_begin(s0);
 726
 727         /* always end the fraction with zero, even if we have no fraction */
 728         sfrac[0] = 0;
 729
 730         while (iswblank_f(bws_get_iter_value(s)))
 731                 s = bws_iterator_inc(s, 1);
 732
 733         if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
 734                 *sign = -1;
 735                 s = bws_iterator_inc(s, 1);
 736         }
 737
 738         // This is '0', not '\0', do not change this
 739         while (iswdigit(bws_get_iter_value(s)) &&
 740             (bws_get_iter_value(s) == L'0'))
 741                 s = bws_iterator_inc(s, 1);
 742
 743         while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
 744                 if (iswdigit(bws_get_iter_value(s))) {
 745                         smain[*main_len] = bws_get_iter_value(s);
 746                         s = bws_iterator_inc(s, 1);
 747                         *main_len += 1;
 748                         prev_thousand_sep = false;
 749                 } else if (symbol_thousands_sep
 750                         && (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)
 751                         && (!prev_thousand_sep)
 752                            ) {
 753                         s = bws_iterator_inc(s, 1);
 754                         prev_thousand_sep = true;
 755                 } else
 756                         break;
 757         }
 758
 759         smain[*main_len] = 0;
 760
 761         if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
 762                 s = bws_iterator_inc(s, 1);
 763                 while (iswdigit(bws_get_iter_value(s)) &&
 764                     *frac_len < MAX_NUM_SIZE) {
 765                         sfrac[*frac_len] = bws_get_iter_value(s);
 766                         s = bws_iterator_inc(s, 1);
 767                         *frac_len += 1;
 768                 }
 769                 sfrac[*frac_len] = 0;
 770
 771                 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
 772                         --(*frac_len);
 773                         sfrac[*frac_len] = L'\0';
 774                 }
 775         }
 776
 777         setsuffix(bws_get_iter_value(s),si);
 778
 779         if ((*main_len + *frac_len) == 0)
 780                 *sign = 0;
 781
 782         return (0);
 783 }
 784
 785 /*
 786  * Implements string sort.
 787  */
 788 static int
 789 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
 790 {
 791
 792         if (debug_sort) {
 793                 if (offset)
 794                         printf("; offset=%d\n", (int) offset);
 795                 bwsprintf(stdout, kv1->k, "; k1=<", ">");
 796                 printf("(%zu)", BWSLEN(kv1->k));
 797                 bwsprintf(stdout, kv2->k, ", k2=<", ">");
 798                 printf("(%zu)", BWSLEN(kv2->k));
 799         }
 800
 801         return (bwscoll(kv1->k, kv2->k, offset));
 802 }
 803
 804 /*
 805  * Compare two suffixes
 806  */
 807 static inline int
 808 cmpsuffix(unsigned char si1, unsigned char si2)
 809 {
 810
 811         return ((char)si1 - (char)si2);
 812 }
 813
 814 /*
 815  * Implements numeric sort for -n and -h.
 816  */
 817 static int
 818 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
 819     size_t offset __unused, bool use_suffix)
 820 {
 821         struct bwstring *s1, *s2;
 822         wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
 823         wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
 824         int cmp_res, sign1, sign2;
 825         size_t frac1, frac2, main1, main2;
 826         unsigned char SI1, SI2;
 827         bool e1, e2, key1_read, key2_read;
 828
 829         s1 = kv1->k;
 830         s2 = kv2->k;
 831         sign1 = sign2 = 0;
 832         main1 = main2 = 0;
 833         frac1 = frac2 = 0;
 834
 835         key1_read = key2_read = false;
 836
 837         if (debug_sort) {
 838                 bwsprintf(stdout, s1, "; k1=<", ">");
 839                 bwsprintf(stdout, s2, ", k2=<", ">");
 840         }
 841
 842         if (s1 == s2)
 843                 return (0);
 844
 845         if (kv1->hint->status == HS_UNINITIALIZED) {
 846                 /* read the number from the string */
 847                 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
 848                 key1_read = true;
 849                 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
 850                 if(main1 < 1 && frac1 < 1)
 851                         kv1->hint->v.nh.empty=true;
 852                 kv1->hint->v.nh.si = SI1;
 853                 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
 854                     HS_INITIALIZED : HS_ERROR;
 855                 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
 856         }
 857
 858         if (kv2->hint->status == HS_UNINITIALIZED) {
 859                 /* read the number from the string */
 860                 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
 861                 key2_read = true;
 862                 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
 863                 if(main2 < 1 && frac2 < 1)
 864                         kv2->hint->v.nh.empty=true;
 865                 kv2->hint->v.nh.si = SI2;
 866                 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
 867                     HS_INITIALIZED : HS_ERROR;
 868                 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
 869         }
 870
 871         if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
 872             HS_INITIALIZED) {
 873                 unsigned long long n1, n2;
 874                 bool neg1, neg2;
 875
 876                 e1 = kv1->hint->v.nh.empty;
 877                 e2 = kv2->hint->v.nh.empty;
 878
 879                 if (e1 && e2)
 880                         return (0);
 881
 882                 neg1 = kv1->hint->v.nh.neg;
 883                 neg2 = kv2->hint->v.nh.neg;
 884
 885                 if (neg1 && !neg2)
 886                         return (-1);
 887                 if (neg2 && !neg1)
 888                         return (+1);
 889
 890                 if (e1)
 891                         return (neg2 ? +1 : -1);
 892                 else if (e2)
 893                         return (neg1 ? -1 : +1);
 894
 895
 896                 if (use_suffix) {
 897                         cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
 898                         if (cmp_res)
 899                                 return (neg1 ? -cmp_res : cmp_res);
 900                 }
 901
 902                 n1 = kv1->hint->v.nh.n1;
 903                 n2 = kv2->hint->v.nh.n1;
 904                 if (n1 < n2)
 905                         return (neg1 ? +1 : -1);
 906                 else if (n1 > n2)
 907                         return (neg1 ? -1 : +1);
 908         }
 909
 910         /* read the numbers from the strings */
 911         if (!key1_read)
 912                 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
 913         if (!key2_read)
 914                 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
 915
 916         e1 = ((main1 + frac1) == 0);
 917         e2 = ((main2 + frac2) == 0);
 918
 919         if (e1 && e2)
 920                 return (0);
 921
 922         /* we know the result if the signs are different */
 923         if (sign1 < 0 && sign2 >= 0)
 924                 return (-1);
 925         if (sign1 >= 0 && sign2 < 0)
 926                 return (+1);
 927
 928         if (e1)
 929                 return ((sign2 < 0) ? +1 : -1);
 930         else if (e2)
 931                 return ((sign1 < 0) ? -1 : +1);
 932
 933         if (use_suffix) {
 934                 cmp_res = cmpsuffix(SI1, SI2);
 935                 if (cmp_res)
 936                         return ((sign1 < 0) ? -cmp_res : cmp_res);
 937         }
 938
 939         /* if both numbers are empty assume that the strings are equal */
 940         if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
 941                 return (0);
 942
 943         /*
 944          * if the main part is of different size, we know the result
 945          * (because the leading zeros are removed)
 946          */
 947         if (main1 < main2)
 948                 cmp_res = -1;
 949         else if (main1 > main2)
 950                 cmp_res = +1;
 951         /* if the sizes are equal then simple non-collate string compare gives the correct result */
 952         else
 953                 cmp_res = wcscmp(smain1, smain2);
 954
 955         /* check fraction */
 956         if (!cmp_res)
 957                 cmp_res = wcscmp(sfrac1, sfrac2);
 958
 959         if (!cmp_res)
 960                 return (0);
 961
 962         /* reverse result if the signs are negative */
 963         if (sign1 < 0 && sign2 < 0)
 964                 cmp_res = -cmp_res;
 965
 966         return (cmp_res);
 967 }
 968
 969 /*
 970  * Implements numeric sort (-n).
 971  */
 972 static int
 973 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
 974 {
 975
 976         return (numcoll_impl(kv1, kv2, offset, false));
 977 }
 978
 979 /*
 980  * Implements 'human' numeric sort (-h).
 981  */
 982 static int
 983 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
 984 {
 985
 986         return (numcoll_impl(kv1, kv2, offset, true));
 987 }
 988
 989 /*
 990  * Implements random sort (-R).
 991  */
 992 static int
 993 randomcoll(struct key_value *kv1, struct key_value *kv2,
 994     size_t offset __unused)
 995 {
 996         struct bwstring *s1, *s2;
 997         MD5_CTX ctx1, ctx2;
 998         char *b1, *b2;
 999
1000         s1 = kv1->k;
1001         s2 = kv2->k;
1002
1003         if (debug_sort) {
1004                 bwsprintf(stdout, s1, "; k1=<", ">");
1005                 bwsprintf(stdout, s2, ", k2=<", ">");
1006         }
1007
1008         if (s1 == s2)
1009                 return (0);
1010
1011         memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
1012         memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
1013
1014         MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1015         MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1016         b1 = MD5End(&ctx1, NULL);
1017         b2 = MD5End(&ctx2, NULL);
1018         if (b1 == NULL) {
1019                 if (b2 == NULL)
1020                         return (0);
1021                 else {
1022                         sort_free(b2);
1023                         return (-1);
1024                 }
1025         } else if (b2 == NULL) {
1026                 sort_free(b1);
1027                 return (+1);
1028         } else {
1029                 int cmp_res;
1030
1031                 cmp_res = strcmp(b1,b2);
1032                 sort_free(b1);
1033                 sort_free(b2);
1034
1035                 if (!cmp_res)
1036                         cmp_res = bwscoll(s1, s2, 0);
1037
1038                 return (cmp_res);
1039         }
1040 }
1041
1042 /*
1043  * Implements version sort (-V).
1044  */
1045 static int
1046 versioncoll(struct key_value *kv1, struct key_value *kv2,
1047     size_t offset __unused)
1048 {
1049         struct bwstring *s1, *s2;
1050
1051         s1 = kv1->k;
1052         s2 = kv2->k;
1053
1054         if (debug_sort) {
1055                 bwsprintf(stdout, s1, "; k1=<", ">");
1056                 bwsprintf(stdout, s2, ", k2=<", ">");
1057         }
1058
1059         if (s1 == s2)
1060                 return (0);
1061
1062         return (vcmp(s1, s2));
1063 }
1064
1065 /*
1066  * Check for minus infinity
1067  */
1068 static inline bool
1069 huge_minus(double d, int err1)
1070 {
1071
1072         if (err1 == ERANGE)
1073                 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1074                         return (+1);
1075
1076         return (0);
1077 }
1078
1079 /*
1080  * Check for plus infinity
1081  */
1082 static inline bool
1083 huge_plus(double d, int err1)
1084 {
1085
1086         if (err1 == ERANGE)
1087                 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1088                         return (+1);
1089
1090         return (0);
1091 }
1092
1093 /*
1094  * Check whether a function is a NAN
1095  */
1096 static bool
1097 is_nan(double d)
1098 {
1099
1100         return ((d == NAN) || (isnan(d)));
1101 }
1102
1103 /*
1104  * Compare two NANs
1105  */
1106 static int
1107 cmp_nans(double d1, double d2)
1108 {
1109
1110         if (d1 < d2)
1111                 return (-1);
1112         if (d1 > d2)
1113                 return (+1);
1114         return (0);
1115 }
1116
1117 /*
1118  * Implements general numeric sort (-g).
1119  */
1120 static int
1121 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1122     size_t offset __unused)
1123 {
1124         double d1, d2;
1125         int err1, err2;
1126         bool empty1, empty2, key1_read, key2_read;
1127
1128         d1 = d2 = 0;
1129         err1 = err2 = 0;
1130         key1_read = key2_read = false;
1131
1132         if (debug_sort) {
1133                 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1134                 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1135         }
1136
1137         if (kv1->hint->status == HS_UNINITIALIZED) {
1138                 errno = 0;
1139                 d1 = bwstod(kv1->k, &empty1);
1140                 err1 = errno;
1141
1142                 if (empty1)
1143                         kv1->hint->v.gh.notnum = true;
1144                 else if (err1 == 0) {
1145                         kv1->hint->v.gh.d = d1;
1146                         kv1->hint->v.gh.nan = is_nan(d1);
1147                         kv1->hint->status = HS_INITIALIZED;
1148                 } else
1149                         kv1->hint->status = HS_ERROR;
1150
1151                 key1_read = true;
1152         }
1153
1154         if (kv2->hint->status == HS_UNINITIALIZED) {
1155                 errno = 0;
1156                 d2 = bwstod(kv2->k, &empty2);
1157                 err2 = errno;
1158
1159                 if (empty2)
1160                         kv2->hint->v.gh.notnum = true;
1161                 else if (err2 == 0) {
1162                         kv2->hint->v.gh.d = d2;
1163                         kv2->hint->v.gh.nan = is_nan(d2);
1164                         kv2->hint->status = HS_INITIALIZED;
1165                 } else
1166                         kv2->hint->status = HS_ERROR;
1167
1168                 key2_read = true;
1169         }
1170
1171         if (kv1->hint->status == HS_INITIALIZED &&
1172             kv2->hint->status == HS_INITIALIZED) {
1173                 if (kv1->hint->v.gh.notnum)
1174                         return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1175                 else if (kv2->hint->v.gh.notnum)
1176                         return (+1);
1177
1178                 if (kv1->hint->v.gh.nan)
1179                         return ((kv2->hint->v.gh.nan) ?
1180                             cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1181                             -1);
1182                 else if (kv2->hint->v.gh.nan)
1183                         return (+1);
1184
1185                 d1 = kv1->hint->v.gh.d;
1186                 d2 = kv2->hint->v.gh.d;
1187
1188                 if (d1 < d2)
1189                         return (-1);
1190                 else if (d1 > d2)
1191                         return (+1);
1192                 else
1193                         return (0);
1194         }
1195
1196         if (!key1_read) {
1197                 errno = 0;
1198                 d1 = bwstod(kv1->k, &empty1);
1199                 err1 = errno;
1200         }
1201
1202         if (!key2_read) {
1203                 errno = 0;
1204                 d2 = bwstod(kv2->k, &empty2);
1205                 err2 = errno;
1206         }
1207
1208         /* Non-value case: */
1209         if (empty1)
1210                 return (empty2 ? 0 : -1);
1211         else if (empty2)
1212                 return (+1);
1213
1214         /* NAN case */
1215         if (is_nan(d1))
1216                 return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1217         else if (is_nan(d2))
1218                 return (+1);
1219
1220         /* Infinities */
1221         if (err1 == ERANGE || err2 == ERANGE) {
1222                 /* Minus infinity case */
1223                 if (huge_minus(d1, err1)) {
1224                         if (huge_minus(d2, err2)) {
1225                                 if (d1 < d2)
1226                                         return (-1);
1227                                 if (d1 > d2)
1228                                         return (+1);
1229                                 return (0);
1230                         } else
1231                                 return (-1);
1232
1233                 } else if (huge_minus(d2, err2)) {
1234                         if (huge_minus(d1, err1)) {
1235                                 if (d1 < d2)
1236                                         return (-1);
1237                                 if (d1 > d2)
1238                                         return (+1);
1239                                 return (0);
1240                         } else
1241                                 return (+1);
1242                 }
1243
1244                 /* Plus infinity case */
1245                 if (huge_plus(d1, err1)) {
1246                         if (huge_plus(d2, err2)) {
1247                                 if (d1 < d2)
1248                                         return (-1);
1249                                 if (d1 > d2)
1250                                         return (+1);
1251                                 return (0);
1252                         } else
1253                                 return (+1);
1254                 } else if (huge_plus(d2, err2)) {
1255                         if (huge_plus(d1, err1)) {
1256                                 if (d1 < d2)
1257                                         return (-1);
1258                                 if (d1 > d2)
1259                                         return (+1);
1260                                 return (0);
1261                         } else
1262                                 return (-1);
1263                 }
1264         }
1265
1266         if (d1 < d2)
1267                 return (-1);
1268         if (d1 > d2)
1269                 return (+1);
1270
1271         return (0);
1272 }
1273
1274 /*
1275  * Implements month sort (-M).
1276  */
1277 static int
1278 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1279 {
1280         int val1, val2;
1281         bool key1_read, key2_read;
1282
1283         val1 = val2 = 0;
1284         key1_read = key2_read = false;
1285
1286         if (debug_sort) {
1287                 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1288                 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1289         }
1290
1291         if (kv1->hint->status == HS_UNINITIALIZED) {
1292                 kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1293                 key1_read = true;
1294                 kv1->hint->status = HS_INITIALIZED;
1295         }
1296
1297         if (kv2->hint->status == HS_UNINITIALIZED) {
1298                 kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1299                 key2_read = true;
1300                 kv2->hint->status = HS_INITIALIZED;
1301         }
1302
1303         if (kv1->hint->status == HS_INITIALIZED) {
1304                 val1 = kv1->hint->v.Mh.m;
1305                 key1_read = true;
1306         }
1307
1308         if (kv2->hint->status == HS_INITIALIZED) {
1309                 val2 = kv2->hint->v.Mh.m;
1310                 key2_read = true;
1311         }
1312
1313         if (!key1_read)
1314                 val1 = bws_month_score(kv1->k);
1315         if (!key2_read)
1316                 val2 = bws_month_score(kv2->k);
1317
1318         if (val1 == val2) {
1319                 return (0);
1320         }
1321         if (val1 < val2)
1322                 return (-1);
1323         return (+1);
1324 }