]> git.cameronkatri.com Git - apple_cmds.git/blob - text_cmds/sort/coll.c
file_cmds: Fix install and COLORLS
[apple_cmds.git] / text_cmds / sort / coll.c
1 /*-
2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include <sys/types.h>
32
33 #include <errno.h>
34 #include <err.h>
35 #include <langinfo.h>
36 #include <limits.h>
37 #include <math.h>
38 #ifndef __APPLE__
39 #include <md5.h>
40 #endif
41 #include <stdlib.h>
42 #include <string.h>
43 #include <wchar.h>
44 #include <wctype.h>
45
46 #include "coll.h"
47 #include "vsort.h"
48
49 struct key_specs *keys;
50 size_t keys_num = 0;
51
52 wint_t symbol_decimal_point = L'.';
53 /* there is no default thousands separator in collate rules: */
54 wint_t symbol_thousands_sep = 0;
55 wint_t symbol_negative_sign = L'-';
56 wint_t symbol_positive_sign = L'+';
57
58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
60 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
61 static int numcoll(struct key_value*, struct key_value *, size_t offset);
62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
63 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
64 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
65
66 /*
67 * Allocate keys array
68 */
69 struct keys_array *
70 keys_array_alloc(void)
71 {
72 struct keys_array *ka;
73 size_t sz;
74
75 sz = keys_array_size();
76 ka = sort_malloc(sz);
77 memset(ka, 0, sz);
78
79 return (ka);
80 }
81
82 /*
83 * Calculate whether we need key hint space
84 */
85 static size_t
86 key_hint_size(void)
87 {
88
89 return (need_hint ? sizeof(struct key_hint) : 0);
90 }
91
92 /*
93 * Calculate keys array size
94 */
95 size_t
96 keys_array_size(void)
97 {
98
99 return (keys_num * (sizeof(struct key_value) + key_hint_size()));
100 }
101
102 /*
103 * Clean data of keys array
104 */
105 void
106 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
107 {
108
109 if (ka) {
110 for (size_t i = 0; i < keys_num; ++i) {
111 const struct key_value *kv;
112
113 kv = get_key_from_keys_array(ka, i);
114 if (kv->k && kv->k != s)
115 bwsfree(kv->k);
116 }
117 memset(ka, 0, keys_array_size());
118 }
119 }
120
121 /*
122 * Get pointer to a key value in the keys set
123 */
124 struct key_value *
125 get_key_from_keys_array(struct keys_array *ka, size_t ind)
126 {
127
128 return ((struct key_value *)((caddr_t)ka->key +
129 ind * (sizeof(struct key_value) + key_hint_size())));
130 }
131
132 /*
133 * Set value of a key in the keys set
134 */
135 void
136 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
137 {
138
139 if (ka && keys_num > ind) {
140 struct key_value *kv;
141
142 kv = get_key_from_keys_array(ka, ind);
143
144 if (kv->k && kv->k != s)
145 bwsfree(kv->k);
146 kv->k = s;
147 }
148 }
149
150 /*
151 * Initialize a sort list item
152 */
153 struct sort_list_item *
154 sort_list_item_alloc(void)
155 {
156 struct sort_list_item *si;
157 size_t sz;
158
159 sz = sizeof(struct sort_list_item) + keys_array_size();
160 si = sort_malloc(sz);
161 memset(si, 0, sz);
162
163 return (si);
164 }
165
166 size_t
167 sort_list_item_size(struct sort_list_item *si)
168 {
169 size_t ret = 0;
170
171 if (si) {
172 ret = sizeof(struct sort_list_item) + keys_array_size();
173 if (si->str)
174 ret += bws_memsize(si->str);
175 for (size_t i = 0; i < keys_num; ++i) {
176 const struct key_value *kv;
177
178 kv = get_key_from_keys_array(&si->ka, i);
179
180 if (kv->k != si->str)
181 ret += bws_memsize(kv->k);
182 }
183 }
184 return (ret);
185 }
186
187 /*
188 * Calculate key for a sort list item
189 */
190 static void
191 sort_list_item_make_key(struct sort_list_item *si)
192 {
193
194 preproc(si->str, &(si->ka));
195 }
196
197 /*
198 * Set value of a sort list item.
199 * Return combined string and keys memory size.
200 */
201 void
202 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
203 {
204
205 if (si) {
206 clean_keys_array(si->str, &(si->ka));
207 if (si->str) {
208 if (si->str == str) {
209 /* we are trying to reset the same string */
210 return;
211 } else {
212 bwsfree(si->str);
213 si->str = NULL;
214 }
215 }
216 si->str = str;
217 sort_list_item_make_key(si);
218 }
219 }
220
221 /*
222 * De-allocate a sort list item object memory
223 */
224 void
225 sort_list_item_clean(struct sort_list_item *si)
226 {
227
228 if (si) {
229 clean_keys_array(si->str, &(si->ka));
230 if (si->str) {
231 bwsfree(si->str);
232 si->str = NULL;
233 }
234 }
235 }
236
237 /*
238 * Skip columns according to specs
239 */
240 static size_t
241 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
242 bool skip_blanks, bool *empty_key)
243 {
244 if (cols < 1)
245 return (BWSLEN(s) + 1);
246
247 if (skip_blanks)
248 while (start < BWSLEN(s) && iswblank_f(BWS_GET(s,start)))
249 ++start;
250
251 while (start < BWSLEN(s) && cols > 1) {
252 --cols;
253 ++start;
254 }
255
256 if (start >= BWSLEN(s))
257 *empty_key = true;
258
259 return (start);
260 }
261
262 /*
263 * Skip fields according to specs
264 */
265 static size_t
266 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
267 {
268
269 if (fields < 2) {
270 if (BWSLEN(s) == 0)
271 *empty_field = true;
272 return (0);
273 } else if (!(sort_opts_vals.tflag)) {
274 size_t cpos = 0;
275 bool pb = true;
276
277 while (cpos < BWSLEN(s)) {
278 bool isblank;
279
280 isblank = iswblank_f(BWS_GET(s, cpos));
281
282 if (isblank && !pb) {
283 --fields;
284 if (fields <= 1)
285 return (cpos);
286 }
287 pb = isblank;
288 ++cpos;
289 }
290 if (fields > 1)
291 *empty_field = true;
292 return (cpos);
293 } else {
294 size_t cpos = 0;
295
296 while (cpos < BWSLEN(s)) {
297 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
298 --fields;
299 if (fields <= 1)
300 return (cpos + 1);
301 }
302 ++cpos;
303 }
304 if (fields > 1)
305 *empty_field = true;
306 return (cpos);
307 }
308 }
309
310 /*
311 * Find fields start
312 */
313 static void
314 find_field_start(const struct bwstring *s, struct key_specs *ks,
315 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
316 {
317
318 *field_start = skip_fields_to_start(s, ks->f1, empty_field);
319 if (!*empty_field)
320 *key_start = skip_cols_to_start(s, ks->c1, *field_start,
321 ks->pos1b, empty_key);
322 else
323 *empty_key = true;
324 }
325
326 /*
327 * Find end key position
328 */
329 static size_t
330 find_field_end(const struct bwstring *s, struct key_specs *ks)
331 {
332 size_t f2, next_field_start, pos_end;
333 bool empty_field, empty_key;
334
335 empty_field = false;
336 empty_key = false;
337 f2 = ks->f2;
338
339 if (f2 == 0)
340 return (BWSLEN(s) + 1);
341 else {
342 if (ks->c2 == 0) {
343 next_field_start = skip_fields_to_start(s, f2 + 1,
344 &empty_field);
345 if ((next_field_start > 0) && sort_opts_vals.tflag &&
346 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
347 next_field_start - 1)))
348 --next_field_start;
349 } else
350 next_field_start = skip_fields_to_start(s, f2,
351 &empty_field);
352 }
353
354 if (empty_field || (next_field_start >= BWSLEN(s)))
355 return (BWSLEN(s) + 1);
356
357 if (ks->c2) {
358 pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
359 ks->pos2b, &empty_key);
360 if (pos_end < BWSLEN(s))
361 ++pos_end;
362 } else
363 pos_end = next_field_start;
364
365 return (pos_end);
366 }
367
368 /*
369 * Cut a field according to the key specs
370 */
371 static struct bwstring *
372 cut_field(const struct bwstring *s, struct key_specs *ks)
373 {
374 struct bwstring *ret = NULL;
375
376 if (s && ks) {
377 size_t field_start, key_end, key_start, sz;
378 bool empty_field, empty_key;
379
380 field_start = 0;
381 key_start = 0;
382 empty_field = false;
383 empty_key = false;
384
385 find_field_start(s, ks, &field_start, &key_start,
386 &empty_field, &empty_key);
387
388 if (empty_key)
389 sz = 0;
390 else {
391 key_end = find_field_end(s, ks);
392 sz = (key_end < key_start) ? 0 : (key_end - key_start);
393 }
394
395 ret = bwsalloc(sz);
396 if (sz)
397 bwsnocpy(ret, s, key_start, sz);
398 } else
399 ret = bwsalloc(0);
400
401 return (ret);
402 }
403
404 /*
405 * Preprocesses a line applying the necessary transformations
406 * specified by command line options and returns the preprocessed
407 * string, which can be used to compare.
408 */
409 int
410 preproc(struct bwstring *s, struct keys_array *ka)
411 {
412
413 if (sort_opts_vals.kflag)
414 for (size_t i = 0; i < keys_num; i++) {
415 struct bwstring *key;
416 struct key_specs *kspecs;
417 struct sort_mods *sm;
418
419 kspecs = &(keys[i]);
420 key = cut_field(s, kspecs);
421
422 sm = &(kspecs->sm);
423 if (sm->dflag)
424 key = dictionary_order(key);
425 else if (sm->iflag)
426 key = ignore_nonprinting(key);
427 if (sm->fflag || sm->Mflag)
428 key = ignore_case(key);
429
430 set_key_on_keys_array(ka, key, i);
431 }
432 else {
433 struct bwstring *ret = NULL;
434 struct sort_mods *sm = default_sort_mods;
435
436 if (sm->bflag) {
437 if (ret == NULL)
438 ret = bwsdup(s);
439 ret = ignore_leading_blanks(ret);
440 }
441 if (sm->dflag) {
442 if (ret == NULL)
443 ret = bwsdup(s);
444 ret = dictionary_order(ret);
445 } else if (sm->iflag) {
446 if (ret == NULL)
447 ret = bwsdup(s);
448 ret = ignore_nonprinting(ret);
449 }
450 if (sm->fflag || sm->Mflag) {
451 if (ret == NULL)
452 ret = bwsdup(s);
453 ret = ignore_case(ret);
454 }
455 if (ret == NULL)
456 set_key_on_keys_array(ka, s, 0);
457 else
458 set_key_on_keys_array(ka, ret, 0);
459 }
460
461 return 0;
462 }
463
464 cmpcoll_t
465 get_sort_func(struct sort_mods *sm)
466 {
467
468 if (sm->nflag)
469 return (numcoll);
470 else if (sm->hflag)
471 return (hnumcoll);
472 else if (sm->gflag)
473 return (gnumcoll);
474 else if (sm->Mflag)
475 return (monthcoll);
476 else if (sm->Rflag)
477 return (randomcoll);
478 else if (sm->Vflag)
479 return (versioncoll);
480 else
481 return (wstrcoll);
482 }
483
484 /*
485 * Compares the given strings. Returns a positive number if
486 * the first precedes the second, a negative number if the second is
487 * the preceding one, and zero if they are equal. This function calls
488 * the underlying collate functions, which done the actual comparison.
489 */
490 int
491 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
492 {
493 struct key_value *kv1, *kv2;
494 struct sort_mods *sm;
495 int res = 0;
496
497 for (size_t i = 0; i < keys_num; ++i) {
498 kv1 = get_key_from_keys_array(ps1, i);
499 kv2 = get_key_from_keys_array(ps2, i);
500 sm = &(keys[i].sm);
501
502 if (sm->rflag)
503 res = sm->func(kv2, kv1, offset);
504 else
505 res = sm->func(kv1, kv2, offset);
506
507 if (res)
508 break;
509
510 /* offset applies to only the first key */
511 offset = 0;
512 }
513 return (res);
514 }
515
516 /*
517 * Compare two strings.
518 * Plain symbol-by-symbol comparison.
519 */
520 int
521 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
522 {
523
524 if (default_sort_mods->rflag) {
525 const struct bwstring *tmp;
526
527 tmp = s1;
528 s1 = s2;
529 s2 = tmp;
530 }
531
532 return (bwscoll(s1, s2, 0));
533 }
534
535 /*
536 * Compare a string and a sort list item, according to the sort specs.
537 */
538 int
539 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
540 {
541 struct keys_array *ka1;
542 int ret = 0;
543
544 ka1 = keys_array_alloc();
545
546 preproc(str1, ka1);
547
548 sort_list_item_make_key(*ss2);
549
550 if (debug_sort) {
551 bwsprintf(stdout, str1, "; s1=<", ">");
552 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
553 }
554
555 ret = key_coll(ka1, &((*ss2)->ka), 0);
556
557 if (debug_sort)
558 printf("; cmp1=%d", ret);
559
560 clean_keys_array(str1, ka1);
561 sort_free(ka1);
562
563 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
564 ret = top_level_str_coll(str1, ((*ss2)->str));
565 if (debug_sort)
566 printf("; cmp2=%d", ret);
567 }
568
569 if (debug_sort)
570 printf("\n");
571
572 return (ret);
573 }
574
575 /*
576 * Compare two sort list items, according to the sort specs.
577 */
578 int
579 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
580 size_t offset)
581 {
582 int ret;
583
584 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
585
586 if (debug_sort) {
587 if (offset)
588 printf("; offset=%d", (int) offset);
589 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
590 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
591 printf("; cmp1=%d\n", ret);
592 }
593
594 if (ret)
595 return (ret);
596
597 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
598 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
599 if (debug_sort)
600 printf("; cmp2=%d\n", ret);
601 }
602
603 return (ret);
604 }
605
606 /*
607 * Compare two sort list items, according to the sort specs.
608 */
609 int
610 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
611 {
612
613 return (list_coll_offset(ss1, ss2, 0));
614 }
615
616 #define LSCDEF(N) \
617 static int \
618 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
619 { \
620 \
621 return (list_coll_offset(ss1, ss2, N)); \
622 }
623
624 LSCDEF(1)
625 LSCDEF(2)
626 LSCDEF(3)
627 LSCDEF(4)
628 LSCDEF(5)
629 LSCDEF(6)
630 LSCDEF(7)
631 LSCDEF(8)
632 LSCDEF(9)
633 LSCDEF(10)
634 LSCDEF(11)
635 LSCDEF(12)
636 LSCDEF(13)
637 LSCDEF(14)
638 LSCDEF(15)
639 LSCDEF(16)
640 LSCDEF(17)
641 LSCDEF(18)
642 LSCDEF(19)
643 LSCDEF(20)
644
645 listcoll_t
646 get_list_call_func(size_t offset)
647 {
648 static const listcoll_t lsarray[] = { list_coll, list_coll_1,
649 list_coll_2, list_coll_3, list_coll_4, list_coll_5,
650 list_coll_6, list_coll_7, list_coll_8, list_coll_9,
651 list_coll_10, list_coll_11, list_coll_12, list_coll_13,
652 list_coll_14, list_coll_15, list_coll_16, list_coll_17,
653 list_coll_18, list_coll_19, list_coll_20 };
654
655 if (offset <= 20)
656 return (lsarray[offset]);
657
658 return (list_coll);
659 }
660
661 /*
662 * Compare two sort list items, only by their original string.
663 */
664 int
665 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
666 {
667
668 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
669 }
670
671 /*
672 * Maximum size of a number in the string (before or after decimal point)
673 */
674 #define MAX_NUM_SIZE (128)
675
676 /*
677 * Set suffix value
678 */
679 static void setsuffix(wchar_t c, unsigned char *si)
680 {
681 switch (c){
682 case L'k':
683 case L'K':
684 *si = 1;
685 break;
686 case L'M':
687 *si = 2;
688 break;
689 case L'G':
690 *si = 3;
691 break;
692 case L'T':
693 *si = 4;
694 break;
695 case L'P':
696 *si = 5;
697 break;
698 case L'E':
699 *si = 6;
700 break;
701 case L'Z':
702 *si = 7;
703 break;
704 case L'Y':
705 *si = 8;
706 break;
707 default:
708 *si = 0;
709 }
710 }
711
712 /*
713 * Read string s and parse the string into a fixed-decimal-point number.
714 * sign equals -1 if the number is negative (explicit plus is not allowed,
715 * according to GNU sort's "info sort".
716 * The number part before decimal point is in the smain, after the decimal
717 * point is in sfrac, tail is the pointer to the remainder of the string.
718 */
719 static int
720 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
721 {
722 bwstring_iterator s;
723 bool prev_thousand_sep = false;
724
725 s = bws_begin(s0);
726
727 /* always end the fraction with zero, even if we have no fraction */
728 sfrac[0] = 0;
729
730 while (iswblank_f(bws_get_iter_value(s)))
731 s = bws_iterator_inc(s, 1);
732
733 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
734 *sign = -1;
735 s = bws_iterator_inc(s, 1);
736 }
737
738 // This is '0', not '\0', do not change this
739 while (iswdigit(bws_get_iter_value(s)) &&
740 (bws_get_iter_value(s) == L'0'))
741 s = bws_iterator_inc(s, 1);
742
743 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
744 if (iswdigit(bws_get_iter_value(s))) {
745 smain[*main_len] = bws_get_iter_value(s);
746 s = bws_iterator_inc(s, 1);
747 *main_len += 1;
748 prev_thousand_sep = false;
749 } else if (symbol_thousands_sep
750 && (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)
751 && (!prev_thousand_sep)
752 ) {
753 s = bws_iterator_inc(s, 1);
754 prev_thousand_sep = true;
755 } else
756 break;
757 }
758
759 smain[*main_len] = 0;
760
761 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
762 s = bws_iterator_inc(s, 1);
763 while (iswdigit(bws_get_iter_value(s)) &&
764 *frac_len < MAX_NUM_SIZE) {
765 sfrac[*frac_len] = bws_get_iter_value(s);
766 s = bws_iterator_inc(s, 1);
767 *frac_len += 1;
768 }
769 sfrac[*frac_len] = 0;
770
771 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
772 --(*frac_len);
773 sfrac[*frac_len] = L'\0';
774 }
775 }
776
777 setsuffix(bws_get_iter_value(s),si);
778
779 if ((*main_len + *frac_len) == 0)
780 *sign = 0;
781
782 return (0);
783 }
784
785 /*
786 * Implements string sort.
787 */
788 static int
789 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
790 {
791
792 if (debug_sort) {
793 if (offset)
794 printf("; offset=%d\n", (int) offset);
795 bwsprintf(stdout, kv1->k, "; k1=<", ">");
796 printf("(%zu)", BWSLEN(kv1->k));
797 bwsprintf(stdout, kv2->k, ", k2=<", ">");
798 printf("(%zu)", BWSLEN(kv2->k));
799 }
800
801 return (bwscoll(kv1->k, kv2->k, offset));
802 }
803
804 /*
805 * Compare two suffixes
806 */
807 static inline int
808 cmpsuffix(unsigned char si1, unsigned char si2)
809 {
810
811 return ((char)si1 - (char)si2);
812 }
813
814 /*
815 * Implements numeric sort for -n and -h.
816 */
817 static int
818 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
819 size_t offset __unused, bool use_suffix)
820 {
821 struct bwstring *s1, *s2;
822 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
823 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
824 int cmp_res, sign1, sign2;
825 size_t frac1, frac2, main1, main2;
826 unsigned char SI1, SI2;
827 bool e1, e2, key1_read, key2_read;
828
829 s1 = kv1->k;
830 s2 = kv2->k;
831 sign1 = sign2 = 0;
832 main1 = main2 = 0;
833 frac1 = frac2 = 0;
834
835 key1_read = key2_read = false;
836
837 if (debug_sort) {
838 bwsprintf(stdout, s1, "; k1=<", ">");
839 bwsprintf(stdout, s2, ", k2=<", ">");
840 }
841
842 if (s1 == s2)
843 return (0);
844
845 if (kv1->hint->status == HS_UNINITIALIZED) {
846 /* read the number from the string */
847 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
848 key1_read = true;
849 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
850 if(main1 < 1 && frac1 < 1)
851 kv1->hint->v.nh.empty=true;
852 kv1->hint->v.nh.si = SI1;
853 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
854 HS_INITIALIZED : HS_ERROR;
855 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
856 }
857
858 if (kv2->hint->status == HS_UNINITIALIZED) {
859 /* read the number from the string */
860 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
861 key2_read = true;
862 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
863 if(main2 < 1 && frac2 < 1)
864 kv2->hint->v.nh.empty=true;
865 kv2->hint->v.nh.si = SI2;
866 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
867 HS_INITIALIZED : HS_ERROR;
868 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
869 }
870
871 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
872 HS_INITIALIZED) {
873 unsigned long long n1, n2;
874 bool neg1, neg2;
875
876 e1 = kv1->hint->v.nh.empty;
877 e2 = kv2->hint->v.nh.empty;
878
879 if (e1 && e2)
880 return (0);
881
882 neg1 = kv1->hint->v.nh.neg;
883 neg2 = kv2->hint->v.nh.neg;
884
885 if (neg1 && !neg2)
886 return (-1);
887 if (neg2 && !neg1)
888 return (+1);
889
890 if (e1)
891 return (neg2 ? +1 : -1);
892 else if (e2)
893 return (neg1 ? -1 : +1);
894
895
896 if (use_suffix) {
897 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
898 if (cmp_res)
899 return (neg1 ? -cmp_res : cmp_res);
900 }
901
902 n1 = kv1->hint->v.nh.n1;
903 n2 = kv2->hint->v.nh.n1;
904 if (n1 < n2)
905 return (neg1 ? +1 : -1);
906 else if (n1 > n2)
907 return (neg1 ? -1 : +1);
908 }
909
910 /* read the numbers from the strings */
911 if (!key1_read)
912 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
913 if (!key2_read)
914 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
915
916 e1 = ((main1 + frac1) == 0);
917 e2 = ((main2 + frac2) == 0);
918
919 if (e1 && e2)
920 return (0);
921
922 /* we know the result if the signs are different */
923 if (sign1 < 0 && sign2 >= 0)
924 return (-1);
925 if (sign1 >= 0 && sign2 < 0)
926 return (+1);
927
928 if (e1)
929 return ((sign2 < 0) ? +1 : -1);
930 else if (e2)
931 return ((sign1 < 0) ? -1 : +1);
932
933 if (use_suffix) {
934 cmp_res = cmpsuffix(SI1, SI2);
935 if (cmp_res)
936 return ((sign1 < 0) ? -cmp_res : cmp_res);
937 }
938
939 /* if both numbers are empty assume that the strings are equal */
940 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
941 return (0);
942
943 /*
944 * if the main part is of different size, we know the result
945 * (because the leading zeros are removed)
946 */
947 if (main1 < main2)
948 cmp_res = -1;
949 else if (main1 > main2)
950 cmp_res = +1;
951 /* if the sizes are equal then simple non-collate string compare gives the correct result */
952 else
953 cmp_res = wcscmp(smain1, smain2);
954
955 /* check fraction */
956 if (!cmp_res)
957 cmp_res = wcscmp(sfrac1, sfrac2);
958
959 if (!cmp_res)
960 return (0);
961
962 /* reverse result if the signs are negative */
963 if (sign1 < 0 && sign2 < 0)
964 cmp_res = -cmp_res;
965
966 return (cmp_res);
967 }
968
969 /*
970 * Implements numeric sort (-n).
971 */
972 static int
973 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
974 {
975
976 return (numcoll_impl(kv1, kv2, offset, false));
977 }
978
979 /*
980 * Implements 'human' numeric sort (-h).
981 */
982 static int
983 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
984 {
985
986 return (numcoll_impl(kv1, kv2, offset, true));
987 }
988
989 /*
990 * Implements random sort (-R).
991 */
992 static int
993 randomcoll(struct key_value *kv1, struct key_value *kv2,
994 size_t offset __unused)
995 {
996 struct bwstring *s1, *s2;
997 MD5_CTX ctx1, ctx2;
998 char *b1, *b2;
999
1000 s1 = kv1->k;
1001 s2 = kv2->k;
1002
1003 if (debug_sort) {
1004 bwsprintf(stdout, s1, "; k1=<", ">");
1005 bwsprintf(stdout, s2, ", k2=<", ">");
1006 }
1007
1008 if (s1 == s2)
1009 return (0);
1010
1011 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
1012 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
1013
1014 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1015 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1016 b1 = MD5End(&ctx1, NULL);
1017 b2 = MD5End(&ctx2, NULL);
1018 if (b1 == NULL) {
1019 if (b2 == NULL)
1020 return (0);
1021 else {
1022 sort_free(b2);
1023 return (-1);
1024 }
1025 } else if (b2 == NULL) {
1026 sort_free(b1);
1027 return (+1);
1028 } else {
1029 int cmp_res;
1030
1031 cmp_res = strcmp(b1,b2);
1032 sort_free(b1);
1033 sort_free(b2);
1034
1035 if (!cmp_res)
1036 cmp_res = bwscoll(s1, s2, 0);
1037
1038 return (cmp_res);
1039 }
1040 }
1041
1042 /*
1043 * Implements version sort (-V).
1044 */
1045 static int
1046 versioncoll(struct key_value *kv1, struct key_value *kv2,
1047 size_t offset __unused)
1048 {
1049 struct bwstring *s1, *s2;
1050
1051 s1 = kv1->k;
1052 s2 = kv2->k;
1053
1054 if (debug_sort) {
1055 bwsprintf(stdout, s1, "; k1=<", ">");
1056 bwsprintf(stdout, s2, ", k2=<", ">");
1057 }
1058
1059 if (s1 == s2)
1060 return (0);
1061
1062 return (vcmp(s1, s2));
1063 }
1064
1065 /*
1066 * Check for minus infinity
1067 */
1068 static inline bool
1069 huge_minus(double d, int err1)
1070 {
1071
1072 if (err1 == ERANGE)
1073 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1074 return (+1);
1075
1076 return (0);
1077 }
1078
1079 /*
1080 * Check for plus infinity
1081 */
1082 static inline bool
1083 huge_plus(double d, int err1)
1084 {
1085
1086 if (err1 == ERANGE)
1087 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1088 return (+1);
1089
1090 return (0);
1091 }
1092
1093 /*
1094 * Check whether a function is a NAN
1095 */
1096 static bool
1097 is_nan(double d)
1098 {
1099
1100 return ((d == NAN) || (isnan(d)));
1101 }
1102
1103 /*
1104 * Compare two NANs
1105 */
1106 static int
1107 cmp_nans(double d1, double d2)
1108 {
1109
1110 if (d1 < d2)
1111 return (-1);
1112 if (d1 > d2)
1113 return (+1);
1114 return (0);
1115 }
1116
1117 /*
1118 * Implements general numeric sort (-g).
1119 */
1120 static int
1121 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1122 size_t offset __unused)
1123 {
1124 double d1, d2;
1125 int err1, err2;
1126 bool empty1, empty2, key1_read, key2_read;
1127
1128 d1 = d2 = 0;
1129 err1 = err2 = 0;
1130 key1_read = key2_read = false;
1131
1132 if (debug_sort) {
1133 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1134 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1135 }
1136
1137 if (kv1->hint->status == HS_UNINITIALIZED) {
1138 errno = 0;
1139 d1 = bwstod(kv1->k, &empty1);
1140 err1 = errno;
1141
1142 if (empty1)
1143 kv1->hint->v.gh.notnum = true;
1144 else if (err1 == 0) {
1145 kv1->hint->v.gh.d = d1;
1146 kv1->hint->v.gh.nan = is_nan(d1);
1147 kv1->hint->status = HS_INITIALIZED;
1148 } else
1149 kv1->hint->status = HS_ERROR;
1150
1151 key1_read = true;
1152 }
1153
1154 if (kv2->hint->status == HS_UNINITIALIZED) {
1155 errno = 0;
1156 d2 = bwstod(kv2->k, &empty2);
1157 err2 = errno;
1158
1159 if (empty2)
1160 kv2->hint->v.gh.notnum = true;
1161 else if (err2 == 0) {
1162 kv2->hint->v.gh.d = d2;
1163 kv2->hint->v.gh.nan = is_nan(d2);
1164 kv2->hint->status = HS_INITIALIZED;
1165 } else
1166 kv2->hint->status = HS_ERROR;
1167
1168 key2_read = true;
1169 }
1170
1171 if (kv1->hint->status == HS_INITIALIZED &&
1172 kv2->hint->status == HS_INITIALIZED) {
1173 if (kv1->hint->v.gh.notnum)
1174 return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1175 else if (kv2->hint->v.gh.notnum)
1176 return (+1);
1177
1178 if (kv1->hint->v.gh.nan)
1179 return ((kv2->hint->v.gh.nan) ?
1180 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1181 -1);
1182 else if (kv2->hint->v.gh.nan)
1183 return (+1);
1184
1185 d1 = kv1->hint->v.gh.d;
1186 d2 = kv2->hint->v.gh.d;
1187
1188 if (d1 < d2)
1189 return (-1);
1190 else if (d1 > d2)
1191 return (+1);
1192 else
1193 return (0);
1194 }
1195
1196 if (!key1_read) {
1197 errno = 0;
1198 d1 = bwstod(kv1->k, &empty1);
1199 err1 = errno;
1200 }
1201
1202 if (!key2_read) {
1203 errno = 0;
1204 d2 = bwstod(kv2->k, &empty2);
1205 err2 = errno;
1206 }
1207
1208 /* Non-value case: */
1209 if (empty1)
1210 return (empty2 ? 0 : -1);
1211 else if (empty2)
1212 return (+1);
1213
1214 /* NAN case */
1215 if (is_nan(d1))
1216 return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1217 else if (is_nan(d2))
1218 return (+1);
1219
1220 /* Infinities */
1221 if (err1 == ERANGE || err2 == ERANGE) {
1222 /* Minus infinity case */
1223 if (huge_minus(d1, err1)) {
1224 if (huge_minus(d2, err2)) {
1225 if (d1 < d2)
1226 return (-1);
1227 if (d1 > d2)
1228 return (+1);
1229 return (0);
1230 } else
1231 return (-1);
1232
1233 } else if (huge_minus(d2, err2)) {
1234 if (huge_minus(d1, err1)) {
1235 if (d1 < d2)
1236 return (-1);
1237 if (d1 > d2)
1238 return (+1);
1239 return (0);
1240 } else
1241 return (+1);
1242 }
1243
1244 /* Plus infinity case */
1245 if (huge_plus(d1, err1)) {
1246 if (huge_plus(d2, err2)) {
1247 if (d1 < d2)
1248 return (-1);
1249 if (d1 > d2)
1250 return (+1);
1251 return (0);
1252 } else
1253 return (+1);
1254 } else if (huge_plus(d2, err2)) {
1255 if (huge_plus(d1, err1)) {
1256 if (d1 < d2)
1257 return (-1);
1258 if (d1 > d2)
1259 return (+1);
1260 return (0);
1261 } else
1262 return (-1);
1263 }
1264 }
1265
1266 if (d1 < d2)
1267 return (-1);
1268 if (d1 > d2)
1269 return (+1);
1270
1271 return (0);
1272 }
1273
1274 /*
1275 * Implements month sort (-M).
1276 */
1277 static int
1278 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1279 {
1280 int val1, val2;
1281 bool key1_read, key2_read;
1282
1283 val1 = val2 = 0;
1284 key1_read = key2_read = false;
1285
1286 if (debug_sort) {
1287 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1288 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1289 }
1290
1291 if (kv1->hint->status == HS_UNINITIALIZED) {
1292 kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1293 key1_read = true;
1294 kv1->hint->status = HS_INITIALIZED;
1295 }
1296
1297 if (kv2->hint->status == HS_UNINITIALIZED) {
1298 kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1299 key2_read = true;
1300 kv2->hint->status = HS_INITIALIZED;
1301 }
1302
1303 if (kv1->hint->status == HS_INITIALIZED) {
1304 val1 = kv1->hint->v.Mh.m;
1305 key1_read = true;
1306 }
1307
1308 if (kv2->hint->status == HS_INITIALIZED) {
1309 val2 = kv2->hint->v.Mh.m;
1310 key2_read = true;
1311 }
1312
1313 if (!key1_read)
1314 val1 = bws_month_score(kv1->k);
1315 if (!key2_read)
1316 val2 = bws_month_score(kv2->k);
1317
1318 if (val1 == val2) {
1319 return (0);
1320 }
1321 if (val1 < val2)
1322 return (-1);
1323 return (+1);
1324 }