[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Human readable sort
From: |
Michael Speer |
Subject: |
Human readable sort |
Date: |
Fri, 24 Apr 2009 15:15:53 -0400 |
I wrote the following patch to the 7.2 branch of coreutils to allow
`sort` to sort by human readable byte sizes. I looked around a bit to
see what the status of previous attempts to integrate this
functionality was, but didn't see any very recent activity. This is
my first interaction with coreutils, so if I missed something obvious,
please point me towards it.
Is the last potential patch (
http://www.mail-archive.com/address@hidden/msg14080.html )
moving through? If not, and I cleaned this up (tabs, documentation,
and test cases) and applied it to the current HEAD on Savannah, is
there a chance of getting this functionality into sort?
Patch assumptions :
* that numbers always use the best (most abbreviated) representation
(e.g. never 1024b instead of 1k)
* that the sizes are specified via suffixes of b, K, M, G, T, P,
E, Z, Y or their alternately cased variants
Because of the first assumption, when the suffixes of two values differ
only the suffixes are compared; the numbers themselves are compared only
when the suffixes match.
This enables it to match the output of `du -h` / `du --si`, but possibly
not other tools that do not conform to these assumptions.
---------
--- orig/coreutils-7.2/src/sort.c 2009-03-29 13:44:10.000000000 -0400
+++ coreutils-7.2/src/sort.c 2009-04-24 14:03:47.000000000 -0400
@@ -176,6 +176,8 @@
bool random; /* Sort by random hash of key. */
bool general_numeric; /* Flag for general, numeric comparison.
Handle numbers in exponential notation. */
+ bool human_numeric; /* Flag for sorting by size specified
+ data */
bool month; /* Flag for comparison by month name. */
bool reverse; /* Reverse the sense of comparison. */
bool version; /* sort by version number */
@@ -426,7 +428,7 @@
SORT_OPTION
};
-static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uVy:z";
+static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z";
static struct option const long_options[] =
{
@@ -442,6 +444,7 @@
{"merge", no_argument, NULL, 'm'},
{"month-sort", no_argument, NULL, 'M'},
{"numeric-sort", no_argument, NULL, 'n'},
+ {"human-sort", no_argument, NULL, 'h'},
{"version-sort", no_argument, NULL, 'V'},
{"random-sort", no_argument, NULL, 'R'},
{"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
@@ -1673,6 +1676,57 @@
return strnumcmp (a, b, decimal_point, thousands_sep);
}
+/* assumes UCHAR_MAX of 255 */
+/* Y/y:8 -> K/k:1 , otherwise ( including b ) : 0 */
+const char weights [] =
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 6, 0, 3, 0, 0, 0, 1, 0, 2, 0, 0,
+ 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 6, 0, 3, 0, 0, 0, 1, 0, 2, 0, 0,
+ 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } ;
+
+static int
+human_compare(const char *a, const char *b)
+{
+ /* this tests numeric entities ending in human readable size specifiers
+ b < K < M < G < T < P < E < Z < Y
+ we (rudely I admit) assume that numbers are properly abbreviated.
+ for example, you will never see 500,000,000b, instead of 5M
+ */
+
+ const char * ar, * br ; /* riders */
+ int aw, bw ;
+
+ while(blanks[to_uchar (*a)])
+ a++;
+ while(blanks[to_uchar (*b)])
+ b++;
+
+ ar = a ;
+ br = b ;
+
+ while( ISDIGIT(*ar) || (*ar) == decimal_point || (*ar) == thousands_sep )
+ ar++ ;
+ while( ISDIGIT(*br) || (*br) == decimal_point || (*br) == thousands_sep )
+ br++ ;
+
+ aw = weights[to_uchar (*ar)] ;
+ bw = weights[to_uchar (*br)] ;
+
+ return aw > bw ? 1 : aw < bw ? -1 : strnumcmp( a , b ,
decimal_point , thousands_sep) ;
+}
+
static int
general_numcompare (const char *sa, const char *sb)
{
@@ -1917,6 +1971,10 @@
if (key->random)
diff = compare_random (texta, lena, textb, lenb);
+ else if (key->human_numeric)
+ {
+ diff = human_compare(texta, textb);
+ }
else if (key->numeric | key->general_numeric)
{
char savea = *lima, saveb = *limb;
@@ -2887,7 +2945,7 @@
for (key = keylist; key; key = key->next)
if ((1 < (key->random + key->numeric + key->general_numeric + key->month
- + key->version + !!key->ignore))
+ + key->version + (!!key->ignore) + key->human_numeric))
|| (key->random && key->translate))
{
/* The following is too big, but guaranteed to be "big enough". */
@@ -2899,6 +2957,8 @@
*p++ = 'f';
if (key->general_numeric)
*p++ = 'g';
+ if (key->human_numeric)
+ *p++ = 'h';
if (key->ignore == nonprinting)
*p++ = 'i';
if (key->month)
@@ -2990,6 +3050,9 @@
case 'g':
key->general_numeric = true;
break;
+ case 'h':
+ key->human_numeric = true;
+ break;
case 'i':
/* Option order should not matter, so don't let -i override
-d. -d implies -i, but -i does not imply -d. */
@@ -3138,7 +3201,7 @@
gkey.sword = gkey.eword = SIZE_MAX;
gkey.ignore = NULL;
gkey.translate = NULL;
- gkey.numeric = gkey.general_numeric = gkey.random = gkey.version = false;
+ gkey.numeric = gkey.general_numeric = gkey.random = gkey.version =
gkey.human_numeric = false;
gkey.month = gkey.reverse = false;
gkey.skipsblanks = gkey.skipeblanks = false;
@@ -3217,6 +3280,7 @@
case 'd':
case 'f':
case 'g':
+ case 'h':
case 'i':
case 'M':
case 'n':
@@ -3469,6 +3533,7 @@
| key->numeric
| key->version
| key->general_numeric
+ | key->human_numeric
| key->random)))
{
key->ignore = gkey.ignore;
@@ -3478,6 +3543,7 @@
key->month = gkey.month;
key->numeric = gkey.numeric;
key->general_numeric = gkey.general_numeric;
+ key->human_numeric = gkey.human_numeric;
key->random = gkey.random;
key->reverse = gkey.reverse;
key->version = gkey.version;
@@ -3493,6 +3559,7 @@
| gkey.month
| gkey.numeric
| gkey.general_numeric
+ | gkey.human_numeric
| gkey.random
| gkey.version)))
{
- Human readable sort,
Michael Speer <=
- Re: Human readable sort, Pádraig Brady, 2009/04/24
- Re: Human readable sort, Michael Speer, 2009/04/25
- Re: Human readable sort, Pádraig Brady, 2009/04/25
- Re: Human readable sort, Michael Speer, 2009/04/26
- Re: Human readable sort, Pádraig Brady, 2009/04/26
- Re: Human readable sort, Pádraig Brady, 2009/04/27
- Re: Human readable sort, Pádraig Brady, 2009/04/27
- Re: Human readable sort, Ondřej Vašík, 2009/04/27
- Re: Re: Human readable sort, knome . net, 2009/04/27
- Re: Human readable sort, Pádraig Brady, 2009/04/27