diff -x '*~' -ur -N coreutils-5.2.1/src/sort.c coreutils-5.2.1-sort-rand/src/sort.c --- coreutils-5.2.1/src/sort.c 2005-06-02 22:56:29.000000000 -0700 +++ coreutils-5.2.1-sort-rand/src/sort.c 2005-06-05 22:07:37.000000000 -0700 @@ -37,6 +37,9 @@ #include "stdio-safer.h" #include "xmemcoll.h" #include "xstrtol.h" +#include "checksum.h" +#include "randseed.h" +#include "md5.h" #if HAVE_SYS_RESOURCE_H # include @@ -153,6 +156,7 @@ bool numeric; /* Flag for numeric comparison. Handle strings of digits with optional decimal point, but no exponential notation. */ + bool random_hash; /* Shuffle by sorting on random hash of key */ bool general_numeric; /* Flag for general, numeric comparison. Handle numbers in exponential notation. */ bool month; /* Flag for comparison by month name. */ @@ -296,6 +300,7 @@ -i, --ignore-nonprinting consider only printable characters\n\ -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\ -n, --numeric-sort compare according to string numerical value\n\ + -R, --random sort by random hash of keys\n\ -r, --reverse reverse the result of comparisons\n\ \n\ "), stdout); @@ -318,6 +323,7 @@ "), DEFAULT_TMPDIR); fputs (_("\ -z, --zero-terminated end lines with 0 byte, not newline\n\ + --seed use specified seed for random number generator\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); @@ -346,7 +352,11 @@ exit (status); } -#define COMMON_SHORT_OPTIONS "-bcdfgik:mMno:rsS:t:T:uz" +#define COMMON_SHORT_OPTIONS "-bcdfgik:mMnRo:rsS:t:T:uz" +enum +{ + SEED_OPTION = CHAR_MAX + 1 +}; static struct option const long_options[] = { @@ -360,6 +370,7 @@ {"merge", no_argument, NULL, 'm'}, {"month-sort", no_argument, NULL, 'M'}, {"numeric-sort", no_argument, NULL, 'n'}, + {"random", no_argument, NULL, 'R'}, {"output", required_argument, NULL, 'o'}, {"reverse", no_argument, NULL, 'r'}, {"stable", no_argument, NULL, 's'}, @@ -368,6 +379,7 @@ {"temporary-directory", required_argument, NULL, 'T'}, {"unique", no_argument, NULL, 'u'}, {"zero-terminated", no_argument, NULL, 'z'}, + {"seed", required_argument, NULL, SEED_OPTION}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {0, 0, 0, 0}, @@ -1332,6 +1344,26 @@ return result; } +struct digest_ops digops; +void *salt_ctx; + +static void +init_salt() +{ + get_digest_ops(ALG_MD5, &digops); + salt_ctx = xmalloc(digops.ctx_size); + digops.init_ctx(salt_ctx); +} + +static void +get_hash(char* text, int len, void *resbuf) +{ + void *ctx = alloca(digops.ctx_size); + memcpy(ctx, salt_ctx, digops.ctx_size); + digops.process_bytes(text, len, ctx); + digops.finish_ctx(ctx,resbuf); +} + /* Compare two lines A and B trying every key in sequence until there are no more keys or a difference is found. */ @@ -1374,6 +1406,15 @@ (texta, textb)); *lima = savea, *limb = saveb; } + else if (key->random_hash) + { + int dig_bytes = digops.bits/8; + char diga[dig_bytes]; + char digb[dig_bytes]; + get_hash(texta, lena, diga); + get_hash(textb, lenb, digb); + diff = memcmp(diga, digb, sizeof(diga)); + } else if (key->month) diff = getmonth (texta, lena) - getmonth (textb, lenb); /* Sorting like this may become slow, so in a simple locale the user @@ -2083,7 +2124,7 @@ { error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"), _(msgid), spec); - abort (); + abort (); // XXX is this ever reached? need comment if it is } /* Parse the leading integer in STRING and store the resulting value @@ -2189,6 +2230,9 @@ case 'n': key->numeric = true; break; + case 'R': + key->random_hash = true; + break; case 'r': key->reverse = true; break; @@ -2217,6 +2261,8 @@ int c = 0; bool checkonly = false; bool mergeonly = false; + char *randseed = 0; + bool need_rand = false; int nfiles = 0; bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL); bool obsolete_usage = (posix2_version () < 200112); @@ -2300,7 +2346,7 @@ gkey.sword = gkey.eword = SIZE_MAX; gkey.ignore = NULL; gkey.translate = NULL; - gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false; + gkey.numeric = gkey.general_numeric = gkey.random_hash = gkey.month = gkey.reverse = false; gkey.skipsblanks = gkey.skipeblanks = false; files = xnmalloc (argc, sizeof *files); @@ -2376,6 +2422,7 @@ case 'M': case 'n': case 'r': + case 'R': { char str[2]; str[0] = c; @@ -2503,6 +2550,10 @@ eolchar = 0; break; + case SEED_OPTION: + randseed = strdupa(optarg); + break; + case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); @@ -2512,26 +2563,41 @@ } } + need_rand |= gkey.random_hash; /* Inheritance of global options to individual keys. */ - for (key = keylist; key; key = key->next) - if (! (key->ignore || key->translate - || (key->skipsblanks | key->reverse - | key->skipeblanks | key->month | key->numeric - | key->general_numeric))) - { - key->ignore = gkey.ignore; - key->translate = gkey.translate; - key->skipsblanks = gkey.skipsblanks; - key->skipeblanks = gkey.skipeblanks; - key->month = gkey.month; - key->numeric = gkey.numeric; - key->general_numeric = gkey.general_numeric; - key->reverse = gkey.reverse; - } + for (key = keylist; key; key = key->next) + { + need_rand |= key->random_hash; + if (! (key->ignore || key->translate + || (key->skipsblanks | key->reverse + | key->skipeblanks | key->month | key->numeric + | key->general_numeric + | key->random_hash))) + { + key->ignore = gkey.ignore; + key->translate = gkey.translate; + key->skipsblanks = gkey.skipsblanks; + key->skipeblanks = gkey.skipeblanks; + key->month = gkey.month; + key->numeric = gkey.numeric; + key->general_numeric = gkey.general_numeric; + key->random_hash = gkey.random_hash; + key->reverse = gkey.reverse; + } + } + + if (need_rand) { + init_salt(); + if(randseed) + string_seed(&digops, salt_ctx, randseed); + else + default_seed(&digops, salt_ctx); + } if (!keylist && (gkey.ignore || gkey.translate || (gkey.skipsblanks | gkey.skipeblanks | gkey.month - | gkey.numeric | gkey.general_numeric))) + | gkey.numeric | gkey.general_numeric + | gkey.random_hash))) insertkey (&gkey); reverse = gkey.reverse; diff -x '*~' -ur -N coreutils-5.2.1/src/checksum.h coreutils-5.2.1-sort-rand/src/checksum.h --- coreutils-5.2.1/src/checksum.h 2003-04-11 02:07:21.000000000 -0700 +++ coreutils-5.2.1-sort-rand/src/checksum.h 2005-06-05 13:57:22.000000000 -0700 @@ -1,8 +1,14 @@ +#ifndef _CHECKSUM_H +#define _CHECKSUM_H 1 + #include #include #include +#include "sha1.h" +#include "md5.h" + /* For long options that have no equivalent short option, use a non-character as a pseudo short option, starting with CHAR_MAX + 1. */ enum @@ -13,3 +19,39 @@ }; extern int algorithm; + +struct digest_ops { + int ctx_size; + int bits; + void (*init_ctx) (void *ctx); + void (*process_bytes) (const void *buffer, size_t len, void *ctx); + void *(*finish_ctx) (void *ctx, void *resbuf); + void *(*read_ctx) (void *ctx, void *resbuf); +}; + +static inline +void get_digest_ops(int alg, struct digest_ops *res) { + switch(alg) { + case ALG_MD5: + res->ctx_size = sizeof(struct md5_ctx); + res->bits = 128; + res->init_ctx = (void (*) (void *))md5_init_ctx; + res->process_bytes = (void (*) (const void *, size_t, void *))md5_process_bytes; + res->finish_ctx = (void *(*) (void *, void *))md5_finish_ctx; + res->read_ctx = (void *(*) (void *, void *))md5_read_ctx; + break; + case ALG_SHA1: + res->ctx_size = sizeof(struct sha_ctx); + res->bits = 160; + res->init_ctx = (void (*) (void *))sha_init_ctx; + res->process_bytes = (void (*) (const void *, size_t, void *))sha_process_bytes; + res->finish_ctx = (void *(*) (void *, void *))sha_finish_ctx; + res->read_ctx = (void *(*) (void *, void *))sha_read_ctx; + break; + default: + /* abort? */ + break; + } +} + +#endif diff -x '*~' -ur -N coreutils-5.2.1/src/Makefile.in coreutils-5.2.1-sort-rand/src/Makefile.in --- coreutils-5.2.1/src/Makefile.in 2004-03-11 00:59:23.000000000 -0800 +++ coreutils-5.2.1-sort-rand/src/Makefile.in 2005-06-06 00:14:02.000000000 -0700 @@ -481,7 +481,8 @@ $(am__DEPENDENCIES_1) sleep_DEPENDENCIES = $(am__DEPENDENCIES_3) sort_SOURCES = sort.c -sort_OBJECTS = sort.$(OBJEXT) +am_sort_OBJECTS = sort.$(OBJEXT) md5.$(OBJEXT) randseed.$(OBJEXT) +sort_OBJECTS = $(am_sort_OBJECTS) sort_DEPENDENCIES = $(am__DEPENDENCIES_2) $(am__DEPENDENCIES_1) split_SOURCES = split.c split_OBJECTS = split.$(OBJEXT) diff -x '*~' -ur -N coreutils-5.2.1/src/randseed.c coreutils-5.2.1-sort-rand/src/randseed.c --- coreutils-5.2.1/src/randseed.c 1969-12-31 16:00:00.000000000 -0800 +++ coreutils-5.2.1-sort-rand/src/randseed.c 2005-06-06 01:12:07.000000000 -0700 @@ -0,0 +1,390 @@ +/* randseed.c + +Code for portably reading a random seed from entropy devices. Mostly +adapted from libgcrypt. + +June 2005 Frederik Eaton +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "checksum.h" + +// XXX put these in autoconf +#define NAME_OF_DEV_RANDOM "/dev/random" +#define NAME_OF_DEV_URANDOM "/dev/urandom" +#define USE_RNDLINUX 1 +#define USE_RNDEGD 1 + +#define SORT_FAILURE 2 +#define FAIL_CODE SORT_FAILURE + +static int gather_random(struct digest_ops *ops, void *ctx, int level); + +#if USE_RNDLINUX +static int linux_gather_random (struct digest_ops *ops, void *ctx, int level); +#endif + +#if USE_RNDEGD +static int egd_gather_random (struct digest_ops *ops, void *ctx, int level); +static int egd_connect_socket (int nofail); +#endif + +int string_seed(struct digest_ops *ops, void *ctx, char *str) +{ + int len = strlen(str); + ops->process_bytes(str, len, ctx); + return (len*2) >= ops->bits; +} + +int default_seed(struct digest_ops *ops, void *ctx) +{ + gather_random(ops,ctx,1); +} + +static int +gather_random(struct digest_ops *ops, void *ctx, int level) +{ +#if USE_RNDLINUX + if ( !access (NAME_OF_DEV_RANDOM, R_OK) + && !access (NAME_OF_DEV_URANDOM, R_OK)) + { + return linux_gather_random(ops, ctx, level); + } +#endif + +#if USE_RNDEGD + if ( egd_connect_socket (1) != -1 ) + { + return egd_gather_random(ops, ctx, level); + } +#endif + + { + unsigned long l; + l = time(NULL); + ops->process_bytes(&l, sizeof(l), ctx); + l = getpid();; + ops->process_bytes(&l, sizeof(l), ctx); + l = getuid(); + ops->process_bytes(&l, sizeof(l), ctx); + } + + return 0; +} + +//---------------------------------------------------------------- + +int open_dev(char *name) +{ + int fd; + fd = open( name, O_RDONLY ); + if( fd == -1 ) + error (FAIL_CODE, errno, "can't open %s\n", name); + return fd; +} + +#if USE_RNDLINUX +static int +linux_gather_random (struct digest_ops *ops, void *ctx, int level) +{ + static int fd_urandom = -1; + static int fd_random = -1; + int fd; + int n; + int length = ops->bits/8; + int warn=0; + char buffer[768]; + + if( level >= 2 ) + { + if( fd_random == -1 ) + fd_random = open_dev( NAME_OF_DEV_RANDOM ); + fd = fd_random; + } + else + { + if( fd_urandom == -1 ) + fd_urandom = open_dev( NAME_OF_DEV_URANDOM ); + fd = fd_urandom; + } + + while (length) + { + fd_set rfds; + struct timeval tv; + int rc; + + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + tv.tv_sec = 0; + tv.tv_usec = 20*1000; + if( !(rc=select(fd+1, &rfds, NULL, NULL, &tv)) ) + { + if( !warn ) + { + error (0,0,"need_entropy"); + warn = 1; + } + continue; + } + else if( rc == -1 ) + { + error (0, errno, "select()\n"); + continue; + } + + do + { + int nbytes = length < sizeof(buffer)? length : sizeof(buffer); + n = read(fd, buffer, nbytes ); + if( n >= 0 && n > nbytes ) + { + error(0,0,"bogus read from random device (n=%d)\n", n ); + n = nbytes; + } + } + while( n == -1 && errno == EINTR ); + if( n == -1 ) + error(FAIL_CODE, errno, "read error on random device: %s\n"); + ops->process_bytes (buffer, n, ctx); + length -= n; + } + memset(buffer, 0, sizeof(buffer)); + + return 0; /* success */ +} +#endif + +//---------------------------------------------------------------- + +#if USE_RNDEGD + +#ifndef offsetof +#define offsetof(type, member) ((size_t) &((type *)0)->member) +#endif + +static int egd_socket = -1; + +/* Allocate a new filename from FIRST_PART and SECOND_PART and to + tilde expansion for first_part. SECOND_PART might be NULL. + */ +static char * +my_make_filename (const char *first_part, const char *second_part) +{ + size_t n; + char *name, *home, *p; + + n = strlen(first_part)+1; + if (second_part) + n += strlen (second_part) + 1; + + home = NULL; + if( *first_part == '~' && first_part[1] == '/' + && (home = (char*)getenv("HOME")) && *home ) + n += strlen(home); + + name = (char*)xmalloc(n); + p = (char*)(home + ? stpcpy (stpcpy (name, home), first_part+1 ) + : stpcpy (name, first_part) ); + + if (second_part) + strcpy ((char*)stpcpy(p,"/"), second_part); + + return name; +} + + +static int +do_write( int fd, void *buf, size_t nbytes ) +{ + size_t nleft = nbytes; + int nwritten; + + while( nleft > 0 ) + { + nwritten = write( fd, buf, nleft); + if( nwritten < 0 ) + { + if( errno == EINTR ) + continue; + return -1; + } + nleft -= nwritten; + buf = (char*)buf + nwritten; + } + return 0; +} + +static int +do_read( int fd, void *buf, size_t nbytes ) +{ + int n, nread = 0; + + do + { + do + { + n = read(fd, (char*)buf + nread, nbytes ); + } + while( n == -1 && errno == EINTR ); + if( n == -1) + return nread? nread:-1; + if( n == 0) + return -1; + nread += n; + nbytes -= n; + } + while( nread < nbytes ); + return nread; +} + +/* Connect to the EGD and return the file descriptor. Return -1 on + error. With NOFAIL set to true, silently fail and return the + error, otherwise print an error message and die. */ +static int +egd_connect_socket (int nofail) +{ + int fd; + const char *bname = NULL; + char *name; + struct sockaddr_un addr; + int addr_len; + + if (egd_socket != -1) + { + close (egd_socket); + egd_socket = -1; + } + +#ifdef EGD_SOCKET_NAME + bname = EGD_SOCKET_NAME; +#endif + if ( !bname || !*bname ) + name = my_make_filename ("~/.gnupg", "entropy"); + else + name = my_make_filename (bname, NULL); + + if (strlen(name)+1 >= sizeof addr.sun_path) + error (FAIL_CODE, 0, "EGD socketname is too long\n"); + + memset( &addr, 0, sizeof addr ); + addr.sun_family = AF_UNIX; + strcpy( addr.sun_path, name ); + addr_len = (offsetof( struct sockaddr_un, sun_path ) + + strlen( addr.sun_path )); + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd == -1 && !nofail) + error (FAIL_CODE, errno, "can't create unix domain socket\n"); + else if (connect (fd, (struct sockaddr*)&addr, addr_len) == -1) + { + if (!nofail) + error (FAIL_CODE, errno, "can't connect to EGD socket `%s'\n"); + close (fd); + fd = -1; + } + free(name); + if (fd != -1) + egd_socket = fd; + return fd; +} + +/**************** + * Note: We always use the highest level. + * To boost the performance we may want to add some + * additional code for level 1 + * + * Using a level of 0 should never block and better add nothing + * to the pool. So this is just a dummy for EGD. + */ +static int +egd_gather_random (struct digest_ops *ops, void *ctx, int level) +{ + int fd = egd_socket; + int n; + char buffer[256+2]; + int nbytes; + int do_restart = 0; + int length = ops->bits/8; + + if( !length ) + return 0; + if( !level ) + return 0; + + restart: + if (fd == -1 || do_restart) + fd = egd_connect_socket (0); + + do_restart = 0; + + nbytes = length < 255? length : 255; + /* First time we do it with a non blocking request */ + buffer[0] = 1; /* non blocking */ + buffer[1] = nbytes; + if( do_write( fd, buffer, 2 ) == -1 ) + error(FAIL_CODE, errno, "can't write to the EGD: %s\n"); + n = do_read( fd, buffer, 1 ); + if( n == -1 ) + { + error (FAIL_CODE, errno, "read error on EGD\n"); + do_restart = 1; + goto restart; + } + n = buffer[0]; + if( n ) + { + n = do_read( fd, buffer, n ); + if( n == -1 ) + { + error (FAIL_CODE, errno, "read error on EGD\n"); + do_restart = 1; + goto restart; + } + ops->process_bytes (buffer, n, ctx); + length -= n; + } + + if( length ) + { + fprintf (stderr, + "Please wait, entropy is being gathered. Do some work if it would\n" + "keep you from getting bored, because it will improve the quality\n" + "of the entropy.\n"); + } + while( length ) + { + nbytes = length < 255? length : 255; + + buffer[0] = 2; /* blocking */ + buffer[1] = nbytes; + if( do_write( fd, buffer, 2 ) == -1 ) + error (FAIL_CODE, errno, "can't write to the EGD\n"); + n = do_read( fd, buffer, nbytes ); + if( n == -1 ) + { + error (FAIL_CODE, errno, "read error on EGD\n"); + do_restart = 1; + goto restart; + } + ops->process_bytes (buffer, n, ctx); + length -= n; + } + memset(buffer, 0, sizeof(buffer) ); + + return 0; /* success */ +} + +#endif diff -x '*~' -ur -N coreutils-5.2.1/src/randseed.h coreutils-5.2.1-sort-rand/src/randseed.h --- coreutils-5.2.1/src/randseed.h 1969-12-31 16:00:00.000000000 -0800 +++ coreutils-5.2.1-sort-rand/src/randseed.h 2005-06-05 13:05:41.000000000 -0700 @@ -0,0 +1,4 @@ +#include "checksum.h" + +int string_seed(struct digest_ops *ops, void *ctx, char *str); +int default_seed(struct digest_ops *ops, void *ctx);