From 912c5d139c24bf0ad5aa5459bd02506be30ab206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Mon, 9 Oct 2023 14:45:01 +0100 Subject: [PATCH] sort: use XXH128 rather than blake2b $ seq 1000000 > 1.txt $ time src/sort-md5-lc -R < 1.txt > /dev/null real 0m6.734s user 0m23.258s sys 0m0.047s $ time src/sort-blake2 -R < 1.txt > /dev/null real 0m7.215s user 0m25.683s sys 0m0.043s Using xxhash128 (0.8.2) shows a good improvement (note avx2 being used on this cpu): $ time src/sort-xxh -R < 1.txt > /dev/null real 0m4.111s user 0m14.429s sys 0m0.058s $ grep 'model name' /proc/cpuinfo | head -n1 model name : Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz $ rpm -q openssl-libs openssl-libs-3.0.9-2.fc38.x86_64 * src/sort.c: Use xxh128 routines rather than blake2b. * src/xxhash.h: Include xxhash appropriately. --- src/sort.c | 26 +++++++++++++------------- src/xxhash.h | 3 +++ 2 files changed, 16 insertions(+), 13 deletions(-) create mode 100644 src/xxhash.h diff --git a/src/sort.c b/src/sort.c index 14c3951ce..5f04d6921 100644 --- a/src/sort.c +++ b/src/sort.c @@ -31,7 +31,7 @@ #include "system.h" #include "argmatch.h" #include "assure.h" -#include "blake2/blake2.h" +#include "xxhash.h" #include "fadvise.h" #include "filevercmp.h" #include "flexmember.h" @@ -2085,7 +2085,7 @@ getmonth (char const *month, char **ea) } /* A randomly chosen state, used for random comparison. */ -static blake2b_state random_hash_state; +static XXH3_state_t random_hash_state; #define RANDOM_HASH_BYTES 16 /* Initialize the randomly chosen hash state. */ @@ -2100,8 +2100,8 @@ random_hash_state_init (char const *random_source) randread (r, buf, sizeof buf); if (randread_free (r) != 0) sort_die (_("close failed"), random_source); - blake2b_init (&random_hash_state, RANDOM_HASH_BYTES); - blake2b_update (&random_hash_state, buf, sizeof buf); + (void)XXH3_128bits_reset(&random_hash_state); + (void)XXH3_128bits_update(&random_hash_state, buf, sizeof buf); } /* This is like strxfrm, except it reports any error and exits. */ @@ -2142,8 +2142,8 @@ compare_random (char *restrict texta, size_t lena, char *buf = stackbuf; size_t bufsize = sizeof stackbuf; void *allocated = nullptr; - uint32_t dig[2][RANDOM_HASH_BYTES / sizeof (uint32_t)]; - blake2b_state s[2]; + XXH128_hash_t dig[2]; + XXH3_state_t s[2]; s[0] = s[1] = random_hash_state; if (hard_LC_COLLATE) @@ -2221,8 +2221,8 @@ compare_random (char *restrict texta, size_t lena, /* Accumulate the transformed data in the corresponding checksums. */ - blake2b_update (&s[0], buf, sizea); - blake2b_update (&s[1], buf + sizea, sizeb); + (void)XXH3_128bits_update (&s[0], buf, sizea); + (void)XXH3_128bits_update (&s[1], buf + sizea, sizeb); /* Update the tiebreaker comparison of the transformed data. */ if (! xfrm_diff) @@ -2235,11 +2235,11 @@ compare_random (char *restrict texta, size_t lena, } /* Compute and compare the checksums. */ - blake2b_update (&s[0], texta, lena); - blake2b_final (&s[0], dig[0], RANDOM_HASH_BYTES); - blake2b_update (&s[1], textb, lenb); - blake2b_final (&s[1], dig[1], RANDOM_HASH_BYTES); - int diff = memcmp (dig[0], dig[1], sizeof dig[0]); + (void)XXH3_128bits_update (&s[0], texta, lena); + dig[0] = XXH3_128bits_digest(&s[0]); + (void)XXH3_128bits_update (&s[1], textb, lenb); + dig[1] = XXH3_128bits_digest(&s[1]); + int diff = XXH128_cmp (&dig[0], &dig[1]); /* Fall back on the tiebreaker if the checksums collide. */ if (! diff) diff --git a/src/xxhash.h b/src/xxhash.h new file mode 100644 index 000000000..2802f6917 --- /dev/null +++ b/src/xxhash.h @@ -0,0 +1,3 @@ +#define XXH_INLINE_ALL +#define XXH_STATIC_LINKING_ONLY +#include "xxHash/xxhash.h" -- 2.41.0