[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master e66870400d4 1/2: Change hash range reduction from remainder to mu
From: |
Mattias Engdegård |
Subject: |
master e66870400d4 1/2: Change hash range reduction from remainder to multiplication |
Date: |
Tue, 6 Feb 2024 09:50:47 -0500 (EST) |
branch: master
commit e66870400d45e3d08265df9f6acd4631a5712139
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>
Change hash range reduction from remainder to multiplication
This makes both lookups and rehashing cheaper. The index vector size
is now always a power of 2. The first table size is reduced to
6 (from 8), because index vectors would become excessively big
otherwise.
* src/lisp.h (struct Lisp_Hash_Table): Replace index_size with
index_bits. All references adapted.
(hash_table_index_size): New accessor; use it where applicable.
* src/fns.c (hash_index_size): Replace with...
(compute_hash_index_bits): ...this new function, returning the log2 of the
index size. All callers adapted.
(hash_index_index): Knuth multiplicative hashing instead of remainder.
(maybe_resize_hash_table): Reduce first table size from 8 to 6.
---
src/alloc.c | 7 +++---
src/fns.c | 78 +++++++++++++++++++++++++++++------------------------------
src/lisp.h | 13 +++++++---
src/pdumper.c | 2 +-
4 files changed, 54 insertions(+), 46 deletions(-)
diff --git a/src/alloc.c b/src/alloc.c
index 15bb65cf74f..6abe9e28650 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -3443,7 +3443,7 @@ cleanup_vector (struct Lisp_Vector *vector)
struct Lisp_Hash_Table *h = PSEUDOVEC_STRUCT (vector, Lisp_Hash_Table);
if (h->table_size > 0)
{
- eassert (h->index_size > 1);
+ eassert (h->index_bits > 0);
xfree (h->index);
xfree (h->key_and_value);
xfree (h->next);
@@ -3451,7 +3451,7 @@ cleanup_vector (struct Lisp_Vector *vector)
ptrdiff_t bytes = (h->table_size * (2 * sizeof *h->key_and_value
+ sizeof *h->hash
+ sizeof *h->next)
- + h->index_size * sizeof *h->index);
+ + hash_table_index_size (h) * sizeof *h->index);
hash_table_allocated_bytes -= bytes;
}
}
@@ -5959,7 +5959,8 @@ purecopy_hash_table (struct Lisp_Hash_Table *table)
for (ptrdiff_t i = 0; i < nvalues; i++)
pure->key_and_value[i] = purecopy (table->key_and_value[i]);
- ptrdiff_t index_bytes = table->index_size * sizeof *table->index;
+ ptrdiff_t index_bytes = hash_table_index_size (table)
+ * sizeof *table->index;
pure->index = pure_alloc (index_bytes, -(int)sizeof *table->index);
memcpy (pure->index, table->index, index_bytes);
}
diff --git a/src/fns.c b/src/fns.c
index 08908d481a3..7de2616b359 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -4291,7 +4291,7 @@ set_hash_hash_slot (struct Lisp_Hash_Table *h, ptrdiff_t
idx, hash_hash_t val)
static void
set_hash_index_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, ptrdiff_t val)
{
- eassert (idx >= 0 && idx < h->index_size);
+ eassert (idx >= 0 && idx < hash_table_index_size (h));
h->index[idx] = val;
}
@@ -4392,7 +4392,7 @@ HASH_NEXT (struct Lisp_Hash_Table *h, ptrdiff_t idx)
static ptrdiff_t
HASH_INDEX (struct Lisp_Hash_Table *h, ptrdiff_t idx)
{
- eassert (idx >= 0 && idx < h->index_size);
+ eassert (idx >= 0 && idx < hash_table_index_size (h));
return h->index[idx];
}
@@ -4527,26 +4527,19 @@ allocate_hash_table (void)
return ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_Hash_Table, PVEC_HASH_TABLE);
}
-/* Compute the size of the index from the table capacity. */
-static ptrdiff_t
-hash_index_size (ptrdiff_t size)
-{
- /* An upper bound on the size of a hash table index. It must fit in
- ptrdiff_t and be a valid Emacs fixnum. */
- ptrdiff_t upper_bound = min (MOST_POSITIVE_FIXNUM,
- min (TYPE_MAXIMUM (hash_idx_t),
- PTRDIFF_MAX / sizeof (ptrdiff_t)));
- /* Single-element index vectors are used iff size=0. */
- eassert (size > 0);
- ptrdiff_t lower_bound = 2;
- ptrdiff_t index_size = size + max (size >> 2, 1); /* 1.25x larger */
- if (index_size < upper_bound)
- index_size = (index_size < lower_bound
- ? lower_bound
- : next_almost_prime (index_size));
- if (index_size > upper_bound)
+/* Compute the size of the index (as log2) from the table capacity. */
+static int
+compute_hash_index_bits (hash_idx_t size)
+{
+ /* An upper bound on the size of a hash table index index. */
+ hash_idx_t upper_bound = min (MOST_POSITIVE_FIXNUM,
+ min (TYPE_MAXIMUM (hash_idx_t),
+ PTRDIFF_MAX / sizeof (hash_idx_t)));
+ /* Use next higher power of 2. This works even for size=0. */
+ int bits = elogb (size) + 1;
+ if (bits >= TYPE_WIDTH (uintmax_t) || ((uintmax_t)1 << bits) > upper_bound)
error ("Hash table too large");
- return index_size;
+ return bits;
}
/* Constant hash index vector used when the table size is zero.
@@ -4587,7 +4580,7 @@ make_hash_table (const struct hash_table_test *test,
EMACS_INT size,
h->key_and_value = NULL;
h->hash = NULL;
h->next = NULL;
- h->index_size = 1;
+ h->index_bits = 0;
h->index = (hash_idx_t *)empty_hash_index_vector;
h->next_free = -1;
}
@@ -4605,8 +4598,9 @@ make_hash_table (const struct hash_table_test *test,
EMACS_INT size,
h->next[i] = i + 1;
h->next[size - 1] = -1;
- int index_size = hash_index_size (size);
- h->index_size = index_size;
+ int index_bits = compute_hash_index_bits (size);
+ h->index_bits = index_bits;
+ ptrdiff_t index_size = hash_table_index_size (h);
h->index = hash_table_alloc_bytes (index_size * sizeof *h->index);
for (ptrdiff_t i = 0; i < index_size; i++)
h->index[i] = -1;
@@ -4654,7 +4648,7 @@ copy_hash_table (struct Lisp_Hash_Table *h1)
h2->next = hash_table_alloc_bytes (next_bytes);
memcpy (h2->next, h1->next, next_bytes);
- ptrdiff_t index_bytes = h1->index_size * sizeof *h1->index;
+ ptrdiff_t index_bytes = hash_table_index_size (h1) * sizeof *h1->index;
h2->index = hash_table_alloc_bytes (index_bytes);
memcpy (h2->index, h1->index, index_bytes);
}
@@ -4668,8 +4662,11 @@ copy_hash_table (struct Lisp_Hash_Table *h1)
static inline ptrdiff_t
hash_index_index (struct Lisp_Hash_Table *h, hash_hash_t hash)
{
- eassert (h->index_size > 0);
- return hash % h->index_size;
+ /* Knuth multiplicative hashing, tailored for 32-bit indices
+ (avoiding a 64-bit multiply). */
+ uint32_t alpha = 2654435769; /* 2**32/phi */
+ /* Note the cast to uint64_t, to make it work for index_bits=0. */
+ return (uint64_t)((uint32_t)hash * alpha) >> (32 - h->index_bits);
}
/* Resize hash table H if it's too full. If H cannot be resized
@@ -4681,7 +4678,7 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
if (h->next_free < 0)
{
ptrdiff_t old_size = HASH_TABLE_SIZE (h);
- ptrdiff_t min_size = 8;
+ ptrdiff_t min_size = 6;
ptrdiff_t base_size = min (max (old_size, min_size), PTRDIFF_MAX / 2);
/* Grow aggressively at small sizes, then just double. */
ptrdiff_t new_size =
@@ -4706,13 +4703,14 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
hash_hash_t *hash = hash_table_alloc_bytes (new_size * sizeof *hash);
memcpy (hash, h->hash, old_size * sizeof *hash);
- ptrdiff_t old_index_size = h->index_size;
- ptrdiff_t index_size = hash_index_size (new_size);
+ ptrdiff_t old_index_size = hash_table_index_size (h);
+ ptrdiff_t index_bits = compute_hash_index_bits (new_size);
+ ptrdiff_t index_size = (ptrdiff_t)1 << index_bits;
hash_idx_t *index = hash_table_alloc_bytes (index_size * sizeof *index);
for (ptrdiff_t i = 0; i < index_size; i++)
index[i] = -1;
- h->index_size = index_size;
+ h->index_bits = index_bits;
h->table_size = new_size;
h->next_free = old_size;
@@ -4778,18 +4776,19 @@ hash_table_thaw (Lisp_Object hash_table)
h->key_and_value = NULL;
h->hash = NULL;
h->next = NULL;
- h->index_size = 1;
+ h->index_bits = 0;
h->index = (hash_idx_t *)empty_hash_index_vector;
}
else
{
- ptrdiff_t index_size = hash_index_size (size);
- h->index_size = index_size;
+ ptrdiff_t index_bits = compute_hash_index_bits (size);
+ h->index_bits = index_bits;
h->hash = hash_table_alloc_bytes (size * sizeof *h->hash);
h->next = hash_table_alloc_bytes (size * sizeof *h->next);
+ ptrdiff_t index_size = hash_table_index_size (h);
h->index = hash_table_alloc_bytes (index_size * sizeof *h->index);
for (ptrdiff_t i = 0; i < index_size; i++)
h->index[i] = -1;
@@ -4937,7 +4936,8 @@ hash_clear (struct Lisp_Hash_Table *h)
set_hash_value_slot (h, i, Qnil);
}
- for (ptrdiff_t i = 0; i < h->index_size; i++)
+ ptrdiff_t index_size = hash_table_index_size (h);
+ for (ptrdiff_t i = 0; i < index_size; i++)
h->index[i] = -1;
h->next_free = 0;
@@ -4976,7 +4976,7 @@ keep_entry_p (hash_table_weakness_t weakness,
bool
sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p)
{
- ptrdiff_t n = h->index_size;
+ ptrdiff_t n = hash_table_index_size (h);
bool marked = false;
for (ptrdiff_t bucket = 0; bucket < n; ++bucket)
@@ -5701,7 +5701,7 @@ DEFUN ("internal--hash-table-histogram",
struct Lisp_Hash_Table *h = check_hash_table (hash_table);
ptrdiff_t size = HASH_TABLE_SIZE (h);
ptrdiff_t *freq = xzalloc (size * sizeof *freq);
- ptrdiff_t index_size = h->index_size;
+ ptrdiff_t index_size = hash_table_index_size (h);
for (ptrdiff_t i = 0; i < index_size; i++)
{
ptrdiff_t n = 0;
@@ -5729,7 +5729,7 @@ Internal use only. */)
{
struct Lisp_Hash_Table *h = check_hash_table (hash_table);
Lisp_Object ret = Qnil;
- ptrdiff_t index_size = h->index_size;
+ ptrdiff_t index_size = hash_table_index_size (h);
for (ptrdiff_t i = 0; i < index_size; i++)
{
Lisp_Object bucket = Qnil;
@@ -5750,7 +5750,7 @@ DEFUN ("internal--hash-table-index-size",
(Lisp_Object hash_table)
{
struct Lisp_Hash_Table *h = check_hash_table (hash_table);
- return make_int (h->index_size);
+ return make_int (hash_table_index_size (h));
}
diff --git a/src/lisp.h b/src/lisp.h
index e6fd8cacb1b..d6bbf15d83b 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2475,14 +2475,14 @@ struct Lisp_Hash_Table
The table is physically split into three vectors (hash, next,
key_and_value) which may or may not be beneficial. */
- hash_idx_t index_size; /* Size of the index vector. */
+ int index_bits; /* log2 (size of the index vector). */
hash_idx_t table_size; /* Size of the next and hash vectors. */
/* Bucket vector. An entry of -1 indicates no item is present,
and a nonnegative entry is the index of the first item in
a collision chain.
- This vector is index_size entries long.
- If index_size is 1 (and table_size is 0), then this is the
+ This vector is 2**index_bits entries long.
+ If index_bits is 0 (and table_size is 0), then this is the
constant read-only vector {-1}, shared between all instances.
Otherwise it is heap-allocated. */
hash_idx_t *index;
@@ -2597,6 +2597,13 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h)
return h->table_size;
}
+/* Size of the index vector in hash table H. */
+INLINE ptrdiff_t
+hash_table_index_size (const struct Lisp_Hash_Table *h)
+{
+ return (ptrdiff_t)1 << h->index_bits;
+}
+
/* Hash value for KEY in hash table H. */
INLINE hash_hash_t
hash_from_key (struct Lisp_Hash_Table *h, Lisp_Object key)
diff --git a/src/pdumper.c b/src/pdumper.c
index ee554cda55a..b8006b035ea 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -2688,7 +2688,7 @@ hash_table_freeze (struct Lisp_Hash_Table *h)
h->hash = NULL;
h->index = NULL;
h->table_size = 0;
- h->index_size = 0;
+ h->index_bits = 0;
h->frozen_test = hash_table_std_test (h->test);
h->test = NULL;
}