[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
scratch/hash-table-perf 8e80d1930e3 20/35: Store hash values as EMACS_UI
From: |
Mattias Engdegård |
Subject: |
scratch/hash-table-perf 8e80d1930e3 20/35: Store hash values as EMACS_UINT instead of Lisp_Object |
Date: |
Thu, 4 Jan 2024 10:56:42 -0500 (EST) |
branch: scratch/hash-table-perf
commit 8e80d1930e3bede069d9b9869198d8337aac2e01
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>
Store hash values as EMACS_UINT instead of Lisp_Object
This improves typing and saves pointless tagging and untagging.
We now use hash_unused instead of Qnil to mark unused entries.
* src/lisp.h (hash_unused): New constant.
(struct Lisp_Hash_Table): Retype hash vector to an array of
EMACS_UINT. All code using it changed accordingly.
(HASH_HASH): Retype return value. All callers changed.
* src/fns.c (set_hash_index_slot, hash_index_index):
Retype hash arguments as EMACS_UINT. All callers changed.
---
src/fns.c | 49 +++++++++++++++++++++++++++----------------------
src/lisp.h | 11 ++++++++---
src/macfont.m | 2 +-
src/pdumper.c | 2 +-
4 files changed, 37 insertions(+), 27 deletions(-)
diff --git a/src/fns.c b/src/fns.c
index 4b2d78fbb6a..9d4f1b4d4d9 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -4279,7 +4279,7 @@ set_hash_next_slot (struct Lisp_Hash_Table *h, ptrdiff_t
idx, ptrdiff_t val)
h->next[idx] = val;
}
static void
-set_hash_hash_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, Lisp_Object val)
+set_hash_hash_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, EMACS_UINT val)
{
eassert (idx >= 0 && idx < h->table_size);
h->hash[idx] = val;
@@ -4567,7 +4567,8 @@ make_hash_table (struct hash_table_test test, EMACS_INT
size,
h->key_and_value[i] = HASH_UNUSED_ENTRY_KEY;
h->hash = hash_table_alloc_bytes (size * sizeof *h->hash);
- memclear (h->hash, size * sizeof *h->hash);
+ for (ptrdiff_t i = 0; i < size; i++)
+ h->hash[i] = hash_unused;
h->next = hash_table_alloc_bytes (size * sizeof *h->next);
for (ptrdiff_t i = 0; i < size - 1; i++)
@@ -4632,10 +4633,10 @@ copy_hash_table (struct Lisp_Hash_Table *h1)
/* Compute index into the index vector from a hash value. */
static inline ptrdiff_t
-hash_index_index (struct Lisp_Hash_Table *h, Lisp_Object hash_code)
+hash_index_index (struct Lisp_Hash_Table *h, EMACS_UINT hash)
{
eassert (h->index_size > 0);
- return XUFIXNUM (hash_code) % h->index_size;
+ return hash % h->index_size;
}
/* Resize hash table H if it's too full. If H cannot be resized
@@ -4673,9 +4674,10 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
for (ptrdiff_t i = 2 * old_size; i < 2 * new_size; i++)
key_and_value[i] = HASH_UNUSED_ENTRY_KEY;
- Lisp_Object *hash = hash_table_alloc_bytes (new_size * sizeof *hash);
+ EMACS_UINT *hash = hash_table_alloc_bytes (new_size * sizeof *hash);
memcpy (hash, h->hash, old_size * sizeof *hash);
- memclear (hash + old_size, (new_size - old_size) * word_size);
+ for (ptrdiff_t i = old_size; i < new_size; i++)
+ hash[i] = hash_unused;
ptrdiff_t old_index_size = h->index_size;
ptrdiff_t index_size = hash_index_size (new_size);
@@ -4704,9 +4706,9 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
/* Rehash. */
for (ptrdiff_t i = 0; i < old_size; i++)
- if (!NILP (HASH_HASH (h, i)))
+ if (HASH_HASH (h, i) != hash_unused)
{
- Lisp_Object hash_code = HASH_HASH (h, i);
+ EMACS_UINT hash_code = HASH_HASH (h, i);
ptrdiff_t start_of_bucket = hash_index_index (h, hash_code);
set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
set_hash_index_slot (h, start_of_bucket, i);
@@ -4747,7 +4749,8 @@ hash_table_thaw (Lisp_Object hash_table)
h->next_free = -1;
h->hash = hash_table_alloc_bytes (size * sizeof *h->hash);
- memclear (h->hash, size * sizeof *h->hash);
+ for (ptrdiff_t i = 0; i < size; i++)
+ h->hash[i] = hash_unused;
h->next = hash_table_alloc_bytes (size * sizeof *h->next);
for (ptrdiff_t i = 0; i < size; i++)
@@ -4762,7 +4765,7 @@ hash_table_thaw (Lisp_Object hash_table)
for (ptrdiff_t i = 0; i < size; i++)
{
Lisp_Object key = HASH_KEY (h, i);
- Lisp_Object hash_code = hash_from_key (h, key);
+ EMACS_UINT hash_code = XUFIXNUM (hash_from_key (h, key));
ptrdiff_t start_of_bucket = hash_index_index (h, hash_code);
set_hash_hash_slot (h, i, hash_code);
set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
@@ -4780,13 +4783,14 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object
key, Lisp_Object *hash)
Lisp_Object hash_code = hash_from_key (h, key);
if (hash)
*hash = hash_code;
+ EMACS_UINT hashval = XUFIXNUM (hash_code);
- ptrdiff_t start_of_bucket = hash_index_index (h, hash_code);
+ ptrdiff_t start_of_bucket = hash_index_index (h, hashval);
ptrdiff_t i;
for (i = HASH_INDEX (h, start_of_bucket); 0 <= i; i = HASH_NEXT (h, i))
if (EQ (key, HASH_KEY (h, i))
|| (h->test.cmpfn
- && EQ (hash_code, HASH_HASH (h, i))
+ && hashval == HASH_HASH (h, i)
&& !NILP (h->test.cmpfn (key, HASH_KEY (h, i), h))))
break;
@@ -4815,17 +4819,18 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key,
Lisp_Object value,
/* Store key/value in the key_and_value vector. */
ptrdiff_t i = h->next_free;
- eassert (NILP (HASH_HASH (h, i)));
+ eassert (HASH_HASH (h, i) == hash_unused);
eassert (hash_unused_entry_key_p (HASH_KEY (h, i)));
h->next_free = HASH_NEXT (h, i);
set_hash_key_slot (h, i, key);
set_hash_value_slot (h, i, value);
/* Remember its hash code. */
- set_hash_hash_slot (h, i, hash);
+ EMACS_UINT hashval = XUFIXNUM (hash);
+ set_hash_hash_slot (h, i, hashval);
/* Add new entry to its collision chain. */
- ptrdiff_t start_of_bucket = hash_index_index (h, hash);
+ ptrdiff_t start_of_bucket = hash_index_index (h, hashval);
set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
set_hash_index_slot (h, start_of_bucket, i);
return i;
@@ -4837,8 +4842,8 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key,
Lisp_Object value,
void
hash_remove_from_table (struct Lisp_Hash_Table *h, Lisp_Object key)
{
- Lisp_Object hash_code = hash_from_key (h, key);
- ptrdiff_t start_of_bucket = hash_index_index (h, hash_code);
+ EMACS_UINT hashval = XUFIXNUM (hash_from_key (h, key));
+ ptrdiff_t start_of_bucket = hash_index_index (h, hashval);
ptrdiff_t prev = -1;
for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket);
@@ -4847,7 +4852,7 @@ hash_remove_from_table (struct Lisp_Hash_Table *h,
Lisp_Object key)
{
if (EQ (key, HASH_KEY (h, i))
|| (h->test.cmpfn
- && EQ (hash_code, HASH_HASH (h, i))
+ && hashval == HASH_HASH (h, i)
&& !NILP (h->test.cmpfn (key, HASH_KEY (h, i), h))))
{
/* Take entry out of collision chain. */
@@ -4860,7 +4865,7 @@ hash_remove_from_table (struct Lisp_Hash_Table *h,
Lisp_Object key)
the free list. */
set_hash_key_slot (h, i, HASH_UNUSED_ENTRY_KEY);
set_hash_value_slot (h, i, Qnil);
- set_hash_hash_slot (h, i, Qnil);
+ set_hash_hash_slot (h, i, hash_unused);
set_hash_next_slot (h, i, h->next_free);
h->next_free = i;
h->count--;
@@ -4881,9 +4886,9 @@ hash_clear (struct Lisp_Hash_Table *h)
if (h->count > 0)
{
ptrdiff_t size = HASH_TABLE_SIZE (h);
- memclear (h->hash, size * word_size);
for (ptrdiff_t i = 0; i < size; i++)
{
+ set_hash_hash_slot (h, i, hash_unused);
set_hash_next_slot (h, i, i < size - 1 ? i + 1 : -1);
set_hash_key_slot (h, i, HASH_UNUSED_ENTRY_KEY);
set_hash_value_slot (h, i, Qnil);
@@ -4966,7 +4971,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool
remove_entries_p)
/* Clear key, value, and hash. */
set_hash_key_slot (h, i, HASH_UNUSED_ENTRY_KEY);
set_hash_value_slot (h, i, Qnil);
- set_hash_hash_slot (h, i, Qnil);
+ set_hash_hash_slot (h, i, hash_unused);
eassert (h->count != 0);
h->count--;
@@ -5633,7 +5638,7 @@ Internal use only. */)
{
Lisp_Object bucket = Qnil;
for (ptrdiff_t j = HASH_INDEX (h, i); j != -1; j = HASH_NEXT (h, j))
- bucket = Fcons (Fcons (HASH_KEY (h, j), HASH_HASH (h, j)),
+ bucket = Fcons (Fcons (HASH_KEY (h, j), make_int (HASH_HASH (h, j))),
bucket);
if (!NILP (bucket))
ret = Fcons (Fnreverse (bucket), ret);
diff --git a/src/lisp.h b/src/lisp.h
index 3666d25fb54..d8af923263b 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2421,6 +2421,11 @@ typedef enum {
both key and value remain. */
} hash_table_weakness_t;
+/* An value that marks an unused hash entry.
+ Any EMACS_UINT value that is not a valid fixnum will do here. */
+enum { hash_unused = (EMACS_UINT)MOST_POSITIVE_FIXNUM + 1 };
+verify (FIXNUM_OVERFLOW_P (hash_unused));
+
struct Lisp_Hash_Table
{
/* Change pdumper.c if you change the fields here. */
@@ -2437,9 +2442,9 @@ struct Lisp_Hash_Table
ptrdiff_t table_size; /* Size of the next and hash vectors. */
- /* Vector of hash codes. Each entry is either a fixnum, or nil if unused.
+ /* Vector of hash codes. The value hash_unused marks an unused table entry.
This vector is table_size entries long. */
- Lisp_Object *hash;
+ EMACS_UINT *hash;
/* Vector used to chain entries. If entry I is free, next[I] is the
entry number of the next free item. If entry I is non-free,
@@ -2528,7 +2533,7 @@ HASH_VALUE (const struct Lisp_Hash_Table *h, ptrdiff_t
idx)
}
/* Value is the hash code computed for entry IDX in hash table H. */
-INLINE Lisp_Object
+INLINE EMACS_UINT
HASH_HASH (const struct Lisp_Hash_Table *h, ptrdiff_t idx)
{
eassert (idx >= 0 && idx < h->table_size);
diff --git a/src/macfont.m b/src/macfont.m
index 9f9f6f4efaf..ea8cd2413d0 100644
--- a/src/macfont.m
+++ b/src/macfont.m
@@ -980,7 +980,7 @@ macfont_invalidate_family_cache (void)
ptrdiff_t i, size = HASH_TABLE_SIZE (h);
for (i = 0; i < size; ++i)
- if (!NILP (HASH_HASH (h, i)))
+ if (HASH_HASH (h, i) != hash_unused)
{
Lisp_Object value = HASH_VALUE (h, i);
diff --git a/src/pdumper.c b/src/pdumper.c
index 5d02fbd061d..bc31dd8fa6d 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -2661,7 +2661,7 @@ hash_table_contents (struct Lisp_Hash_Table *h)
relies on it by expecting hash table indices to stay constant
across the dump. */
for (ptrdiff_t i = 0; i < old_size; i++)
- if (!NILP (HASH_HASH (h, i)))
+ if (HASH_HASH (h, i) != hash_unused)
{
key_and_value[n++] = HASH_KEY (h, i);
key_and_value[n++] = HASH_VALUE (h, i);
- scratch/hash-table-perf 4e8d7725fd4 11/35: ; * src/fns.c (Fmake_hash_table): ensure `test` is a bare symbol, (continued)
- scratch/hash-table-perf 4e8d7725fd4 11/35: ; * src/fns.c (Fmake_hash_table): ensure `test` is a bare symbol, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 8335891387a 22/35: Inlined and specialised hash table look-up, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 54807fee4d0 23/35: Use hash_idx_t for storing hash indices, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 8cd35079f4c 34/35: Don't pretend that hash-table-size is useful, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf e6defe82569 27/35: Change default hash table size to 8 (from 65), Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 422c91a822a 02/35: ; * src/pdumper.c (dump_hash_table): Remove unused argument., Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 3d2042c48a6 07/35: Refactor: extract hash index computation to a function, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf c188b9f2bf5 08/35: Refactor hash table vector reallocation, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 6ffbccbf1dd 15/35: Represent hash table weakness as an enum internally, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 3e9e68333ae 16/35: Remove rehash-threshold and rehash-size struct members, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 8e80d1930e3 20/35: Store hash values as EMACS_UINT instead of Lisp_Object,
Mattias Engdegård <=
- scratch/hash-table-perf 05297736aa6 33/35: Adapt hash functions to produce a hash_hash_t eventually, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 830838eb5f3 31/35: Use KEY=Qunbound instead of HASH=hash_unused for unused entries, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 8b1b140bed9 32/35: * src/lisp.h (hash_hash_t): Change to uint32_t., Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 681a2877cc2 35/35: Hash-table documentation updates, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf d77c9540363 06/35: Refactor: extract hash computation to a function, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf e69035c6ef5 17/35: Leaner hash table dumping and thawing, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 2d28042f56a 19/35: Use non-Lisp allocation for internal hash-table vectors, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 1672d880e0c 29/35: Change hash_idx_t to int32_t on all platforms, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf 5cf627d70e1 30/35: Don't dump Qunbound, Mattias Engdegård, 2024/01/04
- scratch/hash-table-perf ad3d2f8ed88 25/35: Share hash table test structs, Mattias Engdegård, 2024/01/04