emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

scratch/hash-table-perf 8e80d1930e3 20/35: Store hash values as EMACS_UI


From: Mattias Engdegård
Subject: scratch/hash-table-perf 8e80d1930e3 20/35: Store hash values as EMACS_UINT instead of Lisp_Object
Date: Thu, 4 Jan 2024 10:56:42 -0500 (EST)

branch: scratch/hash-table-perf
commit 8e80d1930e3bede069d9b9869198d8337aac2e01
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>

    Store hash values as EMACS_UINT instead of Lisp_Object
    
    This improves typing and saves pointless tagging and untagging.
    We now use hash_unused instead of Qnil to mark unused entries.
    
    * src/lisp.h (hash_unused): New constant.
    (struct Lisp_Hash_Table): Retype hash vector to an array of
    EMACS_UINT.  All code using it changed accordingly.
    (HASH_HASH): Retype return value.  All callers changed.
    * src/fns.c (set_hash_index_slot, hash_index_index):
    Retype hash arguments as EMACS_UINT.  All callers changed.
---
 src/fns.c     | 49 +++++++++++++++++++++++++++----------------------
 src/lisp.h    | 11 ++++++++---
 src/macfont.m |  2 +-
 src/pdumper.c |  2 +-
 4 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/src/fns.c b/src/fns.c
index 4b2d78fbb6a..9d4f1b4d4d9 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -4279,7 +4279,7 @@ set_hash_next_slot (struct Lisp_Hash_Table *h, ptrdiff_t 
idx, ptrdiff_t val)
   h->next[idx] = val;
 }
 static void
-set_hash_hash_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, Lisp_Object val)
+set_hash_hash_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, EMACS_UINT val)
 {
   eassert (idx >= 0 && idx < h->table_size);
   h->hash[idx] = val;
@@ -4567,7 +4567,8 @@ make_hash_table (struct hash_table_test test, EMACS_INT 
size,
     h->key_and_value[i] = HASH_UNUSED_ENTRY_KEY;
 
   h->hash = hash_table_alloc_bytes (size * sizeof *h->hash);
-  memclear (h->hash, size * sizeof *h->hash);
+  for (ptrdiff_t i = 0; i < size; i++)
+    h->hash[i] = hash_unused;
 
   h->next = hash_table_alloc_bytes (size * sizeof *h->next);
   for (ptrdiff_t i = 0; i < size - 1; i++)
@@ -4632,10 +4633,10 @@ copy_hash_table (struct Lisp_Hash_Table *h1)
 
 /* Compute index into the index vector from a hash value.  */
 static inline ptrdiff_t
-hash_index_index (struct Lisp_Hash_Table *h, Lisp_Object hash_code)
+hash_index_index (struct Lisp_Hash_Table *h, EMACS_UINT hash)
 {
   eassert (h->index_size > 0);
-  return XUFIXNUM (hash_code) % h->index_size;
+  return hash % h->index_size;
 }
 
 /* Resize hash table H if it's too full.  If H cannot be resized
@@ -4673,9 +4674,10 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
       for (ptrdiff_t i = 2 * old_size; i < 2 * new_size; i++)
         key_and_value[i] = HASH_UNUSED_ENTRY_KEY;
 
-      Lisp_Object *hash = hash_table_alloc_bytes (new_size * sizeof *hash);
+      EMACS_UINT *hash = hash_table_alloc_bytes (new_size * sizeof *hash);
       memcpy (hash, h->hash, old_size * sizeof *hash);
-      memclear (hash + old_size, (new_size - old_size) * word_size);
+      for (ptrdiff_t i = old_size; i < new_size; i++)
+       hash[i] = hash_unused;
 
       ptrdiff_t old_index_size = h->index_size;
       ptrdiff_t index_size = hash_index_size (new_size);
@@ -4704,9 +4706,9 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
 
       /* Rehash.  */
       for (ptrdiff_t i = 0; i < old_size; i++)
-       if (!NILP (HASH_HASH (h, i)))
+       if (HASH_HASH (h, i) != hash_unused)
          {
-           Lisp_Object hash_code = HASH_HASH (h, i);
+           EMACS_UINT hash_code = HASH_HASH (h, i);
            ptrdiff_t start_of_bucket = hash_index_index (h, hash_code);
            set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
            set_hash_index_slot (h, start_of_bucket, i);
@@ -4747,7 +4749,8 @@ hash_table_thaw (Lisp_Object hash_table)
   h->next_free = -1;
 
   h->hash = hash_table_alloc_bytes (size * sizeof *h->hash);
-  memclear (h->hash, size * sizeof *h->hash);
+  for (ptrdiff_t i = 0; i < size; i++)
+    h->hash[i] = hash_unused;
 
   h->next = hash_table_alloc_bytes (size * sizeof *h->next);
   for (ptrdiff_t i = 0; i < size; i++)
@@ -4762,7 +4765,7 @@ hash_table_thaw (Lisp_Object hash_table)
   for (ptrdiff_t i = 0; i < size; i++)
     {
       Lisp_Object key = HASH_KEY (h, i);
-      Lisp_Object hash_code = hash_from_key (h, key);
+      EMACS_UINT hash_code = XUFIXNUM (hash_from_key (h, key));
       ptrdiff_t start_of_bucket = hash_index_index (h, hash_code);
       set_hash_hash_slot (h, i, hash_code);
       set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
@@ -4780,13 +4783,14 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object 
key, Lisp_Object *hash)
   Lisp_Object hash_code = hash_from_key (h, key);
   if (hash)
     *hash = hash_code;
+  EMACS_UINT hashval = XUFIXNUM (hash_code);
 
-  ptrdiff_t start_of_bucket = hash_index_index (h, hash_code);
+  ptrdiff_t start_of_bucket = hash_index_index (h, hashval);
   ptrdiff_t i;
   for (i = HASH_INDEX (h, start_of_bucket); 0 <= i; i = HASH_NEXT (h, i))
     if (EQ (key, HASH_KEY (h, i))
        || (h->test.cmpfn
-           && EQ (hash_code, HASH_HASH (h, i))
+           && hashval == HASH_HASH (h, i)
            && !NILP (h->test.cmpfn (key, HASH_KEY (h, i), h))))
       break;
 
@@ -4815,17 +4819,18 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, 
Lisp_Object value,
 
   /* Store key/value in the key_and_value vector.  */
   ptrdiff_t i = h->next_free;
-  eassert (NILP (HASH_HASH (h, i)));
+  eassert (HASH_HASH (h, i) == hash_unused);
   eassert (hash_unused_entry_key_p (HASH_KEY (h, i)));
   h->next_free = HASH_NEXT (h, i);
   set_hash_key_slot (h, i, key);
   set_hash_value_slot (h, i, value);
 
   /* Remember its hash code.  */
-  set_hash_hash_slot (h, i, hash);
+  EMACS_UINT hashval = XUFIXNUM (hash);
+  set_hash_hash_slot (h, i, hashval);
 
   /* Add new entry to its collision chain.  */
-  ptrdiff_t start_of_bucket = hash_index_index (h, hash);
+  ptrdiff_t start_of_bucket = hash_index_index (h, hashval);
   set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
   set_hash_index_slot (h, start_of_bucket, i);
   return i;
@@ -4837,8 +4842,8 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, 
Lisp_Object value,
 void
 hash_remove_from_table (struct Lisp_Hash_Table *h, Lisp_Object key)
 {
-  Lisp_Object hash_code = hash_from_key (h, key);
-  ptrdiff_t start_of_bucket = hash_index_index (h, hash_code);
+  EMACS_UINT hashval = XUFIXNUM (hash_from_key (h, key));
+  ptrdiff_t start_of_bucket = hash_index_index (h, hashval);
   ptrdiff_t prev = -1;
 
   for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket);
@@ -4847,7 +4852,7 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, 
Lisp_Object key)
     {
       if (EQ (key, HASH_KEY (h, i))
          || (h->test.cmpfn
-             && EQ (hash_code, HASH_HASH (h, i))
+             && hashval == HASH_HASH (h, i)
              && !NILP (h->test.cmpfn (key, HASH_KEY (h, i), h))))
        {
          /* Take entry out of collision chain.  */
@@ -4860,7 +4865,7 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, 
Lisp_Object key)
             the free list.  */
          set_hash_key_slot (h, i, HASH_UNUSED_ENTRY_KEY);
          set_hash_value_slot (h, i, Qnil);
-         set_hash_hash_slot (h, i, Qnil);
+         set_hash_hash_slot (h, i, hash_unused);
          set_hash_next_slot (h, i, h->next_free);
          h->next_free = i;
          h->count--;
@@ -4881,9 +4886,9 @@ hash_clear (struct Lisp_Hash_Table *h)
   if (h->count > 0)
     {
       ptrdiff_t size = HASH_TABLE_SIZE (h);
-      memclear (h->hash, size * word_size);
       for (ptrdiff_t i = 0; i < size; i++)
        {
+         set_hash_hash_slot (h, i, hash_unused);
          set_hash_next_slot (h, i, i < size - 1 ? i + 1 : -1);
          set_hash_key_slot (h, i, HASH_UNUSED_ENTRY_KEY);
          set_hash_value_slot (h, i, Qnil);
@@ -4966,7 +4971,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool 
remove_entries_p)
                  /* Clear key, value, and hash.  */
                  set_hash_key_slot (h, i, HASH_UNUSED_ENTRY_KEY);
                  set_hash_value_slot (h, i, Qnil);
-                 set_hash_hash_slot (h, i, Qnil);
+                 set_hash_hash_slot (h, i, hash_unused);
 
                   eassert (h->count != 0);
                   h->count--;
@@ -5633,7 +5638,7 @@ Internal use only. */)
     {
       Lisp_Object bucket = Qnil;
       for (ptrdiff_t j = HASH_INDEX (h, i); j != -1; j = HASH_NEXT (h, j))
-       bucket = Fcons (Fcons (HASH_KEY (h, j), HASH_HASH (h, j)),
+       bucket = Fcons (Fcons (HASH_KEY (h, j), make_int (HASH_HASH (h, j))),
                        bucket);
       if (!NILP (bucket))
        ret = Fcons (Fnreverse (bucket), ret);
diff --git a/src/lisp.h b/src/lisp.h
index 3666d25fb54..d8af923263b 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2421,6 +2421,11 @@ typedef enum {
                            both key and value remain.  */
 } hash_table_weakness_t;
 
+/* An value that marks an unused hash entry.
+   Any EMACS_UINT value that is not a valid fixnum will do here.  */
+enum { hash_unused = (EMACS_UINT)MOST_POSITIVE_FIXNUM + 1 };
+verify (FIXNUM_OVERFLOW_P (hash_unused));
+
 struct Lisp_Hash_Table
 {
   /* Change pdumper.c if you change the fields here.  */
@@ -2437,9 +2442,9 @@ struct Lisp_Hash_Table
 
   ptrdiff_t table_size;          /* Size of the next and hash vectors.  */
 
-  /* Vector of hash codes.  Each entry is either a fixnum, or nil if unused.
+  /* Vector of hash codes.  The value hash_unused marks an unused table entry.
      This vector is table_size entries long.  */
-  Lisp_Object *hash;
+  EMACS_UINT *hash;
 
   /* Vector used to chain entries.  If entry I is free, next[I] is the
      entry number of the next free item.  If entry I is non-free,
@@ -2528,7 +2533,7 @@ HASH_VALUE (const struct Lisp_Hash_Table *h, ptrdiff_t 
idx)
 }
 
 /* Value is the hash code computed for entry IDX in hash table H.  */
-INLINE Lisp_Object
+INLINE EMACS_UINT
 HASH_HASH (const struct Lisp_Hash_Table *h, ptrdiff_t idx)
 {
   eassert (idx >= 0 && idx < h->table_size);
diff --git a/src/macfont.m b/src/macfont.m
index 9f9f6f4efaf..ea8cd2413d0 100644
--- a/src/macfont.m
+++ b/src/macfont.m
@@ -980,7 +980,7 @@ macfont_invalidate_family_cache (void)
       ptrdiff_t i, size = HASH_TABLE_SIZE (h);
 
       for (i = 0; i < size; ++i)
-       if (!NILP (HASH_HASH (h, i)))
+       if (HASH_HASH (h, i) != hash_unused)
          {
            Lisp_Object value = HASH_VALUE (h, i);
 
diff --git a/src/pdumper.c b/src/pdumper.c
index 5d02fbd061d..bc31dd8fa6d 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -2661,7 +2661,7 @@ hash_table_contents (struct Lisp_Hash_Table *h)
      relies on it by expecting hash table indices to stay constant
      across the dump.  */
   for (ptrdiff_t i = 0; i < old_size; i++)
-    if (!NILP (HASH_HASH (h, i)))
+    if (HASH_HASH (h, i) != hash_unused)
       {
        key_and_value[n++] = HASH_KEY (h, i);
        key_and_value[n++] = HASH_VALUE (h, i);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]