emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

scratch/hash-table-perf e69035c6ef5 17/35: Leaner hash table dumping and


From: Mattias Engdegård
Subject: scratch/hash-table-perf e69035c6ef5 17/35: Leaner hash table dumping and thawing
Date: Thu, 4 Jan 2024 10:56:42 -0500 (EST)

branch: scratch/hash-table-perf
commit e69035c6ef5399709a7f13b7bf3b3e3152d211ad
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>

    Leaner hash table dumping and thawing
    
    Only dump the actual data, and the test encoded as an enum.  This
    simplifies dumping, makes dump files smaller and saves space at run
    time.
    
    * src/lisp.h (hash_table_std_test_t): New enum.
    (struct Lisp_Hash_Table): Add frozen_test member, consuming no extra space.
    * src/fns.c (hashfn_user_defined): Now static.
    (hash_table_test_from_std): New.
    (hash_table_rehash): Rename to...
    (hash_table_thaw): ...this and rewrite.
    * src/pdumper.c (hash_table_contents): Only include actual data, not
    unused space.
    (hash_table_std_test): New.
    (hash_table_freeze): Set frozen_test from test.
    (dump_hash_table): Dump frozen_test, not the whole test struct.
    Don't bother other dumping fields that can be derived.
---
 src/fns.c     | 53 ++++++++++++++++++++++++++++++-------------------
 src/lisp.h    | 12 ++++++++++--
 src/pdumper.c | 63 +++++++++++++++++++++++++----------------------------------
 3 files changed, 70 insertions(+), 58 deletions(-)

diff --git a/src/fns.c b/src/fns.c
index f9f41d1ead1..db2b5fdb5cc 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -4474,7 +4474,7 @@ hashfn_eql (Lisp_Object key, struct Lisp_Hash_Table *h)
 /* Given H, return a hash code for KEY which uses a user-defined
    function to compare keys.  */
 
-Lisp_Object
+static Lisp_Object
 hashfn_user_defined (Lisp_Object key, struct Lisp_Hash_Table *h)
 {
   Lisp_Object args[] = { h->test.user_hash_function, key };
@@ -4638,11 +4638,10 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
   if (h->next_free < 0)
     {
       ptrdiff_t old_size = HASH_TABLE_SIZE (h);
-      EMACS_INT new_size;
-
-      double float_new_size = old_size * std_rehash_size;
-      if (float_new_size < EMACS_INT_MAX)
-       new_size = float_new_size;
+      /* FIXME: better growth management, ditch std_rehash_size */
+      EMACS_INT new_size = old_size * std_rehash_size;
+      if (new_size < EMACS_INT_MAX)
+       new_size = max (new_size, 32);  /* avoid slow initial growth */
       else
        new_size = EMACS_INT_MAX;
       if (PTRDIFF_MAX < new_size)
@@ -4691,20 +4690,39 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
     }
 }
 
-/* Recompute the hashes (and hence also the "next" pointers).
-   Normally there's never a need to recompute hashes.
-   This is done only on first access to a hash-table loaded from
-   the "pdump", because the objects' addresses may have changed, thus
-   affecting their hashes.  */
+static const struct hash_table_test *
+hash_table_test_from_std (hash_table_std_test_t test)
+{
+  switch (test)
+    {
+    case Test_eq:    return &hashtest_eq;
+    case Test_eql:   return &hashtest_eql;
+    case Test_equal: return &hashtest_equal;
+    }
+  emacs_abort();
+}
+
+/* Rebuild a hash table from its frozen (dumped) form.  */
 void
-hash_table_rehash (Lisp_Object hash)
+hash_table_thaw (Lisp_Object hash_table)
 {
-  struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
-  ptrdiff_t i, count = h->count;
+  struct Lisp_Hash_Table *h = XHASH_TABLE (hash_table);
+
+  /* Freezing discarded most non-essential information; recompute it.
+     The allocation is minimal with no room for growth.  */
+  h->test = *hash_table_test_from_std (h->frozen_test);
+  ptrdiff_t size = ASIZE (h->key_and_value) / 2;
+  h->count = size;
+  ptrdiff_t index_size = hash_index_size (size);
+  h->next_free = -1;
+
+  h->hash = make_nil_vector (size);
+  h->next = make_vector (size, make_fixnum (-1));
+  h->index = make_vector (index_size, make_fixnum (-1));
 
   /* Recompute the actual hash codes for each entry in the table.
      Order is still invalid.  */
-  for (i = 0; i < count; i++)
+  for (ptrdiff_t i = 0; i < size; i++)
     {
       Lisp_Object key = HASH_KEY (h, i);
       Lisp_Object hash_code = hash_from_key (h, key);
@@ -4712,12 +4730,7 @@ hash_table_rehash (Lisp_Object hash)
       set_hash_hash_slot (h, i, hash_code);
       set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
       set_hash_index_slot (h, start_of_bucket, i);
-      eassert (HASH_NEXT (h, i) != i); /* Stop loops.  */
     }
-
-  ptrdiff_t size = ASIZE (h->next);
-  for (; i + 1 < size; i++)
-    set_hash_next_slot (h, i, i + 1);
 }
 
 /* Lookup KEY in hash table H.  If HASH is non-null, return in *HASH
diff --git a/src/lisp.h b/src/lisp.h
index ee87b9567b0..1f3220c0179 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2385,6 +2385,12 @@ INLINE int
 
 struct Lisp_Hash_Table;
 
+typedef enum {
+  Test_eql,
+  Test_eq,
+  Test_equal,
+} hash_table_std_test_t;
+
 struct hash_table_test
 {
   /* Function used to compare keys; always a bare symbol.  */
@@ -2451,6 +2457,9 @@ struct Lisp_Hash_Table
   /* Weakness of the table.  */
   hash_table_weakness_t weakness : 8;
 
+  /* Hash table test (only used when frozen in dump)  */
+  hash_table_std_test_t frozen_test : 8;
+
   /* True if the table can be purecopied.  The table cannot be
      changed afterwards.  */
   bool purecopy;
@@ -2541,7 +2550,7 @@ hash_from_key (struct Lisp_Hash_Table *h, Lisp_Object key)
   return h->test.hashfn (key, h);
 }
 
-void hash_table_rehash (Lisp_Object);
+void hash_table_thaw (Lisp_Object hash_table);
 
 /* Default size for hash tables if not specified.  */
 
@@ -3983,7 +3992,6 @@ extern void hexbuf_digest (char *, void const *, int);
 extern char *extract_data_from_object (Lisp_Object, ptrdiff_t *, ptrdiff_t *);
 EMACS_UINT hash_string (char const *, ptrdiff_t);
 EMACS_UINT sxhash (Lisp_Object);
-Lisp_Object hashfn_user_defined (Lisp_Object, struct Lisp_Hash_Table *);
 Lisp_Object make_hash_table (struct hash_table_test, EMACS_INT,
                              hash_table_weakness_t, bool);
 Lisp_Object hash_table_weakness_symbol (hash_table_weakness_t weak);
diff --git a/src/pdumper.c b/src/pdumper.c
index 93ecd19a6ca..d9f24ce1a9e 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -2646,34 +2646,26 @@ dump_vectorlike_generic (struct dump_context *ctx,
   return offset;
 }
 
-/* Return a vector of KEY, VALUE pairs in the given hash table H.  The
-   first H->count pairs are valid, and the rest are unbound.  */
+/* Return a vector of KEY, VALUE pairs in the given hash table H.
+   No room for growth is included.  */
 static Lisp_Object
 hash_table_contents (struct Lisp_Hash_Table *h)
 {
-  if (h->test.hashfn == hashfn_user_defined)
-    error ("cannot dump hash tables with user-defined tests");  /* Bug#36769 */
-
-  ptrdiff_t size = HASH_TABLE_SIZE (h);
+  ptrdiff_t old_size = HASH_TABLE_SIZE (h);
+  ptrdiff_t size = h->count;
   Lisp_Object key_and_value = make_uninit_vector (2 * size);
   ptrdiff_t n = 0;
 
   /* Make sure key_and_value ends up in the same order; charset.c
      relies on it by expecting hash table indices to stay constant
      across the dump.  */
-  for (ptrdiff_t i = 0; i < size; i++)
+  for (ptrdiff_t i = 0; i < old_size; i++)
     if (!NILP (HASH_HASH (h, i)))
       {
        ASET (key_and_value, n++, HASH_KEY (h, i));
        ASET (key_and_value, n++, HASH_VALUE (h, i));
       }
 
-  while (n < 2 * size)
-    {
-      ASET (key_and_value, n++, Qunbound);
-      ASET (key_and_value, n++, Qnil);
-    }
-
   return key_and_value;
 }
 
@@ -2686,25 +2678,32 @@ dump_hash_table_list (struct dump_context *ctx)
     return 0;
 }
 
-static void
-hash_table_freeze (struct Lisp_Hash_Table *h)
+static hash_table_std_test_t
+hash_table_std_test (const struct hash_table_test *t)
 {
-  ptrdiff_t npairs = ASIZE (h->key_and_value) / 2;
-  h->key_and_value = hash_table_contents (h);
-  h->next = h->hash = make_fixnum (npairs);
-  h->index = make_fixnum (ASIZE (h->index));
-  h->next_free = (npairs == h->count ? -1 : h->count);
+  if (BASE_EQ (t->name, Qeq))
+    return Test_eq;
+  if (BASE_EQ (t->name, Qeql))
+    return Test_eql;
+  if (BASE_EQ (t->name, Qequal))
+    return Test_equal;
+  error ("cannot dump hash tables with user-defined tests");  /* Bug#36769 */
 }
 
+/* Compact contents and discard inessential information from a hash table,
+   preparing it for dumping.
+   See `hash_table_thaw' for the code that restores the object to a usable
+   state. */
 static void
-hash_table_thaw (Lisp_Object hash)
+hash_table_freeze (struct Lisp_Hash_Table *h)
 {
-  struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
-  h->hash = make_nil_vector (XFIXNUM (h->hash));
-  h->next = Fmake_vector (h->next, make_fixnum (-1));
-  h->index = Fmake_vector (h->index, make_fixnum (-1));
-
-  hash_table_rehash (hash);
+  h->key_and_value = hash_table_contents (h);
+  eassert (ASIZE (h->key_and_value) == h->count * 2);
+  h->next = Qnil;
+  h->hash = Qnil;
+  h->index = Qnil;
+  h->count = 0;
+  h->frozen_test = hash_table_std_test (&h->test);
 }
 
 static dump_off
@@ -2724,19 +2723,11 @@ dump_hash_table (struct dump_context *ctx, Lisp_Object 
object)
   dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header);
   /* TODO: dump the hash bucket vectors synchronously here to keep
      them as close to the hash table as possible.  */
-  DUMP_FIELD_COPY (out, hash, count);
-  DUMP_FIELD_COPY (out, hash, next_free);
   DUMP_FIELD_COPY (out, hash, weakness);
   DUMP_FIELD_COPY (out, hash, purecopy);
   DUMP_FIELD_COPY (out, hash, mutable);
+  DUMP_FIELD_COPY (out, hash, frozen_test);
   dump_field_lv (ctx, out, hash, &hash->key_and_value, WEIGHT_STRONG);
-  dump_field_lv (ctx, out, hash, &hash->test.name, WEIGHT_STRONG);
-  dump_field_lv (ctx, out, hash, &hash->test.user_hash_function,
-                 WEIGHT_STRONG);
-  dump_field_lv (ctx, out, hash, &hash->test.user_cmp_function,
-                 WEIGHT_STRONG);
-  dump_field_emacs_ptr (ctx, out, hash, &hash->test.cmpfn);
-  dump_field_emacs_ptr (ctx, out, hash, &hash->test.hashfn);
   eassert (hash->next_weak == NULL);
   return finish_dump_pvec (ctx, &out->header);
 }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]