>From 96a36697b2f472f3a6b3138444659babd0d73f32 Mon Sep 17 00:00:00 2001 From: Pip Cet Date: Sun, 14 Jul 2019 07:22:01 -0700 Subject: [PATCH] Rehash hash tables eagerly after loading a portable dump * src/lisp.h (hash_rehash_needed_p): Remove. All uses removed. (hash_rehash_if_needed): Remove. All uses removed. (struct Lisp_Hash_Table): Remove comment about rehashing hash tables. * src/pdumper.c (thaw_hash_tables): New function. (hash_table_thaw): New function. (hash_table_freeze): New function. (dump_hash_table): Simplify. (dump_hash_table_list): New function. (hash_table_contents): New function. (Fdump_emacs_portable): Handle hash tables by eager rehashing. (pdumper_load): Restore hash tables. (init_pdumper_once): New function. (PDUMPER_CHECK_REHASHING): Remove. --- src/bytecode.c | 1 - src/composite.c | 1 - src/emacs.c | 1 + src/fns.c | 54 ++++---------- src/lisp.h | 19 +---- src/minibuf.c | 3 - src/pdumper.c | 188 ++++++++++++++++++++++++------------------------ src/pdumper.h | 1 + 8 files changed, 108 insertions(+), 160 deletions(-) diff --git a/src/bytecode.c b/src/bytecode.c index 29dff44f00..9c72429e0c 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -1402,7 +1402,6 @@ #define DEFINE(name, value) LABEL (name) , Lisp_Object v1 = POP; ptrdiff_t i; struct Lisp_Hash_Table *h = XHASH_TABLE (jmp_table); - hash_rehash_if_needed (h); /* h->count is a faster approximation for HASH_TABLE_SIZE (h) here. */ diff --git a/src/composite.c b/src/composite.c index 183062de46..49a285cff0 100644 --- a/src/composite.c +++ b/src/composite.c @@ -654,7 +654,6 @@ gstring_lookup_cache (Lisp_Object header) composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len) { struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table); - hash_rehash_if_needed (h); Lisp_Object header = LGSTRING_HEADER (gstring); EMACS_UINT hash = h->test.hashfn (&h->test, header); if (len < 0) diff --git a/src/emacs.c b/src/emacs.c index ad661a081b..855b2c6715 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -1560,6 +1560,7 @@ main (int argc, char **argv) if (!initialized) { init_alloc_once (); + init_pdumper_once (); init_obarray_once (); init_eval_once (); init_charset_once (); diff --git a/src/fns.c b/src/fns.c index 0497588689..b6134a314c 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4241,43 +4241,24 @@ hash_table_rehash (struct Lisp_Hash_Table *h) { ptrdiff_t size = HASH_TABLE_SIZE (h); - /* These structures may have been purecopied and shared - (bug#36447). */ - h->next = Fcopy_sequence (h->next); - h->index = Fcopy_sequence (h->index); - h->hash = Fcopy_sequence (h->hash); - /* Recompute the actual hash codes for each entry in the table. Order is still invalid. */ for (ptrdiff_t i = 0; i < size; ++i) - if (!NILP (HASH_HASH (h, i))) - { - Lisp_Object key = HASH_KEY (h, i); - EMACS_UINT hash_code = h->test.hashfn (&h->test, key); - set_hash_hash_slot (h, i, make_fixnum (hash_code)); - } - - /* Reset the index so that any slot we don't fill below is marked - invalid. */ - Ffillarray (h->index, make_fixnum (-1)); + { + Lisp_Object key = HASH_KEY (h, i); + EMACS_UINT hash_code = h->test.hashfn (&h->test, key); + set_hash_hash_slot (h, i, make_fixnum (hash_code)); + } /* Rebuild the collision chains. */ for (ptrdiff_t i = 0; i < size; ++i) - if (!NILP (HASH_HASH (h, i))) - { - EMACS_UINT hash_code = XUFIXNUM (HASH_HASH (h, i)); - ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index); - set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket)); - set_hash_index_slot (h, start_of_bucket, i); - eassert (HASH_NEXT (h, i) != i); /* Stop loops. */ - } - - /* Finally, mark the hash table as having a valid hash order. - Do this last so that if we're interrupted, we retry on next - access. */ - eassert (h->count < 0); - h->count = -h->count; - eassert (!hash_rehash_needed_p (h)); + { + EMACS_UINT hash_code = XUFIXNUM (HASH_HASH (h, i)); + ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index); + set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket)); + set_hash_index_slot (h, start_of_bucket, i); + eassert (HASH_NEXT (h, i) != i); /* Stop loops. */ + } } /* Lookup KEY in hash table H. If HASH is non-null, return in *HASH @@ -4290,8 +4271,6 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object key, EMACS_UINT *hash) EMACS_UINT hash_code; ptrdiff_t start_of_bucket, i; - hash_rehash_if_needed (h); - hash_code = h->test.hashfn (&h->test, key); eassert ((hash_code & ~INTMASK) == 0); if (hash) @@ -4320,8 +4299,6 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object value, { ptrdiff_t start_of_bucket, i; - hash_rehash_if_needed (h); - eassert ((hash & ~INTMASK) == 0); /* Increment count after resizing because resizing may fail. */ @@ -4355,8 +4332,6 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, Lisp_Object key) ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index); ptrdiff_t prev = -1; - hash_rehash_if_needed (h); - for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket); 0 <= i; i = HASH_NEXT (h, i)) @@ -4434,9 +4409,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p) for (ptrdiff_t bucket = 0; bucket < n; ++bucket) { /* Follow collision chain, removing entries that don't survive - this garbage collection. It's okay if hash_rehash_needed_p - (h) is true, since we're operating entirely on the cached - hash values. */ + this garbage collection. */ ptrdiff_t prev = -1; ptrdiff_t next; for (ptrdiff_t i = HASH_INDEX (h, bucket); 0 <= i; i = next) @@ -4881,7 +4854,6 @@ DEFUN ("hash-table-count", Fhash_table_count, Shash_table_count, 1, 1, 0, (Lisp_Object table) { struct Lisp_Hash_Table *h = check_hash_table (table); - hash_rehash_if_needed (h); return make_fixnum (h->count); } diff --git a/src/lisp.h b/src/lisp.h index 13014c82dc..d0e5c43c41 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -2245,11 +2245,7 @@ #define DEFSYM(sym, name) /* empty */ struct Lisp_Hash_Table { - /* Change pdumper.c if you change the fields here. - - IMPORTANT!!!!!!! - - Call hash_rehash_if_needed() before accessing. */ + /* Change pdumper.c if you change the fields here. */ /* This is for Lisp; the hash table code does not refer to it. */ union vectorlike_header header; @@ -2363,19 +2359,6 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h) void hash_table_rehash (struct Lisp_Hash_Table *h); -INLINE bool -hash_rehash_needed_p (const struct Lisp_Hash_Table *h) -{ - return h->count < 0; -} - -INLINE void -hash_rehash_if_needed (struct Lisp_Hash_Table *h) -{ - if (hash_rehash_needed_p (h)) - hash_table_rehash (h); -} - /* Default size for hash tables if not specified. */ enum DEFAULT_HASH_SIZE { DEFAULT_HASH_SIZE = 65 }; diff --git a/src/minibuf.c b/src/minibuf.c index d9a6e15b05..e923ce2a43 100644 --- a/src/minibuf.c +++ b/src/minibuf.c @@ -1203,9 +1203,6 @@ DEFUN ("try-completion", Ftry_completion, Stry_completion, 2, 3, 0, bucket = AREF (collection, idx); } - if (HASH_TABLE_P (collection)) - hash_rehash_if_needed (XHASH_TABLE (collection)); - while (1) { /* Get the next element of the alist, obarray, or hash-table. */ diff --git a/src/pdumper.c b/src/pdumper.c index 03c00bf27b..d35d296d32 100644 --- a/src/pdumper.c +++ b/src/pdumper.c @@ -107,17 +107,6 @@ #define VM_MS_WINDOWS 2 #define DANGEROUS 0 -/* PDUMPER_CHECK_REHASHING being true causes the portable dumper to - check, for each hash table it dumps, that the hash table means the - same thing after rehashing. */ -#ifndef PDUMPER_CHECK_REHASHING -# if ENABLE_CHECKING -# define PDUMPER_CHECK_REHASHING 1 -# else -# define PDUMPER_CHECK_REHASHING 0 -# endif -#endif - /* We require an architecture in which all pointers are the same size and have the same layout, where pointers are either 32 or 64 bits long, and where bytes have eight bits --- that is, a @@ -393,6 +382,8 @@ dump_fingerprint (char const *label, The start of the cold region is always aligned on a page boundary. */ dump_off cold_start; + + dump_off hash_list; }; /* Double-ended singly linked list. */ @@ -550,6 +541,8 @@ dump_fingerprint (char const *label, heap objects. */ Lisp_Object bignum_data; + Lisp_Object hash_tables; + unsigned number_hot_relocations; unsigned number_discardable_relocations; }; @@ -2622,68 +2615,64 @@ dump_vectorlike_generic (struct dump_context *ctx, return offset; } -/* Determine whether the hash table's hash order is stable - across dump and load. If it is, we don't have to trigger - a rehash on access. */ -static bool -dump_hash_table_stable_p (const struct Lisp_Hash_Table *hash) +/* Return a list of (KEY . VALUE) pairs in the given hash table. */ +static Lisp_Object +hash_table_contents (struct Lisp_Hash_Table *h) { - bool is_eql = hash->test.hashfn == hashfn_eql; - bool is_equal = hash->test.hashfn == hashfn_equal; - ptrdiff_t size = HASH_TABLE_SIZE (hash); - for (ptrdiff_t i = 0; i < size; ++i) - if (!NILP (HASH_HASH (hash, i))) + Lisp_Object contents = Qnil; + /* Make sure key_and_value ends up in the same order, charset.c + relies on it by expecting hash table indices to stay constant + across the dump. */ + for (ptrdiff_t i = HASH_TABLE_SIZE (h) - 1; i >= 0; --i) + if (!NILP (HASH_HASH (h, i))) { - Lisp_Object key = HASH_KEY (hash, i); - bool key_stable = (dump_builtin_symbol_p (key) - || FIXNUMP (key) - || (is_equal && STRINGP (key)) - || ((is_equal || is_eql) && FLOATP (key))); - if (!key_stable) - return false; + dump_push (&contents, HASH_VALUE (h, i)); + dump_push (&contents, HASH_KEY (h, i)); } + return CALLN (Fapply, Qvector, contents); +} - return true; +static dump_off +dump_hash_table_list (struct dump_context *ctx) +{ + if (CONSP (ctx->hash_tables)) + return dump_object (ctx, CALLN (Fapply, Qvector, ctx->hash_tables)); + else + return 0; } -/* Return a list of (KEY . VALUE) pairs in the given hash table. */ -static Lisp_Object -hash_table_contents (Lisp_Object table) +static void +hash_table_freeze (struct Lisp_Hash_Table *h) { - Lisp_Object contents = Qnil; - struct Lisp_Hash_Table *h = XHASH_TABLE (table); - for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i) - if (!NILP (HASH_HASH (h, i))) - dump_push (&contents, Fcons (HASH_KEY (h, i), HASH_VALUE (h, i))); - return Fnreverse (contents); + h->key_and_value = hash_table_contents (h); + ptrdiff_t nkeys = XFIXNAT (Flength (h->key_and_value)) / 2; + h->count = nkeys; + if (nkeys == 0) + nkeys = 1; + h->index = h->next = h->hash = make_fixnum (nkeys); } -/* Copy the given hash table, rehash it, and make sure that we can - look up all the values in the original. */ static void -check_hash_table_rehash (Lisp_Object table_orig) -{ - hash_rehash_if_needed (XHASH_TABLE (table_orig)); - Lisp_Object table_rehashed = Fcopy_hash_table (table_orig); - eassert (XHASH_TABLE (table_rehashed)->count >= 0); - XHASH_TABLE (table_rehashed)->count *= -1; - eassert (XHASH_TABLE (table_rehashed)->count <= 0); - hash_rehash_if_needed (XHASH_TABLE (table_rehashed)); - eassert (XHASH_TABLE (table_rehashed)->count >= 0); - Lisp_Object expected_contents = hash_table_contents (table_orig); - while (!NILP (expected_contents)) +hash_table_thaw (struct Lisp_Hash_Table *h) +{ + Lisp_Object count = h->index; + h->index = Fmake_vector (h->index, make_fixnum (-1)); + h->hash = Fmake_vector (h->hash, Qnil); + h->next = Fmake_vector (h->next, make_fixnum (-1)); + Lisp_Object key_and_value = h->key_and_value; + h->next_free = -1; + if (XFIXNAT (count) <= 1) { - Lisp_Object key_value_pair = dump_pop (&expected_contents); - Lisp_Object key = XCAR (key_value_pair); - Lisp_Object expected_value = XCDR (key_value_pair); - Lisp_Object arbitrary = Qdump_emacs_portable__sort_predicate_copied; - Lisp_Object found_value = Fgethash (key, table_rehashed, arbitrary); - eassert (EQ (expected_value, found_value)); - Fremhash (key, table_rehashed); + h->key_and_value = Fmake_vector (make_fixnum (2 * XFIXNAT (count)), Qnil); + ptrdiff_t i = 0; + while (i < ASIZE (key_and_value)) + { + ASET (h->key_and_value, i, AREF (key_and_value, i)); + i++; + } } - eassert (EQ (Fhash_table_count (table_rehashed), - make_fixnum (0))); + hash_table_rehash (h); } static dump_off @@ -2695,45 +2684,11 @@ dump_hash_table (struct dump_context *ctx, # error "Lisp_Hash_Table changed. See CHECK_STRUCTS comment in config.h." #endif const struct Lisp_Hash_Table *hash_in = XHASH_TABLE (object); - bool is_stable = dump_hash_table_stable_p (hash_in); - /* If the hash table is likely to be modified in memory (either - because we need to rehash, and thus toggle hash->count, or - because we need to assemble a list of weak tables) punt the hash - table to the end of the dump, where we can lump all such hash - tables together. */ - if (!(is_stable || !NILP (hash_in->weak)) - && ctx->flags.defer_hash_tables) - { - if (offset != DUMP_OBJECT_ON_HASH_TABLE_QUEUE) - { - eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE - || offset == DUMP_OBJECT_NOT_SEEN); - /* We still want to dump the actual keys and values now. */ - dump_enqueue_object (ctx, hash_in->key_and_value, WEIGHT_NONE); - /* We'll get to the rest later. */ - offset = DUMP_OBJECT_ON_HASH_TABLE_QUEUE; - dump_remember_object (ctx, object, offset); - dump_push (&ctx->deferred_hash_tables, object); - } - return offset; - } - - if (PDUMPER_CHECK_REHASHING) - check_hash_table_rehash (make_lisp_ptr ((void *) hash_in, Lisp_Vectorlike)); - struct Lisp_Hash_Table hash_munged = *hash_in; struct Lisp_Hash_Table *hash = &hash_munged; - /* Remember to rehash this hash table on first access. After a - dump reload, the hash table values will have changed, so we'll - need to rebuild the index. - - TODO: for EQ and EQL hash tables, it should be possible to rehash - here using the preferred load address of the dump, eliminating - the need to rehash-on-access if we can load the dump where we - want. */ - if (hash->count > 0 && !is_stable) - hash->count = -hash->count; + hash_table_freeze (hash); + dump_push (&ctx->hash_tables, object); START_DUMP_PVEC (ctx, &hash->header, struct Lisp_Hash_Table, out); dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header); @@ -4140,6 +4095,19 @@ DEFUN ("dump-emacs-portable", || !NILP (ctx->deferred_hash_tables) || !NILP (ctx->deferred_symbols)); + ctx->header.hash_list = ctx->offset; + dump_hash_table_list (ctx); + + do + { + dump_drain_deferred_hash_tables (ctx); + dump_drain_deferred_symbols (ctx); + dump_drain_normal_queue (ctx); + } + while (!dump_queue_empty_p (&ctx->dump_queue) + || !NILP (ctx->deferred_hash_tables) + || !NILP (ctx->deferred_symbols)); + dump_sort_copied_objects (ctx); /* While we copy built-in symbols into the Emacs image, these @@ -5431,6 +5399,13 @@ pdumper_load (const char *dump_filename) for (int i = 0; i < ARRAYELTS (sections); ++i) dump_mmap_reset (§ions[i]); + if (header->hash_list) + { + struct Lisp_Vector *hash_tables = + ((struct Lisp_Vector *)(dump_base + header->hash_list)); + XSETVECTOR (Vpdumper_hash_tables, hash_tables); + } + /* Run the functions Emacs registered for doing post-dump-load initialization. */ for (int i = 0; i < nr_dump_hooks; ++i) @@ -5502,10 +5477,31 @@ DEFUN ("pdumper-stats", Fpdumper_stats, Spdumper_stats, 0, 0, 0, +static void thaw_hash_tables (void) +{ + Lisp_Object hash_tables = Vpdumper_hash_tables; + ptrdiff_t i = 0; + while (i < ASIZE (hash_tables)) + { + hash_table_thaw (XHASH_TABLE (AREF (hash_tables, i))); + i++; + } + Vpdumper_hash_tables = zero_vector; +} + +void +init_pdumper_once (void) +{ + Vpdumper_hash_tables = zero_vector; + pdumper_do_now_and_after_load (thaw_hash_tables); +} + void syms_of_pdumper (void) { #ifdef HAVE_PDUMPER + DEFVAR_LISP ("pdumper-hash-tables", Vpdumper_hash_tables, + doc: /* A list of hash tables that need to be thawed after loading the pdump. */); defsubr (&Sdump_emacs_portable); defsubr (&Sdump_emacs_portable__sort_predicate); defsubr (&Sdump_emacs_portable__sort_predicate_copied); diff --git a/src/pdumper.h b/src/pdumper.h index ab2f426c1e..cfea06d33d 100644 --- a/src/pdumper.h +++ b/src/pdumper.h @@ -248,6 +248,7 @@ pdumper_clear_marks (void) file was loaded. */ extern void pdumper_record_wd (const char *); +void init_pdumper_once (void); void syms_of_pdumper (void); INLINE_HEADER_END -- 2.17.1