From 2db3964f2d8290b905b2d20e2a62fb073fec1382 Mon Sep 17 00:00:00 2001 From: Pip Cet Date: Thu, 11 Jul 2019 12:06:59 +0000 Subject: [PATCH] Rehash hash tables eagerly after loading a portable dump * src/lisp.h (hash_rehash_needed_p): Remove. All uses removed. (hash_rehash_if_needed): Remove. All uses removed. (struct Lisp_Hash_Table): Remove comment about rehashing hash tables. * src/pdumper.c (thaw_hash_tables): New function. (hash_table_thaw): New function. (hash_table_freeze): New function. (dump_hash_table): Simplify. (dump_hash_table_list): New function. (hash_table_contents): New function. (Fdump_emacs_portable): Handle hash tables by eager rehashing. (pdumper_load): Restore hash tables. (init_pdumper_once): New function. --- src/bytecode.c | 1 - src/composite.c | 1 - src/emacs.c | 1 + src/fns.c | 53 ++++----------- src/lisp.h | 19 +----- src/minibuf.c | 3 - src/pdumper.c | 177 +++++++++++++++++++++++++----------------------- src/pdumper.h | 1 + 8 files changed, 108 insertions(+), 148 deletions(-) diff --git a/src/bytecode.c b/src/bytecode.c index 29dff44f00..9c72429e0c 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -1402,7 +1402,6 @@ #define DEFINE(name, value) LABEL (name) , Lisp_Object v1 = POP; ptrdiff_t i; struct Lisp_Hash_Table *h = XHASH_TABLE (jmp_table); - hash_rehash_if_needed (h); /* h->count is a faster approximation for HASH_TABLE_SIZE (h) here. */ diff --git a/src/composite.c b/src/composite.c index 183062de46..49a285cff0 100644 --- a/src/composite.c +++ b/src/composite.c @@ -654,7 +654,6 @@ gstring_lookup_cache (Lisp_Object header) composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len) { struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table); - hash_rehash_if_needed (h); Lisp_Object header = LGSTRING_HEADER (gstring); EMACS_UINT hash = h->test.hashfn (&h->test, header); if (len < 0) diff --git a/src/emacs.c b/src/emacs.c index 9c93748a0f..136236eb35 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -1560,6 +1560,7 @@ main (int argc, char **argv) if (!initialized) { init_alloc_once (); + init_pdumper_once (); init_obarray_once (); init_eval_once (); init_charset_once (); diff --git a/src/fns.c b/src/fns.c index 7343556ac2..420d898b26 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4224,43 +4224,24 @@ hash_table_rehash (struct Lisp_Hash_Table *h) { ptrdiff_t size = HASH_TABLE_SIZE (h); - /* These structures may have been purecopied and shared - (bug#36447). */ - h->next = Fcopy_sequence (h->next); - h->index = Fcopy_sequence (h->index); - h->hash = Fcopy_sequence (h->hash); - /* Recompute the actual hash codes for each entry in the table. Order is still invalid. */ for (ptrdiff_t i = 0; i < size; ++i) - if (!NILP (HASH_HASH (h, i))) - { - Lisp_Object key = HASH_KEY (h, i); - EMACS_UINT hash_code = h->test.hashfn (&h->test, key); - set_hash_hash_slot (h, i, make_fixnum (hash_code)); - } - - /* Reset the index so that any slot we don't fill below is marked - invalid. */ - Ffillarray (h->index, make_fixnum (-1)); + { + Lisp_Object key = HASH_KEY (h, i); + EMACS_UINT hash_code = h->test.hashfn (&h->test, key); + set_hash_hash_slot (h, i, make_fixnum (hash_code)); + } /* Rebuild the collision chains. */ for (ptrdiff_t i = 0; i < size; ++i) - if (!NILP (HASH_HASH (h, i))) - { - EMACS_UINT hash_code = XUFIXNUM (HASH_HASH (h, i)); - ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index); - set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket)); - set_hash_index_slot (h, start_of_bucket, i); - eassert (HASH_NEXT (h, i) != i); /* Stop loops. */ - } - - /* Finally, mark the hash table as having a valid hash order. - Do this last so that if we're interrupted, we retry on next - access. */ - eassert (h->count < 0); - h->count = -h->count; - eassert (!hash_rehash_needed_p (h)); + { + EMACS_UINT hash_code = XUFIXNUM (HASH_HASH (h, i)); + ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index); + set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket)); + set_hash_index_slot (h, start_of_bucket, i); + eassert (HASH_NEXT (h, i) != i); /* Stop loops. */ + } } /* Lookup KEY in hash table H. If HASH is non-null, return in *HASH @@ -4273,8 +4254,6 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object key, EMACS_UINT *hash) EMACS_UINT hash_code; ptrdiff_t start_of_bucket, i; - hash_rehash_if_needed (h); - hash_code = h->test.hashfn (&h->test, key); eassert ((hash_code & ~INTMASK) == 0); if (hash) @@ -4303,8 +4282,6 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object value, { ptrdiff_t start_of_bucket, i; - hash_rehash_if_needed (h); - eassert ((hash & ~INTMASK) == 0); /* Increment count after resizing because resizing may fail. */ @@ -4338,8 +4315,6 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, Lisp_Object key) ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index); ptrdiff_t prev = -1; - hash_rehash_if_needed (h); - for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket); 0 <= i; i = HASH_NEXT (h, i)) @@ -4417,9 +4392,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p) for (ptrdiff_t bucket = 0; bucket < n; ++bucket) { /* Follow collision chain, removing entries that don't survive - this garbage collection. It's okay if hash_rehash_needed_p - (h) is true, since we're operating entirely on the cached - hash values. */ + this garbage collection. */ ptrdiff_t prev = -1; ptrdiff_t next; for (ptrdiff_t i = HASH_INDEX (h, bucket); 0 <= i; i = next) diff --git a/src/lisp.h b/src/lisp.h index fa57cad8a6..cc0e1bce51 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -2236,11 +2236,7 @@ #define DEFSYM(sym, name) /* empty */ struct Lisp_Hash_Table { - /* Change pdumper.c if you change the fields here. - - IMPORTANT!!!!!!! - - Call hash_rehash_if_needed() before accessing. */ + /* Change pdumper.c if you change the fields here. */ /* This is for Lisp; the hash table code does not refer to it. */ union vectorlike_header header; @@ -2353,19 +2349,6 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h) void hash_table_rehash (struct Lisp_Hash_Table *h); -INLINE bool -hash_rehash_needed_p (const struct Lisp_Hash_Table *h) -{ - return h->count < 0; -} - -INLINE void -hash_rehash_if_needed (struct Lisp_Hash_Table *h) -{ - if (hash_rehash_needed_p (h)) - hash_table_rehash (h); -} - /* Default size for hash tables if not specified. */ enum DEFAULT_HASH_SIZE { DEFAULT_HASH_SIZE = 65 }; diff --git a/src/minibuf.c b/src/minibuf.c index d9a6e15b05..e923ce2a43 100644 --- a/src/minibuf.c +++ b/src/minibuf.c @@ -1203,9 +1203,6 @@ DEFUN ("try-completion", Ftry_completion, Stry_completion, 2, 3, 0, bucket = AREF (collection, idx); } - if (HASH_TABLE_P (collection)) - hash_rehash_if_needed (XHASH_TABLE (collection)); - while (1) { /* Get the next element of the alist, obarray, or hash-table. */ diff --git a/src/pdumper.c b/src/pdumper.c index 3d8531c6a4..cfdeb5ec9b 100644 --- a/src/pdumper.c +++ b/src/pdumper.c @@ -392,6 +392,8 @@ dump_fingerprint (const char *label, unsigned char const *xfingerprint) The start of the cold region is always aligned on a page boundary. */ dump_off cold_start; + + dump_off hash_list; }; /* Double-ended singly linked list. */ @@ -549,6 +551,8 @@ dump_fingerprint (const char *label, unsigned char const *xfingerprint) heap objects. */ Lisp_Object bignum_data; + Lisp_Object hash_tables; + unsigned number_hot_relocations; unsigned number_discardable_relocations; }; @@ -2621,68 +2625,64 @@ dump_vectorlike_generic (struct dump_context *ctx, return offset; } -/* Determine whether the hash table's hash order is stable - across dump and load. If it is, we don't have to trigger - a rehash on access. */ -static bool -dump_hash_table_stable_p (const struct Lisp_Hash_Table *hash) +/* Return a list of (KEY . VALUE) pairs in the given hash table. */ +static Lisp_Object +hash_table_contents (struct Lisp_Hash_Table *h) { - bool is_eql = hash->test.hashfn == hashfn_eql; - bool is_equal = hash->test.hashfn == hashfn_equal; - ptrdiff_t size = HASH_TABLE_SIZE (hash); - for (ptrdiff_t i = 0; i < size; ++i) - if (!NILP (HASH_HASH (hash, i))) + Lisp_Object contents = Qnil; + /* Make sure key_and_value ends up in the same order, charset.c + relies on it by expecting hash table indices to stay constant + across the dump. */ + for (ptrdiff_t i = HASH_TABLE_SIZE (h) - 1; i >= 0; --i) + if (!NILP (HASH_HASH (h, i))) { - Lisp_Object key = HASH_KEY (hash, i); - bool key_stable = (dump_builtin_symbol_p (key) - || FIXNUMP (key) - || (is_equal && STRINGP (key)) - || ((is_equal || is_eql) && FLOATP (key))); - if (!key_stable) - return false; + dump_push (&contents, HASH_VALUE (h, i)); + dump_push (&contents, HASH_KEY (h, i)); } + return CALLN (Fapply, Qvector, contents); +} - return true; +static dump_off +dump_hash_table_list (struct dump_context *ctx) +{ + if (CONSP (ctx->hash_tables)) + return dump_object (ctx, CALLN (Fapply, Qvector, ctx->hash_tables)); + else + return 0; } -/* Return a list of (KEY . VALUE) pairs in the given hash table. */ -static Lisp_Object -hash_table_contents (Lisp_Object table) +static void +hash_table_freeze (struct Lisp_Hash_Table *h) { - Lisp_Object contents = Qnil; - struct Lisp_Hash_Table *h = XHASH_TABLE (table); - for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i) - if (!NILP (HASH_HASH (h, i))) - dump_push (&contents, Fcons (HASH_KEY (h, i), HASH_VALUE (h, i))); - return Fnreverse (contents); + h->key_and_value = hash_table_contents (h); + ptrdiff_t nkeys = XFIXNAT (Flength (h->key_and_value)) / 2; + h->count = nkeys; + if (nkeys == 0) + nkeys = 1; + h->index = h->next = h->hash = make_fixnum (nkeys); } -/* Copy the given hash table, rehash it, and make sure that we can - look up all the values in the original. */ static void -check_hash_table_rehash (Lisp_Object table_orig) -{ - hash_rehash_if_needed (XHASH_TABLE (table_orig)); - Lisp_Object table_rehashed = Fcopy_hash_table (table_orig); - eassert (XHASH_TABLE (table_rehashed)->count >= 0); - XHASH_TABLE (table_rehashed)->count *= -1; - eassert (XHASH_TABLE (table_rehashed)->count <= 0); - hash_rehash_if_needed (XHASH_TABLE (table_rehashed)); - eassert (XHASH_TABLE (table_rehashed)->count >= 0); - Lisp_Object expected_contents = hash_table_contents (table_orig); - while (!NILP (expected_contents)) +hash_table_thaw (struct Lisp_Hash_Table *h) +{ + Lisp_Object count = h->index; + h->index = Fmake_vector (h->index, make_fixnum (-1)); + h->hash = Fmake_vector (h->hash, Qnil); + h->next = Fmake_vector (h->next, make_fixnum (-1)); + Lisp_Object key_and_value = h->key_and_value; + h->next_free = -1; + if (XFIXNAT (count) <= 1) { - Lisp_Object key_value_pair = dump_pop (&expected_contents); - Lisp_Object key = XCAR (key_value_pair); - Lisp_Object expected_value = XCDR (key_value_pair); - Lisp_Object arbitrary = Qdump_emacs_portable__sort_predicate_copied; - Lisp_Object found_value = Fgethash (key, table_rehashed, arbitrary); - eassert (EQ (expected_value, found_value)); - Fremhash (key, table_rehashed); + h->key_and_value = Fmake_vector (make_fixnum (2 * XFIXNAT (count)), Qnil); + ptrdiff_t i = 0; + while (i < ASIZE (key_and_value)) + { + ASET (h->key_and_value, i, AREF (key_and_value, i)); + i++; + } } - eassert (EQ (Fhash_table_count (table_rehashed), - make_fixnum (0))); + hash_table_rehash (h); } static dump_off @@ -2694,45 +2694,11 @@ dump_hash_table (struct dump_context *ctx, # error "Lisp_Hash_Table changed. See CHECK_STRUCTS comment in config.h." #endif const struct Lisp_Hash_Table *hash_in = XHASH_TABLE (object); - bool is_stable = dump_hash_table_stable_p (hash_in); - /* If the hash table is likely to be modified in memory (either - because we need to rehash, and thus toggle hash->count, or - because we need to assemble a list of weak tables) punt the hash - table to the end of the dump, where we can lump all such hash - tables together. */ - if (!(is_stable || !NILP (hash_in->weak)) - && ctx->flags.defer_hash_tables) - { - if (offset != DUMP_OBJECT_ON_HASH_TABLE_QUEUE) - { - eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE - || offset == DUMP_OBJECT_NOT_SEEN); - /* We still want to dump the actual keys and values now. */ - dump_enqueue_object (ctx, hash_in->key_and_value, WEIGHT_NONE); - /* We'll get to the rest later. */ - offset = DUMP_OBJECT_ON_HASH_TABLE_QUEUE; - dump_remember_object (ctx, object, offset); - dump_push (&ctx->deferred_hash_tables, object); - } - return offset; - } - - if (PDUMPER_CHECK_REHASHING) - check_hash_table_rehash (make_lisp_ptr ((void *) hash_in, Lisp_Vectorlike)); - struct Lisp_Hash_Table hash_munged = *hash_in; struct Lisp_Hash_Table *hash = &hash_munged; - /* Remember to rehash this hash table on first access. After a - dump reload, the hash table values will have changed, so we'll - need to rebuild the index. - - TODO: for EQ and EQL hash tables, it should be possible to rehash - here using the preferred load address of the dump, eliminating - the need to rehash-on-access if we can load the dump where we - want. */ - if (hash->count > 0 && !is_stable) - hash->count = -hash->count; + hash_table_freeze (hash); + dump_push (&ctx->hash_tables, object); START_DUMP_PVEC (ctx, &hash->header, struct Lisp_Hash_Table, out); dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header); @@ -4142,6 +4108,19 @@ DEFUN ("dump-emacs-portable", || !NILP (ctx->deferred_hash_tables) || !NILP (ctx->deferred_symbols)); + ctx->header.hash_list = ctx->offset; + dump_hash_table_list (ctx); + + do + { + dump_drain_deferred_hash_tables (ctx); + dump_drain_deferred_symbols (ctx); + dump_drain_normal_queue (ctx); + } + while (!dump_queue_empty_p (&ctx->dump_queue) + || !NILP (ctx->deferred_hash_tables) + || !NILP (ctx->deferred_symbols)); + dump_sort_copied_objects (ctx); /* While we copy built-in symbols into the Emacs image, these @@ -5433,6 +5412,13 @@ pdumper_load (const char *dump_filename) for (int i = 0; i < ARRAYELTS (sections); ++i) dump_mmap_reset (§ions[i]); + if (header->hash_list) + { + struct Lisp_Vector *hash_tables = + ((struct Lisp_Vector *)(dump_base + header->hash_list)); + XSETVECTOR (Vpdumper_hash_tables, hash_tables); + } + /* Run the functions Emacs registered for doing post-dump-load initialization. */ for (int i = 0; i < nr_dump_hooks; ++i) @@ -5504,10 +5490,31 @@ DEFUN ("pdumper-stats", Fpdumper_stats, Spdumper_stats, 0, 0, 0, +static void thaw_hash_tables (void) +{ + Lisp_Object hash_tables = Vpdumper_hash_tables; + ptrdiff_t i = 0; + while (i < ASIZE (hash_tables)) + { + hash_table_thaw (XHASH_TABLE (AREF (hash_tables, i))); + i++; + } + Vpdumper_hash_tables = zero_vector; +} + +void +init_pdumper_once (void) +{ + Vpdumper_hash_tables = zero_vector; + pdumper_do_now_and_after_load (thaw_hash_tables); +} + void syms_of_pdumper (void) { #ifdef HAVE_PDUMPER + DEFVAR_LISP ("pdumper-hash-tables", Vpdumper_hash_tables, + doc: /* A list of hash tables that need to be thawed after loading the pdump. */); defsubr (&Sdump_emacs_portable); defsubr (&Sdump_emacs_portable__sort_predicate); defsubr (&Sdump_emacs_portable__sort_predicate_copied); diff --git a/src/pdumper.h b/src/pdumper.h index ab2f426c1e..cfea06d33d 100644 --- a/src/pdumper.h +++ b/src/pdumper.h @@ -248,6 +248,7 @@ pdumper_clear_marks (void) file was loaded. */ extern void pdumper_record_wd (const char *); +void init_pdumper_once (void); void syms_of_pdumper (void); INLINE_HEADER_END -- 2.20.1