bug-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#36597: 27.0.50; rehash hash tables eagerly in pdumper


From: Eli Zaretskii
Subject: bug#36597: 27.0.50; rehash hash tables eagerly in pdumper
Date: Thu, 18 Jul 2019 08:39:20 +0300

> From: Paul Eggert <eggert@cs.ucla.edu>
> Date: Sun, 14 Jul 2019 07:39:08 -0700
> Cc: 36597@debbugs.gnu.org
> 
> Although I like the simplicity of eager rehashing, I'm not yet sold on the 
> performance implications. On my usual (cd lisp && make compile-always) 
> benchmark, the patch hurt user+system CPU time performance by 1.5%. 
> Admittedly 
> just one benchmark, but still....
> 
> Also, must we expose Vpdumper_hash_tables to Lisp? Surely it'd be better to 
> keep 
> it private to pdumper.c.
> 
> I'll CC: to Daniel to see whether he has any insights about improvements in 
> this 
> area.

You didn't CC Daniel, AFAICT, so I did that now.

> PS. I ran that benchmark on my home desktop, an Intel Xeon E3-1225 v2 running 
> Ubunto 18.04.2. To run it, I rebased your patch and also removed the 
> no-longer-used PDUMPER_CHECK_REHASHING macro that my GCC complained about 
> (wonder why that didn't happen for you?), resulting in the attached patch 
> against current master 8ff09154a29a1151afb2902267ca35f89ebda73c.
> 
> >From 96a36697b2f472f3a6b3138444659babd0d73f32 Mon Sep 17 00:00:00 2001
> From: Pip Cet <pipcet@gmail.com>
> Date: Sun, 14 Jul 2019 07:22:01 -0700
> Subject: [PATCH] Rehash hash tables eagerly after loading a portable dump
> 
> * src/lisp.h (hash_rehash_needed_p): Remove.  All uses removed.
> (hash_rehash_if_needed): Remove.  All uses removed.
> (struct Lisp_Hash_Table): Remove comment about rehashing hash tables.
> * src/pdumper.c (thaw_hash_tables): New function.
> (hash_table_thaw): New function.
> (hash_table_freeze): New function.
> (dump_hash_table): Simplify.
> (dump_hash_table_list): New function.
> (hash_table_contents): New function.
> (Fdump_emacs_portable): Handle hash tables by eager rehashing.
> (pdumper_load): Restore hash tables.
> (init_pdumper_once): New function.
> (PDUMPER_CHECK_REHASHING): Remove.
> ---
>  src/bytecode.c  |   1 -
>  src/composite.c |   1 -
>  src/emacs.c     |   1 +
>  src/fns.c       |  54 ++++----------
>  src/lisp.h      |  19 +----
>  src/minibuf.c   |   3 -
>  src/pdumper.c   | 188 ++++++++++++++++++++++++------------------------
>  src/pdumper.h   |   1 +
>  8 files changed, 108 insertions(+), 160 deletions(-)
> 
> diff --git a/src/bytecode.c b/src/bytecode.c
> index 29dff44f00..9c72429e0c 100644
> --- a/src/bytecode.c
> +++ b/src/bytecode.c
> @@ -1402,7 +1402,6 @@ #define DEFINE(name, value) LABEL (name) ,
>              Lisp_Object v1 = POP;
>              ptrdiff_t i;
>              struct Lisp_Hash_Table *h = XHASH_TABLE (jmp_table);
> -            hash_rehash_if_needed (h);
>  
>              /* h->count is a faster approximation for HASH_TABLE_SIZE (h)
>                 here. */
> diff --git a/src/composite.c b/src/composite.c
> index 183062de46..49a285cff0 100644
> --- a/src/composite.c
> +++ b/src/composite.c
> @@ -654,7 +654,6 @@ gstring_lookup_cache (Lisp_Object header)
>  composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len)
>  {
>    struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
> -  hash_rehash_if_needed (h);
>    Lisp_Object header = LGSTRING_HEADER (gstring);
>    EMACS_UINT hash = h->test.hashfn (&h->test, header);
>    if (len < 0)
> diff --git a/src/emacs.c b/src/emacs.c
> index ad661a081b..855b2c6715 100644
> --- a/src/emacs.c
> +++ b/src/emacs.c
> @@ -1560,6 +1560,7 @@ main (int argc, char **argv)
>    if (!initialized)
>      {
>        init_alloc_once ();
> +      init_pdumper_once ();
>        init_obarray_once ();
>        init_eval_once ();
>        init_charset_once ();
> diff --git a/src/fns.c b/src/fns.c
> index 0497588689..b6134a314c 100644
> --- a/src/fns.c
> +++ b/src/fns.c
> @@ -4241,43 +4241,24 @@ hash_table_rehash (struct Lisp_Hash_Table *h)
>  {
>    ptrdiff_t size = HASH_TABLE_SIZE (h);
>  
> -  /* These structures may have been purecopied and shared
> -     (bug#36447).  */
> -  h->next = Fcopy_sequence (h->next);
> -  h->index = Fcopy_sequence (h->index);
> -  h->hash = Fcopy_sequence (h->hash);
> -
>    /* Recompute the actual hash codes for each entry in the table.
>       Order is still invalid.  */
>    for (ptrdiff_t i = 0; i < size; ++i)
> -    if (!NILP (HASH_HASH (h, i)))
> -      {
> -        Lisp_Object key = HASH_KEY (h, i);
> -        EMACS_UINT hash_code = h->test.hashfn (&h->test, key);
> -        set_hash_hash_slot (h, i, make_fixnum (hash_code));
> -      }
> -
> -  /* Reset the index so that any slot we don't fill below is marked
> -     invalid.  */
> -  Ffillarray (h->index, make_fixnum (-1));
> +    {
> +      Lisp_Object key = HASH_KEY (h, i);
> +      EMACS_UINT hash_code = h->test.hashfn (&h->test, key);
> +      set_hash_hash_slot (h, i, make_fixnum (hash_code));
> +    }
>  
>    /* Rebuild the collision chains.  */
>    for (ptrdiff_t i = 0; i < size; ++i)
> -    if (!NILP (HASH_HASH (h, i)))
> -      {
> -        EMACS_UINT hash_code = XUFIXNUM (HASH_HASH (h, i));
> -        ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index);
> -        set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
> -        set_hash_index_slot (h, start_of_bucket, i);
> -        eassert (HASH_NEXT (h, i) != i); /* Stop loops.  */
> -      }
> -
> -  /* Finally, mark the hash table as having a valid hash order.
> -     Do this last so that if we're interrupted, we retry on next
> -     access. */
> -  eassert (h->count < 0);
> -  h->count = -h->count;
> -  eassert (!hash_rehash_needed_p (h));
> +    {
> +      EMACS_UINT hash_code = XUFIXNUM (HASH_HASH (h, i));
> +      ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index);
> +      set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
> +      set_hash_index_slot (h, start_of_bucket, i);
> +      eassert (HASH_NEXT (h, i) != i); /* Stop loops.  */
> +    }
>  }
>  
>  /* Lookup KEY in hash table H.  If HASH is non-null, return in *HASH
> @@ -4290,8 +4271,6 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object 
> key, EMACS_UINT *hash)
>    EMACS_UINT hash_code;
>    ptrdiff_t start_of_bucket, i;
>  
> -  hash_rehash_if_needed (h);
> -
>    hash_code = h->test.hashfn (&h->test, key);
>    eassert ((hash_code & ~INTMASK) == 0);
>    if (hash)
> @@ -4320,8 +4299,6 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, 
> Lisp_Object value,
>  {
>    ptrdiff_t start_of_bucket, i;
>  
> -  hash_rehash_if_needed (h);
> -
>    eassert ((hash & ~INTMASK) == 0);
>  
>    /* Increment count after resizing because resizing may fail.  */
> @@ -4355,8 +4332,6 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, 
> Lisp_Object key)
>    ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index);
>    ptrdiff_t prev = -1;
>  
> -  hash_rehash_if_needed (h);
> -
>    for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket);
>         0 <= i;
>         i = HASH_NEXT (h, i))
> @@ -4434,9 +4409,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool 
> remove_entries_p)
>    for (ptrdiff_t bucket = 0; bucket < n; ++bucket)
>      {
>        /* Follow collision chain, removing entries that don't survive
> -         this garbage collection.  It's okay if hash_rehash_needed_p
> -         (h) is true, since we're operating entirely on the cached
> -         hash values. */
> +         this garbage collection.  */
>        ptrdiff_t prev = -1;
>        ptrdiff_t next;
>        for (ptrdiff_t i = HASH_INDEX (h, bucket); 0 <= i; i = next)
> @@ -4881,7 +4854,6 @@ DEFUN ("hash-table-count", Fhash_table_count, 
> Shash_table_count, 1, 1, 0,
>    (Lisp_Object table)
>  {
>    struct Lisp_Hash_Table *h = check_hash_table (table);
> -  hash_rehash_if_needed (h);
>    return make_fixnum (h->count);
>  }
>  
> diff --git a/src/lisp.h b/src/lisp.h
> index 13014c82dc..d0e5c43c41 100644
> --- a/src/lisp.h
> +++ b/src/lisp.h
> @@ -2245,11 +2245,7 @@ #define DEFSYM(sym, name) /* empty */
>  
>  struct Lisp_Hash_Table
>  {
> -  /* Change pdumper.c if you change the fields here.
> -
> -     IMPORTANT!!!!!!!
> -
> -     Call hash_rehash_if_needed() before accessing.  */
> +  /* Change pdumper.c if you change the fields here.  */
>  
>    /* This is for Lisp; the hash table code does not refer to it.  */
>    union vectorlike_header header;
> @@ -2363,19 +2359,6 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h)
>  
>  void hash_table_rehash (struct Lisp_Hash_Table *h);
>  
> -INLINE bool
> -hash_rehash_needed_p (const struct Lisp_Hash_Table *h)
> -{
> -  return h->count < 0;
> -}
> -
> -INLINE void
> -hash_rehash_if_needed (struct Lisp_Hash_Table *h)
> -{
> -  if (hash_rehash_needed_p (h))
> -    hash_table_rehash (h);
> -}
> -
>  /* Default size for hash tables if not specified.  */
>  
>  enum DEFAULT_HASH_SIZE { DEFAULT_HASH_SIZE = 65 };
> diff --git a/src/minibuf.c b/src/minibuf.c
> index d9a6e15b05..e923ce2a43 100644
> --- a/src/minibuf.c
> +++ b/src/minibuf.c
> @@ -1203,9 +1203,6 @@ DEFUN ("try-completion", Ftry_completion, 
> Stry_completion, 2, 3, 0,
>        bucket = AREF (collection, idx);
>      }
>  
> -  if (HASH_TABLE_P (collection))
> -    hash_rehash_if_needed (XHASH_TABLE (collection));
> -
>    while (1)
>      {
>        /* Get the next element of the alist, obarray, or hash-table.  */
> diff --git a/src/pdumper.c b/src/pdumper.c
> index 03c00bf27b..d35d296d32 100644
> --- a/src/pdumper.c
> +++ b/src/pdumper.c
> @@ -107,17 +107,6 @@ #define VM_MS_WINDOWS 2
>  
>  #define DANGEROUS 0
>  
> -/* PDUMPER_CHECK_REHASHING being true causes the portable dumper to
> -   check, for each hash table it dumps, that the hash table means the
> -   same thing after rehashing.  */
> -#ifndef PDUMPER_CHECK_REHASHING
> -# if ENABLE_CHECKING
> -#  define PDUMPER_CHECK_REHASHING 1
> -# else
> -#  define PDUMPER_CHECK_REHASHING 0
> -# endif
> -#endif
> -
>  /* We require an architecture in which all pointers are the same size
>     and have the same layout, where pointers are either 32 or 64 bits
>     long, and where bytes have eight bits --- that is, a
> @@ -393,6 +382,8 @@ dump_fingerprint (char const *label,
>       The start of the cold region is always aligned on a page
>       boundary.  */
>    dump_off cold_start;
> +
> +  dump_off hash_list;
>  };
>  
>  /* Double-ended singly linked list.  */
> @@ -550,6 +541,8 @@ dump_fingerprint (char const *label,
>       heap objects.  */
>    Lisp_Object bignum_data;
>  
> +  Lisp_Object hash_tables;
> +
>    unsigned number_hot_relocations;
>    unsigned number_discardable_relocations;
>  };
> @@ -2622,68 +2615,64 @@ dump_vectorlike_generic (struct dump_context *ctx,
>    return offset;
>  }
>  
> -/* Determine whether the hash table's hash order is stable
> -   across dump and load.  If it is, we don't have to trigger
> -   a rehash on access.  */
> -static bool
> -dump_hash_table_stable_p (const struct Lisp_Hash_Table *hash)
> +/* Return a list of (KEY . VALUE) pairs in the given hash table.  */
> +static Lisp_Object
> +hash_table_contents (struct Lisp_Hash_Table *h)
>  {
> -  bool is_eql = hash->test.hashfn == hashfn_eql;
> -  bool is_equal = hash->test.hashfn == hashfn_equal;
> -  ptrdiff_t size = HASH_TABLE_SIZE (hash);
> -  for (ptrdiff_t i = 0; i < size; ++i)
> -    if (!NILP (HASH_HASH (hash, i)))
> +  Lisp_Object contents = Qnil;
> +  /* Make sure key_and_value ends up in the same order, charset.c
> +     relies on it by expecting hash table indices to stay constant
> +     across the dump.  */
> +  for (ptrdiff_t i = HASH_TABLE_SIZE (h) - 1; i >= 0; --i)
> +    if (!NILP (HASH_HASH (h, i)))
>        {
> -        Lisp_Object key =  HASH_KEY (hash, i);
> -     bool key_stable = (dump_builtin_symbol_p (key)
> -                        || FIXNUMP (key)
> -                        || (is_equal && STRINGP (key))
> -                        || ((is_equal || is_eql) && FLOATP (key)));
> -        if (!key_stable)
> -          return false;
> +     dump_push (&contents, HASH_VALUE (h, i));
> +     dump_push (&contents, HASH_KEY (h, i));
>        }
> +  return CALLN (Fapply, Qvector, contents);
> +}
>  
> -  return true;
> +static dump_off
> +dump_hash_table_list (struct dump_context *ctx)
> +{
> +  if (CONSP (ctx->hash_tables))
> +    return dump_object (ctx, CALLN (Fapply, Qvector, ctx->hash_tables));
> +  else
> +    return 0;
>  }
>  
> -/* Return a list of (KEY . VALUE) pairs in the given hash table.  */
> -static Lisp_Object
> -hash_table_contents (Lisp_Object table)
> +static void
> +hash_table_freeze (struct Lisp_Hash_Table *h)
>  {
> -  Lisp_Object contents = Qnil;
> -  struct Lisp_Hash_Table *h = XHASH_TABLE (table);
> -  for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i)
> -    if (!NILP (HASH_HASH (h, i)))
> -      dump_push (&contents, Fcons (HASH_KEY (h, i), HASH_VALUE (h, i)));
> -  return Fnreverse (contents);
> +  h->key_and_value = hash_table_contents (h);
> +  ptrdiff_t nkeys = XFIXNAT (Flength (h->key_and_value)) / 2;
> +  h->count = nkeys;
> +  if (nkeys == 0)
> +    nkeys = 1;
> +  h->index = h->next = h->hash = make_fixnum (nkeys);
>  }
>  
> -/* Copy the given hash table, rehash it, and make sure that we can
> -   look up all the values in the original.  */
>  static void
> -check_hash_table_rehash (Lisp_Object table_orig)
> -{
> -  hash_rehash_if_needed (XHASH_TABLE (table_orig));
> -  Lisp_Object table_rehashed = Fcopy_hash_table (table_orig);
> -  eassert (XHASH_TABLE (table_rehashed)->count >= 0);
> -  XHASH_TABLE (table_rehashed)->count *= -1;
> -  eassert (XHASH_TABLE (table_rehashed)->count <= 0);
> -  hash_rehash_if_needed (XHASH_TABLE (table_rehashed));
> -  eassert (XHASH_TABLE (table_rehashed)->count >= 0);
> -  Lisp_Object expected_contents = hash_table_contents (table_orig);
> -  while (!NILP (expected_contents))
> +hash_table_thaw (struct Lisp_Hash_Table *h)
> +{
> +  Lisp_Object count = h->index;
> +  h->index = Fmake_vector (h->index, make_fixnum (-1));
> +  h->hash = Fmake_vector (h->hash, Qnil);
> +  h->next = Fmake_vector (h->next, make_fixnum (-1));
> +  Lisp_Object key_and_value = h->key_and_value;
> +  h->next_free = -1;
> +  if (XFIXNAT (count) <= 1)
>      {
> -      Lisp_Object key_value_pair = dump_pop (&expected_contents);
> -      Lisp_Object key = XCAR (key_value_pair);
> -      Lisp_Object expected_value = XCDR (key_value_pair);
> -      Lisp_Object arbitrary = Qdump_emacs_portable__sort_predicate_copied;
> -      Lisp_Object found_value = Fgethash (key, table_rehashed, arbitrary);
> -      eassert (EQ (expected_value, found_value));
> -      Fremhash (key, table_rehashed);
> +      h->key_and_value = Fmake_vector (make_fixnum (2 * XFIXNAT (count)), 
> Qnil);
> +      ptrdiff_t i = 0;
> +      while (i < ASIZE (key_and_value))
> +     {
> +       ASET (h->key_and_value, i, AREF (key_and_value, i));
> +       i++;
> +     }
>      }
>  
> -  eassert (EQ (Fhash_table_count (table_rehashed),
> -               make_fixnum (0)));
> +  hash_table_rehash (h);
>  }
>  
>  static dump_off
> @@ -2695,45 +2684,11 @@ dump_hash_table (struct dump_context *ctx,
>  # error "Lisp_Hash_Table changed. See CHECK_STRUCTS comment in config.h."
>  #endif
>    const struct Lisp_Hash_Table *hash_in = XHASH_TABLE (object);
> -  bool is_stable = dump_hash_table_stable_p (hash_in);
> -  /* If the hash table is likely to be modified in memory (either
> -     because we need to rehash, and thus toggle hash->count, or
> -     because we need to assemble a list of weak tables) punt the hash
> -     table to the end of the dump, where we can lump all such hash
> -     tables together.  */
> -  if (!(is_stable || !NILP (hash_in->weak))
> -      && ctx->flags.defer_hash_tables)
> -    {
> -      if (offset != DUMP_OBJECT_ON_HASH_TABLE_QUEUE)
> -        {
> -       eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE
> -                || offset == DUMP_OBJECT_NOT_SEEN);
> -          /* We still want to dump the actual keys and values now.  */
> -          dump_enqueue_object (ctx, hash_in->key_and_value, WEIGHT_NONE);
> -          /* We'll get to the rest later.  */
> -          offset = DUMP_OBJECT_ON_HASH_TABLE_QUEUE;
> -          dump_remember_object (ctx, object, offset);
> -          dump_push (&ctx->deferred_hash_tables, object);
> -        }
> -      return offset;
> -    }
> -
> -  if (PDUMPER_CHECK_REHASHING)
> -    check_hash_table_rehash (make_lisp_ptr ((void *) hash_in, 
> Lisp_Vectorlike));
> -
>    struct Lisp_Hash_Table hash_munged = *hash_in;
>    struct Lisp_Hash_Table *hash = &hash_munged;
>  
> -  /* Remember to rehash this hash table on first access.  After a
> -     dump reload, the hash table values will have changed, so we'll
> -     need to rebuild the index.
> -
> -     TODO: for EQ and EQL hash tables, it should be possible to rehash
> -     here using the preferred load address of the dump, eliminating
> -     the need to rehash-on-access if we can load the dump where we
> -     want.  */
> -  if (hash->count > 0 && !is_stable)
> -    hash->count = -hash->count;
> +  hash_table_freeze (hash);
> +  dump_push (&ctx->hash_tables, object);
>  
>    START_DUMP_PVEC (ctx, &hash->header, struct Lisp_Hash_Table, out);
>    dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header);
> @@ -4140,6 +4095,19 @@ DEFUN ("dump-emacs-portable",
>        || !NILP (ctx->deferred_hash_tables)
>        || !NILP (ctx->deferred_symbols));
>  
> +  ctx->header.hash_list = ctx->offset;
> +  dump_hash_table_list (ctx);
> +
> +  do
> +    {
> +      dump_drain_deferred_hash_tables (ctx);
> +      dump_drain_deferred_symbols (ctx);
> +      dump_drain_normal_queue (ctx);
> +    }
> +  while (!dump_queue_empty_p (&ctx->dump_queue)
> +      || !NILP (ctx->deferred_hash_tables)
> +      || !NILP (ctx->deferred_symbols));
> +
>    dump_sort_copied_objects (ctx);
>  
>    /* While we copy built-in symbols into the Emacs image, these
> @@ -5431,6 +5399,13 @@ pdumper_load (const char *dump_filename)
>    for (int i = 0; i < ARRAYELTS (sections); ++i)
>      dump_mmap_reset (&sections[i]);
>  
> +  if (header->hash_list)
> +    {
> +      struct Lisp_Vector *hash_tables =
> +     ((struct Lisp_Vector *)(dump_base + header->hash_list));
> +      XSETVECTOR (Vpdumper_hash_tables, hash_tables);
> +    }
> +
>    /* Run the functions Emacs registered for doing post-dump-load
>       initialization.  */
>    for (int i = 0; i < nr_dump_hooks; ++i)
> @@ -5502,10 +5477,31 @@ DEFUN ("pdumper-stats", Fpdumper_stats, 
> Spdumper_stats, 0, 0, 0,
>  
>  
>  
> +static void thaw_hash_tables (void)
> +{
> +  Lisp_Object hash_tables = Vpdumper_hash_tables;
> +  ptrdiff_t i = 0;
> +  while (i < ASIZE (hash_tables))
> +    {
> +      hash_table_thaw (XHASH_TABLE (AREF (hash_tables, i)));
> +      i++;
> +    }
> +  Vpdumper_hash_tables = zero_vector;
> +}
> +
> +void
> +init_pdumper_once (void)
> +{
> +  Vpdumper_hash_tables = zero_vector;
> +  pdumper_do_now_and_after_load (thaw_hash_tables);
> +}
> +
>  void
>  syms_of_pdumper (void)
>  {
>  #ifdef HAVE_PDUMPER
> +  DEFVAR_LISP ("pdumper-hash-tables", Vpdumper_hash_tables,
> +            doc: /* A list of hash tables that need to be thawed after 
> loading the pdump.  */);
>    defsubr (&Sdump_emacs_portable);
>    defsubr (&Sdump_emacs_portable__sort_predicate);
>    defsubr (&Sdump_emacs_portable__sort_predicate_copied);
> diff --git a/src/pdumper.h b/src/pdumper.h
> index ab2f426c1e..cfea06d33d 100644
> --- a/src/pdumper.h
> +++ b/src/pdumper.h
> @@ -248,6 +248,7 @@ pdumper_clear_marks (void)
>     file was loaded.  */
>  extern void pdumper_record_wd (const char *);
>  
> +void init_pdumper_once (void);
>  void syms_of_pdumper (void);
>  
>  INLINE_HEADER_END
> -- 
> 2.17.1
> 





reply via email to

[Prev in Thread] Current Thread [Next in Thread]