emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

scratch/bytecode-speedup 870806d4c4 04/11: Pin bytecode strings to avoid


From: Mattias Engdegård
Subject: scratch/bytecode-speedup 870806d4c4 04/11: Pin bytecode strings to avoid copy at call time
Date: Tue, 11 Jan 2022 11:50:47 -0500 (EST)

branch: scratch/bytecode-speedup
commit 870806d4c453addca794aa0940298425241e13d9
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>

    Pin bytecode strings to avoid copy at call time
    
    Avoid making a copy (in the interpreter C stack frame) of the bytecode
    string by making sure it won't be moved by the GC.  This is done by
    reallocating it to the heap normally only used for large strings,
    which isn't compacted.
    
    This requires that we retain an explicit reference to the bytecode
    string object (`bytestr`) lest it be GCed away should all other
    references vanish during execution.  We allocate an extra stack slot
    for that, as we already do for the constant vector object.
    
    * src/alloc.c (allocate_string_data): Add `immovable` argument.
    (resize_string_data, make_clear_multibyte_string): Use it.
    (pin_string): New.
    * src/pdumper.c (dump_string): Fix incorrect comment.
    Update hash for Lisp_String (only comments changed, not contents).
    * src/lread.c (read1):
    * src/alloc.c (Fmake_byte_code, purecopy):
    * src/bytecode.c (Fbyte_code): Pin bytecode on object creation.
    (exec_byte_code): Don't copy bytecode.  Retain `bytestr` explicitly.
    * src/lisp.h (Lisp_String): Explain special size_byte values.
    (string_immovable_p): New.
---
 src/alloc.c    | 38 ++++++++++++++++++++++++++++++++++----
 src/bytecode.c | 23 ++++++++++++++++-------
 src/lisp.h     | 12 +++++++++++-
 src/lread.c    | 22 +++++++++++++---------
 src/pdumper.c  |  4 ++--
 5 files changed, 76 insertions(+), 23 deletions(-)

diff --git a/src/alloc.c b/src/alloc.c
index 7582a42601..a0b1be920e 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -1853,7 +1853,8 @@ allocate_string (void)
 
 static void
 allocate_string_data (struct Lisp_String *s,
-                     EMACS_INT nchars, EMACS_INT nbytes, bool clearit)
+                     EMACS_INT nchars, EMACS_INT nbytes, bool clearit,
+                     bool immovable)
 {
   sdata *data;
   struct sblock *b;
@@ -1867,7 +1868,7 @@ allocate_string_data (struct Lisp_String *s,
 
   MALLOC_BLOCK_INPUT;
 
-  if (nbytes > LARGE_STRING_BYTES)
+  if (nbytes > LARGE_STRING_BYTES || immovable)
     {
       size_t size = FLEXSIZEOF (struct sblock, data, needed);
 
@@ -1967,7 +1968,7 @@ resize_string_data (Lisp_Object string, ptrdiff_t 
cidx_byte,
     }
   else
     {
-      allocate_string_data (XSTRING (string), nchars, new_nbytes, false);
+      allocate_string_data (XSTRING (string), nchars, new_nbytes, false, 
false);
       unsigned char *new_data = SDATA (string);
       new_charaddr = new_data + cidx_byte;
       memcpy (new_charaddr + new_clen, data + cidx_byte + clen,
@@ -2483,7 +2484,7 @@ make_clear_multibyte_string (EMACS_INT nchars, EMACS_INT 
nbytes, bool clearit)
 
   s = allocate_string ();
   s->u.s.intervals = NULL;
-  allocate_string_data (s, nchars, nbytes, clearit);
+  allocate_string_data (s, nchars, nbytes, clearit, false);
   XSETSTRING (string, s);
   string_chars_consed += nbytes;
   return string;
@@ -2513,6 +2514,29 @@ make_formatted_string (char *buf, const char *format, 
...)
   return make_string (buf, length);
 }
 
+/* Pin a unibyte string in place so that it won't move during GC.  */
+void
+pin_string (Lisp_Object string)
+{
+  eassert (STRINGP (string) && !STRING_MULTIBYTE (string));
+  struct Lisp_String *s = XSTRING (string);
+  ptrdiff_t size = STRING_BYTES (s);
+  unsigned char *data = s->u.s.data;
+
+  if (!(size > LARGE_STRING_BYTES
+       || PURE_P (data) || pdumper_object_p (data)
+       || s->u.s.size_byte == -3))
+    {
+      eassert (s->u.s.size_byte == -1);
+      sdata *old_sdata = SDATA_OF_STRING (s);
+      allocate_string_data (s, size, size, false, true);
+      memcpy (s->u.s.data, data, size);
+      old_sdata->string = NULL;
+      SDATA_NBYTES (old_sdata) = size;
+    }
+  s->u.s.size_byte = -3;
+}
+
 
 /***********************************************************************
                           Float Allocation
@@ -3515,6 +3539,8 @@ usage: (make-byte-code ARGLIST BYTE-CODE CONSTANTS DEPTH 
&optional DOCSTRING INT
         && FIXNATP (args[COMPILED_STACK_DEPTH])))
     error ("Invalid byte-code object");
 
+  pin_string (args[COMPILED_BYTECODE]);  // Bytecode must be immovable.
+
   /* We used to purecopy everything here, if purify-flag was set.  This worked
      OK for Emacs-23, but with Emacs-24's lexical binding code, it can be
      dangerous, since make-byte-code is used during execution to build
@@ -5638,6 +5664,10 @@ purecopy (Lisp_Object obj)
       memcpy (vec, objp, nbytes);
       for (i = 0; i < size; i++)
        vec->contents[i] = purecopy (vec->contents[i]);
+      // Byte code strings must be pinned.
+      if (COMPILEDP (obj) && size >= 2 && STRINGP (vec->contents[1])
+         && !STRING_MULTIBYTE (vec->contents[1]))
+       pin_string (vec->contents[1]);
       XSETVECTOR (obj, vec);
     }
   else if (SYMBOLP (obj))
diff --git a/src/bytecode.c b/src/bytecode.c
index 5e7de2725a..1561bdc149 100644
--- a/src/bytecode.c
+++ b/src/bytecode.c
@@ -331,6 +331,7 @@ If the third argument is incorrect, Emacs may crash.  */)
         the original unibyte form.  */
       bytestr = Fstring_as_unibyte (bytestr);
     }
+  pin_string (bytestr);  // Bytecode must be immovable.
 
   return exec_byte_code (bytestr, vector, maxdepth, Qnil, 0, NULL);
 }
@@ -358,22 +359,28 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, 
Lisp_Object maxdepth,
 #endif
 
   eassert (!STRING_MULTIBYTE (bytestr));
+  eassert (string_immovable_p (bytestr));
 
   ptrdiff_t const_length = ASIZE (vector);
   ptrdiff_t bytestr_length = SCHARS (bytestr);
   Lisp_Object *vectorp = XVECTOR (vector)->contents;
 
   unsigned char quitcounter = 1;
-  EMACS_INT stack_items = XFIXNAT (maxdepth) + 1;
+  /* Allocate two more slots than required, because... */
+  EMACS_INT stack_items = XFIXNAT (maxdepth) + 2;
   USE_SAFE_ALLOCA;
   void *alloc;
-  SAFE_ALLOCA_LISP_EXTRA (alloc, stack_items, bytestr_length);
+  SAFE_ALLOCA_LISP (alloc, stack_items);
   Lisp_Object *stack_base = alloc;
-  Lisp_Object *top = stack_base;
-  *top = vector; /* Ensure VECTOR survives GC (Bug#33014).  */
-  Lisp_Object *stack_lim = stack_base + stack_items;
-  unsigned char const *bytestr_data = memcpy (stack_lim,
-                                             SDATA (bytestr), bytestr_length);
+  /* ... we plonk BYTESTR and VECTOR there to ensure that they survive
+     GC (bug#33014), since these variables aren't used directly beyond
+     the interpreter prologue and wouldn't be found in the stack frame
+     otherwise.  */
+  stack_base[0] = bytestr;
+  stack_base[1] = vector;
+  Lisp_Object *top = stack_base + 1;
+  Lisp_Object *stack_lim = top + stack_items;
+  unsigned char const *bytestr_data = SDATA (bytestr);
   unsigned char const *pc = bytestr_data;
   ptrdiff_t count = SPECPDL_INDEX ();
 
@@ -1570,6 +1577,8 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, 
Lisp_Object maxdepth,
 
  exit:
 
+  eassert (SDATA (bytestr) == bytestr_data);
+
   /* Binds and unbinds are supposed to be compiled balanced.  */
   if (SPECPDL_INDEX () != count)
     {
diff --git a/src/lisp.h b/src/lisp.h
index abd68e081c..140c7cd066 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -1496,7 +1496,9 @@ struct Lisp_String
     struct
     {
       ptrdiff_t size;           /* MSB is used as the markbit.  */
-      ptrdiff_t size_byte;      /* Set to -1 for unibyte strings.  */
+      ptrdiff_t size_byte;      /* Set to -1 for unibyte strings,
+                                  -2 for data in rodata,
+                                  -3 for immovable unibyte strings.  */
       INTERVAL intervals;      /* Text properties in this string.  */
       unsigned char *data;
     } s;
@@ -1644,6 +1646,13 @@ CHECK_STRING_NULL_BYTES (Lisp_Object string)
              Qfilenamep, string);
 }
 
+/* True if STR is immovable (whose data won't move during GC).  */
+INLINE bool
+string_immovable_p (Lisp_Object str)
+{
+  return XSTRING (str)->u.s.size_byte == -3;
+}
+
 /* A regular vector is just a header plus an array of Lisp_Objects.  */
 
 struct Lisp_Vector
@@ -3989,6 +3998,7 @@ extern Lisp_Object make_specified_string (const char *,
                                          ptrdiff_t, ptrdiff_t, bool);
 extern Lisp_Object make_pure_string (const char *, ptrdiff_t, ptrdiff_t, bool);
 extern Lisp_Object make_pure_c_string (const char *, ptrdiff_t);
+extern void pin_string (Lisp_Object string);
 
 /* Make a string allocated in pure space, use STR as string data.  */
 
diff --git a/src/lread.c b/src/lread.c
index 2eff20f15d..36610bcc6e 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -3212,16 +3212,20 @@ read1 (Lisp_Object readcharfun, int *pch, bool 
first_in_list)
                 && FIXNATP (AREF (tmp, COMPILED_STACK_DEPTH))))
            invalid_syntax ("Invalid byte-code object", readcharfun);
 
-         if (STRINGP (AREF (tmp, COMPILED_BYTECODE))
-             && STRING_MULTIBYTE (AREF (tmp, COMPILED_BYTECODE)))
+         if (STRINGP (AREF (tmp, COMPILED_BYTECODE)))
            {
-             /* BYTESTR must have been produced by Emacs 20.2 or earlier
-                because it produced a raw 8-bit string for byte-code and
-                now such a byte-code string is loaded as multibyte with
-                raw 8-bit characters converted to multibyte form.
-                Convert them back to the original unibyte form.  */
-             ASET (tmp, COMPILED_BYTECODE,
-                   Fstring_as_unibyte (AREF (tmp, COMPILED_BYTECODE)));
+             if (STRING_MULTIBYTE (AREF (tmp, COMPILED_BYTECODE)))
+               {
+                 /* BYTESTR must have been produced by Emacs 20.2 or earlier
+                    because it produced a raw 8-bit string for byte-code and
+                    now such a byte-code string is loaded as multibyte with
+                    raw 8-bit characters converted to multibyte form.
+                    Convert them back to the original unibyte form.  */
+                 ASET (tmp, COMPILED_BYTECODE,
+                       Fstring_as_unibyte (AREF (tmp, COMPILED_BYTECODE)));
+               }
+             // Bytecode must be immovable.
+             pin_string (AREF (tmp, COMPILED_BYTECODE));
            }
 
          XSETPVECTYPE (vec, PVEC_COMPILED);
diff --git a/src/pdumper.c b/src/pdumper.c
index eeebb7ed0e..60280fcb04 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -2068,7 +2068,7 @@ dump_interval_tree (struct dump_context *ctx,
 static dump_off
 dump_string (struct dump_context *ctx, const struct Lisp_String *string)
 {
-#if CHECK_STRUCTS && !defined (HASH_Lisp_String_348C2B2FDB)
+#if CHECK_STRUCTS && !defined (HASH_Lisp_String_C2CAF90352)
 # error "Lisp_String changed. See CHECK_STRUCTS comment in config.h."
 #endif
   /* If we have text properties, write them _after_ the string so that
@@ -2079,7 +2079,7 @@ dump_string (struct dump_context *ctx, const struct 
Lisp_String *string)
      we seldom write to string data and never relocate it, so lumping
      it together at the end of the dump saves on COW faults.
 
-     If, however, the string's size_byte field is -1, the string data
+     If, however, the string's size_byte field is -2, the string data
      is actually a pointer to Emacs data segment, so we can do even
      better by emitting a relocation instead of bothering to copy the
      string data.  */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]