guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] Efficient Gensym Hack (v2)


From: Mark H Weaver
Subject: Re: [PATCH] Efficient Gensym Hack (v2)
Date: Tue, 06 Mar 2012 04:55:40 -0500
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/24.0.92 (gnu/linux)

Hello all,

Here's an improved version of the Efficient Gensym Hack (v2).

     Mark


>From 5f558244261f3a22217d5136d0aebb7f644d7efb Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Mon, 5 Mar 2012 09:51:17 -0500
Subject: [PATCH 1/3] Don't lock mutex to set shared flag on stringbuf if it's
 already shared

* libguile/strings.c (set_stringbuf_shared): New internal static
  function to replace the macro SET_STRINGBUF_SHARED.  The macro assumed
  that the stringbuf_write_mutex was already locked, but this new
  function handles locking internally, and avoids locking if the
  stringbuf is already shared.

  (SET_STRINGBUF_SHARED): Removed.

  (scm_i_make_string, scm_i_substring, scm_i_substring_read_only,
  scm_i_make_symbol, scm_i_symbol_substring): Use set_stringbuf_shared
  instead of SET_STRINGBUF_SHARED.
---
 libguile/strings.c |   41 ++++++++++++++++++-----------------------
 1 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/libguile/strings.c b/libguile/strings.c
index 494a658..35757f0 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -91,16 +91,6 @@
 
 #define STRINGBUF_LENGTH(buf)   (SCM_CELL_WORD_1 (buf))
 
-#define SET_STRINGBUF_SHARED(buf)                                      \
-  do                                                                   \
-    {                                                                  \
-      /* Don't modify BUF if it's already marked as shared since it might be \
-        a read-only, statically allocated stringbuf.  */               \
-      if (SCM_LIKELY (!STRINGBUF_SHARED (buf)))                                
\
-       SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | 
STRINGBUF_F_SHARED); \
-    }                                                                  \
-  while (0)
-
 #ifdef SCM_STRING_LENGTH_HISTOGRAM
 static size_t lenhist[1001];
 #endif
@@ -227,6 +217,19 @@ narrow_stringbuf (SCM buf)
 
 scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
 
+static void
+set_stringbuf_shared (SCM buf)
+{
+  /* Don't modify BUF if it's already marked as shared since it
+     might be a read-only, statically allocated stringbuf.  */
+  if (!STRINGBUF_SHARED (buf))
+    {
+      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
+      SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED);
+      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+    }
+}
+
 
 /* Copy-on-write strings.
  */
@@ -276,7 +279,7 @@ scm_i_make_string (size_t len, char **charsp, int 
read_only_p)
       if (SCM_UNLIKELY (scm_is_false (null_stringbuf)))
         {
           null_stringbuf = make_stringbuf (0);
-          SET_STRINGBUF_SHARED (null_stringbuf);
+          set_stringbuf_shared (null_stringbuf);
         }
       buf = null_stringbuf;
     }
@@ -341,9 +344,7 @@ scm_i_substring (SCM str, size_t start, size_t end)
       SCM buf;
       size_t str_start;
       get_str_buf_start (&str, &buf, &str_start);
-      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-      SET_STRINGBUF_SHARED (buf);
-      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+      set_stringbuf_shared (buf);
       return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
                               (scm_t_bits)str_start + start,
                               (scm_t_bits) end - start);
@@ -360,9 +361,7 @@ scm_i_substring_read_only (SCM str, size_t start, size_t 
end)
       SCM buf;
       size_t str_start;
       get_str_buf_start (&str, &buf, &str_start);
-      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-      SET_STRINGBUF_SHARED (buf);
-      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+      set_stringbuf_shared (buf);
       return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
                               (scm_t_bits)str_start + start,
                               (scm_t_bits) end - start);
@@ -753,9 +752,7 @@ scm_i_make_symbol (SCM name, scm_t_bits flags,
   if (start == 0 && length == STRINGBUF_LENGTH (buf))
     {
       /* reuse buf. */
-      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-      SET_STRINGBUF_SHARED (buf);
-      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+      set_stringbuf_shared (buf);
     }
   else
     {
@@ -854,9 +851,7 @@ SCM
 scm_i_symbol_substring (SCM sym, size_t start, size_t end)
 {
   SCM buf = SYMBOL_STRINGBUF (sym);
-  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-  SET_STRINGBUF_SHARED (buf);
-  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+  set_stringbuf_shared (buf);
   return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
                          (scm_t_bits)start, (scm_t_bits) end - start);
 }
-- 
1.7.5.4

>From 6c644645ecd2b1e84754b4759789edab2fdf9260 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Mon, 5 Mar 2012 10:06:34 -0500
Subject: [PATCH 2/3] Move prototype for scm_i_try_narrow_string where it
 belongs

* libguile/strings.h (scm_i_try_narrow_string): Move prototype out of
  the "internal functions related to symbols" section.
---
 libguile/strings.h |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/libguile/strings.h b/libguile/strings.h
index 42e57ac..9735913 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -195,10 +195,12 @@ SCM_INTERNAL const void *scm_i_string_data (SCM str);
 SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
 SCM_INTERNAL void scm_i_string_stop_writing (void);
 SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
+SCM_INTERNAL int scm_i_try_narrow_string (SCM str);
 SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
 SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
 SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char 
*cstr);
 SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
+
 /* internal functions related to symbols. */
 
 SCM_INTERNAL SCM scm_i_make_symbol (SCM name, scm_t_bits flags,
@@ -210,7 +212,6 @@ SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym);
 SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym);
 SCM_INTERNAL size_t scm_i_symbol_length (SCM sym);
 SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str);
-SCM_INTERNAL int scm_i_try_narrow_string (SCM str);
 SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end);
 SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x);
 SCM_INTERNAL void scm_encoding_error (const char *subr, int err,
-- 
1.7.5.4

>From 7eff2e5ee0230b11a1ad38b4fd1cf4a470a9b3bc Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Mon, 5 Mar 2012 10:35:06 -0500
Subject: [PATCH 3/3] Efficient gensym hack: generate gensym names lazily

* libguile/strings.c (scm_i_stringbuf_from_string,
  scm_i_string_from_stringbuf): New internal functions.

  (symbol_stringbuf): New internal static function to replace most uses
  of SYMBOL_STRINGBUF.  Handles forcing lazy gensyms.

  (scm_i_symbol_length, scm_c_symbol_length, scm_i_is_narrow_symbol,
  scm_i_symbol_chars, scm_i_symbol_wide_chars, scm_i_symbol_substring,
  scm_sys_symbol_dump): Use symbol_stringbuf instead of
  SYMBOL_STRINGBUF.

* libguile/strings.h (scm_i_stringbuf_from_string,
  scm_i_string_from_stringbuf): Add prototypes.

* libguile/symbols.c (SYMBOL_STRINGBUF): New internal macro.

  (scm_i_symbol_hash): New internal function to replace the macro of the
  same name.  Handles forcing lazy gensyms.

  (scm_gensym): Don't construct the name or even increment the
  gensym_counter here.  Just return a special symbol with the
  SCM_I_F_SYMBOL_LAZY_GENSYM flag set, with hash value 0, and with a
  stringbuf containing only the prefix.

  (scm_i_force_lazy_gensym): New internal procedure used when a lazy
  gensym is queried for its name or hash value.

  (symbol_lookup_hash_fn, symbol_lookup_assoc_fn): Avoid lazy gensym
  checks.

* libguile/symbols.h (scm_i_symbol_hash): Remove macro, and replace it
  with a prototype for the new internal function of the same name.
  (scm_i_force_lazy_gensym): Add prototype.
  (scm_i_symbol_is_lazy_gensym): New macro.
  (SCM_I_F_SYMBOL_LAZY_GENSYM): New flag.

* doc/ref/api-data.texi (Symbol Primitives): Update documentation.

* test-suite/tests/symbols.test (gensym): Add tests.
---
 doc/ref/api-data.texi         |    4 +-
 libguile/strings.c            |   58 +++++++++++++++--
 libguile/strings.h            |    2 +
 libguile/symbols.c            |  140 ++++++++++++++++++++++++++++++++++-------
 libguile/symbols.h            |    6 ++-
 test-suite/tests/symbols.test |   36 ++++++++++-
 6 files changed, 211 insertions(+), 35 deletions(-)

diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi
index 39c9790..a1203f0 100644
--- a/doc/ref/api-data.texi
+++ b/doc/ref/api-data.texi
@@ -5293,8 +5293,8 @@ code.  The @code{gensym} primitive meets this need:
 @deffnx {C Function} scm_gensym (prefix)
 Create a new symbol with a name constructed from a prefix and a counter
 value.  The string @var{prefix} can be specified as an optional
-argument.  Default prefix is @address@hidden g}}.  The counter is increased by 
1
-at each call.  There is no provision for resetting the counter.
+argument.  Default prefix is @address@hidden g}}.  The name is constructed
+lazily, when the name or hash of the symbol is first requested.
 @end deffn
 
 The symbols generated by @code{gensym} are @emph{likely} to be unique,
diff --git a/libguile/strings.c b/libguile/strings.c
index 35757f0..b4f42d4 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -334,6 +334,41 @@ get_str_buf_start (SCM *str, SCM *buf, size_t *start)
   *buf = STRING_STRINGBUF (*str);
 }
 
+/* scm_i_stringbuf_from_string returns a stringbuf containing exactly
+   the characters in 'str'.  If possible, it returns 'str's stringbuf
+   (marking it shared).  However, if 'str' refers to only part of its
+   stringbuf, the relevant portion is copied into a fresh stringbuf.
+
+   This is needed by the lazy gensym code in symbols.c. */
+SCM
+scm_i_stringbuf_from_string (SCM str)
+{
+  SCM inner_str, buf;
+  size_t len, start;
+
+  len = STRING_LENGTH (str);
+  inner_str = str;
+  get_str_buf_start (&inner_str, &buf, &start);
+  if (STRINGBUF_LENGTH (buf) == len)
+    set_stringbuf_shared (buf);
+  else
+    {
+      SCM new_str = scm_i_substring_copy (str, 0, len);
+      buf = STRING_STRINGBUF (new_str);
+    }
+  return buf;
+}
+
+/* Needed by the lazy gensym code in symbols.c. */
+SCM
+scm_i_string_from_stringbuf (SCM buf)
+{
+  size_t len = STRINGBUF_LENGTH (buf);
+  set_stringbuf_shared (buf);
+  return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
+                          (scm_t_bits) 0, (scm_t_bits) len);
+}
+
 SCM
 scm_i_substring (SCM str, size_t start, size_t end)
 {
@@ -732,8 +767,17 @@ scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
    internals of strings and string-like objects confined to this file.
 */
 
+/* Must be kept in sync with the matching definition in symbols.c */
 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
 
+static SCM
+symbol_stringbuf (SCM symbol)
+{
+  if (SCM_UNLIKELY (scm_i_symbol_is_lazy_gensym (symbol)))
+    scm_i_force_lazy_gensym (symbol);
+  return SYMBOL_STRINGBUF (symbol);
+}
+
 SCM
 scm_i_make_symbol (SCM name, scm_t_bits flags,
                   unsigned long hash, SCM props)
@@ -793,7 +837,7 @@ scm_i_c_make_symbol (const char *name, size_t len,
 size_t
 scm_i_symbol_length (SCM sym)
 {
-  return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
+  return STRINGBUF_LENGTH (symbol_stringbuf (sym));
 }
 
 size_t
@@ -802,7 +846,7 @@ scm_c_symbol_length (SCM sym)
 {
   SCM_VALIDATE_SYMBOL (1, sym);
 
-  return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
+  return STRINGBUF_LENGTH (symbol_stringbuf (sym));
 }
 #undef FUNC_NAME
 
@@ -813,7 +857,7 @@ scm_i_is_narrow_symbol (SCM sym)
 {
   SCM buf;
 
-  buf = SYMBOL_STRINGBUF (sym);
+  buf = symbol_stringbuf (sym);
   return !STRINGBUF_WIDE (buf);
 }
 
@@ -824,7 +868,7 @@ scm_i_symbol_chars (SCM sym)
 {
   SCM buf;
 
-  buf = SYMBOL_STRINGBUF (sym);
+  buf = symbol_stringbuf (sym);
   if (!STRINGBUF_WIDE (buf))
     return (const char *) STRINGBUF_CHARS (buf);
   else
@@ -839,7 +883,7 @@ scm_i_symbol_wide_chars (SCM sym)
 {
   SCM buf;
 
-  buf = SYMBOL_STRINGBUF (sym);
+  buf = symbol_stringbuf (sym);
   if (STRINGBUF_WIDE (buf))
     return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf);
   else
@@ -850,7 +894,7 @@ scm_i_symbol_wide_chars (SCM sym)
 SCM
 scm_i_symbol_substring (SCM sym, size_t start, size_t end)
 {
-  SCM buf = SYMBOL_STRINGBUF (sym);
+  SCM buf = symbol_stringbuf (sym);
   set_stringbuf_shared (buf);
   return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
                          (scm_t_bits)start, (scm_t_bits) end - start);
@@ -1000,7 +1044,7 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, 
(SCM sym),
                  scm_from_ulong (scm_i_symbol_hash (sym)));
   e3 = scm_cons (scm_from_latin1_symbol ("interned"),
                  scm_symbol_interned_p (sym));
-  buf = SYMBOL_STRINGBUF (sym);
+  buf = symbol_stringbuf (sym);
 
   /* Stringbuf info */
   if (!STRINGBUF_WIDE (buf))
diff --git a/libguile/strings.h b/libguile/strings.h
index 9735913..afb5a53 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -200,6 +200,8 @@ SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t 
x);
 SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
 SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char 
*cstr);
 SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
+SCM_INTERNAL SCM scm_i_stringbuf_from_string (SCM str);
+SCM_INTERNAL SCM scm_i_string_from_stringbuf (SCM buf);
 
 /* internal functions related to symbols. */
 
diff --git a/libguile/symbols.c b/libguile/symbols.c
index 08512a6..07556fa 100644
--- a/libguile/symbols.c
+++ b/libguile/symbols.c
@@ -70,6 +70,19 @@ SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
 /* {Symbols}
  */
 
+/* Must be kept in sync with the matching definition in strings.c */
+#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
+
+#define SYMBOL_HASH(x) ((unsigned long) SCM_CELL_WORD_2 (x))
+
+unsigned long
+scm_i_symbol_hash (SCM symbol)
+{
+  if (SCM_UNLIKELY (scm_i_symbol_is_lazy_gensym (symbol)))
+    scm_i_force_lazy_gensym (symbol);
+  return SYMBOL_HASH (symbol);
+}
+
 unsigned long
 scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
 {
@@ -165,7 +178,16 @@ lookup_interned_latin1_symbol (const char *str, size_t len,
 static unsigned long
 symbol_lookup_hash_fn (SCM obj, unsigned long max, void *closure)
 {
-  return scm_i_symbol_hash (obj) % max;
+  /* We must avoid forcing lazy gensyms here, because
+     scm_i_force_lazy_gensym needs to intern its symbol before clearing
+     the lazy gensym flag. */
+  return SYMBOL_HASH (obj) % max;
+}
+
+static SCM
+symbol_to_string_no_lazy_gensym_check (SCM sym)
+{
+  return scm_i_string_from_stringbuf (SYMBOL_STRINGBUF (sym));
 }
 
 static SCM
@@ -175,9 +197,13 @@ symbol_lookup_assoc_fn (SCM obj, SCM alist, void *closure)
     {
       SCM sym = SCM_CAAR (alist);
 
-      if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (obj)
-          && scm_is_true (scm_string_equal_p (scm_symbol_to_string (sym),
-                                              scm_symbol_to_string (obj))))
+      /* We must avoid forcing lazy gensyms here, because
+         scm_i_force_lazy_gensym needs to intern its symbol before
+         clearing the lazy gensym flag. */
+      if (SYMBOL_HASH (sym) == SYMBOL_HASH (obj)
+          && scm_is_true (scm_string_equal_p
+                          (symbol_to_string_no_lazy_gensym_check (sym),
+                           symbol_to_string_no_lazy_gensym_check (obj))))
         return SCM_CAR (alist);
     }
 
@@ -340,38 +366,104 @@ SCM_DEFINE (scm_string_ci_to_symbol, 
"string-ci->symbol", 1, 0, 0,
 /* The default prefix for `gensym'd symbols.  */
 static SCM default_gensym_prefix;
 
-#define MAX_PREFIX_LENGTH 30
-
 SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
             (SCM prefix),
            "Create a new symbol with a name constructed from a prefix and\n"
-           "a counter value. The string @var{prefix} can be specified as\n"
-           "an optional argument. Default prefix is @code{ g}.  The counter\n"
-           "is increased by 1 at each call. There is no provision for\n"
-           "resetting the counter.")
+           "a counter value.  The string @var{prefix} can be specified as\n"
+           "an optional argument.  Default prefix is @code{ g}.  The name\n"
+            "is constructed lazily, when the name or hash of the symbol is\n"
+            "first requested.")
 #define FUNC_NAME s_scm_gensym
 {
-  static int gensym_counter = 0;
-  
-  SCM suffix, name;
-  int n, n_digits;
-  char buf[SCM_INTBUFLEN];
+  SCM prefix_stringbuf;
 
   if (SCM_UNBNDP (prefix))
     prefix = default_gensym_prefix;
+  else
+    SCM_VALIDATE_STRING (1, prefix);
 
-  /* mutex in case another thread looks and incs at the exact same moment */
-  scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
-  n = gensym_counter++;
-  scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
+  prefix_stringbuf = scm_i_stringbuf_from_string (prefix);
 
-  n_digits = scm_iint2str (n, 10, buf);
-  suffix = scm_from_latin1_stringn (buf, n_digits);
-  name = scm_string_append (scm_list_2 (prefix, suffix));
-  return scm_string_to_symbol (name);
+  /* Allocate a special symbol with the lazy gensym flag set.  Except
+     for a few special exceptions, all code must check this flag before
+     accessing the name or hash fields of symbols.  When the gensym is
+     forced, it will set the name and hash fields to their final values,
+     and then clear the lazy gensym flag.  For now, we store the gensym
+     prefix as the symbol name, and 0 as the hash value. */
+  return scm_double_cell (scm_tc7_symbol | SCM_I_F_SYMBOL_LAZY_GENSYM,
+                          SCM_UNPACK (prefix_stringbuf), (scm_t_bits) 0,
+                          SCM_UNPACK (scm_cons (SCM_BOOL_F, SCM_EOL)));
 }
 #undef FUNC_NAME
 
+/*
+ * Forcing lazy gensyms
+ *
+ * Here we must choose a name for our gensym and set its 'equal?' hash
+ * value to match its name.  In most cases, we will simply append the
+ * current gensym counter to the prefix to form the name, increment the
+ * counter, and intern the symbol.  However, there are some
+ * complications.
+ *
+ * The name we ultimately assign to the gensym _must_ not already be
+ * interned.  To understand why, consider this scenario: Suppose the
+ * user asks for a lazy gensym with prefix "foo", and we assign it the
+ * number 6.  Now suppose sometime later, but before the gensym is
+ * forced, the symbol 'foo6' is independently interned.  Now we have two
+ * distinct symbols (in the sense of 'eq?'), both semantically interned,
+ * with the same name.  This is a violation of the most fundamental
+ * property of symbols.
+ *
+ * Therefore, if the first counter value we try yields a name that has
+ * already been interned, we try the next counter value, and repeat
+ * until we successfully intern our symbol.  Only then can we clear the
+ * lazy gensym flag and thereby allow the name and 'equal?' hash value
+ * to be accessed.
+ */
+void
+scm_i_force_lazy_gensym (SCM sym)
+{
+  static int gensym_counter = 0;
+
+  SCM prefix, suffix, name, handle;
+  int n, n_digits;
+  char buf[SCM_INTBUFLEN];
+
+  /* mutex in case another thread forces a gensym (possibly this one) */
+  scm_i_pthread_mutex_lock (&symbols_lock);
+  if (SCM_LIKELY (scm_i_symbol_is_lazy_gensym (sym)))
+    {
+      prefix = scm_i_string_from_stringbuf (SYMBOL_STRINGBUF (sym));
+
+      do
+        {
+          n = gensym_counter++;
+
+          n_digits = scm_iint2str (n, 10, buf);
+          suffix = scm_from_latin1_stringn (buf, n_digits);
+          name = scm_string_append (scm_list_2 (prefix, suffix));
+
+          /* Set the name and hash to their candidate values. */
+          SCM_SET_CELL_OBJECT_1 (sym, scm_i_stringbuf_from_string (name));
+          SCM_SET_CELL_WORD_2   (sym, scm_i_string_hash (name));
+
+          /* Attempt to intern the symbol */
+          handle = scm_hash_fn_create_handle_x (symbols, sym, SCM_UNDEFINED,
+                                                symbol_lookup_hash_fn,
+                                                symbol_lookup_assoc_fn,
+                                                NULL);
+        } while (SCM_UNLIKELY (!scm_is_eq (sym, SCM_CAR (handle))));
+
+      /* We must not clear the lazy gensym flag until our symbol has
+         been interned.  The lock does not save us here, because another
+         thread could retrieve our gensym's name or hash outside of any
+         lock. */
+      SCM_SET_CELL_WORD_0 (sym, (SCM_CELL_WORD_0 (sym)
+                                 & ~SCM_I_F_SYMBOL_LAZY_GENSYM));
+    }
+  scm_i_pthread_mutex_unlock (&symbols_lock);
+}
+
 SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0, 
            (SCM symbol),
            "Return a hash value for @var{symbol}.")
diff --git a/libguile/symbols.h b/libguile/symbols.h
index 6106f9e..b8fe997 100644
--- a/libguile/symbols.h
+++ b/libguile/symbols.h
@@ -28,11 +28,13 @@
 
 #define scm_is_symbol(x)            (!SCM_IMP (x) \
                                      && (SCM_TYP7 (x) == scm_tc7_symbol))
-#define scm_i_symbol_hash(x)        ((unsigned long) SCM_CELL_WORD_2 (x))
 #define scm_i_symbol_is_interned(x) \
   (!(SCM_CELL_WORD_0 (x) & SCM_I_F_SYMBOL_UNINTERNED))
+#define scm_i_symbol_is_lazy_gensym(x) \
+  (SCM_CELL_WORD_0 (x) & SCM_I_F_SYMBOL_LAZY_GENSYM)
 
 #define SCM_I_F_SYMBOL_UNINTERNED   0x100
+#define SCM_I_F_SYMBOL_LAZY_GENSYM  0x200
 
 
 
@@ -90,8 +92,10 @@ SCM_API SCM scm_take_utf8_symboln (char *sym, size_t len);
 
 /* internal functions. */
 
+SCM_INTERNAL unsigned long scm_i_symbol_hash (SCM symbol);
 SCM_INTERNAL unsigned long scm_i_hash_symbol (SCM obj, unsigned long n,
                                         void *closure);
+SCM_INTERNAL void scm_i_force_lazy_gensym (SCM sym);
 
 SCM_INTERNAL void scm_symbols_prehistory (void);
 SCM_INTERNAL void scm_init_symbols (void);
diff --git a/test-suite/tests/symbols.test b/test-suite/tests/symbols.test
index 6fbc6be..0dbb121 100644
--- a/test-suite/tests/symbols.test
+++ b/test-suite/tests/symbols.test
@@ -149,7 +149,41 @@
     (symbol? (gensym (make-string 4000 #\!))))
 
   (pass-if "accepts embedded NULs"
-    (> (string-length (symbol->string (gensym 
"foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0")))
 6)))
+    (> (string-length (symbol->string (gensym 
"foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0")))
 6))
+
+  (pass-if "accepts substring prefixes"
+    (let* ((prefix (substring "foobar" 1 4))
+           (symbol (gensym prefix))
+           (name (symbol->string symbol)))
+      (string= "oob" (substring name 0 3))))
+
+  (pass-if "accepts shared substring prefixes"
+    (let* ((prefix (substring/shared (string-copy "foobar")
+                                     1 4))
+           (symbol (gensym prefix))
+           (name (symbol->string symbol)))
+      (string= "oob" (substring name 0 3))))
+
+  (pass-if "counter incremented lazily"
+    (let* ((s1 (gensym ""))
+           (s2 (gensym ""))
+           (s3 (gensym ""))
+           (s4 (gensym ""))
+           (s4-counter (string->number (symbol->string s4)))
+           (s1-counter (string->number (symbol->string s1))))
+      (= s1-counter (1+ s4-counter))))
+
+  (pass-if "unaffected by mutation of prefix"
+    (let* ((prefix (string-copy "foo"))
+           (symbol (gensym prefix)))
+      (string-set! prefix 0 #\g)
+      (string= "foo" (substring (symbol->string symbol) 0 3))))
+
+  (pass-if "avoids existing interned symbols"
+    (let* ((n (1+ (string->number (symbol->string (gensym "")))))
+           (colliding-symbol (string->symbol (number->string n)))
+           (symbol (gensym "")))
+      (< n (string->number (symbol->string symbol))))))
 
 (with-test-prefix "extended read syntax"
   (pass-if (equal? "#{}#" (object->string (string->symbol ""))))
-- 
1.7.5.4


reply via email to

[Prev in Thread] Current Thread [Next in Thread]