guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] Universally-unique gensyms


From: Mark H Weaver
Subject: Re: [PATCH] Universally-unique gensyms
Date: Wed, 18 Jan 2012 03:04:53 -0500
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/24.0.92 (gnu/linux)

Hi Andy,

Here's an improved implementation, incorporating your excellent feedback
and also implementing thread-local gensym counters, thus eliminating the
mutex altogether.  What do you think?

   Thanks!
     Mark


>From 9090dfeb58846d637150f5f88e344c7d980efdf2 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Wed, 18 Jan 2012 02:36:17 -0500
Subject: [PATCH 1/2] Add `random-state-from-platform' and
 `scm_i_random_bytes_from_platform'

* libguile/random.c (scm_random_state_from_platform): New procedure to
  construct a new random state seeded from a platform-specific source of
  randomness.

  (scm_i_random_bytes_from_platform): New internal function to fill a
  byte buffer with random bytes from a platform-specific source of
  randomness.

  (read_dev_urandom, random_state_of_last_resort): New internal static
  helper functions.

* libguile/random.h (scm_random_state_from_platform,
  scm_i_random_bytes_from_platform): Add prototypes.

* doc/ref/api-data.texi (Random): Add documentation.
---
 doc/ref/api-data.texi |   30 +++++++++----------------
 libguile/random.c     |   58 +++++++++++++++++++++++++++++++++++++++++++++++++
 libguile/random.h     |    3 ++
 3 files changed, 72 insertions(+), 19 deletions(-)

diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi
index f2450ce..e165559 100644
--- a/doc/ref/api-data.texi
+++ b/doc/ref/api-data.texi
@@ -1865,6 +1865,14 @@ Return a datum representation of @var{state} that may be 
written out and
 read back with the Scheme reader.
 @end deffn
 
address@hidden {Scheme Procedure} random-state-from-platform
address@hidden {C Function} scm_random_state_from_platform ()
+Construct a new random state seeded from a platform-specific source of
+entropy, appropriate for use in non-security-critical applications.
+Currently @file{/dev/urandom} is tried first, or else the seed is based
+on the time, date, process ID, and a high-resolution timer if available.
address@hidden deffn
+
 @defvar *random-state*
 The global random state used by the above functions when the
 @var{state} parameter is not given.
@@ -1887,29 +1895,13 @@ Guile started up, will always give:
 (0 1 1 2 2 2 1 2 6 7 10 0 5 3 12 5 5 12)
 @end lisp
 
-To use the time of day as the random seed, you can use code like this:
-
address@hidden
-(let ((time (gettimeofday)))
-  (set! *random-state*
-        (seed->random-state (+ (car time)
-                               (cdr time)))))
address@hidden lisp
-
address@hidden
-And then (depending on the time of day, of course):
+To seed the random state in a sensible way for non-security-critical
+applications, do this during initialization of your program:
 
 @lisp
-(map random (cdr (iota 19)))
address@hidden
-(0 0 1 0 2 4 5 4 5 5 9 3 10 1 8 3 14 17)
+(set! *random-state* (random-state-from-platform))
 @end lisp
 
-For security applications, such as password generation, you should use
-more bits of seed.  Otherwise an open source password generator could
-be attacked by guessing the address@hidden but that's a subject for
-another manual.
-
 
 @node Characters
 @subsection Characters
diff --git a/libguile/random.c b/libguile/random.c
index 8bc0d87..a1e7abe 100644
--- a/libguile/random.c
+++ b/libguile/random.c
@@ -653,6 +653,64 @@ SCM_DEFINE (scm_random_exp, "random:exp", 0, 1, 0,
 }
 #undef FUNC_NAME
 
+static SCM
+random_state_of_last_resort (void)
+{
+  SCM time_of_day = scm_gettimeofday ();
+  return scm_seed_to_random_state
+    (scm_sum
+     (scm_get_internal_real_time (),
+      scm_ash (scm_sum (scm_getpid (),
+                        scm_ash (scm_sum (scm_cdr (time_of_day),
+                                          scm_ash (scm_car (time_of_day),
+                                                   SCM_I_MAKINUM (20))),
+                                 SCM_I_MAKINUM (20))),
+               SCM_I_MAKINUM (64))));
+}
+
+/* Returns 1 if successful, else 0 */
+static int
+read_dev_urandom (unsigned char *buf, size_t len)
+{
+  size_t res = 0;
+  FILE *f = fopen ("/dev/urandom", "r");
+  if (f)
+    {
+      res = fread(buf, 1, len, f);
+      fclose (f);
+    }
+  return (res == len);
+}
+
+void
+scm_i_random_bytes_from_platform (unsigned char *buf, size_t len)
+{
+  if (read_dev_urandom (buf, len))
+    return;
+  else  /* FIXME: support other platform sources */
+    {
+      /* When all else fails, use this (rather weak) fallback */
+      SCM random_state = random_state_of_last_resort ();
+      int i;
+      for (i = len-1; i >= 0; --i)
+        buf[i] = scm_to_int (scm_random (SCM_I_MAKINUM (256), random_state));
+    }
+}
+
+SCM_DEFINE (scm_random_state_from_platform, "random-state-from-platform", 0, 
0, 0,
+            (void),
+            "Construct a new random state seeded from a platform-specific 
source of\n\
+entropy, appropriate for use in non-security-critical applications.")
+#define FUNC_NAME s_scm_random_state_from_platform
+{
+  unsigned char buf[32];
+  if (read_dev_urandom (buf, sizeof(buf)))
+    return make_rstate (scm_c_make_rstate ((char *) buf, sizeof(buf)));
+  else
+    return random_state_of_last_resort ();
+}
+#undef FUNC_NAME
+
 void
 scm_init_random ()
 {
diff --git a/libguile/random.h b/libguile/random.h
index 2f1f0f6..109969e 100644
--- a/libguile/random.h
+++ b/libguile/random.h
@@ -86,6 +86,7 @@ SCM_API SCM scm_copy_random_state (SCM state);
 SCM_API SCM scm_seed_to_random_state (SCM seed);
 SCM_API SCM scm_datum_to_random_state (SCM datum);
 SCM_API SCM scm_random_state_to_datum (SCM state);
+SCM_API SCM scm_random_state_from_platform (void);
 SCM_API SCM scm_random_uniform (SCM state);
 SCM_API SCM scm_random_solid_sphere_x (SCM v, SCM state);
 SCM_API SCM scm_random_hollow_sphere_x (SCM v, SCM state);
@@ -94,6 +95,8 @@ SCM_API SCM scm_random_normal_vector_x (SCM v, SCM state);
 SCM_API SCM scm_random_exp (SCM state);
 SCM_INTERNAL void scm_init_random (void);
 
+SCM_INTERNAL void scm_i_random_bytes_from_platform (unsigned char *buf, size_t 
len);
+
 #endif  /* SCM_RANDOM_H */
 
 /*
-- 
1.7.5.4

>From f7feb8c116c40be6894061dcc4474c5939f64e03 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Wed, 18 Jan 2012 02:53:05 -0500
Subject: [PATCH 2/2] Universally-unique gensyms

* libguile/symbols.c (scm_gensym): Make the gensym counter a 128-bit
  thread-local, initialized to a random number upon the first call to
  `gensym' within a given thread.  This counter is rendered as a 22 byte
  suffix of mostly base64 digits.

* libguile/threads.h (scm_i_thread): Add a thread-local gensym_counter.

* libguile/threads.c (guilify_self_1): Initialize gensym_counter to NULL.
---
 libguile/symbols.c |   49 ++++++++++++++++++++++++++++++++++++++-----------
 libguile/threads.c |    1 +
 libguile/threads.h |    4 ++++
 3 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/libguile/symbols.c b/libguile/symbols.c
index 59aca00..0656ecb 100644
--- a/libguile/symbols.c
+++ b/libguile/symbols.c
@@ -31,6 +31,7 @@
 #include "libguile/variable.h"
 #include "libguile/alist.h"
 #include "libguile/fluids.h"
+#include "libguile/threads.h"
 #include "libguile/strings.h"
 #include "libguile/vectors.h"
 #include "libguile/hashtab.h"
@@ -340,7 +341,9 @@ SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 
1, 0, 0,
 /* The default prefix for `gensym'd symbols.  */
 static SCM default_gensym_prefix;
 
-#define MAX_PREFIX_LENGTH 30
+#define GENSYM_LENGTH      22  /* bytes */
+#define GENSYM_RADIX_BITS  6
+#define GENSYM_RADIX       (1 << (GENSYM_RADIX_BITS))
 
 SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
             (SCM prefix),
@@ -351,22 +354,46 @@ SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
            "resetting the counter.")
 #define FUNC_NAME s_scm_gensym
 {
-  static int gensym_counter = 0;
-  
+  static const char base64[GENSYM_RADIX] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789$@";
+  static const char base4[4] = "_.-~";
+
+  unsigned char *digit_buf = SCM_I_CURRENT_THREAD->gensym_counter;
+  char char_buf[GENSYM_LENGTH];
   SCM suffix, name;
-  int n, n_digits;
-  char buf[SCM_INTBUFLEN];
+  int i;
 
   if (SCM_UNBNDP (prefix))
     prefix = default_gensym_prefix;
 
-  /* mutex in case another thread looks and incs at the exact same moment */
-  scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
-  n = gensym_counter++;
-  scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
+  if (SCM_UNLIKELY (digit_buf == NULL))
+    {
+      /* This is the first time gensym has been called in this thread.
+         Allocate and randomize our new thread-local gensym counter */
+      digit_buf = (unsigned char *) scm_malloc (GENSYM_LENGTH);
+      scm_i_random_bytes_from_platform (digit_buf, GENSYM_LENGTH);
+      for (i = (GENSYM_LENGTH - 1); i >= 0; --i)
+        digit_buf[i] &= (GENSYM_RADIX - 1);
+      SCM_I_CURRENT_THREAD->gensym_counter = digit_buf;
+    }
+
+  /* increment our thread-local gensym_counter */
+  for (i = (GENSYM_LENGTH - 1); i >= 0; --i)
+    {
+      if (SCM_LIKELY (++(digit_buf[i]) < GENSYM_RADIX))
+        break;
+      else
+        digit_buf[i] = 0;
+    }
+
+  /* encode digit_buf as base64, except for the first character which is
+     base 4 using the sparse glyphs "_.-~" to hopefully provide some
+     visual separation between the prefix and the dense base64 block. */
+  for (i = (GENSYM_LENGTH - 1); i > 0; --i)
+    char_buf[i] = base64[digit_buf[i]];
+  char_buf[0] = base4[digit_buf[0] & 3];
 
-  n_digits = scm_iint2str (n, 10, buf);
-  suffix = scm_from_latin1_stringn (buf, n_digits);
+  suffix = scm_from_latin1_stringn (char_buf, GENSYM_LENGTH);
   name = scm_string_append (scm_list_2 (prefix, suffix));
   return scm_string_to_symbol (name);
 }
diff --git a/libguile/threads.c b/libguile/threads.c
index 5a13e5c..67834ff 100644
--- a/libguile/threads.c
+++ b/libguile/threads.c
@@ -545,6 +545,7 @@ guilify_self_1 (struct GC_stack_base *base)
   t.join_queue = SCM_EOL;
   t.dynamic_state = SCM_BOOL_F;
   t.dynwinds = SCM_EOL;
+  t.gensym_counter = NULL;
   t.active_asyncs = SCM_EOL;
   t.block_asyncs = 1;
   t.pending_asyncs = 1;
diff --git a/libguile/threads.h b/libguile/threads.h
index ec129bc..3660a58 100644
--- a/libguile/threads.h
+++ b/libguile/threads.h
@@ -81,6 +81,10 @@ typedef struct scm_i_thread {
   SCM dynamic_state;
   SCM dynwinds;
 
+  /* Thread-local gensym counter.
+   */
+  unsigned char *gensym_counter;
+
   /* For system asyncs.
    */
   SCM active_asyncs;            /* The thunks to be run at the next
-- 
1.7.5.4


reply via email to

[Prev in Thread] Current Thread [Next in Thread]