gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gawk-diffs] [SCM] gawk branch, master, updated. gawk-4.1.0-2405-gbaadcc


From: Arnold Robbins
Subject: [gawk-diffs] [SCM] gawk branch, master, updated. gawk-4.1.0-2405-gbaadccc
Date: Thu, 19 Jan 2017 18:47:35 +0000 (UTC)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, master has been updated
       via  baadccc7297fa9a0cd1bcc276385872fa0ca8b6e (commit)
      from  35c461c3b2c9cc56e22a5360c36b5e6dc9fccd28 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=baadccc7297fa9a0cd1bcc276385872fa0ca8b6e

commit baadccc7297fa9a0cd1bcc276385872fa0ca8b6e
Author: Arnold D. Robbins <address@hidden>
Date:   Sun Feb 21 20:03:16 2016 +0200

    Improve random number generator's period using a shuffle buffer.

diff --git a/ChangeLog b/ChangeLog
index 3650a30..62b9e85 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1407,6 +1407,15 @@
 
        * configure.ac (AM_GNU_GETTEXT_VERSION): Bump to 0.19.7.
 
+2016-02-21         Nelson H.F. Beebe     <address@hidden>
+
+       * random.c [SHUFFLE_BITS, SHUFFLE_MAX, SHUFFLE_MASK]: New macros.
+       (shuffle_init, shuffle_buffer): New static variables.
+       (random_old): Renamed from random.
+       (random): New function wrapping random_old and providing a
+       shuffle buffer to increase the period. See the literature citations
+       and other notes in the code.
+
 2016-02-21         Arnold D. Robbins     <address@hidden>
 
        * regexec.c (prune_impossible_nodes): Remove attribute that
diff --git a/support/random.c b/support/random.c
index cba1b6b..1aab2b5 100644
--- a/support/random.c
+++ b/support/random.c
@@ -60,6 +60,8 @@ static const char sccsid[] = "@(#)random.c    8.2 (Berkeley) 
5/19/95";
 #include <unistd.h>
 #endif
 
+#include <assert.h>
+
 #include "random.h"            /* gawk addition */
 
 #ifdef HAVE_SYS_TIME_H         /* gawk addition */
@@ -137,8 +139,20 @@ __FBSDID("$FreeBSD: 
/repoman/r/ncvs/src/lib/libc/stdlib/random.c,v 1.24 2004/01/
  * 32 bytes, a simple linear congruential R.N.G. is used."  For a buffer of
  * 128 bytes, this new version runs about 19 percent faster and for a 16
  * byte buffer it is about 5 percent faster.
+ *
+ * Modified 06 February 2016 by Nelson H. F. Beebe to interface to a
+ * shuffle buffer, producing a huge period, and removing long-range
+ * correlations of the basic low-level generator.  See comments and
+ * literature references in random() at the end of this file.
  */
 
+#define SHUFFLE_BITS   9                       /* see comments in random() 
below for this choice */
+#define SHUFFLE_MAX    (1 << SHUFFLE_BITS)     /* MUST be power of two */
+#define SHUFFLE_MASK   (SHUFFLE_MAX - 1)       /* (k & SHUFFLE_MASK) is in [0, 
SHUFFLE_MAX - 1] */
+
+static int  shuffle_init = 1;
+static long shuffle_buffer[SHUFFLE_MAX];
+
 /*
  * For each of the currently supported random number generators, we have a
  * break value on the amount of state information (you need at least this
@@ -307,6 +321,8 @@ srandom(x)
 {
        int i, lim;
 
+       shuffle_init = 1;
+
        state[0] = (uint32_t)x;
        if (rand_type == TYPE_0)
                lim = NSHUFF;
@@ -512,8 +528,8 @@ setstate(arg_state)
  *
  * Returns a 31-bit random number.
  */
-long
-random()
+static long
+random_old()
 {
        uint32_t i;
        uint32_t *f, *r;
@@ -540,3 +556,87 @@ random()
        }
        return((long)i);
 }
+
+long
+random()
+{
+       /*
+        * This function is a wrapper to the original random(), now renamed
+        * random_old(), to interpose a shuffle buffer to dramatically extend
+        * the generator period at nearly zero additional execution cost,
+        * and an additional storage cost set by the size of the
+        * shuffle buffer (default: 512 longs, or 2K or 4K bytes).
+        * The algorithm was first described in
+        *
+        *     Carter Bays and S. D. Durham
+        *     Improving a Poor Random Number Generator
+        *     ACM Transactions on Mathematical Software (TOMS) 2(1) 59--64 
(March 1976)
+        *     http://dx.doi.org/10.1145/355666.355670
+        * 
+        * and later revisited in
+        * 
+        *     Carter Bays
+        *     C364. Improving a random number generator: a comparison between 
two shuffling methods
+        *     Journal of Statistical Computation and Simulation 36(1) 57--59 
(May 1990)
+        *     http://dx.doi.org/10.1080/00949659008811264
+        *
+        * The second paper is critically important because it
+        * emphasizes how an apparently trivial change to the final
+        * element selection can destroy the period-lengthening
+        * feature of the original shuffle algorithm.
+        * 
+        * Here is a table of the increase in period size for a
+        * shuffle generator using 32-bit and 64-bit unsigned integer
+        * linear congruential generators, which are known to have
+        * significant correlations, and are thus inadvisable for
+        * serious work with random numbers:
+        *
+        * hocd128> for (n = 32; n < 4096; n *= 2) \
+        *              printf("%7d\t%12.3.4e\t%12.3.4e\n",
+        *              n, \
+        *              sqrt(PI * gamma(n + 1)/(2**32 - 1)) / (2**32 - 1), \\
+        *              sqrt(PI * gamma(n + 1)/(2**64 - 1)) / (2**64 - 1))
+        * 
+        *      32    3.230e+03       1.148e-11
+        *      64    2.243e+30       7.969e+15
+        *     128    3.910e+93       1.389e+79
+        *     256   1.844e+239      6.552e+224
+        *     512   1.174e+569      4.172e+554
+        *    1024  4.635e+1305     1.647e+1291
+        *    2048  8.144e+2932     2.893e+2918
+        *
+        * A generator giving one result per nanosecond would produce
+        * about 3.16e16 random numbers per year, so even for
+        * massively parallel operations with, say, one million CPU
+        * cores, it could not produce more than 10**23 values per
+        * year.  The main benefit of an enormous period is that it
+        * makes long-range correlations vanishingly unlikely, even
+        * when starting seeds are similar (e.g., seeds of 0, 1, 2,
+        * ...), and therefore makes possible families of generators
+        * (needed in parallel computations) where the probability of
+        * sequence overlap between family members is essentially
+        * zero.
+        */
+
+       int k;
+       long r;
+       static long s = 0xcafefeedL;
+
+       if (shuffle_init) {     /* first time, or seed changed by srand() */
+               for (k = 0; k < SHUFFLE_MAX; k++)
+                       shuffle_buffer[k] = random_old();
+
+               s = random_old();
+               shuffle_init = 0;
+       }
+
+       r = random_old();
+       k = s & SHUFFLE_MASK;           /* random index into shuffle_buffer[] */
+
+       assert(0L <= k && k < SHUFFLE_MAX);
+
+       s = shuffle_buffer[k];
+       shuffle_buffer[k] = r;
+
+       return (s);
+}
diff --git a/test/ChangeLog b/test/ChangeLog
index 5122534..1b2d402 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -306,6 +306,10 @@
        * Makefile.am (profile10): New test.
        * profile10.awk, profile10.ok: New files.
 
+2016-02-21         Nelson H.F. Beebe     <address@hidden>
+
+       * rand.ok: Updated after code change.
+
 2016-02-18         Arnold D. Robbins     <address@hidden>
 
        * profile2.ok, profile5.ok: Adjust after code changes.
diff --git a/test/rand.ok b/test/rand.ok
index 1df4ba3..4ec90dd 100644
--- a/test/rand.ok
+++ b/test/rand.ok
@@ -1 +1 @@
- 67   6  77  68  96  26   8  93  53  74  53  95  78  74  96  77  33  58  91 
+ 90  23   5  64  38  61  72   5  82  80  33  71  33  34  55  60  90  88   7 

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog        |    9 +++++
 support/random.c |  104 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 test/ChangeLog   |    4 +++
 test/rand.ok     |    2 +-
 4 files changed, 116 insertions(+), 3 deletions(-)


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]