By the way, here is a patch corresponding to my current version of the
code.
Frederik
diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~'
-x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps
-x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h
-x config.log -x config.status -x coreutils.info -x autom4te.cache -x
'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x
'*.orig' -x '*.rej' -x version.texi coreutils-cvs/ChangeLog
coreutils-cvs-3-sort-with-param-isaac/ChangeLog
--- coreutils-cvs/ChangeLog 2005-11-24 20:09:03.000000000 +0000
+++ coreutils-cvs-3-sort-with-param-isaac/ChangeLog 2005-12-01
03:06:55.000000000 +0000
@@ -1,3 +1,24 @@
+2005-11-25 Frederik Eaton <[EMAIL PROTECTED]>
+
+ * src/shred.c:
+ * src/rand-isaac.h:
+ * src/rand-isaac.c (isaac_new, isaac_copy): transfer ISAAC code
+ (for cryptographic random number generation) from shred.c to new
+ files rand-isaac.c, rand-isaac.h; make state size
+ runtime-configurable and add functions isaac_new and isaac_copy
+
+ * src/Makefile.am (shred_SOURCES): isaac.c is now a source of shred
+
+ * src/Makefile.am (sort_SOURCES, sort_LDADD): changes to reflect
+ that sort uses ISAAC
+
+ * src/sort.c (short_options, long_options, rand_state, HASH_SIZE,
+ HASH_WORDS, get_hash, keycompare, main): add options --random-sort
+ and --seed to implement a random shuffle
+
+ * doc/coreutils.texi: document new --random-hash option, provide
+ an example
+
2005-11-24 Jim Meyering <[EMAIL PROTECTED]>
* Version 6.0-cvs.
diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~'
-x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps
-x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h
-x config.log -x config.status -x coreutils.info -x autom4te.cache -x
'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x
'*.orig' -x '*.rej' -x version.texi coreutils-cvs/doc/coreutils.texi
coreutils-cvs-3-sort-with-param-isaac/doc/coreutils.texi
--- coreutils-cvs/doc/coreutils.texi 2005-11-24 20:09:05.000000000 +0000
+++ coreutils-cvs-3-sort-with-param-isaac/doc/coreutils.texi 2005-12-03
19:16:12.000000000 +0000
@@ -3396,6 +3396,15 @@
Reverse the result of comparison, so that lines with greater key values
appear earlier in the output instead of later.
[EMAIL PROTECTED] -R
[EMAIL PROTECTED] --random-sort
[EMAIL PROTECTED] -R
[EMAIL PROTECTED] --random-sort
[EMAIL PROTECTED] random sort
+
+Sort by random hash, i.e. perform a shuffle. This is done by hashing
+the input keys and sorting based on the results.
+
@end table
Other options are:
@@ -3529,6 +3538,11 @@
reliably handle arbitrary file names (even those containing blanks
or other special characters).
[EMAIL PROTECTED] --seed @var{tempdir}
[EMAIL PROTECTED] --seed
[EMAIL PROTECTED] specify seed for random hash
+Specify a seed for the @option{--random-sort} option.
+
@end table
Historical (BSD and System V) implementations of @command{sort} have
@@ -3695,6 +3709,16 @@
@c printf 'c\n\nb\n\na\n'|perl -0pe 's/\n\n/\n\0/g'|sort -z|perl -0pe
's/\0/\n/g'
@c @end example
[EMAIL PROTECTED]
+Shuffle a list of directories, but preserve the order of files within
+each directory. For instance, one could use this to generate a music
+playlist in which albums are shuffled but the songs of each album are
+played in order.
+
[EMAIL PROTECTED]
+find . -maxdepth 2 -type f | sort -t / -k2,2R -k3,3
[EMAIL PROTECTED] example
+
@end itemize
diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~'
-x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps
-x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h
-x config.log -x config.status -x coreutils.info -x autom4te.cache -x
'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x
'*.orig' -x '*.rej' -x version.texi coreutils-cvs/src/Makefile.am
coreutils-cvs-3-sort-with-param-isaac/src/Makefile.am
--- coreutils-cvs/src/Makefile.am 2005-10-23 16:23:56.000000000 +0100
+++ coreutils-cvs-3-sort-with-param-isaac/src/Makefile.am 2005-12-03
15:27:52.000000000 +0000
@@ -69,7 +69,7 @@
vdir_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME)
## If necessary, add -lm to resolve use of pow in lib/strtod.c.
-sort_LDADD = $(LDADD) $(POW_LIB)
+sort_LDADD = $(LDADD) $(POW_LIB) $(LIB_GETHRXTIME)
# for get_date and gettime
date_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME)
@@ -166,6 +166,8 @@
ls_SOURCES = ls.c ls-ls.c
chown_SOURCES = chown.c chown-core.c
chgrp_SOURCES = chgrp.c chown-core.c
+shred_SOURCES = shred.c rand-isaac.c
+sort_SOURCES = sort.c rand-isaac.c
mv_SOURCES = mv.c copy.c cp-hash.c remove.c
rm_SOURCES = rm.c remove.c
diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~'
-x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps
-x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h
-x config.log -x config.status -x coreutils.info -x autom4te.cache -x
'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x
'*.orig' -x '*.rej' -x version.texi coreutils-cvs/src/rand-isaac.c
coreutils-cvs-3-sort-with-param-isaac/src/rand-isaac.c
--- coreutils-cvs/src/rand-isaac.c 1970-01-01 01:00:00.000000000 +0100
+++ coreutils-cvs-3-sort-with-param-isaac/src/rand-isaac.c 2005-12-03
18:12:24.000000000 +0000
@@ -0,0 +1,406 @@
+/* rand-isaac.c - ISAAC random number generator
+
+ Copyright (C) 1999-2005 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1998, 1999 Colin Plumb.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ Written by Colin Plumb. */
+
+/*
+ * --------------------------------------------------------------------
+ * Bob Jenkins' cryptographic random number generator, ISAAC.
+ * Hacked by Colin Plumb.
+ *
+ * We need a source of random numbers for some of the overwrite data
+ * for shred. Cryptographically secure is desirable, but it's not
+ * life-or-death so I can be a little bit experimental in the choice
+ * of RNGs here.
+ *
+ * This generator is based somewhat on RC4, but has analysis
+ * <http://burtleburtle.net/bob/rand/isaacafa.html>
+ * pointing to it actually being better. I like it because it's nice
+ * and fast, and because the author did good work analyzing it.
+ * --------------------------------------------------------------------
+ */
+
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include "system.h"
+#include "gethrxtime.h"
+
+#include "rand-isaac.h"
+
+#define ISAAC_SIZE(words) \
+ (sizeof (struct isaac_state) - \
+ (ISAAC_MAX_WORDS - words) * sizeof (uint32_t))
+
+/*
+ * Create a new state, optionally at the location specified. This
+ * should be called to create/initialize each new isaac_state. 'words'
+ * must be a power of 2, and should be at least 8. Smaller values give
+ * less security.
+ */
+struct isaac_state *
+isaac_new (struct isaac_state *s, int words)
+{
+ size_t w;
+ size_t ss = ISAAC_SIZE (words);
+ if (!s)
+ {
+ s = xmalloc (ss);
+ }
+ memset (s, 0, ss);
+ s->words = words;
+ s->log = 0;
+ for (w=1; w<words; w<<=1, s->log++) {}
+ return s;
+}
+
+/*
+ * Copy a state. Destination block must be at least as large as
+ * source.
+ */
+void
+isaac_copy (struct isaac_state *dst, struct isaac_state *src)
+{
+ memcpy(dst, src, ISAAC_SIZE (src->words));
+}
+
+/*
+ * Refill the entire R array, and update S.
+ */
+void
+isaac_refill (struct isaac_state *s, uint32_t r[/* s>-words */])
+{
+ uint32_t a, b; /* Caches of a and b */
+ uint32_t x, y; /* Temps needed by isaac_step macro */
+ uint32_t *m = s->mm; /* Pointer into state array */
+ uint32_t w = s->words;
+
+ a = s->a;
+ b = s->b + (++s->c);
+
+ do
+ {
+ isaac_step (s, a << 13, a, b, m, w / 2, r);
+ isaac_step (s, a >> 6, a, b, m + 1, w / 2, r + 1);
+ isaac_step (s, a << 2, a, b, m + 2, w / 2, r + 2);
+ isaac_step (s, a >> 16, a, b, m + 3, w / 2, r + 3);
+ r += 4;
+ }
+ while ((m += 4) < s->mm + w / 2);
+ do
+ {
+ isaac_step (s, a << 13, a, b, m, -w / 2, r);
+ isaac_step (s, a >> 6, a, b, m + 1, -w / 2, r + 1);
+ isaac_step (s, a << 2, a, b, m + 2, -w / 2, r + 2);
+ isaac_step (s, a >> 16, a, b, m + 3, -w / 2, r + 3);
+ r += 4;
+ }
+ while ((m += 4) < s->mm + w);
+ s->a = a;
+ s->b = b;
+}
+
+/*
+ * The basic seed-scrambling step for initialization, based on Bob
+ * Jenkins' 256-bit hash.
+ */
+#define mix(a,b,c,d,e,f,g,h) \
+ ( a ^= b << 11, d += a, \
+ b += c, b ^= c >> 2, e += b, \
+ c += d, c ^= d << 8, f += c, \
+ d += e, d ^= e >> 16, g += d, \
+ e += f, e ^= f << 10, h += e, \
+ f += g, f ^= g >> 4, a += f, \
+ g += h, g ^= h << 8, b += g, \
+ h += a, h ^= a >> 9, c += h, \
+ a += b )
+
+/* The basic ISAAC initialization pass. */
+void
+isaac_mix (struct isaac_state *s, uint32_t const seed[/* s->words */])
+{
+ int i;
+ uint32_t a = s->iv[0];
+ uint32_t b = s->iv[1];
+ uint32_t c = s->iv[2];
+ uint32_t d = s->iv[3];
+ uint32_t e = s->iv[4];
+ uint32_t f = s->iv[5];
+ uint32_t g = s->iv[6];
+ uint32_t h = s->iv[7];
+ uint32_t w = s->words;
+
+ for (i = 0; i < w; i += 8)
+ {
+ a += seed[i];
+ b += seed[i + 1];
+ c += seed[i + 2];
+ d += seed[i + 3];
+ e += seed[i + 4];
+ f += seed[i + 5];
+ g += seed[i + 6];
+ h += seed[i + 7];
+
+ mix (a, b, c, d, e, f, g, h);
+
+ s->mm[i] = a;
+ s->mm[i + 1] = b;
+ s->mm[i + 2] = c;
+ s->mm[i + 3] = d;
+ s->mm[i + 4] = e;
+ s->mm[i + 5] = f;
+ s->mm[i + 6] = g;
+ s->mm[i + 7] = h;
+ }
+
+ s->iv[0] = a;
+ s->iv[1] = b;
+ s->iv[2] = c;
+ s->iv[3] = d;
+ s->iv[4] = e;
+ s->iv[5] = f;
+ s->iv[6] = g;
+ s->iv[7] = h;
+}
+
+#if 0 /* Provided for reference only; not used in this code */
+/*
+ * Initialize the ISAAC RNG with the given seed material. Its size
+ * MUST be a multiple of s->words * sizeof (uint32_t), and may be
+ * stored in the s->mm array.
+ *
+ * This is a generalization of the original ISAAC initialization code
+ * to support larger seed sizes. For seed sizes of 0 and s->words *
+ * sizeof (uint32_t), it is identical.
+ */
+void
+isaac_init (struct isaac_state *s, uint32_t const *seed, size_t seedsize)
+{
+ static uint32_t const iv[8] =
+ {
+ 0x1367df5a, 0x95d90059, 0xc3163e4b, 0x0f421ad8,
+ 0xd92a4a78, 0xa51a3c49, 0xc4efea1b, 0x30609119};
+ int i;
+
+# if 0
+ /* The initialization of iv is a precomputed form of: */
+ for (i = 0; i < 7; i++)
+ iv[i] = 0x9e3779b9; /* the golden ratio */
+ for (i = 0; i < 4; ++i) /* scramble it */
+ mix (iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]);
+# endif
+ s->a = s->b = s->c = 0;
+
+ for (i = 0; i < 8; i++)
+ s->iv[i] = iv[i];
+
+ if (seedsize)
+ {
+ /* First pass (as in reference ISAAC code) */
+ isaac_mix (s, seed);
+ /* Second and subsequent passes (extension to ISAAC) */
+ while (seedsize -= s->words * sizeof (uint32_t))
+ {
+ seed += s->words;
+ for (i = 0; i < s->words; i++)
+ s->mm[i] += seed[i];
+ isaac_mix (s, s->mm);
+ }
+ }
+ else
+ {
+ /* The no seed case (as in reference ISAAC code) */
+ for (i = 0; i < s->words; i++)
+ s->mm[i] = 0;
+ }
+
+ /* Final pass */
+ isaac_mix (s, s->mm);
+}
+#endif
+
+/* Start seeding an ISAAC structire */
+void
+isaac_seed_start (struct isaac_state *s)
+{
+ static uint32_t const iv[8] =
+ {
+ 0x1367df5a, 0x95d90059, 0xc3163e4b, 0x0f421ad8,
+ 0xd92a4a78, 0xa51a3c49, 0xc4efea1b, 0x30609119
+ };
+ int i;
+
+#if 0
+ /* The initialization of iv is a precomputed form of: */
+ for (i = 0; i < 7; i++)
+ iv[i] = 0x9e3779b9; /* the golden ratio */
+ for (i = 0; i < 4; ++i) /* scramble it */
+ mix (iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]);
+#endif
+ for (i = 0; i < 8; i++)
+ s->iv[i] = iv[i];
+
+ /* used to do memset here to clear the state array, but now it's
+ done in isaac_new */
+
+ /* s->c gets used for a data pointer during the seeding phase */
+ s->a = s->b = s->c = 0;
+}
+
+/* Add a buffer of seed material */
+void
+isaac_seed_data (struct isaac_state *s, void const *buffer, size_t size)
+{
+ unsigned char const *buf = buffer;
+ unsigned char *p;
+ size_t avail;
+ size_t i;
+
+ avail = s->words - (size_t) s->c; /* s->c is used as a write pointer */
+
+ /* Do any full buffers that are necessary */
+ while (size > avail)
+ {
+ p = (unsigned char *) s->mm + s->c;
+ for (i = 0; i < avail; i++)
+ p[i] ^= buf[i];
+ buf += avail;
+ size -= avail;
+ isaac_mix (s, s->mm);
+ s->c = 0;
+ avail = s->words;
+ }
+
+ /* And the final partial block */
+ p = (unsigned char *) s->mm + s->c;
+ for (i = 0; i < size; i++)
+ p[i] ^= buf[i];
+ s->c = size;
+}
+
+
+/* End of seeding phase; get everything ready to produce output. */
+void
+isaac_seed_finish (struct isaac_state *s)
+{
+ isaac_mix (s, s->mm);
+ isaac_mix (s, s->mm);
+ /* Now reinitialize c to start things off right */
+ s->c = 0;
+}
+#define ISAAC_SEED(s,x) isaac_seed_data (s, &(x), sizeof (x))
+
+/*
+ * Get seed material. 16 bytes (128 bits) is plenty, but if we have
+ * /dev/urandom, we get 32 bytes = 256 bits for complete overkill.
+ */
+void
+isaac_seed (struct isaac_state *s)
+{
+ isaac_seed_start (s);
+
+ { pid_t t = getpid (); ISAAC_SEED (s, t); }
+ { pid_t t = getppid (); ISAAC_SEED (s, t); }
+ { uid_t t = getuid (); ISAAC_SEED (s, t); }
+ { gid_t t = getgid (); ISAAC_SEED (s, t); }
+
+ {
+ xtime_t t = gethrxtime ();
+ ISAAC_SEED (s, t);
+ }
+
+ {
+ char buf[32];
+ int fd = open ("/dev/urandom", O_RDONLY | O_NOCTTY);
+ if (fd >= 0)
+ {
+ read (fd, buf, 32);
+ close (fd);
+ isaac_seed_data (s, buf, 32);
+ }
+ else
+ {
+ fd = open ("/dev/random", O_RDONLY | O_NONBLOCK | O_NOCTTY);
+ if (fd >= 0)
+ {
+ /* /dev/random is more precious, so use less */
+ read (fd, buf, 16);
+ close (fd);
+ isaac_seed_data (s, buf, 16);
+ }
+ }
+ }
+
+ isaac_seed_finish (s);
+}
+
+void
+irand_init (struct irand_state *r, struct isaac_state *s)
+{
+ r->numleft = 0;
+ r->s = s;
+ memset (r->r, 0, s->words * sizeof (uint32_t));
+}
+
+/*
+ * We take from the end of the block deliberately, so if we need
+ * only a small number of values, we choose the final ones which are
+ * marginally better mixed than the initial ones.
+ */
+uint32_t
+irand32 (struct irand_state *r)
+{
+ if (!r->numleft)
+ {
+ isaac_refill (r->s, r->r);
+ r->numleft = r->s->words;
+ }
+ return r->r[--r->numleft];
+}
+
+/*
+ * Return a uniformly distributed random number between 0 and n,
+ * inclusive. Thus, the result is modulo n+1.
+ *
+ * Theory of operation: as x steps through every possible 32-bit number,
+ * x % n takes each value at least 2^32 / n times (rounded down), but
+ * the values less than 2^32 % n are taken one additional time. Thus,
+ * x % n is not perfectly uniform. To fix this, the values of x less
+ * than 2^32 % n are disallowed, and if the RNG produces one, we ask
+ * for a new value.
+ */
+uint32_t
+irand_mod (struct irand_state *r, uint32_t n)
+{
+ uint32_t x;
+ uint32_t lim;
+
+ if (!++n)
+ return irand32 (r);
+
+ lim = -n % n; /* == (2**32-n) % n == 2**32 % n */
+ do
+ {
+ x = irand32 (r);
+ }
+ while (x < lim);
+ return x % n;
+}
diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~'
-x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps
-x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h
-x config.log -x config.status -x coreutils.info -x autom4te.cache -x
'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x
'*.orig' -x '*.rej' -x version.texi coreutils-cvs/src/rand-isaac.h
coreutils-cvs-3-sort-with-param-isaac/src/rand-isaac.h
--- coreutils-cvs/src/rand-isaac.h 1970-01-01 01:00:00.000000000 +0100
+++ coreutils-cvs-3-sort-with-param-isaac/src/rand-isaac.h 2005-12-03
18:13:15.000000000 +0000
@@ -0,0 +1,93 @@
+/* rand-isaac.h - header for ISAAC
+
+ Copyright (C) 1999-2005 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1998, 1999 Colin Plumb.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ Written by Colin Plumb. */
+
+/* Size of the state tables to use. */
+/* (You may change ISAAC_LOG but it should be >= 3, and smaller values
+ * give less security) */
+#ifndef ISAAC_LOG
+#define ISAAC_LOG 8
+#endif
+#define ISAAC_MAX_WORDS (1 << ISAAC_LOG)
+#define ISAAC_MAX_BYTES (ISAAC_MAX_WORDS * sizeof (uint32_t))
+
+/* RNG state variables */
+struct isaac_state
+ {
+ uint32_t iv[8]; /* Seeding initial vector */
+ uint32_t a, b, c; /* Extra index variables */
+ uint32_t words; /* Words in main state array */
+ uint32_t log; /* Log of words */
+ uint32_t mm[ISAAC_MAX_WORDS]; /* Main state array */
+ };
+
+/* This index operation is more efficient on many processors */
+#define ind(words, mm, x) \
+ (* (uint32_t *) ((char *) (mm) \
+ + ((x) & (words - 1) * sizeof (uint32_t))))
+
+/*
+ * The central step. This uses two temporaries, x and y. s is the
+ * state, while m is a pointer to the current word. off is the offset
+ * from m to the word s->words/2 words away in the mm array, i.e.
+ * +/- s->words/2.
+ */
+#define isaac_step(s, mix, a, b, m, off, r) \
+( \
+ a = ((a) ^ (mix)) + (m)[off], \
+ x = *(m), \
+ *(m) = y = ind (s->words, s->mm, x) + (a) + (b), \
+ *(r) = b = ind (s->words, s->mm, (y) >> s->log) + x \
+)
+
+struct isaac_state *
+isaac_new (struct isaac_state *s, int words);
+void
+isaac_copy (struct isaac_state *dst, struct isaac_state *src);
+void
+isaac_refill (struct isaac_state *s, uint32_t r[/* s->words */]);
+void
+isaac_mix (struct isaac_state *s, uint32_t const seed[/* s->words */]);
+void
+isaac_init (struct isaac_state *s, uint32_t const *seed, size_t seedsize);
+void
+isaac_seed_start (struct isaac_state *s);
+void
+isaac_seed_data (struct isaac_state *s, void const *buffer, size_t size);
+void
+isaac_seed_finish (struct isaac_state *s);
+#define ISAAC_SEED(s,x) isaac_seed_data (s, &(x), sizeof (x))
+void
+isaac_seed (struct isaac_state *s);
+
+/* Single-word RNG built on top of ISAAC */
+struct irand_state
+{
+ uint32_t r[ISAAC_MAX_WORDS];
+ unsigned int numleft;
+ struct isaac_state *s;
+};
+
+void
+irand_init (struct irand_state *r, struct isaac_state *s);
+uint32_t
+irand32 (struct irand_state *r);
+uint32_t
+irand_mod (struct irand_state *r, uint32_t n);
diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~'
-x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps
-x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h
-x config.log -x config.status -x coreutils.info -x autom4te.cache -x
'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x
'*.orig' -x '*.rej' -x version.texi coreutils-cvs/src/shred.c
coreutils-cvs-3-sort-with-param-isaac/src/shred.c
--- coreutils-cvs/src/shred.c 2005-09-24 14:40:37.000000000 +0100
+++ coreutils-cvs-3-sort-with-param-isaac/src/shred.c 2005-12-01
00:33:14.000000000 +0000
@@ -109,6 +109,7 @@
#include "inttostr.h"
#include "quotearg.h" /* For quotearg_colon */
#include "quote.h" /* For quotearg_colon */
+#include "rand-isaac.h" /* Random number stuff */
#define DEFAULT_PASSES 25 /* Default */
@@ -227,387 +228,6 @@
}
/*
- * --------------------------------------------------------------------
- * Bob Jenkins' cryptographic random number generator, ISAAC.
- * Hacked by Colin Plumb.
- *
- * We need a source of random numbers for some of the overwrite data.
- * Cryptographically secure is desirable, but it's not life-or-death
- * so I can be a little bit experimental in the choice of RNGs here.
- *
- * This generator is based somewhat on RC4, but has analysis
- * (http://ourworld.compuserve.com/homepages/bob_jenkins/randomnu.htm)
- * pointing to it actually being better. I like it because it's nice
- * and fast, and because the author did good work analyzing it.
- * --------------------------------------------------------------------
- */
-
-/* Size of the state tables to use. (You may change ISAAC_LOG) */
-#define ISAAC_LOG 8
-#define ISAAC_WORDS (1 << ISAAC_LOG)
-#define ISAAC_BYTES (ISAAC_WORDS * sizeof (uint32_t))
-
-/* RNG state variables */
-struct isaac_state
- {
- uint32_t mm[ISAAC_WORDS]; /* Main state array */
- uint32_t iv[8]; /* Seeding initial vector */
- uint32_t a, b, c; /* Extra index variables */
- };
-
-/* This index operation is more efficient on many processors */
-#define ind(mm, x) \
- (* (uint32_t *) ((char *) (mm) \
- + ((x) & (ISAAC_WORDS - 1) * sizeof (uint32_t))))
-
-/*
- * The central step. This uses two temporaries, x and y. mm is the
- * whole state array, while m is a pointer to the current word. off is
- * the offset from m to the word ISAAC_WORDS/2 words away in the mm array,
- * i.e. +/- ISAAC_WORDS/2.
- */
-#define isaac_step(mix, a, b, mm, m, off, r) \
-( \
- a = ((a) ^ (mix)) + (m)[off], \
- x = *(m), \
- *(m) = y = ind (mm, x) + (a) + (b), \
- *(r) = b = ind (mm, (y) >> ISAAC_LOG) + x \
-)
-
-/*
- * Refill the entire R array, and update S.
- */
-static void
-isaac_refill (struct isaac_state *s, uint32_t r[/* ISAAC_WORDS */])
-{
- uint32_t a, b; /* Caches of a and b */
- uint32_t x, y; /* Temps needed by isaac_step macro */
- uint32_t *m = s->mm; /* Pointer into state array */
-
- a = s->a;
- b = s->b + (++s->c);
-
- do
- {
- isaac_step (a << 13, a, b, s->mm, m, ISAAC_WORDS / 2, r);
- isaac_step (a >> 6, a, b, s->mm, m + 1, ISAAC_WORDS / 2, r + 1);
- isaac_step (a << 2, a, b, s->mm, m + 2, ISAAC_WORDS / 2, r + 2);
- isaac_step (a >> 16, a, b, s->mm, m + 3, ISAAC_WORDS / 2, r + 3);
- r += 4;
- }
- while ((m += 4) < s->mm + ISAAC_WORDS / 2);
- do
- {
- isaac_step (a << 13, a, b, s->mm, m, -ISAAC_WORDS / 2, r);
- isaac_step (a >> 6, a, b, s->mm, m + 1, -ISAAC_WORDS / 2, r + 1);
- isaac_step (a << 2, a, b, s->mm, m + 2, -ISAAC_WORDS / 2, r + 2);
- isaac_step (a >> 16, a, b, s->mm, m + 3, -ISAAC_WORDS / 2, r + 3);
- r += 4;
- }
- while ((m += 4) < s->mm + ISAAC_WORDS);
- s->a = a;
- s->b = b;
-}
-
-/*
- * The basic seed-scrambling step for initialization, based on Bob
- * Jenkins' 256-bit hash.
- */
-#define mix(a,b,c,d,e,f,g,h) \
- ( a ^= b << 11, d += a, \
- b += c, b ^= c >> 2, e += b, \
- c += d, c ^= d << 8, f += c, \
- d += e, d ^= e >> 16, g += d, \
- e += f, e ^= f << 10, h += e, \
- f += g, f ^= g >> 4, a += f, \
- g += h, g ^= h << 8, b += g, \
- h += a, h ^= a >> 9, c += h, \
- a += b )
-
-/* The basic ISAAC initialization pass. */
-static void
-isaac_mix (struct isaac_state *s, uint32_t const seed[/* ISAAC_WORDS */])
-{
- int i;
- uint32_t a = s->iv[0];
- uint32_t b = s->iv[1];
- uint32_t c = s->iv[2];
- uint32_t d = s->iv[3];
- uint32_t e = s->iv[4];
- uint32_t f = s->iv[5];
- uint32_t g = s->iv[6];
- uint32_t h = s->iv[7];
-
- for (i = 0; i < ISAAC_WORDS; i += 8)
- {
- a += seed[i];
- b += seed[i + 1];
- c += seed[i + 2];
- d += seed[i + 3];
- e += seed[i + 4];
- f += seed[i + 5];
- g += seed[i + 6];
- h += seed[i + 7];
-
- mix (a, b, c, d, e, f, g, h);
-
- s->mm[i] = a;
- s->mm[i + 1] = b;
- s->mm[i + 2] = c;
- s->mm[i + 3] = d;
- s->mm[i + 4] = e;
- s->mm[i + 5] = f;
- s->mm[i + 6] = g;
- s->mm[i + 7] = h;
- }
-
- s->iv[0] = a;
- s->iv[1] = b;
- s->iv[2] = c;
- s->iv[3] = d;
- s->iv[4] = e;
- s->iv[5] = f;
- s->iv[6] = g;
- s->iv[7] = h;
-}
-
-#if 0 /* Provided for reference only; not used in this code */
-/*
- * Initialize the ISAAC RNG with the given seed material.
- * Its size MUST be a multiple of ISAAC_BYTES, and may be
- * stored in the s->mm array.
- *
- * This is a generalization of the original ISAAC initialization code
- * to support larger seed sizes. For seed sizes of 0 and ISAAC_BYTES,
- * it is identical.
- */
-static void
-isaac_init (struct isaac_state *s, uint32_t const *seed, size_t seedsize)
-{
- static uint32_t const iv[8] =
- {
- 0x1367df5a, 0x95d90059, 0xc3163e4b, 0x0f421ad8,
- 0xd92a4a78, 0xa51a3c49, 0xc4efea1b, 0x30609119};
- int i;
-
-# if 0
- /* The initialization of iv is a precomputed form of: */
- for (i = 0; i < 7; i++)
- iv[i] = 0x9e3779b9; /* the golden ratio */
- for (i = 0; i < 4; ++i) /* scramble it */
- mix (iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]);
-# endif
- s->a = s->b = s->c = 0;
-
- for (i = 0; i < 8; i++)
- s->iv[i] = iv[i];
-
- if (seedsize)
- {
- /* First pass (as in reference ISAAC code) */
- isaac_mix (s, seed);
- /* Second and subsequent passes (extension to ISAAC) */
- while (seedsize -= ISAAC_BYTES)
- {
- seed += ISAAC_WORDS;
- for (i = 0; i < ISAAC_WORDS; i++)
- s->mm[i] += seed[i];
- isaac_mix (s, s->mm);
- }
- }
- else
- {
- /* The no seed case (as in reference ISAAC code) */
- for (i = 0; i < ISAAC_WORDS; i++)
- s->mm[i] = 0;
- }
-
- /* Final pass */
- isaac_mix (s, s->mm);
-}
-#endif
-
-/* Start seeding an ISAAC structire */
-static void
-isaac_seed_start (struct isaac_state *s)
-{
- static uint32_t const iv[8] =
- {
- 0x1367df5a, 0x95d90059, 0xc3163e4b, 0x0f421ad8,
- 0xd92a4a78, 0xa51a3c49, 0xc4efea1b, 0x30609119
- };
- int i;
-
-#if 0
- /* The initialization of iv is a precomputed form of: */
- for (i = 0; i < 7; i++)
- iv[i] = 0x9e3779b9; /* the golden ratio */
- for (i = 0; i < 4; ++i) /* scramble it */
- mix (iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]);
-#endif
- for (i = 0; i < 8; i++)
- s->iv[i] = iv[i];
-
- /* Enable the following memset if you're worried about used-uninitialized
- warnings involving code in isaac_refill from tools like valgrind.
- Since this buffer is used to accumulate pseudo-random data, there's
- no harm, and maybe even some benefit, in using it uninitialized. */
-#if AVOID_USED_UNINITIALIZED_WARNINGS
- memset (s->mm, 0, sizeof s->mm);
-#endif
-
- /* s->c gets used for a data pointer during the seeding phase */
- s->a = s->b = s->c = 0;
-}
-
-/* Add a buffer of seed material */
-static void
-isaac_seed_data (struct isaac_state *s, void const *buffer, size_t size)
-{
- unsigned char const *buf = buffer;
- unsigned char *p;
- size_t avail;
- size_t i;
-
- avail = sizeof s->mm - (size_t) s->c; /* s->c is used as a write
pointer */
-
- /* Do any full buffers that are necessary */
- while (size > avail)
- {
- p = (unsigned char *) s->mm + s->c;
- for (i = 0; i < avail; i++)
- p[i] ^= buf[i];
- buf += avail;
- size -= avail;
- isaac_mix (s, s->mm);
- s->c = 0;
- avail = sizeof s->mm;
- }
-
- /* And the final partial block */
- p = (unsigned char *) s->mm + s->c;
- for (i = 0; i < size; i++)
- p[i] ^= buf[i];
- s->c = size;
-}
-
-
-/* End of seeding phase; get everything ready to produce output. */
-static void
-isaac_seed_finish (struct isaac_state *s)
-{
- isaac_mix (s, s->mm);
- isaac_mix (s, s->mm);
- /* Now reinitialize c to start things off right */
- s->c = 0;
-}
-#define ISAAC_SEED(s,x) isaac_seed_data (s, &(x), sizeof (x))
-
-/*
- * Get seed material. 16 bytes (128 bits) is plenty, but if we have
- * /dev/urandom, we get 32 bytes = 256 bits for complete overkill.
- */
-static void
-isaac_seed (struct isaac_state *s)
-{
- isaac_seed_start (s);
-
- { pid_t t = getpid (); ISAAC_SEED (s, t); }
- { pid_t t = getppid (); ISAAC_SEED (s, t); }
- { uid_t t = getuid (); ISAAC_SEED (s, t); }
- { gid_t t = getgid (); ISAAC_SEED (s, t); }
-
- {
- xtime_t t = gethrxtime ();
- ISAAC_SEED (s, t);
- }
-
- {
- char buf[32];
- int fd = open ("/dev/urandom", O_RDONLY | O_NOCTTY);
- if (fd >= 0)
- {
- read (fd, buf, 32);
- close (fd);
- isaac_seed_data (s, buf, 32);
- }
- else
- {
- fd = open ("/dev/random", O_RDONLY | O_NONBLOCK | O_NOCTTY);
- if (fd >= 0)
- {
- /* /dev/random is more precious, so use less */
- read (fd, buf, 16);
- close (fd);
- isaac_seed_data (s, buf, 16);
- }
- }
- }
-
- isaac_seed_finish (s);
-}
-
-/* Single-word RNG built on top of ISAAC */
-struct irand_state
-{
- uint32_t r[ISAAC_WORDS];
- unsigned int numleft;
- struct isaac_state *s;
-};
-
-static void
-irand_init (struct irand_state *r, struct isaac_state *s)
-{
- r->numleft = 0;
- r->s = s;
-}
-
-/*
- * We take from the end of the block deliberately, so if we need
- * only a small number of values, we choose the final ones which are
- * marginally better mixed than the initial ones.
- */
-static uint32_t
-irand32 (struct irand_state *r)
-{
- if (!r->numleft)
- {
- isaac_refill (r->s, r->r);
- r->numleft = ISAAC_WORDS;
- }
- return r->r[--r->numleft];
-}
-
-/*
- * Return a uniformly distributed random number between 0 and n,
- * inclusive. Thus, the result is modulo n+1.
- *
- * Theory of operation: as x steps through every possible 32-bit number,
- * x % n takes each value at least 2^32 / n times (rounded down), but
- * the values less than 2^32 % n are taken one additional time. Thus,
- * x % n is not perfectly uniform. To fix this, the values of x less
- * than 2^32 % n are disallowed, and if the RNG produces one, we ask
- * for a new value.
- */
-static uint32_t
-irand_mod (struct irand_state *r, uint32_t n)
-{
- uint32_t x;
- uint32_t lim;
-
- if (!++n)
- return irand32 (r);
-
- lim = -n % n; /* == (2**32-n) % n == 2**32 % n */
- do
- {
- x = irand32 (r);
- }
- while (x < lim);
- return x % n;
-}
-
-/*
* Fill a buffer with a fixed pattern.
*
* The buffer must be at least 3 bytes long, even if
@@ -636,18 +256,19 @@
/*
* Fill a buffer, R (of size SIZE_MAX), with random data.
- * SIZE is rounded UP to a multiple of ISAAC_BYTES.
+ * SIZE is rounded UP to a multiple of s->words * sizeof (uint32_t).
*/
static void
fillrand (struct isaac_state *s, uint32_t *r, size_t size_max, size_t size)
{
- size = (size + ISAAC_BYTES - 1) / ISAAC_BYTES;
+ size_t bytes = s->words * sizeof (uint32_t);
+ size = (size + bytes - 1) / bytes;
assert (size <= size_max);
while (size--)
{
isaac_refill (s, r);
- r += ISAAC_WORDS;
+ r += s->words;
}
}
@@ -749,7 +370,7 @@
size_t soff; /* Offset into buffer for next write */
ssize_t ssize; /* Return value from write */
uint32_t *r; /* Fill pattern. */
- size_t rsize = 3 * MAX (ISAAC_WORDS, 1024) * sizeof *r; /* Fill size. */
+ size_t rsize = 3 * MAX (s->words, 1024) * sizeof *r; /* Fill size. */
size_t ralign = lcm (getpagesize (), sizeof *r); /* Fill alignment. */
char pass_string[PASS_NAME_SIZE]; /* Name of current pass */
bool write_error = false;
@@ -1483,6 +1104,7 @@
atexit (close_stdout);
+ isaac_new (&s, ISAAC_MAX_WORDS);
isaac_seed (&s);
memset (&flags, 0, sizeof flags);
diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~'
-x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps
-x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h
-x config.log -x config.status -x coreutils.info -x autom4te.cache -x
'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x
'*.orig' -x '*.rej' -x version.texi coreutils-cvs/src/sort.c
coreutils-cvs-3-sort-with-param-isaac/src/sort.c
--- coreutils-cvs/src/sort.c 2005-10-07 19:48:28.000000000 +0100
+++ coreutils-cvs-3-sort-with-param-isaac/src/sort.c 2005-12-03
18:16:48.000000000 +0000
@@ -30,8 +30,10 @@
#include "error.h"
#include "hard-locale.h"
#include "inttostr.h"
+#include "md5.h"
#include "physmem.h"
#include "posixver.h"
+#include "rand-isaac.h"
#include "quote.h"
#include "stdlib--.h"
#include "stdio--.h"
@@ -77,6 +79,8 @@
# define DEFAULT_TMPDIR "/tmp"
#endif
+#define SORT_ISAAC_WORDS 8
+
/* Exit statuses. */
enum
{
@@ -147,6 +151,7 @@
bool numeric; /* Flag for numeric comparison. Handle
strings of digits with optional decimal
point, but no exponential notation. */
+ bool random_hash; /* Shuffle by sorting on random hash of key */
bool general_numeric; /* Flag for general, numeric comparison.
Handle numbers in exponential notation. */
bool month; /* Flag for comparison by month name. */
@@ -303,6 +308,7 @@
-i, --ignore-nonprinting consider only printable characters\n\
-M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
-n, --numeric-sort compare according to string numerical value\n\
+ -R, --random-sort sort by random hash of keys\n\
-r, --reverse reverse the result of comparisons\n\
\n\
"), stdout);
@@ -313,6 +319,7 @@
-k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
-m, --merge merge already sorted files; do not sort\n\
-o, --output=FILE write result to FILE instead of standard output\n\
+ --seed=STRING use specified seed for random number generator\n\
-s, --stable stabilize sort by disabling last-resort
comparison\n\
-S, --buffer-size=SIZE use SIZE for main memory buffer\n\
"), stdout);
@@ -353,7 +360,11 @@
exit (status);
}
-static char const short_options[] = "-bcdfgik:mMno:rsS:t:T:uy:z";
+static char const short_options[] = "-bcdfgik:mMno:rRsS:t:T:uy:z";
+enum
+{
+ SEED_OPTION = CHAR_MAX + 1
+};
static struct option const long_options[] =
{
@@ -367,6 +378,7 @@
{"merge", no_argument, NULL, 'm'},
{"month-sort", no_argument, NULL, 'M'},
{"numeric-sort", no_argument, NULL, 'n'},
+ {"random-sort", no_argument, NULL, 'R'},
{"output", required_argument, NULL, 'o'},
{"reverse", no_argument, NULL, 'r'},
{"stable", no_argument, NULL, 's'},
@@ -375,6 +387,7 @@
{"temporary-directory", required_argument, NULL, 'T'},
{"unique", no_argument, NULL, 'u'},
{"zero-terminated", no_argument, NULL, 'z'},
+ {"seed", required_argument, NULL, SEED_OPTION},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0},
@@ -1152,6 +1165,29 @@
return 0;
}
+static struct isaac_state *rand_state;
+
+#define HASH_WORDS 1
+#define HASH_SIZE (HASH_WORDS * sizeof (uint32_t))
+
+static void
+get_hash (char const* text, size_t len, uint32_t resbuf[/*HASH_WORDS*/])
+{
+ struct isaac_state s;
+ int i;
+ isaac_copy (&s, rand_state);
+ isaac_seed_data (&s, text, len);
+ /* we can call isaac_seed_finish multiple times, but should only
+ call isaac_seed_start once */
+ isaac_seed_finish (&s);
+
+ /* getting an irand_state and drawing random numbers would be more
+ kosher here, but also slower. so we just peek at the ISAAC state
+ array instead */
+ for (i = 0; i < HASH_WORDS; i++)
+ resbuf[i] = s.mm[s.words - 1 - i];
+}
+
/* Compare two lines A and B trying every key in sequence until there
are no more keys or a difference is found. */
@@ -1179,6 +1215,18 @@
size_t lenb = limb <= textb ? 0 : limb - textb;
/* Actually compare the fields. */
+
+ if (key->random_hash)
+ {
+ uint32_t diga[HASH_WORDS];
+ uint32_t digb[HASH_WORDS];
+ get_hash (texta, lena, diga);
+ get_hash (textb, lenb, digb);
+ diff = memcmp (diga, digb, sizeof (diga));
+ if (diff)
+ goto not_equal;
+ }
+
if (key->numeric | key->general_numeric)
{
char savea = *lima, saveb = *limb;
@@ -1989,6 +2037,7 @@
{
error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
_(msgid), quote (spec));
+ /* added to satisfy compiler for NORETURN: */
abort ();
}
@@ -2081,6 +2130,9 @@
case 'n':
key->numeric = true;
break;
+ case 'R':
+ key->random_hash = true;
+ break;
case 'r':
key->reverse = true;
break;
@@ -2109,6 +2161,8 @@
int c = 0;
bool checkonly = false;
bool mergeonly = false;
+ char const *randseed = NULL;
+ bool need_rand = false;
size_t nfiles = 0;
bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
bool obsolete_usage = (posix2_version () < 200112);
@@ -2187,7 +2241,7 @@
gkey.sword = gkey.eword = SIZE_MAX;
gkey.ignore = NULL;
gkey.translate = NULL;
- gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
+ gkey.numeric = gkey.general_numeric = gkey.random_hash = gkey.month =
gkey.reverse = false;
gkey.skipsblanks = gkey.skipeblanks = false;
files = xnmalloc (argc, sizeof *files);
@@ -2263,6 +2317,7 @@
case 'M':
case 'n':
case 'r':
+ case 'R':
{
char str[2];
str[0] = c;
@@ -2339,6 +2394,10 @@
outfile = optarg;
break;
+ case SEED_OPTION:
+ randseed = optarg;
+ break;
+
case 's':
stable = true;
break;
@@ -2413,26 +2472,46 @@
}
}
+ need_rand |= gkey.random_hash;
/* Inheritance of global options to individual keys. */
for (key = keylist; key; key = key->next)
- if (! (key->ignore || key->translate
- || (key->skipsblanks | key->reverse
- | key->skipeblanks | key->month | key->numeric
- | key->general_numeric)))
- {
- key->ignore = gkey.ignore;
- key->translate = gkey.translate;
- key->skipsblanks = gkey.skipsblanks;
- key->skipeblanks = gkey.skipeblanks;
- key->month = gkey.month;
- key->numeric = gkey.numeric;
- key->general_numeric = gkey.general_numeric;
- key->reverse = gkey.reverse;
- }
+ {
+ need_rand |= key->random_hash;
+ if (! (key->ignore || key->translate
+ || (key->skipsblanks | key->reverse
+ | key->skipeblanks | key->month | key->numeric
+ | key->general_numeric
+ | key->random_hash)))
+ {
+ key->ignore = gkey.ignore;
+ key->translate = gkey.translate;
+ key->skipsblanks = gkey.skipsblanks;
+ key->skipeblanks = gkey.skipeblanks;
+ key->month = gkey.month;
+ key->numeric = gkey.numeric;
+ key->general_numeric = gkey.general_numeric;
+ key->random_hash = gkey.random_hash;
+ key->reverse = gkey.reverse;
+ }
+ }
+
+ if (need_rand)
+ {
+ rand_state = isaac_new (NULL, SORT_ISAAC_WORDS);
+ if (randseed)
+ {
+ isaac_seed_start (rand_state);
+ isaac_seed_data (rand_state, randseed, strlen (randseed));
+ isaac_seed_finish (rand_state);
+ }
+ else
+ isaac_seed (rand_state);
+ }
if (!keylist && (gkey.ignore || gkey.translate
|| (gkey.skipsblanks | gkey.skipeblanks | gkey.month
- | gkey.numeric | gkey.general_numeric)))
+ | gkey.numeric | gkey.general_numeric
+ | gkey.random_hash)))
insertkey (&gkey);
reverse = gkey.reverse;
_______________________________________________
Bug-coreutils mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/bug-coreutils