Module Name:    src
Committed By:   riastradh
Date:           Sun Nov 16 20:33:04 UTC 2014

Modified Files:
        src/lib/libc/gen: arc4random.3 arc4random.c

Log Message:
Rewrite arc4random(3) with ChaCha20-based PRNG and per-thread state.

Explain the security model in the man page.

No more RC4!

XXX pullup to netbsd-6, netbsd-5


To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/lib/libc/gen/arc4random.3
cvs rdiff -u -r1.25 -r1.26 src/lib/libc/gen/arc4random.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/lib/libc/gen/arc4random.3
diff -u src/lib/libc/gen/arc4random.3:1.9 src/lib/libc/gen/arc4random.3:1.10
--- src/lib/libc/gen/arc4random.3:1.9	Sat Feb  5 00:24:08 2011
+++ src/lib/libc/gen/arc4random.3	Sun Nov 16 20:33:04 2014
@@ -1,9 +1,11 @@
-.\"	$NetBSD: arc4random.3,v 1.9 2011/02/05 00:24:08 wiz Exp $
-.\" $OpenBSD: arc4random.3,v 1.17 2000/12/21 14:07:41 aaron Exp $
+.\"	$NetBSD: arc4random.3,v 1.10 2014/11/16 20:33:04 riastradh Exp $
 .\"
-.\" Copyright 1997 Niels Provos <[email protected]>
+.\" Copyright (c) 2014 The NetBSD Foundation, Inc.
 .\" All rights reserved.
 .\"
+.\" This code is derived from software contributed to The NetBSD Foundation
+.\" by Taylor R. Campbell.
+.\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
@@ -12,122 +14,243 @@
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
-.\" 3. All advertising materials mentioning features or use of this software
-.\"    must display the following acknowledgement:
-.\"      This product includes software developed by Niels Provos.
-.\" 4. The name of the author may not be used to endorse or promote products
-.\"    derived from this software without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
-.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
-.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
-.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
-.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 .\"
-.\" Manual page, using -mandoc macros
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd February 4, 2011
+.Dd November 16, 2014
 .Dt ARC4RANDOM 3
 .Os
 .Sh NAME
 .Nm arc4random ,
-.Nm arc4random_buf ,
 .Nm arc4random_uniform ,
+.Nm arc4random_buf ,
 .Nm arc4random_stir ,
 .Nm arc4random_addrandom
-.Nd arc4 random number generator
+.Nd random number generator
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
 .In stdlib.h
 .Ft uint32_t
 .Fn arc4random "void"
-.Ft void
-.Fn arc4random_buf "void *buffer" "size_t length"
 .Ft uint32_t
-.Fn arc4random_uniform "uint32_t upper_bound"
+.Fn arc4random_uniform "uint32_t bound"
+.Ft void
+.Fn arc4random_buf "void *buf" "size_t len"
 .Ft void
 .Fn arc4random_stir "void"
 .Ft void
-.Fn arc4random_addrandom "u_char *dat" "int datlen"
+.Fn arc4random_addrandom "unsigned char *buf" "int len"
 .Sh DESCRIPTION
 The
-.Fn arc4random
-function provides a high quality 32-bit pseudo-random
-number very quickly.
-.Fn arc4random
-seeds itself on a regular basis from the kernel strong random number
-subsystem described in
-.Xr rnd 4 .
-On each call, an ARC4 generator is used to generate a new result.
-The
-.Fn arc4random
-function uses the ARC4 cipher key stream generator,
-which uses 8*8 8 bit S-Boxes.
-The S-Boxes can be in about (2**1700) states.
+.Nm
+family of functions provides a cryptographic pseudorandom number
+generator automatically seeded from the system entropy pool and safe to
+use from multiple threads.
+.Nm
+is faster and more convenient than reading from
+.Pa /dev/urandom
+directly.
 .Pp
 .Fn arc4random
-fits into a middle ground not covered by other subsystems such as
-the strong, slow, and resource expensive random
-devices described in
-.Xr rnd 4
-versus the fast but poor quality interfaces described in
-.Xr rand 3 ,
-.Xr random 3 ,
-and
-.Xr drand48 3 .
+returns an integer in [0, 2^32) chosen independently with uniform
+distribution.
+.Pp
+.Fn arc4random_uniform
+returns an integer in [0,
+.Fa bound )
+chosen independently with uniform distribution.
 .Pp
-The
 .Fn arc4random_buf
-function fills the
-.Fa buffer
-with
-.Fa length
-bytes of ARC4-derived random data.
+stores
+.Fa len
+bytes into the memory pointed to by
+.Fa buf ,
+each byte chosen independently from [0, 256) with uniform
+distribution.
+.Pp
+.Fn arc4random_stir
+draws entropy from the operating system and incorporates it into the
+library's PRNG state to influence future outputs.
 .Pp
+.Fn arc4random_addrandom
+incorporates
+.Fa len
+bytes, which must be nonnegative, from the buffer
+.Fa buf ,
+into the library's PRNG state to influence future outputs.
+.Pp
+It is not necessary for an application to call
+.Fn arc4random_stir
+or
+.Fn arc4random_addrandom
+before calling other
+.Nm
+functions.
+The first call to any
+.Nm
+function will initialize the PRNG state unpredictably from the system
+entropy pool.
+.Sh SECURITY MODEL
+The
+.Nm
+functions provides the following security properties against three
+different classes of attackers, assuming that the state of the
+operating system's entropy pool is unknown to the attacker:
+.Bl -bullet -offset abcd -compact
+.It
+An attacker who has seen some outputs of any of the
+.Nm
+functions cannot predict past or future unseen outputs.
+.It
+An attacker who has seen the library's PRNG state in memory cannot
+predict past outputs.
+.It
+An attacker who has seen one process's PRNG state cannot predict past
+or future outputs in other processes, particularly its parent or
+siblings.
+.El
+.Sh IMPLEMENTATION NOTES
 The
+.Nm
+functions are currently implemented using the ChaCha20 pseudorandom
+function family.
+For any 32-byte string
+.Fa s ,
+.Pf ChaCha20_ Fa s
+is a function from 16-byte strings to 64-byte strings.
+It is conjectured that if
+.Fa s
+is chosen with uniform distribution, then the distribution on
+.Pf ChaCha20_ Fa s
+is indistinguishable to a computationally bounded adversary from a
+uniform distribution on all functions from 16-byte strings to 64-byte
+strings.
+.Pp
+The PRNG state is a 32-byte ChaCha20 key
+.Fa s .
+Each request to
+an
+.Nm
+function
+.Bl -bullet -offset abcd -compact
+.It
+computes the 64-byte quantity
+.Fa x
+=
+.Pf ChaCha20_ Fa s Ns (0),
+.It
+splits
+.Fa x
+into two 32-byte quantities
+.Fa s'
+and
+.Fa k ,
+.It
+replaces
+.Fa s
+by
+.Fa s' ,
+and
+.It
+uses
+.Fa k
+as output.
+.El
+.Pp
+.Fn arc4random
+yields the first four bytes of
+.Fa k
+as output directly.
+.Fn arc4random_buf
+either yields up to 32 bytes of
+.Fa k
+as output directly, or, for longer
+requests, uses
+.Fa k
+as a ChaCha20 key and yields the concatenation
+.Pf ChaCha20_ Fa k Ns (0)
+||
+.Pf ChaCha20_ Fa k Ns (1)
+|| ... as output.
 .Fn arc4random_uniform
-function returns a uniformly distributed random number less than
-.Fa upper_bound
-avoiding modulo bias when the upper bound is not a power of two.
+repeats
+.Fn arc4random
+until it obtains an integer in [2^32 %
+.Fa bound ,
+2^32), and reduces that modulo
+.Fa bound .
 .Pp
-The
-.Fn arc4random_stir
-function reads data from
-.Pa /dev/urandom
-and uses it to permute the S-Boxes via
-.Fn arc4random_addrandom .
+The PRNG state is per-thread, unless memory allocation fails inside the
+library, in which case some threads may share global PRNG state with a
+mutex.
+The global PRNG state is zeroed on fork in the parent via
+.Xr pthread_atfork 3 ,
+and the per-thread PRNG state is zeroed on fork in the child via
+.Xr minherit 2
+with
+.Dv MAP_INHERIT_ZERO ,
+so that the child cannot reuse or see the parent's PRNG state.
+The PRNG state is reseeded automatically from the system entropy pool
+on the first use of an
+.Nm
+function after zeroing.
 .Pp
-There is no need to call
+The first use of an
+.Nm
+function may abort the process in the highly unlikely event that
+library initialization necessary to implement the security model fails.
+Additionally,
 .Fn arc4random_stir
-before using
-.Fn arc4random ,
-since
-.Fn arc4random
-automatically initializes itself.
+and
+.Fn arc4random_addrandom
+may abort the process in the highly unlikely event that the operating
+system fails to provide entropy.
 .Sh SEE ALSO
 .Xr rand 3 ,
-.Xr rand48 3 ,
-.Xr random 3
-.Sh HISTORY
-An algorithm called
-.Pa RC4
-was designed by RSA Data Security, Inc.
-It was considered a trade secret, but not trademarked.
-Because it was a trade secret, it obviously could not be patented.
-A clone of this was posted anonymously to USENET and confirmed to
-be equivalent by several sources who had access to the original cipher.
-Because of the trade secret situation, RSA Data Security, Inc. can do
-nothing about the release of the ARC4 algorithm.
-Since
-.Pa RC4
-used to be a trade secret, the cipher is now referred to as
-.Pa ARC4 .
+.Xr random 3 ,
+.Xr cprng 9
+.Rs
+.%A Daniel J. Bernstein
+.%T ChaCha, a variant of Salsa20
+.%D 2008-01-28
+.%O Document ID: 4027b5256e17b9796842e6d0f68b0b5e
+.%U http://cr.yp.to/papers.html#chacha
+.Re
+.Sh BUGS
+There is no way to get deterministic, reproducible results out of
+.Nm
+for testing purposes.
+.Pp
+The name
+.Sq arc4random
+was chosen for hysterical raisins, because it was originally
+implemented using the RC4 stream cipher, which is now known to be
+badly enough biased to admit practical attacks in the real world.
+Unfortunately, the library found widespread adoption and the name
+stuck before anyone recognized that it was silly.
 .Pp
-These functions first appeared in
-.Ox 2.1 .
+The signature of
+.Fn arc4random_addrandom
+is silly.
+There is no reason to require casts or accept negative lengths:
+it should take a
+.Vt void *
+buffer and a
+.Vt size_t
+length.
+But it's too late to change that now.
+.Pp
+.Fn arc4random_uniform
+does not help to choose integers in [0, n) uniformly at random when n >
+2^32.

Index: src/lib/libc/gen/arc4random.c
diff -u src/lib/libc/gen/arc4random.c:1.25 src/lib/libc/gen/arc4random.c:1.26
--- src/lib/libc/gen/arc4random.c:1.25	Sat Jul 19 14:53:22 2014
+++ src/lib/libc/gen/arc4random.c	Sun Nov 16 20:33:04 2014
@@ -1,46 +1,75 @@
-/*	$NetBSD: arc4random.c,v 1.25 2014/07/19 14:53:22 roy Exp $	*/
-/*	$OpenBSD: arc4random.c,v 1.6 2001/06/05 05:05:38 pvalchev Exp $	*/
+/*	$NetBSD: arc4random.c,v 1.26 2014/11/16 20:33:04 riastradh Exp $	*/
 
-/*
- * Arc4 random number generator for OpenBSD.
- * Copyright 1996 David Mazieres <[email protected]>.
+/*-
+ * Copyright (c) 2014 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Taylor R. Campbell.
  *
- * Modification and redistribution in source and binary forms is
- * permitted provided that due credit is given to the author and the
- * OpenBSD project by leaving this copyright notice intact.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
- * This code is derived from section 17.1 of Applied Cryptography,
- * second edition, which describes a stream cipher allegedly
- * compatible with RSA Labs "RC4" cipher (the actual description of
- * which is a trade secret).  The same algorithm is used as a stream
- * cipher called "arcfour" in Tatu Ylonen's ssh package.
+ * Legacy arc4random(3) API from OpenBSD reimplemented using the
+ * ChaCha20 PRF, with per-thread state.
+ *
+ * Security model:
+ * - An attacker who sees some outputs cannot predict past or future
+ *   outputs.
+ * - An attacker who sees the PRNG state cannot predict past outputs.
+ * - An attacker who sees a child's PRNG state cannot predict past or
+ *   future outputs in the parent, or in other children.
+ *
+ * The arc4random(3) API may abort the process if:
  *
- * Here the stream cipher has been modified always to include the time
- * when initializing the state.  That makes it impossible to
- * regenerate the same random sequence twice, so this can't be used
- * for encryption, but will generate good random numbers.
+ * (a) the crypto self-test fails,
+ * (b) pthread_atfork or thr_keycreate fail, or
+ * (c) sysctl(KERN_ARND) fails when reseeding the PRNG.
  *
- * RC4 is a registered trademark of RSA Laboratories.
+ * The crypto self-test, pthread_atfork, and thr_keycreate occur only
+ * once, on the first use of any of the arc4random(3) API.  KERN_ARND
+ * is unlikely to fail later unless the kernel is seriously broken.
  */
 
 #include <sys/cdefs.h>
-#if defined(LIBC_SCCS) && !defined(lint)
-__RCSID("$NetBSD: arc4random.c,v 1.25 2014/07/19 14:53:22 roy Exp $");
-#endif /* LIBC_SCCS and not lint */
+__RCSID("$NetBSD: arc4random.c,v 1.26 2014/11/16 20:33:04 riastradh Exp $");
 
 #include "namespace.h"
 #include "reentrant.h"
-#include <fcntl.h>
-#include <pthread.h>
+
+#include <sys/bitops.h>
+#include <sys/endian.h>
+#include <sys/errno.h>
+#include <sys/mman.h>
+#include <sys/sysctl.h>
+
+#include <assert.h>
+#include <sha2.h>
 #include <stdbool.h>
+#include <stdint.h>
 #include <stdlib.h>
+#include <string.h>
 #include <unistd.h>
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/time.h>
-#include <sys/sysctl.h>
 
 #ifdef __weak_alias
 __weak_alias(arc4random,_arc4random)
@@ -50,286 +79,713 @@ __weak_alias(arc4random_stir,_arc4random
 __weak_alias(arc4random_uniform,_arc4random_uniform)
 #endif
 
-#define REKEY_BYTES	1600000
+/*
+ * For standard ChaCha, use le32dec/le32enc.  We don't need that for
+ * the purposes of a nondeterministic random number generator -- we
+ * don't need to be bit-for-bit compatible over any wire.
+ */
 
-struct arc4_stream {
-	bool inited;
-	uint8_t i;
-	uint8_t j;
-	uint8_t s[(uint8_t)~0u + 1u];	/* 256 to you and me */
-	size_t count;
-	mutex_t mtx;
-};
+static inline uint32_t
+crypto_le32dec(const void *p)
+{
+	uint32_t v;
 
-#ifdef _REENTRANT
-#define LOCK(rs)	do { \
-				if (__isthreaded) mutex_lock(&(rs)->mtx); \
-			} while (/*CONSTCOND*/ 0)
-#define UNLOCK(rs)	do { \
-				if (__isthreaded) mutex_unlock(&(rs)->mtx); \
-			} while (/*CONSTCOND*/ 0)
-#else
-#define LOCK(rs)
-#define UNLOCK(rs)
-#endif
+	(void)memcpy(&v, p, sizeof v);
 
-#define S(n) (n)
-#define S4(n) S(n), S(n + 1), S(n + 2), S(n + 3)
-#define S16(n) S4(n), S4(n + 4), S4(n + 8), S4(n + 12)
-#define S64(n) S16(n), S16(n + 16), S16(n + 32), S16(n + 48)
-#define S256 S64(0), S64(64), S64(128), S64(192)
-
-static struct arc4_stream rs = { .inited = false,
-		.i = 0xff, .j = 0, .s = { S256 },
-		.count = 0, .mtx = MUTEX_INITIALIZER };
-
-#undef S
-#undef S4
-#undef S16
-#undef S64
-#undef S256
-
-static inline void arc4_addrandom(struct arc4_stream *, u_char *, int);
-static __noinline void arc4_stir(struct arc4_stream *);
-static inline uint8_t arc4_getbyte(struct arc4_stream *);
-static inline uint32_t arc4_getword(struct arc4_stream *);
+	return v;
+}
 
-#ifdef _REENTRANT
-static void
-arc4_fork_prepare(void)
+static inline void
+crypto_le32enc(void *p, uint32_t v)
 {
 
-	LOCK(&rs);
+	(void)memcpy(p, &v, sizeof v);
 }
 
+/* ChaCha core */
+
+#define	crypto_core_OUTPUTBYTES	64
+#define	crypto_core_INPUTBYTES	16
+#define	crypto_core_KEYBYTES	32
+#define	crypto_core_CONSTBYTES	16
+
+#define	crypto_core_ROUNDS	8
+
+static uint32_t
+rotate(uint32_t u, unsigned c)
+{
+
+	return (u << c) | (u >> (32 - c));
+}
+
+#define	QUARTERROUND(a, b, c, d) do {					      \
+	(a) += (b); (d) ^= (a); (d) = rotate((d), 16);			      \
+	(c) += (d); (b) ^= (c); (b) = rotate((b), 12);			      \
+	(a) += (b); (d) ^= (a); (d) = rotate((d),  8);			      \
+	(c) += (d); (b) ^= (c); (b) = rotate((b),  7);			      \
+} while (0)
+
+const uint8_t crypto_core_constant32[16] = "expand 32-byte k";
+
 static void
-arc4_fork_parent(void)
+crypto_core(uint8_t *out, const uint8_t *in, const uint8_t *k,
+    const uint8_t *c)
 {
+	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
+	uint32_t j0,j1,j2,j3,j4,j5,j6,j7,j8,j9,j10,j11,j12,j13,j14,j15;
+	int i;
+
+	j0 = x0 = crypto_le32dec(c + 0);
+	j1 = x1 = crypto_le32dec(c + 4);
+	j2 = x2 = crypto_le32dec(c + 8);
+	j3 = x3 = crypto_le32dec(c + 12);
+	j4 = x4 = crypto_le32dec(k + 0);
+	j5 = x5 = crypto_le32dec(k + 4);
+	j6 = x6 = crypto_le32dec(k + 8);
+	j7 = x7 = crypto_le32dec(k + 12);
+	j8 = x8 = crypto_le32dec(k + 16);
+	j9 = x9 = crypto_le32dec(k + 20);
+	j10 = x10 = crypto_le32dec(k + 24);
+	j11 = x11 = crypto_le32dec(k + 28);
+	j12 = x12 = crypto_le32dec(in + 0);
+	j13 = x13 = crypto_le32dec(in + 4);
+	j14 = x14 = crypto_le32dec(in + 8);
+	j15 = x15 = crypto_le32dec(in + 12);
+
+	for (i = crypto_core_ROUNDS; i > 0; i -= 2) {
+		QUARTERROUND( x0, x4, x8,x12);
+		QUARTERROUND( x1, x5, x9,x13);
+		QUARTERROUND( x2, x6,x10,x14);
+		QUARTERROUND( x3, x7,x11,x15);
+		QUARTERROUND( x0, x5,x10,x15);
+		QUARTERROUND( x1, x6,x11,x12);
+		QUARTERROUND( x2, x7, x8,x13);
+		QUARTERROUND( x3, x4, x9,x14);
+	}
 
-	UNLOCK(&rs);
+	crypto_le32enc(out + 0, x0 + j0);
+	crypto_le32enc(out + 4, x1 + j1);
+	crypto_le32enc(out + 8, x2 + j2);
+	crypto_le32enc(out + 12, x3 + j3);
+	crypto_le32enc(out + 16, x4 + j4);
+	crypto_le32enc(out + 20, x5 + j5);
+	crypto_le32enc(out + 24, x6 + j6);
+	crypto_le32enc(out + 28, x7 + j7);
+	crypto_le32enc(out + 32, x8 + j8);
+	crypto_le32enc(out + 36, x9 + j9);
+	crypto_le32enc(out + 40, x10 + j10);
+	crypto_le32enc(out + 44, x11 + j11);
+	crypto_le32enc(out + 48, x12 + j12);
+	crypto_le32enc(out + 52, x13 + j13);
+	crypto_le32enc(out + 56, x14 + j14);
+	crypto_le32enc(out + 60, x15 + j15);
 }
+
+/* ChaCha self-test */
+
+#ifdef _DIAGNOSTIC
+
+/*
+ * Test vector for ChaCha20 from
+ * <http://tools.ietf.org/html/draft-strombergson-chacha-test-vectors-00>,
+ * test vectors for ChaCha12 and ChaCha8 and for big-endian machines
+ * generated by the same crypto_core code with crypto_core_ROUNDS and
+ * crypto_le32enc/dec varied.
+ */
+
+static const uint8_t crypto_core_selftest_vector[64] = {
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+#  if crypto_core_ROUNDS == 8
+	0x3e,0x00,0xef,0x2f,0x89,0x5f,0x40,0xd6,
+	0x7f,0x5b,0xb8,0xe8,0x1f,0x09,0xa5,0xa1,
+	0x2c,0x84,0x0e,0xc3,0xce,0x9a,0x7f,0x3b,
+	0x18,0x1b,0xe1,0x88,0xef,0x71,0x1a,0x1e,
+	0x98,0x4c,0xe1,0x72,0xb9,0x21,0x6f,0x41,
+	0x9f,0x44,0x53,0x67,0x45,0x6d,0x56,0x19,
+	0x31,0x4a,0x42,0xa3,0xda,0x86,0xb0,0x01,
+	0x38,0x7b,0xfd,0xb8,0x0e,0x0c,0xfe,0x42,
+#  elif crypto_core_ROUNDS == 12
+	0x9b,0xf4,0x9a,0x6a,0x07,0x55,0xf9,0x53,
+	0x81,0x1f,0xce,0x12,0x5f,0x26,0x83,0xd5,
+	0x04,0x29,0xc3,0xbb,0x49,0xe0,0x74,0x14,
+	0x7e,0x00,0x89,0xa5,0x2e,0xae,0x15,0x5f,
+	0x05,0x64,0xf8,0x79,0xd2,0x7a,0xe3,0xc0,
+	0x2c,0xe8,0x28,0x34,0xac,0xfa,0x8c,0x79,
+	0x3a,0x62,0x9f,0x2c,0xa0,0xde,0x69,0x19,
+	0x61,0x0b,0xe8,0x2f,0x41,0x13,0x26,0xbe,
+#  elif crypto_core_ROUNDS == 20
+	0x76,0xb8,0xe0,0xad,0xa0,0xf1,0x3d,0x90,
+	0x40,0x5d,0x6a,0xe5,0x53,0x86,0xbd,0x28,
+	0xbd,0xd2,0x19,0xb8,0xa0,0x8d,0xed,0x1a,
+	0xa8,0x36,0xef,0xcc,0x8b,0x77,0x0d,0xc7,
+	0xda,0x41,0x59,0x7c,0x51,0x57,0x48,0x8d,
+	0x77,0x24,0xe0,0x3f,0xb8,0xd8,0x4a,0x37,
+	0x6a,0x43,0xb8,0xf4,0x15,0x18,0xa1,0x1c,
+	0xc3,0x87,0xb6,0x69,0xb2,0xee,0x65,0x86,
+#  else
+#    error crypto_core_ROUNDS must be 8, 12, or 20.
+#  endif
+#elif _BYTE_ORDER == _BIG_ENDIAN
+#  if crypto_core_ROUNDS == 8
+	0x9a,0x13,0x07,0xe3,0x38,0x18,0x9e,0x99,
+	0x15,0x37,0x16,0x4d,0x04,0xe6,0x48,0x9a,
+	0x07,0xd6,0xe8,0x7a,0x02,0xf9,0xf5,0xc7,
+	0x3f,0xa9,0xc2,0x0a,0xe1,0xc6,0x62,0xea,
+	0x80,0xaf,0xb6,0x51,0xca,0x52,0x43,0x87,
+	0xe3,0xa6,0xa6,0x61,0x11,0xf5,0xe6,0xcf,
+	0x09,0x0f,0xdc,0x9d,0xc3,0xc3,0xbb,0x43,
+	0xd7,0xfa,0x70,0x42,0xbf,0xa5,0xee,0xa2,
+#  elif crypto_core_ROUNDS == 12
+	0xcf,0x6c,0x16,0x48,0xbf,0xf4,0xba,0x85,
+	0x32,0x69,0xd3,0x98,0xc8,0x7d,0xcd,0x3f,
+	0xdc,0x76,0x6b,0xa2,0x7b,0xcb,0x17,0x4d,
+	0x05,0xda,0xdd,0xd8,0x62,0x54,0xbf,0xe0,
+	0x65,0xed,0x0e,0xf4,0x01,0x7e,0x3c,0x05,
+	0x35,0xb2,0x7a,0x60,0xf3,0x8f,0x12,0x33,
+	0x24,0x60,0xcd,0x85,0xfe,0x4c,0xf3,0x39,
+	0xb1,0x0e,0x3e,0xe0,0xba,0xa6,0x2f,0xa9,
+#  elif crypto_core_ROUNDS == 20
+	0x83,0x8b,0xf8,0x75,0xf7,0xde,0x9d,0x8c,
+	0x33,0x14,0x72,0x28,0xd1,0xbe,0x88,0xe5,
+	0x94,0xb5,0xed,0xb8,0x56,0xb5,0x9e,0x0c,
+	0x64,0x6a,0xaf,0xd9,0xa7,0x49,0x10,0x59,
+	0xba,0x3a,0x82,0xf8,0x4a,0x70,0x9c,0x00,
+	0x82,0x2c,0xae,0xc6,0xd7,0x1c,0x2e,0xda,
+	0x2a,0xfb,0x61,0x70,0x2b,0xd1,0xbf,0x8b,
+	0x95,0xbc,0x23,0xb6,0x4b,0x60,0x02,0xec,
+#  else
+#    error crypto_core_ROUNDS must be 8, 12, or 20.
+#  endif
 #else
-#define arc4_fork_prepare	NULL
-#define arc4_fork_parent	NULL
+#  error Byte order must be little-endian or big-endian.
 #endif
+};
+
+static int
+crypto_core_selftest(void)
+{
+	const uint8_t nonce[crypto_core_INPUTBYTES] = {0};
+	const uint8_t key[crypto_core_KEYBYTES] = {0};
+	uint8_t block[64];
+	unsigned i;
+
+	crypto_core(block, nonce, key, crypto_core_constant32);
+	for (i = 0; i < 64; i++) {
+		if (block[i] != crypto_core_selftest_vector[i])
+			return EIO;
+	}
+
+	return 0;
+}
+
+#else  /* !_DIAGNOSTIC */
+
+static int
+crypto_core_selftest(void)
+{
+
+	return 0;
+}
+
+#endif
+
+/* PRNG */
+
+/*
+ * For a state s, rather than use ChaCha20 as a stream cipher to
+ * generate the concatenation ChaCha20_s(0) || ChaCha20_s(1) || ..., we
+ * split ChaCha20_s(0) into s' || x and yield x for the first request,
+ * split ChaCha20_s'(0) into s'' || y and yield y for the second
+ * request, &c.  This provides backtracking resistance: an attacker who
+ * finds s'' can't recover s' or x.
+ */
+
+#define	crypto_prng_SEEDBYTES		crypto_core_KEYBYTES
+#define	crypto_prng_MAXOUTPUTBYTES	\
+	(crypto_core_OUTPUTBYTES - crypto_prng_SEEDBYTES)
+
+struct crypto_prng {
+	uint8_t		state[crypto_prng_SEEDBYTES];
+};
 
 static void
-arc4_fork_child(void)
+crypto_prng_seed(struct crypto_prng *prng, const void *seed)
 {
 
-	/* Reset the counter to a force new stir after forking */
-	rs.count = 0;
-	UNLOCK(&rs);
+	(void)memcpy(prng->state, seed, crypto_prng_SEEDBYTES);
 }
 
-static inline void
-arc4_check_init(struct arc4_stream *as)
+static void
+crypto_prng_buf(struct crypto_prng *prng, void *buf, size_t n)
+{
+	const uint8_t nonce[crypto_core_INPUTBYTES] = {0};
+	uint8_t output[crypto_core_OUTPUTBYTES];
+
+	_DIAGASSERT(n <= crypto_prng_MAXOUTPUTBYTES);
+	__CTASSERT(sizeof prng->state + crypto_prng_MAXOUTPUTBYTES
+	    <= sizeof output);
+
+	crypto_core(output, nonce, prng->state, crypto_core_constant32);
+	(void)memcpy(prng->state, output, sizeof prng->state);
+	(void)memcpy(buf, output + sizeof prng->state, n);
+	(void)explicit_memset(output, 0, sizeof output);
+}
+
+/* One-time stream: expand short single-use secret into long secret */
+
+#define	crypto_onetimestream_SEEDBYTES	crypto_core_KEYBYTES
+
+static void
+crypto_onetimestream(const void *seed, void *buf, size_t n)
 {
+	uint32_t nonce[crypto_core_INPUTBYTES / sizeof(uint32_t)] = {0};
+	uint8_t block[crypto_core_OUTPUTBYTES];
+	uint8_t *p8, *p32;
+	const uint8_t *nonce8 = (const uint8_t *)nonce;
+	size_t ni, nb, nf;
+
 	/*
-	 * pthread_atfork(3) only allows async-signal-safe functions in
-	 * the child handler.
-	 * NetBSD's mutex_unlock is async-signal safe, other implementations
-	 * may not be.
+	 * Guarantee we can generate up to n bytes.  We have
+	 * 2^(8*INPUTBYTES) possible inputs yielding output of
+	 * OUTPUTBYTES*2^(8*INPUTBYTES) bytes.  It suffices to require
+	 * that sizeof n > (1/CHAR_BIT) log_2 n be less than
+	 * (1/CHAR_BIT) log_2 of the total output stream length.  We
+	 * have
+	 *
+	 *	log_2 (o 2^(8 i)) = log_2 o + log_2 2^(8 i)
+	 *	  = log_2 o + 8 i.
 	 */
+	__CTASSERT(CHAR_BIT*sizeof n <=
+	    (ilog2(crypto_core_OUTPUTBYTES) + 8*crypto_core_INPUTBYTES));
 
-	if (__predict_false(!as->inited)) {
-		as->inited = true;
-		pthread_atfork(arc4_fork_prepare,
-		    arc4_fork_parent, arc4_fork_child);
+	p8 = buf;
+	p32 = (uint8_t *)roundup2((uintptr_t)p8, 4);
+	ni = p32 - p8;
+	if (n < ni)
+		ni = n;
+	nb = (n - ni) / sizeof block;
+	nf = (n - ni) % sizeof block;
+
+	_DIAGASSERT(((uintptr_t)p32 & 3) == 0);
+	_DIAGASSERT(ni <= n);
+	_DIAGASSERT(nb <= (n / sizeof block));
+	_DIAGASSERT(nf <= n);
+	_DIAGASSERT(n == (ni + (nb * sizeof block) + nf));
+	_DIAGASSERT(ni < 4);
+	_DIAGASSERT(nf < sizeof block);
+
+	if (ni) {
+		crypto_core(block, nonce8, seed, crypto_core_constant32);
+		nonce[0]++;
+		(void)memcpy(p8, block, ni);
+	}
+	while (nb--) {
+		crypto_core(p32, nonce8, seed, crypto_core_constant32);
+		if (++nonce[0] == 0)
+			nonce[1]++;
+		p32 += crypto_core_OUTPUTBYTES;
+	}
+	if (nf) {
+		crypto_core(block, nonce8, seed, crypto_core_constant32);
+		if (++nonce[0] == 0)
+			nonce[1]++;
+		(void)memcpy(p32, block, nf);
 	}
+
+	if (ni | nf)
+		(void)explicit_memset(block, 0, sizeof block);
 }
 
-static inline void
-arc4_addrandom(struct arc4_stream *as, u_char *dat, int datlen)
+/* arc4random state: per-thread, per-process (zeroed in child on fork) */
+
+struct arc4random_prng {
+	struct crypto_prng	arc4_prng;
+	bool			arc4_seeded;
+};
+
+static void
+arc4random_prng_addrandom(struct arc4random_prng *prng, const void *data,
+    size_t datalen)
 {
-	uint8_t si;
-	size_t n;
+	const int mib[] = { CTL_KERN, KERN_ARND };
+	SHA256_CTX ctx;
+	uint8_t buf[crypto_prng_SEEDBYTES];
+	size_t buflen = sizeof buf;
 
-	for (n = 0; n < __arraycount(as->s); n++) {
-		as->i = (as->i + 1);
-		si = as->s[as->i];
-		as->j = (as->j + si + dat[n % datlen]);
-		as->s[as->i] = as->s[as->j];
-		as->s[as->j] = si;
-	}
+	__CTASSERT(sizeof buf == SHA256_DIGEST_LENGTH);
+
+	SHA256_Init(&ctx);
+
+	crypto_prng_buf(&prng->arc4_prng, buf, sizeof buf);
+	SHA256_Update(&ctx, buf, sizeof buf);
+
+	if (sysctl(mib, __arraycount(mib), buf, &buflen, NULL, 0) == -1)
+		abort();
+	if (buflen != sizeof buf)
+		abort();
+	SHA256_Update(&ctx, buf, sizeof buf);
+
+	if (data != NULL)
+		SHA256_Update(&ctx, data, datalen);
+
+	SHA256_Final(buf, &ctx);
+	(void)explicit_memset(&ctx, 0, sizeof ctx);
+
+	/* reseed(SHA256(prng() || sysctl(KERN_ARND) || data)) */
+	crypto_prng_seed(&prng->arc4_prng, buf);
+	(void)explicit_memset(buf, 0, sizeof buf);
+	prng->arc4_seeded = true;
 }
 
-static __noinline void
-arc4_stir(struct arc4_stream *as)
+#ifdef _REENTRANT
+static struct arc4random_prng *
+arc4random_prng_create(void)
 {
-	int rdat[32];
-	int mib[] = { CTL_KERN, KERN_URND };
-	size_t len;
-	size_t i, j;
+	struct arc4random_prng *prng;
+	const size_t size = roundup(sizeof(*prng), sysconf(_SC_PAGESIZE));
 
-	arc4_check_init(as);
+	prng = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANON, -1, 0);
+	if (prng == MAP_FAILED)
+		goto fail0;
+#ifdef MAP_INHERIT_ZERO
+	if (minherit(prng, size, MAP_INHERIT_ZERO) == -1)
+		goto fail1;
+#else
+#warning This arc4random is not fork-safe!
+#endif
 
-	/*
-	 * This code once opened and read /dev/urandom on each
-	 * call.  That causes repeated rekeying of the kernel stream
-	 * generator, which is very wasteful.  Because of application
-	 * behavior, caching the fd doesn't really help.  So we just
-	 * fill up the tank from sysctl, which is a tiny bit slower
-	 * for us but much friendlier to other entropy consumers.
-	 */
+	return prng;
 
-	for (i = 0; i < __arraycount(rdat); i++) {
-		len = sizeof(rdat[i]);
-		if (sysctl(mib, 2, &rdat[i], &len, NULL, 0) == -1)
-			abort();
-	}
+#ifdef MAP_INHERIT_ZERO
+fail1:	(void)munmap(prng, size);
+#endif
+fail0:	return NULL;
+}
+#endif
 
-	arc4_addrandom(as, (void *) &rdat, (int)sizeof(rdat));
+#ifdef _REENTRANT
+static void
+arc4random_prng_destroy(struct arc4random_prng *prng)
+{
+	const size_t size = roundup(sizeof(*prng), sysconf(_SC_PAGESIZE));
 
-	/*
-	 * Throw away the first N words of output, as suggested in the
-	 * paper "Weaknesses in the Key Scheduling Algorithm of RC4"
-	 * by Fluher, Mantin, and Shamir.  (N = 256 in our case.)
-	 */
-	for (j = 0; j < __arraycount(as->s) * sizeof(uint32_t); j++)
-		arc4_getbyte(as);
+	(void)explicit_memset(prng, 0, sizeof(*prng));
+	(void)munmap(prng, size);
+}
+#endif
+
+/* Library state */
+
+#if !defined(_REENTRANT)
+#define	mutex_lock(m)		do {} while (0)
+#define	mutex_unlock(m)		do {} while (0)
+#endif
 
-	/* Stir again after REKEY_BYTES bytes, or if the pid changes */
-	as->count = REKEY_BYTES;
+static struct arc4random_global {
+#ifdef _REENTRANT
+	mutex_t			lock;
+	thread_key_t		thread_key;
+#endif
+	struct arc4random_prng	prng;
+	bool			initialized;
+} arc4random_global = {
+#ifdef _REENTRANT
+	.lock		= MUTEX_INITIALIZER,
+#endif
+	.initialized	= false,
+};
+
+static void
+arc4random_atfork_prepare(void)
+{
+
+	mutex_lock(&arc4random_global.lock);
+	(void)explicit_memset(&arc4random_global.prng, 0,
+	    sizeof arc4random_global.prng);
 }
 
-static inline void
-arc4_stir_if_needed(struct arc4_stream *as, size_t len)
+static void
+arc4random_atfork_parent(void)
 {
 
-	if (__predict_false(as->count <= len))
-		arc4_stir(as);
-	else
-		as->count -= len;
+	mutex_unlock(&arc4random_global.lock);
 }
 
-static __inline uint8_t
-arc4_getbyte_ij(struct arc4_stream *as, uint8_t *i, uint8_t *j)
+static void
+arc4random_atfork_child(void)
 {
-	uint8_t si, sj;
 
-	*i = *i + 1;
-	si = as->s[*i];
-	*j = *j + si;
-	sj = as->s[*j];
-	as->s[*i] = sj;
-	as->s[*j] = si;
-	return (as->s[(si + sj) & 0xff]);
+	mutex_unlock(&arc4random_global.lock);
 }
 
-static inline uint8_t
-arc4_getbyte(struct arc4_stream *as)
+#ifdef _REENTRANT
+static void
+arc4random_tsd_destructor(void *p)
 {
+	struct arc4random_prng *const prng = p;
 
-	return arc4_getbyte_ij(as, &as->i, &as->j);
+	arc4random_prng_destroy(prng);
 }
+#endif
 
-static inline uint32_t
-arc4_getword(struct arc4_stream *as)
+static void
+arc4random_initialize(void)
 {
-	uint32_t val;
 
-	val = arc4_getbyte(as) << 24;
-	val |= arc4_getbyte(as) << 16;
-	val |= arc4_getbyte(as) << 8;
-	val |= arc4_getbyte(as);
-	return val;
+	mutex_lock(&arc4random_global.lock);
+	if (!arc4random_global.initialized) {
+		if (crypto_core_selftest() != 0)
+			abort();
+		if (pthread_atfork(&arc4random_atfork_prepare,
+			&arc4random_atfork_parent, &arc4random_atfork_child)
+		    != 0)
+			abort();
+#ifdef _REENTRANT
+		if (thr_keycreate(&arc4random_global.thread_key,
+			&arc4random_tsd_destructor) != 0)
+			abort();
+#endif
+		arc4random_global.initialized = true;
+	}
+	mutex_unlock(&arc4random_global.lock);
 }
 
-void
-arc4random_stir(void)
+static struct arc4random_prng *
+arc4random_prng_get(void)
 {
+	struct arc4random_prng *prng = NULL;
+
+	/* Make sure the library is initialized.  */
+	if (__predict_false(!arc4random_global.initialized))
+		arc4random_initialize();
+
+#ifdef _REENTRANT
+	/* Get or create the per-thread PRNG state.  */
+	prng = thr_getspecific(arc4random_global.thread_key);
+	if (__predict_false(prng == NULL)) {
+		prng = arc4random_prng_create();
+		thr_setspecific(arc4random_global.thread_key, prng);
+	}
+#endif
+
+	/* If we can't create it, fall back to the global PRNG.  */
+	if (__predict_false(prng == NULL)) {
+		mutex_lock(&arc4random_global.lock);
+		prng = &arc4random_global.prng;
+	}
 
-	LOCK(&rs);
-	arc4_stir(&rs);
-	UNLOCK(&rs);
+	/* Guarantee the PRNG is seeded.  */
+	if (__predict_false(!prng->arc4_seeded))
+		arc4random_prng_addrandom(prng, NULL, 0);
+
+	return prng;
 }
 
-void
-arc4random_addrandom(u_char *dat, int datlen)
+static void
+arc4random_prng_put(struct arc4random_prng *prng)
 {
 
-	LOCK(&rs);
-	arc4_stir_if_needed(&rs, datlen);
-	arc4_addrandom(&rs, dat, datlen);
-	UNLOCK(&rs);
+	/* If we had fallen back to the global PRNG, unlock it.  */
+	if (__predict_false(prng == &arc4random_global.prng))
+		mutex_unlock(&arc4random_global.lock);
 }
 
+/* Public API */
+
 uint32_t
 arc4random(void)
 {
+	struct arc4random_prng *prng;
 	uint32_t v;
 
-	LOCK(&rs);
-	arc4_stir_if_needed(&rs, sizeof(v));
-	v = arc4_getword(&rs);
-	UNLOCK(&rs);
+	prng = arc4random_prng_get();
+	crypto_prng_buf(&prng->arc4_prng, &v, sizeof v);
+	arc4random_prng_put(prng);
+
 	return v;
 }
 
 void
 arc4random_buf(void *buf, size_t len)
 {
-	uint8_t *bp = buf;
-	uint8_t *ep = bp + len;
-	uint8_t i, j;
-
-	LOCK(&rs);
-	arc4_stir_if_needed(&rs, len);
-
-	/* cache i and j - compiler can't know 'buf' doesn't alias them */
-	i = rs.i;
-	j = rs.j;
-
-	while (bp < ep)
-		*bp++ = arc4_getbyte_ij(&rs, &i, &j);
-	rs.i = i;
-	rs.j = j;
+	struct arc4random_prng *prng;
 
-	UNLOCK(&rs);
+	if (len <= crypto_prng_MAXOUTPUTBYTES) {
+		prng = arc4random_prng_get();
+		crypto_prng_buf(&prng->arc4_prng, buf, len);
+		arc4random_prng_put(prng);
+	} else {
+		uint8_t seed[crypto_onetimestream_SEEDBYTES];
+
+		prng = arc4random_prng_get();
+		crypto_prng_buf(&prng->arc4_prng, seed, sizeof seed);
+		arc4random_prng_put(prng);
+
+		crypto_onetimestream(seed, buf, len);
+		(void)explicit_memset(seed, 0, sizeof seed);
+	}
 }
 
-/*-
- * Written by Damien Miller.
- * With simplifications by Jinmei Tatuya.
- */
+uint32_t
+arc4random_uniform(uint32_t bound)
+{
+	struct arc4random_prng *prng;
+	uint32_t minimum, r;
+
+	/*
+	 * We want a uniform random choice in [0, n), and arc4random()
+	 * makes a uniform random choice in [0, 2^32).  If we reduce
+	 * that modulo n, values in [0, 2^32 mod n) will be represented
+	 * slightly more than values in [2^32 mod n, n).  Instead we
+	 * choose only from [2^32 mod n, 2^32) by rejecting samples in
+	 * [0, 2^32 mod n), to avoid counting the extra representative
+	 * of [0, 2^32 mod n).  To compute 2^32 mod n, note that
+	 *
+	 *	2^32 mod n = 2^32 mod n - 0
+	 *	  = 2^32 mod n - n mod n
+	 *	  = (2^32 - n) mod n,
+	 *
+	 * the last of which is what we compute in 32-bit arithmetic.
+	 */
+	minimum = (-bound % bound);
+
+	prng = arc4random_prng_get();
+	do crypto_prng_buf(&prng->arc4_prng, &r, sizeof r);
+	while (__predict_false(r < minimum));
+	arc4random_prng_put(prng);
+
+	return (r % bound);
+}
+
+void
+arc4random_stir(void)
+{
+	struct arc4random_prng *prng;
+
+	prng = arc4random_prng_get();
+	arc4random_prng_addrandom(prng, NULL, 0);
+	arc4random_prng_put(prng);
+}
 
 /*
- * Calculate a uniformly distributed random number less than
- * upper_bound avoiding "modulo bias".
- *
- * Uniformity is achieved by generating new random numbers
- * until the one returned is outside the range
- * [0, 2^32 % upper_bound[. This guarantees the selected
- * random number will be inside the range
- * [2^32 % upper_bound, 2^32[ which maps back to
- * [0, upper_bound[ after reduction modulo upper_bound.
+ * Silly signature here is for hysterical raisins.  Should instead be
+ * const void *data and size_t datalen.
  */
-uint32_t
-arc4random_uniform(uint32_t upper_bound)
+void
+arc4random_addrandom(u_char *data, int datalen)
 {
-	uint32_t r, min;
+	struct arc4random_prng *prng;
 
-	if (upper_bound < 2)
-		return 0;
+	_DIAGASSERT(0 <= datalen);
 
-	/* calculate (2^32 % upper_bound) avoiding 64-bit math */
-	/* ((2^32 - x) % x) == (2^32 % x) when x <= 2^31 */
-	min = (0xFFFFFFFFU - upper_bound + 1) % upper_bound;
+	prng = arc4random_prng_get();
+	arc4random_prng_addrandom(prng, data, datalen);
+	arc4random_prng_put(prng);
+}
+
+#ifdef _ARC4RANDOM_TEST
+
+#include <sys/wait.h>
+
+#include <err.h>
+#include <stdio.h>
+
+int
+main(int argc __unused, char **argv __unused)
+{
+	unsigned char gubbish[] = "random gubbish";
+	const uint8_t zero64[64] = {0};
+	uint8_t buf[2048];
+	unsigned i, a, n;
+
+	/* Test arc4random: should not be deterministic.  */
+	if (printf("arc4random: %08"PRIx32"\n", arc4random()) < 0)
+		err(1, "printf");
+
+	/* Test stirring: should definitely not be deterministic.  */
+	arc4random_stir();
+
+	/* Test small buffer.  */
+	arc4random_buf(buf, 8);
+	if (printf("arc4randombuf small:") < 0)
+		err(1, "printf");
+	for (i = 0; i < 8; i++)
+		if (printf(" %02x", buf[i]) < 0)
+			err(1, "printf");
+	if (printf("\n") < 0)
+		err(1, "printf");
+
+	/* Test addrandom: should not make the rest deterministic.  */
+	arc4random_addrandom(gubbish, sizeof gubbish);
+
+	/* Test large buffer.  */
+	arc4random_buf(buf, sizeof buf);
+	if (printf("arc4randombuf_large:") < 0)
+		err(1, "printf");
+	for (i = 0; i < sizeof buf; i++)
+		if (printf(" %02x", buf[i]) < 0)
+			err(1, "printf");
+	if (printf("\n") < 0)
+		err(1, "printf");
+
+	/* Test misaligned small and large.  */
+	for (a = 0; a < 64; a++) {
+		for (n = a; n < sizeof buf; n++) {
+			(void)memset(buf, 0, sizeof buf);
+			arc4random_buf(buf, n - a);
+			if (memcmp(buf + n - a, zero64, a) != 0)
+				errx(1, "arc4random buffer overflow 0");
+
+			(void)memset(buf, 0, sizeof buf);
+			arc4random_buf(buf + a, n - a);
+			if (memcmp(buf, zero64, a) != 0)
+				errx(1, "arc4random buffer overflow 1");
+
+			if ((2*a) <= n) {
+				(void)memset(buf, 0, sizeof buf);
+				arc4random_buf(buf + a, n - a - a);
+				if (memcmp(buf + n - a, zero64, a) != 0)
+					errx(1,
+					    "arc4random buffer overflow 2");
+			}
+		}
+	}
 
-	LOCK(&rs);
-	arc4_stir_if_needed(&rs, sizeof(r));
+	/* Test fork-safety.  */
+    {
+	pid_t pid, rpid;
+	int status;
+
+	pid = fork();
+	switch (pid) {
+	case -1:
+		err(1, "fork");
+	case 0:
+		_exit(arc4random_prng_get()->arc4_seeded);
+	default:
+		rpid = waitpid(pid, &status, 0);
+		if (rpid == -1)
+			err(1, "waitpid");
+		if (rpid != pid)
+			errx(1, "waitpid returned wrong pid"
+			    ": %"PRIdMAX" != %"PRIdMAX,
+			    (intmax_t)rpid,
+			    (intmax_t)pid);
+		if (WIFEXITED(status)) {
+			if (WEXITSTATUS(status) != 0)
+				errx(1, "child exited with %d",
+				    WEXITSTATUS(status));
+		} else if (WIFSIGNALED(status)) {
+			errx(1, "child terminated on signal %d",
+			    WTERMSIG(status));
+		} else {
+			errx(1, "child died mysteriously: %d", status);
+		}
+	}
+    }
 
-	/*
-	 * This could theoretically loop forever but each retry has
-	 * p > 0.5 (worst case, usually far better) of selecting a
-	 * number inside the range we need, so it should rarely need
-	 * to re-roll (at all).
-	 */
-	do
-		r = arc4_getword(&rs);
-	while (r < min);
-	UNLOCK(&rs);
+	/* XXX Test multithreaded fork safety...?  */
 
-	return r % upper_bound;
+	return 0;
 }
+#endif

Reply via email to