On Wed, Apr 16, 2014 at 09:52:22PM -0400, Thor Lancelot Simon wrote:
>
> Attached is a patch which makes cprng_fast per-CPU and lockless. *IT IS NOT
> WELL TESTED YET (I haven't even run test vectors) AND IS ONLY FOR REVIEW.*
New diff, with some previously missing files added and incorporating some more comments from Taylor.

Thor
? kern/.init_main.c.swp ? sys/.cprng.h.swo Index: conf/files =================================================================== RCS file: /cvsroot/src/sys/conf/files,v retrieving revision 1.1090 diff -u -p -r1.1090 files --- conf/files 1 Apr 2014 17:49:30 -0000 1.1090 +++ conf/files 17 Apr 2014 03:17:18 -0000 @@ -160,6 +160,7 @@ include "crypto/cast128/files.cast128" include "crypto/rijndael/files.rijndael" include "crypto/skipjack/files.skipjack" include "crypto/camellia/files.camellia" +include "crypto/hc128/files.hc128" # General-purpose crypto processing framework. include "opencrypto/files.opencrypto" Index: crypto/hc128/files.hc128 =================================================================== RCS file: crypto/hc128/files.hc128 diff -N crypto/hc128/files.hc128 --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ crypto/hc128/files.hc128 17 Apr 2014 03:17:18 -0000 @@ -0,0 +1,5 @@ +# $NetBSD: $ + +define hc128 + +file crypto/hc128/hc128.c Index: crypto/hc128/hc128.c =================================================================== RCS file: crypto/hc128/hc128.c diff -N crypto/hc128/hc128.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ crypto/hc128/hc128.c 17 Apr 2014 03:17:18 -0000 @@ -0,0 +1,162 @@ +/* $NetBSD: $ */ + +/* Author: Lucas Clemente Vella + * Source code placed into public domain. */ + +/* + * This is the HC-128 stream cipher, one of the eStream Profile 1 + * selected ciphers. It is based on SHA-256. + * + * This cipher has a very large key setup time (estimated at 74,000 + * cycles on a modern x86 CPU) but is quite efficient once keyed: + * around 3 cycles per byte. Since it produces a stream of 32-bit + * values, it is well suited for use as a kernel RNG, usually requiring + * no output buffering and wasting little of the output stream. 
+ */ + +#include <crypto/hc128/hc128.h> + +static inline uint32_t +rotl(uint32_t x, unsigned int n) +{ + return (x << n) | (x >> (32-n)); +} + +static inline unsigned int +m512(unsigned int x) +{ + static const unsigned int mask = 0x1ff; /* 511 mask, for mod 512 */ + return x & mask; +} + +static inline uint32_t +f1(uint32_t x) +{ + return rotl(x, 25) ^ rotl(x, 14) ^ (x >> 3); +} + +static inline uint32_t +f2(uint32_t x) +{ + return rotl(x, 15) ^ rotl(x, 13) ^ (x >> 10); +} + +static inline uint32_t +g1(uint32_t x, uint32_t y, uint32_t z) +{ + return (rotl(x, 22) ^ rotl(z, 9)) + rotl(y, 24); +} + +static inline uint32_t +g2(uint32_t x, uint32_t y, uint32_t z) +{ + return (rotl(x, 10) ^ rotl(z, 23)) + rotl(y, 8); +} + +static inline uint32_t +h(const uint32_t *qp, uint32_t x) +{ + return qp[x & 0xFFu] + qp[256 + ((x >> 16) & 0xFFu)]; +} + +static inline uint32_t +round_expression(uint32_t *pq, const uint32_t *qp, + uint32_t (*g)(uint32_t x, uint32_t y, uint32_t z), + uint16_t i) +{ + pq[i] += g(pq[m512(i-3u)], pq[m512(i-10u)], pq[m512(i+1u)]); + return pq[i] ^ h(qp, pq[m512(i-12u)]); +} + +static inline uint32_t +pack_littleendian(const uint8_t *v) +{ +#ifdef LITTLE_ENDIAN + return *((const uint32_t*)v); +#else + return (uint32_t)v[3] << 24 + | (uint32_t)v[2] << 16 + | (uint32_t)v[1] << 8 + | (uint32_t)v[0]; +#endif +} + +static inline void +unpack_littleendian(uint32_t value, uint8_t *v) +{ +#if BYTE_ORDER == LITTLE_ENDIAN + *((uint32_t*)v) = value; +#else + int i; + for(i = 0; i < 4; ++i) { + v[i] = value >> (i * 8); + } +#endif +} + +/** Initialize HC-128 state with key and IV. + * + * Contrary to the other implemented algorithms, the key and IV are taken + * in a single function to initialize the state. This approach was chosen + * here because of the nature of the algorithm, that keeps no intermediate + * state between the key setting and the IV setting. + * + * Notice: an IV should never be reused. 
+ * + * @param state The uninitialized state, it will be ready to + * encryption/decryption afterwards. + * @param key 16 bytes buffer containing the 128-bit key. The buffer must + * be aligned to at least 4 bytes (depending on the platform it may or may + * not work with unaligned memory). + * @param iv 16 bytes buffer containing the IV. + */ +void +hc128_init(hc128_state_t *state, const uint8_t *key, const uint8_t *iv) +{ + unsigned int i; + uint32_t w[1280], *p = state->p, *q = state->q; + + for(i = 0; i < 4; ++i) { + w[i] = w[i+4] = pack_littleendian(key + 4 * i); + w[i+8] = w[i+12] = pack_littleendian(iv + 4 * i); + } + + for(i = 16; i < 1280; ++i) { + w[i] = f2(w[i-2]) + w[i-7] + f1(w[i-15]) + w[i-16] + i; + } + + for(i = 0; i < 512; ++i) { + p[i] = w[i+256]; + q[i] = w[i+768]; + } + + for(i = 0; i < 512; ++i) { + p[i] = round_expression(p, q, g1, i); + } + + for(i = 0; i < 512; ++i) { + q[i] = round_expression(q, p, g2, i); + } + + state->i = 0; +} + +/** Performs one round of the algorithm. + * + * @param state The algorithm state. + * @param stream A 4 byte buffer where the generated stream will be stored. + * Must be aligned. + */ +void +hc128_extract(hc128_state_t *state, uint8_t *stream) +{ + register uint32_t ret; + + uint16_t i = state->i; + state->i = (i + 1u) & 1023u; + + ret = (i < 512) ? round_expression(state->p, state->q, g1, i) : + round_expression(state->q, state->p, g2, m512(i)); + + unpack_littleendian(ret, stream); +} Index: crypto/hc128/hc128.h =================================================================== RCS file: crypto/hc128/hc128.h diff -N crypto/hc128/hc128.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ crypto/hc128/hc128.h 17 Apr 2014 03:17:18 -0000 @@ -0,0 +1,22 @@ +/* $NetBSD: $ */ + +/* Author: Lucas Clemente Vella + * Source code placed into public domain. 
*/ + +#ifndef _CRYPTO_HC128_H_ +#define _CRYPTO_HC128_H_ + +#include <sys/types.h> + +typedef struct +{ + uint32_t p[512]; + uint32_t q[512]; + uint16_t i; +} hc128_state_t; + +void hc128_init(hc128_state_t *, const uint8_t *, const uint8_t *); + +void hc128_extract(hc128_state_t *, uint8_t *); + +#endif Index: kern/init_main.c =================================================================== RCS file: /cvsroot/src/sys/kern/init_main.c,v retrieving revision 1.454.2.1 diff -u -p -r1.454.2.1 init_main.c --- kern/init_main.c 7 Apr 2014 02:20:00 -0000 1.454.2.1 +++ kern/init_main.c 17 Apr 2014 03:17:19 -0000 @@ -497,6 +497,8 @@ main(void) /* Initialize the kernel strong PRNG. */ kern_cprng = cprng_strong_create("kernel", IPL_VM, CPRNG_INIT_ANY|CPRNG_REKEY_ANY); + + cprng_fast_init(); /* Initialize interfaces. */ ifinit1(); Index: kern/subr_cprng.c =================================================================== RCS file: /cvsroot/src/sys/kern/subr_cprng.c,v retrieving revision 1.23 diff -u -p -r1.23 subr_cprng.c --- kern/subr_cprng.c 17 Jan 2014 02:12:48 -0000 1.23 +++ kern/subr_cprng.c 17 Apr 2014 03:17:19 -0000 @@ -43,6 +43,7 @@ __KERNEL_RCSID(0, "$NetBSD: subr_cprng.c #include <sys/kmem.h> #include <sys/lwp.h> #include <sys/once.h> +#include <sys/percpu.h> #include <sys/poll.h> /* XXX POLLIN/POLLOUT/&c. 
*/ #include <sys/select.h> #include <sys/systm.h> @@ -54,6 +55,7 @@ __KERNEL_RCSID(0, "$NetBSD: subr_cprng.c #endif #include <crypto/nist_ctr_drbg/nist_ctr_drbg.h> +#include <crypto/hc128/hc128.h> #if defined(__HAVE_CPU_COUNTER) #include <machine/cpu_counter.h> @@ -72,6 +74,13 @@ static void cprng_strong_rngtest(struct static rndsink_callback_t cprng_strong_rndsink_callback; +percpu_t *percpu_cprng_fast_ctx; +static int cprng_fast_initialized; + +static void cprng_fast_randrekey(cprng_fast_ctx_t *); + +void *cprng_fast_rekey_softintr = NULL; + void cprng_init(void) { @@ -103,10 +112,11 @@ cprng_counter(void) return cpu_counter32(); #endif if (__predict_false(cold)) { + static int ctr; /* microtime unsafe if clock not running yet */ - return 0; + return ctr++; } - microtime(&tv); + getmicrotime(&tv); return (tv.tv_sec * 1000000 + tv.tv_usec); } @@ -532,8 +542,16 @@ sysctl_kern_urnd(SYSCTLFN_ARGS) } /* - * sysctl helper routine for kern.arandom node. Picks a random number - * for you. + * sysctl helper routine for kern.arandom node. Fills the supplied + * structure with random data for you. + * + * This node was originally declared as type "int" but its implementation + * in OpenBSD, whence it came, would happily return up to 8K of data if + * requested. Evidently this was used to key RC4 in userspace. + * + * In NetBSD, the libc stack-smash-protection code reads 64 bytes + * from here at every program startup. So though it would be nice + * to make this node return only 32 or 64 bits, we can't. Too bad! 
*/ static int sysctl_kern_arnd(SYSCTLFN_ARGS) @@ -542,31 +560,145 @@ sysctl_kern_arnd(SYSCTLFN_ARGS) void *v; struct sysctlnode node = *rnode; - if (*oldlenp == 0) + switch (*oldlenp) { + case 0: return 0; + default: + if (*oldlenp > 256) { + return E2BIG; + } + v = kmem_alloc(*oldlenp, KM_SLEEP); + cprng_fast(v, *oldlenp); + node.sysctl_data = v; + node.sysctl_size = *oldlenp; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + kmem_free(v, *oldlenp); + return error; + } +} + +static void +cprng_fast_randrekey(cprng_fast_ctx_t *ctx) +{ + uint8_t key[16], iv[16]; + hc128_state_t tempstate; + int s; + + int have_initial = rnd_initial_entropy; + + cprng_strong(kern_cprng, key, sizeof(key), FASYNC); + cprng_strong(kern_cprng, iv, sizeof(iv), FASYNC); + + /* Rekey the hc128 state - expensive, don't do this at splhigh. */ + hc128_init(&ctx->hc128, key, iv); + explicit_memset(key, 0, sizeof(key)); + explicit_memset(iv, 0, sizeof(iv)); + + s = splhigh(); + memcpy(&ctx->hc128, &tempstate, sizeof(tempstate)); + splx(s); + + explicit_memset(&tempstate, 0, sizeof(tempstate)); + /* - * This code used to allow sucking 8192 bytes at a time out - * of the kernel arc4random generator. Evidently there is some - * very old OpenBSD application code that may try to do this. - * - * Note that this node is documented as type "INT" -- 4 or 8 - * bytes, not 8192. - * - * We continue to support this abuse of the "len" pointer here - * but only 256 bytes at a time, as, anecdotally, the actual - * application use here was to generate RC4 keys in userspace. - * - * Support for such large requests will probably be removed - * entirely in the future. + * Reset for next reseed cycle. */ - if (*oldlenp > 256) - return E2BIG; + ctx->nextreseed = time_uptime + + (have_initial ? 
CPRNGF_RESEED_SECONDS : 0); + ctx->numbytes = 0; +} + +static void +cprng_fast_init_ctx(void *v, + void *arg __unused, + struct cpu_info * ci __unused) +{ + cprng_fast_ctx_t *ctx = v; + cprng_fast_randrekey(ctx); +} + +static void +cprng_fast_rekey_one(void *arg __unused) +{ + cprng_fast_ctx_t *ctx = percpu_getref(percpu_cprng_fast_ctx); + + cprng_fast_randrekey(ctx); + percpu_putref(percpu_cprng_fast_ctx); +} + +void +cprng_fast_init(void) +{ + percpu_cprng_fast_ctx = percpu_alloc(sizeof(cprng_fast_ctx_t)); + percpu_foreach(percpu_cprng_fast_ctx, cprng_fast_init_ctx, NULL); + cprng_fast_initialized++; + cprng_fast_rekey_softintr = softint_establish(SOFTINT_CLOCK|SOFTINT_MPSAFE, + cprng_fast_rekey_one, NULL); +} + +size_t +_cprng_fast_exact(void *p, size_t len) +{ + uint32_t *pi = p, *iter; + int s; + size_t ilen = len / sizeof(*pi); + cprng_fast_ctx_t *ctx = percpu_getref(percpu_cprng_fast_ctx); + + KASSERT(cprng_fast_initialized); + KASSERT(0 == ((uintptr_t)p % sizeof(uint32_t))); + KASSERT(ilen * sizeof(*pi) == len); + + _cprng_fast_checkrekey(ctx); + + s = splhigh(); + for (iter = pi; iter < pi + ilen; iter++) { + hc128_extract(&ctx->hc128, (uint8_t *)iter); + } + splx(s); + + ctx->numbytes += len; + percpu_putref(percpu_cprng_fast_ctx); + return len; +} + +size_t +_cprng_fast_inexact(void *p, size_t len) +{ + uint8_t *pc = p; + uint32_t *pi = p, tmp, *iter; + int s; + size_t initial_len, aligned_len, final_len, main_len; + cprng_fast_ctx_t *ctx = percpu_getref(percpu_cprng_fast_ctx); + + KASSERT(cprng_fast_initialized); + + initial_len = sizeof(uint32_t) - ((uintptr_t)pc % sizeof(uint32_t)); + aligned_len = len - initial_len; + final_len = aligned_len % sizeof(uint32_t); + main_len = aligned_len - final_len; + + main_len /= sizeof(uint32_t); + + _cprng_fast_checkrekey(ctx); + + s = splhigh(); + if (initial_len) { + hc128_extract(&ctx->hc128, (uint8_t *)&tmp); + memcpy(pc, &tmp, initial_len); + pi = (uint32_t *)pc; + } + + for (iter = pi; iter < pi + main_len ; 
iter++) { + hc128_extract(&ctx->hc128, (uint8_t *)iter); + } + + if (final_len) { + hc128_extract(&ctx->hc128, (uint8_t *)&tmp); + memcpy(pi + main_len, &tmp, final_len); + } + splx(s); - v = kmem_alloc(*oldlenp, KM_SLEEP); - cprng_fast(v, *oldlenp); - node.sysctl_data = v; - node.sysctl_size = *oldlenp; - error = sysctl_lookup(SYSCTLFN_CALL(&node)); - kmem_free(v, *oldlenp); - return error; + ctx->numbytes += len; + percpu_putref(percpu_cprng_fast_ctx); + return len; } Index: lib/libkern/Makefile.libkern =================================================================== RCS file: /cvsroot/src/sys/lib/libkern/Makefile.libkern,v retrieving revision 1.32.2.1 diff -u -p -r1.32.2.1 Makefile.libkern --- lib/libkern/Makefile.libkern 7 Apr 2014 01:10:55 -0000 1.32.2.1 +++ lib/libkern/Makefile.libkern 17 Apr 2014 03:17:19 -0000 @@ -54,7 +54,7 @@ SRCS+= cpuset.c inet_addr.c intoa.c SRCS+= bswap64.c .endif SRCS+= md4c.c md5c.c rmd160.c sha1.c sha2.c murmurhash.c -SRCS+= pmatch.c arc4random.c bcd.c mcount.c mertwist.c crc32.c +SRCS+= pmatch.c bcd.c mcount.c mertwist.c crc32.c SRCS+= ppath_kmem_alloc.c Index: lib/libkern/arc4random.c =================================================================== RCS file: lib/libkern/arc4random.c diff -N lib/libkern/arc4random.c --- lib/libkern/arc4random.c 24 Jun 2013 04:21:20 -0000 1.35 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,277 +0,0 @@ -/* $NetBSD: arc4random.c,v 1.35 2013/06/24 04:21:20 riastradh Exp $ */ - -/*- - * Copyright (c) 2002, 2011 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Thor Lancelot Simon. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/*- - * THE BEER-WARE LICENSE - * - * <d...@freebsd.org> wrote this file. As long as you retain this notice you - * can do whatever you want with this stuff. If we meet some day, and you - * think this stuff is worth it, you can buy me a beer in return. - * - * Dan Moschuk - * - * $FreeBSD: src/sys/libkern/arc4random.c,v 1.9 2001/08/30 12:30:58 bde Exp $ - */ - -#include <sys/cdefs.h> - -#include <sys/param.h> -#include <sys/types.h> -#include <sys/rngtest.h> -#include <sys/systm.h> -#include <sys/time.h> - -#ifdef _STANDALONE -/* - * XXX This is a load of bollocks. Standalone has no entropy source. - * This module should be removed from libkern once we confirm nobody is - * using it. 
- */ -#define time_uptime 1 -typedef struct kmutex *kmutex_t; -#define MUTEX_DEFAULT 0 -#define IPL_VM 0 -static void mutex_init(kmutex_t *m, int t, int i) {} -static void mutex_spin_enter(kmutex_t *m) {} -static void mutex_spin_exit(kmutex_t *m) {} -typedef void rndsink_callback_t(void *, const void *, size_t); -struct rndsink; -static struct rndsink *rndsink_create(size_t n, rndsink_callback_t c, void *a) - { return NULL; } -static bool rndsink_request(struct rndsink *s, void *b, size_t n) - { return true; } -#else /* !_STANDALONE */ -#include <sys/kernel.h> -#include <sys/mutex.h> -#include <sys/rndsink.h> -#endif /* _STANDALONE */ - -#include <lib/libkern/libkern.h> - -/* - * The best known attack that distinguishes RC4 output from a random - * bitstream requires 2^25 bytes. (see Paul and Preneel, Analysis of - * Non-fortuitous Predictive States of the RC4 Keystream Generator. - * INDOCRYPT 2003, pp52 ??? 67). - * - * However, we discard the first 1024 bytes of output, avoiding the - * biases detected in this paper. The best current attack that - * can distinguish this "RC4[drop]" output seems to be Fleuhrer & - * McGrew's attack which requires 2^30.6 bytes of output: - * Fluhrer and McGrew, Statistical Analysis of the Alleged RC4 - * Keystream Generator. FSE 2000, pp19 ??? 30 - * - * We begin trying to rekey at 2^24 bytes, and forcibly rekey at 2^29 bytes - * even if the resulting key cannot be guaranteed to have full entropy. 
- */ -#define ARC4_MAXBYTES (16 * 1024 * 1024) -#define ARC4_HARDMAX (512 * 1024 * 1024) -#define ARC4_RESEED_SECONDS 300 -#define ARC4_KEYBYTES 16 /* 128 bit key */ - -static kmutex_t arc4_mtx; -static struct rndsink *arc4_rndsink; - -static u_int8_t arc4_i, arc4_j; -static int arc4_initialized = 0; -static int arc4_numbytes = 0; -static u_int8_t arc4_sbox[256]; -static time_t arc4_nextreseed; - -static rndsink_callback_t arc4_rndsink_callback; -static void arc4_randrekey(void); -static void arc4_randrekey_from(const uint8_t[ARC4_KEYBYTES], bool); -static void arc4_init(void); -static inline u_int8_t arc4_randbyte(void); -static inline void arc4randbytes_unlocked(void *, size_t); -void _arc4randbytes(void *, size_t); -uint32_t _arc4random(void); - -static inline void -arc4_swap(u_int8_t *a, u_int8_t *b) -{ - u_int8_t c; - - c = *a; - *a = *b; - *b = c; -} - -static void -arc4_rndsink_callback(void *context __unused, const void *seed, size_t bytes) -{ - - KASSERT(bytes == ARC4_KEYBYTES); - arc4_randrekey_from(seed, true); -} - -/* - * Stir our S-box with whatever we can get from the system entropy pool - * now. - */ -static void -arc4_randrekey(void) -{ - uint8_t seed[ARC4_KEYBYTES]; - - const bool full_entropy = rndsink_request(arc4_rndsink, seed, - sizeof(seed)); - arc4_randrekey_from(seed, full_entropy); - explicit_memset(seed, 0, sizeof(seed)); -} - -/* - * Stir our S-box with what's in seed. - */ -static void -arc4_randrekey_from(const uint8_t seed[ARC4_KEYBYTES], bool full_entropy) -{ - uint8_t key[256]; - size_t n; - - mutex_spin_enter(&arc4_mtx); - - (void)memcpy(key, seed, ARC4_KEYBYTES); - - /* Rekey the arc4 state. 
*/ - for (n = ARC4_KEYBYTES; n < sizeof(key); n++) - key[n] = key[n % ARC4_KEYBYTES]; - - for (n = 0; n < 256; n++) { - arc4_j = (arc4_j + arc4_sbox[n] + key[n]) % 256; - arc4_swap(&arc4_sbox[n], &arc4_sbox[arc4_j]); - } - arc4_i = arc4_j; - - explicit_memset(key, 0, sizeof(key)); - - /* - * Throw away the first N words of output, as suggested in the - * paper "Weaknesses in the Key Scheduling Algorithm of RC4" by - * Fluher, Mantin, and Shamir. (N = 256 in our case.) - */ - for (n = 0; n < 256 * 4; n++) - arc4_randbyte(); - - /* - * Reset for next reseed cycle. If we don't have full entropy, - * caller has scheduled a reseed already. - */ - arc4_nextreseed = time_uptime + - (full_entropy? ARC4_RESEED_SECONDS : 0); - arc4_numbytes = 0; - -#if 0 /* XXX */ - arc4_rngtest(); -#endif - - mutex_spin_exit(&arc4_mtx); -} - -/* - * Initialize our S-box to its beginning defaults. - */ -static void -arc4_init(void) -{ - int n; - - mutex_init(&arc4_mtx, MUTEX_DEFAULT, IPL_VM); - arc4_rndsink = rndsink_create(ARC4_KEYBYTES, &arc4_rndsink_callback, - NULL); - - arc4_i = arc4_j = 0; - for (n = 0; n < 256; n++) - arc4_sbox[n] = (u_int8_t) n; - - arc4_randrekey(); - arc4_initialized = 1; -} - -/* - * Generate a random byte. - */ -static inline u_int8_t -arc4_randbyte(void) -{ - u_int8_t arc4_t; - - arc4_i = (arc4_i + 1) % 256; - arc4_j = (arc4_j + arc4_sbox[arc4_i]) % 256; - - arc4_swap(&arc4_sbox[arc4_i], &arc4_sbox[arc4_j]); - - arc4_t = (arc4_sbox[arc4_i] + arc4_sbox[arc4_j]) % 256; - return arc4_sbox[arc4_t]; -} - -static inline void -arc4randbytes_unlocked(void *p, size_t len) -{ - u_int8_t *buf = (u_int8_t *)p; - size_t i; - - for (i = 0; i < len; buf[i] = arc4_randbyte(), i++) - continue; -} - -void -_arc4randbytes(void *p, size_t len) -{ - /* Initialize array if needed. 
*/ - if (!arc4_initialized) { - arc4_init(); - /* avoid conditionalizing locking */ - arc4randbytes_unlocked(p, len); - arc4_numbytes += len; - return; - } - mutex_spin_enter(&arc4_mtx); - arc4randbytes_unlocked(p, len); - arc4_numbytes += len; - mutex_spin_exit(&arc4_mtx); - if ((arc4_numbytes > ARC4_MAXBYTES) || - (time_uptime > arc4_nextreseed)) { - arc4_randrekey(); - } -} - -u_int32_t -_arc4random(void) -{ - u_int32_t ret; - u_int8_t *retc; - - retc = (u_int8_t *)&ret; - - _arc4randbytes(retc, sizeof(u_int32_t)); - return ret; -} Index: nfs/nfs_subs.c =================================================================== RCS file: /cvsroot/src/sys/nfs/nfs_subs.c,v retrieving revision 1.225 diff -u -p -r1.225 nfs_subs.c --- nfs/nfs_subs.c 17 Mar 2014 09:35:24 -0000 1.225 +++ nfs/nfs_subs.c 17 Apr 2014 03:17:19 -0000 @@ -1489,7 +1489,6 @@ nfs_init0(void) nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfs_ticks < 1) nfs_ticks = 1; - nfs_xid = cprng_fast32(); nfsdreq_init(); /* @@ -1994,6 +1993,10 @@ nfs_getxid(void) { u_int32_t newxid; + if (__predict_false(nfs_xid == 0)) { + nfs_xid = cprng_fast32(); + } + /* get next xid. skip 0 */ do { newxid = atomic_inc_32_nv(&nfs_xid); Index: sys/cprng.h =================================================================== RCS file: /cvsroot/src/sys/sys/cprng.h,v retrieving revision 1.9 diff -u -p -r1.9 cprng.h --- sys/cprng.h 17 Jan 2014 02:08:56 -0000 1.9 +++ sys/cprng.h 17 Apr 2014 03:17:19 -0000 @@ -41,42 +41,91 @@ #include <sys/rnd.h> /* XXX users bogusly transitively need this */ #include <crypto/nist_ctr_drbg/nist_ctr_drbg.h> +#include <crypto/hc128/hc128.h> +#include <sys/percpu.h> +#include <sys/intr.h> /* * NIST SP800-90 says 2^19 bytes per request for the CTR_DRBG. 
*/ #define CPRNG_MAX_LEN 524288 +#define CPRNGF_MAXBYTES (512 * 1024 * 1024) +#define CPRNGF_HARDMAX (1 * 1024 * 1024 * 1024) +#define CPRNGF_RESEED_SECONDS 600 + +typedef struct { + hc128_state_t hc128; + int numbytes; + time_t nextreseed; +} cprng_fast_ctx_t; + /* - * We do not want an arc4random() prototype available to anyone. + * This is a macro so we can skip any conditional logic at runtime if + * the size provided is a multiple of the underlying stream cipher + * blocksize, e.g. sizeof(padded struct). */ -void _arc4randbytes(void *, size_t); -uint32_t _arc4random(void); +#define cprng_fast(p, len) ((0 == (len % sizeof(uint32_t))) && \ + (0 == ((uintptr_t)p % sizeof(uint32_t))) ? \ + _cprng_fast_exact(p, len) : \ + _cprng_fast_inexact(p, len)) + +size_t _cprng_fast_exact(void *, size_t); +size_t _cprng_fast_inexact(void *, size_t); -static inline size_t -cprng_fast(void *p, size_t len) +static inline void +_cprng_fast_checkrekey(cprng_fast_ctx_t *ctx) { - _arc4randbytes(p, len); - return len; + extern void *cprng_fast_rekey_softintr; + + if (__predict_false((ctx->numbytes > CPRNGF_MAXBYTES) || + (time_uptime > ctx->nextreseed))) { + /* Schedule a deferred reseed */ + softint_schedule(cprng_fast_rekey_softintr); + } } -static inline uint32_t -cprng_fast32(void) +static inline uint32_t cprng_fast32(void) { - return _arc4random(); + uint32_t ret; + extern percpu_t *percpu_cprng_fast_ctx; + cprng_fast_ctx_t *ctx = percpu_getref(percpu_cprng_fast_ctx); + int s; + + _cprng_fast_checkrekey(ctx); + + s = splhigh(); + hc128_extract(&ctx->hc128, (uint8_t *)&ret); + splx(s); + + ctx->numbytes += sizeof(uint32_t); + percpu_putref(percpu_cprng_fast_ctx); + return ret; } -static inline uint64_t -cprng_fast64(void) +static inline uint64_t cprng_fast64(void) { - uint64_t r; - _arc4randbytes(&r, sizeof(r)); - return r; + uint64_t ret; + extern percpu_t *percpu_cprng_fast_ctx; + cprng_fast_ctx_t *ctx = percpu_getref(percpu_cprng_fast_ctx); + int s; + + 
_cprng_fast_checkrekey(ctx); + + s = splhigh(); + hc128_extract(&ctx->hc128, (uint8_t *)&ret); + hc128_extract(&ctx->hc128, (uint8_t *)(((uint32_t *)&ret) + 1)); + splx(s); + + ctx->numbytes += sizeof(uint64_t); + percpu_putref(percpu_cprng_fast_ctx); + return ret; } typedef struct cprng_strong cprng_strong_t; void cprng_init(void); +void cprng_fast_init(void); #define CPRNG_INIT_ANY 0x00000001 #define CPRNG_REKEY_ANY 0x00000002