Module Name:    src
Committed By:   riastradh
Date:           Tue Jul 28 20:15:07 UTC 2020
Modified Files:
    src/sys/crypto/cprng_fast: cprng_fast.c files.cprng_fast
    src/sys/kern: files.kern
    src/sys/rump/kern/lib/libcrypto: Makefile
    src/sys/rump/librump/rumpkern: Makefile.rumpkern

Log Message:
Rewrite cprng_fast in terms of new ChaCha API.

To generate a diff of this commit:
cvs rdiff -u -r1.15 -r1.16 src/sys/crypto/cprng_fast/cprng_fast.c
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/cprng_fast/files.cprng_fast
cvs rdiff -u -r1.49 -r1.50 src/sys/kern/files.kern
cvs rdiff -u -r1.12 -r1.13 src/sys/rump/kern/lib/libcrypto/Makefile
cvs rdiff -u -r1.183 -r1.184 src/sys/rump/librump/rumpkern/Makefile.rumpkern

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/sys/crypto/cprng_fast/cprng_fast.c diff -u src/sys/crypto/cprng_fast/cprng_fast.c:1.15 src/sys/crypto/cprng_fast/cprng_fast.c:1.16 --- src/sys/crypto/cprng_fast/cprng_fast.c:1.15 Thu Apr 30 03:29:45 2020 +++ src/sys/crypto/cprng_fast/cprng_fast.c Tue Jul 28 20:15:07 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: cprng_fast.c,v 1.15 2020/04/30 03:29:45 riastradh Exp $ */ +/* $NetBSD: cprng_fast.c,v 1.16 2020/07/28 20:15:07 riastradh Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.15 2020/04/30 03:29:45 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.16 2020/07/28 20:15:07 riastradh Exp $"); #include <sys/types.h> #include <sys/param.h> @@ -42,170 +42,21 @@ __KERNEL_RCSID(0, "$NetBSD: cprng_fast.c #include <sys/intr.h> #include <sys/kmem.h> #include <sys/percpu.h> - -/* ChaCha core */ - -#define crypto_core_OUTPUTWORDS 16 -#define crypto_core_INPUTWORDS 4 -#define crypto_core_KEYWORDS 8 -#define crypto_core_CONSTWORDS 4 - -#define crypto_core_ROUNDS 8 - -static uint32_t -rotate(uint32_t u, unsigned c) -{ - - return (u << c) | (u >> (32 - c)); -} - -#define QUARTERROUND(a, b, c, d) do { \ - (a) += (b); (d) ^= (a); (d) = rotate((d), 16); \ - (c) += (d); (b) ^= (c); (b) = rotate((b), 12); \ - (a) += (b); (d) ^= (a); (d) = rotate((d), 8); \ - (c) += (d); (b) ^= (c); (b) = rotate((b), 7); \ -} while (0) - -static void -crypto_core(uint32_t *out, const uint32_t *in, const uint32_t *k, - const uint32_t *c) -{ - uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15; - int i; - - x0 = c[0]; - x1 = c[1]; - x2 = c[2]; - x3 = c[3]; - x4 = k[0]; - x5 = k[1]; - x6 = k[2]; - x7 = k[3]; - x8 = k[4]; - x9 = k[5]; - x10 = k[6]; - x11 = k[7]; - x12 = in[0]; - x13 = in[1]; - x14 = in[2]; - x15 = in[3]; - - for (i = crypto_core_ROUNDS; i > 0; i -= 2) { - QUARTERROUND( x0, x4, x8,x12); - QUARTERROUND( x1, x5, x9,x13); - QUARTERROUND( x2, 
x6,x10,x14); - QUARTERROUND( x3, x7,x11,x15); - QUARTERROUND( x0, x5,x10,x15); - QUARTERROUND( x1, x6,x11,x12); - QUARTERROUND( x2, x7, x8,x13); - QUARTERROUND( x3, x4, x9,x14); - } - - out[0] = x0 + c[0]; - out[1] = x1 + c[1]; - out[2] = x2 + c[2]; - out[3] = x3 + c[3]; - out[4] = x4 + k[0]; - out[5] = x5 + k[1]; - out[6] = x6 + k[2]; - out[7] = x7 + k[3]; - out[8] = x8 + k[4]; - out[9] = x9 + k[5]; - out[10] = x10 + k[6]; - out[11] = x11 + k[7]; - out[12] = x12 + in[0]; - out[13] = x13 + in[1]; - out[14] = x14 + in[2]; - out[15] = x15 + in[3]; -} - -/* `expand 32-byte k' */ -static const uint32_t crypto_core_constant32[4] = { - 0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U, -}; - -/* - * Test vector for ChaCha20 from - * <http://tools.ietf.org/html/draft-strombergson-chacha-test-vectors-00>, - * test vectors for ChaCha12 and ChaCha8 generated by the same - * crypto_core code with crypto_core_ROUNDS varied. - */ - -#define check(E) do \ -{ \ - if (!(E)) \ - panic("crypto self-test failed: %s", #E); \ -} while (0) - -static void -crypto_core_selftest(void) -{ - const uint32_t zero32[8] = {0}; - const uint8_t sigma[] = "expand 32-byte k"; - uint32_t block[16]; - unsigned i; - -#if crypto_core_ROUNDS == 8 - static const uint8_t out[64] = { - 0x3e,0x00,0xef,0x2f,0x89,0x5f,0x40,0xd6, - 0x7f,0x5b,0xb8,0xe8,0x1f,0x09,0xa5,0xa1, - 0x2c,0x84,0x0e,0xc3,0xce,0x9a,0x7f,0x3b, - 0x18,0x1b,0xe1,0x88,0xef,0x71,0x1a,0x1e, - 0x98,0x4c,0xe1,0x72,0xb9,0x21,0x6f,0x41, - 0x9f,0x44,0x53,0x67,0x45,0x6d,0x56,0x19, - 0x31,0x4a,0x42,0xa3,0xda,0x86,0xb0,0x01, - 0x38,0x7b,0xfd,0xb8,0x0e,0x0c,0xfe,0x42, - }; -#elif crypto_core_ROUNDS == 12 - static const uint8_t out[64] = { - 0x9b,0xf4,0x9a,0x6a,0x07,0x55,0xf9,0x53, - 0x81,0x1f,0xce,0x12,0x5f,0x26,0x83,0xd5, - 0x04,0x29,0xc3,0xbb,0x49,0xe0,0x74,0x14, - 0x7e,0x00,0x89,0xa5,0x2e,0xae,0x15,0x5f, - 0x05,0x64,0xf8,0x79,0xd2,0x7a,0xe3,0xc0, - 0x2c,0xe8,0x28,0x34,0xac,0xfa,0x8c,0x79, - 0x3a,0x62,0x9f,0x2c,0xa0,0xde,0x69,0x19, - 
0x61,0x0b,0xe8,0x2f,0x41,0x13,0x26,0xbe, - }; -#elif crypto_core_ROUNDS == 20 - static const uint8_t out[64] = { - 0x76,0xb8,0xe0,0xad,0xa0,0xf1,0x3d,0x90, - 0x40,0x5d,0x6a,0xe5,0x53,0x86,0xbd,0x28, - 0xbd,0xd2,0x19,0xb8,0xa0,0x8d,0xed,0x1a, - 0xa8,0x36,0xef,0xcc,0x8b,0x77,0x0d,0xc7, - 0xda,0x41,0x59,0x7c,0x51,0x57,0x48,0x8d, - 0x77,0x24,0xe0,0x3f,0xb8,0xd8,0x4a,0x37, - 0x6a,0x43,0xb8,0xf4,0x15,0x18,0xa1,0x1c, - 0xc3,0x87,0xb6,0x69,0xb2,0xee,0x65,0x86, - }; -#else -#error crypto_core_ROUNDS must be 8, 12, or 20. -#endif - check(crypto_core_constant32[0] == le32dec(&sigma[0])); - check(crypto_core_constant32[1] == le32dec(&sigma[4])); - check(crypto_core_constant32[2] == le32dec(&sigma[8])); - check(crypto_core_constant32[3] == le32dec(&sigma[12])); - - crypto_core(block, zero32, zero32, crypto_core_constant32); - for (i = 0; i < 16; i++) - check(block[i] == le32dec(&out[i*4])); -} - -#undef check +#include <crypto/chacha/chacha.h> -#define CPRNG_FAST_SEED_BYTES (crypto_core_KEYWORDS * sizeof(uint32_t)) +#define CPRNG_FAST_SEED_BYTES CHACHA_STREAM_KEYBYTES struct cprng_fast { - uint32_t buffer[crypto_core_OUTPUTWORDS]; - uint32_t key[crypto_core_KEYWORDS]; - uint32_t nonce[crypto_core_INPUTWORDS]; + /* 128-bit vector unit generates 256 bytes at once */ + uint8_t buf[256]; + uint8_t key[CPRNG_FAST_SEED_BYTES]; + uint8_t nonce[CHACHA_STREAM_NONCEBYTES]; + unsigned i; struct evcnt *reseed_evcnt; unsigned epoch; }; -__CTASSERT(sizeof ((struct cprng_fast *)0)->key == CPRNG_FAST_SEED_BYTES); - static void cprng_fast_init_cpu(void *, void *, struct cpu_info *); static void cprng_fast_schedule_reseed(struct cprng_fast *); static void cprng_fast_intr(void *); @@ -223,7 +74,6 @@ void cprng_fast_init(void) { - crypto_core_selftest(); cprng_fast_percpu = percpu_create(sizeof(struct cprng_fast), cprng_fast_init_cpu, NULL, NULL); cprng_fast_softint = softint_establish(SOFTINT_SERIAL|SOFTINT_MPSAFE, @@ -247,7 +97,7 @@ cprng_fast_init_cpu(void *p, void *arg _ ci->ci_cpuname, 
"cprng_fast reseed"); } -static inline int +static int cprng_fast_get(struct cprng_fast **cprngp) { struct cprng_fast *cprng; @@ -262,7 +112,7 @@ cprng_fast_get(struct cprng_fast **cprng return s; } -static inline void +static void cprng_fast_put(struct cprng_fast *cprng, int s) { @@ -302,149 +152,80 @@ cprng_fast_intr(void *cookie __unused) /* CPRNG algorithm */ -/* - * The state consists of a key, the current nonce, and a 64-byte buffer - * of output. Since we fill the buffer only when we need output, and - * eat a 32-bit word at a time, one 32-bit word of the buffer would be - * wasted. Instead, we repurpose it to count the number of entries in - * the buffer remaining, counting from high to low in order to allow - * comparison to zero to detect when we need to refill it. - */ -#define CPRNG_FAST_BUFIDX (crypto_core_OUTPUTWORDS - 1) - static void cprng_fast_seed(struct cprng_fast *cprng, const void *seed) { - (void)memset(cprng->buffer, 0, sizeof cprng->buffer); + (void)memset(cprng->buf, 0, sizeof cprng->buf); (void)memcpy(cprng->key, seed, sizeof cprng->key); (void)memset(cprng->nonce, 0, sizeof cprng->nonce); + cprng->i = sizeof cprng->buf; } -static inline uint32_t -cprng_fast_word(struct cprng_fast *cprng) +static void +cprng_fast_buf(struct cprng_fast *cprng, void *buf, unsigned len) { - uint32_t v; + uint8_t *p = buf; + unsigned n = len, n0; - if (__predict_true(0 < cprng->buffer[CPRNG_FAST_BUFIDX])) { - v = cprng->buffer[--cprng->buffer[CPRNG_FAST_BUFIDX]]; - } else { - /* If we don't have enough words, refill the buffer. 
*/ - crypto_core(cprng->buffer, cprng->nonce, cprng->key, - crypto_core_constant32); - if (__predict_false(++cprng->nonce[0] == 0)) { - cprng->nonce[1]++; - cprng_fast_schedule_reseed(cprng); - } - v = cprng->buffer[CPRNG_FAST_BUFIDX]; - cprng->buffer[CPRNG_FAST_BUFIDX] = CPRNG_FAST_BUFIDX; - } + KASSERT(cprng->i <= sizeof(cprng->buf)); + KASSERT(len <= sizeof(cprng->buf)); - return v; + n0 = MIN(n, sizeof(cprng->buf) - cprng->i); + memcpy(p, &cprng->buf[cprng->i], n0); + if ((n -= n0) == 0) { + cprng->i += n0; + KASSERT(cprng->i <= sizeof(cprng->buf)); + return; + } + p += n0; + le64enc(cprng->nonce, 1 + le64dec(cprng->nonce)); + chacha_stream(cprng->buf, sizeof(cprng->buf), 0, cprng->nonce, + cprng->key, 8); + memcpy(p, cprng->buf, n); + cprng->i = n; } + +/* Public API */ -static inline void -cprng_fast_buf(struct cprng_fast *cprng, void *buf, unsigned n) +static void +cprng_fast_buf_short(void *buf, size_t len) { - uint8_t *p = buf; - uint32_t v; - unsigned w, r; + struct cprng_fast *cprng; + int s; - w = n / sizeof(uint32_t); - while (w--) { - v = cprng_fast_word(cprng); - (void)memcpy(p, &v, 4); - p += 4; - } + KASSERT(len <= sizeof(cprng->buf)); - r = n % sizeof(uint32_t); - if (r) { - v = cprng_fast_word(cprng); - while (r--) { - *p++ = (v & 0xff); - v >>= 8; - } - } + s = cprng_fast_get(&cprng); + cprng_fast_buf(cprng, buf, len); + cprng_fast_put(cprng, s); } - -/* - * crypto_onetimestream: Expand a short unpredictable one-time seed - * into a long unpredictable output. - */ + static void -crypto_onetimestream(const uint32_t seed[crypto_core_KEYWORDS], void *buf, - size_t n) +cprng_fast_buf_long(void *buf, size_t len) { - uint32_t block[crypto_core_OUTPUTWORDS]; - uint32_t nonce[crypto_core_INPUTWORDS] = {0}; - uint8_t *p8; - uint32_t *p32; - size_t ni, nb, nf; + uint8_t seed[CHACHA_STREAM_KEYBYTES]; + uint8_t nonce[CHACHA_STREAM_NONCEBYTES] = {0}; - /* - * Guarantee we can generate up to n bytes. 
We have - * 2^(32*INPUTWORDS) possible inputs yielding output of - * 4*OUTPUTWORDS*2^(32*INPUTWORDS) bytes. It suffices to - * require that sizeof n > (1/CHAR_BIT) log_2 n be less than - * (1/CHAR_BIT) log_2 of the total output stream length. We - * have - * - * log_2 (4 o 2^(32 i)) = log_2 (4 o) + log_2 2^(32 i) - * = 2 + log_2 o + 32 i. - */ - __CTASSERT(CHAR_BIT*sizeof n <= - (2 + ilog2(crypto_core_OUTPUTWORDS) + 32*crypto_core_INPUTWORDS)); + CTASSERT(sizeof(seed) <= sizeof(((struct cprng_fast *)0)->buf)); - p8 = buf; - p32 = (uint32_t *)roundup2((uintptr_t)p8, sizeof(uint32_t)); - ni = (uint8_t *)p32 - p8; - if (n < ni) - ni = n; - nb = (n - ni) / sizeof block; - nf = (n - ni) % sizeof block; - - KASSERT(((uintptr_t)p32 & 3) == 0); - KASSERT(ni <= n); - KASSERT(nb <= (n / sizeof block)); - KASSERT(nf <= n); - KASSERT(n == (ni + (nb * sizeof block) + nf)); - KASSERT(ni < sizeof(uint32_t)); - KASSERT(nf < sizeof block); - - if (ni) { - crypto_core(block, nonce, seed, crypto_core_constant32); - nonce[0]++; - (void)memcpy(p8, block, ni); - } - while (nb--) { - crypto_core(p32, nonce, seed, crypto_core_constant32); - if (++nonce[0] == 0) - nonce[1]++; - p32 += crypto_core_OUTPUTWORDS; - } - if (nf) { - crypto_core(block, nonce, seed, crypto_core_constant32); - if (++nonce[0] == 0) - nonce[1]++; - (void)memcpy(p32, block, nf); - } +#if SIZE_MAX >= 0x3fffffffff + /* >=256 GB is not reasonable */ + KASSERT(len <= 0x3fffffffff); +#endif - if (ni | nf) - (void)explicit_memset(block, 0, sizeof block); + cprng_fast_buf_short(seed, sizeof seed); + chacha_stream(buf, len, 0, nonce, seed, 8); + + (void)explicit_memset(seed, 0, sizeof seed); } - -/* Public API */ uint32_t cprng_fast32(void) { - struct cprng_fast *cprng; uint32_t v; - int s; - s = cprng_fast_get(&cprng); - v = cprng_fast_word(cprng); - cprng_fast_put(cprng, s); + cprng_fast_buf_short(&v, sizeof v); return v; } @@ -452,43 +233,11 @@ cprng_fast32(void) uint64_t cprng_fast64(void) { - struct cprng_fast *cprng; - 
uint32_t hi, lo; - int s; - - s = cprng_fast_get(&cprng); - hi = cprng_fast_word(cprng); - lo = cprng_fast_word(cprng); - cprng_fast_put(cprng, s); - - return ((uint64_t)hi << 32) | lo; -} - -static void -cprng_fast_buf_short(void *buf, size_t len) -{ - struct cprng_fast *cprng; - int s; - - s = cprng_fast_get(&cprng); - cprng_fast_buf(cprng, buf, len); - cprng_fast_put(cprng, s); -} - -static __noinline void -cprng_fast_buf_long(void *buf, size_t len) -{ - uint32_t seed[crypto_core_KEYWORDS]; - struct cprng_fast *cprng; - int s; - - s = cprng_fast_get(&cprng); - cprng_fast_buf(cprng, seed, sizeof seed); - cprng_fast_put(cprng, s); + uint64_t v; - crypto_onetimestream(seed, buf, len); + cprng_fast_buf_short(&v, sizeof v); - (void)explicit_memset(seed, 0, sizeof seed); + return v; } size_t @@ -498,12 +247,12 @@ cprng_fast(void *buf, size_t len) /* * We don't want to hog the CPU, so we use the short version, * to generate output without preemption, only if we can do it - * with at most one crypto_core. + * with at most one ChaCha call. 
*/ - if (len <= (sizeof(uint32_t) * crypto_core_OUTPUTWORDS)) + if (len <= sizeof(((struct cprng_fast *)0)->buf)) cprng_fast_buf_short(buf, len); else cprng_fast_buf_long(buf, len); - return len; + return len; /* hysterical raisins */ } Index: src/sys/crypto/cprng_fast/files.cprng_fast diff -u src/sys/crypto/cprng_fast/files.cprng_fast:1.2 src/sys/crypto/cprng_fast/files.cprng_fast:1.3 --- src/sys/crypto/cprng_fast/files.cprng_fast:1.2 Sun Aug 10 16:44:35 2014 +++ src/sys/crypto/cprng_fast/files.cprng_fast Tue Jul 28 20:15:07 2020 @@ -1,3 +1,5 @@ -# $NetBSD: files.cprng_fast,v 1.2 2014/08/10 16:44:35 tls Exp $ +# $NetBSD: files.cprng_fast,v 1.3 2020/07/28 20:15:07 riastradh Exp $ -file crypto/cprng_fast/cprng_fast.c +define cprng_fast: chacha + +file crypto/cprng_fast/cprng_fast.c cprng_fast Index: src/sys/kern/files.kern diff -u src/sys/kern/files.kern:1.49 src/sys/kern/files.kern:1.50 --- src/sys/kern/files.kern:1.49 Sun Jun 7 09:45:19 2020 +++ src/sys/kern/files.kern Tue Jul 28 20:15:07 2020 @@ -1,9 +1,9 @@ -# $NetBSD: files.kern,v 1.49 2020/06/07 09:45:19 maxv Exp $ +# $NetBSD: files.kern,v 1.50 2020/07/28 20:15:07 riastradh Exp $ # # kernel sources # -define kern: machdep, uvm +define kern: cprng_fast, machdep, uvm defflag opt_kern.h KERN defflag opt_script.h SETUIDSCRIPTS FDSCRIPTS defflag KASLR Index: src/sys/rump/kern/lib/libcrypto/Makefile diff -u src/sys/rump/kern/lib/libcrypto/Makefile:1.12 src/sys/rump/kern/lib/libcrypto/Makefile:1.13 --- src/sys/rump/kern/lib/libcrypto/Makefile:1.12 Sun Jul 26 04:25:49 2020 +++ src/sys/rump/kern/lib/libcrypto/Makefile Tue Jul 28 20:15:07 2020 @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.12 2020/07/26 04:25:49 riastradh Exp $ +# $NetBSD: Makefile,v 1.13 2020/07/28 20:15:07 riastradh Exp $ # .PATH: ${.CURDIR}/../../../../crypto/adiantum \ @@ -6,7 +6,6 @@ ${.CURDIR}/../../../../crypto/blowfish \ ${.CURDIR}/../../../../crypto/camellia \ ${.CURDIR}/../../../../crypto/cast128 \ - ${.CURDIR}/../../../../crypto/chacha \ 
${.CURDIR}/../../../../crypto/des \ ${.CURDIR}/../../../../crypto/skipjack @@ -36,11 +35,6 @@ SRCS+= camellia.c camellia-api.c # cast128 SRCS+= cast128.c -# ChaCha -SRCS+= chacha_impl.c -SRCS+= chacha_ref.c -SRCS+= chacha_selftest.c - # DES SRCS+= des_ecb.c des_setkey.c des_enc.c des_cbc.c des_module.c Index: src/sys/rump/librump/rumpkern/Makefile.rumpkern diff -u src/sys/rump/librump/rumpkern/Makefile.rumpkern:1.183 src/sys/rump/librump/rumpkern/Makefile.rumpkern:1.184 --- src/sys/rump/librump/rumpkern/Makefile.rumpkern:1.183 Thu Apr 30 03:28:19 2020 +++ src/sys/rump/librump/rumpkern/Makefile.rumpkern Tue Jul 28 20:15:07 2020 @@ -1,4 +1,4 @@ -# $NetBSD: Makefile.rumpkern,v 1.183 2020/04/30 03:28:19 riastradh Exp $ +# $NetBSD: Makefile.rumpkern,v 1.184 2020/07/28 20:15:07 riastradh Exp $ # IOCONFDIR:= ${.PARSEDIR} @@ -17,6 +17,7 @@ MAN= rump.3 rump_lwproc.3 ${RUMPTOP}/../dev \ ${RUMPTOP}/../crypto/nist_hash_drbg \ ${RUMPTOP}/../crypto/cprng_fast \ + ${RUMPTOP}/../crypto/chacha \ ${RUMPTOP}/../secmodel \ ${RUMPTOP}/../secmodel/suser \ ${RUMPTOP}/../compat/common @@ -156,6 +157,9 @@ SRCS+= clock_subr.c # are available from the rumpkern_crypto component SRCS+= nist_hash_drbg.c SRCS+= cprng_fast.c +SRCS+= chacha_impl.c +SRCS+= chacha_ref.c +SRCS+= chacha_selftest.c .include "${RUMPTOP}/Makefile.rump" .include <bsd.own.mk>