Re: [PATCH 1/4] random: replace non-blocking pool with a Chacha20-based CRNG
On Wed, May 04, 2016 at 09:10:07PM -0400, Theodore Ts'o wrote: > On Wed, May 04, 2016 at 10:28:24PM +0200, Stephan Mueller wrote: > > > +out: > > > + spin_unlock_irqrestore(&primary_crng.lock, flags); > > > + return ret; > > > > Where did you add the memzero_explicit of tmp? > > Oops, sorry, somehow that change got lost in the patch updates. Fixed now. Since that was the only change, instead of sending out the patch series again, I've just updated it at: git://git.kernel.org/pub/scm/linux/kernel/git/tytso/random.git dev - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/4] random: replace non-blocking pool with a Chacha20-based CRNG
On Wed, May 04, 2016 at 10:28:24PM +0200, Stephan Mueller wrote: > > +out: > > + spin_unlock_irqrestore(&primary_crng.lock, flags); > > + return ret; > > Where did you add the memzero_explicit of tmp? Oops, sorry, somehow that change got lost in the patch updates. Fixed now. - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/4] random: replace non-blocking pool with a Chacha20-based CRNG
Am Mittwoch, 4. Mai 2016, 15:25:48 schrieb Theodore Ts'o: Hi Theodore, > The CRNG is faster, and we don't pretend to track entropy usage in the > CRNG any more. > > Signed-off-by: Theodore Ts'o> --- > crypto/chacha20_generic.c | 61 -- > drivers/char/random.c | 283 > +++--- include/crypto/chacha20.h | > 1 + > lib/Makefile | 2 +- > lib/chacha20.c| 79 + > 5 files changed, 295 insertions(+), 131 deletions(-) > create mode 100644 lib/chacha20.c > > diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c > index da9c899..1cab831 100644 > --- a/crypto/chacha20_generic.c > +++ b/crypto/chacha20_generic.c > @@ -15,72 +15,11 @@ > #include > #include > > -static inline u32 rotl32(u32 v, u8 n) > -{ > - return (v << n) | (v >> (sizeof(v) * 8 - n)); > -} > - > static inline u32 le32_to_cpuvp(const void *p) > { > return le32_to_cpup(p); > } > > -static void chacha20_block(u32 *state, void *stream) > -{ > - u32 x[16], *out = stream; > - int i; > - > - for (i = 0; i < ARRAY_SIZE(x); i++) > - x[i] = state[i]; > - > - for (i = 0; i < 20; i += 2) { > - x[0] += x[4];x[12] = rotl32(x[12] ^ x[0], 16); > - x[1] += x[5];x[13] = rotl32(x[13] ^ x[1], 16); > - x[2] += x[6];x[14] = rotl32(x[14] ^ x[2], 16); > - x[3] += x[7];x[15] = rotl32(x[15] ^ x[3], 16); > - > - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 12); > - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 12); > - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 12); > - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 12); > - > - x[0] += x[4];x[12] = rotl32(x[12] ^ x[0], 8); > - x[1] += x[5];x[13] = rotl32(x[13] ^ x[1], 8); > - x[2] += x[6];x[14] = rotl32(x[14] ^ x[2], 8); > - x[3] += x[7];x[15] = rotl32(x[15] ^ x[3], 8); > - > - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 7); > - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 7); > - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 7); > - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 7); > - > - x[0] += x[5];x[15] = rotl32(x[15] ^ x[0], 16); > - x[1] += x[6];x[12] = rotl32(x[12] ^ x[1], 16); > - x[2] 
+= x[7];x[13] = rotl32(x[13] ^ x[2], 16); > - x[3] += x[4];x[14] = rotl32(x[14] ^ x[3], 16); > - > - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 12); > - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 12); > - x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 12); > - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 12); > - > - x[0] += x[5];x[15] = rotl32(x[15] ^ x[0], 8); > - x[1] += x[6];x[12] = rotl32(x[12] ^ x[1], 8); > - x[2] += x[7];x[13] = rotl32(x[13] ^ x[2], 8); > - x[3] += x[4];x[14] = rotl32(x[14] ^ x[3], 8); > - > - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 7); > - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 7); > - x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 7); > - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 7); > - } > - > - for (i = 0; i < ARRAY_SIZE(x); i++) > - out[i] = cpu_to_le32(x[i] + state[i]); > - > - state[12]++; > -} > - > static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src, >unsigned int bytes) > { > diff --git a/drivers/char/random.c b/drivers/char/random.c > index b583e53..91d5c2a 100644 > --- a/drivers/char/random.c > +++ b/drivers/char/random.c > @@ -260,6 +260,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -412,6 +413,18 @@ static struct fasync_struct *fasync; > static DEFINE_SPINLOCK(random_ready_list_lock); > static LIST_HEAD(random_ready_list); > > +/* > + * crng_init = 0 --> Uninitialized > + * 2 --> Initialized > + * 3 --> Initialized from input_pool > + * > + * crng_init is protected by primary_crng->lock, and only increases > + * its value (from 0->1->2->3). > + */ > +static int crng_init = 0; > +#define crng_ready() (likely(crng_init >= 2)) > +static void process_random_ready_list(void); > + > /** > * > * OS independent entropy store. 
Here are the functions which handle > @@ -441,10 +454,13 @@ struct entropy_store { > __u8 last_data[EXTRACT_SIZE]; > }; > > +static ssize_t extract_entropy(struct entropy_store *r, void *buf, > +size_t nbytes, int min, int rsvd); > + > +static int crng_reseed(struct entropy_store *r); > static void push_to_pool(struct
[PATCH 1/4] random: replace non-blocking pool with a Chacha20-based CRNG
The CRNG is faster, and we don't pretend to track entropy usage in the CRNG any more. Signed-off-by: Theodore Ts'o--- crypto/chacha20_generic.c | 61 -- drivers/char/random.c | 283 +++--- include/crypto/chacha20.h | 1 + lib/Makefile | 2 +- lib/chacha20.c| 79 + 5 files changed, 295 insertions(+), 131 deletions(-) create mode 100644 lib/chacha20.c diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c index da9c899..1cab831 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha20_generic.c @@ -15,72 +15,11 @@ #include #include -static inline u32 rotl32(u32 v, u8 n) -{ - return (v << n) | (v >> (sizeof(v) * 8 - n)); -} - static inline u32 le32_to_cpuvp(const void *p) { return le32_to_cpup(p); } -static void chacha20_block(u32 *state, void *stream) -{ - u32 x[16], *out = stream; - int i; - - for (i = 0; i < ARRAY_SIZE(x); i++) - x[i] = state[i]; - - for (i = 0; i < 20; i += 2) { - x[0] += x[4];x[12] = rotl32(x[12] ^ x[0], 16); - x[1] += x[5];x[13] = rotl32(x[13] ^ x[1], 16); - x[2] += x[6];x[14] = rotl32(x[14] ^ x[2], 16); - x[3] += x[7];x[15] = rotl32(x[15] ^ x[3], 16); - - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 12); - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 12); - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 12); - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 12); - - x[0] += x[4];x[12] = rotl32(x[12] ^ x[0], 8); - x[1] += x[5];x[13] = rotl32(x[13] ^ x[1], 8); - x[2] += x[6];x[14] = rotl32(x[14] ^ x[2], 8); - x[3] += x[7];x[15] = rotl32(x[15] ^ x[3], 8); - - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 7); - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 7); - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 7); - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 7); - - x[0] += x[5];x[15] = rotl32(x[15] ^ x[0], 16); - x[1] += x[6];x[12] = rotl32(x[12] ^ x[1], 16); - x[2] += x[7];x[13] = rotl32(x[13] ^ x[2], 16); - x[3] += x[4];x[14] = rotl32(x[14] ^ x[3], 16); - - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 12); - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 12); - 
x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 12); - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 12); - - x[0] += x[5];x[15] = rotl32(x[15] ^ x[0], 8); - x[1] += x[6];x[12] = rotl32(x[12] ^ x[1], 8); - x[2] += x[7];x[13] = rotl32(x[13] ^ x[2], 8); - x[3] += x[4];x[14] = rotl32(x[14] ^ x[3], 8); - - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 7); - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 7); - x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 7); - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 7); - } - - for (i = 0; i < ARRAY_SIZE(x); i++) - out[i] = cpu_to_le32(x[i] + state[i]); - - state[12]++; -} - static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src, unsigned int bytes) { diff --git a/drivers/char/random.c b/drivers/char/random.c index b583e53..91d5c2a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -260,6 +260,7 @@ #include #include #include +#include #include #include @@ -412,6 +413,18 @@ static struct fasync_struct *fasync; static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); +/* + * crng_init = 0 --> Uninitialized + * 2 --> Initialized + * 3 --> Initialized from input_pool + * + * crng_init is protected by primary_crng->lock, and only increases + * its value (from 0->1->2->3). + */ +static int crng_init = 0; +#define crng_ready() (likely(crng_init >= 2)) +static void process_random_ready_list(void); + /** * * OS independent entropy store. Here are the functions which handle @@ -441,10 +454,13 @@ struct entropy_store { __u8 last_data[EXTRACT_SIZE]; }; +static ssize_t extract_entropy(struct entropy_store *r, void *buf, + size_t nbytes, int min, int rsvd); + +static int crng_reseed(struct entropy_store *r); static void push_to_pool(struct work_struct *work); static __u32 input_pool_data[INPUT_POOL_WORDS]; static __u32 blocking_pool_data[OUTPUT_POOL_WORDS]; -static __u32 nonblocking_pool_data[OUTPUT_POOL_WORDS]; static struct entropy_store input_pool = {