Re: [PATCH 1/4] random: replace non-blocking pool with a Chacha20-based CRNG

2016-05-05 Thread Theodore Ts'o
On Wed, May 04, 2016 at 09:10:07PM -0400, Theodore Ts'o wrote:
> On Wed, May 04, 2016 at 10:28:24PM +0200, Stephan Mueller wrote:
> > > +out:
> > > + spin_unlock_irqrestore(&primary_crng.lock, flags);
> > > + return ret;
> > 
> > Where did you add the memzero_explicit of tmp?
> 
> Oops, sorry, somehow that change got lost in the patch updates.  Fixed now.

Since that was the only change, instead of sending out the patch
series again, I've just updated it at:

git://git.kernel.org/pub/scm/linux/kernel/git/tytso/random.git dev


- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/4] random: replace non-blocking pool with a Chacha20-based CRNG

2016-05-04 Thread Theodore Ts'o
On Wed, May 04, 2016 at 10:28:24PM +0200, Stephan Mueller wrote:
> > +out:
> > +   spin_unlock_irqrestore(&primary_crng.lock, flags);
> > +   return ret;
> 
> Where did you add the memzero_explicit of tmp?

Oops, sorry, somehow that change got lost in the patch updates.  Fixed now.

 - Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/4] random: replace non-blocking pool with a Chacha20-based CRNG

2016-05-04 Thread Stephan Mueller
Am Mittwoch, 4. Mai 2016, 15:25:48 schrieb Theodore Ts'o:

Hi Theodore,

> The CRNG is faster, and we don't pretend to track entropy usage in the
> CRNG any more.
> 
> Signed-off-by: Theodore Ts'o 
> ---
>  crypto/chacha20_generic.c |  61 --
>  drivers/char/random.c | 283 +++---
>  include/crypto/chacha20.h |   1 +
>  lib/Makefile  |   2 +-
>  lib/chacha20.c|  79 +
>  5 files changed, 295 insertions(+), 131 deletions(-)
>  create mode 100644 lib/chacha20.c
> 
> diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
> index da9c899..1cab831 100644
> --- a/crypto/chacha20_generic.c
> +++ b/crypto/chacha20_generic.c
> @@ -15,72 +15,11 @@
>  #include 
>  #include 
> 
> -static inline u32 rotl32(u32 v, u8 n)
> -{
> - return (v << n) | (v >> (sizeof(v) * 8 - n));
> -}
> -
>  static inline u32 le32_to_cpuvp(const void *p)
>  {
>   return le32_to_cpup(p);
>  }
> 
> -static void chacha20_block(u32 *state, void *stream)
> -{
> - u32 x[16], *out = stream;
> - int i;
> -
> - for (i = 0; i < ARRAY_SIZE(x); i++)
> - x[i] = state[i];
> -
> - for (i = 0; i < 20; i += 2) {
> - x[0]  += x[4];x[12] = rotl32(x[12] ^ x[0],  16);
> - x[1]  += x[5];x[13] = rotl32(x[13] ^ x[1],  16);
> - x[2]  += x[6];x[14] = rotl32(x[14] ^ x[2],  16);
> - x[3]  += x[7];x[15] = rotl32(x[15] ^ x[3],  16);
> -
> - x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
> - x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
> - x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
> - x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
> -
> - x[0]  += x[4];x[12] = rotl32(x[12] ^ x[0],   8);
> - x[1]  += x[5];x[13] = rotl32(x[13] ^ x[1],   8);
> - x[2]  += x[6];x[14] = rotl32(x[14] ^ x[2],   8);
> - x[3]  += x[7];x[15] = rotl32(x[15] ^ x[3],   8);
> -
> - x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
> - x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
> - x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
> - x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
> -
> - x[0]  += x[5];x[15] = rotl32(x[15] ^ x[0],  16);
> - x[1]  += x[6];x[12] = rotl32(x[12] ^ x[1],  16);
> - x[2]  += x[7];x[13] = rotl32(x[13] ^ x[2],  16);
> - x[3]  += x[4];x[14] = rotl32(x[14] ^ x[3],  16);
> -
> - x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
> - x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
> - x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
> - x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
> -
> - x[0]  += x[5];x[15] = rotl32(x[15] ^ x[0],   8);
> - x[1]  += x[6];x[12] = rotl32(x[12] ^ x[1],   8);
> - x[2]  += x[7];x[13] = rotl32(x[13] ^ x[2],   8);
> - x[3]  += x[4];x[14] = rotl32(x[14] ^ x[3],   8);
> -
> - x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
> - x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
> - x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
> - x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
> - }
> -
> - for (i = 0; i < ARRAY_SIZE(x); i++)
> - out[i] = cpu_to_le32(x[i] + state[i]);
> -
> - state[12]++;
> -}
> -
>  static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
>unsigned int bytes)
>  {
> diff --git a/drivers/char/random.c b/drivers/char/random.c
> index b583e53..91d5c2a 100644
> --- a/drivers/char/random.c
> +++ b/drivers/char/random.c
> @@ -260,6 +260,7 @@
>  #include 
>  #include 
>  #include 
> +#include <crypto/chacha20.h>
> 
>  #include 
>  #include 
> @@ -412,6 +413,18 @@ static struct fasync_struct *fasync;
>  static DEFINE_SPINLOCK(random_ready_list_lock);
>  static LIST_HEAD(random_ready_list);
> 
> +/*
> + * crng_init =  0 --> Uninitialized
> + *   2 --> Initialized
> + *   3 --> Initialized from input_pool
> + *
> + * crng_init is protected by primary_crng->lock, and only increases
> + * its value (from 0->1->2->3).
> + */
> +static int crng_init = 0;
> +#define crng_ready() (likely(crng_init >= 2))
> +static void process_random_ready_list(void);
> +
>  /**
>   *
>   * OS independent entropy store.   Here are the functions which handle
> @@ -441,10 +454,13 @@ struct entropy_store {
>   __u8 last_data[EXTRACT_SIZE];
>  };
> 
> +static ssize_t extract_entropy(struct entropy_store *r, void *buf,
> +size_t nbytes, int min, int rsvd);
> +
> +static int crng_reseed(struct entropy_store *r);
>  static void push_to_pool(struct 

[PATCH 1/4] random: replace non-blocking pool with a Chacha20-based CRNG

2016-05-04 Thread Theodore Ts'o
The CRNG is faster, and we don't pretend to track entropy usage in the
CRNG any more.

Signed-off-by: Theodore Ts'o 
---
 crypto/chacha20_generic.c |  61 --
 drivers/char/random.c | 283 +++---
 include/crypto/chacha20.h |   1 +
 lib/Makefile  |   2 +-
 lib/chacha20.c|  79 +
 5 files changed, 295 insertions(+), 131 deletions(-)
 create mode 100644 lib/chacha20.c

diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
index da9c899..1cab831 100644
--- a/crypto/chacha20_generic.c
+++ b/crypto/chacha20_generic.c
@@ -15,72 +15,11 @@
 #include 
 #include 
 
-static inline u32 rotl32(u32 v, u8 n)
-{
-   return (v << n) | (v >> (sizeof(v) * 8 - n));
-}
-
 static inline u32 le32_to_cpuvp(const void *p)
 {
return le32_to_cpup(p);
 }
 
-static void chacha20_block(u32 *state, void *stream)
-{
-   u32 x[16], *out = stream;
-   int i;
-
-   for (i = 0; i < ARRAY_SIZE(x); i++)
-   x[i] = state[i];
-
-   for (i = 0; i < 20; i += 2) {
-   x[0]  += x[4];x[12] = rotl32(x[12] ^ x[0],  16);
-   x[1]  += x[5];x[13] = rotl32(x[13] ^ x[1],  16);
-   x[2]  += x[6];x[14] = rotl32(x[14] ^ x[2],  16);
-   x[3]  += x[7];x[15] = rotl32(x[15] ^ x[3],  16);
-
-   x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
-   x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
-   x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
-   x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
-
-   x[0]  += x[4];x[12] = rotl32(x[12] ^ x[0],   8);
-   x[1]  += x[5];x[13] = rotl32(x[13] ^ x[1],   8);
-   x[2]  += x[6];x[14] = rotl32(x[14] ^ x[2],   8);
-   x[3]  += x[7];x[15] = rotl32(x[15] ^ x[3],   8);
-
-   x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
-   x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
-   x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
-   x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
-
-   x[0]  += x[5];x[15] = rotl32(x[15] ^ x[0],  16);
-   x[1]  += x[6];x[12] = rotl32(x[12] ^ x[1],  16);
-   x[2]  += x[7];x[13] = rotl32(x[13] ^ x[2],  16);
-   x[3]  += x[4];x[14] = rotl32(x[14] ^ x[3],  16);
-
-   x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
-   x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
-   x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
-   x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
-
-   x[0]  += x[5];x[15] = rotl32(x[15] ^ x[0],   8);
-   x[1]  += x[6];x[12] = rotl32(x[12] ^ x[1],   8);
-   x[2]  += x[7];x[13] = rotl32(x[13] ^ x[2],   8);
-   x[3]  += x[4];x[14] = rotl32(x[14] ^ x[3],   8);
-
-   x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
-   x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
-   x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
-   x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
-   }
-
-   for (i = 0; i < ARRAY_SIZE(x); i++)
-   out[i] = cpu_to_le32(x[i] + state[i]);
-
-   state[12]++;
-}
-
 static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
 unsigned int bytes)
 {
diff --git a/drivers/char/random.c b/drivers/char/random.c
index b583e53..91d5c2a 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -260,6 +260,7 @@
 #include 
 #include 
 #include 
+#include <crypto/chacha20.h>
 
 #include 
 #include 
@@ -412,6 +413,18 @@ static struct fasync_struct *fasync;
 static DEFINE_SPINLOCK(random_ready_list_lock);
 static LIST_HEAD(random_ready_list);
 
+/*
+ * crng_init =  0 --> Uninitialized
+ * 2 --> Initialized
+ * 3 --> Initialized from input_pool
+ *
+ * crng_init is protected by primary_crng->lock, and only increases
+ * its value (from 0->1->2->3).
+ */
+static int crng_init = 0;
+#define crng_ready() (likely(crng_init >= 2))
+static void process_random_ready_list(void);
+
 /**
  *
  * OS independent entropy store.   Here are the functions which handle
@@ -441,10 +454,13 @@ struct entropy_store {
__u8 last_data[EXTRACT_SIZE];
 };
 
+static ssize_t extract_entropy(struct entropy_store *r, void *buf,
+  size_t nbytes, int min, int rsvd);
+
+static int crng_reseed(struct entropy_store *r);
 static void push_to_pool(struct work_struct *work);
 static __u32 input_pool_data[INPUT_POOL_WORDS];
 static __u32 blocking_pool_data[OUTPUT_POOL_WORDS];
-static __u32 nonblocking_pool_data[OUTPUT_POOL_WORDS];
 
 static struct entropy_store input_pool = {