[RESEND PATCH] crypto: sa2ul - Reduce stack usage

2020-10-14 Thread Herbert Xu
Resending to linux-crypto.
 
---8<---
This patch reduces the stack usage in sa2ul:

1. Move the exported sha state into sa_prepare_iopads so that it
can occupy the same space as the k_pad buffer.

2. Use one buffer for ipad/opad in sa_prepare_iopads.

3. Remove ipad/opad buffer from sa_set_sc_auth.

4. Use async skcipher fallback and remove on-stack request from
sa_cipher_run.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: d2c8ac187fc9 ("crypto: sa2ul - Add AEAD algorithm support")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
index 5bc099052bd2..66629cad9531 100644
--- a/drivers/crypto/sa2ul.c
+++ b/drivers/crypto/sa2ul.c
@@ -9,8 +9,10 @@
  * Tero Kristo
  */
 #include <linux/clk.h>
+#include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/dmapool.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
@@ -356,42 +358,45 @@ static void sa_swiz_128(u8 *in, u16 len)
 }
 
 /* Prepare the ipad and opad from key as per SHA algorithm step 1*/
-static void prepare_kiopad(u8 *k_ipad, u8 *k_opad, const u8 *key, u16 key_sz)
+static void prepare_kipad(u8 *k_ipad, const u8 *key, u16 key_sz)
 {
int i;
 
-   for (i = 0; i < key_sz; i++) {
+   for (i = 0; i < key_sz; i++)
k_ipad[i] = key[i] ^ 0x36;
-   k_opad[i] = key[i] ^ 0x5c;
-   }
 
/* Instead of XOR with 0 */
-   for (; i < SHA1_BLOCK_SIZE; i++) {
+   for (; i < SHA1_BLOCK_SIZE; i++)
k_ipad[i] = 0x36;
+}
+
+static void prepare_kopad(u8 *k_opad, const u8 *key, u16 key_sz)
+{
+   int i;
+
+   for (i = 0; i < key_sz; i++)
+   k_opad[i] = key[i] ^ 0x5c;
+
+   /* Instead of XOR with 0 */
+   for (; i < SHA1_BLOCK_SIZE; i++)
k_opad[i] = 0x5c;
-   }
 }
 
-static void sa_export_shash(struct shash_desc *hash, int block_size,
+static void sa_export_shash(void *state, struct shash_desc *hash,
int digest_size, __be32 *out)
 {
-   union {
-   struct sha1_state sha1;
-   struct sha256_state sha256;
-   struct sha512_state sha512;
-   } sha;
-   void *state;
+   struct sha1_state *sha1;
+   struct sha256_state *sha256;
u32 *result;
-   int i;
 
switch (digest_size) {
case SHA1_DIGEST_SIZE:
-   state = &sha.sha1;
-   result = sha.sha1.state;
+   sha1 = state;
+   result = sha1->state;
break;
case SHA256_DIGEST_SIZE:
-   state = &sha.sha256;
-   result = sha.sha256.state;
+   sha256 = state;
+   result = sha256->state;
break;
default:
dev_err(sa_k3_dev, "%s: bad digest_size=%d\n", __func__,
@@ -401,8 +406,7 @@ static void sa_export_shash(struct shash_desc *hash, int 
block_size,
 
crypto_shash_export(hash, state);
 
-   for (i = 0; i < digest_size >> 2; i++)
-   out[i] = cpu_to_be32(result[i]);
+   cpu_to_be32_array(out, result, digest_size / 4);
 }
 
 static void sa_prepare_iopads(struct algo_data *data, const u8 *key,
@@ -411,24 +415,28 @@ static void sa_prepare_iopads(struct algo_data *data, 
const u8 *key,
SHASH_DESC_ON_STACK(shash, data->ctx->shash);
int block_size = crypto_shash_blocksize(data->ctx->shash);
int digest_size = crypto_shash_digestsize(data->ctx->shash);
-   u8 k_ipad[SHA1_BLOCK_SIZE];
-   u8 k_opad[SHA1_BLOCK_SIZE];
+   union {
+   struct sha1_state sha1;
+   struct sha256_state sha256;
+   u8 k_pad[SHA1_BLOCK_SIZE];
+   } sha;
 
shash->tfm = data->ctx->shash;
 
-   prepare_kiopad(k_ipad, k_opad, key, key_sz);
-
-   memzero_explicit(ipad, block_size);
-   memzero_explicit(opad, block_size);
+   prepare_kipad(sha.k_pad, key, key_sz);
 
crypto_shash_init(shash);
-   crypto_shash_update(shash, k_ipad, block_size);
-   sa_export_shash(shash, block_size, digest_size, ipad);
+   crypto_shash_update(shash, sha.k_pad, block_size);
+   sa_export_shash(&sha, shash, digest_size, ipad);
+
+   prepare_kopad(sha.k_pad, key, key_sz);
 
crypto_shash_init(shash);
-   crypto_shash_update(shash, k_opad, block_size);
+   crypto_shash_update(shash, sha.k_pad, block_size);
 
-   sa_export_shash(shash, block_size, digest_size, opad);
+   sa_export_shash(&sha, shash, digest_size, opad);
+
+   memzero_explicit(&sha, sizeof(sha));
 }
 
 /* Derive the inverse key used in AES-CBC decryption operation */
@@ -501,7 +509,8 @@ static int sa_set_sc_enc(struct algo_data *ad, const u8 
*key, u16 key_sz,
 static void sa_set_sc_auth(struct algo_data *ad, const u8 *key, u16 key_sz,
   u8 *sc_buf)
 {
-   __be32 ipad[64], opad[64];
+   __be32 *ipad = (void *)(sc_buf + 32);
+   __be32 *opad = (void *)(sc_buf + 64);
 
/* Set Authentication mode selector to hash processing */
sc_buf[0] = SA_HASH_PROCESSING;
@@ -510,14 

Re: [PATCH] crypto: sa2ul - Reduce stack usage

2020-08-25 Thread Tero Kristo

On 24/08/2020 16:12, Herbert Xu wrote:

On Sun, Aug 16, 2020 at 02:56:43PM +0800, kernel test robot wrote:


drivers/crypto/sa2ul.c: In function 'sa_prepare_iopads':

drivers/crypto/sa2ul.c:432:1: warning: the frame size of 1076 bytes is larger 
than 1024 bytes [-Wframe-larger-than=]


This patch tries to reduce the stack frame size in sa2ul.

---8<---
This patch reduces the stack usage in sa2ul:

1. Move the exported sha state into sa_prepare_iopads so that it
can occupy the same space as the k_pad buffer.

2. Use one buffer for ipad/opad in sa_prepare_iopads.

3. Remove ipad/opad buffer from sa_set_sc_auth.

4. Use async skcipher fallback and remove on-stack request from
sa_cipher_run.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: d2c8ac187fc9 ("crypto: sa2ul - Add AEAD algorithm support")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>


This patch seems ok, I also tested it locally so:

Reviewed-by: Tero Kristo <t-kristo@ti.com>
Tested-by: Tero Kristo <t-kristo@ti.com>



diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
index 5bc099052bd2..66629cad9531 100644
--- a/drivers/crypto/sa2ul.c
+++ b/drivers/crypto/sa2ul.c
@@ -9,8 +9,10 @@
   *Tero Kristo
   */
  #include <linux/clk.h>
+#include <linux/dma-mapping.h>
  #include <linux/dmaengine.h>
  #include <linux/dmapool.h>
+#include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/of_device.h>
  #include <linux/platform_device.h>
@@ -356,42 +358,45 @@ static void sa_swiz_128(u8 *in, u16 len)
  }
  
  /* Prepare the ipad and opad from key as per SHA algorithm step 1*/

-static void prepare_kiopad(u8 *k_ipad, u8 *k_opad, const u8 *key, u16 key_sz)
+static void prepare_kipad(u8 *k_ipad, const u8 *key, u16 key_sz)
  {
int i;
  
-	for (i = 0; i < key_sz; i++) {

+   for (i = 0; i < key_sz; i++)
k_ipad[i] = key[i] ^ 0x36;
-   k_opad[i] = key[i] ^ 0x5c;
-   }
  
  	/* Instead of XOR with 0 */

-   for (; i < SHA1_BLOCK_SIZE; i++) {
+   for (; i < SHA1_BLOCK_SIZE; i++)
k_ipad[i] = 0x36;
+}
+
+static void prepare_kopad(u8 *k_opad, const u8 *key, u16 key_sz)
+{
+   int i;
+
+   for (i = 0; i < key_sz; i++)
+   k_opad[i] = key[i] ^ 0x5c;
+
+   /* Instead of XOR with 0 */
+   for (; i < SHA1_BLOCK_SIZE; i++)
k_opad[i] = 0x5c;
-   }
  }
  
-static void sa_export_shash(struct shash_desc *hash, int block_size,

+static void sa_export_shash(void *state, struct shash_desc *hash,
int digest_size, __be32 *out)
  {
-   union {
-   struct sha1_state sha1;
-   struct sha256_state sha256;
-   struct sha512_state sha512;
-   } sha;
-   void *state;
+   struct sha1_state *sha1;
+   struct sha256_state *sha256;
u32 *result;
-   int i;
  
  	switch (digest_size) {

case SHA1_DIGEST_SIZE:
-   state = &sha.sha1;
-   result = sha.sha1.state;
+   sha1 = state;
+   result = sha1->state;
break;
case SHA256_DIGEST_SIZE:
-   state = &sha.sha256;
-   result = sha.sha256.state;
+   sha256 = state;
+   result = sha256->state;
break;
default:
dev_err(sa_k3_dev, "%s: bad digest_size=%d\n", __func__,
@@ -401,8 +406,7 @@ static void sa_export_shash(struct shash_desc *hash, int 
block_size,
  
  	crypto_shash_export(hash, state);
  
-	for (i = 0; i < digest_size >> 2; i++)

-   out[i] = cpu_to_be32(result[i]);
+   cpu_to_be32_array(out, result, digest_size / 4);
  }
  
  static void sa_prepare_iopads(struct algo_data *data, const u8 *key,

@@ -411,24 +415,28 @@ static void sa_prepare_iopads(struct algo_data *data, 
const u8 *key,
SHASH_DESC_ON_STACK(shash, data->ctx->shash);
int block_size = crypto_shash_blocksize(data->ctx->shash);
int digest_size = crypto_shash_digestsize(data->ctx->shash);
-   u8 k_ipad[SHA1_BLOCK_SIZE];
-   u8 k_opad[SHA1_BLOCK_SIZE];
+   union {
+   struct sha1_state sha1;
+   struct sha256_state sha256;
+   u8 k_pad[SHA1_BLOCK_SIZE];
+   } sha;
  
  	shash->tfm = data->ctx->shash;
  
-	prepare_kiopad(k_ipad, k_opad, key, key_sz);

-
-   memzero_explicit(ipad, block_size);
-   memzero_explicit(opad, block_size);
+   prepare_kipad(sha.k_pad, key, key_sz);
  
  	crypto_shash_init(shash);

-   crypto_shash_update(shash, k_ipad, block_size);
-   sa_export_shash(shash, block_size, digest_size, ipad);
+   crypto_shash_update(shash, sha.k_pad, block_size);
+   sa_export_shash(&sha, shash, digest_size, ipad);
+
+   prepare_kopad(sha.k_pad, key, key_sz);
  
  	crypto_shash_init(shash);

-   crypto_shash_update(shash, k_opad, block_size);
+   crypto_shash_update(shash, sha.k_pad, block_size);
  
-	sa_export_shash(shash, block_size, digest_size, opad);

+   sa_export_shash(&sha, shash, digest_size, opad);
+
+   memzero_explicit(&sha, sizeof(sha));
  }
  
  /* Derive the inverse key used in AES-CBC decryption operation */

@@ -501,7 +509,8 @@ static int 

[PATCH] crypto: sa2ul - Reduce stack usage

2020-08-24 Thread Herbert Xu
On Sun, Aug 16, 2020 at 02:56:43PM +0800, kernel test robot wrote:
>
>drivers/crypto/sa2ul.c: In function 'sa_prepare_iopads':
> >> drivers/crypto/sa2ul.c:432:1: warning: the frame size of 1076 bytes is 
> >> larger than 1024 bytes [-Wframe-larger-than=]

This patch tries to reduce the stack frame size in sa2ul.

---8<---
This patch reduces the stack usage in sa2ul:

1. Move the exported sha state into sa_prepare_iopads so that it
can occupy the same space as the k_pad buffer.

2. Use one buffer for ipad/opad in sa_prepare_iopads.

3. Remove ipad/opad buffer from sa_set_sc_auth.

4. Use async skcipher fallback and remove on-stack request from
sa_cipher_run.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: d2c8ac187fc9 ("crypto: sa2ul - Add AEAD algorithm support")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
index 5bc099052bd2..66629cad9531 100644
--- a/drivers/crypto/sa2ul.c
+++ b/drivers/crypto/sa2ul.c
@@ -9,8 +9,10 @@
  * Tero Kristo
  */
 #include <linux/clk.h>
+#include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/dmapool.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
@@ -356,42 +358,45 @@ static void sa_swiz_128(u8 *in, u16 len)
 }
 
 /* Prepare the ipad and opad from key as per SHA algorithm step 1*/
-static void prepare_kiopad(u8 *k_ipad, u8 *k_opad, const u8 *key, u16 key_sz)
+static void prepare_kipad(u8 *k_ipad, const u8 *key, u16 key_sz)
 {
int i;
 
-   for (i = 0; i < key_sz; i++) {
+   for (i = 0; i < key_sz; i++)
k_ipad[i] = key[i] ^ 0x36;
-   k_opad[i] = key[i] ^ 0x5c;
-   }
 
/* Instead of XOR with 0 */
-   for (; i < SHA1_BLOCK_SIZE; i++) {
+   for (; i < SHA1_BLOCK_SIZE; i++)
k_ipad[i] = 0x36;
+}
+
+static void prepare_kopad(u8 *k_opad, const u8 *key, u16 key_sz)
+{
+   int i;
+
+   for (i = 0; i < key_sz; i++)
+   k_opad[i] = key[i] ^ 0x5c;
+
+   /* Instead of XOR with 0 */
+   for (; i < SHA1_BLOCK_SIZE; i++)
k_opad[i] = 0x5c;
-   }
 }
 
-static void sa_export_shash(struct shash_desc *hash, int block_size,
+static void sa_export_shash(void *state, struct shash_desc *hash,
int digest_size, __be32 *out)
 {
-   union {
-   struct sha1_state sha1;
-   struct sha256_state sha256;
-   struct sha512_state sha512;
-   } sha;
-   void *state;
+   struct sha1_state *sha1;
+   struct sha256_state *sha256;
u32 *result;
-   int i;
 
switch (digest_size) {
case SHA1_DIGEST_SIZE:
-   state = &sha.sha1;
-   result = sha.sha1.state;
+   sha1 = state;
+   result = sha1->state;
break;
case SHA256_DIGEST_SIZE:
-   state = &sha.sha256;
-   result = sha.sha256.state;
+   sha256 = state;
+   result = sha256->state;
break;
default:
dev_err(sa_k3_dev, "%s: bad digest_size=%d\n", __func__,
@@ -401,8 +406,7 @@ static void sa_export_shash(struct shash_desc *hash, int 
block_size,
 
crypto_shash_export(hash, state);
 
-   for (i = 0; i < digest_size >> 2; i++)
-   out[i] = cpu_to_be32(result[i]);
+   cpu_to_be32_array(out, result, digest_size / 4);
 }
 
 static void sa_prepare_iopads(struct algo_data *data, const u8 *key,
@@ -411,24 +415,28 @@ static void sa_prepare_iopads(struct algo_data *data, 
const u8 *key,
SHASH_DESC_ON_STACK(shash, data->ctx->shash);
int block_size = crypto_shash_blocksize(data->ctx->shash);
int digest_size = crypto_shash_digestsize(data->ctx->shash);
-   u8 k_ipad[SHA1_BLOCK_SIZE];
-   u8 k_opad[SHA1_BLOCK_SIZE];
+   union {
+   struct sha1_state sha1;
+   struct sha256_state sha256;
+   u8 k_pad[SHA1_BLOCK_SIZE];
+   } sha;
 
shash->tfm = data->ctx->shash;
 
-   prepare_kiopad(k_ipad, k_opad, key, key_sz);
-
-   memzero_explicit(ipad, block_size);
-   memzero_explicit(opad, block_size);
+   prepare_kipad(sha.k_pad, key, key_sz);
 
crypto_shash_init(shash);
-   crypto_shash_update(shash, k_ipad, block_size);
-   sa_export_shash(shash, block_size, digest_size, ipad);
+   crypto_shash_update(shash, sha.k_pad, block_size);
+   sa_export_shash(&sha, shash, digest_size, ipad);
+
+   prepare_kopad(sha.k_pad, key, key_sz);
 
crypto_shash_init(shash);
-   crypto_shash_update(shash, k_opad, block_size);
+   crypto_shash_update(shash, sha.k_pad, block_size);
 
-   sa_export_shash(shash, block_size, digest_size, opad);
+   sa_export_shash(&sha, shash, digest_size, opad);
+
+   memzero_explicit(&sha, sizeof(sha));
 }
 
 /* Derive the inverse key used in AES-CBC decryption operation */
@@ -501,7 +509,8 @@ static int sa_set_sc_enc(struct algo_data *ad, const u8 
*key, u16 key_sz,
 static void sa_set_sc_auth(struct algo_data *ad, const u8 *key, u16 key_sz,