Tweak the SHA256 update routines to invoke the SHA256 block transform
block by block, to avoid excessive scheduling latency caused by the
NEON algorithm running with preemption disabled.

Also, remove a stale comment which no longer applies now that kernel
mode NEON is actually disallowed in some contexts.

Signed-off-by: Ard Biesheuvel <ard.biesheu...@linaro.org>
---
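For reviewers: the chunking logic added in the first hunk can be read in
isolation. Below is a minimal user-space sketch of the same pattern (not
kernel code: sctx->count is modeled as a plain counter, the NEON block
transform is replaced by a printf, and the IS_ENABLED(CONFIG_PREEMPT)
guard is dropped). It only demonstrates the chunk-size arithmetic: each
iteration is capped at the next SHA256 block boundary, so at most one
block transform runs inside each kernel_neon_begin()/kernel_neon_end()
pair.

#include <stdio.h>

#define SHA256_BLOCK_SIZE 64

int main(void)
{
        unsigned long long count = 20;  /* bytes hashed so far (sctx->count) */
        unsigned int len = 200;         /* bytes passed to this update call */

        while (len > 0) {
                unsigned int chunk = len;

                /* Cap the chunk so it ends at the next block boundary. */
                if (chunk + count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE)
                        chunk = SHA256_BLOCK_SIZE -
                                count % SHA256_BLOCK_SIZE;

                /* Stand-in for the NEON block transform on this chunk. */
                printf("process %u bytes at offset %llu\n", chunk, count);

                count += chunk;
                len -= chunk;
        }
        return 0;
}

With count = 20 and len = 200, this yields chunks of 44, 64, 64 and 28
bytes: the first chunk completes the buffered partial block, and every
subsequent NEON section covers a single block.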
 arch/arm64/crypto/sha256-glue.c | 36 +++++++++++++-------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
index b064d925fe2a..e8880ccdc71f 100644
--- a/arch/arm64/crypto/sha256-glue.c
+++ b/arch/arm64/crypto/sha256-glue.c
@@ -89,21 +89,32 @@ static struct shash_alg algs[] = { {
 static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
                              unsigned int len)
 {
-       /*
-        * Stacking and unstacking a substantial slice of the NEON register
-        * file may significantly affect performance for small updates when
-        * executing in interrupt context, so fall back to the scalar code
-        * in that case.
-        */
+       struct sha256_state *sctx = shash_desc_ctx(desc);
+
        if (!may_use_simd())
                return sha256_base_do_update(desc, data, len,
                                (sha256_block_fn *)sha256_block_data_order);
 
-       kernel_neon_begin();
-       sha256_base_do_update(desc, data, len,
-                               (sha256_block_fn *)sha256_block_neon);
-       kernel_neon_end();
+       while (len > 0) {
+               unsigned int chunk = len;
+
+               /*
+                * Don't hog the CPU for the entire time it takes to process all
+                * input when running on a preemptible kernel, but process the
+                * data block by block instead.
+                */
+               if (IS_ENABLED(CONFIG_PREEMPT) &&
+                   chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE)
+                       chunk = SHA256_BLOCK_SIZE -
+                               sctx->count % SHA256_BLOCK_SIZE;
 
+               kernel_neon_begin();
+               sha256_base_do_update(desc, data, chunk,
+                                     (sha256_block_fn *)sha256_block_neon);
+               kernel_neon_end();
+               data += chunk;
+               len -= chunk;
+       }
        return 0;
 }
 
@@ -117,10 +128,9 @@ static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
                sha256_base_do_finalize(desc,
                                (sha256_block_fn *)sha256_block_data_order);
        } else {
-               kernel_neon_begin();
                if (len)
-                       sha256_base_do_update(desc, data, len,
-                               (sha256_block_fn *)sha256_block_neon);
+                       sha256_update_neon(desc, data, len);
+               kernel_neon_begin();
                sha256_base_do_finalize(desc,
                                (sha256_block_fn *)sha256_block_neon);
                kernel_neon_end();
-- 
2.11.0
