On Mon, Oct 07, 2019 at 06:45:43PM +0200, Ard Biesheuvel wrote:
> In preparation for extending the x86 ChaCha driver to also expose the ChaCha
> library interface, drop the dependency on the chacha_generic crypto driver
> as a non-SIMD fallback, and depend on the generic ChaCha library directly.
> This way, we only pull in the code we actually need, without registering
> a set of ChaCha skciphers that we will never use.
> 
> Since turning the FPU on and off is cheap these days, simplify the SIMD
> routine by dropping the per-page yield, which makes for a cleaner switch
> to the library API as well.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheu...@linaro.org>
> ---
>  arch/x86/crypto/chacha_glue.c | 77 ++++++++++----------
>  crypto/Kconfig                |  2 +-
>  2 files changed, 40 insertions(+), 39 deletions(-)
> 
> diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
> index bc62daa8dafd..3a1a11a4326d 100644
> --- a/arch/x86/crypto/chacha_glue.c
> +++ b/arch/x86/crypto/chacha_glue.c
> @@ -127,32 +127,32 @@ static int chacha_simd_stream_xor(struct skcipher_walk *walk,
>                                 const struct chacha_ctx *ctx, const u8 *iv)
>  {
>       u32 *state, state_buf[16 + 2] __aligned(8);
> -     int next_yield = 4096; /* bytes until next FPU yield */
> +     bool do_simd;
>       int err = 0;
>  
>       BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
>       state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
>  
> -     crypto_chacha_init(state, ctx, iv);
> +     chacha_init_generic(state, ctx->key, iv);
>  
> +     do_simd = (walk->total > CHACHA_BLOCK_SIZE) && crypto_simd_usable();
>       while (walk->nbytes > 0) {
>               unsigned int nbytes = walk->nbytes;
>  
> -             if (nbytes < walk->total) {
> +             if (nbytes < walk->total)
>                       nbytes = round_down(nbytes, walk->stride);
> -                     next_yield -= nbytes;
> -             }
> -
> -             chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr,
> -                           nbytes, ctx->nrounds);
>  
> -             if (next_yield <= 0) {
> -                     /* temporarily allow preemption */
> -                     kernel_fpu_end();
> +             if (!do_simd) {
> +                     chacha_crypt_generic(state, walk->dst.virt.addr,
> +                                          walk->src.virt.addr, nbytes,
> +                                          ctx->nrounds);
> +             } else {
>                       kernel_fpu_begin();
> -                     next_yield = 4096;
> +                     chacha_dosimd(state, walk->dst.virt.addr,
> +                                   walk->src.virt.addr, nbytes,
> +                                   ctx->nrounds);
> +                     kernel_fpu_end();

Since the kernel_fpu_begin() and kernel_fpu_end() calls were moved here, it's
now possible to simplify the code by moving the call to skcipher_walk_virt()
into chacha_simd_stream_xor() rather than making the caller do it.

I.e., see what the code was like prior to the following commit:

        commit f9c9bdb5131eee60dc3b92e5126d4c0e291703e2
        Author: Eric Biggers <ebigg...@google.com>
        Date:   Sat Dec 15 12:40:17 2018 -0800

            crypto: x86/chacha - avoid sleeping under kernel_fpu_begin()
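
I.e., something like this (untested sketch, keeping the do_simd check from
this patch; since kernel_fpu_begin()/end() now wrap only the SIMD calls, the
walk can be non-atomic again):

	static int chacha_simd_stream_xor(struct skcipher_request *req,
					  const struct chacha_ctx *ctx,
					  const u8 *iv)
	{
		u32 *state, state_buf[16 + 2] __aligned(8);
		struct skcipher_walk walk;
		bool do_simd;
		int err;

		/* atomic=false: we no longer sleep under kernel_fpu_begin() */
		err = skcipher_walk_virt(&walk, req, false);

		BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
		state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
		chacha_init_generic(state, ctx->key, iv);

		do_simd = walk.total > CHACHA_BLOCK_SIZE && crypto_simd_usable();
		while (walk.nbytes > 0) {
			unsigned int nbytes = walk.nbytes;

			if (nbytes < walk.total)
				nbytes = round_down(nbytes, walk.stride);

			if (!do_simd) {
				chacha_crypt_generic(state, walk.dst.virt.addr,
						     walk.src.virt.addr, nbytes,
						     ctx->nrounds);
			} else {
				kernel_fpu_begin();
				chacha_dosimd(state, walk.dst.virt.addr,
					      walk.src.virt.addr, nbytes,
					      ctx->nrounds);
				kernel_fpu_end();
			}
			err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
		}

		return err;
	}

and then chacha_simd() reduces to

	return chacha_simd_stream_xor(req, ctx, req->iv);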

>               }
> -
>               err = skcipher_walk_done(walk, walk->nbytes - nbytes);
>       }
>  
> @@ -164,19 +164,9 @@ static int chacha_simd(struct skcipher_request *req)
>       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
>       struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
>       struct skcipher_walk walk;
> -     int err;
>  
> -     if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
> -             return crypto_chacha_crypt(req);
> -
> -     err = skcipher_walk_virt(&walk, req, true);
> -     if (err)
> -             return err;
> -
> -     kernel_fpu_begin();
> -     err = chacha_simd_stream_xor(&walk, ctx, req->iv);
> -     kernel_fpu_end();
> -     return err;
> +     return skcipher_walk_virt(&walk, req, true) ?:
> +            chacha_simd_stream_xor(&walk, ctx, req->iv);
>  }
>  
>  static int xchacha_simd(struct skcipher_request *req)
> @@ -189,31 +179,42 @@ static int xchacha_simd(struct skcipher_request *req)
>       u8 real_iv[16];
>       int err;
>  
> -     if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
> -             return crypto_xchacha_crypt(req);
> -
>       err = skcipher_walk_virt(&walk, req, true);
>       if (err)
>               return err;
>  
>       BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
>       state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
> -     crypto_chacha_init(state, ctx, req->iv);
> -
> -     kernel_fpu_begin();
> -
> -     hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
> +     chacha_init_generic(state, ctx->key, req->iv);
> +
> +     if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
> +             kernel_fpu_begin();
> +             hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
> +             kernel_fpu_end();
> +     } else {
> +             hchacha_block_generic(state, subctx.key, ctx->nrounds);
> +     }
>       subctx.nrounds = ctx->nrounds;
>  
>       memcpy(&real_iv[0], req->iv + 24, 8);
>       memcpy(&real_iv[8], req->iv + 16, 8);
>       err = chacha_simd_stream_xor(&walk, &subctx, real_iv);
>  
> -     kernel_fpu_end();
> -
>       return err;

Can use 'return chacha_simd_stream_xor(...)' here.
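
I.e. (sketch; 'err' is still needed for the skcipher_walk_virt() call earlier
in the function, but the final assignment can become a direct return):

	memcpy(&real_iv[0], req->iv + 24, 8);
	memcpy(&real_iv[8], req->iv + 16, 8);
	return chacha_simd_stream_xor(&walk, &subctx, real_iv);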

- Eric
