Re: [PATCH] crypto: arm64/aes-blk - ensure XTS mask is always loaded

2018-10-12 Thread Herbert Xu
On Mon, Oct 08, 2018 at 01:16:59PM +0200, Ard Biesheuvel wrote:
> Commit 2e5d2f33d1db ("crypto: arm64/aes-blk - improve XTS mask handling")
> optimized away some reloads of the XTS mask vector, but failed to take
> into account that calls into the XTS en/decrypt routines will take a
> slightly different code path if a single block of input is split across
> different buffers. So let's ensure that the first load occurs
> unconditionally, and move the reload to the end so it doesn't occur
> needlessly.
> 
> Fixes: 2e5d2f33d1db ("crypto: arm64/aes-blk - improve XTS mask handling")
> Signed-off-by: Ard Biesheuvel 
> ---
>  arch/arm64/crypto/aes-modes.S | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)

Patch applied.  Thanks.
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


[PATCH] crypto: arm64/aes-blk - ensure XTS mask is always loaded

2018-10-08 Thread Ard Biesheuvel
Commit 2e5d2f33d1db ("crypto: arm64/aes-blk - improve XTS mask handling")
optimized away some reloads of the XTS mask vector, but failed to take
into account that calls into the XTS en/decrypt routines will take a
slightly different code path if a single block of input is split across
different buffers. So let's ensure that the first load occurs
unconditionally, and move the reload to the end so it doesn't occur
needlessly.

Fixes: 2e5d2f33d1db ("crypto: arm64/aes-blk - improve XTS mask handling")
Signed-off-by: Ard Biesheuvel 
---
 arch/arm64/crypto/aes-modes.S | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 039738ae23f6..67700045a0e0 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -359,18 +359,17 @@ AES_ENTRY(aes_xts_encrypt)
mov x29, sp
 
ld1 {v4.16b}, [x6]
+   xts_load_mask   v8
cbz w7, .Lxtsencnotfirst
 
enc_prepare w3, x5, x8
encrypt_block   v4, w3, x5, x8, w7  /* first tweak */
enc_switch_key  w3, x2, x8
-   xts_load_mask   v8
b   .LxtsencNx
 
 .Lxtsencnotfirst:
enc_prepare w3, x2, x8
 .LxtsencloopNx:
-   xts_reload_mask v8
next_tweak  v4, v4, v8
 .LxtsencNx:
 subs    w4, w4, #4
@@ -391,6 +390,7 @@ AES_ENTRY(aes_xts_encrypt)
st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v7.16b
cbz w4, .Lxtsencout
+   xts_reload_mask v8
b   .LxtsencloopNx
 .Lxtsenc1x:
 adds    w4, w4, #4
@@ -417,18 +417,17 @@ AES_ENTRY(aes_xts_decrypt)
mov x29, sp
 
ld1 {v4.16b}, [x6]
+   xts_load_mask   v8
cbz w7, .Lxtsdecnotfirst
 
enc_prepare w3, x5, x8
encrypt_block   v4, w3, x5, x8, w7  /* first tweak */
dec_prepare w3, x2, x8
-   xts_load_mask   v8
b   .LxtsdecNx
 
 .Lxtsdecnotfirst:
dec_prepare w3, x2, x8
 .LxtsdecloopNx:
-   xts_reload_mask v8
next_tweak  v4, v4, v8
 .LxtsdecNx:
 subs    w4, w4, #4
@@ -449,6 +448,7 @@ AES_ENTRY(aes_xts_decrypt)
st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v7.16b
cbz w4, .Lxtsdecout
+   xts_reload_mask v8
b   .LxtsdecloopNx
 .Lxtsdec1x:
 adds    w4, w4, #4
-- 
2.11.0
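
For readers skimming the diff: v8 holds the XTS tweak mask, the GF(2^128)
feedback constant (low byte 0x87) that next_tweak multiplies into v4 to
derive each successive tweak. A loose C analogue of the control-flow change
follows; every name in it is a hypothetical stand-in for the assembly
macros, a sketch of the branch structure rather than the kernel code:

    #include <stdbool.h>

    typedef struct { unsigned char b[16]; } vec;

    /* Hypothetical stand-ins for the xts_load_mask, xts_reload_mask and
     * next_tweak assembly macros; illustration only. */
    static vec load_mask(void)          { vec m = { .b = { 0x87 } }; return m; }
    static vec reload_mask(vec m)       { return m; /* may be a no-op */ }
    static vec next_tweak(vec t, vec m) { (void)m; return t; }

    static void xts_crypt(int blocks, bool first, vec tweak)
    {
            vec mask = load_mask();      /* the fix: load on every entry */

            if (first) {
                    /* encrypt the IV to produce the first tweak */
            }
            /* Before the fix, the load above lived only inside if (first),
             * so a continuation call (first == false, i.e. one logical
             * block split across buffers) reached next_tweak with the mask
             * register never initialized. */

            while (blocks > 0) {
                    /* ... en/decrypt up to four blocks ... */
                    blocks -= 4;
                    if (blocks <= 0)
                            break;
                    mask = reload_mask(mask);  /* the fix: reload at the
                                                  loop tail, so the last
                                                  pass skips it */
                    tweak = next_tweak(tweak, mask);
            }
    }

The reload exists because one of the two implementations backing these
macros clobbers v8 during the encryption rounds; in the other it is a no-op
because the mask stays resident in the register. That no-op case is exactly
why the initial load has to be unconditional rather than confined to the
first-call path.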