The "by8" CTR AVX implementation fails to properly handle counter
overflows. That was the reason it got disabled in commit 7da4b29d496b
("crypto: aesni - disable "by8" AVX CTR optimization").

Fix the overflow handling by incrementing the counter block as a double
quad word, i.e. as a 128-bit value, and testing for overflows afterwards.
We need to use VPTEST to do so, as VPADD* does not set the flags itself
and silently drops the carry bit.

As this change adds branches to the hot path, minor performance
regressions might be a side effect. But, OTOH, we now have a conforming
implementation -- the preferable goal.

A tcrypt test on a SandyBridge system (i7-2620M) showed almost identical
numbers for the old and the new version, with differences within the
noise range. A dm-crypt test even gave slightly better results for the
fixed version. So the performance impact might not be as big as
expected.

Tested-by: Romain Francoise <rom...@orebokech.com>
Signed-off-by: Mathias Krause <mini...@googlemail.com>
Cc: Chandramouli Narayanan <mo...@linux.intel.com>
---
 arch/x86/crypto/aes_ctrby8_avx-x86_64.S |   17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index f091f122ed24..a029bc744244 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -108,6 +108,10 @@
 
 byteswap_const:
        .octa 0x000102030405060708090A0B0C0D0E0F
+ddq_low_msk:
+       .octa 0x0000000000000000FFFFFFFFFFFFFFFF
+ddq_high_add_1:
+       .octa 0x00000000000000010000000000000000
 ddq_add_1:
        .octa 0x00000000000000000000000000000001
 ddq_add_2:
@@ -169,7 +173,12 @@ ddq_add_8:
        .rept (by - 1)
                club DDQ_DATA, i
                club XDATA, i
-               vpaddd  var_ddq_add(%rip), xcounter, var_xdata
+               vpaddq  var_ddq_add(%rip), xcounter, var_xdata
+               vptest  ddq_low_msk(%rip), var_xdata
+               jnz 1f
+               vpaddq  ddq_high_add_1(%rip), var_xdata, var_xdata
+               vpaddq  ddq_high_add_1(%rip), xcounter, xcounter
+               1:
                vpshufb xbyteswap, var_xdata, var_xdata
                .set i, (i +1)
        .endr
@@ -178,7 +187,11 @@ ddq_add_8:
 
        vpxor   xkey0, xdata0, xdata0
        club DDQ_DATA, by
-       vpaddd  var_ddq_add(%rip), xcounter, xcounter
+       vpaddq  var_ddq_add(%rip), xcounter, xcounter
+       vptest  ddq_low_msk(%rip), xcounter
+       jnz     1f
+       vpaddq  ddq_high_add_1(%rip), xcounter, xcounter
+       1:
 
        .set i, 1
        .rept (by - 1)
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to