[PATCH 6/7] crypto: aesni: make AVX2 AES-GCM work with all valid auth_tag_len

From: Sabrina Dubroca
Date: 2017-04-28
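
The tail that copies the computed authentication tag out of %xmm9 only
handled auth_tag_len values of 8, 12, and 16 bytes. Rewrite it to write
the tag in 8-, 4-, 2- and 1-byte steps, so that every valid GCM
authentication tag length up to 16 bytes is copied out correctly.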
Signed-off-by: Sabrina Dubroca 
---
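Note for reviewers (not part of the commit message): below is a rough C
sketch of the new tail-store logic, to make the 8/4/2/1-byte split
easier to follow. The function name and signature are invented for
illustration. The guards on the 4- and 2-byte steps are explicit here,
whereas the assembly can store 4 bytes unconditionally at _T_4\@
because every valid tag length that reaches it is at least 4.

    /* Illustration only: dst = auth_tag buffer, src = the 16 bytes
     * held in %xmm9, len = auth_tag_len, assumed to be a valid GCM
     * tag length (4, 8, or 12..16). */
    #include <stdint.h>
    #include <string.h>

    static void store_tag(uint8_t *dst, const uint8_t *src,
                          unsigned long len)
    {
            if (len == 16) {                /* _T_16: one 16-byte store */
                    memcpy(dst, src, 16);
                    return;
            }
            if (len >= 8) {                 /* _T_8: low 8 bytes */
                    memcpy(dst, src, 8);
                    dst += 8; src += 8; len -= 8;
            }
            if (len >= 4) {                 /* _T_4: next 4 bytes */
                    memcpy(dst, src, 4);
                    dst += 4; src += 4; len -= 4;
            }
            if (len >= 2) {                 /* _T_123: 2-byte store */
                    memcpy(dst, src, 2);
                    dst += 2; src += 2; len -= 2;
            }
            if (len)                        /* _T_1: final byte */
                    *dst = *src;
    }
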
 arch/x86/crypto/aesni-intel_avx-x86_64.S | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 7230808a7cef..faecb1518bf8 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -2804,19 +2804,36 @@ ENDPROC(aesni_gcm_dec_avx_gen2)
         cmp     $16, %r11
         je      _T_16\@
 
-        cmp     $12, %r11
-        je      _T_12\@
+        cmp     $8, %r11
+        jl      _T_4\@
 
 _T_8\@:
         vmovq   %xmm9, %rax
         mov     %rax, (%r10)
-        jmp     _return_T_done\@
-_T_12\@:
-        vmovq   %xmm9, %rax
-        mov     %rax, (%r10)
+        add     $8, %r10
+        sub     $8, %r11
         vpsrldq $8, %xmm9, %xmm9
+        cmp     $0, %r11
+        je      _return_T_done\@
+_T_4\@:
         vmovd   %xmm9, %eax
-        mov     %eax, 8(%r10)
+        mov     %eax, (%r10)
+        add     $4, %r10
+        sub     $4, %r11
+        vpsrldq $4, %xmm9, %xmm9
+        cmp     $0, %r11
+        je      _return_T_done\@
+_T_123\@:
+        vmovd   %xmm9, %eax
+        cmp     $2, %r11
+        jl      _T_1\@
+        mov     %ax, (%r10)
+        cmp     $2, %r11
+        je      _return_T_done\@
+        add     $2, %r10
+        sar     $16, %eax
+_T_1\@:
+        mov     %al, (%r10)
         jmp     _return_T_done\@
 
 _T_16\@:
-- 
2.12.2