Module Name: src
Committed By: joerg
Date: Sat May 16 17:32:54 UTC 2015
Modified Files:
src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386: Makefile
bn-586.S ghash-x86.S sha1-586.S sha512-586.S x86cpuid.S
Added Files:
src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386: modes.inc
Log Message:
Optimize i386 support in libcrypto:
- Enable optional SSE2 assembler versions. Regenerate.
- Hook up assembler version of GCM.
To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 \
src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile
cvs rdiff -u -r1.5 -r1.6 \
src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S
cvs rdiff -u -r1.2 -r1.3 \
src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/ghash-x86.S \
src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha512-586.S
cvs rdiff -u -r0 -r1.1 \
src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/modes.inc
cvs rdiff -u -r1.3 -r1.4 \
src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha1-586.S
cvs rdiff -u -r1.8 -r1.9 \
src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/x86cpuid.S
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile
diff -u src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile:1.6 src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile:1.7
--- src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile:1.6 Mon Jul 30 10:25:24 2012
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile Sat May 16 17:32:54 2015
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.6 2012/07/30 10:25:24 christos Exp $
+# $NetBSD: Makefile,v 1.7 2015/05/16 17:32:54 joerg Exp $
.include "bsd.own.mk"
@@ -9,7 +9,7 @@ regen:
for i in $$(find ${OPENSSLSRC} -name \*86.pl) \
${OPENSSLSRC}/crypto/x86cpuid.pl; do \
perl -I${OPENSSLSRC}/crypto/perlasm \
- -I${OPENSSLSRC}/crypto/bn/asm $$i elf -fPIC \
+ -I${OPENSSLSRC}/crypto/bn/asm $$i elf -fPIC -DOPENSSL_IA32_SSE2 \
| sed -e 's,^\.file.*$$,#include <machine/asm.h>,' \
-e 's/ call OPENSSL_cpuid_setup/ PIC_PROLOGUE! call PIC_PLT(OPENSSL_cpuid_setup)! PIC_EPILOGUE/' | tr '!' '\n' \
> $$(basename $$i .pl).S; \
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S
diff -u src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S:1.5 src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S:1.6
--- src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S:1.5 Fri Jul 27 19:34:14 2012
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S Sat May 16 17:32:54 2015
@@ -5,6 +5,103 @@
.align 16
bn_mul_add_words:
.L_bn_mul_add_words_begin:
+ call .L000PIC_me_up
+.L000PIC_me_up:
+ popl %eax
+ leal _GLOBAL_OFFSET_TABLE_+[.-.L000PIC_me_up](%eax),%eax
+ movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ btl $26,(%eax)
+ jnc .L001maw_non_sse2
+ movl 4(%esp),%eax
+ movl 8(%esp),%edx
+ movl 12(%esp),%ecx
+ movd 16(%esp),%mm0
+ pxor %mm1,%mm1
+ jmp .L002maw_sse2_entry
+.align 16
+.L003maw_sse2_unrolled:
+ movd (%eax),%mm3
+ paddq %mm3,%mm1
+ movd (%edx),%mm2
+ pmuludq %mm0,%mm2
+ movd 4(%edx),%mm4
+ pmuludq %mm0,%mm4
+ movd 8(%edx),%mm6
+ pmuludq %mm0,%mm6
+ movd 12(%edx),%mm7
+ pmuludq %mm0,%mm7
+ paddq %mm2,%mm1
+ movd 4(%eax),%mm3
+ paddq %mm4,%mm3
+ movd 8(%eax),%mm5
+ paddq %mm6,%mm5
+ movd 12(%eax),%mm4
+ paddq %mm4,%mm7
+ movd %mm1,(%eax)
+ movd 16(%edx),%mm2
+ pmuludq %mm0,%mm2
+ psrlq $32,%mm1
+ movd 20(%edx),%mm4
+ pmuludq %mm0,%mm4
+ paddq %mm3,%mm1
+ movd 24(%edx),%mm6
+ pmuludq %mm0,%mm6
+ movd %mm1,4(%eax)
+ psrlq $32,%mm1
+ movd 28(%edx),%mm3
+ addl $32,%edx
+ pmuludq %mm0,%mm3
+ paddq %mm5,%mm1
+ movd 16(%eax),%mm5
+ paddq %mm5,%mm2
+ movd %mm1,8(%eax)
+ psrlq $32,%mm1
+ paddq %mm7,%mm1
+ movd 20(%eax),%mm5
+ paddq %mm5,%mm4
+ movd %mm1,12(%eax)
+ psrlq $32,%mm1
+ paddq %mm2,%mm1
+ movd 24(%eax),%mm5
+ paddq %mm5,%mm6
+ movd %mm1,16(%eax)
+ psrlq $32,%mm1
+ paddq %mm4,%mm1
+ movd 28(%eax),%mm5
+ paddq %mm5,%mm3
+ movd %mm1,20(%eax)
+ psrlq $32,%mm1
+ paddq %mm6,%mm1
+ movd %mm1,24(%eax)
+ psrlq $32,%mm1
+ paddq %mm3,%mm1
+ movd %mm1,28(%eax)
+ leal 32(%eax),%eax
+ psrlq $32,%mm1
+ subl $8,%ecx
+ jz .L004maw_sse2_exit
+.L002maw_sse2_entry:
+ testl $4294967288,%ecx
+ jnz .L003maw_sse2_unrolled
+.align 4
+.L005maw_sse2_loop:
+ movd (%edx),%mm2
+ movd (%eax),%mm3
+ pmuludq %mm0,%mm2
+ leal 4(%edx),%edx
+ paddq %mm3,%mm1
+ paddq %mm2,%mm1
+ movd %mm1,(%eax)
+ subl $1,%ecx
+ psrlq $32,%mm1
+ leal 4(%eax),%eax
+ jnz .L005maw_sse2_loop
+.L004maw_sse2_exit:
+ movd %mm1,%eax
+ emms
+ ret
+.align 16
+.L001maw_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
@@ -17,9 +114,9 @@ bn_mul_add_words:
andl $4294967288,%ecx
movl 32(%esp),%ebp
pushl %ecx
- jz .L000maw_finish
+ jz .L006maw_finish
.align 16
-.L001maw_loop:
+.L007maw_loop:
movl (%ebx),%eax
mull %ebp
@@ -96,13 +193,13 @@ bn_mul_add_words:
subl $8,%ecx
leal 32(%ebx),%ebx
leal 32(%edi),%edi
- jnz .L001maw_loop
-.L000maw_finish:
+ jnz .L007maw_loop
+.L006maw_finish:
movl 32(%esp),%ecx
andl $7,%ecx
- jnz .L002maw_finish2
- jmp .L003maw_end
-.L002maw_finish2:
+ jnz .L008maw_finish2
+ jmp .L009maw_end
+.L008maw_finish2:
movl (%ebx),%eax
mull %ebp
@@ -113,7 +210,7 @@ bn_mul_add_words:
decl %ecx
movl %eax,(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 4(%ebx),%eax
mull %ebp
@@ -124,7 +221,7 @@ bn_mul_add_words:
decl %ecx
movl %eax,4(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 8(%ebx),%eax
mull %ebp
@@ -135,7 +232,7 @@ bn_mul_add_words:
decl %ecx
movl %eax,8(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 12(%ebx),%eax
mull %ebp
@@ -146,7 +243,7 @@ bn_mul_add_words:
decl %ecx
movl %eax,12(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 16(%ebx),%eax
mull %ebp
@@ -157,7 +254,7 @@ bn_mul_add_words:
decl %ecx
movl %eax,16(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 20(%ebx),%eax
mull %ebp
@@ -168,7 +265,7 @@ bn_mul_add_words:
decl %ecx
movl %eax,20(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 24(%ebx),%eax
mull %ebp
@@ -178,7 +275,7 @@ bn_mul_add_words:
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
-.L003maw_end:
+.L009maw_end:
movl %esi,%eax
popl %ecx
popl %edi
@@ -192,6 +289,34 @@ bn_mul_add_words:
.align 16
bn_mul_words:
.L_bn_mul_words_begin:
+ call .L010PIC_me_up
+.L010PIC_me_up:
+ popl %eax
+ leal _GLOBAL_OFFSET_TABLE_+[.-.L010PIC_me_up](%eax),%eax
+ movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ btl $26,(%eax)
+ jnc .L011mw_non_sse2
+ movl 4(%esp),%eax
+ movl 8(%esp),%edx
+ movl 12(%esp),%ecx
+ movd 16(%esp),%mm0
+ pxor %mm1,%mm1
+.align 16
+.L012mw_sse2_loop:
+ movd (%edx),%mm2
+ pmuludq %mm0,%mm2
+ leal 4(%edx),%edx
+ paddq %mm2,%mm1
+ movd %mm1,(%eax)
+ subl $1,%ecx
+ psrlq $32,%mm1
+ leal 4(%eax),%eax
+ jnz .L012mw_sse2_loop
+ movd %mm1,%eax
+ emms
+ ret
+.align 16
+.L011mw_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
@@ -203,8 +328,8 @@ bn_mul_words:
movl 28(%esp),%ebp
movl 32(%esp),%ecx
andl $4294967288,%ebp
- jz .L004mw_finish
-.L005mw_loop:
+ jz .L013mw_finish
+.L014mw_loop:
movl (%ebx),%eax
mull %ecx
@@ -265,14 +390,14 @@ bn_mul_words:
addl $32,%ebx
addl $32,%edi
subl $8,%ebp
- jz .L004mw_finish
- jmp .L005mw_loop
-.L004mw_finish:
+ jz .L013mw_finish
+ jmp .L014mw_loop
+.L013mw_finish:
movl 28(%esp),%ebp
andl $7,%ebp
- jnz .L006mw_finish2
- jmp .L007mw_end
-.L006mw_finish2:
+ jnz .L015mw_finish2
+ jmp .L016mw_end
+.L015mw_finish2:
movl (%ebx),%eax
mull %ecx
@@ -281,7 +406,7 @@ bn_mul_words:
movl %eax,(%edi)
movl %edx,%esi
decl %ebp
- jz .L007mw_end
+ jz .L016mw_end
movl 4(%ebx),%eax
mull %ecx
@@ -290,7 +415,7 @@ bn_mul_words:
movl %eax,4(%edi)
movl %edx,%esi
decl %ebp
- jz .L007mw_end
+ jz .L016mw_end
movl 8(%ebx),%eax
mull %ecx
@@ -299,7 +424,7 @@ bn_mul_words:
movl %eax,8(%edi)
movl %edx,%esi
decl %ebp
- jz .L007mw_end
+ jz .L016mw_end
movl 12(%ebx),%eax
mull %ecx
@@ -308,7 +433,7 @@ bn_mul_words:
movl %eax,12(%edi)
movl %edx,%esi
decl %ebp
- jz .L007mw_end
+ jz .L016mw_end
movl 16(%ebx),%eax
mull %ecx
@@ -317,7 +442,7 @@ bn_mul_words:
movl %eax,16(%edi)
movl %edx,%esi
decl %ebp
- jz .L007mw_end
+ jz .L016mw_end
movl 20(%ebx),%eax
mull %ecx
@@ -326,7 +451,7 @@ bn_mul_words:
movl %eax,20(%edi)
movl %edx,%esi
decl %ebp
- jz .L007mw_end
+ jz .L016mw_end
movl 24(%ebx),%eax
mull %ecx
@@ -334,7 +459,7 @@ bn_mul_words:
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
-.L007mw_end:
+.L016mw_end:
movl %esi,%eax
popl %edi
popl %esi
@@ -347,6 +472,29 @@ bn_mul_words:
.align 16
bn_sqr_words:
.L_bn_sqr_words_begin:
+ call .L017PIC_me_up
+.L017PIC_me_up:
+ popl %eax
+ leal _GLOBAL_OFFSET_TABLE_+[.-.L017PIC_me_up](%eax),%eax
+ movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ btl $26,(%eax)
+ jnc .L018sqr_non_sse2
+ movl 4(%esp),%eax
+ movl 8(%esp),%edx
+ movl 12(%esp),%ecx
+.align 16
+.L019sqr_sse2_loop:
+ movd (%edx),%mm0
+ pmuludq %mm0,%mm0
+ leal 4(%edx),%edx
+ movq %mm0,(%eax)
+ subl $1,%ecx
+ leal 8(%eax),%eax
+ jnz .L019sqr_sse2_loop
+ emms
+ ret
+.align 16
+.L018sqr_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
@@ -356,8 +504,8 @@ bn_sqr_words:
movl 24(%esp),%edi
movl 28(%esp),%ebx
andl $4294967288,%ebx
- jz .L008sw_finish
-.L009sw_loop:
+ jz .L020sw_finish
+.L021sw_loop:
movl (%edi),%eax
mull %eax
@@ -402,59 +550,59 @@ bn_sqr_words:
addl $32,%edi
addl $64,%esi
subl $8,%ebx
- jnz .L009sw_loop
-.L008sw_finish:
+ jnz .L021sw_loop
+.L020sw_finish:
movl 28(%esp),%ebx
andl $7,%ebx
- jz .L010sw_end
+ jz .L022sw_end
movl (%edi),%eax
mull %eax
movl %eax,(%esi)
decl %ebx
movl %edx,4(%esi)
- jz .L010sw_end
+ jz .L022sw_end
movl 4(%edi),%eax
mull %eax
movl %eax,8(%esi)
decl %ebx
movl %edx,12(%esi)
- jz .L010sw_end
+ jz .L022sw_end
movl 8(%edi),%eax
mull %eax
movl %eax,16(%esi)
decl %ebx
movl %edx,20(%esi)
- jz .L010sw_end
+ jz .L022sw_end
movl 12(%edi),%eax
mull %eax
movl %eax,24(%esi)
decl %ebx
movl %edx,28(%esi)
- jz .L010sw_end
+ jz .L022sw_end
movl 16(%edi),%eax
mull %eax
movl %eax,32(%esi)
decl %ebx
movl %edx,36(%esi)
- jz .L010sw_end
+ jz .L022sw_end
movl 20(%edi),%eax
mull %eax
movl %eax,40(%esi)
decl %ebx
movl %edx,44(%esi)
- jz .L010sw_end
+ jz .L022sw_end
movl 24(%edi),%eax
mull %eax
movl %eax,48(%esi)
movl %edx,52(%esi)
-.L010sw_end:
+.L022sw_end:
popl %edi
popl %esi
popl %ebx
@@ -488,8 +636,8 @@ bn_add_words:
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
- jz .L011aw_finish
-.L012aw_loop:
+ jz .L023aw_finish
+.L024aw_loop:
movl (%esi),%ecx
movl (%edi),%edx
@@ -567,11 +715,11 @@ bn_add_words:
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
- jnz .L012aw_loop
-.L011aw_finish:
+ jnz .L024aw_loop
+.L023aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
- jz .L013aw_end
+ jz .L025aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -582,7 +730,7 @@ bn_add_words:
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
- jz .L013aw_end
+ jz .L025aw_end
movl 4(%esi),%ecx
movl 4(%edi),%edx
@@ -593,7 +741,7 @@ bn_add_words:
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
- jz .L013aw_end
+ jz .L025aw_end
movl 8(%esi),%ecx
movl 8(%edi),%edx
@@ -604,7 +752,7 @@ bn_add_words:
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
- jz .L013aw_end
+ jz .L025aw_end
movl 12(%esi),%ecx
movl 12(%edi),%edx
@@ -615,7 +763,7 @@ bn_add_words:
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
- jz .L013aw_end
+ jz .L025aw_end
movl 16(%esi),%ecx
movl 16(%edi),%edx
@@ -626,7 +774,7 @@ bn_add_words:
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
- jz .L013aw_end
+ jz .L025aw_end
movl 20(%esi),%ecx
movl 20(%edi),%edx
@@ -637,7 +785,7 @@ bn_add_words:
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
- jz .L013aw_end
+ jz .L025aw_end
movl 24(%esi),%ecx
movl 24(%edi),%edx
@@ -647,7 +795,7 @@ bn_add_words:
addl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
-.L013aw_end:
+.L025aw_end:
popl %edi
popl %esi
popl %ebx
@@ -670,8 +818,8 @@ bn_sub_words:
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
- jz .L014aw_finish
-.L015aw_loop:
+ jz .L026aw_finish
+.L027aw_loop:
movl (%esi),%ecx
movl (%edi),%edx
@@ -749,11 +897,11 @@ bn_sub_words:
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
- jnz .L015aw_loop
-.L014aw_finish:
+ jnz .L027aw_loop
+.L026aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
- jz .L016aw_end
+ jz .L028aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -764,7 +912,7 @@ bn_sub_words:
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
- jz .L016aw_end
+ jz .L028aw_end
movl 4(%esi),%ecx
movl 4(%edi),%edx
@@ -775,7 +923,7 @@ bn_sub_words:
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
- jz .L016aw_end
+ jz .L028aw_end
movl 8(%esi),%ecx
movl 8(%edi),%edx
@@ -786,7 +934,7 @@ bn_sub_words:
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
- jz .L016aw_end
+ jz .L028aw_end
movl 12(%esi),%ecx
movl 12(%edi),%edx
@@ -797,7 +945,7 @@ bn_sub_words:
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
- jz .L016aw_end
+ jz .L028aw_end
movl 16(%esi),%ecx
movl 16(%edi),%edx
@@ -808,7 +956,7 @@ bn_sub_words:
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
- jz .L016aw_end
+ jz .L028aw_end
movl 20(%esi),%ecx
movl 20(%edi),%edx
@@ -819,7 +967,7 @@ bn_sub_words:
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
- jz .L016aw_end
+ jz .L028aw_end
movl 24(%esi),%ecx
movl 24(%edi),%edx
@@ -829,7 +977,7 @@ bn_sub_words:
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
-.L016aw_end:
+.L028aw_end:
popl %edi
popl %esi
popl %ebx
@@ -852,8 +1000,8 @@ bn_sub_part_words:
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
- jz .L017aw_finish
-.L018aw_loop:
+ jz .L029aw_finish
+.L030aw_loop:
movl (%esi),%ecx
movl (%edi),%edx
@@ -931,11 +1079,11 @@ bn_sub_part_words:
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
- jnz .L018aw_loop
-.L017aw_finish:
+ jnz .L030aw_loop
+.L029aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -949,7 +1097,7 @@ bn_sub_part_words:
addl $4,%edi
addl $4,%ebx
decl %ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -963,7 +1111,7 @@ bn_sub_part_words:
addl $4,%edi
addl $4,%ebx
decl %ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -977,7 +1125,7 @@ bn_sub_part_words:
addl $4,%edi
addl $4,%ebx
decl %ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -991,7 +1139,7 @@ bn_sub_part_words:
addl $4,%edi
addl $4,%ebx
decl %ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -1005,7 +1153,7 @@ bn_sub_part_words:
addl $4,%edi
addl $4,%ebx
decl %ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -1019,7 +1167,7 @@ bn_sub_part_words:
addl $4,%edi
addl $4,%ebx
decl %ebp
- jz .L019aw_end
+ jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
@@ -1032,20 +1180,20 @@ bn_sub_part_words:
addl $4,%esi
addl $4,%edi
addl $4,%ebx
-.L019aw_end:
+.L031aw_end:
cmpl $0,36(%esp)
- je .L020pw_end
+ je .L032pw_end
movl 36(%esp),%ebp
cmpl $0,%ebp
- je .L020pw_end
- jge .L021pw_pos
+ je .L032pw_end
+ jge .L033pw_pos
movl $0,%edx
subl %ebp,%edx
movl %edx,%ebp
andl $4294967288,%ebp
- jz .L022pw_neg_finish
-.L023pw_neg_loop:
+ jz .L034pw_neg_finish
+.L035pw_neg_loop:
movl $0,%ecx
movl (%edi),%edx
@@ -1122,13 +1270,13 @@ bn_sub_part_words:
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
- jnz .L023pw_neg_loop
-.L022pw_neg_finish:
+ jnz .L035pw_neg_loop
+.L034pw_neg_finish:
movl 36(%esp),%edx
movl $0,%ebp
subl %edx,%ebp
andl $7,%ebp
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl (%edi),%edx
@@ -1139,7 +1287,7 @@ bn_sub_part_words:
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl 4(%edi),%edx
@@ -1150,7 +1298,7 @@ bn_sub_part_words:
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl 8(%edi),%edx
@@ -1161,7 +1309,7 @@ bn_sub_part_words:
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl 12(%edi),%edx
@@ -1172,7 +1320,7 @@ bn_sub_part_words:
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl 16(%edi),%edx
@@ -1183,7 +1331,7 @@ bn_sub_part_words:
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl 20(%edi),%edx
@@ -1194,7 +1342,7 @@ bn_sub_part_words:
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
- jz .L020pw_end
+ jz .L032pw_end
movl $0,%ecx
movl 24(%edi),%edx
@@ -1204,181 +1352,182 @@ bn_sub_part_words:
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
- jmp .L020pw_end
-.L021pw_pos:
+ jmp .L032pw_end
+.L033pw_pos:
andl $4294967288,%ebp
- jz .L024pw_pos_finish
-.L025pw_pos_loop:
+ jz .L036pw_pos_finish
+.L037pw_pos_loop:
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
- jnc .L026pw_nc0
+ jnc .L038pw_nc0
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
- jnc .L027pw_nc1
+ jnc .L039pw_nc1
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
- jnc .L028pw_nc2
+ jnc .L040pw_nc2
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
- jnc .L029pw_nc3
+ jnc .L041pw_nc3
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
- jnc .L030pw_nc4
+ jnc .L042pw_nc4
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
- jnc .L031pw_nc5
+ jnc .L043pw_nc5
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
- jnc .L032pw_nc6
+ jnc .L044pw_nc6
movl 28(%esi),%ecx
subl %eax,%ecx
movl %ecx,28(%ebx)
- jnc .L033pw_nc7
+ jnc .L045pw_nc7
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
- jnz .L025pw_pos_loop
-.L024pw_pos_finish:
+ jnz .L037pw_pos_loop
+.L036pw_pos_finish:
movl 36(%esp),%ebp
andl $7,%ebp
- jz .L020pw_end
+ jz .L032pw_end
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
- jnc .L034pw_tail_nc0
+ jnc .L046pw_tail_nc0
decl %ebp
- jz .L020pw_end
+ jz .L032pw_end
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
- jnc .L035pw_tail_nc1
+ jnc .L047pw_tail_nc1
decl %ebp
- jz .L020pw_end
+ jz .L032pw_end
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
- jnc .L036pw_tail_nc2
+ jnc .L048pw_tail_nc2
decl %ebp
- jz .L020pw_end
+ jz .L032pw_end
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
- jnc .L037pw_tail_nc3
+ jnc .L049pw_tail_nc3
decl %ebp
- jz .L020pw_end
+ jz .L032pw_end
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
- jnc .L038pw_tail_nc4
+ jnc .L050pw_tail_nc4
decl %ebp
- jz .L020pw_end
+ jz .L032pw_end
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
- jnc .L039pw_tail_nc5
+ jnc .L051pw_tail_nc5
decl %ebp
- jz .L020pw_end
+ jz .L032pw_end
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
- jnc .L040pw_tail_nc6
+ jnc .L052pw_tail_nc6
movl $1,%eax
- jmp .L020pw_end
-.L041pw_nc_loop:
+ jmp .L032pw_end
+.L053pw_nc_loop:
movl (%esi),%ecx
movl %ecx,(%ebx)
-.L026pw_nc0:
+.L038pw_nc0:
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
-.L027pw_nc1:
+.L039pw_nc1:
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
-.L028pw_nc2:
+.L040pw_nc2:
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
-.L029pw_nc3:
+.L041pw_nc3:
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
-.L030pw_nc4:
+.L042pw_nc4:
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
-.L031pw_nc5:
+.L043pw_nc5:
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
-.L032pw_nc6:
+.L044pw_nc6:
movl 28(%esi),%ecx
movl %ecx,28(%ebx)
-.L033pw_nc7:
+.L045pw_nc7:
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
- jnz .L041pw_nc_loop
+ jnz .L053pw_nc_loop
movl 36(%esp),%ebp
andl $7,%ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl (%esi),%ecx
movl %ecx,(%ebx)
-.L034pw_tail_nc0:
+.L046pw_tail_nc0:
decl %ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
-.L035pw_tail_nc1:
+.L047pw_tail_nc1:
decl %ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
-.L036pw_tail_nc2:
+.L048pw_tail_nc2:
decl %ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
-.L037pw_tail_nc3:
+.L049pw_tail_nc3:
decl %ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
-.L038pw_tail_nc4:
+.L050pw_tail_nc4:
decl %ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
-.L039pw_tail_nc5:
+.L051pw_tail_nc5:
decl %ebp
- jz .L042pw_nc_end
+ jz .L054pw_nc_end
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
-.L040pw_tail_nc6:
-.L042pw_nc_end:
+.L052pw_tail_nc6:
+.L054pw_nc_end:
movl $0,%eax
-.L020pw_end:
+.L032pw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
+.comm OPENSSL_ia32cap_P,8,4
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/ghash-x86.S
diff -u src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/ghash-x86.S:1.2 src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/ghash-x86.S:1.3
--- src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/ghash-x86.S:1.2 Fri Jul 27 19:34:14 2012
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/ghash-x86.S Sat May 16 17:32:54 2015
@@ -203,418 +203,94 @@ gcm_ghash_4bit_x86:
popl %ebp
ret
.size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
-.type _mmx_gmult_4bit_inner,@function
+.globl gcm_gmult_4bit_mmx
+.type gcm_gmult_4bit_mmx,@function
.align 16
-_mmx_gmult_4bit_inner:
+gcm_gmult_4bit_mmx:
+.L_gcm_gmult_4bit_mmx_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%edi
+ movl 24(%esp),%esi
+ call .L005pic_point
+.L005pic_point:
+ popl %eax
+ leal .Lrem_4bit-.L005pic_point(%eax),%eax
+ movzbl 15(%edi),%ebx
xorl %ecx,%ecx
movl %ebx,%edx
movb %dl,%cl
+ movl $14,%ebp
shlb $4,%cl
andl $240,%edx
movq 8(%esi,%ecx,1),%mm0
movq (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 14(%edi),%cl
- psllq $60,%mm2
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
+ jmp .L006mmx_loop
+.align 16
+.L006mmx_loop:
psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
- movb 13(%edi),%cl
+ movb (%edi,%ebp,1),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
+ decl %ebp
movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 12(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
- movd %mm0,%ebx
pxor %mm2,%mm0
+ js .L007mmx_break
shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 11(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 10(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 9(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 8(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 7(%edi),%cl
- psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 6(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 5(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
pxor %mm2,%mm0
+ jmp .L006mmx_loop
+.align 16
+.L007mmx_break:
shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 4(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 3(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 2(%edi),%cl
- psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- movb 1(%edi),%cl
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
- movd %mm0,%ebx
pxor %mm2,%mm0
- shlb $4,%cl
psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
- movb (%edi),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
- pxor (%esi,%edx,1),%mm1
- movl %ecx,%edx
movd %mm0,%ebx
- pxor %mm2,%mm0
- shlb $4,%cl
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%ecx,1),%mm0
- psllq $60,%mm2
- andl $240,%edx
- pxor (%eax,%ebp,8),%mm1
- andl $15,%ebx
- pxor (%esi,%ecx,1),%mm1
- movd %mm0,%ebp
- pxor %mm2,%mm0
- psrlq $4,%mm0
- movq %mm1,%mm2
- psrlq $4,%mm1
- pxor 8(%esi,%edx,1),%mm0
- psllq $60,%mm2
- pxor (%eax,%ebx,8),%mm1
- andl $15,%ebp
pxor (%esi,%edx,1),%mm1
- movd %mm0,%ebx
pxor %mm2,%mm0
- movl 4(%eax,%ebp,8),%edi
psrlq $32,%mm0
movd %mm1,%edx
psrlq $32,%mm1
movd %mm0,%ecx
movd %mm1,%ebp
- shll $4,%edi
bswap %ebx
bswap %edx
bswap %ecx
- xorl %edi,%ebp
bswap %ebp
- ret
-.size _mmx_gmult_4bit_inner,.-_mmx_gmult_4bit_inner
-.globl gcm_gmult_4bit_mmx
-.type gcm_gmult_4bit_mmx,@function
-.align 16
-gcm_gmult_4bit_mmx:
-.L_gcm_gmult_4bit_mmx_begin:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- movl 20(%esp),%edi
- movl 24(%esp),%esi
- call .L005pic_point
-.L005pic_point:
- popl %eax
- leal .Lrem_4bit-.L005pic_point(%eax),%eax
- movzbl 15(%edi),%ebx
- call _mmx_gmult_4bit_inner
- movl 20(%esp),%edi
emms
movl %ebx,12(%edi)
movl %edx,4(%edi)
@@ -635,61 +311,926 @@ gcm_ghash_4bit_mmx:
pushl %ebx
pushl %esi
pushl %edi
- movl 20(%esp),%ebp
- movl 24(%esp),%esi
- movl 28(%esp),%edi
- movl 32(%esp),%ecx
- call .L006pic_point
-.L006pic_point:
- popl %eax
- leal .Lrem_4bit-.L006pic_point(%eax),%eax
- addl %edi,%ecx
- movl %ecx,32(%esp)
- subl $20,%esp
- movl 12(%ebp),%ebx
- movl 4(%ebp),%edx
- movl 8(%ebp),%ecx
- movl (%ebp),%ebp
- jmp .L007mmx_outer_loop
+ movl 20(%esp),%eax
+ movl 24(%esp),%ebx
+ movl 28(%esp),%ecx
+ movl 32(%esp),%edx
+ movl %esp,%ebp
+ call .L008pic_point
+.L008pic_point:
+ popl %esi
+ leal .Lrem_8bit-.L008pic_point(%esi),%esi
+ subl $544,%esp
+ andl $-64,%esp
+ subl $16,%esp
+ addl %ecx,%edx
+ movl %eax,544(%esp)
+ movl %edx,552(%esp)
+ movl %ebp,556(%esp)
+ addl $128,%ebx
+ leal 144(%esp),%edi
+ leal 400(%esp),%ebp
+ movl -120(%ebx),%edx
+ movq -120(%ebx),%mm0
+ movq -128(%ebx),%mm3
+ shll $4,%edx
+ movb %dl,(%esp)
+ movl -104(%ebx),%edx
+ movq -104(%ebx),%mm2
+ movq -112(%ebx),%mm5
+ movq %mm0,-128(%edi)
+ psrlq $4,%mm0
+ movq %mm3,(%edi)
+ movq %mm3,%mm7
+ psrlq $4,%mm3
+ shll $4,%edx
+ movb %dl,1(%esp)
+ movl -88(%ebx),%edx
+ movq -88(%ebx),%mm1
+ psllq $60,%mm7
+ movq -96(%ebx),%mm4
+ por %mm7,%mm0
+ movq %mm2,-120(%edi)
+ psrlq $4,%mm2
+ movq %mm5,8(%edi)
+ movq %mm5,%mm6
+ movq %mm0,-128(%ebp)
+ psrlq $4,%mm5
+ movq %mm3,(%ebp)
+ shll $4,%edx
+ movb %dl,2(%esp)
+ movl -72(%ebx),%edx
+ movq -72(%ebx),%mm0
+ psllq $60,%mm6
+ movq -80(%ebx),%mm3
+ por %mm6,%mm2
+ movq %mm1,-112(%edi)
+ psrlq $4,%mm1
+ movq %mm4,16(%edi)
+ movq %mm4,%mm7
+ movq %mm2,-120(%ebp)
+ psrlq $4,%mm4
+ movq %mm5,8(%ebp)
+ shll $4,%edx
+ movb %dl,3(%esp)
+ movl -56(%ebx),%edx
+ movq -56(%ebx),%mm2
+ psllq $60,%mm7
+ movq -64(%ebx),%mm5
+ por %mm7,%mm1
+ movq %mm0,-104(%edi)
+ psrlq $4,%mm0
+ movq %mm3,24(%edi)
+ movq %mm3,%mm6
+ movq %mm1,-112(%ebp)
+ psrlq $4,%mm3
+ movq %mm4,16(%ebp)
+ shll $4,%edx
+ movb %dl,4(%esp)
+ movl -40(%ebx),%edx
+ movq -40(%ebx),%mm1
+ psllq $60,%mm6
+ movq -48(%ebx),%mm4
+ por %mm6,%mm0
+ movq %mm2,-96(%edi)
+ psrlq $4,%mm2
+ movq %mm5,32(%edi)
+ movq %mm5,%mm7
+ movq %mm0,-104(%ebp)
+ psrlq $4,%mm5
+ movq %mm3,24(%ebp)
+ shll $4,%edx
+ movb %dl,5(%esp)
+ movl -24(%ebx),%edx
+ movq -24(%ebx),%mm0
+ psllq $60,%mm7
+ movq -32(%ebx),%mm3
+ por %mm7,%mm2
+ movq %mm1,-88(%edi)
+ psrlq $4,%mm1
+ movq %mm4,40(%edi)
+ movq %mm4,%mm6
+ movq %mm2,-96(%ebp)
+ psrlq $4,%mm4
+ movq %mm5,32(%ebp)
+ shll $4,%edx
+ movb %dl,6(%esp)
+ movl -8(%ebx),%edx
+ movq -8(%ebx),%mm2
+ psllq $60,%mm6
+ movq -16(%ebx),%mm5
+ por %mm6,%mm1
+ movq %mm0,-80(%edi)
+ psrlq $4,%mm0
+ movq %mm3,48(%edi)
+ movq %mm3,%mm7
+ movq %mm1,-88(%ebp)
+ psrlq $4,%mm3
+ movq %mm4,40(%ebp)
+ shll $4,%edx
+ movb %dl,7(%esp)
+ movl 8(%ebx),%edx
+ movq 8(%ebx),%mm1
+ psllq $60,%mm7
+ movq (%ebx),%mm4
+ por %mm7,%mm0
+ movq %mm2,-72(%edi)
+ psrlq $4,%mm2
+ movq %mm5,56(%edi)
+ movq %mm5,%mm6
+ movq %mm0,-80(%ebp)
+ psrlq $4,%mm5
+ movq %mm3,48(%ebp)
+ shll $4,%edx
+ movb %dl,8(%esp)
+ movl 24(%ebx),%edx
+ movq 24(%ebx),%mm0
+ psllq $60,%mm6
+ movq 16(%ebx),%mm3
+ por %mm6,%mm2
+ movq %mm1,-64(%edi)
+ psrlq $4,%mm1
+ movq %mm4,64(%edi)
+ movq %mm4,%mm7
+ movq %mm2,-72(%ebp)
+ psrlq $4,%mm4
+ movq %mm5,56(%ebp)
+ shll $4,%edx
+ movb %dl,9(%esp)
+ movl 40(%ebx),%edx
+ movq 40(%ebx),%mm2
+ psllq $60,%mm7
+ movq 32(%ebx),%mm5
+ por %mm7,%mm1
+ movq %mm0,-56(%edi)
+ psrlq $4,%mm0
+ movq %mm3,72(%edi)
+ movq %mm3,%mm6
+ movq %mm1,-64(%ebp)
+ psrlq $4,%mm3
+ movq %mm4,64(%ebp)
+ shll $4,%edx
+ movb %dl,10(%esp)
+ movl 56(%ebx),%edx
+ movq 56(%ebx),%mm1
+ psllq $60,%mm6
+ movq 48(%ebx),%mm4
+ por %mm6,%mm0
+ movq %mm2,-48(%edi)
+ psrlq $4,%mm2
+ movq %mm5,80(%edi)
+ movq %mm5,%mm7
+ movq %mm0,-56(%ebp)
+ psrlq $4,%mm5
+ movq %mm3,72(%ebp)
+ shll $4,%edx
+ movb %dl,11(%esp)
+ movl 72(%ebx),%edx
+ movq 72(%ebx),%mm0
+ psllq $60,%mm7
+ movq 64(%ebx),%mm3
+ por %mm7,%mm2
+ movq %mm1,-40(%edi)
+ psrlq $4,%mm1
+ movq %mm4,88(%edi)
+ movq %mm4,%mm6
+ movq %mm2,-48(%ebp)
+ psrlq $4,%mm4
+ movq %mm5,80(%ebp)
+ shll $4,%edx
+ movb %dl,12(%esp)
+ movl 88(%ebx),%edx
+ movq 88(%ebx),%mm2
+ psllq $60,%mm6
+ movq 80(%ebx),%mm5
+ por %mm6,%mm1
+ movq %mm0,-32(%edi)
+ psrlq $4,%mm0
+ movq %mm3,96(%edi)
+ movq %mm3,%mm7
+ movq %mm1,-40(%ebp)
+ psrlq $4,%mm3
+ movq %mm4,88(%ebp)
+ shll $4,%edx
+ movb %dl,13(%esp)
+ movl 104(%ebx),%edx
+ movq 104(%ebx),%mm1
+ psllq $60,%mm7
+ movq 96(%ebx),%mm4
+ por %mm7,%mm0
+ movq %mm2,-24(%edi)
+ psrlq $4,%mm2
+ movq %mm5,104(%edi)
+ movq %mm5,%mm6
+ movq %mm0,-32(%ebp)
+ psrlq $4,%mm5
+ movq %mm3,96(%ebp)
+ shll $4,%edx
+ movb %dl,14(%esp)
+ movl 120(%ebx),%edx
+ movq 120(%ebx),%mm0
+ psllq $60,%mm6
+ movq 112(%ebx),%mm3
+ por %mm6,%mm2
+ movq %mm1,-16(%edi)
+ psrlq $4,%mm1
+ movq %mm4,112(%edi)
+ movq %mm4,%mm7
+ movq %mm2,-24(%ebp)
+ psrlq $4,%mm4
+ movq %mm5,104(%ebp)
+ shll $4,%edx
+ movb %dl,15(%esp)
+ psllq $60,%mm7
+ por %mm7,%mm1
+ movq %mm0,-8(%edi)
+ psrlq $4,%mm0
+ movq %mm3,120(%edi)
+ movq %mm3,%mm6
+ movq %mm1,-16(%ebp)
+ psrlq $4,%mm3
+ movq %mm4,112(%ebp)
+ psllq $60,%mm6
+ por %mm6,%mm0
+ movq %mm0,-8(%ebp)
+ movq %mm3,120(%ebp)
+ movq (%eax),%mm6
+ movl 8(%eax),%ebx
+ movl 12(%eax),%edx
.align 16
-.L007mmx_outer_loop:
- xorl 12(%edi),%ebx
- xorl 4(%edi),%edx
- xorl 8(%edi),%ecx
- xorl (%edi),%ebp
- movl %edi,48(%esp)
- movl %ebx,12(%esp)
- movl %edx,4(%esp)
- movl %ecx,8(%esp)
- movl %ebp,(%esp)
- movl %esp,%edi
- shrl $24,%ebx
- call _mmx_gmult_4bit_inner
- movl 48(%esp),%edi
- leal 16(%edi),%edi
- cmpl 52(%esp),%edi
- jb .L007mmx_outer_loop
- movl 40(%esp),%edi
+.L009outer:
+ xorl 12(%ecx),%edx
+ xorl 8(%ecx),%ebx
+ pxor (%ecx),%mm6
+ leal 16(%ecx),%ecx
+ movl %ebx,536(%esp)
+ movq %mm6,528(%esp)
+ movl %ecx,548(%esp)
+ xorl %eax,%eax
+ roll $8,%edx
+ movb %dl,%al
+ movl %eax,%ebp
+ andb $15,%al
+ shrl $4,%ebp
+ pxor %mm0,%mm0
+ roll $8,%edx
+ pxor %mm1,%mm1
+ pxor %mm2,%mm2
+ movq 16(%esp,%eax,8),%mm7
+ movq 144(%esp,%eax,8),%mm6
+ movb %dl,%al
+ movd %mm7,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ shrl $4,%edi
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm2
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movl 536(%esp),%edx
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm2,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm1
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm1,%mm6
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm0
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm0,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm2
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm2,%mm6
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm1
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movl 532(%esp),%edx
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm1,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm0
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm0,%mm6
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm2
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm2,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm1
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm1,%mm6
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm0
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movl 528(%esp),%edx
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm0,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm2
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm2,%mm6
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm1
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm1,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm0
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ movb %dl,%al
+ movd %mm7,%ecx
+ movzbl %bl,%ebx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%ebp
+ psrlq $8,%mm6
+ pxor 272(%esp,%edi,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm0,%mm6
+ shrl $4,%ebp
+ pinsrw $2,(%esi,%ebx,2),%mm2
+ pxor 16(%esp,%eax,8),%mm7
+ roll $8,%edx
+ pxor 144(%esp,%eax,8),%mm6
+ pxor %mm3,%mm7
+ pxor 400(%esp,%edi,8),%mm6
+ xorb (%esp,%edi,1),%cl
+ movb %dl,%al
+ movl 524(%esp),%edx
+ movd %mm7,%ebx
+ movzbl %cl,%ecx
+ psrlq $8,%mm7
+ movq %mm6,%mm3
+ movl %eax,%edi
+ psrlq $8,%mm6
+ pxor 272(%esp,%ebp,8),%mm7
+ andb $15,%al
+ psllq $56,%mm3
+ pxor %mm2,%mm6
+ shrl $4,%edi
+ pinsrw $2,(%esi,%ecx,2),%mm1
+ pxor 16(%esp,%eax,8),%mm7
+ pxor 144(%esp,%eax,8),%mm6
+ xorb (%esp,%ebp,1),%bl
+ pxor %mm3,%mm7
+ pxor 400(%esp,%ebp,8),%mm6
+ movzbl %bl,%ebx
+ pxor %mm2,%mm2
+ psllq $4,%mm1
+ movd %mm7,%ecx
+ psrlq $4,%mm7
+ movq %mm6,%mm3
+ psrlq $4,%mm6
+ shll $4,%ecx
+ pxor 16(%esp,%edi,8),%mm7
+ psllq $60,%mm3
+ movzbl %cl,%ecx
+ pxor %mm3,%mm7
+ pxor 144(%esp,%edi,8),%mm6
+ pinsrw $2,(%esi,%ebx,2),%mm0
+ pxor %mm1,%mm6
+ movd %mm7,%edx
+ pinsrw $3,(%esi,%ecx,2),%mm2
+ psllq $12,%mm0
+ pxor %mm0,%mm6
+ psrlq $32,%mm7
+ pxor %mm2,%mm6
+ movl 548(%esp),%ecx
+ movd %mm7,%ebx
+ movq %mm6,%mm3
+ psllw $8,%mm6
+ psrlw $8,%mm3
+ por %mm3,%mm6
+ bswap %edx
+ pshufw $27,%mm6,%mm6
+ bswap %ebx
+ cmpl 552(%esp),%ecx
+ jne .L009outer
+ movl 544(%esp),%eax
+ movl %edx,12(%eax)
+ movl %ebx,8(%eax)
+ movq %mm6,(%eax)
+ movl 556(%esp),%esp
emms
- movl %ebx,12(%edi)
- movl %edx,4(%edi)
- movl %ecx,8(%edi)
- movl %ebp,(%edi)
- addl $20,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
+.globl gcm_init_clmul
+.type gcm_init_clmul,@function
+.align 16
+gcm_init_clmul:
+.L_gcm_init_clmul_begin:
+ movl 4(%esp),%edx
+ movl 8(%esp),%eax
+ call .L010pic
+.L010pic:
+ popl %ecx
+ leal .Lbswap-.L010pic(%ecx),%ecx
+ movdqu (%eax),%xmm2
+ pshufd $78,%xmm2,%xmm2
+ pshufd $255,%xmm2,%xmm4
+ movdqa %xmm2,%xmm3
+ psllq $1,%xmm2
+ pxor %xmm5,%xmm5
+ psrlq $63,%xmm3
+ pcmpgtd %xmm4,%xmm5
+ pslldq $8,%xmm3
+ por %xmm3,%xmm2
+ pand 16(%ecx),%xmm5
+ pxor %xmm5,%xmm2
+ movdqa %xmm2,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ xorps %xmm0,%xmm3
+ xorps %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ movdqu %xmm2,(%edx)
+ movdqu %xmm0,16(%edx)
+ ret
+.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
+.globl gcm_gmult_clmul
+.type gcm_gmult_clmul,@function
+.align 16
+gcm_gmult_clmul:
+.L_gcm_gmult_clmul_begin:
+ movl 4(%esp),%eax
+ movl 8(%esp),%edx
+ call .L011pic
+.L011pic:
+ popl %ecx
+ leal .Lbswap-.L011pic(%ecx),%ecx
+ movdqu (%eax),%xmm0
+ movdqa (%ecx),%xmm5
+ movups (%edx),%xmm2
+.byte 102,15,56,0,197
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ xorps %xmm0,%xmm3
+ xorps %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197
+ movdqu %xmm0,(%eax)
+ ret
+.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
+.globl gcm_ghash_clmul
+.type gcm_ghash_clmul,@function
+.align 16
+gcm_ghash_clmul:
+.L_gcm_ghash_clmul_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%eax
+ movl 24(%esp),%edx
+ movl 28(%esp),%esi
+ movl 32(%esp),%ebx
+ call .L012pic
+.L012pic:
+ popl %ecx
+ leal .Lbswap-.L012pic(%ecx),%ecx
+ movdqu (%eax),%xmm0
+ movdqa (%ecx),%xmm5
+ movdqu (%edx),%xmm2
+.byte 102,15,56,0,197
+ subl $16,%ebx
+ jz .L013odd_tail
+ movdqu (%esi),%xmm3
+ movdqu 16(%esi),%xmm6
+.byte 102,15,56,0,221
+.byte 102,15,56,0,245
+ pxor %xmm3,%xmm0
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm6,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,242,0
+.byte 102,15,58,68,250,17
+.byte 102,15,58,68,220,0
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm7
+ pxor %xmm4,%xmm6
+ movups 16(%edx),%xmm2
+ leal 32(%esi),%esi
+ subl $32,%ebx
+ jbe .L014even_tail
+.L015mod_loop:
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ xorps %xmm0,%xmm3
+ xorps %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqu (%esi),%xmm3
+ movups (%edx),%xmm2
+ pxor %xmm6,%xmm0
+ pxor %xmm7,%xmm1
+ movdqu 16(%esi),%xmm6
+.byte 102,15,56,0,221
+.byte 102,15,56,0,245
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,%xmm7
+ pxor %xmm3,%xmm1
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+.byte 102,15,58,68,242,0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pshufd $78,%xmm5,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm5,%xmm3
+ pshufd $78,%xmm2,%xmm5
+ pxor %xmm2,%xmm5
+.byte 102,15,58,68,250,17
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,58,68,221,0
+ movups 16(%edx),%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm3
+ movdqa %xmm3,%xmm5
+ psrldq $8,%xmm3
+ pslldq $8,%xmm5
+ pxor %xmm3,%xmm7
+ pxor %xmm5,%xmm6
+ movdqa (%ecx),%xmm5
+ leal 32(%esi),%esi
+ subl $32,%ebx
+ ja .L015mod_loop
+.L014even_tail:
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ xorps %xmm0,%xmm3
+ xorps %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm6,%xmm0
+ pxor %xmm7,%xmm1
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ testl %ebx,%ebx
+ jnz .L016done
+ movups (%edx),%xmm2
+.L013odd_tail:
+ movdqu (%esi),%xmm3
+.byte 102,15,56,0,221
+ pxor %xmm3,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ xorps %xmm0,%xmm3
+ xorps %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.L016done:
+.byte 102,15,56,0,197
+ movdqu %xmm0,(%eax)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
+.align 64
+.Lbswap:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 64
.Lrem_4bit:
-.long 0,0,0,29491200,0,58982400,0,38141952
-.long 0,117964800,0,113901568,0,76283904,0,88997888
-.long 0,235929600,0,265420800,0,227803136,0,206962688
-.long 0,152567808,0,148504576,0,177995776,0,190709760
+.long 0,0,0,471859200,0,943718400,0,610271232
+.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.align 64
-.L008rem_8bit:
+.Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha512-586.S
diff -u src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha512-586.S:1.2 src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha512-586.S:1.3
--- src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha512-586.S:1.2 Fri Jul 27 19:34:15 2012
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha512-586.S Sat May 16 17:32:54 2015
@@ -25,6 +25,278 @@ sha512_block_data_order:
movl %edi,4(%esp)
movl %eax,8(%esp)
movl %ebx,12(%esp)
+ leal _GLOBAL_OFFSET_TABLE_+[.-.L001K512](%ebp),%edx
+ movl OPENSSL_ia32cap_P@GOT(%edx),%edx
+ btl $26,(%edx)
+ jnc .L002loop_x86
+ movq (%esi),%mm0
+ movq 8(%esi),%mm1
+ movq 16(%esi),%mm2
+ movq 24(%esi),%mm3
+ movq 32(%esi),%mm4
+ movq 40(%esi),%mm5
+ movq 48(%esi),%mm6
+ movq 56(%esi),%mm7
+ subl $80,%esp
+.align 16
+.L003loop_sse2:
+ movq %mm1,8(%esp)
+ movq %mm2,16(%esp)
+ movq %mm3,24(%esp)
+ movq %mm5,40(%esp)
+ movq %mm6,48(%esp)
+ movq %mm7,56(%esp)
+ movl (%edi),%ecx
+ movl 4(%edi),%edx
+ addl $8,%edi
+ bswap %ecx
+ bswap %edx
+ movl %ecx,76(%esp)
+ movl %edx,72(%esp)
+.align 16
+.L00400_14_sse2:
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ addl $8,%edi
+ bswap %eax
+ bswap %ebx
+ movl %eax,68(%esp)
+ movl %ebx,64(%esp)
+ movq 40(%esp),%mm5
+ movq 48(%esp),%mm6
+ movq 56(%esp),%mm7
+ movq %mm4,%mm1
+ movq %mm4,%mm2
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ psllq $23,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm2,%mm3
+ psllq $23,%mm2
+ pxor %mm1,%mm3
+ psrlq $23,%mm1
+ pxor %mm2,%mm3
+ psllq $4,%mm2
+ pxor %mm1,%mm3
+ paddq (%ebp),%mm7
+ pxor %mm2,%mm3
+ pxor %mm6,%mm5
+ movq 8(%esp),%mm1
+ pand %mm4,%mm5
+ movq 16(%esp),%mm2
+ pxor %mm6,%mm5
+ movq 24(%esp),%mm4
+ paddq %mm5,%mm3
+ movq %mm0,(%esp)
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ movq %mm0,%mm6
+ paddq 72(%esp),%mm3
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ psllq $25,%mm6
+ movq %mm5,%mm7
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ subl $8,%esp
+ pxor %mm6,%mm7
+ movq %mm0,%mm5
+ por %mm2,%mm0
+ pand %mm2,%mm5
+ pand %mm1,%mm0
+ por %mm0,%mm5
+ paddq %mm5,%mm7
+ movq %mm3,%mm0
+ movb (%ebp),%dl
+ paddq %mm7,%mm0
+ addl $8,%ebp
+ cmpb $53,%dl
+ jne .L00400_14_sse2
+ movq 40(%esp),%mm5
+ movq 48(%esp),%mm6
+ movq 56(%esp),%mm7
+ movq %mm4,%mm1
+ movq %mm4,%mm2
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ psllq $23,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm2,%mm3
+ psllq $23,%mm2
+ pxor %mm1,%mm3
+ psrlq $23,%mm1
+ pxor %mm2,%mm3
+ psllq $4,%mm2
+ pxor %mm1,%mm3
+ paddq (%ebp),%mm7
+ pxor %mm2,%mm3
+ pxor %mm6,%mm5
+ movq 8(%esp),%mm1
+ pand %mm4,%mm5
+ movq 16(%esp),%mm2
+ pxor %mm6,%mm5
+ movq 24(%esp),%mm4
+ paddq %mm5,%mm3
+ movq %mm0,(%esp)
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ movq %mm0,%mm6
+ paddq 72(%esp),%mm3
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ psllq $25,%mm6
+ movq %mm5,%mm7
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ subl $8,%esp
+ pxor %mm6,%mm7
+ movq %mm0,%mm5
+ por %mm2,%mm0
+ movq 88(%esp),%mm6
+ pand %mm2,%mm5
+ pand %mm1,%mm0
+ movq 192(%esp),%mm2
+ por %mm0,%mm5
+ paddq %mm5,%mm7
+ movq %mm3,%mm0
+ movb (%ebp),%dl
+ paddq %mm7,%mm0
+ addl $8,%ebp
+.align 16
+.L00516_79_sse2:
+ movq %mm2,%mm1
+ psrlq $1,%mm2
+ movq %mm6,%mm7
+ psrlq $6,%mm6
+ movq %mm2,%mm3
+ psrlq $6,%mm2
+ movq %mm6,%mm5
+ psrlq $13,%mm6
+ pxor %mm2,%mm3
+ psrlq $1,%mm2
+ pxor %mm6,%mm5
+ psrlq $42,%mm6
+ pxor %mm2,%mm3
+ movq 200(%esp),%mm2
+ psllq $56,%mm1
+ pxor %mm6,%mm5
+ psllq $3,%mm7
+ pxor %mm1,%mm3
+ paddq 128(%esp),%mm2
+ psllq $7,%mm1
+ pxor %mm7,%mm5
+ psllq $42,%mm7
+ pxor %mm1,%mm3
+ pxor %mm7,%mm5
+ paddq %mm5,%mm3
+ paddq %mm2,%mm3
+ movq %mm3,72(%esp)
+ movq 40(%esp),%mm5
+ movq 48(%esp),%mm6
+ movq 56(%esp),%mm7
+ movq %mm4,%mm1
+ movq %mm4,%mm2
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ psllq $23,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm2,%mm3
+ psllq $23,%mm2
+ pxor %mm1,%mm3
+ psrlq $23,%mm1
+ pxor %mm2,%mm3
+ psllq $4,%mm2
+ pxor %mm1,%mm3
+ paddq (%ebp),%mm7
+ pxor %mm2,%mm3
+ pxor %mm6,%mm5
+ movq 8(%esp),%mm1
+ pand %mm4,%mm5
+ movq 16(%esp),%mm2
+ pxor %mm6,%mm5
+ movq 24(%esp),%mm4
+ paddq %mm5,%mm3
+ movq %mm0,(%esp)
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ movq %mm0,%mm6
+ paddq 72(%esp),%mm3
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ psllq $25,%mm6
+ movq %mm5,%mm7
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ subl $8,%esp
+ pxor %mm6,%mm7
+ movq %mm0,%mm5
+ por %mm2,%mm0
+ movq 88(%esp),%mm6
+ pand %mm2,%mm5
+ pand %mm1,%mm0
+ movq 192(%esp),%mm2
+ por %mm0,%mm5
+ paddq %mm5,%mm7
+ movq %mm3,%mm0
+ movb (%ebp),%dl
+ paddq %mm7,%mm0
+ addl $8,%ebp
+ cmpb $23,%dl
+ jne .L00516_79_sse2
+ movq 8(%esp),%mm1
+ movq 16(%esp),%mm2
+ movq 24(%esp),%mm3
+ movq 40(%esp),%mm5
+ movq 48(%esp),%mm6
+ movq 56(%esp),%mm7
+ paddq (%esi),%mm0
+ paddq 8(%esi),%mm1
+ paddq 16(%esi),%mm2
+ paddq 24(%esi),%mm3
+ paddq 32(%esi),%mm4
+ paddq 40(%esi),%mm5
+ paddq 48(%esi),%mm6
+ paddq 56(%esi),%mm7
+ movq %mm0,(%esi)
+ movq %mm1,8(%esi)
+ movq %mm2,16(%esi)
+ movq %mm3,24(%esi)
+ movq %mm4,32(%esi)
+ movq %mm5,40(%esi)
+ movq %mm6,48(%esi)
+ movq %mm7,56(%esi)
+ addl $640,%esp
+ subl $640,%ebp
+ cmpl 88(%esp),%edi
+ jb .L003loop_sse2
+ emms
+ movl 92(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
.align 16
.L002loop_x86:
movl (%edi),%eax
@@ -130,7 +402,7 @@ sha512_block_data_order:
movl $16,%ecx
.long 2784229001
.align 16
-.L00300_15_x86:
+.L00600_15_x86:
movl 40(%esp),%ecx
movl 44(%esp),%edx
movl %ecx,%esi
@@ -237,9 +509,9 @@ sha512_block_data_order:
subl $8,%esp
leal 8(%ebp),%ebp
cmpb $148,%dl
- jne .L00300_15_x86
+ jne .L00600_15_x86
.align 16
-.L00416_79_x86:
+.L00716_79_x86:
movl 312(%esp),%ecx
movl 316(%esp),%edx
movl %ecx,%esi
@@ -412,7 +684,7 @@ sha512_block_data_order:
subl $8,%esp
leal 8(%ebp),%ebp
cmpb $23,%dl
- jne .L00416_79_x86
+ jne .L00716_79_x86
movl 840(%esp),%esi
movl 844(%esp),%edi
movl (%esi),%eax
@@ -561,3 +833,4 @@ sha512_block_data_order:
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
+.comm OPENSSL_ia32cap_P,8,4
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha1-586.S
diff -u src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha1-586.S:1.3 src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha1-586.S:1.4
--- src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha1-586.S:1.3 Fri Jul 27 19:34:15 2012
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha1-586.S Sat May 16 17:32:54 2015
@@ -9,6 +9,21 @@ sha1_block_data_order:
pushl %ebx
pushl %esi
pushl %edi
+ call .L000pic_point
+.L000pic_point:
+ popl %ebp
+ leal _GLOBAL_OFFSET_TABLE_+[.-.L000pic_point](%ebp),%esi
+ movl OPENSSL_ia32cap_P@GOT(%esi),%esi
+ leal .LK_XX_XX-.L000pic_point(%ebp),%ebp
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ testl $512,%edx
+ jz .L001x86
+ testl $16777216,%eax
+ jz .L001x86
+ jmp .Lssse3_shortcut
+.align 16
+.L001x86:
movl 20(%esp),%ebp
movl 24(%esp),%esi
movl 28(%esp),%eax
@@ -17,9 +32,9 @@ sha1_block_data_order:
addl %esi,%eax
movl %eax,104(%esp)
movl 16(%ebp),%edi
- jmp .L000loop
+ jmp .L002loop
.align 16
-.L000loop:
+.L002loop:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
@@ -1366,7 +1381,7 @@ sha1_block_data_order:
movl %ebx,12(%ebp)
movl %edx,%esi
movl %ecx,16(%ebp)
- jb .L000loop
+ jb .L002loop
addl $76,%esp
popl %edi
popl %esi
@@ -1374,7 +1389,1251 @@ sha1_block_data_order:
popl %ebp
ret
.size sha1_block_data_order,.-.L_sha1_block_data_order_begin
+.type _sha1_block_data_order_ssse3,@function
+.align 16
+_sha1_block_data_order_ssse3:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ call .L003pic_point
+.L003pic_point:
+ popl %ebp
+ leal .LK_XX_XX-.L003pic_point(%ebp),%ebp
+.Lssse3_shortcut:
+ movdqa (%ebp),%xmm7
+ movdqa 16(%ebp),%xmm0
+ movdqa 32(%ebp),%xmm1
+ movdqa 48(%ebp),%xmm2
+ movdqa 64(%ebp),%xmm6
+ movl 20(%esp),%edi
+ movl 24(%esp),%ebp
+ movl 28(%esp),%edx
+ movl %esp,%esi
+ subl $208,%esp
+ andl $-64,%esp
+ movdqa %xmm0,112(%esp)
+ movdqa %xmm1,128(%esp)
+ movdqa %xmm2,144(%esp)
+ shll $6,%edx
+ movdqa %xmm7,160(%esp)
+ addl %ebp,%edx
+ movdqa %xmm6,176(%esp)
+ addl $64,%ebp
+ movl %edi,192(%esp)
+ movl %ebp,196(%esp)
+ movl %edx,200(%esp)
+ movl %esi,204(%esp)
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ movl 12(%edi),%edx
+ movl 16(%edi),%edi
+ movl %ebx,%esi
+ movdqu -64(%ebp),%xmm0
+ movdqu -48(%ebp),%xmm1
+ movdqu -32(%ebp),%xmm2
+ movdqu -16(%ebp),%xmm3
+.byte 102,15,56,0,198
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+ movdqa %xmm7,96(%esp)
+.byte 102,15,56,0,222
+ paddd %xmm7,%xmm0
+ paddd %xmm7,%xmm1
+ paddd %xmm7,%xmm2
+ movdqa %xmm0,(%esp)
+ psubd %xmm7,%xmm0
+ movdqa %xmm1,16(%esp)
+ psubd %xmm7,%xmm1
+ movdqa %xmm2,32(%esp)
+ psubd %xmm7,%xmm2
+ movdqa %xmm1,%xmm4
+ jmp .L004loop
+.align 16
+.L004loop:
+ addl (%esp),%edi
+ xorl %edx,%ecx
+.byte 102,15,58,15,224,8
+ movdqa %xmm3,%xmm6
+ movl %eax,%ebp
+ roll $5,%eax
+ paddd %xmm3,%xmm7
+ movdqa %xmm0,64(%esp)
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm6
+ xorl %edx,%esi
+ addl %eax,%edi
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%edi
+ pxor %xmm2,%xmm6
+ addl 4(%esp),%edx
+ xorl %ecx,%ebx
+ movl %edi,%esi
+ roll $5,%edi
+ pxor %xmm6,%xmm4
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
+ movdqa %xmm7,48(%esp)
+ xorl %ecx,%ebp
+ addl %edi,%edx
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm6
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 8(%esp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm0
+ paddd %xmm4,%xmm4
+ movl %edx,%ebp
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm6
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm0,%xmm7
+ rorl $7,%edi
+ addl %esi,%ecx
+ psrld $30,%xmm0
+ por %xmm6,%xmm4
+ addl 12(%esp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ pslld $2,%xmm7
+ pxor %xmm0,%xmm4
+ andl %edi,%ebp
+ xorl %eax,%edi
+ movdqa 96(%esp),%xmm0
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ pxor %xmm7,%xmm4
+ movdqa %xmm2,%xmm5
+ rorl $7,%edx
+ addl %ebp,%ebx
+ addl 16(%esp),%eax
+ xorl %edi,%edx
+.byte 102,15,58,15,233,8
+ movdqa %xmm4,%xmm7
+ movl %ebx,%ebp
+ roll $5,%ebx
+ paddd %xmm4,%xmm0
+ movdqa %xmm1,80(%esp)
+ andl %edx,%esi
+ xorl %edi,%edx
+ psrldq $4,%xmm7
+ xorl %edi,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm7
+ addl 20(%esp),%edi
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm7,%xmm5
+ andl %ecx,%ebp
+ xorl %edx,%ecx
+ movdqa %xmm0,(%esp)
+ xorl %edx,%ebp
+ addl %eax,%edi
+ movdqa %xmm5,%xmm1
+ movdqa %xmm5,%xmm7
+ rorl $7,%ebx
+ addl %ebp,%edi
+ addl 24(%esp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm1
+ paddd %xmm5,%xmm5
+ movl %edi,%ebp
+ roll $5,%edi
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm7
+ xorl %ecx,%esi
+ addl %edi,%edx
+ movdqa %xmm1,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm1
+ por %xmm7,%xmm5
+ addl 28(%esp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm0
+ pxor %xmm1,%xmm5
+ andl %eax,%ebp
+ xorl %ebx,%eax
+ movdqa 112(%esp),%xmm1
+ xorl %ebx,%ebp
+ addl %edx,%ecx
+ pxor %xmm0,%xmm5
+ movdqa %xmm3,%xmm6
+ rorl $7,%edi
+ addl %ebp,%ecx
+ addl 32(%esp),%ebx
+ xorl %eax,%edi
+.byte 102,15,58,15,242,8
+ movdqa %xmm5,%xmm0
+ movl %ecx,%ebp
+ roll $5,%ecx
+ paddd %xmm5,%xmm1
+ movdqa %xmm2,96(%esp)
+ andl %edi,%esi
+ xorl %eax,%edi
+ psrldq $4,%xmm0
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm0
+ addl 36(%esp),%eax
+ xorl %edi,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm0,%xmm6
+ andl %edx,%ebp
+ xorl %edi,%edx
+ movdqa %xmm1,16(%esp)
+ xorl %edi,%ebp
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,%xmm0
+ rorl $7,%ecx
+ addl %ebp,%eax
+ addl 40(%esp),%edi
+ xorl %edx,%ecx
+ pslldq $12,%xmm2
+ paddd %xmm6,%xmm6
+ movl %eax,%ebp
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm0
+ xorl %edx,%esi
+ addl %eax,%edi
+ movdqa %xmm2,%xmm1
+ rorl $7,%ebx
+ addl %esi,%edi
+ psrld $30,%xmm2
+ por %xmm0,%xmm6
+ addl 44(%esp),%edx
+ xorl %ecx,%ebx
+ movdqa 64(%esp),%xmm0
+ movl %edi,%esi
+ roll $5,%edi
+ pslld $2,%xmm1
+ pxor %xmm2,%xmm6
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
+ movdqa 112(%esp),%xmm2
+ xorl %ecx,%ebp
+ addl %edi,%edx
+ pxor %xmm1,%xmm6
+ movdqa %xmm4,%xmm7
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 48(%esp),%ecx
+ xorl %ebx,%eax
+.byte 102,15,58,15,251,8
+ movdqa %xmm6,%xmm1
+ movl %edx,%ebp
+ roll $5,%edx
+ paddd %xmm6,%xmm2
+ movdqa %xmm3,64(%esp)
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm1
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%edi
+ addl %esi,%ecx
+ pxor %xmm5,%xmm1
+ addl 52(%esp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ pxor %xmm1,%xmm7
+ andl %edi,%ebp
+ xorl %eax,%edi
+ movdqa %xmm2,32(%esp)
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm3
+ movdqa %xmm7,%xmm1
+ rorl $7,%edx
+ addl %ebp,%ebx
+ addl 56(%esp),%eax
+ xorl %edi,%edx
+ pslldq $12,%xmm3
+ paddd %xmm7,%xmm7
+ movl %ebx,%ebp
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %edi,%edx
+ psrld $31,%xmm1
+ xorl %edi,%esi
+ addl %ebx,%eax
+ movdqa %xmm3,%xmm2
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm3
+ por %xmm1,%xmm7
+ addl 60(%esp),%edi
+ xorl %edx,%ecx
+ movdqa 80(%esp),%xmm1
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm2
+ pxor %xmm3,%xmm7
+ andl %ecx,%ebp
+ xorl %edx,%ecx
+ movdqa 112(%esp),%xmm3
+ xorl %edx,%ebp
+ addl %eax,%edi
+ pxor %xmm2,%xmm7
+ rorl $7,%ebx
+ addl %ebp,%edi
+ movdqa %xmm7,%xmm2
+ addl (%esp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,15,58,15,214,8
+ xorl %ecx,%ebx
+ movl %edi,%ebp
+ roll $5,%edi
+ pxor %xmm1,%xmm0
+ movdqa %xmm4,80(%esp)
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm3,%xmm4
+ paddd %xmm7,%xmm3
+ xorl %ecx,%esi
+ addl %edi,%edx
+ pxor %xmm2,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%esp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm2
+ movdqa %xmm3,48(%esp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%ebp
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%ebp
+ addl %edx,%ecx
+ psrld $30,%xmm2
+ rorl $7,%edi
+ addl %ebp,%ecx
+ addl 8(%esp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ por %xmm2,%xmm0
+ andl %edi,%esi
+ xorl %eax,%edi
+ movdqa 96(%esp),%xmm2
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%esp),%eax
+ movdqa %xmm0,%xmm3
+ xorl %edi,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%ebp
+ xorl %edi,%edx
+ xorl %edi,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %ebp,%eax
+ addl 16(%esp),%edi
+ pxor %xmm5,%xmm1
+.byte 102,15,58,15,223,8
+ xorl %edx,%esi
+ movl %eax,%ebp
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ movdqa %xmm5,96(%esp)
+ xorl %ecx,%esi
+ addl %eax,%edi
+ movdqa %xmm4,%xmm5
+ paddd %xmm0,%xmm4
+ rorl $7,%ebx
+ addl %esi,%edi
+ pxor %xmm3,%xmm1
+ addl 20(%esp),%edx
+ xorl %ecx,%ebp
+ movl %edi,%esi
+ roll $5,%edi
+ movdqa %xmm1,%xmm3
+ movdqa %xmm4,(%esp)
+ xorl %ebx,%ebp
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %ebp,%edx
+ pslld $2,%xmm1
+ addl 24(%esp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm3
+ movl %edx,%ebp
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %esi,%ecx
+ por %xmm3,%xmm1
+ addl 28(%esp),%ebx
+ xorl %eax,%ebp
+ movdqa 64(%esp),%xmm3
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %edi,%ebp
+ addl %ecx,%ebx
+ rorl $7,%edx
+ movdqa %xmm1,%xmm4
+ addl %ebp,%ebx
+ addl 32(%esp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,15,58,15,224,8
+ xorl %edi,%esi
+ movl %ebx,%ebp
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ movdqa %xmm6,64(%esp)
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 128(%esp),%xmm6
+ paddd %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm4,%xmm2
+ addl 36(%esp),%edi
+ xorl %edx,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm4
+ movdqa %xmm5,16(%esp)
+ xorl %ecx,%ebp
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %ebp,%edi
+ pslld $2,%xmm2
+ addl 40(%esp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm4
+ movl %edi,%ebp
+ roll $5,%edi
+ xorl %ebx,%esi
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm4,%xmm2
+ addl 44(%esp),%ecx
+ xorl %ebx,%ebp
+ movdqa 80(%esp),%xmm4
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ rorl $7,%edi
+ movdqa %xmm2,%xmm5
+ addl %ebp,%ecx
+ addl 48(%esp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,15,58,15,233,8
+ xorl %eax,%esi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ movdqa %xmm7,80(%esp)
+ xorl %edi,%esi
+ addl %ecx,%ebx
+ movdqa %xmm6,%xmm7
+ paddd %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm5,%xmm3
+ addl 52(%esp),%eax
+ xorl %edi,%ebp
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm5
+ movdqa %xmm6,32(%esp)
+ xorl %edx,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %ebp,%eax
+ pslld $2,%xmm3
+ addl 56(%esp),%edi
+ xorl %edx,%esi
+ psrld $30,%xmm5
+ movl %eax,%ebp
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %esi,%edi
+ por %xmm5,%xmm3
+ addl 60(%esp),%edx
+ xorl %ecx,%ebp
+ movdqa 96(%esp),%xmm5
+ movl %edi,%esi
+ roll $5,%edi
+ xorl %ebx,%ebp
+ addl %edi,%edx
+ rorl $7,%eax
+ movdqa %xmm3,%xmm6
+ addl %ebp,%edx
+ addl (%esp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,15,58,15,242,8
+ xorl %ebx,%esi
+ movl %edx,%ebp
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ movdqa %xmm0,96(%esp)
+ xorl %eax,%esi
+ addl %edx,%ecx
+ movdqa %xmm7,%xmm0
+ paddd %xmm3,%xmm7
+ rorl $7,%edi
+ addl %esi,%ecx
+ pxor %xmm6,%xmm4
+ addl 4(%esp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm6
+ movdqa %xmm7,48(%esp)
+ xorl %edi,%ebp
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %ebp,%ebx
+ pslld $2,%xmm4
+ addl 8(%esp),%eax
+ xorl %edi,%esi
+ psrld $30,%xmm6
+ movl %ebx,%ebp
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm6,%xmm4
+ addl 12(%esp),%edi
+ xorl %edx,%ebp
+ movdqa 64(%esp),%xmm6
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%ebp
+ addl %eax,%edi
+ rorl $7,%ebx
+ movdqa %xmm4,%xmm7
+ addl %ebp,%edi
+ addl 16(%esp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,15,58,15,251,8
+ xorl %ecx,%esi
+ movl %edi,%ebp
+ roll $5,%edi
+ pxor %xmm6,%xmm5
+ movdqa %xmm1,64(%esp)
+ xorl %ebx,%esi
+ addl %edi,%edx
+ movdqa %xmm0,%xmm1
+ paddd %xmm4,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm7,%xmm5
+ addl 20(%esp),%ecx
+ xorl %ebx,%ebp
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm7
+ movdqa %xmm0,(%esp)
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %ebp,%ecx
+ pslld $2,%xmm5
+ addl 24(%esp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm7
+ movl %ecx,%ebp
+ roll $5,%ecx
+ xorl %edi,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm7,%xmm5
+ addl 28(%esp),%eax
+ xorl %edi,%ebp
+ movdqa 80(%esp),%xmm7
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ movdqa %xmm5,%xmm0
+ addl %ebp,%eax
+ movl %ecx,%ebp
+ pxor %xmm2,%xmm6
+.byte 102,15,58,15,196,8
+ xorl %edx,%ecx
+ addl 32(%esp),%edi
+ andl %edx,%ebp
+ pxor %xmm7,%xmm6
+ movdqa %xmm2,80(%esp)
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm1,%xmm2
+ paddd %xmm5,%xmm1
+ addl %ebp,%edi
+ movl %eax,%ebp
+ pxor %xmm0,%xmm6
+ roll $5,%eax
+ addl %esi,%edi
+ xorl %edx,%ecx
+ addl %eax,%edi
+ movdqa %xmm6,%xmm0
+ movdqa %xmm1,16(%esp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%esp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%ebp
+ rorl $7,%eax
+ psrld $30,%xmm0
+ addl %esi,%edx
+ movl %edi,%esi
+ roll $5,%edi
+ addl %ebp,%edx
+ xorl %ecx,%ebx
+ addl %edi,%edx
+ por %xmm0,%xmm6
+ movl %eax,%ebp
+ xorl %ebx,%eax
+ movdqa 96(%esp),%xmm0
+ addl 40(%esp),%ecx
+ andl %ebx,%ebp
+ andl %eax,%esi
+ rorl $7,%edi
+ addl %ebp,%ecx
+ movdqa %xmm6,%xmm1
+ movl %edx,%ebp
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %edi,%esi
+ xorl %eax,%edi
+ addl 44(%esp),%ebx
+ andl %eax,%esi
+ andl %edi,%ebp
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %ebp,%ebx
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movl %edx,%ebp
+ pxor %xmm3,%xmm7
+.byte 102,15,58,15,205,8
+ xorl %edi,%edx
+ addl 48(%esp),%eax
+ andl %edi,%ebp
+ pxor %xmm0,%xmm7
+ movdqa %xmm3,96(%esp)
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 144(%esp),%xmm3
+ paddd %xmm6,%xmm2
+ addl %ebp,%eax
+ movl %ebx,%ebp
+ pxor %xmm1,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edi,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm1
+ movdqa %xmm2,32(%esp)
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 52(%esp),%edi
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%ebp
+ rorl $7,%ebx
+ psrld $30,%xmm1
+ addl %esi,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ addl %ebp,%edi
+ xorl %edx,%ecx
+ addl %eax,%edi
+ por %xmm1,%xmm7
+ movl %ebx,%ebp
+ xorl %ecx,%ebx
+ movdqa 64(%esp),%xmm1
+ addl 56(%esp),%edx
+ andl %ecx,%ebp
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ movdqa %xmm7,%xmm2
+ movl %edi,%ebp
+ roll $5,%edi
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %edi,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%esp),%ecx
+ andl %ebx,%esi
+ andl %eax,%ebp
+ rorl $7,%edi
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %ebp,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %edi,%ebp
+ pxor %xmm4,%xmm0
+.byte 102,15,58,15,214,8
+ xorl %eax,%edi
+ addl (%esp),%ebx
+ andl %eax,%ebp
+ pxor %xmm1,%xmm0
+ movdqa %xmm4,64(%esp)
+ andl %edi,%esi
+ rorl $7,%edx
+ movdqa %xmm3,%xmm4
+ paddd %xmm7,%xmm3
+ addl %ebp,%ebx
+ movl %ecx,%ebp
+ pxor %xmm2,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm2
+ movdqa %xmm3,48(%esp)
+ movl %edx,%esi
+ xorl %edi,%edx
+ addl 4(%esp),%eax
+ andl %edi,%esi
+ pslld $2,%xmm0
+ andl %edx,%ebp
+ rorl $7,%ecx
+ psrld $30,%xmm2
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %ebp,%eax
+ xorl %edi,%edx
+ addl %ebx,%eax
+ por %xmm2,%xmm0
+ movl %ecx,%ebp
+ xorl %edx,%ecx
+ movdqa 80(%esp),%xmm2
+ addl 8(%esp),%edi
+ andl %edx,%ebp
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %ebp,%edi
+ movdqa %xmm0,%xmm3
+ movl %eax,%ebp
+ roll $5,%eax
+ addl %esi,%edi
+ xorl %edx,%ecx
+ addl %eax,%edi
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%esp),%edx
+ andl %ecx,%esi
+ andl %ebx,%ebp
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %edi,%esi
+ roll $5,%edi
+ addl %ebp,%edx
+ xorl %ecx,%ebx
+ addl %edi,%edx
+ movl %eax,%ebp
+ pxor %xmm5,%xmm1
+.byte 102,15,58,15,223,8
+ xorl %ebx,%eax
+ addl 16(%esp),%ecx
+ andl %ebx,%ebp
+ pxor %xmm2,%xmm1
+ movdqa %xmm5,80(%esp)
+ andl %eax,%esi
+ rorl $7,%edi
+ movdqa %xmm4,%xmm5
+ paddd %xmm0,%xmm4
+ addl %ebp,%ecx
+ movl %edx,%ebp
+ pxor %xmm3,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm3
+ movdqa %xmm4,(%esp)
+ movl %edi,%esi
+ xorl %eax,%edi
+ addl 20(%esp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %edi,%ebp
+ rorl $7,%edx
+ psrld $30,%xmm3
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %ebp,%ebx
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ por %xmm3,%xmm1
+ movl %edx,%ebp
+ xorl %edi,%edx
+ movdqa 96(%esp),%xmm3
+ addl 24(%esp),%eax
+ andl %edi,%ebp
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %ebp,%eax
+ movdqa %xmm1,%xmm4
+ movl %ebx,%ebp
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edi,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 28(%esp),%edi
+ andl %edx,%esi
+ andl %ecx,%ebp
+ rorl $7,%ebx
+ addl %esi,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ addl %ebp,%edi
+ xorl %edx,%ecx
+ addl %eax,%edi
+ movl %ebx,%ebp
+ pxor %xmm6,%xmm2
+.byte 102,15,58,15,224,8
+ xorl %ecx,%ebx
+ addl 32(%esp),%edx
+ andl %ecx,%ebp
+ pxor %xmm3,%xmm2
+ movdqa %xmm6,96(%esp)
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm5,%xmm6
+ paddd %xmm1,%xmm5
+ addl %ebp,%edx
+ movl %edi,%ebp
+ pxor %xmm4,%xmm2
+ roll $5,%edi
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %edi,%edx
+ movdqa %xmm2,%xmm4
+ movdqa %xmm5,16(%esp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%esp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%ebp
+ rorl $7,%edi
+ psrld $30,%xmm4
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %ebp,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm4,%xmm2
+ movl %edi,%ebp
+ xorl %eax,%edi
+ movdqa 64(%esp),%xmm4
+ addl 40(%esp),%ebx
+ andl %eax,%ebp
+ andl %edi,%esi
+ rorl $7,%edx
+ addl %ebp,%ebx
+ movdqa %xmm2,%xmm5
+ movl %ecx,%ebp
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %edi,%edx
+ addl 44(%esp),%eax
+ andl %edi,%esi
+ andl %edx,%ebp
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %ebp,%eax
+ xorl %edi,%edx
+ addl %ebx,%eax
+ addl 48(%esp),%edi
+ pxor %xmm7,%xmm3
+.byte 102,15,58,15,233,8
+ xorl %edx,%esi
+ movl %eax,%ebp
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ movdqa %xmm7,64(%esp)
+ xorl %ecx,%esi
+ addl %eax,%edi
+ movdqa %xmm6,%xmm7
+ paddd %xmm2,%xmm6
+ rorl $7,%ebx
+ addl %esi,%edi
+ pxor %xmm5,%xmm3
+ addl 52(%esp),%edx
+ xorl %ecx,%ebp
+ movl %edi,%esi
+ roll $5,%edi
+ movdqa %xmm3,%xmm5
+ movdqa %xmm6,32(%esp)
+ xorl %ebx,%ebp
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %ebp,%edx
+ pslld $2,%xmm3
+ addl 56(%esp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm5
+ movl %edx,%ebp
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %esi,%ecx
+ por %xmm5,%xmm3
+ addl 60(%esp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %edi,%ebp
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %ebp,%ebx
+ addl (%esp),%eax
+ paddd %xmm3,%xmm7
+ xorl %edi,%esi
+ movl %ebx,%ebp
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm7,48(%esp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%esp),%edi
+ xorl %edx,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%ebp
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %ebp,%edi
+ addl 8(%esp),%edx
+ xorl %ecx,%esi
+ movl %edi,%ebp
+ roll $5,%edi
+ xorl %ebx,%esi
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%esp),%ecx
+ xorl %ebx,%ebp
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %ebp,%ecx
+ movl 196(%esp),%ebp
+ cmpl 200(%esp),%ebp
+ je .L005done
+ movdqa 160(%esp),%xmm7
+ movdqa 176(%esp),%xmm6
+ movdqu (%ebp),%xmm0
+ movdqu 16(%ebp),%xmm1
+ movdqu 32(%ebp),%xmm2
+ movdqu 48(%ebp),%xmm3
+ addl $64,%ebp
+.byte 102,15,56,0,198
+ movl %ebp,196(%esp)
+ movdqa %xmm7,96(%esp)
+ addl 16(%esp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%ebp
+ roll $5,%ecx
+ paddd %xmm7,%xmm0
+ xorl %edi,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,(%esp)
+ addl 20(%esp),%eax
+ xorl %edi,%ebp
+ psubd %xmm7,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %ebp,%eax
+ addl 24(%esp),%edi
+ xorl %edx,%esi
+ movl %eax,%ebp
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %esi,%edi
+ addl 28(%esp),%edx
+ xorl %ecx,%ebp
+ movl %edi,%esi
+ roll $5,%edi
+ xorl %ebx,%ebp
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 32(%esp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%ebp
+ roll $5,%edx
+ paddd %xmm7,%xmm1
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %esi,%ecx
+ movdqa %xmm1,16(%esp)
+ addl 36(%esp),%ebx
+ xorl %eax,%ebp
+ psubd %xmm7,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %edi,%ebp
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %ebp,%ebx
+ addl 40(%esp),%eax
+ xorl %edi,%esi
+ movl %ebx,%ebp
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%esp),%edi
+ xorl %edx,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%ebp
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %ebp,%edi
+ addl 48(%esp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %edi,%ebp
+ roll $5,%edi
+ paddd %xmm7,%xmm2
+ xorl %ebx,%esi
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%esp)
+ addl 52(%esp),%ecx
+ xorl %ebx,%ebp
+ psubd %xmm7,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %ebp,%ecx
+ addl 56(%esp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ xorl %edi,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%esp),%eax
+ xorl %edi,%ebp
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %ebp,%eax
+ movl 192(%esp),%ebp
+ addl (%ebp),%eax
+ addl 4(%ebp),%esi
+ addl 8(%ebp),%ecx
+ movl %eax,(%ebp)
+ addl 12(%ebp),%edx
+ movl %esi,4(%ebp)
+ addl 16(%ebp),%edi
+ movl %ecx,8(%ebp)
+ movl %esi,%ebx
+ movl %edx,12(%ebp)
+ movl %edi,16(%ebp)
+ movdqa %xmm1,%xmm4
+ jmp .L004loop
+.align 16
+.L005done:
+ addl 16(%esp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ xorl %edi,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%esp),%eax
+ xorl %edi,%ebp
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %ebp,%eax
+ addl 24(%esp),%edi
+ xorl %edx,%esi
+ movl %eax,%ebp
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %esi,%edi
+ addl 28(%esp),%edx
+ xorl %ecx,%ebp
+ movl %edi,%esi
+ roll $5,%edi
+ xorl %ebx,%ebp
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 32(%esp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%ebp
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %esi,%ecx
+ addl 36(%esp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %edi,%ebp
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %ebp,%ebx
+ addl 40(%esp),%eax
+ xorl %edi,%esi
+ movl %ebx,%ebp
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%esp),%edi
+ xorl %edx,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%ebp
+ addl %eax,%edi
+ rorl $7,%ebx
+ addl %ebp,%edi
+ addl 48(%esp),%edx
+ xorl %ecx,%esi
+ movl %edi,%ebp
+ roll $5,%edi
+ xorl %ebx,%esi
+ addl %edi,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%esp),%ecx
+ xorl %ebx,%ebp
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ rorl $7,%edi
+ addl %ebp,%ecx
+ addl 56(%esp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%ebp
+ roll $5,%ecx
+ xorl %edi,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%esp),%eax
+ xorl %edi,%ebp
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%ebp
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %ebp,%eax
+ movl 192(%esp),%ebp
+ addl (%ebp),%eax
+ movl 204(%esp),%esp
+ addl 4(%ebp),%esi
+ addl 8(%ebp),%ecx
+ movl %eax,(%ebp)
+ addl 12(%ebp),%edx
+ movl %esi,4(%ebp)
+ addl 16(%ebp),%edi
+ movl %ecx,8(%ebp)
+ movl %edx,12(%ebp)
+ movl %edi,16(%ebp)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3
+.align 64
+.LK_XX_XX:
+.long 1518500249,1518500249,1518500249,1518500249
+.long 1859775393,1859775393,1859775393,1859775393
+.long 2400959708,2400959708,2400959708,2400959708
+.long 3395469782,3395469782,3395469782,3395469782
+.long 66051,67438087,134810123,202182159
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.comm OPENSSL_ia32cap_P,8,4
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/x86cpuid.S
diff -u src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/x86cpuid.S:1.8 src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/x86cpuid.S:1.9
--- src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/x86cpuid.S:1.8 Sat Jan 11 18:20:06 2014
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/x86cpuid.S Sat May 16 17:32:54 2015
@@ -226,6 +226,18 @@ OPENSSL_wipe_cpu:
movl (%ecx),%ecx
btl $1,(%ecx)
jnc .L015no_x87
+ andl $83886080,%ecx
+ cmpl $83886080,%ecx
+ jne .L016no_sse2
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+.L016no_sse2:
.long 4007259865,4007259865,4007259865,4007259865,2430851995
.L015no_x87:
leal 4(%esp),%eax
@@ -241,11 +253,11 @@ OPENSSL_atomic_add:
pushl %ebx
nop
movl (%edx),%eax
-.L016spin:
+.L017spin:
leal (%eax,%ecx,1),%ebx
nop
.long 447811568
- jne .L016spin
+ jne .L017spin
movl %ebx,%eax
popl %ebx
ret
@@ -286,32 +298,32 @@ OPENSSL_cleanse:
movl 8(%esp),%ecx
xorl %eax,%eax
cmpl $7,%ecx
- jae .L017lot
+ jae .L018lot
cmpl $0,%ecx
- je .L018ret
-.L019little:
+ je .L019ret
+.L020little:
movb %al,(%edx)
subl $1,%ecx
leal 1(%edx),%edx
- jnz .L019little
-.L018ret:
+ jnz .L020little
+.L019ret:
ret
.align 16
-.L017lot:
+.L018lot:
testl $3,%edx
- jz .L020aligned
+ jz .L021aligned
movb %al,(%edx)
leal -1(%ecx),%ecx
leal 1(%edx),%edx
- jmp .L017lot
-.L020aligned:
+ jmp .L018lot
+.L021aligned:
movl %eax,(%edx)
leal -4(%ecx),%ecx
testl $-4,%ecx
leal 4(%edx),%edx
- jnz .L020aligned
+ jnz .L021aligned
cmpl $0,%ecx
- jne .L019little
+ jne .L020little
ret
.size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin
.globl OPENSSL_ia32_rdrand
@@ -320,11 +332,11 @@ OPENSSL_cleanse:
OPENSSL_ia32_rdrand:
.L_OPENSSL_ia32_rdrand_begin:
movl $8,%ecx
-.L021loop:
+.L022loop:
.byte 15,199,240
- jc .L022break
- loop .L021loop
-.L022break:
+ jc .L023break
+ loop .L022loop
+.L023break:
cmpl $0,%eax
cmovel %ecx,%eax
ret
Added files:
Index: src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/modes.inc
diff -u /dev/null src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/modes.inc:1.1
--- /dev/null Sat May 16 17:32:54 2015
+++ src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/modes.inc Sat May 16 17:32:54 2015
@@ -0,0 +1,4 @@
+.PATH.S: ${.PARSEDIR}
+MODES_SRCS += ghash-x86.o
+MODESCPPFLAGS = -DGHASH_ASM
+.include "../../modes.inc"