On Tue, May 19, 2015 at 03:45:10AM +0200, Timo Buhrmester wrote:
> As of late, when building (and installing) -head I end up with a
> libcrypto causing SIGILL, apparently due to using the ``pshufb''
> instruction (which I believe is part of the SSE3 extension).

Second version of the patch, covering the other algorithms as well.
I just don't know how to trigger some of them.

Joerg

Index: aes-x86_64.S =================================================================== RCS file: /home/joerg/repo/netbsd/src/crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/aes-x86_64.S,v retrieving revision 1.6 diff -u -p -r1.6 aes-x86_64.S --- aes-x86_64.S 4 Aug 2012 11:03:34 -0000 1.6 +++ aes-x86_64.S 21 May 2015 22:26:38 -0000 @@ -1325,12 +1325,12 @@ AES_cbc_encrypt: leaq .LAES_Td(%rip),%r14 .Lcbc_picked_te: - movl OPENSSL_ia32cap_P@GOTPCREL(%rip),%r10d + movq OPENSSL_ia32cap_P@GOTPCREL(%rip),%r10 cmpq $512,%rdx jb .Lcbc_slow_prologue testq $15,%rdx jnz .Lcbc_slow_prologue - btl $28,%r10d + btl $28,(%r10) jc .Lcbc_slow_prologue Index: aesni-sha1-x86_64.S =================================================================== RCS file: /home/joerg/repo/netbsd/src/crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/aesni-sha1-x86_64.S,v retrieving revision 1.4 diff -u -p -r1.4 aesni-sha1-x86_64.S --- aesni-sha1-x86_64.S 16 May 2015 19:08:37 -0000 1.4 +++ aesni-sha1-x86_64.S 21 May 2015 22:16:55 -0000 @@ -7,14 +7,11 @@ .align 16 aesni_cbc_sha1_enc: - movl OPENSSL_ia32cap_P+0@GOTPCREL(%rip),%r10d - movl OPENSSL_ia32cap_P+4@GOTPCREL(%rip),%r11d - andl $268435456,%r11d - andl $1073741824,%r10d - orl %r11d,%r10d - cmpl $1342177280,%r10d - je aesni_cbc_sha1_enc_avx + movq OPENSSL_ia32cap_P@GOTPCREL(%rip),%r10 + btl $28, 4(%r10) + jc aesni_cbc_sha1_enc_avx jmp aesni_cbc_sha1_enc_ssse3 + .byte 0xf3,0xc3 .size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc .type aesni_cbc_sha1_enc_ssse3,@function Index: rc4-x86_64.S =================================================================== RCS file: /home/joerg/repo/netbsd/src/crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/rc4-x86_64.S,v retrieving revision 1.6 diff -u -p -r1.6 rc4-x86_64.S --- rc4-x86_64.S 4 Aug 2012 11:03:35 -0000 1.6 +++ rc4-x86_64.S 21 May 2015 22:25:35 -0000 @@ -24,7 +24,7 @@ RC4: orq %rsi,%rsi movb -4(%rdi),%cl cmpl $-1,256(%rdi) je .LRC4_CHAR - movl OPENSSL_ia32cap_P@GOTPCREL(%rip),%r8d + movq 
OPENSSL_ia32cap_P@GOTPCREL(%rip),%r8 xorq %rbx,%rbx incb %r10b subq %r10,%rbx @@ -32,7 +32,7 @@ RC4: orq %rsi,%rsi movl (%rdi,%r10,4),%eax testq $-16,%r11 jz .Lloop1 - btl $30,%r8d + btl $30,(%r8) jc .Lintel andq $7,%rbx leaq 1(%r10),%rsi @@ -531,8 +531,8 @@ private_RC4_set_key: xorq %r10,%r10 xorq %r11,%r11 - movl OPENSSL_ia32cap_P@GOTPCREL(%rip),%r8d - btl $20,%r8d + movq OPENSSL_ia32cap_P@GOTPCREL(%rip),%r8 + btl $20,(%r8) jc .Lc1stloop jmp .Lw1stloop Index: sha1-x86_64.S =================================================================== RCS file: /home/joerg/repo/netbsd/src/crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/sha1-x86_64.S,v retrieving revision 1.6 diff -u -p -r1.6 sha1-x86_64.S --- sha1-x86_64.S 16 May 2015 19:08:37 -0000 1.6 +++ sha1-x86_64.S 21 May 2015 22:12:42 -0000 @@ -6,15 +6,12 @@ .type sha1_block_data_order,@function .align 16 sha1_block_data_order: - movl OPENSSL_ia32cap_P+0@GOTPCREL(%rip),%r9d - movl OPENSSL_ia32cap_P+4@GOTPCREL(%rip),%r8d - testl $512,%r8d - jz .Lialu - andl $268435456,%r8d - andl $1073741824,%r9d - orl %r9d,%r8d - cmpl $1342177280,%r8d - je _avx_shortcut + movq OPENSSL_ia32cap_P@GOTPCREL(%rip),%r8 + movl 4(%r8), %r8d + btl $9, %r8d + jnc .Lialu + btl $28, %r8d + jc _avx_shortcut jmp _ssse3_shortcut .align 16 Index: x86_64-gf2m.S =================================================================== RCS file: /home/joerg/repo/netbsd/src/crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/x86_64-gf2m.S,v retrieving revision 1.1 diff -u -p -r1.1 x86_64-gf2m.S --- x86_64-gf2m.S 16 May 2015 22:23:31 -0000 1.1 +++ x86_64-gf2m.S 21 May 2015 22:16:41 -0000 @@ -203,7 +203,7 @@ _mul_1x1: .align 16 bn_GF2m_mul_2x2: movq OPENSSL_ia32cap_P@GOTPCREL(%rip),%rax - btq $33,%rax + btl $1, 4(%rax) jnc .Lvanilla_mul_2x2 .byte 102,72,15,110,198