So I have revived a diff from drahn@/patrick@ to add kernel support
for the FPU/SIMD unit on armv7.  With that diff, it is possible to use
the "NEON" SIMD instructions, even though we're still using the
softfloat ABI.  And it turns out libcrypto has code to detect this and
starts using the SIMD codepaths for some of the assembly-optimized
crypto functions.

Unfortunately those code paths suffer from the same problem as some of
the other armv7-specific assembly code in libcrypto.  They assume that
unaligned access is allowed.  In my first diff, I left the SIMD
codepaths alone, in the hope that they would be all right.  But here is
a diff that disables them when __STRICT_ALIGNMENT is defined.

This does raise the question of how viable our approach of not allowing
unaligned access on armv7 really is.  I think all the SoCs we support
include NEON support and for some of the crypto code it provides a
significant performance boost.

Thoughts?


Index: lib/libcrypto/modes/gcm128.c
===================================================================
RCS file: /cvs/src/lib/libcrypto/modes/gcm128.c,v
retrieving revision 1.21
diff -u -p -r1.21 gcm128.c
--- lib/libcrypto/modes/gcm128.c        9 Dec 2017 07:16:51 -0000       1.21
+++ lib/libcrypto/modes/gcm128.c        21 Jan 2018 12:46:16 -0000
@@ -661,7 +661,7 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const 
 #  endif
 # elif defined(__arm__) || defined(__arm)
 #  include "arm_arch.h"
-#  if __ARM_ARCH__>=7
+#  if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
 #   define GHASH_ASM_ARM
 #   define GCM_FUNCREF_4BIT
 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
Index: lib/libcrypto/modes/asm/ghash-armv4.pl
===================================================================
RCS file: /cvs/src/lib/libcrypto/modes/asm/ghash-armv4.pl,v
retrieving revision 1.2
diff -u -p -r1.2 ghash-armv4.pl
--- lib/libcrypto/modes/asm/ghash-armv4.pl      4 Jan 2017 22:54:05 -0000       1.2
+++ lib/libcrypto/modes/asm/ghash-armv4.pl      21 Jan 2018 12:46:16 -0000
@@ -319,7 +319,7 @@ sub Dhi()   { shift=~m|q([1]?[0-9])|?"d"
 sub Q()     { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; }
 
 $code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
 .fpu   neon
 
 .global        gcm_gmult_neon
Index: lib/libcrypto/sha/asm/sha512-armv4.pl
===================================================================
RCS file: /cvs/src/lib/libcrypto/sha/asm/sha512-armv4.pl,v
retrieving revision 1.2
diff -u -p -r1.2 sha512-armv4.pl
--- lib/libcrypto/sha/asm/sha512-armv4.pl       7 Jan 2018 12:35:52 -0000       1.2
+++ lib/libcrypto/sha/asm/sha512-armv4.pl       21 Jan 2018 12:46:16 -0000
@@ -229,7 +229,7 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c
 sha512_block_data_order:
        sub     r3,pc,#8                @ sha512_block_data_order
        add     $len,$inp,$len,lsl#7    @ len to point at the end of inp
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
        ldr     r12,.LOPENSSL_armcap
        ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
        tst     r12,#1
@@ -533,7 +533,7 @@ ___
 }
 
 $code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
 .fpu   neon
 
 .align 4

Reply via email to