This adds initial support for assembly crypto acceleration on ELF x86_64
targets.
I got tired of being unable to do good benchmarks due to AES crypto
being so slow, so I split up Wouter's original giant patch, got all the
infrastructure bits out of the way (the big 'update.sh' file is much
smaller now), and reduced it to this.
It differs a little from OpenSSL/OpenBSD's approach in that all the
.s files are generated ahead of time, before the tarball is created.
So, it ships pre-generated .s files. I don't see that as a great burden,
since most of the development on the perl generators happens somewhere
else anyway, and it removes the need for perl to be available on the
host system.
This is only enabled on the systems we know use ELF (it would be nice
to have a way to detect ELF through a configure test), and only on
x86_64 to start.
---
.gitignore | 4 ++++
configure.ac | 25 ++++++++++++++-------
crypto/Makefile.am | 21 ++++++++++++------
crypto/Makefile.am.elf-x86_64 | 51 +++++++++++++++++++++++++++++++++++++++++++
update.sh | 26 +++++++++++++++++++++-
5 files changed, 111 insertions(+), 16 deletions(-)
create mode 100644 crypto/Makefile.am.elf-x86_64
diff --git a/.gitignore b/.gitignore
index 03ff4da..e964021 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,10 @@
# C stuff
*.o
+# Assembly stuff
+*.S
+*.s
+
# Windows stuff
*.obj
*.exe
diff --git a/configure.ac b/configure.ac
index 2cc7477..eaa92fc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -19,10 +19,12 @@ case $host_os in
*freebsd*)
HOST_OS=freebsd;
AC_SUBST([PROG_LDADD], ['-lthr'])
+ HOST_ABI=elf
;;
*linux*)
HOST_OS=linux;
CFLAGS="$CFLAGS -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_POSIX_SOURCE
-D_GNU_SOURCE"
+ HOST_ABI=elf
;;
*solaris*)
HOST_OS=solaris;
@@ -31,6 +33,7 @@ case $host_os in
;;
*openbsd*)
AC_DEFINE([HAVE_ATTRIBUTE__BOUNDED__], [1], [OpenBSD gcc has
bounded])
+ HOST_ABI=elf
;;
*mingw*)
HOST_OS=win
@@ -40,11 +43,11 @@ case $host_os in
*) ;;
esac
-AM_CONDITIONAL(HOST_DARWIN, test x$HOST_OS = xdarwin)
-AM_CONDITIONAL(HOST_FREEBSD, test x$HOST_OS = xfreebsd)
-AM_CONDITIONAL(HOST_LINUX, test x$HOST_OS = xlinux)
-AM_CONDITIONAL(HOST_SOLARIS, test x$HOST_OS = xsolaris)
-AM_CONDITIONAL(HOST_WIN, test x$HOST_OS = xwin)
+AM_CONDITIONAL([HOST_DARWIN], [test x$HOST_OS = xdarwin])
+AM_CONDITIONAL([HOST_FREEBSD], [test x$HOST_OS = xfreebsd])
+AM_CONDITIONAL([HOST_LINUX], [test x$HOST_OS = xlinux])
+AM_CONDITIONAL([HOST_SOLARIS], [test x$HOST_OS = xsolaris])
+AM_CONDITIONAL([HOST_WIN], [test x$HOST_OS = xwin])
AC_CHECK_FUNC([clock_gettime],,
[AC_SEARCH_LIBS([clock_gettime],[rt posix4])])
@@ -52,6 +55,7 @@ AC_CHECK_FUNC([clock_gettime],,
AC_CHECK_FUNC([dl_iterate_phdr],,
[AC_SEARCH_LIBS([dl_iterate_phdr],[dl])])
+AM_PROG_AS
AC_PROG_CC
AC_PROG_LIBTOOL
AC_PROG_CC_STDC
@@ -134,18 +138,23 @@ fi
AC_CHECK_HEADERS([sys/sysctl.h err.h])
AC_ARG_WITH([openssldir],
- AS_HELP_STRING([--with-openssldir], [Set the default openssl
directory]),
+ AS_HELP_STRING([--with-openssldir],
+ [Set the default openssl directory]),
AC_DEFINE_UNQUOTED(OPENSSLDIR, "$withval")
)
AC_ARG_WITH([enginesdir],
- AS_HELP_STRING([--with-enginesdir], [Set the default engines directory
(use with openssldir)]),
+ AS_HELP_STRING([--with-enginesdir],
+ [Set the default engines directory (use with
openssldir)]),
AC_DEFINE_UNQUOTED(ENGINESDIR, "$withval")
)
AC_ARG_ENABLE([asm],
AS_HELP_STRING([--disable-asm], [Disable assembly]))
-AS_IF([test "x$enable_asm" = "xno"], [CFLAGS="$CFLAGS -DOPENSSL_NO_ASM"])
+AM_CONDITIONAL([OPENSSL_NO_ASM], [test "x$enable_asm" = "xno"])
+
+AM_CONDITIONAL([HOST_ABI_X86_64_ELF],
+ [test "x$HOST_ABI" = "xelf" -a "$host_cpu" = "x86_64" -a "x$enable_asm"
!= "xno"])
AC_ARG_ENABLE([libtls],
AS_HELP_STRING([--enable-libtls], [Enable building the libtls library]))
diff --git a/crypto/Makefile.am b/crypto/Makefile.am
index 39b143d..a10ad94 100644
--- a/crypto/Makefile.am
+++ b/crypto/Makefile.am
@@ -100,9 +100,23 @@ noinst_HEADERS += compat/arc4random_solaris.h
noinst_HEADERS += compat/arc4random_win.h
noinst_HEADERS += compat/chacha_private.h
+CLEANFILES =
+BUILT_SOURCES =
libcrypto_la_SOURCES =
EXTRA_libcrypto_la_SOURCES =
+if HOST_ABI_X86_64_ELF
+include Makefile.am.elf-x86_64
+else
+libcrypto_la_SOURCES += aes/aes_cbc.c
+libcrypto_la_SOURCES += aes/aes_core.c
+libcrypto_la_SOURCES += camellia/camellia.c
+libcrypto_la_SOURCES += camellia/cmll_cbc.c
+libcrypto_la_SOURCES += rc4/rc4_enc.c
+libcrypto_la_SOURCES += rc4/rc4_skey.c
+libcrypto_la_SOURCES += whrlpool/wp_block.c
+endif
+
libcrypto_la_SOURCES += cpt_err.c
libcrypto_la_SOURCES += cryptlib.c
libcrypto_la_SOURCES += cversion.c
@@ -118,9 +132,7 @@ noinst_HEADERS += md32_common.h
noinst_HEADERS += o_time.h
# aes
-libcrypto_la_SOURCES += aes/aes_cbc.c
libcrypto_la_SOURCES += aes/aes_cfb.c
-libcrypto_la_SOURCES += aes/aes_core.c
libcrypto_la_SOURCES += aes/aes_ctr.c
libcrypto_la_SOURCES += aes/aes_ecb.c
libcrypto_la_SOURCES += aes/aes_ige.c
@@ -284,8 +296,6 @@ libcrypto_la_SOURCES += buffer/buf_str.c
libcrypto_la_SOURCES += buffer/buffer.c
# camellia
-libcrypto_la_SOURCES += camellia/camellia.c
-libcrypto_la_SOURCES += camellia/cmll_cbc.c
libcrypto_la_SOURCES += camellia/cmll_cfb.c
libcrypto_la_SOURCES += camellia/cmll_ctr.c
libcrypto_la_SOURCES += camellia/cmll_ecb.c
@@ -666,8 +676,6 @@ libcrypto_la_SOURCES += rc2/rc2ofb64.c
noinst_HEADERS += rc2/rc2_locl.h
# rc4
-libcrypto_la_SOURCES += rc4/rc4_enc.c
-libcrypto_la_SOURCES += rc4/rc4_skey.c
noinst_HEADERS += rc4/rc4_locl.h
# ripemd
@@ -739,7 +747,6 @@ libcrypto_la_SOURCES += ui/ui_util.c
noinst_HEADERS += ui/ui_locl.h
# whrlpool
-libcrypto_la_SOURCES += whrlpool/wp_block.c
libcrypto_la_SOURCES += whrlpool/wp_dgst.c
noinst_HEADERS += whrlpool/wp_locl.h
diff --git a/crypto/Makefile.am.elf-x86_64 b/crypto/Makefile.am.elf-x86_64
new file mode 100644
index 0000000..6b718f5
--- /dev/null
+++ b/crypto/Makefile.am.elf-x86_64
@@ -0,0 +1,51 @@
+SSLASM =
+PERLASM_SCHEME = elf
+
+#aes
+libcrypto_la_CFLAGS += -DAES_ASM
+SSLASM += aes/aes-elf-x86_64.s
+libcrypto_la_CFLAGS += -DBSAES_ASM
+SSLASM += aes/bsaes-elf-x86_64.s
+libcrypto_la_CFLAGS += -DVPAES_ASM
+SSLASM += aes/vpaes-elf-x86_64.s
+SSLASM += aes/aesni-elf-x86_64.s
+SSLASM += aes/aesni-sha1-elf-x86_64.s
+# bn
+libcrypto_la_CFLAGS += -DOPENSSL_IA32_SSE2
+SSLASM += bn/modexp512-elf-x86_64.s
+libcrypto_la_CFLAGS += -DOPENSSL_BN_ASM_MONT
+SSLASM += bn/mont-elf-x86_64.s
+libcrypto_la_CFLAGS += -DOPENSSL_BN_ASM_MONT5
+SSLASM += bn/mont5-elf-x86_64.s
+libcrypto_la_CFLAGS += -DOPENSSL_BN_ASM_GF2m
+SSLASM += bn/gf2m-elf-x86_64.s
+# camellia
+SSLASM += camellia/cmll-elf-x86_64.s
+# md5
+libcrypto_la_CFLAGS += -DMD5_ASM
+SSLASM += md5/md5-elf-x86_64.s
+# modes
+libcrypto_la_CFLAGS += -DGHASH_ASM
+SSLASM += modes/ghash-elf-x86_64.s
+# rc4
+SSLASM += rc4/rc4-elf-x86_64.s
+SSLASM += rc4/rc4-md5-elf-x86_64.s
+# rsa
+libcrypto_la_CFLAGS += -DRSA_ASM
+# sha
+libcrypto_la_CFLAGS += -DSHA1_ASM
+SSLASM += sha/sha1-elf-x86_64.s
+libcrypto_la_CFLAGS += -DSHA256_ASM
+SSLASM += sha/sha256-elf-x86_64.S
+libcrypto_la_CFLAGS += -DSHA512_ASM
+SSLASM += sha/sha512-elf-x86_64.S
+# whrlpool
+libcrypto_la_CFLAGS += -DWHIRLPOOL_ASM
+SSLASM += whrlpool/wp-elf-x86_64.s
+
+libcrypto_la_CFLAGS += -DOPENSSL_CPUID_OBJ
+SSLASM += cpuid-elf-x86_64.S
+
+libcrypto_la_SOURCES += $(SSLASM)
+CLEANFILES += $(SSLASM)
+BUILT_SOURCES += $(SSLASM)
diff --git a/update.sh b/update.sh
index 47e2e22..8ed28d6 100755
--- a/update.sh
+++ b/update.sh
@@ -16,7 +16,7 @@ fi
git checkout $openbsd_branch
git pull --rebase)
-# setup source paths
+# setup source paths
dir=`pwd`
libc_src=$dir/openbsd/src/lib/libc
libc_regress=$dir/openbsd/src/regress/lib/libc
@@ -106,6 +106,30 @@ done
$CP crypto/compat/b_win.c crypto/bio
$CP crypto/compat/ui_openssl_win.c crypto/ui
+# generate assembly crypto algorithms
+asm_src=$libssl_src/src/crypto
+for abi in elf; do
+ perl $asm_src/aes/asm/aes-x86_64.pl $abi >
crypto/aes/aes-${abi}-x86_64.s
+ perl $asm_src/aes/asm/vpaes-x86_64.pl $abi >
crypto/aes/vpaes-${abi}-x86_64.s
+ perl $asm_src/aes/asm/bsaes-x86_64.pl $abi >
crypto/aes/bsaes-${abi}-x86_64.s
+ perl $asm_src/aes/asm/aesni-x86_64.pl $abi >
crypto/aes/aesni-${abi}-x86_64.s
+ perl $asm_src/aes/asm/aesni-sha1-x86_64.pl $abi >
crypto/aes/aesni-sha1-${abi}-x86_64.s
+ perl $asm_src/bn/asm/modexp512-x86_64.pl $abi >
crypto/bn/modexp512-${abi}-x86_64.s
+ perl $asm_src/bn/asm/x86_64-mont.pl $abi >
crypto/bn/mont-${abi}-x86_64.s
+ perl $asm_src/bn/asm/x86_64-mont5.pl $abi >
crypto/bn/mont5-${abi}-x86_64.s
+ perl $asm_src/bn/asm/x86_64-gf2m.pl $abi >
crypto/bn/gf2m-${abi}-x86_64.s
+ perl $asm_src/camellia/asm/cmll-x86_64.pl $abi >
crypto/camellia/cmll-${abi}-x86_64.s
+ perl $asm_src/md5/asm/md5-x86_64.pl $abi >
crypto/md5/md5-${abi}-x86_64.s
+ perl $asm_src/modes/asm/ghash-x86_64.pl $abi >
crypto/modes/ghash-${abi}-x86_64.s
+ perl $asm_src/rc4/asm/rc4-x86_64.pl $abi >
crypto/rc4/rc4-${abi}-x86_64.s
+ perl $asm_src/rc4/asm/rc4-md5-x86_64.pl $abi >
crypto/rc4/rc4-md5-${abi}-x86_64.s
+ perl $asm_src/sha/asm/sha1-x86_64.pl $abi >
crypto/sha/sha1-${abi}-x86_64.s
+ perl $asm_src/sha/asm/sha512-x86_64.pl $abi
crypto/sha/sha256-${abi}-x86_64.S
+ perl $asm_src/sha/asm/sha512-x86_64.pl $abi
crypto/sha/sha512-${abi}-x86_64.S
+ perl $asm_src/whrlpool/asm/wp-x86_64.pl $abi >
crypto/whrlpool/wp-${abi}-x86_64.s
+ perl $asm_src/x86_64cpuid.pl $abi
crypto/cpuid-${abi}-x86_64.S
+done
+
# copy libtls source
rm -f tls/*.c tls/*.h
for i in `awk '/SOURCES|HEADERS/ { print $3 }' tls/Makefile.am` ; do
--
2.2.0