> From e86ec9188319ef08d635da38c2f8bf891e56f995 Mon Sep 17 00:00:00 2001
From: "Yuriy M. Kaminskiy" <yum...@gmail.com>
Date: Wed, 2 Jan 2019 19:41:32 +0300
Subject: [PATCH] Add --enable-fat support for arm neon chacha20

On BCM2837B0 (Cortex-A53) @1.4GHz (Raspberry Pi 3B+),
Before:
`gnutls-cli --benchmark-ciphers`
       CHACHA20-POLY1305 (16384) 51.54 MB/sec
`gnutls-cli --benchmark-tls-ciphers`:
       ECDHE_RSA_CHACHA20_POLY1305 (payload 1400)  21.31 MB/sec
       ECDHE_RSA_CHACHA20_POLY1305 (payload 15360)  24.60 MB/sec
`nettle-benchmark`
		 chacha     encrypt   71.90
		 chacha     decrypt   71.89
	chacha_poly1305     encrypt   48.17
	chacha_poly1305     decrypt   48.17
	chacha_poly1305      update  146.03

After:
`gnutls-cli --benchmark-ciphers`
       CHACHA20-POLY1305 (16384) 68.44 MB/sec
`gnutls-cli --benchmark-tls-ciphers`:
       ECDHE_RSA_CHACHA20_POLY1305 (payload 1400) 27.25 MB/sec
       ECDHE_RSA_CHACHA20_POLY1305 (payload 15360) 32.41 MB/sec
`nettle-benchmark`
		 chacha     encrypt  106.00
		 chacha     decrypt  105.94
	chacha_poly1305     encrypt   65.94
	chacha_poly1305     decrypt   65.96
	chacha_poly1305      update  175.24
---
 arm/fat/chacha-core-internal-2.asm | 37 +++++++++++++++++++++++++++++++++++++
 chacha-core-internal.c             |  7 +++++++
 configure.ac                       |  2 ++
 fat-arm.c                          | 10 ++++++++++
 fat-setup.h                        |  2 ++
 5 files changed, 58 insertions(+)
 create mode 100644 arm/fat/chacha-core-internal-2.asm

P.S. For reference, on the same machine:
	 openssl chacha20     encrypt  236.08
	 openssl chacha20     decrypt  236.14
openssl chacha20-poly1305     encrypt  181.26
openssl chacha20-poly1305     decrypt  182.09
openssl chacha20-poly1305      update  782.67
(with openssl 1.1.0j-1~deb9u1 from debian-stretch/armhf)

diff --git a/arm/fat/chacha-core-internal-2.asm b/arm/fat/chacha-core-internal-2.asm
new file mode 100644
index 00000000..66a5c145
--- /dev/null
+++ b/arm/fat/chacha-core-internal-2.asm
@@ -0,0 +1,37 @@
+C arm/fat/chacha-core-internal-2.asm
+
+
+ifelse(<
+   Copyright (C) 2015 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+>)
+
+dnl PROLOGUE(_nettle_chacha_core) picked up by configure
+
+define(<fat_transform>, <$1_neon>)
+include_src(<arm/neon/chacha-core-internal.asm>)
diff --git a/chacha-core-internal.c b/chacha-core-internal.c
index af278bb0..0905834e 100644
--- a/chacha-core-internal.c
+++ b/chacha-core-internal.c
@@ -51,6 +51,13 @@
 
 #include "macros.h"
 
+/* For fat builds: with a native version, build this C code under the _c name. */
+#if HAVE_NATIVE_chacha_core
+void
+_nettle_chacha_core_c(uint32_t *dst, const uint32_t *src, unsigned rounds);
+#define _nettle_chacha_core  _nettle_chacha_core_c
+#endif
+
 #ifndef CHACHA_DEBUG
 # define CHACHA_DEBUG 0
 #endif
diff --git a/configure.ac b/configure.ac
index 305977d1..3f409fa4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -472,6 +472,7 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \
 # Assembler files which generate additional object files if they are used.
 asm_nettle_optional_list="gcm-hash8.asm cpuid.asm \
   aes-encrypt-internal-2.asm aes-decrypt-internal-2.asm memxor-2.asm \
+  chacha-core-internal-2.asm \
   salsa20-core-internal-2.asm sha1-compress-2.asm sha256-compress-2.asm \
   sha3-permute-2.asm sha512-compress-2.asm \
   umac-nh-n-2.asm umac-nh-2.asm"
@@ -573,6 +574,7 @@ AC_SUBST([IF_ASM])
 AH_VERBATIM([HAVE_NATIVE],
 [/* Define to 1 each of the following for which a native (ie. CPU specific)
     implementation of the corresponding routine exists.  */
+#undef HAVE_NATIVE_chacha_core
 #undef HAVE_NATIVE_ecc_192_modp
 #undef HAVE_NATIVE_ecc_192_redc
 #undef HAVE_NATIVE_ecc_224_modp
diff --git a/fat-arm.c b/fat-arm.c
index 5e656359..56099e6f 100644
--- a/fat-arm.c
+++ b/fat-arm.c
@@ -171,6 +171,10 @@ DECLARE_FAT_FUNC(_nettle_umac_nh_n, umac_nh_n_func)
 DECLARE_FAT_FUNC_VAR(umac_nh_n, umac_nh_n_func, c);
 DECLARE_FAT_FUNC_VAR(umac_nh_n, umac_nh_n_func, neon);
 
+DECLARE_FAT_FUNC(_nettle_chacha_core, chacha_core_func)
+DECLARE_FAT_FUNC_VAR(chacha_core, chacha_core_func, c);
+DECLARE_FAT_FUNC_VAR(chacha_core, chacha_core_func, neon);
+
 static void CONSTRUCTOR
 fat_init (void)
 {
@@ -212,6 +216,7 @@ fat_init (void)
       nettle_sha3_permute_vec = _nettle_sha3_permute_neon;
       _nettle_umac_nh_vec = _nettle_umac_nh_neon;
       _nettle_umac_nh_n_vec = _nettle_umac_nh_n_neon;
+      _nettle_chacha_core_vec = _nettle_chacha_core_neon;
     }
   else
     {
@@ -222,6 +227,7 @@ fat_init (void)
       nettle_sha3_permute_vec = _nettle_sha3_permute_c;
       _nettle_umac_nh_vec = _nettle_umac_nh_c;
       _nettle_umac_nh_n_vec = _nettle_umac_nh_n_c;
+      _nettle_chacha_core_vec = _nettle_chacha_core_c;
     }
 }
   
@@ -267,3 +273,7 @@ DEFINE_FAT_FUNC(_nettle_umac_nh_n, void,
 		 unsigned length, const uint8_t *msg),
 		(out, n, key, length, msg))
 
+DEFINE_FAT_FUNC(_nettle_chacha_core, void,
+		(uint32_t *dst, const uint32_t *src, unsigned rounds),
+		(dst, src, rounds))
+
diff --git a/fat-setup.h b/fat-setup.h
index eb7166a7..b623ebf9 100644
--- a/fat-setup.h
+++ b/fat-setup.h
@@ -174,3 +174,5 @@ typedef void sha512_compress_func (uint64_t *state, const uint8_t *input, const
 typedef uint64_t umac_nh_func (const uint32_t *key, unsigned length, const uint8_t *msg);
 typedef void umac_nh_n_func (uint64_t *out, unsigned n, const uint32_t *key,
 			     unsigned length, const uint8_t *msg);
+
+typedef void chacha_core_func(uint32_t *dst, const uint32_t *src, unsigned rounds);
-- 
2.11.0

_______________________________________________
nettle-bugs mailing list
nettle-bugs@lists.lysator.liu.se
http://lists.lysator.liu.se/mailman/listinfo/nettle-bugs

Reply via email to