On a SPARC-T4, with AES opcodes disabled (OPENSSL_sparcv9cap=0):

type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
aes-128 cbc      75200.21k    83425.11k    86767.67k    87853.06k    88279.72k
aes-192 cbc      64906.68k    71059.56k    73902.42k    74532.52k    74855.77k
aes-256 cbc      56814.90k    61781.72k    63903.74k    64367.27k    64607.57k

And with them enabled: type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes aes-128 cbc 501882.74k 836726.87k 993102.76k 1020379.48k 1054083.75k aes-192 cbc 435068.22k 707080.77k 837915.90k 864243.03k 889279.83k aes-256 cbc 393746.28k 620463.13k 727483.31k 749580.97k 769029.46k This system is a T4-2 so it's fun to show off some parallel benchmarks, for example "openssl speed -multi 16 -evp aes-128-ecb" gives: type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes evp 7429568.93k 17815630.93k 28436597.93k 32033047.55k 35120630.44k 35GB/sec AES encryption, not too bad. Currently CBC, ECB, CTR, OFB, and CFB modes are explicitly optimized. Other modes will be optimized in the future. Signed-off-by: David S. Miller <da...@davemloft.net> --- Configure | 2 +- crypto/aes/aes_sparccore.c | 55 ++++ crypto/aes/asm/aes-sparcv9.pl | 666 +++++++++++++++++++++++++++++++++++++++++ crypto/evp/e_aes.c | 400 +++++++++++++++++++++++++ crypto/sparc_arch.h | 19 ++ 5 files changed, 1141 insertions(+), 1 deletion(-) diff --git a/Configure b/Configure index 66b4ff8..217a552 100755 --- a/Configure +++ b/Configure @@ -130,7 +130,7 @@ my $x86_elf_asm="$x86_asm:elf"; my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:e_padlock-x86_64.o"; my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_sparccore.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o 
sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_sparccore.o aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void"; my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; diff --git a/crypto/aes/aes_sparccore.c b/crypto/aes/aes_sparccore.c index 2842cbc..658cc66 100644 --- a/crypto/aes/aes_sparccore.c +++ b/crypto/aes/aes_sparccore.c @@ -36,6 +36,7 @@ #include <stdlib.h> #include <openssl/crypto.h> #include <openssl/aes.h> +#include <openssl/modes.h> #include "aes_locl.h" #include "sparc_arch.h" @@ -270,3 +271,57 @@ int AES_set_decrypt_key(const unsigned char *userKey, const int bits, } return 0; } + +void aes_sparc_hw_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const AES_KEY *key, + unsigned char *ivec, int enc); + +void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + unsigned char *ivec, const int enc) +{ + const void *aligned_in; + void *aligned_out; + int aligned_len; + size_t bl = 16; + + if (!(OPENSSL_sparcv9cap_P & SPARCV9_AES)) + goto slow; + + aligned_len = len & ~(bl - 1); + if (!aligned_len) + goto trailing; + + aligned_out = out; + if ((unsigned long) out & 0x7) { + aligned_out = OPENSSL_malloc(aligned_len); + if (!aligned_out) + goto slow; + } + aligned_in = in; + if ((unsigned long)in & 0x7) { + memcpy(aligned_out, in, aligned_len); + aligned_in = (const void *) aligned_out; + } + + aes_sparc_hw_cbc_encrypt(aligned_in, aligned_out, aligned_len, + key, ivec, enc); + + if ((unsigned long)out & 0x7) { + memcpy(out, aligned_out, aligned_len); + OPENSSL_free(aligned_out); + } +trailing: + len -= aligned_len; + if (len) { + out += aligned_len; + in += aligned_len; +slow: + if (enc) + 
CRYPTO_cbc128_encrypt(in, out, len, key, ivec, + (block128_f)AES_encrypt); + else + CRYPTO_cbc128_decrypt(in, out, len, key, ivec, + (block128_f)AES_decrypt); + } +} diff --git a/crypto/aes/asm/aes-sparcv9.pl b/crypto/aes/asm/aes-sparcv9.pl index f022b7b..0d47bdb 100755 --- a/crypto/aes/asm/aes-sparcv9.pl +++ b/crypto/aes/asm/aes-sparcv9.pl @@ -1651,6 +1651,672 @@ aes_sparc_hw_expand_key: .type aes_sparc_hw_expand_key,#function .size aes_sparc_hw_expand_key,(.-aes_sparc_hw_expand_key) +#define SETUP_KEY_AND_ROUNDS(KEY, ROUNDS, TMP1, TMP2) \\ + andcc %KEY, 0x4, %TMP1; \\ + mov %KEY, %TMP2; \\ + add %KEY, 240, %ROUNDS; \\ + movne %icc, %TMP2, %ROUNDS; \\ + add %KEY, %TMP1, %KEY; \\ + ld [%ROUNDS], %ROUNDS; + +#define LOAD_ENCRYPT_KEY_128(KEY) \\ + ldd [%KEY + 0x10], %f8; \\ + ldd [%KEY + 0x18], %f10; \\ + ldd [%KEY + 0x20], %f12; \\ + ldd [%KEY + 0x28], %f14; \\ + ldd [%KEY + 0x30], %f16; \\ + ldd [%KEY + 0x38], %f18; \\ + ldd [%KEY + 0x40], %f20; \\ + ldd [%KEY + 0x48], %f22; \\ + ldd [%KEY + 0x50], %f24; \\ + ldd [%KEY + 0x58], %f26; \\ + ldd [%KEY + 0x60], %f28; \\ + ldd [%KEY + 0x68], %f30; \\ + ldd [%KEY + 0x70], %f32; \\ + ldd [%KEY + 0x78], %f34; \\ + ldd [%KEY + 0x80], %f36; \\ + ldd [%KEY + 0x88], %f38; \\ + ldd [%KEY + 0x90], %f40; \\ + ldd [%KEY + 0x98], %f42; \\ + ldd [%KEY + 0xa0], %f44; \\ + ldd [%KEY + 0xa8], %f46; + +#define LOAD_ENCRYPT_KEY_192(KEY) \\ + LOAD_ENCRYPT_KEY_128(KEY) \\ + ldd [%KEY + 0xb0], %f48; \\ + ldd [%KEY + 0xb8], %f50; \\ + ldd [%KEY + 0xc0], %f52; \\ + ldd [%KEY + 0xc8], %f54; + +#define LOAD_ENCRYPT_KEY_256(KEY) \\ + LOAD_ENCRYPT_KEY_192(KEY) \\ + ldd [%KEY + 0xd0], %f56; \\ + ldd [%KEY + 0xd8], %f58; \\ + ldd [%KEY + 0xe0], %f60; \\ + ldd [%KEY + 0xe8], %f62; + +#define LOAD_DECRYPT_KEY_128(KEY) \\ + ldd [%KEY + 0x18], %f8; \\ + ldd [%KEY + 0x10], %f10; \\ + ldd [%KEY + 0x28], %f12; \\ + ldd [%KEY + 0x20], %f14; \\ + ldd [%KEY + 0x38], %f16; \\ + ldd [%KEY + 0x30], %f18; \\ + ldd [%KEY + 0x48], %f20; \\ + ldd [%KEY + 0x40], 
%f22; \\ + ldd [%KEY + 0x58], %f24; \\ + ldd [%KEY + 0x50], %f26; \\ + ldd [%KEY + 0x68], %f28; \\ + ldd [%KEY + 0x60], %f30; \\ + ldd [%KEY + 0x78], %f32; \\ + ldd [%KEY + 0x70], %f34; \\ + ldd [%KEY + 0x88], %f36; \\ + ldd [%KEY + 0x80], %f38; \\ + ldd [%KEY + 0x98], %f40; \\ + ldd [%KEY + 0x90], %f42; \\ + ldd [%KEY + 0xa8], %f44; \\ + ldd [%KEY + 0xa0], %f46; + +#define LOAD_DECRYPT_KEY_192(KEY) \\ + LOAD_DECRYPT_KEY_128(KEY) \\ + ldd [%KEY + 0xb8], %f48; \\ + ldd [%KEY + 0xb0], %f50; \\ + ldd [%KEY + 0xc8], %f52; \\ + ldd [%KEY + 0xc0], %f54; + +#define LOAD_DECRYPT_KEY_256(KEY) \\ + LOAD_DECRYPT_KEY_192(KEY) \\ + ldd [%KEY + 0xd8], %f56; \\ + ldd [%KEY + 0xd0], %f58; \\ + ldd [%KEY + 0xe8], %f60; \\ + ldd [%KEY + 0xe0], %f62; + +#define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \\ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \\ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \\ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \\ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \\ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \\ + ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) + +#define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \\ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \\ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \\ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \\ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \\ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \\ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \\ + ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) + +#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \\ + DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \\ + DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \\ + DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \\ + DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \\ + DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \\ + DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) + +#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \\ + 
DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \\ + DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \\ + DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \\ + DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \\ + DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \\ + DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \\ + DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) + +#define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \\ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \\ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \\ + AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \\ + AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \\ + AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \\ + AES_EROUND23(KEY_BASE + 6, T0, T1, I1) \\ + AES_EROUND01(KEY_BASE + 4, T2, T3, I2) \\ + AES_EROUND23(KEY_BASE + 6, T2, T3, I3) + +#define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \\ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \\ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \\ + AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \\ + AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \\ + AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \\ + AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) \\ + AES_EROUND01_L(KEY_BASE + 4, T2, T3, I2) \\ + AES_EROUND23_L(KEY_BASE + 6, T2, T3, I3) + +#define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \\ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \\ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \\ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \\ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \\ + ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) + +#define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \\ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \\ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \\ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \\ + 
ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \\ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \\ + ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) + +#define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \\ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \\ + TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) + +#define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3, IGN0, IGN1, IGN2, IGN3) \\ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \\ + ldd [%o3 + 0xd0], %f56; \\ + ldd [%o3 + 0xd8], %f58; \\ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \\ + ldd [%o3 + 0xe0], %f60; \\ + ldd [%o3 + 0xe8], %f62; \\ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \\ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \\ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \\ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \\ + AES_EROUND01(KEY_BASE + 48, I0, I1, KEY_BASE + 0) \\ + AES_EROUND23(KEY_BASE + 50, I0, I1, KEY_BASE + 2) \\ + AES_EROUND01(KEY_BASE + 48, I2, I3, KEY_BASE + 4) \\ + AES_EROUND23(KEY_BASE + 50, I2, I3, KEY_BASE + 6) \\ + AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I0) \\ + AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I1) \\ + ldd [%o3 + 0x10], %f8; \\ + ldd [%o3 + 0x18], %f10; \\ + AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I2) \\ + AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I3) \\ + ldd [%o3 + 0x20], %f12; \\ + ldd [%o3 + 0x28], %f14; + +#define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \\ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \\ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \\ + AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \\ + AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \\ + AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \\ + AES_DROUND01(KEY_BASE + 6, T0, T1, I0) \\ + 
AES_DROUND23(KEY_BASE + 4, T2, T3, I3) \\ + AES_DROUND01(KEY_BASE + 6, T2, T3, I2) + +#define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \\ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \\ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \\ + AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \\ + AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \\ + AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \\ + AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) \\ + AES_DROUND23_L(KEY_BASE + 4, T2, T3, I3) \\ + AES_DROUND01_L(KEY_BASE + 6, T2, T3, I2) + +#define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \\ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \\ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \\ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \\ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \\ + DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) + +#define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \\ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \\ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \\ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \\ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \\ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \\ + DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) + +#define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \\ + DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \\ + TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) + +#define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3, IGN0, IGN1, IGN2, IGN3) \\ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \\ + ldd [%o3 + 0xd8], %f56; \\ + ldd [%o3 + 0xd0], %f58; \\ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \\ + ldd [%o3 + 0xe8], %f60; \\ + ldd [%o3 + 0xe0], %f62; \\ + 
DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \\ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \\ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \\ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \\ + AES_DROUND23(KEY_BASE + 48, I0, I1, KEY_BASE + 2) \\ + AES_DROUND01(KEY_BASE + 50, I0, I1, KEY_BASE + 0) \\ + AES_DROUND23(KEY_BASE + 48, I2, I3, KEY_BASE + 6) \\ + AES_DROUND01(KEY_BASE + 50, I2, I3, KEY_BASE + 4) \\ + AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I1) \\ + AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I0) \\ + ldd [%o3 + 0x18], %f8; \\ + ldd [%o3 + 0x10], %f10; \\ + AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I3) \\ + AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I2) \\ + ldd [%o3 + 0x28], %f12; \\ + ldd [%o3 + 0x20], %f14; + +#define ECB_CRYPT_IMPL(KEYLEN, ED, KEY, SRC, DST, LEN) \\ + LOAD_##ED##CRYPT_KEY_##KEYLEN##(KEY); \\ + subcc %LEN, 0x10, %LEN; \\ + be 10f; \\ + nop; \\ +.Lecb_##ED##crypt_##KEYLEN##_loop: \\ + ldx [%SRC + 0x00], %g3; \\ + ldx [%SRC + 0x08], %g5; \\ + ldx [%SRC + 0x10], %o4; \\ + ldx [%SRC + 0x18], %o5; \\ + xor %g1, %g3, %g3; \\ + xor %g2, %g5, %g5; \\ + MOVXTOD_G3_F4; \\ + MOVXTOD_G5_F6; \\ + xor %g1, %o4, %g3; \\ + xor %g2, %o5, %g5; \\ + MOVXTOD_G3_F0; \\ + MOVXTOD_G5_F2; \\ + ##ED##CRYPT_##KEYLEN##_2(8, 4, 6, 0, 2, 56, 58, 60, 62);\\ + std %f4, [%DST + 0x00]; \\ + std %f6, [%DST + 0x08]; \\ + std %f0, [%DST + 0x10]; \\ + std %f2, [%DST + 0x18]; \\ + subcc %LEN, 0x20, %LEN; \\ + add %SRC, 0x20, %SRC; \\ + bgt,pt %icc, .Lecb_##ED##crypt_##KEYLEN##_loop;\\ + add %DST, 0x20, %DST; \\ + blt,pt %icc, 11f; \\ + nop; \\ +10: ldx [%SRC + 0x00], %g3; \\ + ldx [%SRC + 0x08], %g5; \\ + xor %g1, %g3, %g3; \\ + xor %g2, %g5, %g5; \\ + MOVXTOD_G3_F4; \\ + MOVXTOD_G5_F6; \\ + ##ED##CRYPT_##KEYLEN##(8, 4, 6, 0, 2); \\ + std %f4, [%DST + 0x00]; \\ + std %f6, [%DST + 0x08]; \\ +11: retl; \\ + 
nop; + + .align 32 + .globl aes_sparc_hw_ecb_encrypt +aes_sparc_hw_ecb_encrypt: + /* %o0=in, %o1=out, %o2=len, %o3=KEY, %o4=enc */ + SETUP_KEY_AND_ROUNDS(o3, g1, g2, g3) + cmp %o4, 0 + be .Lecb_decrypt + cmp %g1, 12 + + ldx [%o3 + 0x00], %g1 + bl .Lecb_encrypt_128 + ldx [%o3 + 0x08], %g2 + be .Lecb_encrypt_192 + nop + + ECB_CRYPT_IMPL(256, EN, o3, o0, o1, o2) + +.Lecb_encrypt_192: + ECB_CRYPT_IMPL(192, EN, o3, o0, o1, o2) + +.Lecb_encrypt_128: + ECB_CRYPT_IMPL(128, EN, o3, o0, o1, o2) + +.Lecb_decrypt: + ldx [%o3 + 0x00], %g1 + bl .Lecb_decrypt_128 + ldx [%o3 + 0x08], %g2 + be .Lecb_decrypt_192 + nop + + ECB_CRYPT_IMPL(256, DE, o3, o0, o1, o2) + +.Lecb_decrypt_192: + ECB_CRYPT_IMPL(192, DE, o3, o0, o1, o2) + +.Lecb_decrypt_128: + ECB_CRYPT_IMPL(128, DE, o3, o0, o1, o2) + .type aes_sparc_hw_ecb_encrypt,#function + .size aes_sparc_hw_ecb_encrypt,(.-aes_sparc_hw_ecb_encrypt) + +#define CBC_ENCRYPT_IMPL(KEYLEN, KEY, SRC, DST, IV, LEN)\\ + LOAD_ENCRYPT_KEY_##KEYLEN##(KEY); \\ +.Lcbc_ENcrypt_##KEYLEN##_loop: \\ + ldx [%SRC + 0x00], %g3; \\ + ldx [%SRC + 0x08], %g5; \\ + add %SRC, 0x10, %SRC; \\ + subcc %LEN, 0x10, %LEN; \\ + add %DST, 0x10, %DST; \\ + xor %g1, %g3, %g3; \\ + xor %g2, %g5, %g5; \\ + MOVXTOD_G3_F0; \\ + MOVXTOD_G5_F2; \\ + fxor %f4, %f0, %f4; \\ + fxor %f6, %f2, %f6; \\ + ENCRYPT_##KEYLEN##(8, 4, 6, 0, 2); \\ + std %f4, [%DST - 0x10]; \\ + bne .Lcbc_ENcrypt_##KEYLEN##_loop; \\ + std %f6, [%DST - 0x08]; \\ + st %f4, [%IV + 0x00]; \\ + st %f5, [%IV + 0x04]; \\ + st %f6, [%IV + 0x08]; \\ + retl; \\ + st %f7, [%IV + 0x0c]; + +#define CBC_DECRYPT_IMPL(KEYLEN, KEY, SRC, DST, IV, LEN)\\ + LOAD_DECRYPT_KEY_##KEYLEN##(KEY); \\ + ld [%IV + 0x00], %o3; \\ + ld [%IV + 0x04], %g3; \\ + sllx %o3, 32, %o3; \\ + or %o3, %g3, %o3; \\ + ld [%IV + 0x08], %o5; \\ + ld [%IV + 0x0c], %g3; \\ + sllx %o5, 32, %o5; \\ + or %o5, %g3, %o5; \\ +.Lcbc_DEcrypt_##KEYLEN##_loop: \\ + ldx [%SRC + 0x00], %g3; \\ + ldx [%SRC + 0x08], %g5; \\ + add %SRC, 0x10, %SRC; \\ + subcc %LEN, 0x10, 
%LEN; \\ + add %DST, 0x10, %DST; \\ + xor %g1, %g3, %g3; \\ + xor %g2, %g5, %g5; \\ + MOVXTOD_G3_F4; \\ + MOVXTOD_G5_F6; \\ + DECRYPT_##KEYLEN##(8, 4, 6, 0, 2); \\ + MOVXTOD_O3_F0; \\ + MOVXTOD_O5_F2; \\ + xor %g1, %g3, %o3; \\ + xor %g2, %g5, %o5; \\ + fxor %f4, %f0, %f4; \\ + fxor %f6, %f2, %f6; \\ + std %f4, [%DST - 0x10]; \\ + bne,pt %icc, .Lcbc_DEcrypt_##KEYLEN##_loop; \\ + std %f6, [%DST - 0x08]; \\ + srlx %o3, 32, %g1; \\ + st %g1, [%IV + 0x00]; \\ + srlx %o5, 32, %g2; \\ + st %o3, [%IV + 0x04]; \\ + st %g2, [%IV + 0x08]; \\ + retl; \\ + st %o5, [%IV + 0x0c]; + + .align 32 + .globl aes_sparc_hw_cbc_encrypt +aes_sparc_hw_cbc_encrypt: + /* %o0=in, %o1=out, %o2=len, %o3=KEY, %o4=IV, %o5=enc */ + SETUP_KEY_AND_ROUNDS(o3, g3, g1, g2) + ldx [%o3 + 0x00], %g1 + cmp %o5, 0 + ldx [%o3 + 0x08], %g2 + be .Lcbc_decrypt + cmp %g3, 12 + + ld [%o4 + 0x00], %f4 + ld [%o4 + 0x04], %f5 + ld [%o4 + 0x08], %f6 + bl .Lcbc_encrypt_128 + ld [%o4 + 0x0c], %f7 + be .Lcbc_encrypt_192 + nop + + CBC_ENCRYPT_IMPL(256, o3, o0, o1, o4, o2) + +.Lcbc_encrypt_192: + CBC_ENCRYPT_IMPL(192, o3, o0, o1, o4, o2) + +.Lcbc_encrypt_128: + CBC_ENCRYPT_IMPL(128, o3, o0, o1, o4, o2) + +.Lcbc_decrypt: + bl .Lcbc_decrypt_128 + nop + be .Lcbc_decrypt_192 + nop + + CBC_DECRYPT_IMPL(256, o3, o0, o1, o4, o2) + +.Lcbc_decrypt_192: + CBC_DECRYPT_IMPL(192, o3, o0, o1, o4, o2) + +.Lcbc_decrypt_128: + CBC_DECRYPT_IMPL(128, o3, o0, o1, o4, o2) + .type aes_sparc_hw_cbc_encrypt,#function + .size aes_sparc_hw_cbc_encrypt,(.-aes_sparc_hw_cbc_encrypt) + +#define CTR_KEY_FIXUP_128(KEY) +#define CTR_KEY_FIXUP_192(KEY) +#define CTR_KEY_FIXUP_256(KEY) \\ + ldd [%KEY + 0xd0], %f56; \\ + ldd [%KEY + 0xd8], %f58; \\ + ldd [%KEY + 0xe0], %f60; \\ + ldd [%KEY + 0xe8], %f62; + +#define CTR_ENCRYPT_IMPL(KEYLEN, KEY, SRC, DST, IV, LEN)\\ + LOAD_ENCRYPT_KEY_##KEYLEN##(KEY); \\ + ld [%IV + 0x00], %g3; \\ + ld [%IV + 0x04], %o5; \\ + sllx %g3, 32, %g3; \\ + or %g3, %o5, %g3; \\ + ld [%IV + 0x08], %g5; \\ + ld [%IV + 0x0c], %o5; \\ + 
subcc %LEN, 0x10, %LEN; \\ + sllx %g5, 32, %g5; \\ + be 10f; \\ + or %g5, %o5, %g5; \\ +.Lctr_ENcrypt_##KEYLEN##_loop: \\ + xor %g1, %g3, %o5; \\ + MOVXTOD_O5_F0; \\ + xor %g2, %g5, %o5; \\ + MOVXTOD_O5_F2; \\ + add %g5, 1, %g5; \\ + add %g3, 1, %o5; \\ + movrz %g5, %o5, %g3; \\ + xor %g1, %g3, %o5; \\ + MOVXTOD_O5_F4; \\ + xor %g2, %g5, %o5; \\ + MOVXTOD_O5_F6; \\ + add %g5, 1, %g5; \\ + add %g3, 1, %o5; \\ + movrz %g5, %o5, %g3; \\ + ENCRYPT_##KEYLEN##_2(8, 0, 2, 4, 6, 56, 58, 60, 62);\\ + ldd [%SRC + 0x00], %f56; \\ + ldd [%SRC + 0x08], %f58; \\ + ldd [%SRC + 0x10], %f60; \\ + ldd [%SRC + 0x18], %f62; \\ + fxor %f56, %f0, %f56; \\ + fxor %f58, %f2, %f58; \\ + fxor %f60, %f4, %f60; \\ + fxor %f62, %f6, %f62; \\ + std %f56, [%DST + 0x00]; \\ + std %f58, [%DST + 0x08]; \\ + std %f60, [%DST + 0x10]; \\ + std %f62, [%DST + 0x18]; \\ + subcc %LEN, 0x20, %LEN; \\ + add %SRC, 0x20, %SRC; \\ + bgt,pt %icc, .Lctr_ENcrypt_##KEYLEN##_loop; \\ + add %DST, 0x20, %DST; \\ + blt,pt %icc, 11f; \\ + nop; \\ + CTR_KEY_FIXUP_##KEYLEN##(KEY); \\ +10: xor %g1, %g3, %o5; \\ + MOVXTOD_O5_F0; \\ + xor %g2, %g5, %o5; \\ + MOVXTOD_O5_F2; \\ + add %g5, 1, %g5; \\ + add %g3, 1, %o5; \\ + movrz %g5, %o5, %g3; \\ + ENCRYPT_##KEYLEN##(8, 0, 2, 4, 6); \\ + ldd [%SRC + 0x00], %f4; \\ + ldd [%SRC + 0x08], %f6; \\ + fxor %f4, %f0, %f4; \\ + fxor %f6, %f2, %f6; \\ + std %f4, [%DST + 0x00]; \\ + std %f6, [%DST + 0x08]; \\ +11: srlx %g3, 32, %g1; \\ + st %g1, [%IV + 0x00]; \\ + srlx %g5, 32, %g2; \\ + st %g3, [%IV + 0x04]; \\ + st %g2, [%IV + 0x08]; \\ + retl; \\ + st %g5, [%IV + 0x0c]; + + .align 32 + .globl aes_sparc_hw_ctr_encrypt +aes_sparc_hw_ctr_encrypt: + /* %o0=in, %o1=out, %o2=len, %o3=KEY, %o4=IV */ + SETUP_KEY_AND_ROUNDS(o3, g3, g1, g2) + cmp %g3, 12 + ldx [%o3 + 0x00], %g1 + bl .Lctr_encrypt_128 + ldx [%o3 + 0x08], %g2 + be .Lctr_encrypt_192 + nop + + CTR_ENCRYPT_IMPL(256, o3, o0, o1, o4, o2) + +.Lctr_encrypt_192: + CTR_ENCRYPT_IMPL(192, o3, o0, o1, o4, o2) + +.Lctr_encrypt_128: + 
CTR_ENCRYPT_IMPL(128, o3, o0, o1, o4, o2) + .type aes_sparc_hw_ctr_encrypt,#function + .size aes_sparc_hw_ctr_encrypt,(.-aes_sparc_hw_ctr_encrypt) + +#define OFB_ENCRYPT_IMPL(KEYLEN, KEY, SRC, DST, IV, LEN)\\ + LOAD_ENCRYPT_KEY_##KEYLEN##(KEY); \\ +.Lofb_ENcrypt_##KEYLEN##_loop: \\ + MOVXTOD_G3_F0; \\ + MOVXTOD_G5_F2; \\ + fxor %f4, %f0, %f4; \\ + fxor %f6, %f2, %f6; \\ + ENCRYPT_##KEYLEN##(8, 4, 6, 0, 2); \\ + ldd [%SRC + 0x00], %f0; \\ + ldd [%SRC + 0x08], %f2; \\ + add %SRC, 0x10, %SRC; \\ + fxor %f4, %f0, %f0; \\ + fxor %f6, %f2, %f2; \\ + std %f0, [%DST + 0x00]; \\ + std %f2, [%DST + 0x08]; \\ + subcc %LEN, 0x10, %LEN; \\ + bne .Lofb_ENcrypt_##KEYLEN##_loop; \\ + add %DST, 0x10, %DST; \\ + st %f4, [%IV + 0x00]; \\ + st %f5, [%IV + 0x04]; \\ + st %f6, [%IV + 0x08]; \\ + retl; \\ + st %f7, [%IV + 0x0c]; + + .align 32 + .globl aes_sparc_hw_ofb_encrypt +aes_sparc_hw_ofb_encrypt: + /* %o0=in, %o1=out, %o2=len, %o3=KEY, %o4=IV */ + SETUP_KEY_AND_ROUNDS(o3, g3, g1, g2) + cmp %g3, 12 + ldx [%o3 + 0x00], %g3 + ldx [%o3 + 0x08], %g5 + ld [%o4 + 0x00], %f4 + ld [%o4 + 0x04], %f5 + ld [%o4 + 0x08], %f6 + bl .Lofb_encrypt_128 + ld [%o4 + 0x0c], %f7 + be .Lofb_encrypt_192 + nop + + OFB_ENCRYPT_IMPL(256, o3, o0, o1, o4, o2) + +.Lofb_encrypt_192: + OFB_ENCRYPT_IMPL(192, o3, o0, o1, o4, o2) + +.Lofb_encrypt_128: + OFB_ENCRYPT_IMPL(128, o3, o0, o1, o4, o2) + .type aes_sparc_hw_ofb_encrypt,#function + .size aes_sparc_hw_ofb_encrypt,(.-aes_sparc_hw_ofb_encrypt) + +#define CFB_ENCRYPT_IMPL(KEYLEN, KEY, SRC, DST, IV, LEN)\\ + LOAD_ENCRYPT_KEY_##KEYLEN##(KEY); \\ +.Lcfb_ENcrypt_##KEYLEN##_loop: \\ + MOVXTOD_G3_F0; \\ + MOVXTOD_G5_F2; \\ + fxor %f4, %f0, %f4; \\ + fxor %f6, %f2, %f6; \\ + ENCRYPT_##KEYLEN##(8, 4, 6, 0, 2); \\ + ldd [%SRC + 0x00], %f0; \\ + ldd [%SRC + 0x08], %f2; \\ + add %SRC, 0x10, %SRC; \\ + fxor %f4, %f0, %f4; \\ + fxor %f6, %f2, %f6; \\ + std %f4, [%DST + 0x00]; \\ + std %f6, [%DST + 0x08]; \\ + subcc %LEN, 0x10, %LEN; \\ + bne .Lcfb_ENcrypt_##KEYLEN##_loop; \\ 
+ add %DST, 0x10, %DST; \\ + st %f4, [%IV + 0x00]; \\ + st %f5, [%IV + 0x04]; \\ + st %f6, [%IV + 0x08]; \\ + retl; \\ + st %f7, [%IV + 0x0c]; + +#define CFB_DECRYPT_IMPL(KEYLEN, KEY, SRC, DST, IV, LEN)\\ + LOAD_ENCRYPT_KEY_##KEYLEN##(KEY); \\ +.Lcfb_DEcrypt_##KEYLEN##_loop: \\ + MOVXTOD_G3_F0; \\ + MOVXTOD_G5_F2; \\ + fxor %f4, %f0, %f0; \\ + fxor %f6, %f2, %f2; \\ + ENCRYPT_##KEYLEN##(8, 0, 2, 4, 6); \\ + ldd [%SRC + 0x00], %f4; \\ + ldd [%SRC + 0x08], %f6; \\ + add %SRC, 0x10, %SRC; \\ + fxor %f4, %f0, %f0; \\ + fxor %f6, %f2, %f2; \\ + std %f0, [%DST + 0x00]; \\ + std %f2, [%DST + 0x08]; \\ + subcc %LEN, 0x10, %LEN; \\ + bne .Lcfb_DEcrypt_##KEYLEN##_loop; \\ + add %DST, 0x10, %DST; \\ + st %f4, [%IV + 0x00]; \\ + st %f5, [%IV + 0x04]; \\ + st %f6, [%IV + 0x08]; \\ + retl; \\ + st %f7, [%IV + 0x0c]; + + .align 32 + .globl aes_sparc_hw_cfb_encrypt +aes_sparc_hw_cfb_encrypt: + /* %o0=in, %o1=out, %o2=len, %o3=KEY, %o4=IV, %o5=enc */ + SETUP_KEY_AND_ROUNDS(o3, g1, g2, g3) + ldx [%o3 + 0x00], %g3 + ldx [%o3 + 0x08], %g5 + ld [%o4 + 0x00], %f4 + ld [%o4 + 0x04], %f5 + ld [%o4 + 0x08], %f6 + ld [%o4 + 0x0c], %f7 + cmp %o5, 0 + be .Lcfb_decrypt + cmp %g1, 12 + + bl .Lcfb_encrypt_128 + nop + be .Lcfb_encrypt_192 + nop + + CFB_ENCRYPT_IMPL(256, o3, o0, o1, o4, o2) + +.Lcfb_encrypt_192: + CFB_ENCRYPT_IMPL(192, o3, o0, o1, o4, o2) + +.Lcfb_encrypt_128: + CFB_ENCRYPT_IMPL(128, o3, o0, o1, o4, o2) + +.Lcfb_decrypt: + bl .Lcfb_decrypt_128 + nop + be .Lcfb_decrypt_192 + nop + + CFB_DECRYPT_IMPL(256, o3, o0, o1, o4, o2) + +.Lcfb_decrypt_192: + CFB_DECRYPT_IMPL(192, o3, o0, o1, o4, o2) + +.Lcfb_decrypt_128: + CFB_DECRYPT_IMPL(128, o3, o0, o1, o4, o2) + .type aes_sparc_hw_cfb_encrypt,#function + .size aes_sparc_hw_cfb_encrypt,(.-aes_sparc_hw_cfb_encrypt) ___ # fmovs instructions substituting for FP nops were originally added diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c index 5dccb2f..63b6edc 100644 --- a/crypto/evp/e_aes.c +++ b/crypto/evp/e_aes.c @@ -459,6 +459,404 @@ 
const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ #else +#if defined(AES_ASM) && defined(__sparc__) + +#include "sparc_arch.h" + +#define SPARC_AES_CAPABLE (OPENSSL_sparcv9cap_P & SPARCV9_AES) + +void aes_sparc_hw_ecb_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const AES_KEY *key, int enc); +void aes_sparc_hw_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const AES_KEY *key, + unsigned char *ivec, int enc); +void aes_sparc_hw_ctr_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const void *key, + unsigned char *ivec); +void aes_sparc_hw_ofb_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const void *key, + unsigned char *ivec); +void aes_sparc_hw_cfb_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const void *key, + unsigned char *ivec, int enc); + +static int aes_sparc_hw_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + size_t bl = AES_BLOCK_SIZE; + const void *aligned_in; + void *aligned_out; + int aligned_len; + + aligned_len = len & ~(bl - 1); + if (!aligned_len) + goto trailing; + + aligned_out = (void *) out; + if ((unsigned long)out & 0x7) { + aligned_out = OPENSSL_malloc(aligned_len); + if (!aligned_out) + goto slow; + } + + aligned_in = (const void *) in; + if ((unsigned long)in & 0x7) { + memcpy(aligned_out, in, aligned_len); + aligned_in = (const void *) aligned_out; + } + + aes_sparc_hw_cbc_encrypt(aligned_in, aligned_out, aligned_len, + ctx->cipher_data, ctx->iv, ctx->encrypt); + + if ((unsigned long)out & 0x7) { + memcpy(out, aligned_out, aligned_len); + OPENSSL_free(aligned_out); + } +trailing: + len -= aligned_len; + if (len) { + EVP_AES_KEY *dat; + + in += aligned_len; + out += aligned_len; +slow: + dat = (EVP_AES_KEY *)ctx->cipher_data; + if (ctx->encrypt) + CRYPTO_cbc128_encrypt(in, out, len, &dat->ks, + ctx->iv, dat->block); + else + CRYPTO_cbc128_decrypt(in, out, len, &dat->ks, + 
ctx->iv, dat->block); + } + + return 1; +} + +static int aes_ecb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in, size_t len); + +static int aes_sparc_hw_ecb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in, size_t len) +{ + size_t bl = ctx->cipher->block_size; + const void *aligned_in; + void *aligned_out; + + if (len < bl) + return 1; + + len &= ~(bl - 1); + + aligned_out = (void *) out; + if ((unsigned long)out & 0x7) { + aligned_out = OPENSSL_malloc(len); + if (!aligned_out) + return aes_ecb_cipher(ctx, out, in, len); + } + + aligned_in = (const void *) in; + if ((unsigned long)in & 0x7) { + memcpy(aligned_out, in, len); + aligned_in = (const void *) aligned_out; + } + + aes_sparc_hw_ecb_encrypt(aligned_in, aligned_out, len, + ctx->cipher_data, ctx->encrypt); + + if ((unsigned long)out & 0x7) { + memcpy(out, aligned_out, len); + OPENSSL_free(aligned_out); + } + + return 1; +} + +static int aes_sparc_hw_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + const void *aligned_in; + void *aligned_out; + int aligned_len; + + while (ctx->num && len) { + *out++ = *in++ ^ ctx->buf[ctx->num]; + ctx->num = (ctx->num + 1) % 16; + len--; + } + + aligned_len = len & ~(16 - 1); + if (!aligned_len) + goto trailing; + + aligned_out = (void *) out; + if ((unsigned long)out & 0x7) { + aligned_out = OPENSSL_malloc(aligned_len); + if (!aligned_out) + goto slow; + } + + aligned_in = (const void *) in; + if ((unsigned long)in & 0x7) { + memcpy(aligned_out, in, aligned_len); + aligned_in = (const void *) aligned_out; + } + + aes_sparc_hw_ctr_encrypt(aligned_in, aligned_out, aligned_len, + ctx->cipher_data, ctx->iv); + + if ((unsigned long)out & 0x7) { + memcpy(out, aligned_out, aligned_len); + OPENSSL_free(aligned_out); + } +trailing: + len -= aligned_len; + if (len) { + EVP_AES_KEY *dat; + unsigned int num; + + in += aligned_len; + out += aligned_len; +slow: + dat = (EVP_AES_KEY 
*)ctx->cipher_data; + num = ctx->num; + CRYPTO_ctr128_encrypt(in, out, len, &dat->ks, ctx->iv, + ctx->buf, &num, dat->block); + ctx->num = (size_t) num; + } + + return 1; +} + +static int aes_sparc_hw_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + const void *aligned_in; + void *aligned_out; + int aligned_len; + + while (ctx->num && len) { + *out++ = *in++ ^ ctx->iv[ctx->num]; + ctx->num = (ctx->num + 1) % 16; + len--; + } + + aligned_len = len & ~(16 - 1); + if (!aligned_len) + goto trailing; + + aligned_out = (void *) out; + if ((unsigned long)out & 0x7) { + aligned_out = OPENSSL_malloc(aligned_len); + if (!aligned_out) + goto slow; + } + + aligned_in = (const void *) in; + if ((unsigned long)in & 0x7) { + memcpy(aligned_out, in, aligned_len); + aligned_in = (const void *) aligned_out; + } + + aes_sparc_hw_ofb_encrypt(aligned_in, aligned_out, aligned_len, + ctx->cipher_data, ctx->iv); + + if ((unsigned long)out & 0x7) { + memcpy(out, aligned_out, aligned_len); + OPENSSL_free(aligned_out); + } +trailing: + len -= aligned_len; + if (len) { + EVP_AES_KEY *dat; + + in += aligned_len; + out += aligned_len; +slow: + dat = (EVP_AES_KEY *)ctx->cipher_data; + CRYPTO_ofb128_encrypt(in, out, len, &dat->ks, ctx->iv, + &ctx->num, dat->block); + } + + return 1; +} + +static int aes_sparc_hw_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + const void *aligned_in; + void *aligned_out; + int aligned_len; + + if (ctx->num) { + if (ctx->encrypt) { + while (ctx->num && len) { + *out++ = *in++ ^ ctx->iv[ctx->num]; + ctx->num = (ctx->num + 1) % 16; + len--; + } + } else { + while (ctx->num && len) { + unsigned char c; + + c = *in++; + *out++ = c ^ ctx->iv[ctx->num]; + ctx->iv[ctx->num] = c; + + ctx->num = (ctx->num + 1) % 16; + len--; + } + } + } + + aligned_len = len & ~(16 - 1); + if (!aligned_len) + goto trailing; + + aligned_out = (void *) out; + if ((unsigned long)out & 0x7) { + 
aligned_out = OPENSSL_malloc(aligned_len); + if (!aligned_out) + goto slow; + } + + aligned_in = (const void *) in; + if ((unsigned long)in & 0x7) { + memcpy(aligned_out, in, aligned_len); + aligned_in = (const void *) aligned_out; + } + + aes_sparc_hw_cfb_encrypt(aligned_in, aligned_out, aligned_len, + ctx->cipher_data, ctx->iv, ctx->encrypt); + + if ((unsigned long)out & 0x7) { + memcpy(out, aligned_out, aligned_len); + OPENSSL_free(aligned_out); + } +trailing: + len -= aligned_len; + if (len) { + EVP_AES_KEY *dat; + + in += aligned_len; + out += aligned_len; +slow: + dat = (EVP_AES_KEY *)ctx->cipher_data; + CRYPTO_cfb128_encrypt(in, out, len, &dat->ks, ctx->iv, + &ctx->num, ctx->encrypt, dat->block); + } + + return 1; +} + +#define aes_sparc_hw_cfb8_cipher aes_cfb8_cipher +static int aes_sparc_hw_cfb8_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in,size_t len); + +#define aes_sparc_hw_cfb1_cipher aes_cfb1_cipher +static int aes_sparc_hw_cfb1_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, + const unsigned char *in,size_t len); + +#define aes_sparc_hw_gcm_init_key aes_gcm_init_key +static int aes_sparc_hw_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc); + +#define aes_sparc_hw_gcm_cipher aes_gcm_cipher +static int aes_sparc_hw_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +#define aes_sparc_hw_xts_init_key aes_xts_init_key +static int aes_sparc_hw_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc); + +#define aes_sparc_hw_xts_cipher aes_xts_cipher +static int aes_sparc_hw_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +#define aes_sparc_hw_ccm_init_key aes_ccm_init_key +static int aes_sparc_hw_ccm_init_key(EVP_CIPHER_CTX *ctx, + const unsigned char *key, + const unsigned char *iv, int enc); + +#define aes_sparc_hw_ccm_cipher aes_ccm_cipher +static int 
aes_sparc_hw_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +static int aes_sparc_hw_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ + EVP_AES_KEY *dat = (EVP_AES_KEY *) ctx->cipher_data; + int ret, mode; + + mode = ctx->cipher->flags & EVP_CIPH_MODE; + if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) + && !enc) { + ret = AES_set_decrypt_key(key, ctx->key_len*8, ctx->cipher_data); + dat->block = (block128_f)AES_decrypt; + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? + (cbc128_f)aes_sparc_hw_cbc_encrypt : + NULL; + } else { + ret = AES_set_encrypt_key(key, ctx->key_len*8, ctx->cipher_data); + dat->block = (block128_f)AES_encrypt; + if (mode==EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f)aes_sparc_hw_cbc_encrypt; + else + dat->stream.cbc = NULL; + } + + if (ret < 0) { + EVPerr(EVP_F_AES_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED); + return 0; + } + + return 1; +} + +#define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ +static const EVP_CIPHER aes_sparc_hw_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_sparc_hw_init_key, \ + aes_sparc_hw_##mode##_cipher, \ + NULL, \ + sizeof(EVP_AES_KEY), \ + NULL,NULL,NULL,NULL }; \ +static const EVP_CIPHER aes_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize, \ + keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_init_key, \ + aes_##mode##_cipher, \ + NULL, \ + sizeof(EVP_AES_KEY), \ + NULL,NULL,NULL,NULL }; \ +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ +{ return SPARC_AES_CAPABLE?&aes_sparc_hw_##keylen##_##mode:&aes_##keylen##_##mode; } + +#define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ +static const EVP_CIPHER aes_sparc_hw_##keylen##_##mode = { \ + nid##_##keylen##_##mode,blocksize, \ + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ + 
flags|EVP_CIPH_##MODE##_MODE, \ + aes_sparc_hw_##mode##_init_key, \ + aes_sparc_hw_##mode##_cipher, \ + aes_##mode##_cleanup, \ + sizeof(EVP_AES_##MODE##_CTX), \ + NULL,NULL,aes_##mode##_ctrl,NULL }; \ +static const EVP_CIPHER aes_##keylen##_##mode = { \ + nid##_##keylen##_##mode,blocksize, \ + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_##mode##_init_key, \ + aes_##mode##_cipher, \ + aes_##mode##_cleanup, \ + sizeof(EVP_AES_##MODE##_CTX), \ + NULL,NULL,aes_##mode##_ctrl,NULL }; \ +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ +{ return SPARC_AES_CAPABLE?&aes_sparc_hw_##keylen##_##mode:&aes_##keylen##_##mode; } + +#else + #define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ static const EVP_CIPHER aes_##keylen##_##mode = { \ nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ @@ -485,6 +883,8 @@ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ { return &aes_##keylen##_##mode; } #endif +#endif + #define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ diff --git a/crypto/sparc_arch.h b/crypto/sparc_arch.h index f478ce3..032d67c 100644 --- a/crypto/sparc_arch.h +++ b/crypto/sparc_arch.h @@ -61,6 +61,25 @@ extern int OPENSSL_sparcv9cap_P; #define AES_KEXPAND2(a,b,c) \ .word (F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c)); +#define MOVXTOD_G3_F4 \ + .word 0x89b02303; +#define MOVXTOD_G5_F6 \ + .word 0x8db02305; +#define MOVXTOD_G3_F0 \ + .word 0x81b02303; +#define MOVXTOD_G5_F2 \ + .word 0x85b02305; +#define MOVXTOD_O3_F0 \ + .word 0x81b0230b; +#define MOVXTOD_O5_F0 \ + .word 0x81b0230d; +#define MOVXTOD_O5_F2 \ + .word 0x85b0230d; +#define MOVXTOD_O5_F4 \ + .word 0x89b0230d; +#define MOVXTOD_O5_F6 \ + .word 0x8db0230d; + #ifdef __PIC__ #define SPARC_PIC_THUNK(reg) \ .align 32; \ -- 1.7.10.4 
______________________________________________________________________ OpenSSL Project http://www.openssl.org Development Mailing List openssl-dev@openssl.org Automated List Manager majord...@openssl.org