The biggest trick here is providing the mechanism necessary to expand the key properly.
The DES opcodes expect the expanded key to be in a different format than the generic openssl DES code does. So we use some include and CPP define trickery so that we can override the key expansion in the cases in which we need to. Note in particular that we can't really use the DES opcodes for the fcrypt implementation, so we arrange things so that the generic key expansion is forced in that case. On a SPARC T4-2, first with crypto opcodes disabled: type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes des cbc 54342.76k 56258.03k 57101.95k 57558.36k 57636.18k des ede3 19990.29k 20679.25k 20837.29k 20877.31k 20889.60k and with them turned on: type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes des cbc 341439.34k 443975.51k 468697.94k 482163.84k 491296.09k des ede3 130192.06k 173126.76k 185801.81k 192443.16k 193732.61k Signed-off-by: David S. Miller <da...@davemloft.net> --- Configure | 10 +- crypto/des/Makefile | 2 +- crypto/des/asm/des_enc.m4 | 968 +++++++++++++++++++++++++++++++++++++++++++- crypto/des/des_sparccore.c | 51 +++ crypto/sparc_arch.h | 11 + 5 files changed, 1031 insertions(+), 11 deletions(-) diff --git a/Configure b/Configure index 74bc534..1a3d855 100755 --- a/Configure +++ b/Configure @@ -124,14 +124,14 @@ my $tlib="-lnsl -lsocket"; my $bits1="THIRTY_TWO_BIT "; my $bits2="SIXTY_FOUR_BIT "; -my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt586.o set_key.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o:e_padlock-x86.o"; +my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt586.o set_key.o fcrypt.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o:e_padlock-x86.o"; my $x86_elf_asm="$x86_asm:elf"; my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o 
x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:e_padlock-x86_64.o"; my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o des_sparccore.o fcrypt_b.o set_key.o:aes_sparccore.o aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::cmll-sparcv9.o cmll_sparccore.o:ghash-sparcv9.o::void"; -my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o des_sparccore.o fcrypt_b.o set_key.o:::::::::::::void"; +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o des_sparccore.o fcrypt_b.o:aes_sparccore.o aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::cmll-sparcv9.o cmll_sparccore.o:ghash-sparcv9.o::void"; +my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o des_sparccore.o fcrypt_b.o:::::::::::::void"; my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void"; my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; my $mips32_asm=$mips64_asm; $mips32_asm =~ s/\s*sha512\-mips\.o//; @@ -190,7 +190,7 @@ my %table=( "debug-linux-ppro","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -DTERMIO -g -mcpu=pentiumpro -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn", "debug-linux-elf","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -DTERMIO -g -march=i486 -Wall::-D_REENTRANT::-lefence -ldl:BN_LLONG ${x86_gcc_des} 
${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-linux-elf-noefence","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -DTERMIO -g -march=i486 -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o set_key.o:aes_x86core.o aes_cbc.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o::ghash-x86.o:e_padlock-x86.o:elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o set_key.o fcrypt.o:aes_x86core.o aes_cbc.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o::ghash-x86.o:e_padlock-x86.o:elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-linux-generic32","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DTERMIO -g -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-linux-generic64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DTERMIO -g -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-linux-x86_64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -m64 -DL_ENDIAN -DTERMIO -g 
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", @@ -698,7 +698,7 @@ my $idea ="crypto/idea/idea.h"; my $rc2 ="crypto/rc2/rc2.h"; my $bf ="crypto/bf/bf_locl.h"; my $bn_asm ="bn_asm.o"; -my $des_enc="des_enc.o fcrypt_b.o set_key.o"; +my $des_enc="des_enc.o fcrypt.o fcrypt_b.o set_key.o"; my $aes_enc="aes_core.o aes_cbc.o"; my $bf_enc ="bf_enc.o"; my $cast_enc="c_enc.o"; diff --git a/crypto/des/Makefile b/crypto/des/Makefile index 893d2e6..b3f7797 100644 --- a/crypto/des/Makefile +++ b/crypto/des/Makefile @@ -36,7 +36,7 @@ LIBOBJ= ecb_enc.o cbc_enc.o \ enc_read.o enc_writ.o ofb64enc.o \ ofb_enc.o str2key.o pcbc_enc.o qud_cksm.o rand_key.o \ ${DES_ENC} \ - fcrypt.o xcbc_enc.o rpc_enc.o cbc_cksm.o \ + xcbc_enc.o rpc_enc.o cbc_cksm.o \ ede_cbcm_enc.o des_old.o des_old2.o read2pwd.o SRC= $(LIBSRC) diff --git a/crypto/des/asm/des_enc.m4 b/crypto/des/asm/des_enc.m4 index a69273c..06964b0 100644 --- a/crypto/des/asm/des_enc.m4 +++ b/crypto/des/asm/des_enc.m4 @@ -47,6 +47,7 @@ .file "des_enc-sparc.S" #include <openssl/opensslconf.h> +#include "sparc_arch.h" #ifdef OPENSSL_FIPSCANISTER #include <openssl/fipssyms.h> @@ -1125,6 +1126,8 @@ define(register_init, { .section ".text" +SPARC_PIC_THUNK(g2) + .align 32 .des_enc: @@ -1157,7 +1160,83 @@ define(register_init, { .type DES_encrypt1,#function DES_encrypt1: - +#ifdef __sparc_v9__ + SPARC_LOAD_V9_CAPS_LEAF(g2, g1) + andcc %g2, SPARCV9_DES, %g0 + be .Lencrypt1_software + cmp %o2, 0 + + ld [%o1 + 0x00], %f2 + ld [%o1 + 0x04], %f3 + ld [%o1 + 0x08], %f4 + ld [%o1 + 0x0c], %f5 + ld [%o1 + 0x10], %f6 + ld [%o1 + 0x14], %f7 + ld [%o1 + 0x18], %f8 + ld [%o1 + 0x1c], %f9 + ld [%o1 + 0x20], %f10 + ld [%o1 + 0x24], %f11 + ld [%o1 + 0x28], %f12 + ld [%o1 + 0x2c], %f13 + ld [%o1 + 0x30], %f14 + ld [%o1 + 0x34], %f15 + ld [%o1 + 0x38], %f16 + ld [%o1 + 0x3c], %f17 + ld [%o1 + 0x40], %f18 + ld [%o1 + 0x44], %f19 + ld [%o1 + 
0x48], %f20 + ld [%o1 + 0x4c], %f21 + ld [%o1 + 0x50], %f22 + ld [%o1 + 0x54], %f23 + ld [%o1 + 0x58], %f24 + ld [%o1 + 0x5c], %f25 + ld [%o1 + 0x60], %f26 + ld [%o1 + 0x64], %f27 + ld [%o1 + 0x68], %f28 + ld [%o1 + 0x6c], %f29 + ld [%o1 + 0x70], %f30 + ld [%o1 + 0x74], %f31 + ld [%o1 + 0x78], %f0 + ld [%o1 + 0x7c], %f1 + fsrc2 %f0, %f32 + lda [%o0] 0x88, %f0 + add %o0, 0x4, %o5 + be .Ldes_decrypt1 + lda [%o5] 0x88, %f1 + + DES_IP(0, 0) + DES_ROUND(2, 4, 0, 0) + DES_ROUND(6, 8, 0, 0) + DES_ROUND(10, 12, 0, 0) + DES_ROUND(14, 16, 0, 0) + DES_ROUND(18, 20, 0, 0) + DES_ROUND(22, 24, 0, 0) + DES_ROUND(26, 28, 0, 0) + DES_ROUND(30, 32, 0, 0) + DES_IIP(0, 0) + +.Lfinish_encrypt1: + sta %f0, [%o0] 0x88 + add %o0, 0x4, %o5 + retl + sta %f1, [%o5] 0x88 + +.Ldes_decrypt1: + DES_IP(0, 0) + DES_ROUND(32, 30, 0, 0) + DES_ROUND(28, 26, 0, 0) + DES_ROUND(24, 22, 0, 0) + DES_ROUND(20, 18, 0, 0) + DES_ROUND(16, 14, 0, 0) + DES_ROUND(12, 10, 0, 0) + DES_ROUND(8, 6, 0, 0) + DES_ROUND(4, 2, 0, 0) + DES_IIP(0, 0) + ba,pt %icc, .Lfinish_encrypt1 + nop + +.Lencrypt1_software: +#endif save %sp, FRAME, %sp sethi %hi(.PIC.DES_SPtrans-1f),global1 @@ -1336,7 +1415,157 @@ DES_encrypt2: .type DES_encrypt3,#function DES_encrypt3: - +#ifdef __sparc_v9__ + SPARC_LOAD_V9_CAPS_LEAF(g2, g1) + andcc %g2, SPARCV9_DES, %g0 + be .Lencrypt3_software + nop + + lda [%o0] 0x88, %f0 + add %o0, 0x4, %o5 + lda [%o5] 0x88, %f1 + + DES_IP(0, 32) + + ld [%o1 + 0x00], %f0 + ld [%o1 + 0x04], %f1 + ld [%o1 + 0x08], %f2 + ld [%o1 + 0x0c], %f3 + ld [%o1 + 0x10], %f4 + ld [%o1 + 0x14], %f5 + ld [%o1 + 0x18], %f6 + ld [%o1 + 0x1c], %f7 + ld [%o1 + 0x20], %f8 + ld [%o1 + 0x24], %f9 + ld [%o1 + 0x28], %f10 + ld [%o1 + 0x2c], %f11 + ld [%o1 + 0x30], %f12 + ld [%o1 + 0x34], %f13 + ld [%o1 + 0x38], %f14 + ld [%o1 + 0x3c], %f15 + ld [%o1 + 0x40], %f16 + ld [%o1 + 0x44], %f17 + ld [%o1 + 0x48], %f18 + ld [%o1 + 0x4c], %f19 + ld [%o1 + 0x50], %f20 + ld [%o1 + 0x54], %f21 + ld [%o1 + 0x58], %f22 + ld [%o1 + 0x5c], %f23 + ld [%o1 
+ 0x60], %f24 + ld [%o1 + 0x64], %f25 + ld [%o1 + 0x68], %f26 + ld [%o1 + 0x6c], %f27 + ld [%o1 + 0x70], %f28 + ld [%o1 + 0x74], %f29 + ld [%o1 + 0x78], %f30 + ld [%o1 + 0x7c], %f31 + + DES_ROUND(0, 2, 32, 32) + ld [%o2 + 0x00], %f0 + ld [%o2 + 0x04], %f1 + ld [%o2 + 0x08], %f2 + ld [%o2 + 0x0c], %f3 + DES_ROUND(4, 6, 32, 32) + ld [%o2 + 0x10], %f4 + ld [%o2 + 0x14], %f5 + ld [%o2 + 0x18], %f6 + ld [%o2 + 0x1c], %f7 + DES_ROUND(8, 10, 32, 32) + ld [%o2 + 0x20], %f8 + ld [%o2 + 0x24], %f9 + ld [%o2 + 0x28], %f10 + ld [%o2 + 0x2c], %f11 + DES_ROUND(12, 14, 32, 32) + ld [%o2 + 0x30], %f12 + ld [%o2 + 0x34], %f13 + ld [%o2 + 0x38], %f14 + ld [%o2 + 0x3c], %f15 + DES_ROUND(16, 18, 32, 32) + ld [%o2 + 0x40], %f16 + ld [%o2 + 0x44], %f17 + ld [%o2 + 0x48], %f18 + ld [%o2 + 0x4c], %f19 + DES_ROUND(20, 22, 32, 32) + ld [%o2 + 0x50], %f20 + ld [%o2 + 0x54], %f21 + ld [%o2 + 0x58], %f22 + ld [%o2 + 0x5c], %f23 + DES_ROUND(24, 26, 32, 32) + ld [%o2 + 0x60], %f24 + ld [%o2 + 0x64], %f25 + ld [%o2 + 0x68], %f26 + ld [%o2 + 0x6c], %f27 + DES_ROUND(28, 30, 32, 32) + ld [%o2 + 0x70], %f28 + ld [%o2 + 0x74], %f29 + ld [%o2 + 0x78], %f30 + ld [%o2 + 0x7c], %f31 + + DES_IIP(32, 32) + DES_IP(32, 32) + + DES_ROUND(30, 28, 32, 32) + ld [%o3 + 0x70], %f28 + ld [%o3 + 0x74], %f29 + ld [%o3 + 0x78], %f30 + ld [%o3 + 0x7c], %f31 + DES_ROUND(26, 24, 32, 32) + ld [%o3 + 0x60], %f24 + ld [%o3 + 0x64], %f25 + ld [%o3 + 0x68], %f26 + ld [%o3 + 0x6c], %f27 + DES_ROUND(22, 20, 32, 32) + ld [%o3 + 0x50], %f20 + ld [%o3 + 0x54], %f21 + ld [%o3 + 0x58], %f22 + ld [%o3 + 0x5c], %f23 + DES_ROUND(18, 16, 32, 32) + ld [%o3 + 0x40], %f16 + ld [%o3 + 0x44], %f17 + ld [%o3 + 0x48], %f18 + ld [%o3 + 0x4c], %f19 + DES_ROUND(14, 12, 32, 32) + ld [%o3 + 0x30], %f12 + ld [%o3 + 0x34], %f13 + ld [%o3 + 0x38], %f14 + ld [%o3 + 0x3c], %f15 + DES_ROUND(10, 8, 32, 32) + ld [%o3 + 0x20], %f8 + ld [%o3 + 0x24], %f9 + ld [%o3 + 0x28], %f10 + ld [%o3 + 0x2c], %f11 + DES_ROUND(6, 4, 32, 32) + ld [%o3 + 0x10], %f4 + ld [%o3 
+ 0x14], %f5 + ld [%o3 + 0x18], %f6 + ld [%o3 + 0x1c], %f7 + DES_ROUND(2, 0, 32, 32) + ld [%o3 + 0x00], %f0 + ld [%o3 + 0x04], %f1 + ld [%o3 + 0x08], %f2 + ld [%o3 + 0x0c], %f3 + + DES_IIP(32, 32) + DES_IP(32, 32) + + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + + DES_IIP(32, 0) + + sta %f0, [%o0] 0x88 + add %o0, 0x4, %o5 + retl + sta %f1, [%o5] 0x88 + +.Lencrypt3_software: +#endif save %sp, FRAME, %sp sethi %hi(.PIC.DES_SPtrans-1f),global1 @@ -1381,7 +1610,156 @@ DES_encrypt3: .type DES_decrypt3,#function DES_decrypt3: - +#ifdef __sparc_v9__ + SPARC_LOAD_V9_CAPS_LEAF(g2, g1) + andcc %g2, SPARCV9_DES, %g0 + be .Ldecrypt3_software + nop + + lda [%o0] 0x88, %f0 + add %o0, 0x4, %o5 + lda [%o5] 0x88, %f1 + DES_IP(0, 32) + + ld [%o3 + 0x00], %f0 + ld [%o3 + 0x04], %f1 + ld [%o3 + 0x08], %f2 + ld [%o3 + 0x0c], %f3 + ld [%o3 + 0x10], %f4 + ld [%o3 + 0x14], %f5 + ld [%o3 + 0x18], %f6 + ld [%o3 + 0x1c], %f7 + ld [%o3 + 0x20], %f8 + ld [%o3 + 0x24], %f9 + ld [%o3 + 0x28], %f10 + ld [%o3 + 0x2c], %f11 + ld [%o3 + 0x30], %f12 + ld [%o3 + 0x34], %f13 + ld [%o3 + 0x38], %f14 + ld [%o3 + 0x3c], %f15 + ld [%o3 + 0x40], %f16 + ld [%o3 + 0x44], %f17 + ld [%o3 + 0x48], %f18 + ld [%o3 + 0x4c], %f19 + ld [%o3 + 0x50], %f20 + ld [%o3 + 0x54], %f21 + ld [%o3 + 0x58], %f22 + ld [%o3 + 0x5c], %f23 + ld [%o3 + 0x60], %f24 + ld [%o3 + 0x64], %f25 + ld [%o3 + 0x68], %f26 + ld [%o3 + 0x6c], %f27 + ld [%o3 + 0x70], %f28 + ld [%o3 + 0x74], %f29 + ld [%o3 + 0x78], %f30 + ld [%o3 + 0x7c], %f31 + + DES_ROUND(30, 28, 32, 32) + ld [%o2 + 0x70], %f28 + ld [%o2 + 0x74], %f29 + ld [%o2 + 0x78], %f30 + ld [%o2 + 0x7c], %f31 + DES_ROUND(26, 24, 32, 32) + ld [%o2 + 0x60], %f24 + ld [%o2 + 0x64], %f25 + ld [%o2 + 0x68], %f26 + ld [%o2 + 0x6c], %f27 + DES_ROUND(22, 20, 32, 32) + ld [%o2 + 0x50], %f20 + ld [%o2 + 0x54], %f21 + ld [%o2 + 
0x58], %f22 + ld [%o2 + 0x5c], %f23 + DES_ROUND(18, 16, 32, 32) + ld [%o2 + 0x40], %f16 + ld [%o2 + 0x44], %f17 + ld [%o2 + 0x48], %f18 + ld [%o2 + 0x4c], %f19 + DES_ROUND(14, 12, 32, 32) + ld [%o2 + 0x30], %f12 + ld [%o2 + 0x34], %f13 + ld [%o2 + 0x38], %f14 + ld [%o2 + 0x3c], %f15 + DES_ROUND(10, 8, 32, 32) + ld [%o2 + 0x20], %f8 + ld [%o2 + 0x24], %f9 + ld [%o2 + 0x28], %f10 + ld [%o2 + 0x2c], %f11 + DES_ROUND(6, 4, 32, 32) + ld [%o2 + 0x10], %f4 + ld [%o2 + 0x14], %f5 + ld [%o2 + 0x18], %f6 + ld [%o2 + 0x1c], %f7 + DES_ROUND(2, 0, 32, 32) + ld [%o2 + 0x00], %f0 + ld [%o2 + 0x04], %f1 + ld [%o2 + 0x08], %f2 + ld [%o2 + 0x0c], %f3 + + DES_IIP(32, 32) + DES_IP(32, 32) + + DES_ROUND(0, 2, 32, 32) + ld [%o1 + 0x00], %f0 + ld [%o1 + 0x04], %f1 + ld [%o1 + 0x08], %f2 + ld [%o1 + 0x0c], %f3 + DES_ROUND(4, 6, 32, 32) + ld [%o1 + 0x10], %f4 + ld [%o1 + 0x14], %f5 + ld [%o1 + 0x18], %f6 + ld [%o1 + 0x1c], %f7 + DES_ROUND(8, 10, 32, 32) + ld [%o1 + 0x20], %f8 + ld [%o1 + 0x24], %f9 + ld [%o1 + 0x28], %f10 + ld [%o1 + 0x2c], %f11 + DES_ROUND(12, 14, 32, 32) + ld [%o1 + 0x30], %f12 + ld [%o1 + 0x34], %f13 + ld [%o1 + 0x38], %f14 + ld [%o1 + 0x3c], %f15 + DES_ROUND(16, 18, 32, 32) + ld [%o1 + 0x40], %f16 + ld [%o1 + 0x44], %f17 + ld [%o1 + 0x48], %f18 + ld [%o1 + 0x4c], %f19 + DES_ROUND(20, 22, 32, 32) + ld [%o1 + 0x50], %f20 + ld [%o1 + 0x54], %f21 + ld [%o1 + 0x58], %f22 + ld [%o1 + 0x5c], %f23 + DES_ROUND(24, 26, 32, 32) + ld [%o1 + 0x60], %f24 + ld [%o1 + 0x64], %f25 + ld [%o1 + 0x68], %f26 + ld [%o1 + 0x6c], %f27 + DES_ROUND(28, 30, 32, 32) + ld [%o1 + 0x70], %f28 + ld [%o1 + 0x74], %f29 + ld [%o1 + 0x78], %f30 + ld [%o1 + 0x7c], %f31 + + DES_IIP(32, 32) + DES_IP(32, 32) + + DES_ROUND(30, 28, 32, 32) + DES_ROUND(26, 24, 32, 32) + DES_ROUND(22, 20, 32, 32) + DES_ROUND(18, 16, 32, 32) + DES_ROUND(14, 12, 32, 32) + DES_ROUND(10, 8, 32, 32) + DES_ROUND(6, 4, 32, 32) + DES_ROUND(2, 0, 32, 32) + + DES_IIP(32, 0) + + sta %f0, [%o0] 0x88 + add %o0, 0x4, %o5 + retl + sta %f1, 
[%o5] 0x88 + +.Ldecrypt3_software: +#endif save %sp, FRAME, %sp sethi %hi(.PIC.DES_SPtrans-1f),global1 @@ -1426,7 +1804,149 @@ DES_decrypt3: .type DES_ncbc_encrypt_asm,#function DES_ncbc_encrypt_asm: - +#ifdef __sparc_v9__ + SPARC_LOAD_V9_CAPS_LEAF(g2, g1) + andcc %g2, SPARCV9_DES, %g0 + be .Lncbc_encrypt_software + cmp %o5, 0 + + ld [%o4 + 0x00], %f0 + ld [%o4 + 0x04], %f1 + fsrc2 %f0, %f34 + + ld [%o3 + 0x00], %f2 + ld [%o3 + 0x04], %f3 + ld [%o3 + 0x08], %f4 + ld [%o3 + 0x0c], %f5 + ld [%o3 + 0x10], %f6 + ld [%o3 + 0x14], %f7 + ld [%o3 + 0x18], %f8 + ld [%o3 + 0x1c], %f9 + ld [%o3 + 0x20], %f10 + ld [%o3 + 0x24], %f11 + ld [%o3 + 0x28], %f12 + ld [%o3 + 0x2c], %f13 + ld [%o3 + 0x30], %f14 + ld [%o3 + 0x34], %f15 + ld [%o3 + 0x38], %f16 + ld [%o3 + 0x3c], %f17 + ld [%o3 + 0x40], %f18 + ld [%o3 + 0x44], %f19 + ld [%o3 + 0x48], %f20 + ld [%o3 + 0x4c], %f21 + ld [%o3 + 0x50], %f22 + ld [%o3 + 0x54], %f23 + ld [%o3 + 0x58], %f24 + ld [%o3 + 0x5c], %f25 + ld [%o3 + 0x60], %f26 + ld [%o3 + 0x64], %f27 + ld [%o3 + 0x68], %f28 + ld [%o3 + 0x6c], %f29 + ld [%o3 + 0x70], %f30 + ld [%o3 + 0x74], %f31 + ld [%o3 + 0x78], %f0 + and %o2, 0x07, %g1 + ld [%o3 + 0x7c], %f1 + andn %o2, 0x07, %o2 + be .Lncbc_hw_decrypt + fsrc2 %f0, %f32 + + cmp %o2, 0 + be,pn %icc, 3f + nop +1: ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 +2: fxor %f0, %f34, %f0 + DES_IP(0, 0) + DES_ROUND(2, 4, 0, 0) + DES_ROUND(6, 8, 0, 0) + DES_ROUND(10, 12, 0, 0) + DES_ROUND(14, 16, 0, 0) + DES_ROUND(18, 20, 0, 0) + DES_ROUND(22, 24, 0, 0) + DES_ROUND(26, 28, 0, 0) + DES_ROUND(30, 32, 0, 0) + DES_IIP(0, 0) + fsrc2 %f0, %f34 + st %f0, [%o1 + 0x00] + st %f1, [%o1 + 0x04] + add %o0, 0x08, %o0 + subcc %o2, 0x08, %o2 + bne,pt %icc, 1b + add %o1, 0x08, %o1 +3: cmp %g1, 0 + be,pt %icc, .Lncbc_hw_finish + mov 64, %o5 + lduw [%o0 + 0x00], %g2 + sll %g1, 3, %g1 + lduw [%o0 + 0x04], %g3 + sub %o5, %g1, %o5 + sllx %g2, 32, %g2 + or %g2, %g3, %g3 + srlx %g3, %o5, %g3 + sllx %g3, %o5, %g3 + MOVXTOD_G3_F0 + mov 0, %g1 + ba,pt 
%icc, 2b + add %o2, 0x08, %o2 + +.Lncbc_hw_finish: + st %f0, [%o4 + 0x00] + retl + st %f1, [%o4 + 0x04] + +.Lncbc_hw_decrypt: + cmp %o2, 0 + be,pn %icc, 3f + nop +1: ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + fsrc2 %f0, %f36 + DES_IP(0, 0) + DES_ROUND(32, 30, 0, 0) + DES_ROUND(28, 26, 0, 0) + DES_ROUND(24, 22, 0, 0) + DES_ROUND(20, 18, 0, 0) + DES_ROUND(16, 14, 0, 0) + DES_ROUND(12, 10, 0, 0) + DES_ROUND(8, 6, 0, 0) + DES_ROUND(4, 2, 0, 0) + DES_IIP(0, 0) + fxor %f0, %f34, %f0 + fsrc2 %f36, %f34 + st %f0, [%o1 + 0x00] + st %f1, [%o1 + 0x04] + add %o0, 0x08, %o0 + subcc %o2, 0x08, %o2 + bne,pt %icc, 1b + add %o1, 0x08, %o1 +3: cmp %g1, 0 + be,pt %icc, 4f + mov 0xff, %o5 + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + fsrc2 %f0, %f36 + DES_IP(0, 0) + DES_ROUND(32, 30, 0, 0) + DES_ROUND(28, 26, 0, 0) + DES_ROUND(24, 22, 0, 0) + DES_ROUND(20, 18, 0, 0) + DES_ROUND(16, 14, 0, 0) + DES_ROUND(12, 10, 0, 0) + DES_ROUND(8, 6, 0, 0) + DES_ROUND(4, 2, 0, 0) + DES_IIP(0, 0) + fxor %f0, %f34, %f0 + fsrc2 %f36, %f34 + mov 8, %g2 + sll %g1, 3, %g1 + sub %g2, %g1, %g2 + sll %o5, %g2, %o5 + stda %f0, [%o1 + %o5] 0xc0 +4: ba,pt %icc, .Lncbc_hw_finish + fsrc2 %f34, %f0 +.Lncbc_encrypt_software: +#endif save %sp, FRAME, %sp define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] }) @@ -1651,7 +2171,371 @@ DES_ncbc_encrypt_asm: .type DES_ede3_cbc_encrypt_asm,#function DES_ede3_cbc_encrypt_asm: - +#ifdef __sparc_v9__ + SPARC_LOAD_V9_CAPS_LEAF(g2, g1) + andcc %g2, SPARCV9_DES, %g0 + be .Lede3_cbc_encrypt_software + and %o2, 7, %g1 + LDPTR [%sp+BIAS+ARG0+7*ARGSZ], %g2 + andn %o2, 7, %o2 + LDPTR [%sp+BIAS+ARG0+6*ARGSZ], %g3 + cmp %g2, 0 + ld [%g3 + 0x00], %f0 + ld [%g3 + 0x04], %f1 + bne 1f + fsrc2 %f0, %f62 + + mov %o5, %g2 + mov %o3, %o5 + mov %g2, %o3 + +1: ld [%o3 + 0x78], %f0 + ld [%o3 + 0x7c], %f1 + ld [%o4 + 0x00], %f2 + ld [%o4 + 0x04], %f3 + ld [%o4 + 0x08], %f4 + ld [%o4 + 0x0c], %f5 + ld [%o4 + 0x10], %f6 + ld [%o4 + 0x14], %f7 + ld [%o4 + 0x18], %f8 + ld [%o4 + 0x1c], %f9 + ld [%o4 + 
0x20], %f10 + ld [%o4 + 0x24], %f11 + ld [%o4 + 0x28], %f12 + ld [%o4 + 0x2c], %f13 + ld [%o4 + 0x30], %f14 + ld [%o4 + 0x34], %f15 + ld [%o4 + 0x38], %f16 + ld [%o4 + 0x3c], %f17 + ld [%o4 + 0x40], %f18 + ld [%o4 + 0x44], %f19 + ld [%o4 + 0x48], %f20 + ld [%o4 + 0x4c], %f21 + ld [%o4 + 0x50], %f22 + ld [%o4 + 0x54], %f23 + ld [%o4 + 0x58], %f24 + ld [%o4 + 0x5c], %f25 + ld [%o4 + 0x60], %f26 + ld [%o4 + 0x64], %f27 + ld [%o4 + 0x68], %f28 + ld [%o4 + 0x6c], %f29 + fsrc2 %f0, %f32 + fsrc2 %f2, %f34 + ld [%o3 + 0x00], %f2 + ld [%o3 + 0x04], %f3 + fsrc2 %f4, %f36 + ld [%o3 + 0x08], %f4 + ld [%o3 + 0x0c], %f5 + fsrc2 %f6, %f38 + ld [%o3 + 0x10], %f6 + ld [%o3 + 0x14], %f7 + fsrc2 %f8, %f40 + ld [%o3 + 0x18], %f8 + ld [%o3 + 0x1c], %f9 + fsrc2 %f10, %f42 + ld [%o3 + 0x20], %f10 + ld [%o3 + 0x24], %f11 + fsrc2 %f12, %f44 + ld [%o3 + 0x28], %f12 + ld [%o3 + 0x2c], %f13 + fsrc2 %f14, %f46 + ld [%o3 + 0x30], %f14 + ld [%o3 + 0x34], %f15 + fsrc2 %f16, %f48 + ld [%o3 + 0x38], %f16 + ld [%o3 + 0x3c], %f17 + fsrc2 %f18, %f50 + ld [%o3 + 0x40], %f18 + ld [%o3 + 0x44], %f19 + fsrc2 %f20, %f52 + ld [%o3 + 0x48], %f20 + ld [%o3 + 0x4c], %f21 + fsrc2 %f22, %f54 + ld [%o3 + 0x50], %f22 + ld [%o3 + 0x54], %f23 + fsrc2 %f24, %f56 + ld [%o3 + 0x58], %f24 + ld [%o3 + 0x5c], %f25 + fsrc2 %f26, %f58 + ld [%o3 + 0x60], %f26 + ld [%o3 + 0x64], %f27 + fsrc2 %f28, %f60 + ld [%o3 + 0x68], %f28 + ld [%o3 + 0x6c], %f29 + ld [%o3 + 0x70], %f30 + be .Lede3_cbc_hw_decrypt + ld [%o3 + 0x74], %f31 + + cmp %o2, 0 + be,pn %icc, 3f + nop +1: ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 +2: fxor %f0, %f62, %f0 + DES_IP(0, 0) + DES_ROUND(2, 4, 0, 0) + DES_ROUND(6, 8, 0, 0) + DES_ROUND(10, 12, 0, 0) + DES_ROUND(14, 16, 0, 0) + ld [%o4 + 0x70], %f14 + ld [%o4 + 0x74], %f15 + ld [%o4 + 0x78], %f16 + ld [%o4 + 0x7c], %f17 + DES_ROUND(18, 20, 0, 0) + ld [%o5 + 0x00], %f18 + ld [%o5 + 0x04], %f19 + ld [%o5 + 0x08], %f20 + ld [%o5 + 0x0c], %f21 + DES_ROUND(22, 24, 0, 0) + ld [%o5 + 0x10], %f22 + ld [%o5 + 0x14], 
%f23 + ld [%o5 + 0x18], %f24 + ld [%o5 + 0x1c], %f25 + DES_ROUND(26, 28, 0, 0) + ld [%o5 + 0x20], %f26 + ld [%o5 + 0x24], %f27 + ld [%o5 + 0x28], %f28 + ld [%o5 + 0x2c], %f29 + DES_ROUND(30, 32, 0, 0) + ld [%o5 + 0x30], %f2 + ld [%o5 + 0x34], %f3 + ld [%o5 + 0x38], %f4 + ld [%o5 + 0x3c], %f5 + DES_IIP(0, 0) + DES_IP(0, 0) + DES_ROUND(16, 14, 0, 0) + ld [%o5 + 0x40], %f6 + DES_ROUND(60, 58, 0, 0) + ld [%o5 + 0x44], %f7 + DES_ROUND(56, 54, 0, 0) + ld [%o5 + 0x48], %f8 + DES_ROUND(52, 50, 0, 0) + ld [%o5 + 0x4c], %f9 + DES_ROUND(48, 46, 0, 0) + ld [%o5 + 0x50], %f10 + DES_ROUND(44, 42, 0, 0) + ld [%o5 + 0x54], %f11 + DES_ROUND(40, 38, 0, 0) + ld [%o5 + 0x58], %f12 + ld [%o5 + 0x5c], %f13 + DES_ROUND(36, 34, 0, 0) + ld [%o5 + 0x60], %f14 + ld [%o5 + 0x64], %f15 + ld [%o5 + 0x68], %f16 + ld [%o5 + 0x6c], %f17 + DES_IIP(0, 0) + DES_IP(0, 0) + DES_ROUND(18, 20, 0, 0) + ld [%o5 + 0x70], %f18 + ld [%o5 + 0x74], %f19 + ld [%o5 + 0x78], %f20 + ld [%o5 + 0x7c], %f21 + DES_ROUND(22, 24, 0, 0) + ld [%o3 + 0x50], %f22 + ld [%o3 + 0x54], %f23 + ld [%o3 + 0x58], %f24 + ld [%o3 + 0x5c], %f25 + DES_ROUND(26, 28, 0, 0) + ld [%o3 + 0x60], %f26 + ld [%o3 + 0x64], %f27 + ld [%o3 + 0x68], %f28 + ld [%o3 + 0x6c], %f29 + DES_ROUND(2, 4, 0, 0) + ld [%o3 + 0x00], %f2 + ld [%o3 + 0x04], %f3 + ld [%o3 + 0x08], %f4 + ld [%o3 + 0x0c], %f5 + DES_ROUND(6, 8, 0, 0) + ld [%o3 + 0x10], %f6 + ld [%o3 + 0x14], %f7 + ld [%o3 + 0x18], %f8 + ld [%o3 + 0x1c], %f9 + DES_ROUND(10, 12, 0, 0) + ld [%o3 + 0x20], %f10 + ld [%o3 + 0x24], %f11 + ld [%o3 + 0x28], %f12 + ld [%o3 + 0x2c], %f13 + DES_ROUND(14, 16, 0, 0) + ld [%o3 + 0x30], %f14 + ld [%o3 + 0x34], %f15 + ld [%o3 + 0x38], %f16 + ld [%o3 + 0x3c], %f17 + DES_ROUND(18, 20, 0, 0) + ld [%o3 + 0x40], %f18 + ld [%o3 + 0x44], %f19 + ld [%o3 + 0x48], %f20 + ld [%o3 + 0x4c], %f21 + DES_IIP(0, 0) + fsrc2 %f0, %f62 + st %f0, [%o1 + 0x00] + st %f1, [%o1 + 0x04] + add %o0, 0x08, %o0 + subcc %o2, 0x08, %o2 + bne,pt %icc, 1b + add %o1, 0x08, %o1 + +3: cmp %g1, 0 + be,pt 
%icc, .Lede3_cbc_hw_finish + mov 64, %o2 + lduw [%o0 + 0x00], %g2 + sll %g1, 3, %g1 + lduw [%o0 + 0x04], %g3 + sub %o2, %g1, %o2 + sllx %g2, 32, %g2 + or %g2, %g3, %g3 + srlx %g3, %o2, %g3 + sllx %g3, %o2, %g3 + MOVXTOD_G3_F0 + mov 0, %g1 + ba,pt %icc, 2b + mov 0x08, %o2 + +.Lede3_cbc_hw_finish: + LDPTR [%sp+BIAS+ARG0+6*ARGSZ], %g3 + st %f0, [%g3 + 0x00] + retl + st %f1, [%g3 + 0x04] + +#define CBC_HW_DECRYPT \ + DES_IP(0, 0); \ + DES_ROUND(32, 30, 0, 0); \ + DES_ROUND(28, 26, 0, 0); \ + ld [%o5 + 0x50], %f26; \ + ld [%o5 + 0x54], %f27; \ + ld [%o5 + 0x58], %f28; \ + ld [%o5 + 0x5c], %f29; \ + DES_ROUND(24, 22, 0, 0); \ + ld [%o5 + 0x60], %f22; \ + ld [%o5 + 0x64], %f23; \ + ld [%o5 + 0x68], %f24; \ + ld [%o5 + 0x6c], %f25; \ + DES_ROUND(20, 18, 0, 0); \ + ld [%o5 + 0x70], %f18; \ + ld [%o5 + 0x74], %f19; \ + ld [%o5 + 0x78], %f20; \ + ld [%o5 + 0x7c], %f21; \ + DES_ROUND(16, 14, 0, 0); \ + ld [%o4 + 0x70], %f14; \ + ld [%o4 + 0x74], %f15; \ + ld [%o4 + 0x78], %f16; \ + ld [%o4 + 0x7c], %f17; \ + DES_ROUND(12, 10, 0, 0); \ + ld [%o5 + 0x20], %f10; \ + ld [%o5 + 0x24], %f11; \ + ld [%o5 + 0x28], %f12; \ + ld [%o5 + 0x2c], %f13; \ + DES_ROUND(8, 6, 0, 0); \ + ld [%o5 + 0x30], %f6; \ + ld [%o5 + 0x34], %f7; \ + ld [%o5 + 0x38], %f8; \ + ld [%o5 + 0x3c], %f9; \ + DES_ROUND(4, 2, 0, 0); \ + ld [%o5 + 0x40], %f2; \ + ld [%o5 + 0x44], %f3; \ + ld [%o5 + 0x48], %f4; \ + ld [%o5 + 0x4c], %f5; \ + DES_IIP(0, 0); \ + DES_IP(0, 0); \ + DES_ROUND(34, 36, 0, 0); \ + DES_ROUND(38, 40, 0, 0); \ + DES_ROUND(42, 44, 0, 0); \ + DES_ROUND(46, 48, 0, 0); \ + DES_ROUND(50, 52, 0, 0); \ + DES_ROUND(54, 56, 0, 0); \ + DES_ROUND(58, 60, 0, 0); \ + DES_ROUND(14, 16, 0, 0); \ + ld [%o5 + 0x10], %f14; \ + ld [%o5 + 0x14], %f15; \ + ld [%o5 + 0x18], %f16; \ + ld [%o5 + 0x1c], %f17; \ + DES_IIP(0, 0); \ + DES_IP(0, 0); \ + DES_ROUND(20, 18, 0, 0); \ + ld [%o5 + 0x00], %f18; \ + ld [%o5 + 0x04], %f19; \ + ld [%o5 + 0x08], %f20; \ + ld [%o5 + 0x0c], %f21; \ + DES_ROUND(24, 22, 0, 0); \ + ld [%o3 
+ 0x50], %f22; \ + ld [%o3 + 0x54], %f23; \ + ld [%o3 + 0x58], %f24; \ + ld [%o3 + 0x5c], %f25; \ + DES_ROUND(28, 26, 0, 0); \ + ld [%o3 + 0x60], %f26; \ + ld [%o3 + 0x64], %f27; \ + ld [%o3 + 0x68], %f28; \ + ld [%o3 + 0x6c], %f29; \ + DES_ROUND(4, 2, 0, 0); \ + ld [%o3 + 0x00], %f2; \ + ld [%o3 + 0x04], %f3; \ + ld [%o3 + 0x08], %f4; \ + ld [%o3 + 0x0c], %f5; \ + DES_ROUND(8, 6, 0, 0); \ + ld [%o3 + 0x10], %f6; \ + ld [%o3 + 0x14], %f7; \ + ld [%o3 + 0x18], %f8; \ + ld [%o3 + 0x1c], %f9; \ + DES_ROUND(12, 10, 0, 0); \ + ld [%o3 + 0x20], %f10; \ + ld [%o3 + 0x24], %f11; \ + ld [%o3 + 0x28], %f12; \ + ld [%o3 + 0x2c], %f13; \ + DES_ROUND(16, 14, 0, 0); \ + ld [%o3 + 0x30], %f14; \ + ld [%o3 + 0x34], %f15; \ + ld [%o3 + 0x38], %f16; \ + ld [%o3 + 0x3c], %f17; \ + DES_ROUND(20, 18, 0, 0); \ + ld [%o3 + 0x40], %f18; \ + ld [%o3 + 0x44], %f19; \ + ld [%o3 + 0x48], %f20; \ + ld [%o3 + 0x4c], %f21; \ + DES_IIP(0, 0) + +.Lede3_cbc_hw_decrypt: + cmp %o2, 0 + be,pn %icc, 3f + nop +1: lduw [%o0 + 0x00], %g3 + lduw [%o0 + 0x04], %g2 + sllx %g3, 32, %g3 + or %g3, %g2, %g3 + MOVXTOD_G3_F0 + + CBC_HW_DECRYPT + + fxor %f0, %f62, %f0 + MOVXTOD_G3_F62 + st %f0, [%o1 + 0x00] + st %f1, [%o1 + 0x04] + add %o0, 0x08, %o0 + subcc %o2, 0x08, %o2 + bne,pt %icc, 1b + add %o1, 0x08, %o1 + +3: cmp %g1, 0 + be,pt %icc, 4f + mov 0xff, %o2 + lduw [%o0 + 0x00], %g3 + lduw [%o0 + 0x04], %g2 + sllx %g3, 32, %g3 + or %g3, %g2, %g3 + MOVXTOD_G3_F0 + + CBC_HW_DECRYPT + + fxor %f0, %f62, %f0 + MOVXTOD_G3_F62 + mov 8, %g2 + sll %g1, 3, %g1 + sub %g2, %g1, %g2 + sll %o2, %g2, %o2 + stda %f0, [%o1 + %o2] 0xc0 +4: ba,pt %icc, .Lede3_cbc_hw_finish + fsrc2 %f62, %f0 + +.Lede3_cbc_encrypt_software: +#endif save %sp, FRAME, %sp define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] }) @@ -1879,6 +2763,80 @@ DES_ede3_cbc_encrypt_asm: .DES_ede3_cbc_encrypt_asm.end: .size DES_ede3_cbc_encrypt_asm,.DES_ede3_cbc_encrypt_asm.end-DES_ede3_cbc_encrypt_asm +#ifdef __sparc_v9__ + .align 32 + .global DES_sparc_hw_key_expand + .type 
DES_sparc_hw_key_expand,#function +DES_sparc_hw_key_expand: + /* %o0=input_key, %o1=output_key */ + andcc %o0, 0x7, %g0 + be,pt %icc, 1f + nop + alignaddr %o0, %g0, %g1 + ldd [%g1 + 0x00], %f4 + ldd [%g1 + 0x08], %f6 + ba,pt %icc, 2f + faligndata %f4, %f6, %f0 +1: ldd [%o0 + 0x00], %f0 +2: DES_KEXPAND(0, 0, 0) + DES_KEXPAND(0, 1, 2) + DES_KEXPAND(2, 3, 6) + DES_KEXPAND(2, 2, 4) + DES_KEXPAND(6, 3, 10) + DES_KEXPAND(6, 2, 8) + DES_KEXPAND(10, 3, 14) + DES_KEXPAND(10, 2, 12) + DES_KEXPAND(14, 1, 16) + DES_KEXPAND(16, 3, 20) + DES_KEXPAND(16, 2, 18) + DES_KEXPAND(20, 3, 24) + DES_KEXPAND(20, 2, 22) + DES_KEXPAND(24, 3, 28) + DES_KEXPAND(24, 2, 26) + DES_KEXPAND(28, 1, 30) + st %f0, [%o1 + 0x00] + st %f1, [%o1 + 0x04] + st %f2, [%o1 + 0x08] + st %f3, [%o1 + 0x0c] + st %f4, [%o1 + 0x10] + st %f5, [%o1 + 0x14] + st %f6, [%o1 + 0x18] + st %f7, [%o1 + 0x1c] + st %f8, [%o1 + 0x20] + st %f9, [%o1 + 0x24] + st %f10, [%o1 + 0x28] + st %f11, [%o1 + 0x2c] + st %f12, [%o1 + 0x30] + st %f13, [%o1 + 0x34] + st %f14, [%o1 + 0x38] + st %f15, [%o1 + 0x3c] + st %f16, [%o1 + 0x40] + st %f17, [%o1 + 0x44] + st %f18, [%o1 + 0x48] + st %f19, [%o1 + 0x4c] + st %f20, [%o1 + 0x50] + st %f21, [%o1 + 0x54] + st %f22, [%o1 + 0x58] + st %f23, [%o1 + 0x5c] + st %f24, [%o1 + 0x60] + st %f25, [%o1 + 0x64] + st %f26, [%o1 + 0x68] + st %f27, [%o1 + 0x6c] + st %f28, [%o1 + 0x70] + st %f29, [%o1 + 0x74] + st %f30, [%o1 + 0x78] + retl + st %f31, [%o1 + 0x7c] + .size DES_sparc_hw_key_expand,.-DES_sparc_hw_key_expand + + .align 32 + .global DES_sparc_hw_fcrypt_body + .type DES_sparc_hw_fcrypt_body,#function +DES_sparc_hw_fcrypt_body: + /* %o0=out, %o1=ks, %o2=Eswap0, %o3=Eswap1 */ + .size DES_sparc_hw_fcrypt_body,.-DES_sparc_hw_fcrypt_body +#endif + .align 256 .type .des_and,#object .size .des_and,284 diff --git a/crypto/des/des_sparccore.c b/crypto/des/des_sparccore.c index 4aaa3c7..cab508d 100644 --- a/crypto/des/des_sparccore.c +++ b/crypto/des/des_sparccore.c @@ -1,6 +1,7 @@ #include <openssl/crypto.h> 
#include "des_locl.h" +#include "sparc_arch.h" extern void DES_ncbc_encrypt_asm(const DES_LONG *in, DES_LONG *out, long length, DES_key_schedule *ks, DES_LONG *ivec, int enc); @@ -153,3 +154,53 @@ void DES_ede3_cbc_encrypt(const unsigned char *in, unsigned char *out, if (aligned_ivec == ivb) memcpy(ivec, ivb, sizeof(*ivec)); } + +#define DES_set_key DES_set_key_generic +int DES_set_key_generic(const_DES_cblock *key, DES_key_schedule *schedule); + +#define DES_set_key_unchecked DES_set_key_unchecked_generic +void DES_set_key_unchecked_generic(const_DES_cblock *key, DES_key_schedule *ks); + +#define DES_set_key_checked DES_set_key_checked_generic +int DES_set_key_checked_generic(const_DES_cblock *key, DES_key_schedule *ks); + +#include "set_key.c" +#include "fcrypt.c" + +#undef DES_set_key +#undef DES_set_key_unchecked +#undef DES_set_key_checked + +#ifdef __sparc_v9__ +extern void DES_sparc_hw_key_expand(const_DES_cblock *key, DES_key_schedule *schedule); +#endif + +void DES_set_key_unchecked(const_DES_cblock *key, DES_key_schedule *ks) +{ +#ifdef __sparc_v9__ + if (OPENSSL_sparcv9cap_P & SPARCV9_DES) + DES_sparc_hw_key_expand(key, ks); + else +#endif + DES_set_key_unchecked_generic(key, ks); +} + +int DES_set_key_checked(const_DES_cblock *key, DES_key_schedule *ks) +{ + if (!DES_check_key_parity(key)) + return(-1); + if (DES_is_weak_key(key)) + return(-2); + DES_set_key_unchecked(key, ks); + return 0; +} + +int DES_set_key(const_DES_cblock *key, DES_key_schedule *ks) +{ + if (DES_check_key) { + return DES_set_key_checked(key, ks); + } else { + DES_set_key_unchecked(key, ks); + return 0; + } +} diff --git a/crypto/sparc_arch.h b/crypto/sparc_arch.h index fe9805d..1a02cf1 100644 --- a/crypto/sparc_arch.h +++ b/crypto/sparc_arch.h @@ -61,6 +61,15 @@ extern int OPENSSL_sparcv9cap_P; #define AES_KEXPAND2(a,b,c) \ .word (F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c)); +#define DES_IP(a,b) \ + .word (F3F(2, 0x36, 0x134)|RS1(a)|RD(b)); +#define DES_IIP(a,b) \ + .word (F3F(2, 
0x36, 0x135)|RS1(a)|RD(b)); +#define DES_KEXPAND(a,b,c) \ + .word (F3F(2, 0x36, 0x136)|RS1(a)|IMM5_0(b)|RD(c)); +#define DES_ROUND(a,b,c,d) \ + .word (F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d)); + #define CAMELLIA_F(a,b,c,d) \ .word (F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d)); #define CAMELLIA_FL(a,b,c) \ @@ -90,6 +99,8 @@ extern int OPENSSL_sparcv9cap_P; .word 0x89b0230d; #define MOVXTOD_O5_F6 \ .word 0x8db0230d; +#define MOVXTOD_G3_F62 \ + .word 0xbfb02303; #ifdef __PIC__ #define SPARC_PIC_THUNK(reg) \ -- 1.7.10.4 ______________________________________________________________________ OpenSSL Project http://www.openssl.org Development Mailing List openssl-dev@openssl.org Automated List Manager majord...@openssl.org