The biggest trick here is providing the mechanism necessary to expand
the key properly.

The DES opcodes expect the expanded key to be in a different format
than the generic OpenSSL DES code does.

So we use some include and CPP define trickery so that we can override
the key expansion in the cases where we need to.

Note in particular that we can't really use the DES opcodes for the
fcrypt implementation, so we arrange things so that the generic key
expansion is forced in that case.

On a SPARC T4-2, first with crypto opcodes disabled:

type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
des cbc          54342.76k    56258.03k    57101.95k    57558.36k    57636.18k
des ede3         19990.29k    20679.25k    20837.29k    20877.31k    20889.60k

and with them turned on:

type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
des cbc         341439.34k   443975.51k   468697.94k   482163.84k   491296.09k
des ede3        130192.06k   173126.76k   185801.81k   192443.16k   193732.61k

Signed-off-by: David S. Miller <da...@davemloft.net>
---
 Configure                  |   10 +-
 crypto/des/Makefile        |    2 +-
 crypto/des/asm/des_enc.m4  |  968 +++++++++++++++++++++++++++++++++++++++++++-
 crypto/des/des_sparccore.c |   51 +++
 crypto/sparc_arch.h        |   11 +
 5 files changed, 1031 insertions(+), 11 deletions(-)

diff --git a/Configure b/Configure
index 74bc534..1a3d855 100755
--- a/Configure
+++ b/Configure
@@ -124,14 +124,14 @@ my $tlib="-lnsl -lsocket";
 my $bits1="THIRTY_TWO_BIT ";
 my $bits2="SIXTY_FOUR_BIT ";
 
-my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o 
crypt586.o set_key.o:aes-586.o vpaes-x86.o 
aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o 
sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o 
wp-mmx.o:cmll-x86.o:ghash-x86.o:e_padlock-x86.o";
+my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o 
crypt586.o set_key.o fcrypt.o:aes-586.o vpaes-x86.o 
aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o 
sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o 
wp-mmx.o:cmll-x86.o:ghash-x86.o:e_padlock-x86.o";
 
 my $x86_elf_asm="$x86_asm:elf";
 
 my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o 
x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o 
aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o 
sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o 
cmll_misc.o:ghash-x86_64.o:e_padlock-x86_64.o";
 my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o 
aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o 
rc4_skey.o:::::ghash-ia64.o::void";
-my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o 
sparcv9a-mont.o:des_enc-sparc.o des_sparccore.o fcrypt_b.o 
set_key.o:aes_sparccore.o aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o 
sha256-sparcv9.o sha512-sparcv9.o::::::cmll-sparcv9.o 
cmll_sparccore.o:ghash-sparcv9.o::void";
-my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o des_sparccore.o fcrypt_b.o 
set_key.o:::::::::::::void";
+my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o 
sparcv9a-mont.o:des_enc-sparc.o des_sparccore.o fcrypt_b.o:aes_sparccore.o 
aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o 
sha512-sparcv9.o::::::cmll-sparcv9.o cmll_sparccore.o:ghash-sparcv9.o::void";
+my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o des_sparccore.o 
fcrypt_b.o:::::::::::::void";
 my $alpha_asm="alphacpuid.o:bn_asm.o 
alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void";
 my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o 
sha256-mips.o sha512-mips.o::::::::";
 my $mips32_asm=$mips64_asm; $mips32_asm =~ s/\s*sha512\-mips\.o//;
@@ -190,7 +190,7 @@ my %table=(
 "debug-linux-ppro","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG 
-DCRYPTO_MDEBUG -DL_ENDIAN -DTERMIO -g -mcpu=pentiumpro 
-Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} 
${x86_gcc_opts}:${x86_elf_asm}:dlfcn",
 "debug-linux-elf","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG 
-DCRYPTO_MDEBUG -DL_ENDIAN -DTERMIO -g -march=i486 
-Wall::-D_REENTRANT::-lefence -ldl:BN_LLONG ${x86_gcc_des} 
${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "debug-linux-elf-noefence","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG 
-DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -DTERMIO -g -march=i486 
-Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} 
${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -DTERMIO -O3 
-fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} 
${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o 
set_key.o:aes_x86core.o aes_cbc.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o 
sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o 
wp-mmx.o::ghash-x86.o:e_padlock-x86.o:elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -DTERMIO -O3 
-fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} 
${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o 
set_key.o fcrypt.o:aes_x86core.o aes_cbc.o 
aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o 
sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o 
wp-mmx.o::ghash-x86.o:e_padlock-x86.o:elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "debug-linux-generic32","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG 
-DCRYPTO_MDEBUG -DTERMIO -g -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR 
RC4_CHUNK DES_INT DES_UNROLL 
BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "debug-linux-generic64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG 
-DCRYPTO_MDEBUG -DTERMIO -g -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG 
RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL 
BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "debug-linux-x86_64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG 
-m64 -DL_ENDIAN -DTERMIO -g -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG 
RC4_CHUNK DES_INT 
DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
@@ -698,7 +698,7 @@ my $idea    ="crypto/idea/idea.h";
 my $rc2        ="crypto/rc2/rc2.h";
 my $bf ="crypto/bf/bf_locl.h";
 my $bn_asm     ="bn_asm.o";
-my $des_enc="des_enc.o fcrypt_b.o set_key.o";
+my $des_enc="des_enc.o fcrypt.o fcrypt_b.o set_key.o";
 my $aes_enc="aes_core.o aes_cbc.o";
 my $bf_enc     ="bf_enc.o";
 my $cast_enc="c_enc.o";
diff --git a/crypto/des/Makefile b/crypto/des/Makefile
index 893d2e6..b3f7797 100644
--- a/crypto/des/Makefile
+++ b/crypto/des/Makefile
@@ -36,7 +36,7 @@ LIBOBJ= ecb_enc.o  cbc_enc.o \
        enc_read.o enc_writ.o ofb64enc.o \
        ofb_enc.o  str2key.o  pcbc_enc.o qud_cksm.o rand_key.o \
        ${DES_ENC} \
-       fcrypt.o xcbc_enc.o rpc_enc.o  cbc_cksm.o \
+       xcbc_enc.o rpc_enc.o  cbc_cksm.o \
        ede_cbcm_enc.o des_old.o des_old2.o read2pwd.o
 
 SRC= $(LIBSRC)
diff --git a/crypto/des/asm/des_enc.m4 b/crypto/des/asm/des_enc.m4
index a69273c..06964b0 100644
--- a/crypto/des/asm/des_enc.m4
+++ b/crypto/des/asm/des_enc.m4
@@ -47,6 +47,7 @@
 .file  "des_enc-sparc.S"
 
 #include <openssl/opensslconf.h>
+#include "sparc_arch.h"
 
 #ifdef OPENSSL_FIPSCANISTER
 #include <openssl/fipssyms.h>
@@ -1125,6 +1126,8 @@ define(register_init, {
 
 .section       ".text"
 
+SPARC_PIC_THUNK(g2)
+
        .align 32
 
 .des_enc:
@@ -1157,7 +1160,83 @@ define(register_init, {
        .type    DES_encrypt1,#function
 
 DES_encrypt1:
-
+#ifdef __sparc_v9__
+       SPARC_LOAD_V9_CAPS_LEAF(g2, g1)
+       andcc   %g2, SPARCV9_DES, %g0
+       be      .Lencrypt1_software
+        cmp    %o2, 0
+
+       ld      [%o1 + 0x00], %f2
+       ld      [%o1 + 0x04], %f3
+       ld      [%o1 + 0x08], %f4
+       ld      [%o1 + 0x0c], %f5
+       ld      [%o1 + 0x10], %f6
+       ld      [%o1 + 0x14], %f7
+       ld      [%o1 + 0x18], %f8
+       ld      [%o1 + 0x1c], %f9
+       ld      [%o1 + 0x20], %f10
+       ld      [%o1 + 0x24], %f11
+       ld      [%o1 + 0x28], %f12
+       ld      [%o1 + 0x2c], %f13
+       ld      [%o1 + 0x30], %f14
+       ld      [%o1 + 0x34], %f15
+       ld      [%o1 + 0x38], %f16
+       ld      [%o1 + 0x3c], %f17
+       ld      [%o1 + 0x40], %f18
+       ld      [%o1 + 0x44], %f19
+       ld      [%o1 + 0x48], %f20
+       ld      [%o1 + 0x4c], %f21
+       ld      [%o1 + 0x50], %f22
+       ld      [%o1 + 0x54], %f23
+       ld      [%o1 + 0x58], %f24
+       ld      [%o1 + 0x5c], %f25
+       ld      [%o1 + 0x60], %f26
+       ld      [%o1 + 0x64], %f27
+       ld      [%o1 + 0x68], %f28
+       ld      [%o1 + 0x6c], %f29
+       ld      [%o1 + 0x70], %f30
+       ld      [%o1 + 0x74], %f31
+       ld      [%o1 + 0x78], %f0
+       ld      [%o1 + 0x7c], %f1
+       fsrc2   %f0, %f32
+       lda     [%o0] 0x88, %f0
+       add     %o0, 0x4, %o5
+       be      .Ldes_decrypt1
+        lda    [%o5] 0x88, %f1
+
+       DES_IP(0, 0)
+       DES_ROUND(2, 4, 0, 0)
+       DES_ROUND(6, 8, 0, 0)
+       DES_ROUND(10, 12, 0, 0)
+       DES_ROUND(14, 16, 0, 0)
+       DES_ROUND(18, 20, 0, 0)
+       DES_ROUND(22, 24, 0, 0)
+       DES_ROUND(26, 28, 0, 0)
+       DES_ROUND(30, 32, 0, 0)
+       DES_IIP(0, 0)
+
+.Lfinish_encrypt1:
+       sta     %f0, [%o0] 0x88
+       add     %o0, 0x4, %o5
+       retl
+        sta    %f1, [%o5] 0x88
+
+.Ldes_decrypt1:
+       DES_IP(0, 0)
+       DES_ROUND(32, 30, 0, 0)
+       DES_ROUND(28, 26, 0, 0)
+       DES_ROUND(24, 22, 0, 0)
+       DES_ROUND(20, 18, 0, 0)
+       DES_ROUND(16, 14, 0, 0)
+       DES_ROUND(12, 10, 0, 0)
+       DES_ROUND(8, 6, 0, 0)
+       DES_ROUND(4, 2, 0, 0)
+       DES_IIP(0, 0)
+       ba,pt   %icc, .Lfinish_encrypt1
+        nop
+
+.Lencrypt1_software:
+#endif
        save    %sp, FRAME, %sp
 
        sethi   %hi(.PIC.DES_SPtrans-1f),global1
@@ -1336,7 +1415,157 @@ DES_encrypt2:
        .type    DES_encrypt3,#function
 
 DES_encrypt3:
-
+#ifdef __sparc_v9__
+       SPARC_LOAD_V9_CAPS_LEAF(g2, g1)
+       andcc   %g2, SPARCV9_DES, %g0
+       be      .Lencrypt3_software
+        nop
+
+       lda     [%o0] 0x88, %f0
+       add     %o0, 0x4, %o5
+       lda     [%o5] 0x88, %f1
+
+       DES_IP(0, 32)
+
+       ld      [%o1 + 0x00], %f0
+       ld      [%o1 + 0x04], %f1
+       ld      [%o1 + 0x08], %f2
+       ld      [%o1 + 0x0c], %f3
+       ld      [%o1 + 0x10], %f4
+       ld      [%o1 + 0x14], %f5
+       ld      [%o1 + 0x18], %f6
+       ld      [%o1 + 0x1c], %f7
+       ld      [%o1 + 0x20], %f8
+       ld      [%o1 + 0x24], %f9
+       ld      [%o1 + 0x28], %f10
+       ld      [%o1 + 0x2c], %f11
+       ld      [%o1 + 0x30], %f12
+       ld      [%o1 + 0x34], %f13
+       ld      [%o1 + 0x38], %f14
+       ld      [%o1 + 0x3c], %f15
+       ld      [%o1 + 0x40], %f16
+       ld      [%o1 + 0x44], %f17
+       ld      [%o1 + 0x48], %f18
+       ld      [%o1 + 0x4c], %f19
+       ld      [%o1 + 0x50], %f20
+       ld      [%o1 + 0x54], %f21
+       ld      [%o1 + 0x58], %f22
+       ld      [%o1 + 0x5c], %f23
+       ld      [%o1 + 0x60], %f24
+       ld      [%o1 + 0x64], %f25
+       ld      [%o1 + 0x68], %f26
+       ld      [%o1 + 0x6c], %f27
+       ld      [%o1 + 0x70], %f28
+       ld      [%o1 + 0x74], %f29
+       ld      [%o1 + 0x78], %f30
+       ld      [%o1 + 0x7c], %f31
+
+       DES_ROUND(0, 2, 32, 32)
+       ld      [%o2 + 0x00], %f0
+       ld      [%o2 + 0x04], %f1
+       ld      [%o2 + 0x08], %f2
+       ld      [%o2 + 0x0c], %f3
+       DES_ROUND(4, 6, 32, 32)
+       ld      [%o2 + 0x10], %f4
+       ld      [%o2 + 0x14], %f5
+       ld      [%o2 + 0x18], %f6
+       ld      [%o2 + 0x1c], %f7
+       DES_ROUND(8, 10, 32, 32)
+       ld      [%o2 + 0x20], %f8
+       ld      [%o2 + 0x24], %f9
+       ld      [%o2 + 0x28], %f10
+       ld      [%o2 + 0x2c], %f11
+       DES_ROUND(12, 14, 32, 32)
+       ld      [%o2 + 0x30], %f12
+       ld      [%o2 + 0x34], %f13
+       ld      [%o2 + 0x38], %f14
+       ld      [%o2 + 0x3c], %f15
+       DES_ROUND(16, 18, 32, 32)
+       ld      [%o2 + 0x40], %f16
+       ld      [%o2 + 0x44], %f17
+       ld      [%o2 + 0x48], %f18
+       ld      [%o2 + 0x4c], %f19
+       DES_ROUND(20, 22, 32, 32)
+       ld      [%o2 + 0x50], %f20
+       ld      [%o2 + 0x54], %f21
+       ld      [%o2 + 0x58], %f22
+       ld      [%o2 + 0x5c], %f23
+       DES_ROUND(24, 26, 32, 32)
+       ld      [%o2 + 0x60], %f24
+       ld      [%o2 + 0x64], %f25
+       ld      [%o2 + 0x68], %f26
+       ld      [%o2 + 0x6c], %f27
+       DES_ROUND(28, 30, 32, 32)
+       ld      [%o2 + 0x70], %f28
+       ld      [%o2 + 0x74], %f29
+       ld      [%o2 + 0x78], %f30
+       ld      [%o2 + 0x7c], %f31
+
+       DES_IIP(32, 32)
+       DES_IP(32, 32)
+
+       DES_ROUND(30, 28, 32, 32)
+       ld      [%o3 + 0x70], %f28
+       ld      [%o3 + 0x74], %f29
+       ld      [%o3 + 0x78], %f30
+       ld      [%o3 + 0x7c], %f31
+       DES_ROUND(26, 24, 32, 32)
+       ld      [%o3 + 0x60], %f24
+       ld      [%o3 + 0x64], %f25
+       ld      [%o3 + 0x68], %f26
+       ld      [%o3 + 0x6c], %f27
+       DES_ROUND(22, 20, 32, 32)
+       ld      [%o3 + 0x50], %f20
+       ld      [%o3 + 0x54], %f21
+       ld      [%o3 + 0x58], %f22
+       ld      [%o3 + 0x5c], %f23
+       DES_ROUND(18, 16, 32, 32)
+       ld      [%o3 + 0x40], %f16
+       ld      [%o3 + 0x44], %f17
+       ld      [%o3 + 0x48], %f18
+       ld      [%o3 + 0x4c], %f19
+       DES_ROUND(14, 12, 32, 32)
+       ld      [%o3 + 0x30], %f12
+       ld      [%o3 + 0x34], %f13
+       ld      [%o3 + 0x38], %f14
+       ld      [%o3 + 0x3c], %f15
+       DES_ROUND(10, 8, 32, 32)
+       ld      [%o3 + 0x20], %f8
+       ld      [%o3 + 0x24], %f9
+       ld      [%o3 + 0x28], %f10
+       ld      [%o3 + 0x2c], %f11
+       DES_ROUND(6, 4, 32, 32)
+       ld      [%o3 + 0x10], %f4
+       ld      [%o3 + 0x14], %f5
+       ld      [%o3 + 0x18], %f6
+       ld      [%o3 + 0x1c], %f7
+       DES_ROUND(2, 0, 32, 32)
+       ld      [%o3 + 0x00], %f0
+       ld      [%o3 + 0x04], %f1
+       ld      [%o3 + 0x08], %f2
+       ld      [%o3 + 0x0c], %f3
+
+       DES_IIP(32, 32)
+       DES_IP(32, 32)
+
+       DES_ROUND(0, 2, 32, 32)
+       DES_ROUND(4, 6, 32, 32)
+       DES_ROUND(8, 10, 32, 32)
+       DES_ROUND(12, 14, 32, 32)
+       DES_ROUND(16, 18, 32, 32)
+       DES_ROUND(20, 22, 32, 32)
+       DES_ROUND(24, 26, 32, 32)
+       DES_ROUND(28, 30, 32, 32)
+
+       DES_IIP(32, 0)
+
+       sta     %f0, [%o0] 0x88
+       add     %o0, 0x4, %o5
+       retl
+        sta    %f1, [%o5] 0x88
+
+.Lencrypt3_software:
+#endif
        save    %sp, FRAME, %sp
        
        sethi   %hi(.PIC.DES_SPtrans-1f),global1
@@ -1381,7 +1610,156 @@ DES_encrypt3:
        .type    DES_decrypt3,#function
 
 DES_decrypt3:
-
+#ifdef __sparc_v9__
+       SPARC_LOAD_V9_CAPS_LEAF(g2, g1)
+       andcc   %g2, SPARCV9_DES, %g0
+       be      .Ldecrypt3_software
+        nop
+
+       lda     [%o0] 0x88, %f0
+       add     %o0, 0x4, %o5
+       lda     [%o5] 0x88, %f1
+       DES_IP(0, 32)
+
+       ld      [%o3 + 0x00], %f0
+       ld      [%o3 + 0x04], %f1
+       ld      [%o3 + 0x08], %f2
+       ld      [%o3 + 0x0c], %f3
+       ld      [%o3 + 0x10], %f4
+       ld      [%o3 + 0x14], %f5
+       ld      [%o3 + 0x18], %f6
+       ld      [%o3 + 0x1c], %f7
+       ld      [%o3 + 0x20], %f8
+       ld      [%o3 + 0x24], %f9
+       ld      [%o3 + 0x28], %f10
+       ld      [%o3 + 0x2c], %f11
+       ld      [%o3 + 0x30], %f12
+       ld      [%o3 + 0x34], %f13
+       ld      [%o3 + 0x38], %f14
+       ld      [%o3 + 0x3c], %f15
+       ld      [%o3 + 0x40], %f16
+       ld      [%o3 + 0x44], %f17
+       ld      [%o3 + 0x48], %f18
+       ld      [%o3 + 0x4c], %f19
+       ld      [%o3 + 0x50], %f20
+       ld      [%o3 + 0x54], %f21
+       ld      [%o3 + 0x58], %f22
+       ld      [%o3 + 0x5c], %f23
+       ld      [%o3 + 0x60], %f24
+       ld      [%o3 + 0x64], %f25
+       ld      [%o3 + 0x68], %f26
+       ld      [%o3 + 0x6c], %f27
+       ld      [%o3 + 0x70], %f28
+       ld      [%o3 + 0x74], %f29
+       ld      [%o3 + 0x78], %f30
+       ld      [%o3 + 0x7c], %f31
+
+       DES_ROUND(30, 28, 32, 32)
+       ld      [%o2 + 0x70], %f28
+       ld      [%o2 + 0x74], %f29
+       ld      [%o2 + 0x78], %f30
+       ld      [%o2 + 0x7c], %f31
+       DES_ROUND(26, 24, 32, 32)
+       ld      [%o2 + 0x60], %f24
+       ld      [%o2 + 0x64], %f25
+       ld      [%o2 + 0x68], %f26
+       ld      [%o2 + 0x6c], %f27
+       DES_ROUND(22, 20, 32, 32)
+       ld      [%o2 + 0x50], %f20
+       ld      [%o2 + 0x54], %f21
+       ld      [%o2 + 0x58], %f22
+       ld      [%o2 + 0x5c], %f23
+       DES_ROUND(18, 16, 32, 32)
+       ld      [%o2 + 0x40], %f16
+       ld      [%o2 + 0x44], %f17
+       ld      [%o2 + 0x48], %f18
+       ld      [%o2 + 0x4c], %f19
+       DES_ROUND(14, 12, 32, 32)
+       ld      [%o2 + 0x30], %f12
+       ld      [%o2 + 0x34], %f13
+       ld      [%o2 + 0x38], %f14
+       ld      [%o2 + 0x3c], %f15
+       DES_ROUND(10, 8, 32, 32)
+       ld      [%o2 + 0x20], %f8
+       ld      [%o2 + 0x24], %f9
+       ld      [%o2 + 0x28], %f10
+       ld      [%o2 + 0x2c], %f11
+       DES_ROUND(6, 4, 32, 32)
+       ld      [%o2 + 0x10], %f4
+       ld      [%o2 + 0x14], %f5
+       ld      [%o2 + 0x18], %f6
+       ld      [%o2 + 0x1c], %f7
+       DES_ROUND(2, 0, 32, 32)
+       ld      [%o2 + 0x00], %f0
+       ld      [%o2 + 0x04], %f1
+       ld      [%o2 + 0x08], %f2
+       ld      [%o2 + 0x0c], %f3
+
+       DES_IIP(32, 32)
+       DES_IP(32, 32)
+
+       DES_ROUND(0, 2, 32, 32)
+       ld      [%o1 + 0x00], %f0
+       ld      [%o1 + 0x04], %f1
+       ld      [%o1 + 0x08], %f2
+       ld      [%o1 + 0x0c], %f3
+       DES_ROUND(4, 6, 32, 32)
+       ld      [%o1 + 0x10], %f4
+       ld      [%o1 + 0x14], %f5
+       ld      [%o1 + 0x18], %f6
+       ld      [%o1 + 0x1c], %f7
+       DES_ROUND(8, 10, 32, 32)
+       ld      [%o1 + 0x20], %f8
+       ld      [%o1 + 0x24], %f9
+       ld      [%o1 + 0x28], %f10
+       ld      [%o1 + 0x2c], %f11
+       DES_ROUND(12, 14, 32, 32)
+       ld      [%o1 + 0x30], %f12
+       ld      [%o1 + 0x34], %f13
+       ld      [%o1 + 0x38], %f14
+       ld      [%o1 + 0x3c], %f15
+       DES_ROUND(16, 18, 32, 32)
+       ld      [%o1 + 0x40], %f16
+       ld      [%o1 + 0x44], %f17
+       ld      [%o1 + 0x48], %f18
+       ld      [%o1 + 0x4c], %f19
+       DES_ROUND(20, 22, 32, 32)
+       ld      [%o1 + 0x50], %f20
+       ld      [%o1 + 0x54], %f21
+       ld      [%o1 + 0x58], %f22
+       ld      [%o1 + 0x5c], %f23
+       DES_ROUND(24, 26, 32, 32)
+       ld      [%o1 + 0x60], %f24
+       ld      [%o1 + 0x64], %f25
+       ld      [%o1 + 0x68], %f26
+       ld      [%o1 + 0x6c], %f27
+       DES_ROUND(28, 30, 32, 32)
+       ld      [%o1 + 0x70], %f28
+       ld      [%o1 + 0x74], %f29
+       ld      [%o1 + 0x78], %f30
+       ld      [%o1 + 0x7c], %f31
+
+       DES_IIP(32, 32)
+       DES_IP(32, 32)
+
+       DES_ROUND(30, 28, 32, 32)
+       DES_ROUND(26, 24, 32, 32)
+       DES_ROUND(22, 20, 32, 32)
+       DES_ROUND(18, 16, 32, 32)
+       DES_ROUND(14, 12, 32, 32)
+       DES_ROUND(10, 8, 32, 32)
+       DES_ROUND(6, 4, 32, 32)
+       DES_ROUND(2, 0, 32, 32)
+
+       DES_IIP(32, 0)
+
+       sta     %f0, [%o0] 0x88
+       add     %o0, 0x4, %o5
+       retl
+        sta    %f1, [%o5] 0x88
+
+.Ldecrypt3_software:
+#endif
        save    %sp, FRAME, %sp
        
        sethi   %hi(.PIC.DES_SPtrans-1f),global1
@@ -1426,7 +1804,149 @@ DES_decrypt3:
        .type    DES_ncbc_encrypt_asm,#function
 
 DES_ncbc_encrypt_asm:
-
+#ifdef __sparc_v9__
+       SPARC_LOAD_V9_CAPS_LEAF(g2, g1)
+       andcc   %g2, SPARCV9_DES, %g0
+       be      .Lncbc_encrypt_software
+        cmp    %o5, 0
+
+       ld      [%o4 + 0x00], %f0
+       ld      [%o4 + 0x04], %f1
+       fsrc2   %f0, %f34
+
+       ld      [%o3 + 0x00], %f2
+       ld      [%o3 + 0x04], %f3
+       ld      [%o3 + 0x08], %f4
+       ld      [%o3 + 0x0c], %f5
+       ld      [%o3 + 0x10], %f6
+       ld      [%o3 + 0x14], %f7
+       ld      [%o3 + 0x18], %f8
+       ld      [%o3 + 0x1c], %f9
+       ld      [%o3 + 0x20], %f10
+       ld      [%o3 + 0x24], %f11
+       ld      [%o3 + 0x28], %f12
+       ld      [%o3 + 0x2c], %f13
+       ld      [%o3 + 0x30], %f14
+       ld      [%o3 + 0x34], %f15
+       ld      [%o3 + 0x38], %f16
+       ld      [%o3 + 0x3c], %f17
+       ld      [%o3 + 0x40], %f18
+       ld      [%o3 + 0x44], %f19
+       ld      [%o3 + 0x48], %f20
+       ld      [%o3 + 0x4c], %f21
+       ld      [%o3 + 0x50], %f22
+       ld      [%o3 + 0x54], %f23
+       ld      [%o3 + 0x58], %f24
+       ld      [%o3 + 0x5c], %f25
+       ld      [%o3 + 0x60], %f26
+       ld      [%o3 + 0x64], %f27
+       ld      [%o3 + 0x68], %f28
+       ld      [%o3 + 0x6c], %f29
+       ld      [%o3 + 0x70], %f30
+       ld      [%o3 + 0x74], %f31
+       ld      [%o3 + 0x78], %f0
+       and     %o2, 0x07, %g1
+       ld      [%o3 + 0x7c], %f1
+       andn    %o2, 0x07, %o2
+       be      .Lncbc_hw_decrypt
+        fsrc2  %f0, %f32
+
+       cmp     %o2, 0
+       be,pn   %icc, 3f
+        nop
+1:     ld      [%o0 + 0x00], %f0
+       ld      [%o0 + 0x04], %f1
+2:     fxor    %f0, %f34, %f0
+       DES_IP(0, 0)
+       DES_ROUND(2, 4, 0, 0)
+       DES_ROUND(6, 8, 0, 0)
+       DES_ROUND(10, 12, 0, 0)
+       DES_ROUND(14, 16, 0, 0)
+       DES_ROUND(18, 20, 0, 0)
+       DES_ROUND(22, 24, 0, 0)
+       DES_ROUND(26, 28, 0, 0)
+       DES_ROUND(30, 32, 0, 0)
+       DES_IIP(0, 0)
+       fsrc2   %f0, %f34
+       st      %f0, [%o1 + 0x00]
+       st      %f1, [%o1 + 0x04]
+       add     %o0, 0x08, %o0
+       subcc   %o2, 0x08, %o2
+       bne,pt  %icc, 1b
+        add    %o1, 0x08, %o1
+3:     cmp     %g1, 0
+       be,pt   %icc, .Lncbc_hw_finish
+        mov    64, %o5
+       lduw    [%o0 + 0x00], %g2
+       sll     %g1, 3, %g1
+       lduw    [%o0 + 0x04], %g3
+       sub     %o5, %g1, %o5
+       sllx    %g2, 32, %g2
+       or      %g2, %g3, %g3
+       srlx    %g3, %o5, %g3
+       sllx    %g3, %o5, %g3
+       MOVXTOD_G3_F0
+       mov     0, %g1
+       ba,pt   %icc, 2b
+        add    %o2, 0x08, %o2
+
+.Lncbc_hw_finish:
+       st      %f0, [%o4 + 0x00]
+       retl
+        st     %f1, [%o4 + 0x04]
+
+.Lncbc_hw_decrypt:
+       cmp     %o2, 0
+       be,pn   %icc, 3f
+        nop
+1:     ld      [%o0 + 0x00], %f0
+       ld      [%o0 + 0x04], %f1
+       fsrc2   %f0, %f36
+       DES_IP(0, 0)
+       DES_ROUND(32, 30, 0, 0)
+       DES_ROUND(28, 26, 0, 0)
+       DES_ROUND(24, 22, 0, 0)
+       DES_ROUND(20, 18, 0, 0)
+       DES_ROUND(16, 14, 0, 0)
+       DES_ROUND(12, 10, 0, 0)
+       DES_ROUND(8, 6, 0, 0)
+       DES_ROUND(4, 2, 0, 0)
+       DES_IIP(0, 0)
+       fxor    %f0, %f34, %f0
+       fsrc2   %f36, %f34
+       st      %f0, [%o1 + 0x00]
+       st      %f1, [%o1 + 0x04]
+       add     %o0, 0x08, %o0
+       subcc   %o2, 0x08, %o2
+       bne,pt  %icc, 1b
+        add    %o1, 0x08, %o1
+3:     cmp     %g1, 0
+       be,pt   %icc, 4f
+        mov    0xff, %o5
+       ld      [%o0 + 0x00], %f0
+       ld      [%o0 + 0x04], %f1
+       fsrc2   %f0, %f36
+       DES_IP(0, 0)
+       DES_ROUND(32, 30, 0, 0)
+       DES_ROUND(28, 26, 0, 0)
+       DES_ROUND(24, 22, 0, 0)
+       DES_ROUND(20, 18, 0, 0)
+       DES_ROUND(16, 14, 0, 0)
+       DES_ROUND(12, 10, 0, 0)
+       DES_ROUND(8, 6, 0, 0)
+       DES_ROUND(4, 2, 0, 0)
+       DES_IIP(0, 0)
+       fxor    %f0, %f34, %f0
+       fsrc2   %f36, %f34
+       mov     8, %g2
+       sll     %g1, 3, %g1
+       sub     %g2, %g1, %g2
+       sll     %o5, %g2, %o5
+       stda    %f0, [%o1 + %o5] 0xc0
+4:     ba,pt   %icc, .Lncbc_hw_finish
+        fsrc2  %f34, %f0
+.Lncbc_encrypt_software:
+#endif
        save    %sp, FRAME, %sp
        
        define({INPUT},  { [%sp+BIAS+ARG0+0*ARGSZ] })
@@ -1651,7 +2171,371 @@ DES_ncbc_encrypt_asm:
        .type    DES_ede3_cbc_encrypt_asm,#function
 
 DES_ede3_cbc_encrypt_asm:
-
+#ifdef __sparc_v9__
+       SPARC_LOAD_V9_CAPS_LEAF(g2, g1)
+       andcc   %g2, SPARCV9_DES, %g0
+       be      .Lede3_cbc_encrypt_software
+        and    %o2, 7, %g1
+       LDPTR   [%sp+BIAS+ARG0+7*ARGSZ], %g2
+       andn    %o2, 7, %o2
+       LDPTR   [%sp+BIAS+ARG0+6*ARGSZ], %g3
+       cmp     %g2, 0
+       ld      [%g3 + 0x00], %f0
+       ld      [%g3 + 0x04], %f1
+       bne     1f
+        fsrc2  %f0, %f62
+
+       mov     %o5, %g2
+       mov     %o3, %o5
+       mov     %g2, %o3
+
+1:     ld      [%o3 + 0x78], %f0
+       ld      [%o3 + 0x7c], %f1
+       ld      [%o4 + 0x00], %f2
+       ld      [%o4 + 0x04], %f3
+       ld      [%o4 + 0x08], %f4
+       ld      [%o4 + 0x0c], %f5
+       ld      [%o4 + 0x10], %f6
+       ld      [%o4 + 0x14], %f7
+       ld      [%o4 + 0x18], %f8
+       ld      [%o4 + 0x1c], %f9
+       ld      [%o4 + 0x20], %f10
+       ld      [%o4 + 0x24], %f11
+       ld      [%o4 + 0x28], %f12
+       ld      [%o4 + 0x2c], %f13
+       ld      [%o4 + 0x30], %f14
+       ld      [%o4 + 0x34], %f15
+       ld      [%o4 + 0x38], %f16
+       ld      [%o4 + 0x3c], %f17
+       ld      [%o4 + 0x40], %f18
+       ld      [%o4 + 0x44], %f19
+       ld      [%o4 + 0x48], %f20
+       ld      [%o4 + 0x4c], %f21
+       ld      [%o4 + 0x50], %f22
+       ld      [%o4 + 0x54], %f23
+       ld      [%o4 + 0x58], %f24
+       ld      [%o4 + 0x5c], %f25
+       ld      [%o4 + 0x60], %f26
+       ld      [%o4 + 0x64], %f27
+       ld      [%o4 + 0x68], %f28
+       ld      [%o4 + 0x6c], %f29
+       fsrc2   %f0, %f32
+       fsrc2   %f2, %f34
+       ld      [%o3 + 0x00], %f2
+       ld      [%o3 + 0x04], %f3
+       fsrc2   %f4, %f36
+       ld      [%o3 + 0x08], %f4
+       ld      [%o3 + 0x0c], %f5
+       fsrc2   %f6, %f38
+       ld      [%o3 + 0x10], %f6
+       ld      [%o3 + 0x14], %f7
+       fsrc2   %f8, %f40
+       ld      [%o3 + 0x18], %f8
+       ld      [%o3 + 0x1c], %f9
+       fsrc2   %f10, %f42
+       ld      [%o3 + 0x20], %f10
+       ld      [%o3 + 0x24], %f11
+       fsrc2   %f12, %f44
+       ld      [%o3 + 0x28], %f12
+       ld      [%o3 + 0x2c], %f13
+       fsrc2   %f14, %f46
+       ld      [%o3 + 0x30], %f14
+       ld      [%o3 + 0x34], %f15
+       fsrc2   %f16, %f48
+       ld      [%o3 + 0x38], %f16
+       ld      [%o3 + 0x3c], %f17
+       fsrc2   %f18, %f50
+       ld      [%o3 + 0x40], %f18
+       ld      [%o3 + 0x44], %f19
+       fsrc2   %f20, %f52
+       ld      [%o3 + 0x48], %f20
+       ld      [%o3 + 0x4c], %f21
+       fsrc2   %f22, %f54
+       ld      [%o3 + 0x50], %f22
+       ld      [%o3 + 0x54], %f23
+       fsrc2   %f24, %f56
+       ld      [%o3 + 0x58], %f24
+       ld      [%o3 + 0x5c], %f25
+       fsrc2   %f26, %f58
+       ld      [%o3 + 0x60], %f26
+       ld      [%o3 + 0x64], %f27
+       fsrc2   %f28, %f60
+       ld      [%o3 + 0x68], %f28
+       ld      [%o3 + 0x6c], %f29
+       ld      [%o3 + 0x70], %f30
+       be      .Lede3_cbc_hw_decrypt
+        ld     [%o3 + 0x74], %f31
+
+       cmp     %o2, 0
+       be,pn   %icc, 3f
+        nop
+1:     ld      [%o0 + 0x00], %f0
+       ld      [%o0 + 0x04], %f1
+2:     fxor    %f0, %f62, %f0
+       DES_IP(0, 0)
+       DES_ROUND(2, 4, 0, 0)
+       DES_ROUND(6, 8, 0, 0)
+       DES_ROUND(10, 12, 0, 0)
+       DES_ROUND(14, 16, 0, 0)
+       ld      [%o4 + 0x70], %f14
+       ld      [%o4 + 0x74], %f15
+       ld      [%o4 + 0x78], %f16
+       ld      [%o4 + 0x7c], %f17
+       DES_ROUND(18, 20, 0, 0)
+       ld      [%o5 + 0x00], %f18
+       ld      [%o5 + 0x04], %f19
+       ld      [%o5 + 0x08], %f20
+       ld      [%o5 + 0x0c], %f21
+       DES_ROUND(22, 24, 0, 0)
+       ld      [%o5 + 0x10], %f22
+       ld      [%o5 + 0x14], %f23
+       ld      [%o5 + 0x18], %f24
+       ld      [%o5 + 0x1c], %f25
+       DES_ROUND(26, 28, 0, 0)
+       ld      [%o5 + 0x20], %f26
+       ld      [%o5 + 0x24], %f27
+       ld      [%o5 + 0x28], %f28
+       ld      [%o5 + 0x2c], %f29
+       DES_ROUND(30, 32, 0, 0)
+       ld      [%o5 + 0x30], %f2
+       ld      [%o5 + 0x34], %f3
+       ld      [%o5 + 0x38], %f4
+       ld      [%o5 + 0x3c], %f5
+       DES_IIP(0, 0)
+       DES_IP(0, 0)
+       DES_ROUND(16, 14, 0, 0)
+       ld      [%o5 + 0x40], %f6
+       DES_ROUND(60, 58, 0, 0)
+       ld      [%o5 + 0x44], %f7
+       DES_ROUND(56, 54, 0, 0)
+       ld      [%o5 + 0x48], %f8
+       DES_ROUND(52, 50, 0, 0)
+       ld      [%o5 + 0x4c], %f9
+       DES_ROUND(48, 46, 0, 0)
+       ld      [%o5 + 0x50], %f10
+       DES_ROUND(44, 42, 0, 0)
+       ld      [%o5 + 0x54], %f11
+       DES_ROUND(40, 38, 0, 0)
+       ld      [%o5 + 0x58], %f12
+       ld      [%o5 + 0x5c], %f13
+       DES_ROUND(36, 34, 0, 0)
+       ld      [%o5 + 0x60], %f14
+       ld      [%o5 + 0x64], %f15
+       ld      [%o5 + 0x68], %f16
+       ld      [%o5 + 0x6c], %f17
+       DES_IIP(0, 0)
+       DES_IP(0, 0)
+       DES_ROUND(18, 20, 0, 0)
+       ld      [%o5 + 0x70], %f18
+       ld      [%o5 + 0x74], %f19
+       ld      [%o5 + 0x78], %f20
+       ld      [%o5 + 0x7c], %f21
+       DES_ROUND(22, 24, 0, 0)
+       ld      [%o3 + 0x50], %f22
+       ld      [%o3 + 0x54], %f23
+       ld      [%o3 + 0x58], %f24
+       ld      [%o3 + 0x5c], %f25
+       DES_ROUND(26, 28, 0, 0)
+       ld      [%o3 + 0x60], %f26
+       ld      [%o3 + 0x64], %f27
+       ld      [%o3 + 0x68], %f28
+       ld      [%o3 + 0x6c], %f29
+       DES_ROUND(2, 4, 0, 0)
+       ld      [%o3 + 0x00], %f2
+       ld      [%o3 + 0x04], %f3
+       ld      [%o3 + 0x08], %f4
+       ld      [%o3 + 0x0c], %f5
+       DES_ROUND(6, 8, 0, 0)
+       ld      [%o3 + 0x10], %f6
+       ld      [%o3 + 0x14], %f7
+       ld      [%o3 + 0x18], %f8
+       ld      [%o3 + 0x1c], %f9
+       DES_ROUND(10, 12, 0, 0)
+       ld      [%o3 + 0x20], %f10
+       ld      [%o3 + 0x24], %f11
+       ld      [%o3 + 0x28], %f12
+       ld      [%o3 + 0x2c], %f13
+       DES_ROUND(14, 16, 0, 0)
+       ld      [%o3 + 0x30], %f14
+       ld      [%o3 + 0x34], %f15
+       ld      [%o3 + 0x38], %f16
+       ld      [%o3 + 0x3c], %f17
+       DES_ROUND(18, 20, 0, 0)
+       ld      [%o3 + 0x40], %f18
+       ld      [%o3 + 0x44], %f19
+       ld      [%o3 + 0x48], %f20
+       ld      [%o3 + 0x4c], %f21
+       DES_IIP(0, 0)
+       fsrc2   %f0, %f62
+       st      %f0, [%o1 + 0x00]
+       st      %f1, [%o1 + 0x04]
+       add     %o0, 0x08, %o0
+       subcc   %o2, 0x08, %o2
+       bne,pt  %icc, 1b
+        add    %o1, 0x08, %o1
+
+3:     cmp     %g1, 0
+       be,pt   %icc, .Lede3_cbc_hw_finish
+        mov    64, %o2
+       lduw    [%o0 + 0x00], %g2
+       sll     %g1, 3, %g1
+       lduw    [%o0 + 0x04], %g3
+       sub     %o2, %g1, %o2
+       sllx    %g2, 32, %g2
+       or      %g2, %g3, %g3
+       srlx    %g3, %o2, %g3
+       sllx    %g3, %o2, %g3
+       MOVXTOD_G3_F0
+       mov     0, %g1
+       ba,pt   %icc, 2b
+        mov    0x08, %o2
+
+.Lede3_cbc_hw_finish:
+       LDPTR   [%sp+BIAS+ARG0+6*ARGSZ], %g3
+       st      %f0, [%g3 + 0x00]
+       retl
+        st     %f1, [%g3 + 0x04]
+
+/*
+ * CBC_HW_DECRYPT: run the 64-bit block held in %f0 through three DES
+ * passes using the hardware opcodes, leaving the result in %f0.  Every
+ * pass is bracketed by DES_IP/DES_IIP and consists of eight DES_ROUND
+ * steps, each naming a pair of 64-bit key registers.
+ *
+ *   pass 1: key pairs taken in descending order, %f32/%f30 ... %f4/%f2
+ *   pass 2: key pairs taken in ascending order, %f34/%f36 ... %f58/%f60,
+ *           then %f14/%f16
+ *   pass 3: descending order again, using keys fetched from %o5
+ *
+ * As soon as a key pair has been consumed its registers are refilled:
+ *   - keys for pass 3 come from %o5 (interleaved with pass 1, after
+ *     pass 2, and after the first step of pass 3);
+ *   - the last key pair of pass 2 comes from %o4 + 0x70;
+ *   - keys for the next block come from %o3 (interleaved with pass 3).
+ * On exit %f2-%f29 therefore hold the words at %o3 + 0x00-0x6c again,
+ * which is the state pass 1 of the next block consumes.
+ *
+ * %f30-%f60 are only read here; they must be preloaded by the code
+ * that uses this macro (presumably with %o3 + 0x70-0x7c and
+ * %o4 + 0x00-0x6c -- confirm against the setup code).
+ *
+ * All key fetches are 32-bit ld, so the schedules only need 4-byte
+ * alignment.  The ld / DES_ROUND ordering is deliberate: a register
+ * must not be refilled before the step that reads it.
+ */
+#define CBC_HW_DECRYPT \
+       DES_IP(0, 0); \
+       DES_ROUND(32, 30, 0, 0); \
+       DES_ROUND(28, 26, 0, 0); \
+       ld      [%o5 + 0x50], %f26; \
+       ld      [%o5 + 0x54], %f27; \
+       ld      [%o5 + 0x58], %f28; \
+       ld      [%o5 + 0x5c], %f29; \
+       DES_ROUND(24, 22, 0, 0); \
+       ld      [%o5 + 0x60], %f22; \
+       ld      [%o5 + 0x64], %f23; \
+       ld      [%o5 + 0x68], %f24; \
+       ld      [%o5 + 0x6c], %f25; \
+       DES_ROUND(20, 18, 0, 0); \
+       ld      [%o5 + 0x70], %f18; \
+       ld      [%o5 + 0x74], %f19; \
+       ld      [%o5 + 0x78], %f20; \
+       ld      [%o5 + 0x7c], %f21; \
+       DES_ROUND(16, 14, 0, 0); \
+       ld      [%o4 + 0x70], %f14; \
+       ld      [%o4 + 0x74], %f15; \
+       ld      [%o4 + 0x78], %f16; \
+       ld      [%o4 + 0x7c], %f17; \
+       DES_ROUND(12, 10, 0, 0); \
+       ld      [%o5 + 0x20], %f10; \
+       ld      [%o5 + 0x24], %f11; \
+       ld      [%o5 + 0x28], %f12; \
+       ld      [%o5 + 0x2c], %f13; \
+       DES_ROUND(8, 6, 0, 0); \
+       ld      [%o5 + 0x30], %f6; \
+       ld      [%o5 + 0x34], %f7; \
+       ld      [%o5 + 0x38], %f8; \
+       ld      [%o5 + 0x3c], %f9; \
+       DES_ROUND(4, 2, 0, 0); \
+       ld      [%o5 + 0x40], %f2; \
+       ld      [%o5 + 0x44], %f3; \
+       ld      [%o5 + 0x48], %f4; \
+       ld      [%o5 + 0x4c], %f5; \
+       DES_IIP(0, 0); \
+       DES_IP(0, 0); \
+       DES_ROUND(34, 36, 0, 0); \
+       DES_ROUND(38, 40, 0, 0); \
+       DES_ROUND(42, 44, 0, 0); \
+       DES_ROUND(46, 48, 0, 0); \
+       DES_ROUND(50, 52, 0, 0); \
+       DES_ROUND(54, 56, 0, 0); \
+       DES_ROUND(58, 60, 0, 0); \
+       DES_ROUND(14, 16, 0, 0); \
+       ld      [%o5 + 0x10], %f14; \
+       ld      [%o5 + 0x14], %f15; \
+       ld      [%o5 + 0x18], %f16; \
+       ld      [%o5 + 0x1c], %f17; \
+       DES_IIP(0, 0); \
+       DES_IP(0, 0); \
+       DES_ROUND(20, 18, 0, 0); \
+       ld      [%o5 + 0x00], %f18; \
+       ld      [%o5 + 0x04], %f19; \
+       ld      [%o5 + 0x08], %f20; \
+       ld      [%o5 + 0x0c], %f21; \
+       DES_ROUND(24, 22, 0, 0); \
+       ld      [%o3 + 0x50], %f22; \
+       ld      [%o3 + 0x54], %f23; \
+       ld      [%o3 + 0x58], %f24; \
+       ld      [%o3 + 0x5c], %f25; \
+       DES_ROUND(28, 26, 0, 0); \
+       ld      [%o3 + 0x60], %f26; \
+       ld      [%o3 + 0x64], %f27; \
+       ld      [%o3 + 0x68], %f28; \
+       ld      [%o3 + 0x6c], %f29; \
+       DES_ROUND(4, 2, 0, 0); \
+       ld      [%o3 + 0x00], %f2; \
+       ld      [%o3 + 0x04], %f3; \
+       ld      [%o3 + 0x08], %f4; \
+       ld      [%o3 + 0x0c], %f5; \
+       DES_ROUND(8, 6, 0, 0); \
+       ld      [%o3 + 0x10], %f6; \
+       ld      [%o3 + 0x14], %f7; \
+       ld      [%o3 + 0x18], %f8; \
+       ld      [%o3 + 0x1c], %f9; \
+       DES_ROUND(12, 10, 0, 0); \
+       ld      [%o3 + 0x20], %f10; \
+       ld      [%o3 + 0x24], %f11; \
+       ld      [%o3 + 0x28], %f12; \
+       ld      [%o3 + 0x2c], %f13; \
+       DES_ROUND(16, 14, 0, 0); \
+       ld      [%o3 + 0x30], %f14; \
+       ld      [%o3 + 0x34], %f15; \
+       ld      [%o3 + 0x38], %f16; \
+       ld      [%o3 + 0x3c], %f17; \
+       DES_ROUND(20, 18, 0, 0); \
+       ld      [%o3 + 0x40], %f18; \
+       ld      [%o3 + 0x44], %f19; \
+       ld      [%o3 + 0x48], %f20; \
+       ld      [%o3 + 0x4c], %f21; \
+       DES_IIP(0, 0)
+
+       /*
+        * Hardware triple-DES CBC decryption.  Register use as visible
+        * in this section:
+        *   %o0  = input pointer, %o1 = output pointer
+        *   %o2  = byte count of the whole blocks still to process
+        *   %g1  = size in bytes of a trailing partial block, 0 if none
+        *   %f62 = chaining value (previous ciphertext block or the IV)
+        * %o2, %g1, %f62 and the key registers are prepared by the code
+        * ahead of this label.
+        */
+.Lede3_cbc_hw_decrypt:
+       cmp     %o2, 0
+       be,pn   %icc, 3f
+        nop
+       /* Whole blocks: fetch 8 bytes as two words into %g3, then %f0. */
+1:     lduw    [%o0 + 0x00], %g3
+       lduw    [%o0 + 0x04], %g2
+       sllx    %g3, 32, %g3
+       or      %g3, %g2, %g3
+       MOVXTOD_G3_F0
+
+       CBC_HW_DECRYPT
+
+       /* Undo the chaining, then keep this ciphertext block (still in */
+       /* %g3) as the chaining value for the next block.               */
+       fxor    %f0, %f62, %f0
+       MOVXTOD_G3_F62
+       st      %f0, [%o1 + 0x00]
+       st      %f1, [%o1 + 0x04]
+       add     %o0, 0x08, %o0
+       subcc   %o2, 0x08, %o2
+       bne,pt  %icc, 1b
+        add    %o1, 0x08, %o1
+
+       /*
+        * Trailing partial block.  The delay slot below seeds %o2 with
+        * 0xff for the byte-mask computation further down.
+        * NOTE(review): a full 8 bytes are still fetched from the input
+        * here even though only %g1 bytes are written back -- confirm the
+        * input buffer always covers them.
+        */
+3:     cmp     %g1, 0
+       be,pt   %icc, 4f
+        mov    0xff, %o2
+       lduw    [%o0 + 0x00], %g3
+       lduw    [%o0 + 0x04], %g2
+       sllx    %g3, 32, %g3
+       or      %g3, %g2, %g3
+       MOVXTOD_G3_F0
+
+       CBC_HW_DECRYPT
+
+       fxor    %f0, %f62, %f0
+       MOVXTOD_G3_F62
+       /*
+        * Byte mask for the partial store (ASI 0xc0): bit 7 selects the
+        * first byte of %f0, so the first %g1 bytes need the top %g1 bits
+        * of an 8-bit mask, i.e. (0xff >> %g1) ^ 0xff.  %g1 is a byte
+        * count and must not be scaled to a bit count first; the earlier
+        * code did that and ended up with 0xff (all eight bytes written,
+        * overrunning the output) or with an empty mask.
+        * NOTE(review): the partial store is a 64-bit access -- confirm
+        * that %o1 is 8-byte aligned whenever a tail is present.
+        */
+       srl     %o2, %g1, %o2
+       xor     %o2, 0xff, %o2
+       stda    %f0, [%o1 + %o2] 0xc0
+       /* Hand the final chaining value to the common exit in %f0. */
+4:     ba,pt   %icc, .Lede3_cbc_hw_finish
+        fsrc2  %f62, %f0
+
+.Lede3_cbc_encrypt_software:
+#endif
        save    %sp, FRAME, %sp
 
        define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
@@ -1879,6 +2763,80 @@ DES_ede3_cbc_encrypt_asm:
 .DES_ede3_cbc_encrypt_asm.end:
        .size    DES_ede3_cbc_encrypt_asm,.DES_ede3_cbc_encrypt_asm.end-DES_ede3_cbc_encrypt_asm
 
+#ifdef __sparc_v9__
+/*
+ * DES_sparc_hw_key_expand: expand one 8-byte DES key into the key
+ * schedule layout produced by the DES_KEXPAND opcode.
+ *
+ *   %o0 = key, any alignment
+ *   %o1 = output buffer; 128 bytes are written
+ *
+ * Uses %g1 and %f0-%f31 as scratch; the unaligned path also rewrites
+ * the GSR alignment field through alignaddr.  Leaf routine, returns
+ * with retl.
+ */
+       .align  32
+       .global DES_sparc_hw_key_expand
+       .type   DES_sparc_hw_key_expand,#function
+DES_sparc_hw_key_expand:
+       /* %o0=input_key, %o1=output_key */
+       /* An 8-byte aligned key is fetched with a single ldd; any other */
+       /* address is fetched as two aligned doublewords and realigned   */
+       /* with faligndata (which sits in the branch delay slot).        */
+       andcc   %o0, 0x7, %g0
+       be,pt   %icc, 1f
+        nop
+       alignaddr %o0, %g0, %g1
+       ldd     [%g1 + 0x00], %f4
+       ldd     [%g1 + 0x08], %f6
+       ba,pt   %icc, 2f
+        faligndata %f4, %f6, %f0
+1:     ldd     [%o0 + 0x00], %f0
+       /* Sixteen 64-bit results end up in %f0, %f2 ... %f30.  Keep the */
+       /* order as is: several steps read a register written by an      */
+       /* earlier step (for example %f6 and %f4 are both derived from   */
+       /* %f2, and %f10 from %f6).                                      */
+2:     DES_KEXPAND(0, 0, 0)
+       DES_KEXPAND(0, 1, 2)
+       DES_KEXPAND(2, 3, 6)
+       DES_KEXPAND(2, 2, 4)
+       DES_KEXPAND(6, 3, 10)
+       DES_KEXPAND(6, 2, 8)
+       DES_KEXPAND(10, 3, 14)
+       DES_KEXPAND(10, 2, 12)
+       DES_KEXPAND(14, 1, 16)
+       DES_KEXPAND(16, 3, 20)
+       DES_KEXPAND(16, 2, 18)
+       DES_KEXPAND(20, 3, 24)
+       DES_KEXPAND(20, 2, 22)
+       DES_KEXPAND(24, 3, 28)
+       DES_KEXPAND(24, 2, 26)
+       DES_KEXPAND(28, 1, 30)
+       /* Written out with 32-bit stores, so the output buffer only */
+       /* needs 4-byte alignment.                                   */
+       st      %f0, [%o1 + 0x00]
+       st      %f1, [%o1 + 0x04]
+       st      %f2, [%o1 + 0x08]
+       st      %f3, [%o1 + 0x0c]
+       st      %f4, [%o1 + 0x10]
+       st      %f5, [%o1 + 0x14]
+       st      %f6, [%o1 + 0x18]
+       st      %f7, [%o1 + 0x1c]
+       st      %f8, [%o1 + 0x20]
+       st      %f9, [%o1 + 0x24]
+       st      %f10, [%o1 + 0x28]
+       st      %f11, [%o1 + 0x2c]
+       st      %f12, [%o1 + 0x30]
+       st      %f13, [%o1 + 0x34]
+       st      %f14, [%o1 + 0x38]
+       st      %f15, [%o1 + 0x3c]
+       st      %f16, [%o1 + 0x40]
+       st      %f17, [%o1 + 0x44]
+       st      %f18, [%o1 + 0x48]
+       st      %f19, [%o1 + 0x4c]
+       st      %f20, [%o1 + 0x50]
+       st      %f21, [%o1 + 0x54]
+       st      %f22, [%o1 + 0x58]
+       st      %f23, [%o1 + 0x5c]
+       st      %f24, [%o1 + 0x60]
+       st      %f25, [%o1 + 0x64]
+       st      %f26, [%o1 + 0x68]
+       st      %f27, [%o1 + 0x6c]
+       st      %f28, [%o1 + 0x70]
+       st      %f29, [%o1 + 0x74]
+       st      %f30, [%o1 + 0x78]
+       retl
+        st     %f31, [%o1 + 0x7c]
+       .size   DES_sparc_hw_key_expand,.-DES_sparc_hw_key_expand
+
+/*
+ * DES_sparc_hw_fcrypt_body: placeholder only.
+ *
+ * NOTE(review): this global symbol has no instructions at all (its
+ * .size evaluates to 0), so there is not even a return.  A call would
+ * run straight into whatever the assembler places next, which here is
+ * alignment padding followed by the .des_and data object.  Nothing in
+ * the visible code calls it; either give it a body or drop the symbol
+ * before anything starts to depend on it.
+ */
+       .align  32
+       .global DES_sparc_hw_fcrypt_body
+       .type   DES_sparc_hw_fcrypt_body,#function
+DES_sparc_hw_fcrypt_body:
+       /* %o0=out, %o1=ks, %o2=Eswap0, %o3=Eswap1 */
+       .size   DES_sparc_hw_fcrypt_body,.-DES_sparc_hw_fcrypt_body
+#endif
+
        .align  256
        .type    .des_and,#object
        .size    .des_and,284
diff --git a/crypto/des/des_sparccore.c b/crypto/des/des_sparccore.c
index 4aaa3c7..cab508d 100644
--- a/crypto/des/des_sparccore.c
+++ b/crypto/des/des_sparccore.c
@@ -1,6 +1,7 @@
 #include <openssl/crypto.h>
 
 #include "des_locl.h"
+#include "sparc_arch.h"
 
 extern void DES_ncbc_encrypt_asm(const DES_LONG *in, DES_LONG *out, long length,
                                 DES_key_schedule *ks, DES_LONG *ivec, int enc);
@@ -153,3 +154,53 @@ void DES_ede3_cbc_encrypt(const unsigned char *in, unsigned char *out,
        if (aligned_ivec == ivb)
                memcpy(ivec, ivb, sizeof(*ivec));
 }
+
+/*
+ * Build private "_generic" copies of the portable key-setup routines.
+ *
+ * While the three macros below are active, every definition of and call
+ * to DES_set_key(), DES_set_key_unchecked() and DES_set_key_checked()
+ * inside the included sources is renamed to its *_generic counterpart.
+ * The public names are then defined further down in this file, where
+ * they can pick the hardware key expansion at run time.
+ *
+ * fcrypt.c is included under the same renames on purpose: the DES
+ * opcodes cannot be used for fcrypt, so it has to keep using the
+ * generic key expansion.
+ *
+ * NOTE(review): any other function defined in set_key.c that calls
+ * DES_set_key() is renamed along with it and will bypass the hardware
+ * expansion -- check DES_key_sched() in particular.
+ */
+#define DES_set_key DES_set_key_generic
+int DES_set_key_generic(const_DES_cblock *key, DES_key_schedule *schedule);
+
+#define DES_set_key_unchecked DES_set_key_unchecked_generic
+void DES_set_key_unchecked_generic(const_DES_cblock *key, DES_key_schedule *ks);
+
+#define DES_set_key_checked DES_set_key_checked_generic
+int DES_set_key_checked_generic(const_DES_cblock *key, DES_key_schedule *ks);
+
+#include "set_key.c"
+#include "fcrypt.c"
+
+/* From here on the public names refer to the wrappers defined below. */
+#undef DES_set_key
+#undef DES_set_key_unchecked
+#undef DES_set_key_checked
+
+#ifdef __sparc_v9__
+/* Assembly routine (des_enc.m4): expands key into the opcode layout. */
+extern void DES_sparc_hw_key_expand(const_DES_cblock *key, DES_key_schedule *schedule);
+#endif
+
+/*
+ * DES_set_key_unchecked: build the key schedule for key in ks without
+ * any parity or weak-key checks.
+ *
+ * On a v9 build the hardware expansion is used when the SPARCV9_DES bit
+ * is set in OPENSSL_sparcv9cap_P; in every other case the generic
+ * expansion is used.  The two produce different schedule layouts, so a
+ * schedule must be consumed by the matching encryption code.
+ */
+void DES_set_key_unchecked(const_DES_cblock *key, DES_key_schedule *ks)
+{
+#ifdef __sparc_v9__
+       /* Guard clause instead of an else that straddles the #endif. */
+       if (OPENSSL_sparcv9cap_P & SPARCV9_DES) {
+               DES_sparc_hw_key_expand(key, ks);
+               return;
+       }
+#endif
+       DES_set_key_unchecked_generic(key, ks);
+}
+
+/*
+ * DES_set_key_checked: validate key, then build its schedule in ks.
+ *
+ * Returns -1 when DES_check_key_parity() rejects the key, -2 when
+ * DES_is_weak_key() flags it, and 0 once the schedule has been built.
+ * ks is left untouched on either failure.
+ */
+int DES_set_key_checked(const_DES_cblock *key, DES_key_schedule *ks)
+{
+       int status = 0;
+
+       if (!DES_check_key_parity(key))
+               status = -1;
+       else if (DES_is_weak_key(key))
+               status = -2;
+       else
+               DES_set_key_unchecked(key, ks);
+       return status;
+}
+
+/*
+ * DES_set_key: build the schedule for key in ks.
+ *
+ * When the DES_check_key flag is non-zero the key is validated first
+ * and the result of DES_set_key_checked() is returned; otherwise the
+ * schedule is built unconditionally and 0 is returned.
+ */
+int DES_set_key(const_DES_cblock *key, DES_key_schedule *ks)
+{
+       if (!DES_check_key) {
+               DES_set_key_unchecked(key, ks);
+               return 0;
+       }
+       return DES_set_key_checked(key, ks);
+}
diff --git a/crypto/sparc_arch.h b/crypto/sparc_arch.h
index fe9805d..1a02cf1 100644
--- a/crypto/sparc_arch.h
+++ b/crypto/sparc_arch.h
@@ -61,6 +61,15 @@ extern int OPENSSL_sparcv9cap_P;
 #define AES_KEXPAND2(a,b,c)    \
        .word   (F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c));
 
+/*
+ * DES opcodes, emitted as raw .word encodings built from the F3F, RS1,
+ * RS2, RS3, RD and IMM5_0 field helpers (all of which must be provided
+ * by this header), so no assembler support for the mnemonics is needed.
+ * The callers in des_enc.m4 pass floating-point register numbers:
+ *
+ *   DES_IP(a,b)        source register a, destination register b
+ *   DES_IIP(a,b)       source register a, destination register b
+ *   DES_KEXPAND(a,b,c) source register a, 5-bit immediate b,
+ *                      destination register c
+ *   DES_ROUND(a,b,c,d) three source registers a, b, c and destination
+ *                      d; the callers pass a pair of key registers as
+ *                      a and b and the data register as both c and d
+ */
+#define DES_IP(a,b)            \
+       .word           (F3F(2, 0x36, 0x134)|RS1(a)|RD(b));
+#define DES_IIP(a,b)           \
+       .word           (F3F(2, 0x36, 0x135)|RS1(a)|RD(b));
+#define DES_KEXPAND(a,b,c)     \
+       .word           (F3F(2, 0x36, 0x136)|RS1(a)|IMM5_0(b)|RD(c));
+#define DES_ROUND(a,b,c,d)     \
+       .word           (F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+
 #define CAMELLIA_F(a,b,c,d)            \
        .word           (F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d));
 #define CAMELLIA_FL(a,b,c)             \
@@ -90,6 +99,8 @@ extern int OPENSSL_sparcv9cap_P;
        .word   0x89b0230d;
 #define MOVXTOD_O5_F6          \
        .word   0x8db0230d;
+/* Hand-encoded instruction word; per the macro name: movxtod %g3, %f62. */
+#define MOVXTOD_G3_F62         \
+       .word   0xbfb02303;
 
 #ifdef __PIC__
 #define SPARC_PIC_THUNK(reg)   \
-- 
1.7.10.4

______________________________________________________________________
OpenSSL Project                                 http://www.openssl.org
Development Mailing List                       openssl-dev@openssl.org
Automated List Manager                           majord...@openssl.org

Reply via email to