Also, add a missing include of opensslconf.h so that we properly
get the OPENSSL_SYSNAME_ULTRASPARC define even in the 32-bit case.

These changes give a pretty reasonable speed boost.

On a SPARC T4-2, without these changes:

type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
des cbc          39148.28k    40071.83k    40669.53k    40685.80k    40864.43k
des ede3         14358.54k    14528.02k    14611.88k    14632.96k    14641.83k

and with them:

type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
des cbc          54334.03k    56280.92k    57101.01k    57549.14k    57619.80k
des ede3         20010.98k    20682.62k    20834.90k    20873.56k    20886.87k

Signed-off-by: David S. Miller <da...@davemloft.net>
---
 Configure                  |    4 +-
 crypto/des/asm/des_enc.m4  |   62 ++++++------------
 crypto/des/des_sparccore.c |  155 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 176 insertions(+), 45 deletions(-)
 create mode 100644 crypto/des/des_sparccore.c

diff --git a/Configure b/Configure
index b4cbb56..15d961a 100755
--- a/Configure
+++ b/Configure
@@ -130,8 +130,8 @@ my $x86_elf_asm="$x86_asm:elf";
 
 my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o 
x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o 
aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o 
sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o 
cmll_misc.o:ghash-x86_64.o:e_padlock-x86_64.o";
 my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o 
aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o 
rc4_skey.o:::::ghash-ia64.o::void";
-my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o 
sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_sparccore.o 
aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o 
sha512-sparcv9.o::::::cmll-sparcv9.o cmll_sparccore.o:ghash-sparcv9.o::void";
-my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
+my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o 
sparcv9a-mont.o:des_enc-sparc.o des_sparccore.o fcrypt_b.o:aes_sparccore.o 
aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o 
sha512-sparcv9.o::::::cmll-sparcv9.o cmll_sparccore.o:ghash-sparcv9.o::void";
+my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o des_sparccore.o 
fcrypt_b.o:::::::::::::void";
 my $alpha_asm="alphacpuid.o:bn_asm.o 
alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void";
 my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o 
sha256-mips.o sha512-mips.o::::::::";
 my $mips32_asm=$mips64_asm; $mips32_asm =~ s/\s*sha512\-mips\.o//;
diff --git a/crypto/des/asm/des_enc.m4 b/crypto/des/asm/des_enc.m4
index 5d323d0..a69273c 100644
--- a/crypto/des/asm/des_enc.m4
+++ b/crypto/des/asm/des_enc.m4
@@ -46,6 +46,8 @@
 .ident "des_enc.m4 2.1"
 .file  "des_enc-sparc.S"
 
+#include <openssl/opensslconf.h>
+
 #ifdef OPENSSL_FIPSCANISTER
 #include <openssl/fipssyms.h>
 #endif
@@ -790,18 +792,10 @@ define(load_little_endian, {
        ! first in memory to rightmost in register
 
 #ifdef OPENSSL_SYSNAME_ULTRASPARC
-       andcc   $1, 3, global0
-       bne,pn  %icc, $5
-       nop
-
        lda     [$1] 0x88, $2
        add     $1, 4, $4
-
-       ba,pt   %icc, $5a
        lda     [$4] 0x88, $3
-#endif
-
-$5:
+#else
        ldub    [$1+3], $2
 
        ldub    [$1+2], $4
@@ -830,8 +824,7 @@ $5:
        ldub    [$1+0+4], $4
        sll     $3, 8, $3
        or      $3, $4, $3
-$5a:
-
+#endif
 })
 
 
@@ -853,19 +846,12 @@ define(load_little_endian_inc, {
        ! first in memory to rightmost in register
 
 #ifdef OPENSSL_SYSNAME_ULTRASPARC
-       andcc   $1, 3, global0
-       bne,pn  %icc, $5
-       nop
-
        lda     [$1] 0x88, $2
        add     $1, 4, $1
 
        lda     [$1] 0x88, $3
-       ba,pt   %icc, $5a
        add     $1, 4, $1
-#endif
-
-$5:
+#else
        ldub    [$1+3], $2
 
        ldub    [$1+2], $4
@@ -894,8 +880,7 @@ $5:
        ldub    [$1+0+4-8], $4
        sll     $3, 8, $3
        or      $3, $4, $3
-$5a:
-
+#endif
 })
 
 
@@ -988,18 +973,11 @@ define(store_little_endian, {
        ! rightmost in register to first in memory
 
 #ifdef OPENSSL_SYSNAME_ULTRASPARC
-       andcc   $1, 3, global0
-       bne,pn  %icc, $5
-       nop
-
        sta     $2, [$1] 0x88
        add     $1, 4, $4
 
-       ba,pt   %icc, $5a
        sta     $3, [$4] 0x88
-#endif
-
-$5:
+#else
        and     $2, 255, $4
        stub    $4, [$1+0]
 
@@ -1028,9 +1006,7 @@ $5:
 
        srl     $3, 24, $4
        stub    $4, [$1+3+4]
-
-$5a:
-
+#endif
 })
 
 
@@ -1441,15 +1417,15 @@ DES_decrypt3:
 .DES_decrypt3.end:
        .size    DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
 
-! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
+! void DES_ncbc_encrypt_asm(input, output, length, schedule, ivec, enc)
 ! *****************************************************************
 
 
        .align 32
-       .global DES_ncbc_encrypt
-       .type    DES_ncbc_encrypt,#function
+       .global DES_ncbc_encrypt_asm
+       .type    DES_ncbc_encrypt_asm,#function
 
-DES_ncbc_encrypt:
+DES_ncbc_encrypt_asm:
 
        save    %sp, FRAME, %sp
        
@@ -1662,8 +1638,8 @@ DES_ncbc_encrypt:
        store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, 
.ncbc.dec.store.iv)
 
 
-.DES_ncbc_encrypt.end:
-       .size    DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
+.DES_ncbc_encrypt_asm.end:
+       .size    DES_ncbc_encrypt_asm, 
.DES_ncbc_encrypt_asm.end-DES_ncbc_encrypt_asm
 
 
 ! void DES_ede3_cbc_encrypt(input, output, lenght, ks1, ks2, ks3, ivec, enc)
@@ -1671,10 +1647,10 @@ DES_ncbc_encrypt:
 
 
        .align 32
-       .global DES_ede3_cbc_encrypt
-       .type    DES_ede3_cbc_encrypt,#function
+       .global DES_ede3_cbc_encrypt_asm
+       .type    DES_ede3_cbc_encrypt_asm,#function
 
-DES_ede3_cbc_encrypt:
+DES_ede3_cbc_encrypt_asm:
 
        save    %sp, FRAME, %sp
 
@@ -1900,8 +1876,8 @@ DES_ede3_cbc_encrypt:
        store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, 
.ede3.dec.store.iv)
 
 
-.DES_ede3_cbc_encrypt.end:
-       .size    
DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
+.DES_ede3_cbc_encrypt_asm.end:
+       .size    
DES_ede3_cbc_encrypt_asm,.DES_ede3_cbc_encrypt_asm.end-DES_ede3_cbc_encrypt_asm
 
        .align  256
        .type    .des_and,#object
diff --git a/crypto/des/des_sparccore.c b/crypto/des/des_sparccore.c
new file mode 100644
index 0000000..4aaa3c7
--- /dev/null
+++ b/crypto/des/des_sparccore.c
@@ -0,0 +1,155 @@
+#include <openssl/crypto.h>
+
+#include "des_locl.h"
+
+extern void DES_ncbc_encrypt_asm(const DES_LONG *in, DES_LONG *out, long 
length,
+                                DES_key_schedule *ks, DES_LONG *ivec, int enc);
+
+static void DES_ncbc_encrypt_slow(const unsigned char *in, unsigned char *out, 
long length,
+                                 DES_key_schedule *ks, DES_cblock *ivec, int 
enc)
+{
+       DES_LONG tmp_buf[2];
+       DES_LONG ivb[2];
+
+       memcpy(ivb, ivec, sizeof(*ivec));
+
+       while (length > 0) {
+               int this_len = length;
+
+               if (this_len > 8)
+                       this_len = 8;
+               memcpy(tmp_buf, in, this_len);
+
+               DES_ncbc_encrypt_asm(tmp_buf, tmp_buf, this_len, ks,
+                                    ivb, enc);
+
+               memcpy(out, tmp_buf, this_len);
+
+               length -= 8;
+       }
+
+       memcpy(ivec, ivb, sizeof(*ivec));
+}
+
+void DES_ncbc_encrypt(const unsigned char *in, unsigned char *out, long length,
+                     DES_key_schedule *ks, DES_cblock *ivec, int enc)
+{
+       const DES_LONG *aligned_in;
+       DES_LONG *aligned_ivec;
+       DES_LONG *aligned_out;
+       DES_LONG ivb[2];
+
+       aligned_out = (DES_LONG *) out;
+       if ((unsigned long) out & 0x3) {
+               aligned_out = OPENSSL_malloc(length);
+               if (!aligned_out) {
+                       DES_ncbc_encrypt_slow(in, out, length, ks, ivec, enc);
+                       return;
+               }
+       }
+
+       aligned_in = (const DES_LONG *) in;
+       if ((unsigned long) in & 0x3) {
+               memcpy(aligned_out, in, length);
+               aligned_in = (const DES_LONG *) aligned_out;
+       }
+
+       aligned_ivec = (DES_LONG *) ivec;
+       if ((unsigned long) ivec & 0x3) {
+               memcpy(ivb, ivec, sizeof(*ivec));
+               aligned_ivec = ivb;
+       }
+
+       DES_ncbc_encrypt_asm(aligned_in, aligned_out, length, ks,
+                            aligned_ivec, enc);
+
+       if ((unsigned long) out & 0x3) {
+               memcpy(out, aligned_out, length);
+               OPENSSL_free(aligned_out);
+       }
+
+       if (aligned_ivec == ivb)
+               memcpy(ivec, ivb, sizeof(*ivec));
+}
+
+extern void DES_ede3_cbc_encrypt_asm(const DES_LONG *in, DES_LONG *out, long 
length,
+                                    DES_key_schedule *ks1,
+                                    DES_key_schedule *ks2,
+                                    DES_key_schedule *ks3,
+                                    DES_LONG *ivec, int enc);
+
+static void DES_ede3_cbc_encrypt_slow(const unsigned char *in, unsigned char 
*out,
+                                     long length,
+                                     DES_key_schedule *ks1,
+                                     DES_key_schedule *ks2,
+                                     DES_key_schedule *ks3,
+                                     DES_cblock *ivec, int enc)
+{
+       DES_LONG tmp_buf[2];
+       DES_LONG ivb[2];
+
+       memcpy(ivb, ivec, sizeof(*ivec));
+
+       while (length > 0) {
+               int this_len = length;
+
+               if (this_len > 8)
+                       this_len = 8;
+               memcpy(tmp_buf, in, this_len);
+
+               DES_ede3_cbc_encrypt_asm(tmp_buf, tmp_buf, this_len,
+                                        ks1, ks2, ks3,  ivb, enc);
+
+               memcpy(out, tmp_buf, this_len);
+
+               length -= 8;
+       }
+
+       memcpy(ivec, ivb, sizeof(*ivec));
+}
+
+void DES_ede3_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                         long length, DES_key_schedule *ks1,
+                         DES_key_schedule *ks2, DES_key_schedule *ks3,
+                         DES_cblock *ivec, int enc)
+{
+       const DES_LONG *aligned_in;
+       DES_LONG *aligned_ivec;
+       DES_LONG *aligned_out;
+       DES_LONG ivb[2];
+
+       aligned_out = (DES_LONG *) out;
+       if ((unsigned long) out & 0x3) {
+               aligned_out = OPENSSL_malloc(length);
+               if (!aligned_out) {
+                       DES_ede3_cbc_encrypt_slow(in, out, length,
+                                                 ks1, ks2, ks3,
+                                                 ivec, enc);
+                       return;
+               }
+       }
+
+       aligned_in = (const DES_LONG *) in;
+       if ((unsigned long) in & 0x3) {
+               memcpy(aligned_out, in, length);
+               aligned_in = (const DES_LONG *) aligned_out;
+       }
+
+       aligned_ivec = (DES_LONG *) ivec;
+       if ((unsigned long) ivec & 0x3) {
+               memcpy(ivb, ivec, sizeof(*ivec));
+               aligned_ivec = ivb;
+       }
+
+       DES_ede3_cbc_encrypt_asm(aligned_in, aligned_out, length,
+                                ks1, ks2, ks3,
+                                aligned_ivec, enc);
+
+       if ((unsigned long) out & 0x3) {
+               memcpy(out, aligned_out, length);
+               OPENSSL_free(aligned_out);
+       }
+
+       if (aligned_ivec == ivb)
+               memcpy(ivec, ivb, sizeof(*ivec));
+}
-- 
1.7.10.4

______________________________________________________________________
OpenSSL Project                                 http://www.openssl.org
Development Mailing List                       openssl-dev@openssl.org
Automated List Manager                           majord...@openssl.org

Reply via email to