Also, add a missing include of opensslconf.h so that we properly get the OPENSSL_SYSNAME_ULTRASPARC define even in the 32-bit case.
These changes give a pretty reasonable speed boost. On a SPARC T4-2, without these changes: type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes des cbc 39148.28k 40071.83k 40669.53k 40685.80k 40864.43k des ede3 14358.54k 14528.02k 14611.88k 14632.96k 14641.83k and with them: type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes des cbc 54334.03k 56280.92k 57101.01k 57549.14k 57619.80k des ede3 20010.98k 20682.62k 20834.90k 20873.56k 20886.87k Signed-off-by: David S. Miller <da...@davemloft.net> --- Configure | 4 +- crypto/des/asm/des_enc.m4 | 62 ++++++------------ crypto/des/des_sparccore.c | 155 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+), 45 deletions(-) create mode 100644 crypto/des/des_sparccore.c diff --git a/Configure b/Configure index b4cbb56..15d961a 100755 --- a/Configure +++ b/Configure @@ -130,8 +130,8 @@ my $x86_elf_asm="$x86_asm:elf"; my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:e_padlock-x86_64.o"; my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_sparccore.o aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::cmll-sparcv9.o cmll_sparccore.o:ghash-sparcv9.o::void"; -my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o des_sparccore.o fcrypt_b.o:aes_sparccore.o aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o 
sha512-sparcv9.o::::::cmll-sparcv9.o cmll_sparccore.o:ghash-sparcv9.o::void"; +my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o des_sparccore.o fcrypt_b.o:::::::::::::void"; my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void"; my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; my $mips32_asm=$mips64_asm; $mips32_asm =~ s/\s*sha512\-mips\.o//; diff --git a/crypto/des/asm/des_enc.m4 b/crypto/des/asm/des_enc.m4 index 5d323d0..a69273c 100644 --- a/crypto/des/asm/des_enc.m4 +++ b/crypto/des/asm/des_enc.m4 @@ -46,6 +46,8 @@ .ident "des_enc.m4 2.1" .file "des_enc-sparc.S" +#include <openssl/opensslconf.h> + #ifdef OPENSSL_FIPSCANISTER #include <openssl/fipssyms.h> #endif @@ -790,18 +792,10 @@ define(load_little_endian, { ! first in memory to rightmost in register #ifdef OPENSSL_SYSNAME_ULTRASPARC - andcc $1, 3, global0 - bne,pn %icc, $5 - nop - lda [$1] 0x88, $2 add $1, 4, $4 - - ba,pt %icc, $5a lda [$4] 0x88, $3 -#endif - -$5: +#else ldub [$1+3], $2 ldub [$1+2], $4 @@ -830,8 +824,7 @@ $5: ldub [$1+0+4], $4 sll $3, 8, $3 or $3, $4, $3 -$5a: - +#endif }) @@ -853,19 +846,12 @@ define(load_little_endian_inc, { ! first in memory to rightmost in register #ifdef OPENSSL_SYSNAME_ULTRASPARC - andcc $1, 3, global0 - bne,pn %icc, $5 - nop - lda [$1] 0x88, $2 add $1, 4, $1 lda [$1] 0x88, $3 - ba,pt %icc, $5a add $1, 4, $1 -#endif - -$5: +#else ldub [$1+3], $2 ldub [$1+2], $4 @@ -894,8 +880,7 @@ $5: ldub [$1+0+4-8], $4 sll $3, 8, $3 or $3, $4, $3 -$5a: - +#endif }) @@ -988,18 +973,11 @@ define(store_little_endian, { ! 
rightmost in register to first in memory #ifdef OPENSSL_SYSNAME_ULTRASPARC - andcc $1, 3, global0 - bne,pn %icc, $5 - nop - sta $2, [$1] 0x88 add $1, 4, $4 - ba,pt %icc, $5a sta $3, [$4] 0x88 -#endif - -$5: +#else and $2, 255, $4 stub $4, [$1+0] @@ -1028,9 +1006,7 @@ $5: srl $3, 24, $4 stub $4, [$1+3+4] - -$5a: - +#endif }) @@ -1441,15 +1417,15 @@ DES_decrypt3: .DES_decrypt3.end: .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3 -! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc) +! void DES_ncbc_encrypt_asm(input, output, length, schedule, ivec, enc) ! ***************************************************************** .align 32 - .global DES_ncbc_encrypt - .type DES_ncbc_encrypt,#function + .global DES_ncbc_encrypt_asm + .type DES_ncbc_encrypt_asm,#function -DES_ncbc_encrypt: +DES_ncbc_encrypt_asm: save %sp, FRAME, %sp @@ -1662,8 +1638,8 @@ DES_ncbc_encrypt: store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv) -.DES_ncbc_encrypt.end: - .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt +.DES_ncbc_encrypt_asm.end: + .size DES_ncbc_encrypt_asm, .DES_ncbc_encrypt_asm.end-DES_ncbc_encrypt_asm ! 
void DES_ede3_cbc_encrypt(input, output, lenght, ks1, ks2, ks3, ivec, enc) @@ -1671,10 +1647,10 @@ DES_ncbc_encrypt: .align 32 - .global DES_ede3_cbc_encrypt - .type DES_ede3_cbc_encrypt,#function + .global DES_ede3_cbc_encrypt_asm + .type DES_ede3_cbc_encrypt_asm,#function -DES_ede3_cbc_encrypt: +DES_ede3_cbc_encrypt_asm: save %sp, FRAME, %sp @@ -1900,8 +1876,8 @@ DES_ede3_cbc_encrypt: store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv) -.DES_ede3_cbc_encrypt.end: - .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt +.DES_ede3_cbc_encrypt_asm.end: + .size DES_ede3_cbc_encrypt_asm,.DES_ede3_cbc_encrypt_asm.end-DES_ede3_cbc_encrypt_asm .align 256 .type .des_and,#object
diff --git a/crypto/des/des_sparccore.c b/crypto/des/des_sparccore.c
new file mode 100644
index 0000000..4aaa3c7
--- /dev/null
+++ b/crypto/des/des_sparccore.c
@@ -0,0 +1,225 @@
+#include <openssl/crypto.h>
+
+#include "des_locl.h"
+
+/*
+ * C front ends for the SPARC assembler CBC routines built from des_enc.m4.
+ *
+ * The UltraSPARC word load/store macros in des_enc.m4 no longer test
+ * pointer alignment themselves, so the public entry points below only hand
+ * the assembler 4-byte aligned in/out/ivec pointers and stage misaligned
+ * data through aligned temporaries.
+ */
+
+extern void DES_ncbc_encrypt_asm(const DES_LONG *in, DES_LONG *out, long length,
+				 DES_key_schedule *ks, DES_LONG *ivec, int enc);
+
+/*
+ * Fallback for DES_ncbc_encrypt() when no bounce buffer can be allocated:
+ * run the assembler on at most 8 bytes at a time through an aligned stack
+ * buffer, with an aligned local copy of the IV that is written back to
+ * ivec once the whole input has been consumed.
+ */
+static void DES_ncbc_encrypt_slow(const unsigned char *in, unsigned char *out, long length,
+				  DES_key_schedule *ks, DES_cblock *ivec, int enc)
+{
+	DES_LONG tmp_buf[2];
+	DES_LONG ivb[2];
+
+	memcpy(ivb, ivec, sizeof(*ivec));
+
+	while (length > 0) {
+		/* long, not int: length may not fit in an int. */
+		long this_len = length > 8 ? 8 : length;
+
+		memcpy(tmp_buf, in, this_len);
+
+		DES_ncbc_encrypt_asm(tmp_buf, tmp_buf, this_len, ks,
+				     ivb, enc);
+
+		memcpy(out, tmp_buf, this_len);
+
+		/* Advance, or every pass would redo the first block. */
+		in += this_len;
+		out += this_len;
+		length -= this_len;
+	}
+
+	/* Do not leave the last data block behind on the stack. */
+	OPENSSL_cleanse(tmp_buf, sizeof(tmp_buf));
+	memcpy(ivec, ivb, sizeof(*ivec));
+}
+
+/*
+ * CBC-mode DES over length bytes from in to out under key schedule ks;
+ * enc is passed to the assembler unchanged.  in, out and ivec may have any
+ * alignment.
+ *
+ * A misaligned out is produced in a heap bounce buffer and copied back, a
+ * misaligned in is first copied into the (aligned) output area and then
+ * processed in place, and a misaligned ivec goes through a local copy.
+ *
+ * NOTE(review): only length bytes are copied back from the bounce buffer,
+ * whereas an aligned out is written by the assembler directly.  If the
+ * assembler emits a whole final block when length is not a multiple of 8
+ * the two cases differ - confirm against the portable C version.
+ */
+void DES_ncbc_encrypt(const unsigned char *in, unsigned char *out, long length,
+		      DES_key_schedule *ks, DES_cblock *ivec, int enc)
+{
+	const DES_LONG *aligned_in;
+	DES_LONG *aligned_ivec;
+	DES_LONG *aligned_out;
+	DES_LONG ivb[2];
+	long bounce_len = 0;
+
+	/* Nothing to process; also keeps bad lengths out of malloc/memmove. */
+	if (length <= 0)
+		return;
+
+	aligned_out = (DES_LONG *) out;
+	if ((unsigned long) out & 0x3) {
+		/* Whole blocks, in case a partial final block is padded out. */
+		bounce_len = (length + 7) & ~7L;
+		aligned_out = OPENSSL_malloc(bounce_len);
+		if (!aligned_out) {
+			DES_ncbc_encrypt_slow(in, out, length, ks, ivec, enc);
+			return;
+		}
+	}
+
+	aligned_in = (const DES_LONG *) in;
+	if ((unsigned long) in & 0x3) {
+		/* memmove: a caller's out may overlap its misaligned in. */
+		memmove(aligned_out, in, length);
+		aligned_in = (const DES_LONG *) aligned_out;
+	}
+
+	aligned_ivec = (DES_LONG *) ivec;
+	if ((unsigned long) ivec & 0x3) {
+		memcpy(ivb, ivec, sizeof(*ivec));
+		aligned_ivec = ivb;
+	}
+
+	DES_ncbc_encrypt_asm(aligned_in, aligned_out, length, ks,
+			     aligned_ivec, enc);
+
+	if ((unsigned long) out & 0x3) {
+		memcpy(out, aligned_out, length);
+		/* The bounce buffer still holds a copy of the output. */
+		OPENSSL_cleanse(aligned_out, bounce_len);
+		OPENSSL_free(aligned_out);
+	}
+
+	if (aligned_ivec == ivb)
+		memcpy(ivec, ivb, sizeof(*ivec));
+}
+
+extern void DES_ede3_cbc_encrypt_asm(const DES_LONG *in, DES_LONG *out, long length,
+				     DES_key_schedule *ks1,
+				     DES_key_schedule *ks2,
+				     DES_key_schedule *ks3,
+				     DES_LONG *ivec, int enc);
+
+/*
+ * Fallback for DES_ede3_cbc_encrypt() when no bounce buffer can be
+ * allocated: the same 8-bytes-at-a-time scheme as DES_ncbc_encrypt_slow(),
+ * driving the three-key assembler routine instead.
+ */
+static void DES_ede3_cbc_encrypt_slow(const unsigned char *in, unsigned char *out,
+				      long length,
+				      DES_key_schedule *ks1,
+				      DES_key_schedule *ks2,
+				      DES_key_schedule *ks3,
+				      DES_cblock *ivec, int enc)
+{
+	DES_LONG tmp_buf[2];
+	DES_LONG ivb[2];
+
+	memcpy(ivb, ivec, sizeof(*ivec));
+
+	while (length > 0) {
+		/* long, not int: length may not fit in an int. */
+		long this_len = length > 8 ? 8 : length;
+
+		memcpy(tmp_buf, in, this_len);
+
+		DES_ede3_cbc_encrypt_asm(tmp_buf, tmp_buf, this_len,
+					 ks1, ks2, ks3, ivb, enc);
+
+		memcpy(out, tmp_buf, this_len);
+
+		/* Advance, or every pass would redo the first block. */
+		in += this_len;
+		out += this_len;
+		length -= this_len;
+	}
+
+	/* Do not leave the last data block behind on the stack. */
+	OPENSSL_cleanse(tmp_buf, sizeof(tmp_buf));
+	memcpy(ivec, ivb, sizeof(*ivec));
+}
+
+/*
+ * Three-key (ks1, ks2, ks3) counterpart of DES_ncbc_encrypt(), with the
+ * same handling of misaligned in, out and ivec around the assembler call.
+ *
+ * NOTE(review): as there, only length bytes are copied back from the
+ * bounce buffer; confirm the behaviour wanted when length is not a
+ * multiple of 8.
+ */
+void DES_ede3_cbc_encrypt(const unsigned char *in, unsigned char *out,
+			  long length, DES_key_schedule *ks1,
+			  DES_key_schedule *ks2, DES_key_schedule *ks3,
+			  DES_cblock *ivec, int enc)
+{
+	const DES_LONG *aligned_in;
+	DES_LONG *aligned_ivec;
+	DES_LONG *aligned_out;
+	DES_LONG ivb[2];
+	long bounce_len = 0;
+
+	/* Nothing to process; also keeps bad lengths out of malloc/memmove. */
+	if (length <= 0)
+		return;
+
+	aligned_out = (DES_LONG *) out;
+	if ((unsigned long) out & 0x3) {
+		/* Whole blocks, in case a partial final block is padded out. */
+		bounce_len = (length + 7) & ~7L;
+		aligned_out = OPENSSL_malloc(bounce_len);
+		if (!aligned_out) {
+			DES_ede3_cbc_encrypt_slow(in, out, length,
+						  ks1, ks2, ks3,
+						  ivec, enc);
+			return;
+		}
+	}
+
+	aligned_in = (const DES_LONG *) in;
+	if ((unsigned long) in & 0x3) {
+		/* memmove: a caller's out may overlap its misaligned in. */
+		memmove(aligned_out, in, length);
+		aligned_in = (const DES_LONG *) aligned_out;
+	}
+
+	aligned_ivec = (DES_LONG *) ivec;
+	if ((unsigned long) ivec & 0x3) {
+		memcpy(ivb, ivec, sizeof(*ivec));
+		aligned_ivec = ivb;
+	}
+
+	DES_ede3_cbc_encrypt_asm(aligned_in, aligned_out, length,
+				 ks1, ks2, ks3,
+				 aligned_ivec, enc);
+
+	if ((unsigned long) out & 0x3) {
+		memcpy(out, aligned_out, length);
+		/* The bounce buffer still holds a copy of the output. */
+		OPENSSL_cleanse(aligned_out, bounce_len);
+		OPENSSL_free(aligned_out);
+	}
+
+	if (aligned_ivec == ivb)
+		memcpy(ivec, ivb, sizeof(*ivec));
+}
-- 
1.7.10.4
______________________________________________________________________
OpenSSL Project http://www.openssl.org
Development Mailing List openssl-dev@openssl.org
Automated List Manager majord...@openssl.org