The following measurements of "openssl speed md5" were taken on a SPARC-T4.
Baseline (OPENSSL_sparcv9cap=0):

type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
md5              12755.02k    42772.86k   128861.53k   259312.30k   366547.09k

With MD5 opcode enabled:

type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
md5              14423.71k    50416.70k   173663.49k   445940.05k   816587.27k

Signed-off-by: David S. Miller <[email protected]>
---
 crypto/md5/asm/md5-sparcv9.S | 80 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 77 insertions(+), 3 deletions(-)

diff --git a/crypto/md5/asm/md5-sparcv9.S b/crypto/md5/asm/md5-sparcv9.S
index 30a2fdf..f8cc4cf 100644
--- a/crypto/md5/asm/md5-sparcv9.S
+++ b/crypto/md5/asm/md5-sparcv9.S
@@ -4,7 +4,9 @@
  * details see http://www.openssl.org/~appro/cryptogams/.
  */
 
-#define OASI %i3
+#include "sparc_arch.h"
+
+#define OASI %g3
 
 #define H0 %o0
 #define H1 %o1
@@ -170,15 +172,87 @@
 
 .section ".text",#alloc,#execinstr
 
+SPARC_PIC_THUNK(g2)
+
 .align 32
 .globl md5_block_asm_data_order
 .type md5_block_asm_data_order,#function
 md5_block_asm_data_order:
-	/* %i0=ctx, %i1=data, %i2=num */
-	save %sp, -96, %sp
+	/* %o0=ctx, %o1=data, %o2=num */
 
 	rd %asi, OASI
 	wr %g0, 0x88, %asi ! ASI_PL
+	SPARC_LOAD_V9_CAPS_LEAF(g2, g1)
+	andcc %g2, SPARCV9_MD5, %g0
+	be .Lsoftware
+	nop
+
+	lda [%o0 + 0x00] %asi, %f0
+	lda [%o0 + 0x04] %asi, %f1
+	andcc %o1, 0x7, %g0
+	lda [%o0 + 0x08] %asi, %f2
+	bne,pn %xcc, .Lhwunaligned
+	lda [%o0 + 0x0c] %asi, %f3
+
+.Lhw_loop:
+	ldd [%o1 + 0x00], %f8
+	ldd [%o1 + 0x08], %f10
+	ldd [%o1 + 0x10], %f12
+	ldd [%o1 + 0x18], %f14
+	ldd [%o1 + 0x20], %f16
+	ldd [%o1 + 0x28], %f18
+	ldd [%o1 + 0x30], %f20
+	ldd [%o1 + 0x38], %f22
+
+	.word 0x81b02800 ! MD5
+
+	subcc %o2, 1, %o2
+	bne,pt %icc, .Lhw_loop
+	add %o1, 0x40, %o1
+
+.Lhwfinish:
+	sta %f0, [%o0 + 0x00] %asi
+	sta %f1, [%o0 + 0x04] %asi
+	sta %f2, [%o0 + 0x08] %asi
+	sta %f3, [%o0 + 0x0c] %asi
+	retl
+	wr OASI, 0x0, %asi
+.Lhwunaligned:
+	alignaddr %o1, %g0, %o1
+
+	ldd [%o1 + 0x00], %f10
+.Lhwunaligned_loop:
+	ldd [%o1 + 0x08], %f12
+	ldd [%o1 + 0x10], %f14
+	ldd [%o1 + 0x18], %f16
+	ldd [%o1 + 0x20], %f18
+	ldd [%o1 + 0x28], %f20
+	ldd [%o1 + 0x30], %f22
+	ldd [%o1 + 0x38], %f24
+	ldd [%o1 + 0x40], %f26
+
+	faligndata %f10, %f12, %f8
+	faligndata %f12, %f14, %f10
+	faligndata %f14, %f16, %f12
+	faligndata %f16, %f18, %f14
+	faligndata %f18, %f20, %f16
+	faligndata %f20, %f22, %f18
+	faligndata %f22, %f24, %f20
+	faligndata %f24, %f26, %f22
+
+	.word 0x81b02800 ! MD5
+
+	subcc %o2, 1, %o2
+	fsrc2 %f26, %f10
+	bne,pt %icc, .Lhwunaligned_loop
+	add %o1, 0x40, %o1
+
+	ba,a,pt %xcc, .Lhwfinish
+
+.Lsoftware:
+	/* %i0=ctx, %i1=data, %i2=num */
+	save %sp, -96, %sp
+
 	ld [%i0 + 0x00], H0
 	ld [%i0 + 0x04], H1
 	andcc %i1, 0x3, %g0
--
1.7.10.4

______________________________________________________________________
OpenSSL Project                                 http://www.openssl.org
Development Mailing List                       [email protected]
Automated List Manager                           [email protected]
