The following measurements of "openssl speed sha" were taken on a SPARC-T4.
Baseline (OPENSSL_sparcv9cap=0):

type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
sha1            24322.20k    71309.59k   153340.94k   216593.41k   246923.26k
sha256          17516.97k    40926.25k    72628.65k    89984.68k    96938.67k
sha512          13219.95k    52850.94k    80323.41k   112189.44k   127052.46k

With SHA opcodes enabled:

type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
sha1            33231.78k   115492.48k   318273.91k   579320.83k   759701.50k
sha256          46641.41k   157805.85k   419859.54k   708643.16k   889514.67k
sha512          50184.57k   202770.99k   529172.57k  1023763.11k  1405414.06k

Signed-off-by: David S. Miller <[email protected]>
--- crypto/sha/Makefile | 13 ++- crypto/sha/asm/sha1-sparcv9.pl | 70 +++++++++++++++ crypto/sha/asm/sha512-sparcv9.pl | 175 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 255 insertions(+), 3 deletions(-) diff --git a/crypto/sha/Makefile b/crypto/sha/Makefile index 79be651..06031b7 100644 --- a/crypto/sha/Makefile +++ b/crypto/sha/Makefile @@ -62,13 +62,20 @@ sha256-armv4.S: asm/sha256-armv4.pl sha1-alpha.s: asm/sha1-alpha.pl $(PERL) $< | $(CC) -E - | tee $@ > /dev/null +sha1-sparcv9.s: sha1-sparcv9.S + $(CC) $(CFLAGS) -E sha1-sparcv9.S > $@ +sha256-sparcv9.s: sha256-sparcv9.S + $(CC) $(CFLAGS) -E sha256-sparcv9.S > $@ +sha512-sparcv9.s: sha512-sparcv9.S + $(CC) $(CFLAGS) -E sha512-sparcv9.S > $@ + # Solaris make has to be explicitly told sha1-x86_64.s: asm/sha1-x86_64.pl; $(PERL) asm/sha1-x86_64.pl $(PERLASM_SCHEME) > $@ sha256-x86_64.s:asm/sha512-x86_64.pl; $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@ sha512-x86_64.s:asm/sha512-x86_64.pl; $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@ -sha1-sparcv9.s: asm/sha1-sparcv9.pl; $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS) -sha256-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) -sha512-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) +sha1-sparcv9.S: asm/sha1-sparcv9.pl; $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS) +sha256-sparcv9.S:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ 
$(CFLAGS) +sha512-sparcv9.S:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) sha1-ppc.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@ sha256-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@ diff --git a/crypto/sha/asm/sha1-sparcv9.pl b/crypto/sha/asm/sha1-sparcv9.pl index 5c161ce..f3bf479 100644 --- a/crypto/sha/asm/sha1-sparcv9.pl +++ b/crypto/sha/asm/sha1-sparcv9.pl @@ -183,11 +183,81 @@ $code.=<<___ if ($bits==64); .register %g3,#scratch ___ $code.=<<___; +#include "sparc_arch.h" + .section ".text",#alloc,#execinstr +SPARC_PIC_THUNK(g2) + .align 32 .globl sha1_block_data_order sha1_block_data_order: + SPARC_LOAD_V9_CAPS_LEAF(g2, g1) + andcc %g2, SPARCV9_SHA1, %g0 + be .Lsoftware + nop + + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + ld [%o0 + 0x08], %f2 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x0c], %f3 + bne .Lhwunaligned + ld [%o0 + 0x10], %f4 +.Lhwloop_aligned: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + ldd [%o1 + 0x38], %f22 + .word 0x81b02820 ! SHA1 + subcc %o2, 1, %o2 + bne .Lhwloop_aligned + add %o1, 0x40, %o1 +.Lhwfinish: + st %f0, [%o0 + 0x00] + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + retl + st %f4, [%o0 + 0x10] + +.Lhwunaligned: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +.Lhwloop_unaligned: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + ldd [%o1 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + .word 0x81b02820 ! 
SHA1 + + subcc %o2, 1, %o2 + fsrc2 %f26, %f10 + bne .Lhwloop_unaligned + add %o1, 0x40, %o1 + b .Lhwfinish + nop + +.Lsoftware: save %sp,-$frame,%sp sllx $len,6,$len add $inp,$len,$len diff --git a/crypto/sha/asm/sha512-sparcv9.pl b/crypto/sha/asm/sha512-sparcv9.pl index e728d6e..e5781fe 100644 --- a/crypto/sha/asm/sha512-sparcv9.pl +++ b/crypto/sha/asm/sha512-sparcv9.pl @@ -387,8 +387,12 @@ $code.=<<___ if ($bits==64); .register %g3,#scratch ___ $code.=<<___; +#include "sparc_arch.h" + .section ".text",#alloc,#execinstr +SPARC_PIC_THUNK(g2) + .align 64 K${label}: .type K${label},#object @@ -460,6 +464,177 @@ $code.=<<___; .size K${label},.-K${label} .globl sha${label}_block_data_order sha${label}_block_data_order: + SPARC_LOAD_V9_CAPS_LEAF(g2, g1) + set SPARCV9_SHA${label}, %g1 + andcc %g2, %g1, %g0 + be .Lsoftware + nop +___ +$code.=<<___ if ($SZ==8); # SHA512 + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + andcc %o1, 0x7, %g0 + ldd [%o0 + 0x30], %f12 + bne,pn %xcc, .Lhwunaligned + ldd [%o0 + 0x38], %f14 + +.Lhwaligned_loop: + ldd [%o1 + 0x00], %f16 + ldd [%o1 + 0x08], %f18 + ldd [%o1 + 0x10], %f20 + ldd [%o1 + 0x18], %f22 + ldd [%o1 + 0x20], %f24 + ldd [%o1 + 0x28], %f26 + ldd [%o1 + 0x30], %f28 + ldd [%o1 + 0x38], %f30 + ldd [%o1 + 0x40], %f32 + ldd [%o1 + 0x48], %f34 + ldd [%o1 + 0x50], %f36 + ldd [%o1 + 0x58], %f38 + ldd [%o1 + 0x60], %f40 + ldd [%o1 + 0x68], %f42 + ldd [%o1 + 0x70], %f44 + ldd [%o1 + 0x78], %f46 + + .word 0x81b02860 ! 
SHA512 + + subcc %o2, 1, %o2 + bne,pt %icc, .Lhwaligned_loop + add %o1, 0x80, %o1 +.Lhwfinish: + std %f0, [%o0 + 0x00] + std %f2, [%o0 + 0x08] + std %f4, [%o0 + 0x10] + std %f6, [%o0 + 0x18] + std %f8, [%o0 + 0x20] + std %f10, [%o0 + 0x28] + std %f12, [%o0 + 0x30] + retl + std %f14, [%o0 + 0x38] +.Lhwunaligned: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f18 +.Lhwunaligned_loop: + ldd [%o1 + 0x08], %f20 + ldd [%o1 + 0x10], %f22 + ldd [%o1 + 0x18], %f24 + ldd [%o1 + 0x20], %f26 + ldd [%o1 + 0x28], %f28 + ldd [%o1 + 0x30], %f30 + ldd [%o1 + 0x38], %f32 + ldd [%o1 + 0x40], %f34 + ldd [%o1 + 0x48], %f36 + ldd [%o1 + 0x50], %f38 + ldd [%o1 + 0x58], %f40 + ldd [%o1 + 0x60], %f42 + ldd [%o1 + 0x68], %f44 + ldd [%o1 + 0x70], %f46 + ldd [%o1 + 0x78], %f48 + ldd [%o1 + 0x80], %f50 + + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + faligndata %f26, %f28, %f24 + faligndata %f28, %f30, %f26 + faligndata %f30, %f32, %f28 + faligndata %f32, %f34, %f30 + faligndata %f34, %f36, %f32 + faligndata %f36, %f38, %f34 + faligndata %f38, %f40, %f36 + faligndata %f40, %f42, %f38 + faligndata %f42, %f44, %f40 + faligndata %f44, %f46, %f42 + faligndata %f46, %f48, %f44 + faligndata %f48, %f50, %f46 + + .word 0x81b02860 ! SHA512 + + subcc %o2, 1, %o2 + fsrc2 %f50, %f18 + bne,pt %icc, .Lhwunaligned_loop + add %o1, 0x80, %o1 + + ba,a,pt %xcc, .Lhwfinish +___ +$code.=<<___ if ($SZ==4); # SHA256 + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + ld [%o0 + 0x08], %f2 + ld [%o0 + 0x0c], %f3 + ld [%o0 + 0x10], %f4 + ld [%o0 + 0x14], %f5 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x18], %f6 + bne,pn %xcc, .Lhwunaligned + ld [%o0 + 0x1c], %f7 + +.Lhwloop: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + ldd [%o1 + 0x38], %f22 + + .word 0x81b02840 ! 
SHA256 + + subcc %o2, 1, %o2 + bne,pt %icc, .Lhwloop + add %o1, 0x40, %o1 + +.Lhwfinish: + st %f0, [%o0 + 0x00] + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + st %f4, [%o0 + 0x10] + st %f5, [%o0 + 0x14] + st %f6, [%o0 + 0x18] + retl + st %f7, [%o0 + 0x1c] +.Lhwunaligned: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +.Lhwunaligned_loop: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + ldd [%o1 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + .word 0x81b02840 ! SHA256 + + subcc %o2, 1, %o2 + fsrc2 %f26, %f10 + bne,pt %icc, .Lhwunaligned_loop + add %o1, 0x40, %o1 + + ba,a,pt %xcc, .Lhwfinish +___ +$code.=<<___; +.Lsoftware: save %sp,`-$frame-$locals`,%sp and $inp,`$align-1`,$tmp31 sllx $len,`log(16*$SZ)/log(2)`,$len -- 1.7.10.4 ______________________________________________________________________ OpenSSL Project http://www.openssl.org Development Mailing List [email protected] Automated List Manager [email protected]
