The following measurements of "openssl speed md5" were taken on a
SPARC-T4 (figures are in thousands of bytes processed per second).

Baseline (MD5 opcode disabled via OPENSSL_sparcv9cap=0):

type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
md5              12755.02k    42772.86k   128861.53k   259312.30k   366547.09k

With MD5 opcode enabled:

type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
md5              14423.71k    50416.70k   173663.49k   445940.05k   816587.27k

Signed-off-by: David S. Miller <da...@davemloft.net>
---
 crypto/md5/asm/md5-sparcv9.S |   80 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 77 insertions(+), 3 deletions(-)

diff --git a/crypto/md5/asm/md5-sparcv9.S b/crypto/md5/asm/md5-sparcv9.S
index 30a2fdf..f8cc4cf 100644
--- a/crypto/md5/asm/md5-sparcv9.S
+++ b/crypto/md5/asm/md5-sparcv9.S
@@ -4,7 +4,9 @@
  * details see http://www.openssl.org/~appro/cryptogams/.
  */
 
-#define OASI   %i3
+#include "sparc_arch.h"
+
+#define OASI   %g3
 
 #define H0     %o0
 #define H1     %o1
@@ -170,15 +172,87 @@
 
        .section        ".text",#alloc,#execinstr
 
+SPARC_PIC_THUNK(g2)
+
        .align  32
        .globl  md5_block_asm_data_order
        .type   md5_block_asm_data_order,#function
 md5_block_asm_data_order:
-       /* %i0=ctx, %i1=data, %i2=num */
-       save    %sp, -96, %sp
+       /* %o0=ctx, %o1=data, %o2=num */
        rd      %asi, OASI
        wr      %g0, 0x88, %asi         ! ASI_PL
 
+       SPARC_LOAD_V9_CAPS_LEAF(g2, g1)
+       andcc   %g2, SPARCV9_MD5, %g0
+       be      .Lsoftware
+        nop
+
+       lda     [%o0 + 0x00] %asi, %f0
+       lda     [%o0 + 0x04] %asi, %f1
+       andcc   %o1, 0x7, %g0
+       lda     [%o0 + 0x08] %asi, %f2
+       bne,pn  %xcc, .Lhwunaligned
+        lda    [%o0 + 0x0c] %asi, %f3
+
+.Lhw_loop:
+       ldd     [%o1 + 0x00], %f8
+       ldd     [%o1 + 0x08], %f10
+       ldd     [%o1 + 0x10], %f12
+       ldd     [%o1 + 0x18], %f14
+       ldd     [%o1 + 0x20], %f16
+       ldd     [%o1 + 0x28], %f18
+       ldd     [%o1 + 0x30], %f20
+       ldd     [%o1 + 0x38], %f22
+
+       .word   0x81b02800      ! MD5
+
+       subcc   %o2, 1, %o2
+       bne,pt  %icc, .Lhw_loop
+        add    %o1, 0x40, %o1
+
+.Lhwfinish:
+       sta     %f0, [%o0 + 0x00] %asi
+       sta     %f1, [%o0 + 0x04] %asi
+       sta     %f2, [%o0 + 0x08] %asi
+       sta     %f3, [%o0 + 0x0c] %asi
+       retl
+        wr     OASI, 0x0, %asi
+.Lhwunaligned:
+       alignaddr %o1, %g0, %o1
+
+       ldd     [%o1 + 0x00], %f10
+.Lhwunaligned_loop:
+       ldd     [%o1 + 0x08], %f12
+       ldd     [%o1 + 0x10], %f14
+       ldd     [%o1 + 0x18], %f16
+       ldd     [%o1 + 0x20], %f18
+       ldd     [%o1 + 0x28], %f20
+       ldd     [%o1 + 0x30], %f22
+       ldd     [%o1 + 0x38], %f24
+       ldd     [%o1 + 0x40], %f26
+
+       faligndata %f10, %f12, %f8
+       faligndata %f12, %f14, %f10
+       faligndata %f14, %f16, %f12
+       faligndata %f16, %f18, %f14
+       faligndata %f18, %f20, %f16
+       faligndata %f20, %f22, %f18
+       faligndata %f22, %f24, %f20
+       faligndata %f24, %f26, %f22
+
+       .word   0x81b02800      ! MD5
+
+       subcc   %o2, 1, %o2
+       fsrc2   %f26, %f10
+       bne,pt  %icc, .Lhwunaligned_loop
+        add    %o1, 0x40, %o1
+
+       ba,a,pt %xcc, .Lhwfinish
+
+.Lsoftware:
+       /* %i0=ctx, %i1=data, %i2=num */
+       save    %sp, -96, %sp
+
        ld      [%i0 + 0x00], H0
        ld      [%i0 + 0x04], H1
        andcc   %i1, 0x3, %g0
-- 
1.7.10.4

______________________________________________________________________
OpenSSL Project                                 http://www.openssl.org
Development Mailing List                       openssl-dev@openssl.org
Automated List Manager                           majord...@openssl.org

Reply via email to