On a SPARC T4-2, with CAMELLIA opcodes disabled:

type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
camellia-128 cbc    63737.35k    66054.61k    66780.50k    66775.35k    67062.44k
camellia-192 cbc    51126.33k    53836.78k    54761.73k    54964.91k    55017.47k
camellia-256 cbc    51126.24k    53774.55k    54760.02k    54963.54k    55017.47k

with CAMELLIA opcodes enabled:

type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
camellia-128 cbc   483488.94k   608627.31k   646251.78k   645825.54k   657945.94k
camellia-192 cbc   396779.71k   474317.61k   497627.22k   497634.65k   504881.15k
camellia-256 cbc   396796.10k   474297.19k   497624.06k   497644.20k   504872.96k

Signed-off-by: David S. Miller <da...@davemloft.net>
---

If this is applied before the sparc AES opcode patches, there is a minor,
easy-to-resolve conflict in the top-level Configure file.

Tested on the full matrix of {static,shared}/linux{,64}-sparcv9.

 Configure                          |    2 +-
 crypto/camellia/Makefile           |    2 +
 crypto/camellia/asm/cmll-sparcv9.S |  604 ++++++++++++++++++++++++++++++++++++
 crypto/camellia/cmll_sparccore.c   |  219 +++++++++++++
 crypto/sparc_arch.h                |   11 +
 5 files changed, 837 insertions(+), 1 deletion(-)
 create mode 100644 crypto/camellia/asm/cmll-sparcv9.S
 create mode 100644 crypto/camellia/cmll_sparccore.c

diff --git a/Configure b/Configure
index 217a552..b4cbb56 100755
--- a/Configure
+++ b/Configure
@@ -130,7 +130,7 @@ my $x86_elf_asm="$x86_asm:elf";
 
 my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o 
x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o 
aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o 
sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o 
cmll_misc.o:ghash-x86_64.o:e_padlock-x86_64.o";
 my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o 
aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o 
rc4_skey.o:::::ghash-ia64.o::void";
-my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o 
sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_sparccore.o 
aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o 
sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
+my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o 
sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_sparccore.o 
aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o 
sha512-sparcv9.o::::::cmll-sparcv9.o cmll_sparccore.o:ghash-sparcv9.o::void";
 my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
 my $alpha_asm="alphacpuid.o:bn_asm.o 
alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void";
 my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o 
sha256-mips.o sha512-mips.o::::::::";
diff --git a/crypto/camellia/Makefile b/crypto/camellia/Makefile
index 8858dd0..6802393 100644
--- a/crypto/camellia/Makefile
+++ b/crypto/camellia/Makefile
@@ -48,6 +48,8 @@ cmll-x86.s:   asm/cmll-x86.pl ../perlasm/x86asm.pl
        $(PERL) asm/cmll-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
 cmll-x86_64.s:  asm/cmll-x86_64.pl
        $(PERL) asm/cmll-x86_64.pl $(PERLASM_SCHEME) > $@
+cmll-sparcv9.s: asm/cmll-sparcv9.S
+       $(CC) $(CFLAGS) -E asm/cmll-sparcv9.S > $@
 
 files:
        $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
diff --git a/crypto/camellia/asm/cmll-sparcv9.S 
b/crypto/camellia/asm/cmll-sparcv9.S
new file mode 100644
index 0000000..015d5ee
--- /dev/null
+++ b/crypto/camellia/asm/cmll-sparcv9.S
@@ -0,0 +1,604 @@
+/* Written by David S. Miller <da...@davemloft.net> for the OpenSSL
+ * project. The module is, however, dual licensed under OpenSSL and
+ * CRYPTOGAMS licenses depending on where you obtain it. For further
+ * details see http://www.openssl.org/~appro/cryptogams/.
+ */
+
+#include "sparc_arch.h"
+
+#ifdef __arch64__
+       .register       %g2,#scratch
+       .register       %g3,#scratch
+#endif
+
+/* CAMELLIA_6ROUNDS: emit six consecutive CAMELLIA_F instructions.
+ * KEY_BASE is the number of the first float register holding a round
+ * key; successive rounds use KEY_BASE, KEY_BASE+2, ... KEY_BASE+10.
+ * I0 and I1 are the float register numbers of the two 64-bit halves of
+ * the block; the destination alternates I1, I0, I1, ... so each half is
+ * updated three times.
+ */
+#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
+       CAMELLIA_F(KEY_BASE +  0, I1, I0, I1) \
+       CAMELLIA_F(KEY_BASE +  2, I0, I1, I0) \
+       CAMELLIA_F(KEY_BASE +  4, I1, I0, I1) \
+       CAMELLIA_F(KEY_BASE +  6, I0, I1, I0) \
+       CAMELLIA_F(KEY_BASE +  8, I1, I0, I1) \
+       CAMELLIA_F(KEY_BASE + 10, I0, I1, I0)
+
+/* CAMELLIA_6ROUNDS_FL_FLI: the six rounds above, followed by CAMELLIA_FL
+ * applied to I0 with the key in register KEY_BASE+12 and CAMELLIA_FLI
+ * applied to I1 with the key in register KEY_BASE+14.  One invocation
+ * therefore consumes eight consecutive 64-bit key registers.
+ */
+#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \
+       CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
+       CAMELLIA_FL(KEY_BASE + 12, I0, I0) \
+       CAMELLIA_FLI(KEY_BASE + 14, I1, I1)
+
+       .data
+
+       .align  8
+SIGMA: .xword  0xA09E667F3BCC908B
+       .xword  0xB67AE8584CAA73B2
+       .xword  0xC6EF372FE94F82BE
+       .xword  0x54FF53A5F1D36F1C
+       .xword  0x10E527FADE682D1D
+       .xword  0xB05688C2B3E6C1FD
+
+       .text
+
+SPARC_PIC_THUNK(g3)
+
+       .align  32
+       .globl  sparc_hw_camellia_ekeygen
+       .type   sparc_hw_camellia_ekeygen,#function
+sparc_hw_camellia_ekeygen:
+       /* %o0=rawkey, %o1=ks, %o2=keybitlength
+        *
+        * Expands the raw key into the key schedule at ks.  Returns (in
+        * %o0) 3 for a 128-bit key and 4 for any other length; the code
+        * only distinguishes 128, 192 and "otherwise", so the caller is
+        * expected to have validated keybitlength.
+        *
+        * The 128-bit path writes ks offsets 0x00..0xcf, the 192/256-bit
+        * path writes 0x00..0x10f.  Clobbers %g1-%g3, %o4, %o5 and a
+        * number of float registers.
+        */
+       /* rawkey may be misaligned: if so, read the enclosing aligned
+        * doublewords and extract with faligndata; otherwise plain ldd.
+        */
+       andcc   %o0, 0x7, %g0
+       be,pt   %icc, 1f
+        nop
+       alignaddr %o0, %g0, %g1
+       ldd     [%g1 + 0x00], %f14
+       ldd     [%g1 + 0x08], %f16
+       ldd     [%g1 + 0x10], %f18
+       faligndata %f14, %f16, %f0
+       ba,pt   %icc, 2f
+        faligndata %f16, %f18, %f2
+1:     ldd     [%o0 + 0x00], %f0       ! i0/i1, k[0]/k[1]
+       ldd     [%o0 + 0x08], %f2       ! i2/i3, k[2]/k[3]
+2:     std     %f0, [%o1 + 0x00]       ! k[0, 1]
+       fsrc2   %f0, %f28
+       std     %f2, [%o1 + 0x08]       ! k[2, 3]
+       cmp     %o2, 128
+       be      10f
+        fsrc2  %f2, %f30
+
+       /* 192/256-bit keys: fetch the third raw-key doubleword.  The
+        * misaligned path relies on the alignment state set up by the
+        * alignaddr above still being in effect (no alignaddr is issued
+        * in between).
+        */
+       andcc   %o0, 0x7, %g0
+       be,pt   %icc, 1f
+        nop
+       ldd     [%g1 + 0x18], %f16
+       ba,pt   %icc, 2f
+        faligndata %f18, %f16, %f0
+1:     ldd     [%o0 + 0x10], %f0
+2:     std     %f0, [%o1 + 0x20]       ! k[8, 9]
+       /* 192-bit keys have no fourth doubleword: the annulled branch
+        * below derives it as %f0 XOR all-ones (fone), i.e. the bitwise
+        * complement of the third doubleword.
+        */
+       cmp     %o2, 192
+       fone    %f10
+       be,a    2f
+        fxor   %f10, %f0, %f2
+
+       /* 256-bit keys: fetch the fourth raw-key doubleword. */
+       andcc   %o0, 0x7, %g0
+       be,pt   %icc, 1f
+        nop
+       ldd     [%g1 + 0x20], %f18
+       ba,pt   %icc, 2f
+        faligndata %f16, %f18, %f2
+1:     ldd     [%o0 + 0x18], %f2
+2:
+       std     %f2, [%o1 + 0x28]       ! k[10, 11]
+       /* XOR the first 128 key bits (saved in %f28/%f30) into the
+        * second 128 bits before the F rounds below.
+        */
+       fxor    %f28, %f0, %f0
+       fxor    %f30, %f2, %f2
+
+10:
+       /* Load the six SIGMA constants into %f16..%f26, then run two F
+        * rounds, XOR the first 128 key bits back in, and run two more.
+        */
+       SPARC_LOAD_ADDRESS_LEAF(SIGMA, g3, g2)
+       ldd     [%g3 + 0x00], %f16
+       ldd     [%g3 + 0x08], %f18
+       ldd     [%g3 + 0x10], %f20
+       ldd     [%g3 + 0x18], %f22
+       ldd     [%g3 + 0x20], %f24
+       ldd     [%g3 + 0x28], %f26
+       CAMELLIA_F(16, 2, 0, 2)
+       CAMELLIA_F(18, 0, 2, 0)
+       fxor    %f28, %f0, %f0
+       fxor    %f30, %f2, %f2
+       CAMELLIA_F(20, 2, 0, 2)
+       CAMELLIA_F(22, 0, 2, 0)
+
+/* ROTL128: rotate the 128-bit value S01:S23 (S01 = high 64 bits) left by
+ * N bits in place, using TMP1/TMP2 as scratch.  Only valid for 0 < N < 64.
+ * Rotations are cumulative: each use rotates the result of the previous
+ * one, so the N values below are deltas, not absolute rotation counts.
+ */
+#define ROTL128(S01, S23, TMP1, TMP2, N)       \
+       srlx    S01, (64 - N), TMP1;            \
+       sllx    S01, N, S01;                    \
+       srlx    S23, (64 - N), TMP2;            \
+       sllx    S23, N, S23;                    \
+       or      S01, TMP2, S01;                 \
+       or      S23, TMP1, S23
+
+       cmp     %o2, 128
+       bne     1f
+        nop
+       /* 128-bit key */
+       /* Subkeys derived from the F-round output in %f0/%f2 ... */
+       std     %f0, [%o1 + 0x10]       ! k[ 4,  5]
+       std     %f2, [%o1 + 0x18]       ! k[ 6,  7]
+       MOVDTOX_F0_O4
+       MOVDTOX_F2_O5
+       ROTL128(%o4, %o5, %g2, %g3, 15)
+       stx     %o4, [%o1 + 0x30]       ! k[12, 13]
+       stx     %o5, [%o1 + 0x38]       ! k[14, 15]
+       ROTL128(%o4, %o5, %g2, %g3, 15)
+       stx     %o4, [%o1 + 0x40]       ! k[16, 17]
+       stx     %o5, [%o1 + 0x48]       ! k[18, 19]
+       ROTL128(%o4, %o5, %g2, %g3, 15)
+       stx     %o4, [%o1 + 0x60]       ! k[24, 25]
+       ROTL128(%o4, %o5, %g2, %g3, 15)
+       stx     %o4, [%o1 + 0x70]       ! k[28, 29]
+       stx     %o5, [%o1 + 0x78]       ! k[30, 31]
+       ROTL128(%o4, %o5, %g2, %g3, 34)
+       stx     %o4, [%o1 + 0xa0]       ! k[40, 41]
+       stx     %o5, [%o1 + 0xa8]       ! k[42, 43]
+       ROTL128(%o4, %o5, %g2, %g3, 17)
+       stx     %o4, [%o1 + 0xc0]       ! k[48, 49]
+       stx     %o5, [%o1 + 0xc8]       ! k[50, 51]
+
+       /* ... and subkeys derived from the raw key stored at ks+0x00. */
+       ldx     [%o1 + 0x00], %o4       ! k[ 0,  1]
+       ldx     [%o1 + 0x08], %o5       ! k[ 2,  3]
+       ROTL128(%o4, %o5, %g2, %g3, 15)
+       stx     %o4, [%o1 + 0x20]       ! k[ 8,  9]
+       stx     %o5, [%o1 + 0x28]       ! k[10, 11]
+       ROTL128(%o4, %o5, %g2, %g3, 30)
+       stx     %o4, [%o1 + 0x50]       ! k[20, 21]
+       stx     %o5, [%o1 + 0x58]       ! k[22, 23]
+       ROTL128(%o4, %o5, %g2, %g3, 15)
+       stx     %o5, [%o1 + 0x68]       ! k[26, 27]
+       ROTL128(%o4, %o5, %g2, %g3, 17)
+       stx     %o4, [%o1 + 0x80]       ! k[32, 33]
+       stx     %o5, [%o1 + 0x88]       ! k[34, 35]
+       ROTL128(%o4, %o5, %g2, %g3, 17)
+       stx     %o4, [%o1 + 0x90]       ! k[36, 37]
+       stx     %o5, [%o1 + 0x98]       ! k[38, 39]
+       ROTL128(%o4, %o5, %g2, %g3, 17)
+       stx     %o4, [%o1 + 0xb0]       ! k[44, 45]
+       stx     %o5, [%o1 + 0xb8]       ! k[46, 47]
+
+       /* Return value 3 is set in the branch delay slot. */
+       ba,pt   %icc, 2f
+        mov    3, %o0
+
+1:
+       /* 192-bit or 256-bit key */
+       /* Park the F-round output at ks+0x30, XOR it with the second
+        * 128 key bits (ks+0x20) and run two more F rounds with the
+        * last two SIGMA constants to get a second derived value.
+        */
+       std     %f0, [%o1 + 0x30]       ! k[12, 13]
+       std     %f2, [%o1 + 0x38]       ! k[14, 15]
+       ldd     [%o1 + 0x20], %f4       ! k[ 8,  9]
+       ldd     [%o1 + 0x28], %f6       ! k[10, 11]
+       fxor    %f0, %f4, %f0
+       fxor    %f2, %f6, %f2
+       CAMELLIA_F(24, 2, 0, 2)
+       CAMELLIA_F(26, 0, 2, 0)
+       std     %f0, [%o1 + 0x10]       ! k[ 4,  5]
+       std     %f2, [%o1 + 0x18]       ! k[ 6,  7]
+       MOVDTOX_F0_O4
+       MOVDTOX_F2_O5
+       ROTL128(%o4, %o5, %g2, %g3, 30)
+       stx     %o4, [%o1 + 0x50]       ! k[20, 21]
+       stx     %o5, [%o1 + 0x58]       ! k[22, 23]
+       ROTL128(%o4, %o5, %g2, %g3, 30)
+       stx     %o4, [%o1 + 0xa0]       ! k[40, 41]
+       stx     %o5, [%o1 + 0xa8]       ! k[42, 43]
+       ROTL128(%o4, %o5, %g2, %g3, 51)
+       stx     %o4, [%o1 + 0x100]      ! k[64, 65]
+       stx     %o5, [%o1 + 0x108]      ! k[66, 67]
+       /* Rotations of the second 128 key bits; ks+0x20 is overwritten
+        * in place with its own rotated value.
+        */
+       ldx     [%o1 + 0x20], %o4       ! k[ 8,  9]
+       ldx     [%o1 + 0x28], %o5       ! k[10, 11]
+       ROTL128(%o4, %o5, %g2, %g3, 15)
+       stx     %o4, [%o1 + 0x20]       ! k[ 8,  9]
+       stx     %o5, [%o1 + 0x28]       ! k[10, 11]
+       ROTL128(%o4, %o5, %g2, %g3, 15)
+       stx     %o4, [%o1 + 0x40]       ! k[16, 17]
+       stx     %o5, [%o1 + 0x48]       ! k[18, 19]
+       ROTL128(%o4, %o5, %g2, %g3, 30)
+       stx     %o4, [%o1 + 0x90]       ! k[36, 37]
+       stx     %o5, [%o1 + 0x98]       ! k[38, 39]
+       ROTL128(%o4, %o5, %g2, %g3, 34)
+       stx     %o4, [%o1 + 0xd0]       ! k[52, 53]
+       stx     %o5, [%o1 + 0xd8]       ! k[54, 55]
+       /* Rotations of the value parked at ks+0x30 (also rewritten in
+        * place).
+        */
+       ldx     [%o1 + 0x30], %o4       ! k[12, 13]
+       ldx     [%o1 + 0x38], %o5       ! k[14, 15]
+       ROTL128(%o4, %o5, %g2, %g3, 15)
+       stx     %o4, [%o1 + 0x30]       ! k[12, 13]
+       stx     %o5, [%o1 + 0x38]       ! k[14, 15]
+       ROTL128(%o4, %o5, %g2, %g3, 30)
+       stx     %o4, [%o1 + 0x70]       ! k[28, 29]
+       stx     %o5, [%o1 + 0x78]       ! k[30, 31]
+       /* The four word stores below write low(%o4), high(%o5),
+        * low(%o5), high(%o4) in that order, which appears to be a
+        * further 32-bit left rotation of %o4:%o5 done without
+        * modifying the registers (the next ROTL128 continues from the
+        * unrotated value).
+        */
+       srlx    %o4, 32, %g2
+       srlx    %o5, 32, %g3
+       stw     %o4, [%o1 + 0xc0]       ! k[48]
+       stw     %g3, [%o1 + 0xc4]       ! k[49]
+       stw     %o5, [%o1 + 0xc8]       ! k[50]
+       stw     %g2, [%o1 + 0xcc]       ! k[51]
+       ROTL128(%o4, %o5, %g2, %g3, 49)
+       stx     %o4, [%o1 + 0xe0]       ! k[56, 57]
+       stx     %o5, [%o1 + 0xe8]       ! k[58, 59]
+       /* Rotations of the first 128 key bits (ks+0x00 is left intact). */
+       ldx     [%o1 + 0x00], %o4       ! k[ 0,  1]
+       ldx     [%o1 + 0x08], %o5       ! k[ 2,  3]
+       ROTL128(%o4, %o5, %g2, %g3, 45)
+       stx     %o4, [%o1 + 0x60]       ! k[24, 25]
+       stx     %o5, [%o1 + 0x68]       ! k[26, 27]
+       ROTL128(%o4, %o5, %g2, %g3, 15)
+       stx     %o4, [%o1 + 0x80]       ! k[32, 33]
+       stx     %o5, [%o1 + 0x88]       ! k[34, 35]
+       ROTL128(%o4, %o5, %g2, %g3, 17)
+       stx     %o4, [%o1 + 0xb0]       ! k[44, 45]
+       stx     %o5, [%o1 + 0xb8]       ! k[46, 47]
+       ROTL128(%o4, %o5, %g2, %g3, 34)
+       stx     %o4, [%o1 + 0xf0]       ! k[60, 61]
+       stx     %o5, [%o1 + 0xf8]       ! k[62, 63]
+       mov     4, %o0
+2:     retl
+        nop
+       .size   sparc_hw_camellia_ekeygen,.-sparc_hw_camellia_ekeygen
+
+       .align  32
+       .globl  sparc_hw_camellia_encrypt
+       .type   sparc_hw_camellia_encrypt,#function
+sparc_hw_camellia_encrypt:
+       /* %o0=key, %o1=input, %o2=output, %o3=rounds
+        *
+        * Encrypts a single 16-byte block.  input is read and output is
+        * written as four 32-bit words (ld/st), so presumably 4-byte
+        * alignment of both is sufficient; key is read with ldd.
+        * rounds == 3 skips the extra leading 6-round group; any other
+        * value runs it (the caller passes 3 or 4).
+        */
+       ld      [%o1 + 0x00], %f0
+       ld      [%o1 + 0x04], %f1
+       ld      [%o1 + 0x08], %f2
+       ld      [%o1 + 0x0c], %f3
+
+       ldd     [%o0 + 0x00], %f4
+       ldd     [%o0 + 0x08], %f6
+
+       /* XOR in the first 16 key bytes; the second fxor executes in the
+        * branch delay slot on both paths.
+        */
+       cmp     %o3, 3
+       fxor    %f4, %f0, %f0
+       be      1f
+        fxor   %f6, %f2, %f2
+
+       /* rounds != 3: consume the first 0x40 bytes of round keys with
+        * one 6-round + FL/FLI group, then advance the key pointer so
+        * the common tail below sees the same relative layout.
+        */
+       ldd     [%o0 + 0x10], %f8
+       ldd     [%o0 + 0x18], %f10
+       ldd     [%o0 + 0x20], %f12
+       ldd     [%o0 + 0x28], %f14
+       ldd     [%o0 + 0x30], %f16
+       ldd     [%o0 + 0x38], %f18
+       ldd     [%o0 + 0x40], %f20
+       ldd     [%o0 + 0x48], %f22
+       add     %o0, 0x40, %o0
+
+       CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+
+1:
+       /* Common tail: 6+FL/FLI, 6+FL/FLI, 6 rounds, then XOR with the
+        * final 16 key bytes (%f52/%f54).
+        */
+       ldd     [%o0 + 0x10], %f8
+       ldd     [%o0 + 0x18], %f10
+       ldd     [%o0 + 0x20], %f12
+       ldd     [%o0 + 0x28], %f14
+       ldd     [%o0 + 0x30], %f16
+       ldd     [%o0 + 0x38], %f18
+       ldd     [%o0 + 0x40], %f20
+       ldd     [%o0 + 0x48], %f22
+       ldd     [%o0 + 0x50], %f24
+       ldd     [%o0 + 0x58], %f26
+       ldd     [%o0 + 0x60], %f28
+       ldd     [%o0 + 0x68], %f30
+       ldd     [%o0 + 0x70], %f32
+       ldd     [%o0 + 0x78], %f34
+       ldd     [%o0 + 0x80], %f36
+       ldd     [%o0 + 0x88], %f38
+       ldd     [%o0 + 0x90], %f40
+       ldd     [%o0 + 0x98], %f42
+       ldd     [%o0 + 0xa0], %f44
+       ldd     [%o0 + 0xa8], %f46
+       ldd     [%o0 + 0xb0], %f48
+       ldd     [%o0 + 0xb8], %f50
+       ldd     [%o0 + 0xc0], %f52
+       ldd     [%o0 + 0xc8], %f54
+
+       CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+       CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+       CAMELLIA_6ROUNDS(40, 0, 2)
+       fxor    %f52, %f2, %f2
+       fxor    %f54, %f0, %f0
+
+       /* The two halves are written out swapped: %f2 first, then %f0. */
+       st      %f2, [%o2 + 0x00]
+       st      %f3, [%o2 + 0x04]
+       st      %f0, [%o2 + 0x08]
+       st      %f1, [%o2 + 0x0c]
+
+       retl
+        nop
+       .size   sparc_hw_camellia_encrypt,.-sparc_hw_camellia_encrypt
+
+       .align  32
+       .globl  sparc_hw_camellia_decrypt
+       .type   sparc_hw_camellia_decrypt,#function
+sparc_hw_camellia_decrypt:
+       /* %o0=key, %o1=input, %o2=output, %o3=rounds
+        *
+        * Decrypts a single 16-byte block using the same key schedule as
+        * the encrypt routine, walked backwards.  input/output are
+        * accessed as four 32-bit words (ld/st).  rounds == 3 skips the
+        * extra leading 6-round group; any other value runs it.
+        */
+       ld      [%o1 + 0x00], %f0
+       ld      [%o1 + 0x04], %f1
+       ld      [%o1 + 0x08], %f2
+       ld      [%o1 + 0x0c], %f3
+
+       /* Point %o0 at key + rounds*64 (0xc0 for 3, 0x100 for 4): the
+        * last 16 bytes of the schedule, which are XORed in first.
+        */
+       sll     %o3, 6, %o4
+       add     %o0, %o4, %o0
+
+       ldd     [%o0 + 0x00], %f4
+       ldd     [%o0 + 0x08], %f6
+
+       cmp     %o3, 3
+       fxor    %f4, %f0, %f0
+       be      1f
+        fxor   %f6, %f2, %f2
+
+       /* rounds != 3: run one extra 6-round + FL/FLI group on the 0x40
+        * bytes of keys just below %o0 (read in descending order), then
+        * step the key pointer down for the common tail.
+        */
+       ldd     [%o0 - 0x08], %f8
+       ldd     [%o0 - 0x10], %f10
+       ldd     [%o0 - 0x18], %f12
+       ldd     [%o0 - 0x20], %f14
+       ldd     [%o0 - 0x28], %f16
+       ldd     [%o0 - 0x30], %f18
+       ldd     [%o0 - 0x38], %f20
+       ldd     [%o0 - 0x40], %f22
+       sub     %o0, 0x40, %o0
+
+       CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+
+1:
+       /* Common tail: keys are loaded at descending addresses into
+        * ascending registers.  Note the last two loads are NOT in
+        * descending order: %f52 gets the doubleword at -0xc0 and %f54
+        * the one at -0xb8, i.e. the first 16 bytes of the schedule in
+        * their natural order for the final XOR.
+        */
+       ldd     [%o0 - 0x08], %f8
+       ldd     [%o0 - 0x10], %f10
+       ldd     [%o0 - 0x18], %f12
+       ldd     [%o0 - 0x20], %f14
+       ldd     [%o0 - 0x28], %f16
+       ldd     [%o0 - 0x30], %f18
+       ldd     [%o0 - 0x38], %f20
+       ldd     [%o0 - 0x40], %f22
+       ldd     [%o0 - 0x48], %f24
+       ldd     [%o0 - 0x50], %f26
+       ldd     [%o0 - 0x58], %f28
+       ldd     [%o0 - 0x60], %f30
+       ldd     [%o0 - 0x68], %f32
+       ldd     [%o0 - 0x70], %f34
+       ldd     [%o0 - 0x78], %f36
+       ldd     [%o0 - 0x80], %f38
+       ldd     [%o0 - 0x88], %f40
+       ldd     [%o0 - 0x90], %f42
+       ldd     [%o0 - 0x98], %f44
+       ldd     [%o0 - 0xa0], %f46
+       ldd     [%o0 - 0xa8], %f48
+       ldd     [%o0 - 0xb0], %f50
+       ldd     [%o0 - 0xc0], %f52
+       ldd     [%o0 - 0xb8], %f54
+
+       CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+       CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+       CAMELLIA_6ROUNDS(40, 0, 2)
+       fxor    %f52, %f2, %f2
+       fxor    %f54, %f0, %f0
+
+       /* The two halves are written out swapped: %f2 first, then %f0. */
+       st      %f2, [%o2 + 0x00]
+       st      %f3, [%o2 + 0x04]
+       st      %f0, [%o2 + 0x08]
+       st      %f1, [%o2 + 0x0c]
+
+       retl
+        nop
+       .size   sparc_hw_camellia_decrypt,.-sparc_hw_camellia_decrypt
+
+/* LOAD_ENCRYPT_KEY(REG): load the 26 key-schedule doublewords at offsets
+ * 0x00..0xc8 from integer register %REG (given without the '%') into
+ * %f4, %f6, ... %f54 in ascending order.  This is the complete schedule
+ * for the 3-grand-round case; the 4-round CBC routine loads the remaining
+ * keys separately.
+ */
+#define LOAD_ENCRYPT_KEY(REG) \
+       ldd     [%REG + 0x00], %f4; \
+       ldd     [%REG + 0x08], %f6; \
+       ldd     [%REG + 0x10], %f8; \
+       ldd     [%REG + 0x18], %f10; \
+       ldd     [%REG + 0x20], %f12; \
+       ldd     [%REG + 0x28], %f14; \
+       ldd     [%REG + 0x30], %f16; \
+       ldd     [%REG + 0x38], %f18; \
+       ldd     [%REG + 0x40], %f20; \
+       ldd     [%REG + 0x48], %f22; \
+       ldd     [%REG + 0x50], %f24; \
+       ldd     [%REG + 0x58], %f26; \
+       ldd     [%REG + 0x60], %f28; \
+       ldd     [%REG + 0x68], %f30; \
+       ldd     [%REG + 0x70], %f32; \
+       ldd     [%REG + 0x78], %f34; \
+       ldd     [%REG + 0x80], %f36; \
+       ldd     [%REG + 0x88], %f38; \
+       ldd     [%REG + 0x90], %f40; \
+       ldd     [%REG + 0x98], %f42; \
+       ldd     [%REG + 0xa0], %f44; \
+       ldd     [%REG + 0xa8], %f46; \
+       ldd     [%REG + 0xb0], %f48; \
+       ldd     [%REG + 0xb8], %f50; \
+       ldd     [%REG + 0xc0], %f52; \
+       ldd     [%REG + 0xc8], %f54;
+
+       .align  32
+       .globl  sparc_hw_camellia_cbc_encrypt_3rounds
+       .type   sparc_hw_camellia_cbc_encrypt_3rounds,#function
+sparc_hw_camellia_cbc_encrypt_3rounds:
+       /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV
+        *
+        * CBC encryption with a 3-grand-round key schedule.
+        * input, output and IV are accessed with ldd/std, so they must
+        * be 8-byte aligned.  The loop body runs once BEFORE len is
+        * decremented and compared with zero (32-bit %icc), so len must
+        * be a non-zero multiple of 16; any other value overruns the
+        * buffers.  The chaining value lives in %f60/%f62 and is written
+        * back to IV on exit.
+        */
+       LOAD_ENCRYPT_KEY(o3)
+       ldd     [%o4 + 0x00], %f60
+       ldd     [%o4 + 0x08], %f62
+1:     ldd     [%o0 + 0x00], %f0
+       ldd     [%o0 + 0x08], %f2
+       add     %o0, 0x10, %o0
+       /* plaintext ^= chaining value, then the initial key XOR */
+       fxor    %f60, %f0, %f0
+       fxor    %f62, %f2, %f2
+       fxor    %f4, %f0, %f0
+       fxor    %f6, %f2, %f2
+       CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+       CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+       CAMELLIA_6ROUNDS(40, 0, 2)
+       /* Final key XOR (halves swapped) lands directly in %f60/%f62, so
+        * the ciphertext doubles as the next chaining value.
+        */
+       fxor    %f52, %f2, %f60
+       fxor    %f54, %f0, %f62
+       std     %f60, [%o1 + 0x00]
+       std     %f62, [%o1 + 0x08]
+       subcc   %o2, 0x10, %o2
+       bne,pt  %icc, 1b
+        add    %o1, 0x10, %o1
+       /* Store the last ciphertext block back as the new IV. */
+       std     %f60, [%o4 + 0x00]
+       retl
+        std    %f62, [%o4 + 0x08]
+       .size   
sparc_hw_camellia_cbc_encrypt_3rounds,.-sparc_hw_camellia_cbc_encrypt_3rounds
+
+       .align  32
+       .globl  sparc_hw_camellia_cbc_encrypt_4rounds
+       .type   sparc_hw_camellia_cbc_encrypt_4rounds,#function
+sparc_hw_camellia_cbc_encrypt_4rounds:
+       /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV
+        *
+        * CBC encryption with a 4-grand-round key schedule (key offsets
+        * 0x00..0x10f).  Same contract as the 3-round variant: 8-byte
+        * aligned input/output/IV (ldd/std), and len must be a non-zero
+        * multiple of 16 because the loop tests len only after
+        * processing a block.
+        *
+        * %f4..%f54 hold the first 26 key doublewords; the remaining
+        * eight (0xd0..0x108) are swapped into %f8..%f22 in the middle
+        * of each iteration and the original contents are reloaded
+        * before the next one.
+        */
+       LOAD_ENCRYPT_KEY(o3)
+       ldd     [%o4 + 0x00], %f60
+       ldd     [%o4 + 0x08], %f62
+1:     ldd     [%o0 + 0x00], %f0
+       ldd     [%o0 + 0x08], %f2
+       add     %o0, 0x10, %o0
+       fxor    %f60, %f0, %f0
+       fxor    %f62, %f2, %f2
+       fxor    %f4, %f0, %f0
+       fxor    %f6, %f2, %f2
+       CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+       /* %f8..%f22 are free now: load the tail of the schedule there
+        * while the next two groups run from %f24..%f54.
+        */
+       ldd     [%o3 + 0xd0], %f8
+       ldd     [%o3 + 0xd8], %f10
+       ldd     [%o3 + 0xe0], %f12
+       ldd     [%o3 + 0xe8], %f14
+       ldd     [%o3 + 0xf0], %f16
+       ldd     [%o3 + 0xf8], %f18
+       ldd     [%o3 + 0x100], %f20
+       ldd     [%o3 + 0x108], %f22
+       CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+       CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
+       /* Last six rounds, interleaved with restoring %f8..%f22 to the
+        * keys at 0x10..0x48 for the next iteration.
+        */
+       CAMELLIA_F(8, 2, 0, 2)
+       CAMELLIA_F(10, 0, 2, 0)
+       ldd     [%o3 + 0x10], %f8
+       ldd     [%o3 + 0x18], %f10
+       CAMELLIA_F(12, 2, 0, 2)
+       CAMELLIA_F(14, 0, 2, 0)
+       ldd     [%o3 + 0x20], %f12
+       ldd     [%o3 + 0x28], %f14
+       CAMELLIA_F(16, 2, 0, 2)
+       CAMELLIA_F(18, 0, 2, 0)
+       ldd     [%o3 + 0x30], %f16
+       ldd     [%o3 + 0x38], %f18
+       /* Final key XOR with the keys from 0x100/0x108 (halves swapped);
+        * the result is both the ciphertext and the next chaining value.
+        */
+       fxor    %f20, %f2, %f60
+       fxor    %f22, %f0, %f62
+       ldd     [%o3 + 0x40], %f20
+       ldd     [%o3 + 0x48], %f22
+       std     %f60, [%o1 + 0x00]
+       std     %f62, [%o1 + 0x08]
+       subcc   %o2, 0x10, %o2
+       bne,pt  %icc, 1b
+        add    %o1, 0x10, %o1
+       /* Store the last ciphertext block back as the new IV. */
+       std     %f60, [%o4 + 0x00]
+       retl
+        std    %f62, [%o4 + 0x08]
+       .size   
sparc_hw_camellia_cbc_encrypt_4rounds,.-sparc_hw_camellia_cbc_encrypt_4rounds
+
+/* LOAD_DECRYPT_KEY(REG, OFF): load key-schedule doublewords for
+ * decryption from integer register %REG (given without the '%').
+ * %f4/%f6 get the 16 bytes at OFF (the end of the schedule, in natural
+ * order); %f8..%f50 get the 22 doublewords below OFF in DESCENDING
+ * address order (OFF-0x08 down to OFF-0xb0).  %f52/%f54 are not loaded
+ * here; each caller fills them in itself.
+ */
+#define LOAD_DECRYPT_KEY(REG, OFF) \
+       ldd     [%REG + OFF + 0x00], %f4; \
+       ldd     [%REG + OFF + 0x08], %f6; \
+       ldd     [%REG + OFF - 0x08], %f8; \
+       ldd     [%REG + OFF - 0x10], %f10; \
+       ldd     [%REG + OFF - 0x18], %f12; \
+       ldd     [%REG + OFF - 0x20], %f14; \
+       ldd     [%REG + OFF - 0x28], %f16; \
+       ldd     [%REG + OFF - 0x30], %f18; \
+       ldd     [%REG + OFF - 0x38], %f20; \
+       ldd     [%REG + OFF - 0x40], %f22; \
+       ldd     [%REG + OFF - 0x48], %f24; \
+       ldd     [%REG + OFF - 0x50], %f26; \
+       ldd     [%REG + OFF - 0x58], %f28; \
+       ldd     [%REG + OFF - 0x60], %f30; \
+       ldd     [%REG + OFF - 0x68], %f32; \
+       ldd     [%REG + OFF - 0x70], %f34; \
+       ldd     [%REG + OFF - 0x78], %f36; \
+       ldd     [%REG + OFF - 0x80], %f38; \
+       ldd     [%REG + OFF - 0x88], %f40; \
+       ldd     [%REG + OFF - 0x90], %f42; \
+       ldd     [%REG + OFF - 0x98], %f44; \
+       ldd     [%REG + OFF - 0xa0], %f46; \
+       ldd     [%REG + OFF - 0xa8], %f48; \
+       ldd     [%REG + OFF - 0xb0], %f50;
+
+       .align  32
+       .globl  sparc_hw_camellia_cbc_decrypt_3rounds
+       .type   sparc_hw_camellia_cbc_decrypt_3rounds,#function
+sparc_hw_camellia_cbc_decrypt_3rounds:
+       /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV
+        *
+        * CBC decryption with a 3-grand-round key schedule.
+        * input, output and IV are accessed with ldd/std (8-byte
+        * alignment required).  The loop tests len only after a block
+        * has been processed (32-bit %icc), so len must be a non-zero
+        * multiple of 16.  Each ciphertext block is kept in %f56/%f58
+        * until after the output is computed, so it can become the next
+        * chaining value even when input and output are the same buffer.
+        */
+       LOAD_DECRYPT_KEY(o3, 0x0c0)
+       /* First 16 bytes of the schedule, used for the final XOR. */
+       ldd     [%o3 + 0x00], %f52
+       ldd     [%o3 + 0x08], %f54
+       ldd     [%o4 + 0x00], %f60
+       ldd     [%o4 + 0x08], %f62
+1:     ldd     [%o0 + 0x00], %f56
+       ldd     [%o0 + 0x08], %f58
+       add     %o0, 0x10, %o0
+       fxor    %f4, %f56, %f0
+       fxor    %f6, %f58, %f2
+       CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+       CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+       CAMELLIA_6ROUNDS(40, 0, 2)
+       fxor    %f52, %f2, %f2
+       fxor    %f54, %f0, %f0
+       /* XOR with the previous chaining value, then make this
+        * ciphertext block the new chaining value.
+        */
+       fxor    %f60, %f2, %f2
+       fxor    %f62, %f0, %f0
+       fsrc2   %f56, %f60
+       fsrc2   %f58, %f62
+       std     %f2, [%o1 + 0x00]
+       std     %f0, [%o1 + 0x08]
+       subcc   %o2, 0x10, %o2
+       bne,pt  %icc, 1b
+        add    %o1, 0x10, %o1
+       /* Store the last ciphertext block back as the new IV. */
+       std     %f60, [%o4 + 0x00]
+       retl
+        std    %f62, [%o4 + 0x08]
+       .size   
sparc_hw_camellia_cbc_decrypt_3rounds,.-sparc_hw_camellia_cbc_decrypt_3rounds
+
+       .align  32
+       .globl  sparc_hw_camellia_cbc_decrypt_4rounds
+       .type   sparc_hw_camellia_cbc_decrypt_4rounds,#function
+sparc_hw_camellia_cbc_decrypt_4rounds:
+       /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV
+        *
+        * CBC decryption with a 4-grand-round key schedule (key offsets
+        * 0x00..0x10f, walked downwards from 0x100).  Same contract as
+        * the 3-round variant: 8-byte aligned input/output/IV (ldd/std)
+        * and len a non-zero multiple of 16, since the loop tests len
+        * only after processing a block.
+        *
+        * As in the 4-round encrypt routine, %f8..%f22 are reused
+        * mid-iteration for the part of the schedule that does not fit
+        * in registers and are reloaded before the next iteration.
+        */
+       LOAD_DECRYPT_KEY(o3, 0x100)
+       /* Continue the descending sequence into %f52/%f54
+        * (key offsets 0x48 and 0x40).
+        */
+       ldd     [%o3 + 0x100 - 0xb8], %f52
+       ldd     [%o3 + 0x100 - 0xc0], %f54
+       ldd     [%o4 + 0x00], %f60
+       ldd     [%o4 + 0x08], %f62
+1:     ldd     [%o0 + 0x00], %f56
+       ldd     [%o0 + 0x08], %f58
+       add     %o0, 0x10, %o0
+       fxor    %f4, %f56, %f0
+       fxor    %f6, %f58, %f2
+       CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+       /* %f8..%f22 are free now: load the keys at 0x38 down to 0x10
+        * for the last six rounds, and the first 16 bytes of the
+        * schedule (in natural order) into %f20/%f22 for the final XOR.
+        */
+       ldd     [%o3 + 0x100 - 0xc8], %f8
+       ldd     [%o3 + 0x100 - 0xd0], %f10
+       ldd     [%o3 + 0x100 - 0xd8], %f12
+       ldd     [%o3 + 0x100 - 0xe0], %f14
+       ldd     [%o3 + 0x100 - 0xe8], %f16
+       ldd     [%o3 + 0x100 - 0xf0], %f18
+       ldd     [%o3 + 0x00], %f20
+       ldd     [%o3 + 0x08], %f22
+       CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+       CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
+       /* Last six rounds, interleaved with restoring %f8..%f22 to the
+        * values LOAD_DECRYPT_KEY put there.
+        */
+       CAMELLIA_F(8, 2, 0, 2)
+       CAMELLIA_F(10, 0, 2, 0)
+       ldd     [%o3 + 0x100 - 0x08], %f8
+       ldd     [%o3 + 0x100 - 0x10], %f10
+       CAMELLIA_F(12, 2, 0, 2)
+       CAMELLIA_F(14, 0, 2, 0)
+       ldd     [%o3 + 0x100 - 0x18], %f12
+       ldd     [%o3 + 0x100 - 0x20], %f14
+       CAMELLIA_F(16, 2, 0, 2)
+       CAMELLIA_F(18, 0, 2, 0)
+       ldd     [%o3 + 0x100 - 0x28], %f16
+       ldd     [%o3 + 0x100 - 0x30], %f18
+       fxor    %f20, %f2, %f2
+       fxor    %f22, %f0, %f0
+       ldd     [%o3 + 0x100 - 0x38], %f20
+       ldd     [%o3 + 0x100 - 0x40], %f22
+       /* XOR with the previous chaining value, then make this
+        * ciphertext block (saved in %f56/%f58) the new chaining value.
+        */
+       fxor    %f60, %f2, %f2
+       fxor    %f62, %f0, %f0
+       fsrc2   %f56, %f60
+       fsrc2   %f58, %f62
+       std     %f2, [%o1 + 0x00]
+       std     %f0, [%o1 + 0x08]
+       subcc   %o2, 0x10, %o2
+       bne,pt  %icc, 1b
+        add    %o1, 0x10, %o1
+       /* Store the last ciphertext block back as the new IV. */
+       std     %f60, [%o4 + 0x00]
+       retl
+        std    %f62, [%o4 + 0x08]
+       .size   
sparc_hw_camellia_cbc_decrypt_4rounds,.-sparc_hw_camellia_cbc_decrypt_4rounds
diff --git a/crypto/camellia/cmll_sparccore.c b/crypto/camellia/cmll_sparccore.c
new file mode 100644
index 0000000..0133a36
--- /dev/null
+++ b/crypto/camellia/cmll_sparccore.c
@@ -0,0 +1,219 @@
+#include <openssl/opensslv.h>
+#include <openssl/camellia.h>
+#include <openssl/crypto.h>
+#include <openssl/modes.h>
+#include "cmll_locl.h"
+
+#include "sparc_arch.h"
+
+const char CAMELLIA_version[]="CAMELLIA" OPENSSL_VERSION_PTEXT;
+
+#define Camellia_Ekeygen _generic_camellia_ekeygen
+int _generic_camellia_ekeygen(int keyBitLength, const u8 *rawKey,
+                             KEY_TABLE_TYPE k);
+
+#define Camellia_EncryptBlock_Rounds _generic_camellia_encryptblock_rounds
+void _generic_camellia_encryptblock_rounds(int grandRounds,
+                                          const u8 plaintext[],
+                                          const KEY_TABLE_TYPE keyTable,
+                                          u8 ciphertext[]);
+
+#define Camellia_DecryptBlock_Rounds _generic_camellia_decryptblock_rounds
+void _generic_camellia_decryptblock_rounds(int grandRounds,
+                                          const u8 ciphertext[],
+                                          const KEY_TABLE_TYPE keyTable,
+                                          u8 plaintext[]);
+
+#include "camellia.c"
+
+extern int sparc_hw_camellia_ekeygen(const unsigned char *userkey,
+                                    KEY_TABLE_TYPE key, const int bits);
+
+/*
+ * Build the Camellia key schedule for a 128-, 192- or 256-bit user key.
+ *
+ * Returns 0 on success, -1 if either pointer is NULL and -2 if `bits` is
+ * not one of the supported key lengths.  The schedule is produced by the
+ * T4 assembler routine when the CPU advertises the Camellia opcodes and
+ * by the generic C code otherwise; whichever is used, its return value is
+ * recorded in key->grand_rounds.
+ */
+int Camellia_set_key(const unsigned char *userKey, const int bits,
+                    CAMELLIA_KEY *key)
+{
+       if (!(userKey && key))
+               return -1;
+
+       switch (bits) {
+       case 128:
+       case 192:
+       case 256:
+               break;
+       default:
+               return -2;
+       }
+
+       key->grand_rounds = (OPENSSL_sparcv9cap_P & SPARCV9_CAMELLIA)
+               ? sparc_hw_camellia_ekeygen(userKey, key->u.rd_key, bits)
+               : _generic_camellia_ekeygen(bits, userKey, key->u.rd_key);
+
+       return 0;
+}
+
+typedef unsigned long long cmll_u64;
+
+extern void sparc_hw_camellia_encrypt(const KEY_TABLE_TYPE k,
+                                     const cmll_u64 *in,
+                                     cmll_u64 *out, int rounds);
+
+/*
+ * Encrypt one CAMELLIA_BLOCK_SIZE-byte block from `in` to `out`.
+ *
+ * Without the hardware opcodes this is the generic C implementation.
+ * With them, the assembler routine is handed 4-byte aligned pointers: a
+ * misaligned `out` is replaced by a stack bounce buffer that is copied
+ * out afterwards, and a misaligned `in` is first copied into whatever
+ * buffer serves as the destination and then encrypted in place.
+ */
+void Camellia_encrypt(const unsigned char *in, unsigned char *out,
+                     const CAMELLIA_KEY *key)
+{
+       cmll_u64 scratch[2];
+       cmll_u64 *dst;
+       const cmll_u64 *src;
+       const int out_misaligned = ((unsigned long) out & 0x3) != 0;
+
+       if (OPENSSL_sparcv9cap_P & SPARCV9_CAMELLIA) {
+               dst = out_misaligned ? scratch : (cmll_u64 *) out;
+
+               if ((unsigned long) in & 0x3) {
+                       /* stage the input in the destination buffer */
+                       memcpy(dst, in, CAMELLIA_BLOCK_SIZE);
+                       src = (const cmll_u64 *) dst;
+               } else {
+                       src = (const cmll_u64 *) in;
+               }
+
+               sparc_hw_camellia_encrypt(key->u.rd_key, src, dst,
+                                         key->grand_rounds);
+
+               if (out_misaligned)
+                       memcpy(out, scratch, CAMELLIA_BLOCK_SIZE);
+               return;
+       }
+
+       _generic_camellia_encryptblock_rounds(key->grand_rounds, in,
+                                             key->u.rd_key, out);
+}
+
+extern void sparc_hw_camellia_decrypt(const KEY_TABLE_TYPE k,
+                                     const cmll_u64 *in,
+                                     cmll_u64 *out, int rounds);
+
+/*
+ * Decrypt one CAMELLIA_BLOCK_SIZE-byte block from `in` to `out`.
+ *
+ * Mirrors Camellia_encrypt: generic C code when the Camellia opcodes are
+ * unavailable; otherwise the assembler routine, with misaligned (not
+ * 4-byte aligned) buffers bounced through aligned storage.
+ */
+void Camellia_decrypt(const unsigned char *in, unsigned char *out,
+                     const CAMELLIA_KEY *key)
+{
+       cmll_u64 scratch[2];
+       cmll_u64 *dst;
+       const cmll_u64 *src;
+       const int out_misaligned = ((unsigned long) out & 0x3) != 0;
+
+       if (OPENSSL_sparcv9cap_P & SPARCV9_CAMELLIA) {
+               dst = out_misaligned ? scratch : (cmll_u64 *) out;
+
+               if ((unsigned long) in & 0x3) {
+                       /* stage the input in the destination buffer */
+                       memcpy(dst, in, CAMELLIA_BLOCK_SIZE);
+                       src = (const cmll_u64 *) dst;
+               } else {
+                       src = (const cmll_u64 *) in;
+               }
+
+               sparc_hw_camellia_decrypt(key->u.rd_key, src, dst,
+                                         key->grand_rounds);
+
+               if (out_misaligned)
+                       memcpy(out, scratch, CAMELLIA_BLOCK_SIZE);
+               return;
+       }
+
+       _generic_camellia_decryptblock_rounds(key->grand_rounds, in,
+                                             key->u.rd_key, out);
+}
+
+extern void sparc_hw_camellia_cbc_encrypt_3rounds(const cmll_u64 *in,
+                                                 cmll_u64 *out,
+                                                 unsigned int length,
+                                                 const KEY_TABLE_TYPE k,
+                                                 cmll_u64 *IV);
+
+extern void sparc_hw_camellia_cbc_encrypt_4rounds(const cmll_u64 *in,
+                                                 cmll_u64 *out,
+                                                 unsigned int length,
+                                                 const KEY_TABLE_TYPE k,
+                                                 cmll_u64 *IV);
+
+extern void sparc_hw_camellia_cbc_decrypt_3rounds(const cmll_u64 *in,
+                                                 cmll_u64 *out,
+                                                 unsigned int length,
+                                                 const KEY_TABLE_TYPE k,
+                                                 cmll_u64 *IV);
+
+extern void sparc_hw_camellia_cbc_decrypt_4rounds(const cmll_u64 *in,
+                                                 cmll_u64 *out,
+                                                 unsigned int length,
+                                                 const KEY_TABLE_TYPE k,
+                                                 cmll_u64 *IV);
+
+/*
+ * CBC-mode Camellia over `length` bytes from `in` to `out`; enc != 0
+ * encrypts, enc == 0 decrypts.  `ivec` (CAMELLIA_BLOCK_SIZE bytes) is
+ * updated in place with the chaining value for a following call.
+ *
+ * The T4 assembler routines are used only when all of these hold:
+ *   - the CPU advertises the Camellia opcodes;
+ *   - `length` is a non-zero multiple of CAMELLIA_BLOCK_SIZE.  The
+ *     assembler loops process one block *before* decrementing and testing
+ *     the remaining count, so a zero or ragged length would never hit the
+ *     terminating zero and would run past the end of the buffers;
+ *   - `length` survives conversion to the `unsigned int` the assembler
+ *     prototypes take (the loops test 32-bit condition codes).
+ * Everything else, including a failed bounce-buffer allocation, goes
+ * through the generic CRYPTO_cbc128_* code, which handles empty and
+ * partial-block input itself.
+ *
+ * The assembler uses 64-bit loads and stores, so buffers that are not
+ * 8-byte aligned are bounced: a misaligned `out` through a heap buffer,
+ * a misaligned `in` by copying it into the output buffer and working in
+ * place, and a misaligned `ivec` through a stack copy.
+ */
+void Camellia_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                         size_t length, const CAMELLIA_KEY *key,
+                         unsigned char *ivec, const int enc)
+{
+       const cmll_u64 *aligned_in;
+       cmll_u64 *aligned_ivec;
+       cmll_u64 *aligned_out;
+       cmll_u64 ivb[2];
+
+       if (!(OPENSSL_sparcv9cap_P & SPARCV9_CAMELLIA))
+               goto slow;
+
+       /* Lengths the do/while-style assembler loops cannot handle. */
+       if (length == 0 || (length % CAMELLIA_BLOCK_SIZE) != 0 ||
+           (size_t)(unsigned int) length != length)
+               goto slow;
+
+       aligned_out = (cmll_u64 *) out;
+       if ((unsigned long) out & 0x7) {
+               aligned_out = OPENSSL_malloc(length);
+               if (!aligned_out)
+                       goto slow;
+       }
+
+       aligned_in = (const cmll_u64 *) in;
+       if ((unsigned long) in & 0x7) {
+               /* stage the input in the output buffer, then go in place */
+               memcpy(aligned_out, in, length);
+               aligned_in = (const cmll_u64 *) aligned_out;
+       }
+
+       aligned_ivec = (cmll_u64 *) ivec;
+       if ((unsigned long) ivec & 0x7) {
+               memcpy(ivb, ivec, sizeof(ivb));
+               aligned_ivec = ivb;
+       }
+
+       if (enc) {
+               if (key->grand_rounds == 3)
+                       sparc_hw_camellia_cbc_encrypt_3rounds(aligned_in,
+                                                             aligned_out,
+                                                             length,
+                                                             key->u.rd_key,
+                                                             aligned_ivec);
+               else
+                       sparc_hw_camellia_cbc_encrypt_4rounds(aligned_in,
+                                                             aligned_out,
+                                                             length,
+                                                             key->u.rd_key,
+                                                             aligned_ivec);
+       } else {
+               if (key->grand_rounds == 3)
+                       sparc_hw_camellia_cbc_decrypt_3rounds(aligned_in,
+                                                             aligned_out,
+                                                             length,
+                                                             key->u.rd_key,
+                                                             aligned_ivec);
+               else
+                       sparc_hw_camellia_cbc_decrypt_4rounds(aligned_in,
+                                                             aligned_out,
+                                                             length,
+                                                             key->u.rd_key,
+                                                             aligned_ivec);
+       }
+       if ((unsigned long) out & 0x7) {
+               /* copy the result out of the heap bounce buffer */
+               memcpy(out, aligned_out, length);
+               OPENSSL_free(aligned_out);
+       }
+
+       if (aligned_ivec == ivb)
+               memcpy(ivec, ivb, sizeof(ivb));
+
+       return;
+
+slow:
+       if (enc)
+               CRYPTO_cbc128_encrypt(in, out, length, key, ivec,
+                                     (block128_f)Camellia_encrypt);
+       else
+               CRYPTO_cbc128_decrypt(in, out, length, key, ivec,
+                                     (block128_f)Camellia_decrypt);
+}
diff --git a/crypto/sparc_arch.h b/crypto/sparc_arch.h
index 032d67c..fe9805d 100644
--- a/crypto/sparc_arch.h
+++ b/crypto/sparc_arch.h
@@ -61,6 +61,17 @@ extern int OPENSSL_sparcv9cap_P;
 #define AES_KEXPAND2(a,b,c)    \
        .word   (F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c));
 
+/* Camellia instructions, emitted as raw .word encodings (presumably so
+ * that assemblers without these mnemonics can still build the file).
+ * All arguments are float register NUMBERS, not %f names.
+ *   CAMELLIA_F(a,b,c,d):  four-operand form, rs1=a rs2=b rs3=c rd=d.
+ *   CAMELLIA_FL(a,b,c) / CAMELLIA_FLI(a,b,c): three-operand forms,
+ *                         rs1=a rs2=b rd=c.
+ * Each expansion ends in ';' so invocations can be placed back to back
+ * on one (macro-continued) line.
+ */
+#define CAMELLIA_F(a,b,c,d)            \
+       .word           (F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define CAMELLIA_FL(a,b,c)             \
+       .word           (F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c));
+#define CAMELLIA_FLI(a,b,c)            \
+       .word           (F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c));
+
+/* Fixed-operand, pre-encoded instruction words.  Going by the names these
+ * are "movdtox %f0, %o4" and "movdtox %f2, %o5" (move a 64-bit float
+ * register to an integer register) - TODO confirm the encodings against
+ * the architecture manual.  Unlike the neighbouring macros these carry no
+ * trailing ';', so each use must sit on its own line.
+ */
+#define MOVDTOX_F0_O4          \
+       .word   0x99b02200
+#define MOVDTOX_F2_O5          \
+       .word   0x9bb02202
 #define MOVXTOD_G3_F4          \
        .word   0x89b02303;
 #define MOVXTOD_G5_F6          \
-- 
1.7.10.4

______________________________________________________________________
OpenSSL Project                                 http://www.openssl.org
Development Mailing List                       openssl-dev@openssl.org
Automated List Manager                           majord...@openssl.org

Reply via email to