Add support for the new POWER8 vcipher instructions. AES_encrypt() and
AES_decrypt() check OPENSSL_ppccap_P to choose which implementation to
use: the regular one or the POWER8 one.
The vcipher instructions use the straightforward decryption described in
FIPS-197 instead of the equivalent decryption, which requires MixColumns
to be applied to the subkeys. So that the same AES_set_decrypt_key()
function can be used regardless of which AES_decrypt() implementation is
selected, ppc_AES_decrypt_compact() was changed: the order of its
MixColumns and AddRoundKey steps was reversed.
Also, the ppc_AES_[en|de]crypt functions were renamed to
ppc_AES_[en|de]crypt_nocompact, and the AES_[en|de]crypt functions in
aes-ppc.pl were renamed to ppc_AES_[en|de]crypt, to create a clear
distinction between them and the newly added functions
ppc_vcipher_AES_[en|de]crypt.
---
Configure | 6 +--
crypto/aes/aes_core.c | 3 ++
crypto/aes/asm/aes-ppc.pl | 96 ++++++++++++++++++++++++++++++++++++-----------
crypto/ppccap.c | 28 ++++++++++++++
4 files changed, 109 insertions(+), 24 deletions(-)
diff --git a/Configure b/Configure
index cf43c8d..0794b83 100755
--- a/Configure
+++ b/Configure
@@ -347,7 +347,7 @@ my %table=(
# *-generic* is endian-neutral target, but ./config is free to
# throw in -D[BL]_ENDIAN, whichever appropriate...
"linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer
-Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL
BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG
RC4_CHAR RC4_CHUNK DES_RISC1
DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall
-DAES_NO_MIXCOL_DECR_KEY::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK
DES_RISC1
DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
# It's believed that majority of ARM toolchains predefine appropriate -march.
# If you compiler does not, do complement config command line with one!
"linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR
RC4_CHUNK DES_INT DES_UNROLL
BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
@@ -364,8 +364,8 @@ my %table=(
"linux-aout", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -march=i486
-Wall::(unknown):::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:a.out",
####
"linux-generic64","gcc:-DTERMIO -O3
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT
DES_UNROLL
BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1
DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
-"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1
DES_UNROLL:${ppc64_asm}:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::",
+"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall
-DAES_NO_MIXCOL_DECR_KEY::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR
RC4_CHUNK DES_RISC1
DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
+"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall
-DAES_NO_MIXCOL_DECR_KEY::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR
RC4_CHUNK DES_RISC1
DES_UNROLL:${ppc64_asm}:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::",
"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL
DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall::-D_REENTRANT::-ldl
-no_cpprt:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1
DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-x86_64", "gcc:-m64 -DL_ENDIAN -DTERMIO -O3
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT
DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
diff --git a/crypto/aes/aes_core.c b/crypto/aes/aes_core.c
index f333c16..07b1d7a 100644
--- a/crypto/aes/aes_core.c
+++ b/crypto/aes/aes_core.c
@@ -1333,6 +1333,8 @@ int AES_set_decrypt_key(const unsigned char *userKey,
const int bits,
temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
}
+
+#ifndef AES_NO_MIXCOL_DECR_KEY
/* apply the inverse MixColumn transform to all round keys but the
first and the last: */
for (i = 1; i < (key->rounds); i++) {
rk += 4;
@@ -1363,6 +1365,7 @@ int AES_set_decrypt_key(const unsigned char *userKey,
const int bits,
#endif
}
}
+#endif
return 0;
}
diff --git a/crypto/aes/asm/aes-ppc.pl b/crypto/aes/asm/aes-ppc.pl
index b38bce1..0e0e557 100644
--- a/crypto/aes/asm/aes-ppc.pl
+++ b/crypto/aes/asm/aes-ppc.pl
@@ -9,12 +9,12 @@
# Needs more work: key setup, CBC routine...
#
-# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
+# ppc_AES_[en|de]crypt_nocompact perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reason. ppc_AES_encrypt_compact runs
-# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
-# at 1/3 of ppc_AES_decrypt.
+# at 1/2 of ppc_AES_encrypt_nocompact speed, while ppc_AES_decrypt_compact -
+# at 1/3 of ppc_AES_decrypt_nocompact.
# February 2010
#
@@ -109,10 +109,16 @@ $acc15="r31";
$mask80=$Tbl2;
$mask1b=$Tbl3;
+# Registers used by vcipher functions
+my $rnds="r6";
+my $state = "vr0";
+my $subkey="vr1";
+
$code.=<<___;
.machine "any"
.text
-
+___
+$code.=<<___;
.align 7
LAES_Te:
mflr r0
@@ -334,12 +340,11 @@ $code.=<<___;
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
-.globl .AES_encrypt
+.globl .ppc_AES_encrypt
.align 7
-.AES_encrypt:
+.ppc_AES_encrypt:
$STU $sp,-$FRAME($sp)
mflr r0
-
$PUSH $out,`$FRAME-$SIZE_T*19`($sp)
$PUSH r14,`$FRAME-$SIZE_T*18`($sp)
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
@@ -523,7 +528,7 @@ Lenc_done:
.long 0
.align 5
-Lppc_AES_encrypt:
+Lppc_AES_encrypt_nocompact:
lwz $acc00,240($key)
addi $Tbl1,$Tbl0,3
lwz $t0,0($key)
@@ -810,11 +815,11 @@ Lenc_compact_done:
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
-.size .AES_encrypt,.-.AES_encrypt
+.size .ppc_AES_encrypt,.-.ppc_AES_encrypt
-.globl .AES_decrypt
+.globl .ppc_AES_decrypt
.align 7
-.AES_decrypt:
+.ppc_AES_decrypt:
$STU $sp,-$FRAME($sp)
mflr r0
@@ -1001,7 +1006,7 @@ Ldec_done:
.long 0
.align 5
-Lppc_AES_decrypt:
+Lppc_AES_decrypt_nocompact:
lwz $acc00,240($key)
addi $Tbl1,$Tbl0,3
lwz $t0,0($key)
@@ -1164,14 +1169,14 @@ $code.=<<___ if ($SIZE_T==8);
___
$code.=<<___;
mtctr $acc00
+ xor $s0,$s0,$t0
+ xor $s1,$s1,$t1
+ xor $s2,$s2,$t2
+ xor $s3,$s3,$t3
.align 4
Ldec_compact_loop:
- xor $s0,$s0,$t0
- xor $s1,$s1,$t1
rlwinm $acc00,$s0,`32-24`,24,31
- xor $s2,$s2,$t2
rlwinm $acc01,$s1,`32-24`,24,31
- xor $s3,$s3,$t3
rlwinm $acc02,$s2,`32-24`,24,31
rlwinm $acc03,$s3,`32-24`,24,31
rlwinm $acc04,$s3,`32-16`,24,31
@@ -1223,6 +1228,11 @@ Ldec_compact_loop:
lwz $t3,12($key)
or $s3,$s3,$acc15
+ xor $s0,$s0,$t0
+ xor $s1,$s1,$t1
+ xor $s2,$s2,$t2
+ xor $s3,$s3,$t3
+
addi $key,$key,16
bdz Ldec_compact_done
___
@@ -1438,15 +1448,59 @@ $code.=<<___;
b Ldec_compact_loop
.align 4
Ldec_compact_done:
- xor $s0,$s0,$t0
- xor $s1,$s1,$t1
- xor $s2,$s2,$t2
- xor $s3,$s3,$t3
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
-.size .AES_decrypt,.-.AES_decrypt
+.size .ppc_AES_decrypt,.-.ppc_AES_decrypt
+___
+# Skeleton for encryption and decryption
+sub vcipher_aes_block {
+my ($func, $instr) = @_;
+$code.=<<___;
+.globl .$func
+.align 7
+.$func:
+ # Load number of rounds and input block
+ lwz $rnds, 240($key)
+ lxvd2x $state + 32, 0, $inp
+ # Initial round
+ lxvd2x $subkey + 32, 0, $key
+ vxor $state, $state, $subkey
+ addi $key, $key, 16
+ # Check rounds
+ cmpldi $rnds, 10
+ ble L${func}10
+ cmpldi $rnds, 12
+ ble L${func}12
+___
+for ($i = 0; $i < 13; $i++) {
+$code.="L${func}12:" if ($i == 2);
+$code.="L${func}10:" if ($i == 4);
+$code.=<<___;
+ lxvd2x $subkey + 32, 0, $key
+ $instr $state, $state, $subkey
+ addi $key, $key, 16
+___
+}
+$code.=<<___;
+ # Last round
+ lxvd2x $subkey + 32, 0, $key
+ ${instr}last $state, $state, $subkey
+ # Store output block
+ stxvd2x $state + 32, 0, $out
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+.size .${func},.-.${func}
+___
+}
+
+# Create vcipher AES functions
+vcipher_aes_block('ppc_vcipher_AES_encrypt', 'vcipher');
+vcipher_aes_block('ppc_vcipher_AES_decrypt', 'vncipher');
+
+$code.=<<___;
.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
.align 7
___
diff --git a/crypto/ppccap.c b/crypto/ppccap.c
index b38fc09..585c4b6 100644
--- a/crypto/ppccap.c
+++ b/crypto/ppccap.c
@@ -9,6 +9,7 @@
#endif
#include <crypto.h>
#include <openssl/bn.h>
+#include <openssl/aes.h>
#define PPC_FPU64 (1<<0)
#define PPC_ALTIVEC (1<<1)
@@ -18,6 +19,33 @@ static int OPENSSL_ppccap_P = 0;
static sigset_t all_masked;
+void ppc_AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void ppc_AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void ppc_vcipher_AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void ppc_vcipher_AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key)
+{
+ if (OPENSSL_ppccap_P & PPC_VCIPHER)
+ ppc_vcipher_AES_encrypt(in, out, key);
+ else
+ ppc_AES_encrypt(in, out, key);
+}
+
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key)
+{
+ if (OPENSSL_ppccap_P & PPC_VCIPHER)
+ ppc_vcipher_AES_decrypt(in, out, key);
+ else
+ ppc_AES_decrypt(in, out, key);
+}
+
#ifdef OPENSSL_BN_ASM_MONT
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const
BN_ULONG *np, const BN_ULONG *n0, int num)
{
--
1.7.12
______________________________________________________________________
OpenSSL Project http://www.openssl.org
Development Mailing List [email protected]
Automated List Manager [email protected]