Add support for the new POWER8 vcipher instructions. The code uses OPENSSL_ppccap_P to choose at run time which implementation to use: the regular one or the POWER8 one.
The vcipher instructions use the straightforward decryption described in FIPS-197 instead of the equivalent decryption, which requires the inverse MixColumns transform to be applied to the round keys. In order to use the same AES_set_decrypt_key() function regardless of which AES_decrypt() implementation is used, ppc_AES_decrypt_compact() was changed so that the order of its MixColumns and AddRoundKey steps is reversed. Also, the ppc_AES_[en|de]crypt functions were renamed to ppc_AES_[en|de]crypt_nocompact, and the AES_[en|de]crypt functions in aes-ppc.pl were renamed to ppc_AES_[en|de]crypt, to create a clear distinction between them and the newly added functions ppc_vcipher_AES_[en|de]crypt. --- Configure | 6 +-- crypto/aes/aes_core.c | 3 ++ crypto/aes/asm/aes-ppc.pl | 96 ++++++++++++++++++++++++++++++++++++----------- crypto/ppccap.c | 28 ++++++++++++++ 4 files changed, 109 insertions(+), 24 deletions(-) diff --git a/Configure b/Configure index cf43c8d..0794b83 100755 --- a/Configure +++ b/Configure @@ -347,7 +347,7 @@ my %table=( # *-generic* is endian-neutral target, but ./config is free to # throw in -D[BL]_ENDIAN, whichever appropriate... "linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall -DAES_NO_MIXCOL_DECR_KEY::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", # It's believed that majority of ARM toolchains predefine appropriate -march. # If you compiler does not, do complement config command line with one! 
"linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", @@ -364,8 +364,8 @@ my %table=( "linux-aout", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -march=i486 -Wall::(unknown):::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:a.out", #### "linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", -"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::", +"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall -DAES_NO_MIXCOL_DECR_KEY::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", +"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall -DAES_NO_MIXCOL_DECR_KEY::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::", "linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall::-D_REENTRANT::-ldl -no_cpprt:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-x86_64", "gcc:-m64 -DL_ENDIAN -DTERMIO 
-O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", diff --git a/crypto/aes/aes_core.c b/crypto/aes/aes_core.c index f333c16..07b1d7a 100644 --- a/crypto/aes/aes_core.c +++ b/crypto/aes/aes_core.c @@ -1333,6 +1333,8 @@ int AES_set_decrypt_key(const unsigned char *userKey, const int bits, temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; } + +#ifndef AES_NO_MIXCOL_DECR_KEY /* apply the inverse MixColumn transform to all round keys but the first and the last: */ for (i = 1; i < (key->rounds); i++) { rk += 4; @@ -1363,6 +1365,7 @@ int AES_set_decrypt_key(const unsigned char *userKey, const int bits, #endif } } +#endif return 0; } diff --git a/crypto/aes/asm/aes-ppc.pl b/crypto/aes/asm/aes-ppc.pl index b38bce1..0e0e557 100644 --- a/crypto/aes/asm/aes-ppc.pl +++ b/crypto/aes/asm/aes-ppc.pl @@ -9,12 +9,12 @@ # Needs more work: key setup, CBC routine... # -# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with +# ppc_AES_[en|de]crypt_nocompact perform at 18 cycles per byte processed with # 128-bit key, which is ~40% better than 64-bit code generated by gcc # 4.0. But these are not the ones currently used! Their "compact" # counterparts are, for security reason. ppc_AES_encrypt_compact runs -# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact - -# at 1/3 of ppc_AES_decrypt. +# at 1/2 of ppc_AES_encrypt_nocompact speed, while ppc_AES_decrypt_compact - +# at 1/3 of ppc_AES_decrypt_nocompact. 
# February 2010 # @@ -109,10 +109,16 @@ $acc15="r31"; $mask80=$Tbl2; $mask1b=$Tbl3; +# Registers used by vcipher functions +my $rnds="r6"; +my $state = "vr0"; +my $subkey="vr1"; + $code.=<<___; .machine "any" .text - +___ +$code.=<<___; .align 7 LAES_Te: mflr r0 @@ -334,12 +340,11 @@ $code.=<<___; .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.globl .AES_encrypt +.globl .ppc_AES_encrypt .align 7 -.AES_encrypt: +.ppc_AES_encrypt: $STU $sp,-$FRAME($sp) mflr r0 - $PUSH $out,`$FRAME-$SIZE_T*19`($sp) $PUSH r14,`$FRAME-$SIZE_T*18`($sp) $PUSH r15,`$FRAME-$SIZE_T*17`($sp) @@ -523,7 +528,7 @@ Lenc_done: .long 0 .align 5 -Lppc_AES_encrypt: +Lppc_AES_encrypt_nocompact: lwz $acc00,240($key) addi $Tbl1,$Tbl0,3 lwz $t0,0($key) @@ -810,11 +815,11 @@ Lenc_compact_done: blr .long 0 .byte 0,12,0x14,0,0,0,0,0 -.size .AES_encrypt,.-.AES_encrypt +.size .ppc_AES_encrypt,.-.ppc_AES_encrypt -.globl .AES_decrypt +.globl .ppc_AES_decrypt .align 7 -.AES_decrypt: +.ppc_AES_decrypt: $STU $sp,-$FRAME($sp) mflr r0 @@ -1001,7 +1006,7 @@ Ldec_done: .long 0 .align 5 -Lppc_AES_decrypt: +Lppc_AES_decrypt_nocompact: lwz $acc00,240($key) addi $Tbl1,$Tbl0,3 lwz $t0,0($key) @@ -1164,14 +1169,14 @@ $code.=<<___ if ($SIZE_T==8); ___ $code.=<<___; mtctr $acc00 + xor $s0,$s0,$t0 + xor $s1,$s1,$t1 + xor $s2,$s2,$t2 + xor $s3,$s3,$t3 .align 4 Ldec_compact_loop: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 rlwinm $acc00,$s0,`32-24`,24,31 - xor $s2,$s2,$t2 rlwinm $acc01,$s1,`32-24`,24,31 - xor $s3,$s3,$t3 rlwinm $acc02,$s2,`32-24`,24,31 rlwinm $acc03,$s3,`32-24`,24,31 rlwinm $acc04,$s3,`32-16`,24,31 @@ -1223,6 +1228,11 @@ Ldec_compact_loop: lwz $t3,12($key) or $s3,$s3,$acc15 + xor $s0,$s0,$t0 + xor $s1,$s1,$t1 + xor $s2,$s2,$t2 + xor $s3,$s3,$t3 + addi $key,$key,16 bdz Ldec_compact_done ___ @@ -1438,15 +1448,59 @@ $code.=<<___; b Ldec_compact_loop .align 4 Ldec_compact_done: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 blr .long 0 .byte 0,12,0x14,0,0,0,0,0 -.size 
.AES_decrypt,.-.AES_decrypt +.size .ppc_AES_decrypt,.-.ppc_AES_decrypt +___ +# Skeleton for encryption and decryption +sub vcipher_aes_block { +my ($func, $instr) = @_; +$code.=<<___; +.globl .$func +.align 7 +.$func: + # Load number of rounds and input block + lwz $rnds, 240($key) + lxvd2x $state + 32, 0, $inp + # Initial round + lxvd2x $subkey + 32, 0, $key + vxor $state, $state, $subkey + addi $key, $key, 16 + # Check rounds + cmpldi $rnds, 10 + ble L${func}10 + cmpldi $rnds, 12 + ble L${func}12 +___ +for ($i = 0; $i < 13; $i++) { +$code.="L${func}12:" if ($i == 2); +$code.="L${func}10:" if ($i == 4); +$code.=<<___; + lxvd2x $subkey + 32, 0, $key + $instr $state, $state, $subkey + addi $key, $key, 16 +___ +} +$code.=<<___; + # Last round + lxvd2x $subkey + 32, 0, $key + ${instr}last $state, $state, $subkey + # Store output block + stxvd2x $state + 32, 0, $out + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +.size .${func},.-.${func} +___ +} + +# Create vcipher AES functions +vcipher_aes_block('ppc_vcipher_AES_encrypt', 'vcipher'); +vcipher_aes_block('ppc_vcipher_AES_decrypt', 'vncipher'); + +$code.=<<___; .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>" .align 7 ___ diff --git a/crypto/ppccap.c b/crypto/ppccap.c index b38fc09..585c4b6 100644 --- a/crypto/ppccap.c +++ b/crypto/ppccap.c @@ -9,6 +9,7 @@ #endif #include <crypto.h> #include <openssl/bn.h> +#include <openssl/aes.h> #define PPC_FPU64 (1<<0) #define PPC_ALTIVEC (1<<1) @@ -18,6 +19,33 @@ static int OPENSSL_ppccap_P = 0; static sigset_t all_masked; +void ppc_AES_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void ppc_AES_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void ppc_vcipher_AES_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void ppc_vcipher_AES_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); + +void AES_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY 
*key) +{ + if (OPENSSL_ppccap_P & PPC_VCIPHER) + ppc_vcipher_AES_encrypt(in, out, key); + else + ppc_AES_encrypt(in, out, key); +} + +void AES_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key) +{ + if (OPENSSL_ppccap_P & PPC_VCIPHER) + ppc_vcipher_AES_decrypt(in, out, key); + else + ppc_AES_decrypt(in, out, key); +} + #ifdef OPENSSL_BN_ASM_MONT int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) { -- 1.7.12 ______________________________________________________________________ OpenSSL Project http://www.openssl.org Development Mailing List openssl-dev@openssl.org Automated List Manager majord...@openssl.org