Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package openssl-3 for openSUSE:Factory checked in at 2023-10-27 22:27:00 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/openssl-3 (Old) and /work/SRC/openSUSE:Factory/.openssl-3.new.17445 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "openssl-3" Fri Oct 27 22:27:00 2023 rev:17 rq:1120189 version:3.1.4 Changes: -------- --- /work/SRC/openSUSE:Factory/openssl-3/openssl-3.changes 2023-10-20 23:18:12.197664021 +0200 +++ /work/SRC/openSUSE:Factory/.openssl-3.new.17445/openssl-3.changes 2023-10-27 22:27:00.632911981 +0200 @@ -1,0 +2,30 @@ +Tue Oct 24 14:53:41 UTC 2023 - Otto Hollmann <otto.hollm...@suse.com> + +- Update to 3.1.4: + * Fix incorrect key and IV resizing issues when calling + EVP_EncryptInit_ex2(), EVP_DecryptInit_ex2() or EVP_CipherInit_ex2() + with OSSL_PARAM parameters that alter the key or IV length + [bsc#1216163, CVE-2023-5363]. + +------------------------------------------------------------------- +Thu Oct 19 15:03:14 UTC 2023 - Otto Hollmann <otto.hollm...@suse.com> + +- Performance enhancements for cryptography from OpenSSL 3.2 + [jsc#PED-5086, jsc#PED-3514] + * Add patches: + - openssl-ec-Use-static-linkage-on-nistp521-felem_-square-mul-.patch + - openssl-ec-56-bit-Limb-Solinas-Strategy-for-secp384r1.patch + - openssl-ec-powerpc64le-Add-asm-implementation-of-felem_-squa.patch + - openssl-ecc-Remove-extraneous-parentheses-in-secp384r1.patch + - openssl-powerpc-ecc-Fix-stack-allocation-secp384r1-asm.patch + - openssl-Improve-performance-for-6x-unrolling-with-vpermxor-i.patch + +------------------------------------------------------------------- +Thu Oct 19 11:53:29 UTC 2023 - Pedro Monreal <pmonr...@suse.com> + +- FIPS: Add the FIPS_mode() compatibility macro and flag support. + * Add patches: + - openssl-Add-FIPS_mode-compatibility-macro.patch + - openssl-Add-Kernel-FIPS-mode-flag-support.patch + +------------------------------------------------------------------- Old: ---- openssl-3.1.3.tar.gz openssl-3.1.3.tar.gz.asc New: ---- openssl-3.1.4.tar.gz openssl-3.1.4.tar.gz.asc openssl-Add-FIPS_mode-compatibility-macro.patch openssl-Add-Kernel-FIPS-mode-flag-support.patch openssl-Improve-performance-for-6x-unrolling-with-vpermxor-i.patch openssl-ec-56-bit-Limb-Solinas-Strategy-for-secp384r1.patch openssl-ec-Use-static-linkage-on-nistp521-felem_-square-mul-.patch openssl-ec-powerpc64le-Add-asm-implementation-of-felem_-squa.patch openssl-ecc-Remove-extraneous-parentheses-in-secp384r1.patch openssl-powerpc-ecc-Fix-stack-allocation-secp384r1-asm.patch ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ openssl-3.spec ++++++ --- /var/tmp/diff_new_pack.suGu1W/_old 2023-10-27 22:27:01.836956141 +0200 +++ /var/tmp/diff_new_pack.suGu1W/_new 2023-10-27 22:27:01.840956287 +0200 @@ -22,7 +22,7 @@ %define man_suffix 3ssl Name: openssl-3 # Don't forget to update the version in the "openssl" meta-package! -Version: 3.1.3 +Version: 3.1.4 Release: 0 Summary: Secure Sockets and Transport Layer Security License: Apache-2.0 @@ -48,6 +48,17 @@ Patch8: openssl-Override-default-paths-for-the-CA-directory-tree.patch # PATCH-FIX-UPSTREAM: bsc#1209430 Upgrade OpenSSL from 3.0.8 to 3.1.0 in TW Patch9: openssl-Add_support_for_Windows_CA_certificate_store.patch +# PATCH-FIX-FEDORA Add FIPS_mode compatibility macro and flag support +Patch10: openssl-Add-FIPS_mode-compatibility-macro.patch +Patch11: openssl-Add-Kernel-FIPS-mode-flag-support.patch +# PATCH-FIX-UPSTREAM jsc#PED-5086, jsc#PED-3514 +# POWER10 performance enhancements for cryptography +Patch12: openssl-ec-Use-static-linkage-on-nistp521-felem_-square-mul-.patch +Patch13: openssl-ec-56-bit-Limb-Solinas-Strategy-for-secp384r1.patch +Patch14: openssl-ec-powerpc64le-Add-asm-implementation-of-felem_-squa.patch +Patch15: openssl-ecc-Remove-extraneous-parentheses-in-secp384r1.patch +Patch16: openssl-powerpc-ecc-Fix-stack-allocation-secp384r1-asm.patch +Patch17: openssl-Improve-performance-for-6x-unrolling-with-vpermxor-i.patch BuildRequires: pkgconfig BuildRequires: pkgconfig(zlib) Requires: libopenssl3 = %{version}-%{release} ++++++ openssl-3.1.3.tar.gz -> openssl-3.1.4.tar.gz ++++++ /work/SRC/openSUSE:Factory/openssl-3/openssl-3.1.3.tar.gz /work/SRC/openSUSE:Factory/.openssl-3.new.17445/openssl-3.1.4.tar.gz differ: char 12, line 1 ++++++ openssl-Add-FIPS_mode-compatibility-macro.patch ++++++ >From 8e29a10b39a649d751870eb1fd1b8c388e66acc3 Mon Sep 17 00:00:00 2001 From: rpm-build <rpm-build> Date: Mon, 31 Jul 2023 09:41:27 +0200 Subject: [PATCH 08/35] 0008-Add-FIPS_mode-compatibility-macro.patch Patch-name: 0008-Add-FIPS_mode-compatibility-macro.patch Patch-id: 8 Patch-status: | # Add FIPS_mode() compatibility macro From-dist-git-commit: 9409bc7044cf4b5773639cce20f51399888c45fd --- include/openssl/fips.h | 26 ++++++++++++++++++++++++++ test/property_test.c | 14 ++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 include/openssl/fips.h diff --git a/include/openssl/fips.h b/include/openssl/fips.h new file mode 100644 index 0000000000..4162cbf88e --- /dev/null +++ b/include/openssl/fips.h @@ -0,0 +1,26 @@ +/* + * Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OPENSSL_FIPS_H +# define OPENSSL_FIPS_H +# pragma once + +# include <openssl/evp.h> +# include <openssl/macros.h> + +# ifdef __cplusplus +extern "C" { +# endif + +# define FIPS_mode() EVP_default_properties_is_fips_enabled(NULL) + +# ifdef __cplusplus +} +# endif +#endif diff --git a/test/property_test.c b/test/property_test.c index 45b1db3e85..8894c1c1cb 100644 --- a/test/property_test.c +++ b/test/property_test.c @@ -677,6 +677,19 @@ static int test_property_list_to_string(int i) return ret; } +#include <openssl/fips.h> +static int test_downstream_FIPS_mode(void) +{ + int ret = 0; + + ret = TEST_true(EVP_set_default_properties(NULL, "fips=yes")) + && TEST_true(FIPS_mode()) + && TEST_true(EVP_set_default_properties(NULL, "fips=no")) + && TEST_false(FIPS_mode()); + + return ret; +} + int setup_tests(void) { ADD_TEST(test_property_string); @@ -690,6 +703,7 @@ int setup_tests(void) ADD_TEST(test_property); ADD_TEST(test_query_cache_stochastic); ADD_TEST(test_fips_mode); + ADD_TEST(test_downstream_FIPS_mode); ADD_ALL_TESTS(test_property_list_to_string, OSSL_NELEM(to_string_tests)); return 1; } -- 2.41.0 ++++++ openssl-Add-Kernel-FIPS-mode-flag-support.patch ++++++ >From aa3aebf132959e7e44876042efaf9ff24ffe0f2b Mon Sep 17 00:00:00 2001 From: rpm-build <rpm-build> Date: Mon, 31 Jul 2023 09:41:27 +0200 Subject: [PATCH 09/35] 0009-Add-Kernel-FIPS-mode-flag-support.patch Patch-name: 0009-Add-Kernel-FIPS-mode-flag-support.patch Patch-id: 9 Patch-status: | # Add check to see if fips flag is enabled in kernel From-dist-git-commit: 9409bc7044cf4b5773639cce20f51399888c45fd --- crypto/context.c | 36 ++++++++++++++++++++++++++++++++++++ include/internal/provider.h | 3 +++ 2 files changed, 39 insertions(+) diff --git a/crypto/context.c b/crypto/context.c index e294ea1512..51002ba79a 100644 --- a/crypto/context.c +++ b/crypto/context.c @@ -16,6 +16,41 @@ #include "internal/provider.h" #include "crypto/context.h" +# include <sys/types.h> +# include <sys/stat.h> +# include <fcntl.h> +# include <unistd.h> +# include <openssl/evp.h> + +# define FIPS_MODE_SWITCH_FILE "/proc/sys/crypto/fips_enabled" + +static int kernel_fips_flag; + +static void read_kernel_fips_flag(void) +{ + char buf[2] = "0"; + int fd; + + if (ossl_safe_getenv("OPENSSL_FORCE_FIPS_MODE") != NULL) { + buf[0] = '1'; + } else if ((fd = open(FIPS_MODE_SWITCH_FILE, O_RDONLY)) >= 0) { + while (read(fd, buf, sizeof(buf)) < 0 && errno == EINTR) ; + close(fd); + } + + if (buf[0] == '1') { + kernel_fips_flag = 1; + } + + return; +} + +int ossl_get_kernel_fips_flag() +{ + return kernel_fips_flag; +} + + struct ossl_lib_ctx_st { CRYPTO_RWLOCK *lock, *rand_crngt_lock; OSSL_EX_DATA_GLOBAL global; @@ -336,6 +371,7 @@ static int default_context_inited = 0; DEFINE_RUN_ONCE_STATIC(default_context_do_init) { + read_kernel_fips_flag(); if (!CRYPTO_THREAD_init_local(&default_context_thread_local, NULL)) goto err; diff --git a/include/internal/provider.h b/include/internal/provider.h index 18937f84c7..1446bf7afb 100644 --- a/include/internal/provider.h +++ b/include/internal/provider.h @@ -112,6 +112,9 @@ int ossl_provider_init_as_child(OSSL_LIB_CTX *ctx, const OSSL_DISPATCH *in); void ossl_provider_deinit_child(OSSL_LIB_CTX *ctx); +/* FIPS flag access */ +int ossl_get_kernel_fips_flag(void); + # ifdef __cplusplus } # endif -- 2.41.0 ++++++ openssl-Improve-performance-for-6x-unrolling-with-vpermxor-i.patch ++++++ >From 3d3a7ecd1ae5ab08d22041f7b3b035c34f12fa02 Mon Sep 17 00:00:00 2001 From: Danny Tsen <dt...@linux.ibm.com> Date: Tue, 22 Aug 2023 15:58:53 -0400 Subject: [PATCH] Improve performance for 6x unrolling with vpermxor instruction Reviewed-by: Paul Dale <pa...@openssl.org> Reviewed-by: Tomas Mraz <to...@openssl.org> (Merged from https://github.com/openssl/openssl/pull/21812) --- crypto/aes/asm/aesp8-ppc.pl | 145 +++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 50 deletions(-) diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl index 60cf86f52aed2..38b9405a283b7 100755 --- a/crypto/aes/asm/aesp8-ppc.pl +++ b/crypto/aes/asm/aesp8-ppc.pl @@ -99,11 +99,12 @@ .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev .long 0,0,0,0 ?asis +.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe Lconsts: mflr r0 bcl 20,31,\$+4 mflr $ptr #vvvvv "distance between . and rcon - addi $ptr,$ptr,-0x48 + addi $ptr,$ptr,-0x58 mtlr r0 blr .long 0 @@ -2405,7 +2406,7 @@ () my $key_=$key2; my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); $x00=0 if ($flavour =~ /osx/); -my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); +my ($in0, $in1, $in2, $in3, $in4, $in5)=map("v$_",(0..5)); my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys @@ -2460,6 +2461,18 @@ () li $x70,0x70 mtspr 256,r0 + # Reverse eighty7 to 0x010101..87 + xxlor 2, 32+$eighty7, 32+$eighty7 + vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87 + xxlor 1, 32+$eighty7, 32+$eighty7 + + # Load XOR contents. 0xf102132435465768798a9bacbdcedfe + mr $x70, r6 + bl Lconsts + lxvw4x 0, $x40, r6 # load XOR contents + mr r6, $x70 + li $x70,0x70 + subi $rounds,$rounds,3 # -4 in total lvx $rndkey0,$x00,$key1 # load key schedule @@ -2502,69 +2515,77 @@ () ?vperm v31,v31,$twk5,$keyperm lvx v25,$x10,$key_ # pre-load round[2] + # Switch to use the following codes with 0x010101..87 to generate tweak. + # eighty7 = 0x010101..87 + # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits + # vand tmp, tmp, eighty7 # last byte with carry + # vaddubm tweak, tweak, tweak # left shift 1 bit (x2) + # xxlor vsx, 0, 0 + # vpermxor tweak, tweak, tmp, vsx + vperm $in0,$inout,$inptail,$inpperm subi $inp,$inp,31 # undo "caller" vxor $twk0,$tweak,$rndkey0 vsrab $tmp,$tweak,$seven # next tweak value vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 vand $tmp,$tmp,$eighty7 vxor $out0,$in0,$twk0 - vxor $tweak,$tweak,$tmp + xxlor 32+$in1, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in1 lvx_u $in1,$x10,$inp vxor $twk1,$tweak,$rndkey0 vsrab $tmp,$tweak,$seven # next tweak value vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 le?vperm $in1,$in1,$in1,$leperm vand $tmp,$tmp,$eighty7 vxor $out1,$in1,$twk1 - vxor $tweak,$tweak,$tmp + xxlor 32+$in2, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in2 lvx_u $in2,$x20,$inp andi. $taillen,$len,15 vxor $twk2,$tweak,$rndkey0 vsrab $tmp,$tweak,$seven # next tweak value vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 le?vperm $in2,$in2,$in2,$leperm vand $tmp,$tmp,$eighty7 vxor $out2,$in2,$twk2 - vxor $tweak,$tweak,$tmp + xxlor 32+$in3, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in3 lvx_u $in3,$x30,$inp sub $len,$len,$taillen vxor $twk3,$tweak,$rndkey0 vsrab $tmp,$tweak,$seven # next tweak value vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 le?vperm $in3,$in3,$in3,$leperm vand $tmp,$tmp,$eighty7 vxor $out3,$in3,$twk3 - vxor $tweak,$tweak,$tmp + xxlor 32+$in4, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in4 lvx_u $in4,$x40,$inp subi $len,$len,0x60 vxor $twk4,$tweak,$rndkey0 vsrab $tmp,$tweak,$seven # next tweak value vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 le?vperm $in4,$in4,$in4,$leperm vand $tmp,$tmp,$eighty7 vxor $out4,$in4,$twk4 - vxor $tweak,$tweak,$tmp + xxlor 32+$in5, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in5 lvx_u $in5,$x50,$inp addi $inp,$inp,0x60 vxor $twk5,$tweak,$rndkey0 vsrab $tmp,$tweak,$seven # next tweak value vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 le?vperm $in5,$in5,$in5,$leperm vand $tmp,$tmp,$eighty7 vxor $out5,$in5,$twk5 - vxor $tweak,$tweak,$tmp + xxlor 32+$in0, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in0 vxor v31,v31,$rndkey0 mtctr $rounds @@ -2590,6 +2611,8 @@ () lvx v25,$x10,$key_ # round[4] bdnz Loop_xts_enc6x + xxlor 32+$eighty7, 1, 1 # 0x010101..87 + subic $len,$len,96 # $len-=96 vxor $in0,$twk0,v31 # xor with last round key vcipher $out0,$out0,v24 @@ -2599,7 +2622,6 @@ () vaddubm $tweak,$tweak,$tweak vcipher $out2,$out2,v24 vcipher $out3,$out3,v24 - vsldoi $tmp,$tmp,$tmp,15 vcipher $out4,$out4,v24 vcipher $out5,$out5,v24 @@ -2607,7 +2629,8 @@ () vand $tmp,$tmp,$eighty7 vcipher $out0,$out0,v25 vcipher $out1,$out1,v25 - vxor $tweak,$tweak,$tmp + xxlor 32+$in1, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in1 vcipher $out2,$out2,v25 vcipher $out3,$out3,v25 vxor $in1,$twk1,v31 @@ -2618,13 +2641,13 @@ () and r0,r0,$len vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 vcipher $out0,$out0,v26 vcipher $out1,$out1,v26 vand $tmp,$tmp,$eighty7 vcipher $out2,$out2,v26 vcipher $out3,$out3,v26 - vxor $tweak,$tweak,$tmp + xxlor 32+$in2, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in2 vcipher $out4,$out4,v26 vcipher $out5,$out5,v26 @@ -2638,7 +2661,6 @@ () vaddubm $tweak,$tweak,$tweak vcipher $out0,$out0,v27 vcipher $out1,$out1,v27 - vsldoi $tmp,$tmp,$tmp,15 vcipher $out2,$out2,v27 vcipher $out3,$out3,v27 vand $tmp,$tmp,$eighty7 @@ -2646,7 +2668,8 @@ () vcipher $out5,$out5,v27 addi $key_,$sp,$FRAME+15 # rewind $key_ - vxor $tweak,$tweak,$tmp + xxlor 32+$in3, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in3 vcipher $out0,$out0,v28 vcipher $out1,$out1,v28 vxor $in3,$twk3,v31 @@ -2655,7 +2678,6 @@ () vcipher $out2,$out2,v28 vcipher $out3,$out3,v28 vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 vcipher $out4,$out4,v28 vcipher $out5,$out5,v28 lvx v24,$x00,$key_ # re-pre-load round[1] @@ -2663,7 +2685,8 @@ () vcipher $out0,$out0,v29 vcipher $out1,$out1,v29 - vxor $tweak,$tweak,$tmp + xxlor 32+$in4, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in4 vcipher $out2,$out2,v29 vcipher $out3,$out3,v29 vxor $in4,$twk4,v31 @@ -2673,14 +2696,14 @@ () vcipher $out5,$out5,v29 lvx v25,$x10,$key_ # re-pre-load round[2] vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 vcipher $out0,$out0,v30 vcipher $out1,$out1,v30 vand $tmp,$tmp,$eighty7 vcipher $out2,$out2,v30 vcipher $out3,$out3,v30 - vxor $tweak,$tweak,$tmp + xxlor 32+$in5, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in5 vcipher $out4,$out4,v30 vcipher $out5,$out5,v30 vxor $in5,$twk5,v31 @@ -2690,7 +2713,6 @@ () vcipherlast $out0,$out0,$in0 lvx_u $in0,$x00,$inp # load next input block vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 vcipherlast $out1,$out1,$in1 lvx_u $in1,$x10,$inp vcipherlast $out2,$out2,$in2 @@ -2703,7 +2725,10 @@ () vcipherlast $out4,$out4,$in4 le?vperm $in2,$in2,$in2,$leperm lvx_u $in4,$x40,$inp - vxor $tweak,$tweak,$tmp + xxlor 10, 32+$in0, 32+$in0 + xxlor 32+$in0, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in0 + xxlor 32+$in0, 10, 10 vcipherlast $tmp,$out5,$in5 # last block might be needed # in stealing mode le?vperm $in3,$in3,$in3,$leperm @@ -2736,6 +2761,8 @@ () mtctr $rounds beq Loop_xts_enc6x # did $len-=96 borrow? + xxlor 32+$eighty7, 2, 2 # 0x870101..01 + addic. $len,$len,0x60 beq Lxts_enc6x_zero cmpwi $len,0x20 @@ -3112,6 +3139,18 @@ () li $x70,0x70 mtspr 256,r0 + # Reverse eighty7 to 0x010101..87 + xxlor 2, 32+$eighty7, 32+$eighty7 + vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87 + xxlor 1, 32+$eighty7, 32+$eighty7 + + # Load XOR contents. 0xf102132435465768798a9bacbdcedfe + mr $x70, r6 + bl Lconsts + lxvw4x 0, $x40, r6 # load XOR contents + mr r6, $x70 + li $x70,0x70 + subi $rounds,$rounds,3 # -4 in total lvx $rndkey0,$x00,$key1 # load key schedule @@ -3159,64 +3198,64 @@ () vxor $twk0,$tweak,$rndkey0 vsrab $tmp,$tweak,$seven # next tweak value vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 vand $tmp,$tmp,$eighty7 vxor $out0,$in0,$twk0 - vxor $tweak,$tweak,$tmp + xxlor 32+$in1, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in1 lvx_u $in1,$x10,$inp vxor $twk1,$tweak,$rndkey0 vsrab $tmp,$tweak,$seven # next tweak value vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 le?vperm $in1,$in1,$in1,$leperm vand $tmp,$tmp,$eighty7 vxor $out1,$in1,$twk1 - vxor $tweak,$tweak,$tmp + xxlor 32+$in2, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in2 lvx_u $in2,$x20,$inp andi. $taillen,$len,15 vxor $twk2,$tweak,$rndkey0 vsrab $tmp,$tweak,$seven # next tweak value vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 le?vperm $in2,$in2,$in2,$leperm vand $tmp,$tmp,$eighty7 vxor $out2,$in2,$twk2 - vxor $tweak,$tweak,$tmp + xxlor 32+$in3, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in3 lvx_u $in3,$x30,$inp sub $len,$len,$taillen vxor $twk3,$tweak,$rndkey0 vsrab $tmp,$tweak,$seven # next tweak value vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 le?vperm $in3,$in3,$in3,$leperm vand $tmp,$tmp,$eighty7 vxor $out3,$in3,$twk3 - vxor $tweak,$tweak,$tmp + xxlor 32+$in4, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in4 lvx_u $in4,$x40,$inp subi $len,$len,0x60 vxor $twk4,$tweak,$rndkey0 vsrab $tmp,$tweak,$seven # next tweak value vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 le?vperm $in4,$in4,$in4,$leperm vand $tmp,$tmp,$eighty7 vxor $out4,$in4,$twk4 - vxor $tweak,$tweak,$tmp + xxlor 32+$in5, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in5 lvx_u $in5,$x50,$inp addi $inp,$inp,0x60 vxor $twk5,$tweak,$rndkey0 vsrab $tmp,$tweak,$seven # next tweak value vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 le?vperm $in5,$in5,$in5,$leperm vand $tmp,$tmp,$eighty7 vxor $out5,$in5,$twk5 - vxor $tweak,$tweak,$tmp + xxlor 32+$in0, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in0 vxor v31,v31,$rndkey0 mtctr $rounds @@ -3242,6 +3281,8 @@ () lvx v25,$x10,$key_ # round[4] bdnz Loop_xts_dec6x + xxlor 32+$eighty7, 1, 1 + subic $len,$len,96 # $len-=96 vxor $in0,$twk0,v31 # xor with last round key vncipher $out0,$out0,v24 @@ -3251,7 +3292,6 @@ () vaddubm $tweak,$tweak,$tweak vncipher $out2,$out2,v24 vncipher $out3,$out3,v24 - vsldoi $tmp,$tmp,$tmp,15 vncipher $out4,$out4,v24 vncipher $out5,$out5,v24 @@ -3259,7 +3299,8 @@ () vand $tmp,$tmp,$eighty7 vncipher $out0,$out0,v25 vncipher $out1,$out1,v25 - vxor $tweak,$tweak,$tmp + xxlor 32+$in1, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in1 vncipher $out2,$out2,v25 vncipher $out3,$out3,v25 vxor $in1,$twk1,v31 @@ -3270,13 +3311,13 @@ () and r0,r0,$len vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 vncipher $out0,$out0,v26 vncipher $out1,$out1,v26 vand $tmp,$tmp,$eighty7 vncipher $out2,$out2,v26 vncipher $out3,$out3,v26 - vxor $tweak,$tweak,$tmp + xxlor 32+$in2, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in2 vncipher $out4,$out4,v26 vncipher $out5,$out5,v26 @@ -3290,7 +3331,6 @@ () vaddubm $tweak,$tweak,$tweak vncipher $out0,$out0,v27 vncipher $out1,$out1,v27 - vsldoi $tmp,$tmp,$tmp,15 vncipher $out2,$out2,v27 vncipher $out3,$out3,v27 vand $tmp,$tmp,$eighty7 @@ -3298,7 +3338,8 @@ () vncipher $out5,$out5,v27 addi $key_,$sp,$FRAME+15 # rewind $key_ - vxor $tweak,$tweak,$tmp + xxlor 32+$in3, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in3 vncipher $out0,$out0,v28 vncipher $out1,$out1,v28 vxor $in3,$twk3,v31 @@ -3307,7 +3348,6 @@ () vncipher $out2,$out2,v28 vncipher $out3,$out3,v28 vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 vncipher $out4,$out4,v28 vncipher $out5,$out5,v28 lvx v24,$x00,$key_ # re-pre-load round[1] @@ -3315,7 +3355,8 @@ () vncipher $out0,$out0,v29 vncipher $out1,$out1,v29 - vxor $tweak,$tweak,$tmp + xxlor 32+$in4, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in4 vncipher $out2,$out2,v29 vncipher $out3,$out3,v29 vxor $in4,$twk4,v31 @@ -3325,14 +3366,14 @@ () vncipher $out5,$out5,v29 lvx v25,$x10,$key_ # re-pre-load round[2] vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 vncipher $out0,$out0,v30 vncipher $out1,$out1,v30 vand $tmp,$tmp,$eighty7 vncipher $out2,$out2,v30 vncipher $out3,$out3,v30 - vxor $tweak,$tweak,$tmp + xxlor 32+$in5, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in5 vncipher $out4,$out4,v30 vncipher $out5,$out5,v30 vxor $in5,$twk5,v31 @@ -3342,7 +3383,6 @@ () vncipherlast $out0,$out0,$in0 lvx_u $in0,$x00,$inp # load next input block vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 vncipherlast $out1,$out1,$in1 lvx_u $in1,$x10,$inp vncipherlast $out2,$out2,$in2 @@ -3355,7 +3395,10 @@ () vncipherlast $out4,$out4,$in4 le?vperm $in2,$in2,$in2,$leperm lvx_u $in4,$x40,$inp - vxor $tweak,$tweak,$tmp + xxlor 10, 32+$in0, 32+$in0 + xxlor 32+$in0, 0, 0 + vpermxor $tweak, $tweak, $tmp, $in0 + xxlor 32+$in0, 10, 10 vncipherlast $out5,$out5,$in5 le?vperm $in3,$in3,$in3,$leperm lvx_u $in5,$x50,$inp @@ -3386,6 +3429,8 @@ () mtctr $rounds beq Loop_xts_dec6x # did $len-=96 borrow? + xxlor 32+$eighty7, 2, 2 + addic. $len,$len,0x60 beq Lxts_dec6x_zero cmpwi $len,0x20 ++++++ openssl-ec-56-bit-Limb-Solinas-Strategy-for-secp384r1.patch ++++++ ++++ 2160 lines (skipped) ++++++ openssl-ec-Use-static-linkage-on-nistp521-felem_-square-mul-.patch ++++++ >From 3e47a286dc3274bda72a196c3a4030a1fc8302f1 Mon Sep 17 00:00:00 2001 From: Rohan McLure <rohanmcl...@linux.ibm.com> Date: Fri, 23 Jun 2023 16:41:48 +1000 Subject: [PATCH] ec: Use static linkage on nistp521 felem_{square,mul} wrappers Runtime selection of implementations for felem_{square,mul} depends on felem_{square,mul}_wrapper functions, which overwrite function points in a similar design to that of .plt.got sections used by program loaders during dynamic linking. There's no reason why these functions need to have external linkage. Mark static. Signed-off-by: Rohan McLure <rohanmcl...@linux.ibm.com> Reviewed-by: Paul Dale <pa...@openssl.org> Reviewed-by: Shane Lontis <shane.lon...@oracle.com> Reviewed-by: Dmitry Belyavskiy <beld...@gmail.com> Reviewed-by: Todd Short <todd.sh...@me.com> (Merged from https://github.com/openssl/openssl/pull/21471) --- crypto/ec/ecp_nistp521.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crypto/ec/ecp_nistp521.c b/crypto/ec/ecp_nistp521.c index 97815cac1f13..32a9268ecf17 100644 --- a/crypto/ec/ecp_nistp521.c +++ b/crypto/ec/ecp_nistp521.c @@ -676,8 +676,8 @@ static void felem_reduce(felem out, const largefelem in) } #if defined(ECP_NISTP521_ASM) -void felem_square_wrapper(largefelem out, const felem in); -void felem_mul_wrapper(largefelem out, const felem in1, const felem in2); +static void felem_square_wrapper(largefelem out, const felem in); +static void felem_mul_wrapper(largefelem out, const felem in1, const felem in2); static void (*felem_square_p)(largefelem out, const felem in) = felem_square_wrapper; @@ -691,7 +691,7 @@ void p521_felem_mul(largefelem out, const felem in1, const felem in2); # include "crypto/ppc_arch.h" # endif -void felem_select(void) +static void felem_select(void) { # if defined(_ARCH_PPC64) if ((OPENSSL_ppccap_P & PPC_MADD300) && (OPENSSL_ppccap_P & PPC_ALTIVEC)) { @@ -707,13 +707,13 @@ void felem_select(void) felem_mul_p = felem_mul_ref; } -void felem_square_wrapper(largefelem out, const felem in) +static void felem_square_wrapper(largefelem out, const felem in) { felem_select(); felem_square_p(out, in); } -void felem_mul_wrapper(largefelem out, const felem in1, const felem in2) +static void felem_mul_wrapper(largefelem out, const felem in1, const felem in2) { felem_select(); felem_mul_p(out, in1, in2); ++++++ openssl-ec-powerpc64le-Add-asm-implementation-of-felem_-squa.patch ++++++ >From 966047ee13188e8634af25af348940acceb9316d Mon Sep 17 00:00:00 2001 From: Rohan McLure <rohanmcl...@linux.ibm.com> Date: Wed, 31 May 2023 14:32:26 +1000 Subject: [PATCH] ec: powerpc64le: Add asm implementation of felem_{square,mul} Add an assembly implementation of felem_{square,mul}, which will be implemented whenever Altivec support is present and the core implements ISA 3.0 (Power 9) or greater. Signed-off-by: Rohan McLure <rohanmcl...@linux.ibm.com> Reviewed-by: Paul Dale <pa...@openssl.org> Reviewed-by: Shane Lontis <shane.lon...@oracle.com> Reviewed-by: Dmitry Belyavskiy <beld...@gmail.com> Reviewed-by: Todd Short <todd.sh...@me.com> (Merged from https://github.com/openssl/openssl/pull/21471) --- crypto/ec/asm/ecp_nistp384-ppc64.pl | 355 ++++++++++++++++++++++++++++ crypto/ec/build.info | 6 +- crypto/ec/ecp_nistp384.c | 9 + 3 files changed, 368 insertions(+), 2 deletions(-) create mode 100755 crypto/ec/asm/ecp_nistp384-ppc64.pl diff --git a/crypto/ec/asm/ecp_nistp384-ppc64.pl b/crypto/ec/asm/ecp_nistp384-ppc64.pl new file mode 100755 index 000000000000..3f86b391af69 --- /dev/null +++ b/crypto/ec/asm/ecp_nistp384-ppc64.pl @@ -0,0 +1,355 @@ +#! /usr/bin/env perl +# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the Apache License 2.0 (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html +# +# ==================================================================== +# Written by Rohan McLure <rmcl...@linux.ibm.com> for the OpenSSL +# project. +# ==================================================================== +# +# p384 lower-level primitives for PPC64 using vector instructions. +# + +use strict; +use warnings; + +my $flavour = shift; +my $output = ""; +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +if (!$output) { + $output = "-"; +} + +my ($xlate, $dir); +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +my $code = ""; + +my ($sp, $outp, $savelr, $savesp) = ("r1", "r3", "r10", "r12"); + +my $vzero = "v32"; + +sub startproc($) +{ + my ($name) = @_; + + $code.=<<___; + .globl ${name} + .align 5 +${name}: + +___ +} + +sub endproc($) +{ + my ($name) = @_; + + $code.=<<___; + blr + .size ${name},.-${name} + +___ +} + + +sub push_vrs($$) +{ + my ($min, $max) = @_; + + my $count = $max - $min + 1; + + $code.=<<___; + mr $savesp,$sp + stdu $sp,-16*`$count+1`($sp) + +___ + for (my $i = $min; $i <= $max; $i++) { + my $mult = $max - $i + 1; + $code.=<<___; + stxv $i,-16*$mult($savesp) +___ + + } + + $code.=<<___; + +___ +} + +sub pop_vrs($$) +{ + my ($min, $max) = @_; + + $code.=<<___; + ld $savesp,0($sp) +___ + for (my $i = $min; $i <= $max; $i++) { + my $mult = $max - $i + 1; + $code.=<<___; + lxv $i,-16*$mult($savesp) +___ + } + + $code.=<<___; + mr $sp,$savesp + +___ +} + +sub load_vrs($$) +{ + my ($pointer, $reg_list) = @_; + + for (my $i = 0; $i <= 6; $i++) { + my $offset = $i * 8; + $code.=<<___; + lxsd $reg_list->[$i],$offset($pointer) +___ + } + + $code.=<<___; + +___ +} + +sub store_vrs($$) +{ + my ($pointer, $reg_list) = @_; + + for (my $i = 0; $i <= 12; $i++) { + my $offset = $i * 16; + $code.=<<___; + stxv $reg_list->[$i],$offset($pointer) +___ + } + + $code.=<<___; + +___ +} + +$code.=<<___; +.machine "any" +.text + +___ + +{ + # mul/square common + my ($t1, $t2, $t3, $t4) = ("v33", "v34", "v42", "v43"); + my ($zero, $one) = ("r8", "r9"); + my $out = "v51"; + + { + # + # p384_felem_mul + # + + my ($in1p, $in2p) = ("r4", "r5"); + my @in1 = map("v$_",(44..50)); + my @in2 = map("v$_",(35..41)); + + startproc("p384_felem_mul"); + + push_vrs(52, 63); + + $code.=<<___; + vspltisw $vzero,0 + +___ + + load_vrs($in1p, \@in1); + load_vrs($in2p, \@in2); + + $code.=<<___; + vmsumudm $out,$in1[0],$in2[0],$vzero + stxv $out,0($outp) + + xxpermdi $t1,$in1[0],$in1[1],0b00 + xxpermdi $t2,$in2[1],$in2[0],0b00 + vmsumudm $out,$t1,$t2,$vzero + stxv $out,16($outp) + + xxpermdi $t2,$in2[2],$in2[1],0b00 + vmsumudm $out,$t1,$t2,$vzero + vmsumudm $out,$in1[2],$in2[0],$out + stxv $out,32($outp) + + xxpermdi $t2,$in2[1],$in2[0],0b00 + xxpermdi $t3,$in1[2],$in1[3],0b00 + xxpermdi $t4,$in2[3],$in2[2],0b00 + vmsumudm $out,$t1,$t4,$vzero + vmsumudm $out,$t3,$t2,$out + stxv $out,48($outp) + + xxpermdi $t2,$in2[4],$in2[3],0b00 + xxpermdi $t4,$in2[2],$in2[1],0b00 + vmsumudm $out,$t1,$t2,$vzero + vmsumudm $out,$t3,$t4,$out + vmsumudm $out,$in1[4],$in2[0],$out + stxv $out,64($outp) + + xxpermdi $t2,$in2[5],$in2[4],0b00 + xxpermdi $t4,$in2[3],$in2[2],0b00 + vmsumudm $out,$t1,$t2,$vzero + vmsumudm $out,$t3,$t4,$out + xxpermdi $t4,$in2[1],$in2[0],0b00 + xxpermdi $t1,$in1[4],$in1[5],0b00 + vmsumudm $out,$t1,$t4,$out + stxv $out,80($outp) + + xxpermdi $t1,$in1[0],$in1[1],0b00 + xxpermdi $t2,$in2[6],$in2[5],0b00 + xxpermdi $t4,$in2[4],$in2[3],0b00 + vmsumudm $out,$t1,$t2,$vzero + vmsumudm $out,$t3,$t4,$out + xxpermdi $t2,$in2[2],$in2[1],0b00 + xxpermdi $t1,$in1[4],$in1[5],0b00 + vmsumudm $out,$t1,$t2,$out + vmsumudm $out,$in1[6],$in2[0],$out + stxv $out,96($outp) + + xxpermdi $t1,$in1[1],$in1[2],0b00 + xxpermdi $t2,$in2[6],$in2[5],0b00 + xxpermdi $t3,$in1[3],$in1[4],0b00 + vmsumudm $out,$t1,$t2,$vzero + vmsumudm $out,$t3,$t4,$out + xxpermdi $t3,$in2[2],$in2[1],0b00 + xxpermdi $t1,$in1[5],$in1[6],0b00 + vmsumudm $out,$t1,$t3,$out + stxv $out,112($outp) + + xxpermdi $t1,$in1[2],$in1[3],0b00 + xxpermdi $t3,$in1[4],$in1[5],0b00 + vmsumudm $out,$t1,$t2,$vzero + vmsumudm $out,$t3,$t4,$out + vmsumudm $out,$in1[6],$in2[2],$out + stxv $out,128($outp) + + xxpermdi $t1,$in1[3],$in1[4],0b00 + vmsumudm $out,$t1,$t2,$vzero + xxpermdi $t1,$in1[5],$in1[6],0b00 + vmsumudm $out,$t1,$t4,$out + stxv $out,144($outp) + + vmsumudm $out,$t3,$t2,$vzero + vmsumudm $out,$in1[6],$in2[4],$out + stxv $out,160($outp) + + vmsumudm $out,$t1,$t2,$vzero + stxv $out,176($outp) + + vmsumudm $out,$in1[6],$in2[6],$vzero + stxv $out,192($outp) +___ + + endproc("p384_felem_mul"); + } + + { + # + # p384_felem_square + # + + my ($inp) = ("r4"); + my @in = map("v$_",(44..50)); + my @inx2 = map("v$_",(35..41)); + + startproc("p384_felem_square"); + + push_vrs(52, 63); + + $code.=<<___; + vspltisw $vzero,0 + +___ + + load_vrs($inp, \@in); + + $code.=<<___; + li $zero,0 + li $one,1 + mtvsrdd $t1,$one,$zero +___ + + for (my $i = 0; $i <= 6; $i++) { + $code.=<<___; + vsld $inx2[$i],$in[$i],$t1 +___ + } + + $code.=<<___; + vmsumudm $out,$in[0],$in[0],$vzero + stxv $out,0($outp) + + vmsumudm $out,$in[0],$inx2[1],$vzero + stxv $out,16($outp) + + vmsumudm $out,$in[0],$inx2[2],$vzero + vmsumudm $out,$in[1],$in[1],$out + stxv $out,32($outp) + + xxpermdi $t1,$in[0],$in[1],0b00 + xxpermdi $t2,$inx2[3],$inx2[2],0b00 + vmsumudm $out,$t1,$t2,$vzero + stxv $out,48($outp) + + xxpermdi $t4,$inx2[4],$inx2[3],0b00 + vmsumudm $out,$t1,$t4,$vzero + vmsumudm $out,$in[2],$in[2],$out + stxv $out,64($outp) + + xxpermdi $t2,$inx2[5],$inx2[4],0b00 + vmsumudm $out,$t1,$t2,$vzero + vmsumudm $out,$in[2],$inx2[3],$out + stxv $out,80($outp) + + xxpermdi $t2,$inx2[6],$inx2[5],0b00 + vmsumudm $out,$t1,$t2,$vzero + vmsumudm $out,$in[2],$inx2[4],$out + vmsumudm $out,$in[3],$in[3],$out + stxv $out,96($outp) + + xxpermdi $t3,$in[1],$in[2],0b00 + vmsumudm $out,$t3,$t2,$vzero + vmsumudm $out,$in[3],$inx2[4],$out + stxv $out,112($outp) + + xxpermdi $t1,$in[2],$in[3],0b00 + vmsumudm $out,$t1,$t2,$vzero + vmsumudm $out,$in[4],$in[4],$out + stxv $out,128($outp) + + xxpermdi $t1,$in[3],$in[4],0b00 + vmsumudm $out,$t1,$t2,$vzero + stxv $out,144($outp) + + vmsumudm $out,$in[4],$inx2[6],$vzero + vmsumudm $out,$in[5],$in[5],$out + stxv $out,160($outp) + + vmsumudm $out,$in[5],$inx2[6],$vzero + stxv $out,176($outp) + + vmsumudm $out,$in[6],$in[6],$vzero + stxv $out,192($outp) +___ + + endproc("p384_felem_square"); + } +} + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +print $code; +close STDOUT or die "error closing STDOUT: $!"; diff --git a/crypto/ec/build.info b/crypto/ec/build.info index 1fa60a1deddd..4077bead7bdb 100644 --- a/crypto/ec/build.info +++ b/crypto/ec/build.info @@ -39,8 +39,9 @@ IF[{- !$disabled{asm} -}] $ECASM_ppc64=ecp_nistz256.c ecp_ppc.c ecp_nistz256-ppc64.s x25519-ppc64.s $ECDEF_ppc64=ECP_NISTZ256_ASM X25519_ASM IF[{- !$disabled{'ec_nistp_64_gcc_128'} -}] - $ECASM_ppc64=$ECASM_ppc64 ecp_nistp521-ppc64.s - $ECDEF_ppc64=$ECDEF_ppc64 ECP_NISTP521_ASM + $ECASM_ppc64=$ECASM_ppc64 ecp_nistp384-ppc64.s ecp_nistp521-ppc64.s + $ECDEF_ppc64=$ECDEF_ppc64 ECP_NISTP384_ASM ECP_NISTP521_ASM + INCLUDE[ecp_nistp384.o]=.. INCLUDE[ecp_nistp521.o]=.. ENDIF @@ -119,6 +120,7 @@ GENERATE[ecp_nistz256-armv8.S]=asm/ecp_nistz256-armv8.pl INCLUDE[ecp_nistz256-armv8.o]=.. GENERATE[ecp_nistz256-ppc64.s]=asm/ecp_nistz256-ppc64.pl +GENERATE[ecp_nistp384-ppc64.s]=asm/ecp_nistp384-ppc64.pl GENERATE[ecp_nistp521-ppc64.s]=asm/ecp_nistp521-ppc64.pl GENERATE[x25519-x86_64.s]=asm/x25519-x86_64.pl diff --git a/crypto/ec/ecp_nistp384.c b/crypto/ec/ecp_nistp384.c index a0559487ed4e..14f9530d07c6 100644 --- a/crypto/ec/ecp_nistp384.c +++ b/crypto/ec/ecp_nistp384.c @@ -691,6 +691,15 @@ void p384_felem_mul(widefelem out, const felem in1, const felem in2); static void felem_select(void) { +# if defined(_ARCH_PPC64) + if ((OPENSSL_ppccap_P & PPC_MADD300) && (OPENSSL_ppccap_P & PPC_ALTIVEC)) { + felem_square_p = p384_felem_square; + felem_mul_p = p384_felem_mul; + + return; + } +# endif + /* Default */ felem_square_p = felem_square_ref; felem_mul_p = felem_mul_ref; ++++++ openssl-ecc-Remove-extraneous-parentheses-in-secp384r1.patch ++++++ >From 670e73d9084465384b11ef24802ca4a313e1d2f4 Mon Sep 17 00:00:00 2001 From: Rohan McLure <rohanmcl...@linux.ibm.com> Date: Tue, 15 Aug 2023 15:20:20 +1000 Subject: [PATCH] ecc: Remove extraneous parentheses in secp384r1 Substitutions in the felem_reduce() method feature unecessary parentheses, remove them. Signed-off-by: Rohan McLure <rohan.mcl...@linux.ibm.com> Reviewed-by: Tomas Mraz <to...@openssl.org> Reviewed-by: Shane Lontis <shane.lon...@oracle.com> Reviewed-by: Hugo Landau <hlan...@openssl.org> (Merged from https://github.com/openssl/openssl/pull/21749) --- crypto/ec/ecp_nistp384.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crypto/ec/ecp_nistp384.c b/crypto/ec/ecp_nistp384.c index 14f9530d07c6..ff68f9cc7ad0 100644 --- a/crypto/ec/ecp_nistp384.c +++ b/crypto/ec/ecp_nistp384.c @@ -540,7 +540,7 @@ static void felem_reduce(felem out, const widefelem in) acc[7] += in[12] >> 8; acc[6] += (in[12] & 0xff) << 48; acc[6] -= in[12] >> 16; - acc[5] -= ((in[12] & 0xffff) << 40); + acc[5] -= (in[12] & 0xffff) << 40; acc[6] += in[12] >> 48; acc[5] += (in[12] & 0xffffffffffff) << 8; @@ -549,7 +549,7 @@ static void felem_reduce(felem out, const widefelem in) acc[6] += in[11] >> 8; acc[5] += (in[11] & 0xff) << 48; acc[5] -= in[11] >> 16; - acc[4] -= ((in[11] & 0xffff) << 40); + acc[4] -= (in[11] & 0xffff) << 40; acc[5] += in[11] >> 48; acc[4] += (in[11] & 0xffffffffffff) << 8; @@ -558,7 +558,7 @@ static void felem_reduce(felem out, const widefelem in) acc[5] += in[10] >> 8; acc[4] += (in[10] & 0xff) << 48; acc[4] -= in[10] >> 16; - acc[3] -= ((in[10] & 0xffff) << 40); + acc[3] -= (in[10] & 0xffff) << 40; acc[4] += in[10] >> 48; acc[3] += (in[10] & 0xffffffffffff) << 8; @@ -567,7 +567,7 @@ static void felem_reduce(felem out, const widefelem in) acc[4] += in[9] >> 8; acc[3] += (in[9] & 0xff) << 48; acc[3] -= in[9] >> 16; - acc[2] -= ((in[9] & 0xffff) << 40); + acc[2] -= (in[9] & 0xffff) << 40; acc[3] += in[9] >> 48; acc[2] += (in[9] & 0xffffffffffff) << 8; @@ -582,7 +582,7 @@ static void felem_reduce(felem out, const widefelem in) acc[3] += acc[8] >> 8; acc[2] += (acc[8] & 0xff) << 48; acc[2] -= acc[8] >> 16; - acc[1] -= ((acc[8] & 0xffff) << 40); + acc[1] -= (acc[8] & 0xffff) << 40; acc[2] += acc[8] >> 48; acc[1] += (acc[8] & 0xffffffffffff) << 8; @@ -591,7 +591,7 @@ static void felem_reduce(felem out, const widefelem in) acc[2] += acc[7] >> 8; acc[1] += (acc[7] & 0xff) << 48; acc[1] -= acc[7] >> 16; - acc[0] -= ((acc[7] & 0xffff) << 40); + acc[0] -= (acc[7] & 0xffff) << 40; acc[1] += acc[7] >> 48; acc[0] += (acc[7] & 0xffffffffffff) << 8; ++++++ openssl-no-html-docs.patch ++++++ --- /var/tmp/diff_new_pack.suGu1W/_old 2023-10-27 22:27:01.984961569 +0200 +++ /var/tmp/diff_new_pack.suGu1W/_new 2023-10-27 22:27:01.988961715 +0200 @@ -1,8 +1,8 @@ -Index: openssl-3.0.0-alpha1/Configurations/unix-Makefile.tmpl +Index: openssl-3.1.4/Configurations/unix-Makefile.tmpl =================================================================== ---- openssl-3.0.0-alpha1.orig/Configurations/unix-Makefile.tmpl 2020-04-23 22:56:27.365853133 +0200 -+++ openssl-3.0.0-alpha1/Configurations/unix-Makefile.tmpl 2020-04-23 22:56:52.474004636 +0200 -@@ -544,7 +544,7 @@ install_sw: install_dev install_engines +--- openssl-3.1.4.orig/Configurations/unix-Makefile.tmpl ++++ openssl-3.1.4/Configurations/unix-Makefile.tmpl +@@ -611,7 +611,7 @@ install_sw: install_dev install_engines uninstall_sw: uninstall_runtime uninstall_modules uninstall_engines uninstall_dev @@ -10,5 +10,5 @@ +install_docs: install_man_docs uninstall_docs: uninstall_man_docs uninstall_html_docs - $(RM) -r $(DESTDIR)$(DOCDIR) + $(RM) -r "$(DESTDIR)$(DOCDIR)" ++++++ openssl-powerpc-ecc-Fix-stack-allocation-secp384r1-asm.patch ++++++ >From 50f8b936b00dc18ce1f622a7a6aa46daf03da48b Mon Sep 17 00:00:00 2001 From: Rohan McLure <rohanmcl...@linux.ibm.com> Date: Wed, 16 Aug 2023 16:52:47 +1000 Subject: [PATCH] powerpc: ecc: Fix stack allocation secp384r1 asm Assembly acceleration secp384r1 opts to not use any callee-save VSRs, as VSX enabled systems make extensive use of renaming, and so writebacks in felem_{mul,square}() can be reordered for best cache effects. Remove stack allocations. This in turn fixes unmatched push/pops in felem_{mul,square}(). Signed-off-by: Rohan McLure <rohan.mcl...@linux.ibm.com> Reviewed-by: Tomas Mraz <to...@openssl.org> Reviewed-by: Shane Lontis <shane.lon...@oracle.com> Reviewed-by: Hugo Landau <hlan...@openssl.org> (Merged from https://github.com/openssl/openssl/pull/21749) --- crypto/ec/asm/ecp_nistp384-ppc64.pl | 49 ----------------------------- 1 file changed, 49 deletions(-) diff --git a/crypto/ec/asm/ecp_nistp384-ppc64.pl b/crypto/ec/asm/ecp_nistp384-ppc64.pl index 3f86b391af69..28f4168e5218 100755 --- a/crypto/ec/asm/ecp_nistp384-ppc64.pl +++ b/crypto/ec/asm/ecp_nistp384-ppc64.pl @@ -62,51 +62,6 @@ ($) ___ } - -sub push_vrs($$) -{ - my ($min, $max) = @_; - - my $count = $max - $min + 1; - - $code.=<<___; - mr $savesp,$sp - stdu $sp,-16*`$count+1`($sp) - -___ - for (my $i = $min; $i <= $max; $i++) { - my $mult = $max - $i + 1; - $code.=<<___; - stxv $i,-16*$mult($savesp) -___ - - } - - $code.=<<___; - -___ -} - -sub pop_vrs($$) -{ - my ($min, $max) = @_; - - $code.=<<___; - ld $savesp,0($sp) -___ - for (my $i = $min; $i <= $max; $i++) { - my $mult = $max - $i + 1; - $code.=<<___; - lxv $i,-16*$mult($savesp) -___ - } - - $code.=<<___; - mr $sp,$savesp - -___ -} - sub load_vrs($$) { my ($pointer, $reg_list) = @_; @@ -162,8 +117,6 @@ ($$) startproc("p384_felem_mul"); - push_vrs(52, 63); - $code.=<<___; vspltisw $vzero,0 @@ -268,8 +221,6 @@ ($$) startproc("p384_felem_square"); - push_vrs(52, 63); - $code.=<<___; vspltisw $vzero,0