RE: [PATCH] AArch64: Gate various crypto intrinsics availability based on features
Hi Tejas, Ok to backport, but can you please check the older supported releases as well if they need this? Ok for the other branches too assuming testing works ok. Thanks, Kyrill From: Tejas Belagod Sent: Friday, January 20, 2023 12:57 PM To: Kyrylo Tkachov ; gcc-patches@gcc.gnu.org Cc: Richard Sandiford ; Richard Earnshaw Subject: Re: [PATCH] AArch64: Gate various crypto intrinsics availability based on features From: Kyrylo Tkachov mailto:kyrylo.tkac...@arm.com>> Date: Tuesday, January 17, 2023 at 3:53 PM To: Tejas Belagod mailto:tejas.bela...@arm.com>>, gcc-patches@gcc.gnu.org<mailto:gcc-patches@gcc.gnu.org> mailto:gcc-patches@gcc.gnu.org>> Cc: Richard Sandiford mailto:richard.sandif...@arm.com>>, Richard Earnshaw mailto:richard.earns...@arm.com>> Subject: RE: [PATCH] AArch64: Gate various crypto intrinsics availability based on features Hi Tejas, > -Original Message- > From: Gcc-patches bounces+kyrylo.tkachov=arm@gcc.gnu.org<mailto:bounces+kyrylo.tkachov=arm@gcc.gnu.org>> > On Behalf Of Tejas Belagod > via Gcc-patches > Sent: Monday, January 16, 2023 7:12 AM > To: gcc-patches@gcc.gnu.org<mailto:gcc-patches@gcc.gnu.org> > Cc: Tejas Belagod mailto:tejas.bela...@arm.com>>; > Richard Sandiford > mailto:richard.sandif...@arm.com>>; Richard > Earnshaw > mailto:richard.earns...@arm.com>> > Subject: [PATCH] AArch64: Gate various crypto intrinsics availability based on > features > > The 64-bit variant of PMULL{2} and AES instructions are available if FEAT_AES > is implemented according to the Arm ARM [1]. Similarly FEAT_SHA1 and > FEAT_SHA256 enable the use of SHA1 and SHA256 instruction variants. > This patch fixes arm_neon.h to correctly reflect the feature availability > based > on '+aes' and '+sha2' as opposed to the ambiguous catch-all '+crypto'. > > [1] Section D17.2.61, C7.2.215 > > 2022-01-11 Tejas Belagod > > gcc/ >* config/aarch64/arm_neon.h: Gate AES and PMULL64 intrinsics >under target feature +aes as opposed to +crypto. Gate SHA1 and > SHA2 >intrinsics under +sha2. The ChangeLog should list the intrinsics affected like * config/aarch64/arm_neon.h (vmull_p64, vmull_high_p64): Gate under "nothing+aes" For example. Ok with a fixed ChangeLog. Thanks, Kyrill Thanks for the review Kyrill, now pushed to master. OK to backport to 12? Thanks, Tejas. > > testsuite/ > >* gcc.target/aarch64/acle/pmull64.c: New. >* gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c: Replace '+crypto' > with >corresponding feature flag based on the intrinsic. >* gcc.target/aarch64/aes-fuse-2.c: Likewise. >* gcc.target/aarch64/aes_1.c: Likewise. >* gcc.target/aarch64/aes_2.c: Likewise. >* gcc.target/aarch64/aes_xor_combine.c: Likewise. >* gcc.target/aarch64/sha1_1.c: Likewise. >* gcc.target/aarch64/sha256_1.c: Likewise. >* gcc.target/aarch64/target_attr_crypto_ice_1.c: Likewise. > --- > gcc/config/aarch64/arm_neon.h | 35 ++- > .../gcc.target/aarch64/acle/pmull64.c | 14 > gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c | 4 +-- > gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c | 4 +-- > gcc/testsuite/gcc.target/aarch64/aes_1.c | 2 +- > gcc/testsuite/gcc.target/aarch64/aes_2.c | 4 ++- > .../gcc.target/aarch64/aes_xor_combine.c | 2 +- > gcc/testsuite/gcc.target/aarch64/sha1_1.c | 2 +- > gcc/testsuite/gcc.target/aarch64/sha256_1.c | 2 +- > .../aarch64/target_attr_crypto_ice_1.c| 2 +- > 10 files changed, 44 insertions(+), 27 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/pmull64.c > > diff --git a/gcc/config/aarch64/arm_neon.h > b/gcc/config/aarch64/arm_neon.h > index cf6af728ca9..a795a387b38 100644 > --- a/gcc/config/aarch64/arm_neon.h > +++ b/gcc/config/aarch64/arm_neon.h > @@ -7496,7 +7496,7 @@ vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, > int32x4_t __c, const int __d) > #pragma GCC pop_options > > #pragma GCC push_options > -#pragma GCC target ("+nothing+crypto") > +#pragma GCC target ("+nothing+aes") > /* vaes */ > > __extension__ extern __inline uint8x16_t > @@ -7526,6 +7526,22 @@ vaesimcq_u8 (uint8x16_t data) > { >return __builtin_aarch64_crypto_aesimcv16qi_uu (data); > } > + > +__extension__ extern __inline poly128_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vmull_p64 (poly64_t __a, poly64_t __b) > +{ > + return > +__builtin_aarch64_crypto_pmulldi_ppp (__a, __b); > +} > + > +__extension__ extern __inline poly128_t >
Re: [PATCH] AArch64: Gate various crypto intrinsics availability based on features
From: Kyrylo Tkachov Date: Tuesday, January 17, 2023 at 3:53 PM To: Tejas Belagod , gcc-patches@gcc.gnu.org Cc: Richard Sandiford , Richard Earnshaw Subject: RE: [PATCH] AArch64: Gate various crypto intrinsics availability based on features Hi Tejas, > -Original Message- > From: Gcc-patches bounces+kyrylo.tkachov=arm@gcc.gnu.org> On Behalf Of Tejas Belagod > via Gcc-patches > Sent: Monday, January 16, 2023 7:12 AM > To: gcc-patches@gcc.gnu.org > Cc: Tejas Belagod ; Richard Sandiford > ; Richard Earnshaw > > Subject: [PATCH] AArch64: Gate various crypto intrinsics availability based on > features > > The 64-bit variant of PMULL{2} and AES instructions are available if FEAT_AES > is implemented according to the Arm ARM [1]. Similarly FEAT_SHA1 and > FEAT_SHA256 enable the use of SHA1 and SHA256 instruction variants. > This patch fixes arm_neon.h to correctly reflect the feature availability > based > on '+aes' and '+sha2' as opposed to the ambiguous catch-all '+crypto'. > > [1] Section D17.2.61, C7.2.215 > > 2022-01-11 Tejas Belagod > > gcc/ >* config/aarch64/arm_neon.h: Gate AES and PMULL64 intrinsics >under target feature +aes as opposed to +crypto. Gate SHA1 and > SHA2 >intrinsics under +sha2. The ChangeLog should list the intrinsics affected like * config/aarch64/arm_neon.h (vmull_p64, vmull_high_p64): Gate under "nothing+aes" For example. Ok with a fixed ChangeLog. Thanks, Kyrill Thanks for the review Kyrill, now pushed to master. OK to backport to 12? Thanks, Tejas. > > testsuite/ > >* gcc.target/aarch64/acle/pmull64.c: New. >* gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c: Replace '+crypto' > with >corresponding feature flag based on the intrinsic. >* gcc.target/aarch64/aes-fuse-2.c: Likewise. >* gcc.target/aarch64/aes_1.c: Likewise. >* gcc.target/aarch64/aes_2.c: Likewise. >* gcc.target/aarch64/aes_xor_combine.c: Likewise. >* gcc.target/aarch64/sha1_1.c: Likewise. >* gcc.target/aarch64/sha256_1.c: Likewise. >* gcc.target/aarch64/target_attr_crypto_ice_1.c: Likewise. > --- > gcc/config/aarch64/arm_neon.h | 35 ++- > .../gcc.target/aarch64/acle/pmull64.c | 14 > gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c | 4 +-- > gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c | 4 +-- > gcc/testsuite/gcc.target/aarch64/aes_1.c | 2 +- > gcc/testsuite/gcc.target/aarch64/aes_2.c | 4 ++- > .../gcc.target/aarch64/aes_xor_combine.c | 2 +- > gcc/testsuite/gcc.target/aarch64/sha1_1.c | 2 +- > gcc/testsuite/gcc.target/aarch64/sha256_1.c | 2 +- > .../aarch64/target_attr_crypto_ice_1.c| 2 +- > 10 files changed, 44 insertions(+), 27 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/pmull64.c > > diff --git a/gcc/config/aarch64/arm_neon.h > b/gcc/config/aarch64/arm_neon.h > index cf6af728ca9..a795a387b38 100644 > --- a/gcc/config/aarch64/arm_neon.h > +++ b/gcc/config/aarch64/arm_neon.h > @@ -7496,7 +7496,7 @@ vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, > int32x4_t __c, const int __d) > #pragma GCC pop_options > > #pragma GCC push_options > -#pragma GCC target ("+nothing+crypto") > +#pragma GCC target ("+nothing+aes") > /* vaes */ > > __extension__ extern __inline uint8x16_t > @@ -7526,6 +7526,22 @@ vaesimcq_u8 (uint8x16_t data) > { >return __builtin_aarch64_crypto_aesimcv16qi_uu (data); > } > + > +__extension__ extern __inline poly128_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vmull_p64 (poly64_t __a, poly64_t __b) > +{ > + return > +__builtin_aarch64_crypto_pmulldi_ppp (__a, __b); > +} > + > +__extension__ extern __inline poly128_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) > +{ > + return __builtin_aarch64_crypto_pmullv2di_ppp (__a, __b); > +} > + > #pragma GCC pop_options > > /* vcage */ > @@ -20772,7 +20788,7 @@ vrsrad_n_u64 (uint64_t __a, uint64_t __b, const > int __c) > } > > #pragma GCC push_options > -#pragma GCC target ("+nothing+crypto") > +#pragma GCC target ("+nothing+sha2") > > /* vsha1 */ > > @@ -20849,21 +20865,6 @@ vsha256su1q_u32 (uint32x4_t __tw0_3, > uint32x4_t __w8_11, uint32x4_t __w12_15) > __w12_15); > } > > -__extension__ extern __inline poly128_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vmull_p64 (poly64_t
RE: [PATCH] AArch64: Gate various crypto intrinsics availability based on features
Hi Tejas, > -Original Message- > From: Gcc-patches bounces+kyrylo.tkachov=arm@gcc.gnu.org> On Behalf Of Tejas Belagod > via Gcc-patches > Sent: Monday, January 16, 2023 7:12 AM > To: gcc-patches@gcc.gnu.org > Cc: Tejas Belagod ; Richard Sandiford > ; Richard Earnshaw > > Subject: [PATCH] AArch64: Gate various crypto intrinsics availability based on > features > > The 64-bit variant of PMULL{2} and AES instructions are available if FEAT_AES > is implemented according to the Arm ARM [1]. Similarly FEAT_SHA1 and > FEAT_SHA256 enable the use of SHA1 and SHA256 instruction variants. > This patch fixes arm_neon.h to correctly reflect the feature availability > based > on '+aes' and '+sha2' as opposed to the ambiguous catch-all '+crypto'. > > [1] Section D17.2.61, C7.2.215 > > 2022-01-11 Tejas Belagod > > gcc/ > * config/aarch64/arm_neon.h: Gate AES and PMULL64 intrinsics > under target feature +aes as opposed to +crypto. Gate SHA1 and > SHA2 > intrinsics under +sha2. The ChangeLog should list the intrinsics affected like * config/aarch64/arm_neon.h (vmull_p64, vmull_high_p64): Gate under "nothing+aes" For example. Ok with a fixed ChangeLog. Thanks, Kyrill > > testsuite/ > > * gcc.target/aarch64/acle/pmull64.c: New. > * gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c: Replace '+crypto' > with > corresponding feature flag based on the intrinsic. > * gcc.target/aarch64/aes-fuse-2.c: Likewise. > * gcc.target/aarch64/aes_1.c: Likewise. > * gcc.target/aarch64/aes_2.c: Likewise. > * gcc.target/aarch64/aes_xor_combine.c: Likewise. > * gcc.target/aarch64/sha1_1.c: Likewise. > * gcc.target/aarch64/sha256_1.c: Likewise. > * gcc.target/aarch64/target_attr_crypto_ice_1.c: Likewise. > --- > gcc/config/aarch64/arm_neon.h | 35 ++- > .../gcc.target/aarch64/acle/pmull64.c | 14 > gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c | 4 +-- > gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c | 4 +-- > gcc/testsuite/gcc.target/aarch64/aes_1.c | 2 +- > gcc/testsuite/gcc.target/aarch64/aes_2.c | 4 ++- > .../gcc.target/aarch64/aes_xor_combine.c | 2 +- > gcc/testsuite/gcc.target/aarch64/sha1_1.c | 2 +- > gcc/testsuite/gcc.target/aarch64/sha256_1.c | 2 +- > .../aarch64/target_attr_crypto_ice_1.c| 2 +- > 10 files changed, 44 insertions(+), 27 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/pmull64.c > > diff --git a/gcc/config/aarch64/arm_neon.h > b/gcc/config/aarch64/arm_neon.h > index cf6af728ca9..a795a387b38 100644 > --- a/gcc/config/aarch64/arm_neon.h > +++ b/gcc/config/aarch64/arm_neon.h > @@ -7496,7 +7496,7 @@ vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, > int32x4_t __c, const int __d) > #pragma GCC pop_options > > #pragma GCC push_options > -#pragma GCC target ("+nothing+crypto") > +#pragma GCC target ("+nothing+aes") > /* vaes */ > > __extension__ extern __inline uint8x16_t > @@ -7526,6 +7526,22 @@ vaesimcq_u8 (uint8x16_t data) > { >return __builtin_aarch64_crypto_aesimcv16qi_uu (data); > } > + > +__extension__ extern __inline poly128_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vmull_p64 (poly64_t __a, poly64_t __b) > +{ > + return > +__builtin_aarch64_crypto_pmulldi_ppp (__a, __b); > +} > + > +__extension__ extern __inline poly128_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) > +{ > + return __builtin_aarch64_crypto_pmullv2di_ppp (__a, __b); > +} > + > #pragma GCC pop_options > > /* vcage */ > @@ -20772,7 +20788,7 @@ vrsrad_n_u64 (uint64_t __a, uint64_t __b, const > int __c) > } > > #pragma GCC push_options > -#pragma GCC target ("+nothing+crypto") > +#pragma GCC target ("+nothing+sha2") > > /* vsha1 */ > > @@ -20849,21 +20865,6 @@ vsha256su1q_u32 (uint32x4_t __tw0_3, > uint32x4_t __w8_11, uint32x4_t __w12_15) > __w12_15); > } > > -__extension__ extern __inline poly128_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vmull_p64 (poly64_t __a, poly64_t __b) > -{ > - return > -__builtin_aarch64_crypto_pmulldi_ppp (__a, __b); > -} > - > -__extension__ extern __inline poly128_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) > -{ > - return __builtin_aarch64_
[PATCH] AArch64: Gate various crypto intrinsics availability based on features
The 64-bit variant of PMULL{2} and AES instructions are available if FEAT_AES is implemented according to the Arm ARM [1]. Similarly FEAT_SHA1 and FEAT_SHA256 enable the use of SHA1 and SHA256 instruction variants. This patch fixes arm_neon.h to correctly reflect the feature availability based on '+aes' and '+sha2' as opposed to the ambiguous catch-all '+crypto'. [1] Section D17.2.61, C7.2.215 2022-01-11 Tejas Belagod gcc/ * config/aarch64/arm_neon.h: Gate AES and PMULL64 intrinsics under target feature +aes as opposed to +crypto. Gate SHA1 and SHA2 intrinsics under +sha2. testsuite/ * gcc.target/aarch64/acle/pmull64.c: New. * gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c: Replace '+crypto' with corresponding feature flag based on the intrinsic. * gcc.target/aarch64/aes-fuse-2.c: Likewise. * gcc.target/aarch64/aes_1.c: Likewise. * gcc.target/aarch64/aes_2.c: Likewise. * gcc.target/aarch64/aes_xor_combine.c: Likewise. * gcc.target/aarch64/sha1_1.c: Likewise. * gcc.target/aarch64/sha256_1.c: Likewise. * gcc.target/aarch64/target_attr_crypto_ice_1.c: Likewise. --- gcc/config/aarch64/arm_neon.h | 35 ++- .../gcc.target/aarch64/acle/pmull64.c | 14 gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c | 4 +-- gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c | 4 +-- gcc/testsuite/gcc.target/aarch64/aes_1.c | 2 +- gcc/testsuite/gcc.target/aarch64/aes_2.c | 4 ++- .../gcc.target/aarch64/aes_xor_combine.c | 2 +- gcc/testsuite/gcc.target/aarch64/sha1_1.c | 2 +- gcc/testsuite/gcc.target/aarch64/sha256_1.c | 2 +- .../aarch64/target_attr_crypto_ice_1.c| 2 +- 10 files changed, 44 insertions(+), 27 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/pmull64.c diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index cf6af728ca9..a795a387b38 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -7496,7 +7496,7 @@ vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d) #pragma GCC pop_options #pragma GCC push_options -#pragma GCC target ("+nothing+crypto") +#pragma GCC target ("+nothing+aes") /* vaes */ __extension__ extern __inline uint8x16_t @@ -7526,6 +7526,22 @@ vaesimcq_u8 (uint8x16_t data) { return __builtin_aarch64_crypto_aesimcv16qi_uu (data); } + +__extension__ extern __inline poly128_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_p64 (poly64_t __a, poly64_t __b) +{ + return +__builtin_aarch64_crypto_pmulldi_ppp (__a, __b); +} + +__extension__ extern __inline poly128_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) +{ + return __builtin_aarch64_crypto_pmullv2di_ppp (__a, __b); +} + #pragma GCC pop_options /* vcage */ @@ -20772,7 +20788,7 @@ vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c) } #pragma GCC push_options -#pragma GCC target ("+nothing+crypto") +#pragma GCC target ("+nothing+sha2") /* vsha1 */ @@ -20849,21 +20865,6 @@ vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15) __w12_15); } -__extension__ extern __inline poly128_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_p64 (poly64_t __a, poly64_t __b) -{ - return -__builtin_aarch64_crypto_pmulldi_ppp (__a, __b); -} - -__extension__ extern __inline poly128_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) -{ - return __builtin_aarch64_crypto_pmullv2di_ppp (__a, __b); -} - #pragma GCC pop_options /* vshl */ diff --git a/gcc/testsuite/gcc.target/aarch64/acle/pmull64.c b/gcc/testsuite/gcc.target/aarch64/acle/pmull64.c new file mode 100644 index 000..6a1e99e2d0d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/acle/pmull64.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.2-a" } */ + +#pragma push_options +#pragma GCC target ("+aes") + +#include "arm_neon.h" + +int foo (poly64_t a, poly64_t b) +{ + return vgetq_lane_s32 (vreinterpretq_s32_p128 (vmull_p64 (a, b)), 0); +} + +/* { dg-final { scan-assembler "\tpmull\tv" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c b/gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c index d7b4f89919d..1b4e10f78db 100644 --- a/gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c +++ b/gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */ -/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/ +/* { dg-options "-O3 -mcpu=cortex-a72+aes -dp" } */ +/* { dg-additional-options "-march=armv8-a+aes" { target { aarch64*-*-* } } }*/ #include di