https://gcc.gnu.org/g:5300e2bda9c74ca458f01c7e8fc3ea035687b900
commit r16-2201-g5300e2bda9c74ca458f01c7e8fc3ea035687b900
Author: Kyrylo Tkachov <ktkac...@nvidia.com>
Date:   Thu Jul 3 08:37:33 2025 -0700

    aarch64: Allow 64-bit vector modes in pattern for BCAX instruction

    The BCAX instruction from TARGET_SHA3 only operates on the full .16b form
    of the inputs, but as it's a pure bitwise operation we can use it for the
    64-bit modes as well, because in that case we don't care about the upper
    64 bits.  This patch extends the relevant pattern in aarch64-simd.md to
    accept the 64-bit vector modes.

    Thus, for the input:

    uint32x2_t bcax_s (uint32x2_t a, uint32x2_t b, uint32x2_t c) { return BCAX (a, b, c); }

    we can now generate:

    bcax_s:
            bcax    v0.16b, v0.16b, v1.16b, v2.16b
            ret

    instead of the current:

    bcax_s:
            bic     v1.8b, v1.8b, v2.8b
            eor     v0.8b, v1.8b, v0.8b
            ret

    This patch doesn't cover the DI/V1DI modes, as that would require extending
    the bcaxqdi4 pattern with =r,r alternatives and adding splitting logic to
    handle the cases where the operands arrive in GP regs.  It is doable, but
    can be a separate patch.  As it stands, this patch should always be a
    straightforward improvement.

    Bootstrapped and tested on aarch64-none-linux-gnu.

    Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>

    gcc/

            * config/aarch64/aarch64-simd.md (bcaxq<mode>4): Use VDQ_I mode
            iterator.

    gcc/testsuite/

            * gcc.target/aarch64/simd/bcax_d.c: New test.
Diff: --- gcc/config/aarch64/aarch64-simd.md | 12 ++++++------ gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 8de79caa86d0..879b1a27bb19 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -9241,12 +9241,12 @@ ) (define_insn "bcaxq<mode>4" - [(set (match_operand:VQ_I 0 "register_operand" "=w") - (xor:VQ_I - (and:VQ_I - (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w")) - (match_operand:VQ_I 2 "register_operand" "w")) - (match_operand:VQ_I 1 "register_operand" "w")))] + [(set (match_operand:VDQ_I 0 "register_operand" "=w") + (xor:VDQ_I + (and:VDQ_I + (not:VDQ_I (match_operand:VDQ_I 3 "register_operand" "w")) + (match_operand:VDQ_I 2 "register_operand" "w")) + (match_operand:VDQ_I 1 "register_operand" "w")))] "TARGET_SHA3" "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b" [(set_attr "type" "crypto_sha3")] diff --git a/gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c b/gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c new file mode 100644 index 000000000000..d68f0e102bf1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +#include <arm_neon.h> + +#pragma GCC target "+sha3" + +#define BCAX(x,y,z) ((x) ^ ((y) & ~(z))) + +uint32x2_t bcax_s (uint32x2_t a, uint32x2_t b, uint32x2_t c) { return BCAX (a, b, c); } +uint16x4_t bcax_h (uint16x4_t a, uint16x4_t b, uint16x4_t c) { return BCAX (a, b, c); } +uint8x8_t bcax_b (uint8x8_t a, uint8x8_t b, uint8x8_t c) { return BCAX (a, b, c); } + +/* { dg-final { scan-assembler-times {bcax\tv0.16b, v0.16b, v1.16b, v2.16b} 3 } } */ +