https://gcc.gnu.org/g:5300e2bda9c74ca458f01c7e8fc3ea035687b900

commit r16-2201-g5300e2bda9c74ca458f01c7e8fc3ea035687b900
Author: Kyrylo Tkachov <ktkac...@nvidia.com>
Date:   Thu Jul 3 08:37:33 2025 -0700

    aarch64: Allow 64-bit vector modes in pattern for BCAX instruction
    
    The BCAX instruction from TARGET_SHA3 only operates on the full .16b form
    of the inputs, but as it's a pure bitwise operation we can use it for the
    64-bit modes as well, as there we don't care about the upper 64 bits.
    This patch extends the relevant pattern in aarch64-simd.md to accept the
    64-bit vector modes.
    
    Thus, for the input:
    uint32x2_t
    bcax_s (uint32x2_t a, uint32x2_t b, uint32x2_t c)
    {
      return BCAX (a, b, c);
    }
    
    we can now generate:
    bcax_s:
            bcax    v0.16b, v0.16b, v1.16b, v2.16b
            ret
    
    instead of the current:
    bcax_s:
            bic     v1.8b, v1.8b, v2.8b
            eor     v0.8b, v1.8b, v0.8b
            ret
    
    This patch doesn't cover the DI/V1DI modes as that would require extending
    the bcaxqdi4 pattern with =r,r alternatives and adding splitting logic to
    handle the cases where the operands arrive in GP regs.  It is doable, but
    can be a separate patch.  This patch as is should always be a
    straightforward improvement.
    
    Bootstrapped and tested on aarch64-none-linux-gnu.
    
    Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
    
    gcc/
    
            * config/aarch64/aarch64-simd.md (bcaxq<mode>4): Use VDQ_I mode
            iterator.
    
    gcc/testsuite/
    
            * gcc.target/aarch64/simd/bcax_d.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md             | 12 ++++++------
 gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c | 15 +++++++++++++++
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8de79caa86d0..879b1a27bb19 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -9241,12 +9241,12 @@
 )
 
 (define_insn "bcaxq<mode>4"
-  [(set (match_operand:VQ_I 0 "register_operand" "=w")
-       (xor:VQ_I
-        (and:VQ_I
-         (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
-         (match_operand:VQ_I 2 "register_operand" "w"))
-        (match_operand:VQ_I 1 "register_operand" "w")))]
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
+       (xor:VDQ_I
+        (and:VDQ_I
+         (not:VDQ_I (match_operand:VDQ_I 3 "register_operand" "w"))
+         (match_operand:VDQ_I 2 "register_operand" "w"))
+        (match_operand:VDQ_I 1 "register_operand" "w")))]
   "TARGET_SHA3"
   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
   [(set_attr "type" "crypto_sha3")]
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c b/gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c
new file mode 100644
index 000000000000..d68f0e102bf1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+#pragma GCC target "+sha3"
+
+#define BCAX(x,y,z)  ((x) ^ ((y) & ~(z)))
+
+uint32x2_t bcax_s (uint32x2_t a, uint32x2_t b, uint32x2_t c) { return BCAX (a, b, c); }
+uint16x4_t bcax_h (uint16x4_t a, uint16x4_t b, uint16x4_t c) { return BCAX (a, b, c); }
+uint8x8_t bcax_b (uint8x8_t a, uint8x8_t b, uint8x8_t c) { return BCAX (a, b, c); }
+
+/* { dg-final { scan-assembler-times {bcax\tv0.16b, v0.16b, v1.16b, v2.16b} 3 } } */
+

Reply via email to