https://gcc.gnu.org/g:2ae2203da598b580c27d65722320f380e2af58a5

commit r16-2284-g2ae2203da598b580c27d65722320f380e2af58a5
Author: Kyrylo Tkachov <ktkac...@nvidia.com>
Date:   Fri Jul 11 07:23:16 2025 -0700

    aarch64: Use SVE2 BSL2N for vector EON
    
    SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes:
    (x & z) | (~x & ~z) which is ~(x ^ z).
    Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both
    Advanced SIMD and SVE modes when TARGET_SVE2.
    This patch does that.
    For code like:
    
    uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
    svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
    
    We now generate:
    eon_q:
            bsl2n   z0.d, z0.d, z0.d, z1.d
            ret
    
    eon_z:
            bsl2n   z0.d, z0.d, z0.d, z1.d
            ret
    
    instead of the previous:
    eon_q:
            eor     v0.16b, v0.16b, v1.16b
            not     v0.16b, v0.16b
            ret
    
    eon_z:
            eor     z0.d, z0.d, z1.d
            ptrue   p3.b, all
            not     z0.d, p3/m, z0.d
            ret
    
    Bootstrapped and tested on aarch64-none-linux-gnu.
    
    Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
    
    gcc/
    
            * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>):
            New pattern.
            (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise.
    
    gcc/testsuite/
    
            * gcc.target/aarch64/sve2/eon_bsl2n.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve2.md                | 34 +++++++++++++++
 gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c | 52 +++++++++++++++++++++++
 2 files changed, 86 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 7148f54b363f..8c03e28cb084 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1956,6 +1956,40 @@
   }
 )
 
+;; Vector EON (~(x ^ y)) using BSL2N.
+(define_insn_and_rewrite "*aarch64_sve2_bsl2n_eon<mode>"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand")
+       (unspec:SVE_FULL_I
+         [(match_operand 3)
+          (not:SVE_FULL_I
+            (xor:SVE_FULL_I
+               (match_operand:SVE_FULL_I 1 "register_operand")
+               (match_operand:SVE_FULL_I 2 "register_operand")))]
+           UNSPEC_PRED_X))]
+  "TARGET_SVE2"
+  {@ [ cons: =0, 1, 2 ; attrs: movprfx ]
+     [ w  ,      0, w ; *              ] bsl2n\t%0.d, %0.d, %0.d, %2.d
+     [ ?&w,      w, w ; yes            ] movprfx\t%0, %1\;bsl2n\t%0.d, %0.d, %1.d, %2.d
+  }
+  "&& !CONSTANT_P (operands[3])"
+  {
+    operands[3] = CONSTM1_RTX (<VPRED>mode);
+  }
+)
+
+(define_insn "*aarch64_sve2_eon_bsl2n_unpred<mode>"
+  [(set (match_operand:VDQ_I 0 "register_operand")
+       (not:VDQ_I
+         (xor:VDQ_I
+           (match_operand:VDQ_I 1 "register_operand")
+           (match_operand:VDQ_I 2 "register_operand"))))]
+  "TARGET_SVE2"
+  {@ [ cons: =0, 1, 2 ; attrs: movprfx ]
+     [ w  ,      0, w ; *              ] bsl2n\t%Z0.d, %Z0.d, %Z0.d, %Z2.d
+     [ ?&w,      w, w ; yes            ] movprfx\t%Z0, %Z1\;bsl2n\t%Z0.d, %Z0.d, %Z1.d, %Z2.d
+  }
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Shift-and-accumulate operations
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
new file mode 100644
index 000000000000..74b463763735
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
@@ -0,0 +1,52 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+
+#define EON(x, y)   (~((x) ^ (y)))
+
+/*
+** eon_d:
+**     bsl2n   z0.d, z0.d, z0.d, z1.d
+**     ret
+*/
+uint32x2_t eon_d(uint32x2_t a, uint32x2_t b) { return EON(a, b); }
+
+/*
+** eon_d_mp:
+**     movprfx z0, z1
+**     bsl2n   z0.d, z0.d, z1.d, z2.d
+**     ret
+*/
+uint32x2_t eon_d_mp(uint32x2_t c, uint32x2_t a, uint32x2_t b) { return EON(a, b); }
+
+/*
+** eon_q:
+**     bsl2n   z0.d, z0.d, z0.d, z1.d
+**     ret
+*/
+uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
+
+/*
+** eon_q_mp:
+**     movprfx z0, z1
+**     bsl2n   z0.d, z0.d, z1.d, z2.d
+**     ret
+*/
+uint64x2_t eon_q_mp(uint64x2_t c, uint64x2_t a, uint64x2_t b) { return EON(a, b); }
+
+/*
+** eon_z:
+**     bsl2n   z0.d, z0.d, z0.d, z1.d
+**     ret
+*/
+svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
+
+/*
+** eon_z_mp:
+**     movprfx z0, z1
+**     bsl2n   z0.d, z0.d, z1.d, z2.d
+**     ret
+*/
+svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, b); }

Reply via email to