From 761b14804c8bbeae745cb7a2ab58e26a3775b096 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <ktkachov@nvidia.com>
Date: Fri, 11 Jul 2025 07:23:16 -0700
Subject: [PATCH 2/2] aarch64: Use SVE2 BSL2N for vector EON

SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes:
(x & z) | (~x & ~z) which is ~(x ^ z).
Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both
Advanced SIMD and SVE modes when TARGET_SVE2.
This patch does that.  The tied register requirements of BSL2N and the MOVPRFX
rules mean we can't use the MOVPRFX form here so I have not included that
alternative.  Correct me if I'm wrong on this.

For code like:

uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, b); }

We now generate:
eon_q:
        bsl2n   z0.d, z0.d, z0.d, z1.d
        ret

eon_z:
        bsl2n   z0.d, z0.d, z0.d, z1.d
        ret

eon_z_mp:
        bsl2n   z1.d, z1.d, z1.d, z2.d
        mov     z0.d, z1.d
        ret

instead of the previous:
eon_q:
        eor     v0.16b, v0.16b, v1.16b
        not     v0.16b, v0.16b
        ret

eon_z:
        eor     z0.d, z0.d, z1.d
        ptrue   p3.b, all
        not     z0.d, p3/m, z0.d
        ret

eon_z_mp:
        eor     z0.d, z1.d, z2.d
        ptrue   p3.b, all
        not     z0.d, p3/m, z0.d
        ret

Bootstrapped and tested on aarch64-none-linux-gnu.

Signed-off-by: Kyrylo Tkachov <ktkachov@nvidia.com>

gcc/

	* config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>):
	New pattern.
	(*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise.

gcc/testsuite/

	* gcc.target/aarch64/sve2/eon_bsl2n.c: New test.
---
 gcc/config/aarch64/aarch64-sve2.md            | 28 ++++++++++
 .../gcc.target/aarch64/sve2/eon_bsl2n.c       | 52 +++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c

diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 6d6dc94cd81..a011b947f51 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -2053,6 +2053,34 @@
   }
 )
 
+;; Vector EON (~(x, y)) using BSL2N.
+(define_insn_and_rewrite "*aarch64_sve2_bsl2n_eon<mode>"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
+	(unspec:SVE_FULL_I
+	  [(match_operand 3)
+	   (not:SVE_FULL_I
+	     (xor:SVE_FULL_I
+		(match_operand:SVE_FULL_I 1 "register_operand" "%0")
+		(match_operand:SVE_FULL_I 2 "register_operand" "w")))]
+	    UNSPEC_PRED_X))]
+  "TARGET_SVE2"
+  "bsl2n\t%0.d, %0.d, %0.d, %2.d"
+  "&& !CONSTANT_P (operands[3])"
+  {
+    operands[3] = CONSTM1_RTX (<VPRED>mode);
+  }
+)
+
+(define_insn "*aarch64_sve2_eon_bsl2n_unpred<mode>"
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
+       (not:VDQ_I
+         (xor:VDQ_I
+           (match_operand:VDQ_I 1 "register_operand" "%0")
+           (match_operand:VDQ_I 2 "register_operand" "w"))))]
+  "TARGET_SVE2"
+  "bsl2n\t%Z0.d, %Z0.d, %Z0.d, %Z2.d"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Shift-and-accumulate operations
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
new file mode 100644
index 00000000000..d547e3152d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
@@ -0,0 +1,52 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+
+#define EON(x, y)   (~((x) ^ (y)))
+
+/*
+** eon_d:
+** 	bsl2n	z0.d, z0.d, z0.d, z1.d
+** 	ret
+*/
+uint32x2_t eon_d(uint32x2_t a, uint32x2_t b) { return EON(a, b); }
+
+/*
+** eon_d_mp:
+** 	bsl2n	z1.d, z1.d, z1.d, z2.d
+** 	...
+** 	ret
+*/
+uint32x2_t eon_d_mp(uint32x2_t c, uint32x2_t a, uint32x2_t b) { return EON(a, b); }
+
+/*
+** eon_q:
+** 	bsl2n	z0.d, z0.d, z0.d, z1.d
+** 	ret
+*/
+uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
+
+/*
+** eon_q_mp:
+** 	bsl2n	z1.d, z1.d, z1.d, z2.d
+** 	...
+** 	ret
+*/
+uint64x2_t eon_q_mp(uint64x2_t c, uint64x2_t a, uint64x2_t b) { return EON(a, b); }
+
+/*
+** eon_z:
+** 	bsl2n	z0.d, z0.d, z0.d, z1.d
+** 	ret
+*/
+svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
+
+/*
+** eon_z_mp:
+** 	bsl2n	z1.d, z1.d, z1.d, z2.d
+** 	...
+** 	ret
+*/
+svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, b); }
-- 
2.44.0

