https://gcc.gnu.org/g:2ae2203da598b580c27d65722320f380e2af58a5
commit r16-2284-g2ae2203da598b580c27d65722320f380e2af58a5 Author: Kyrylo Tkachov <ktkac...@nvidia.com> Date: Fri Jul 11 07:23:16 2025 -0700 aarch64: Use SVE2 BSL2N for vector EON SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes: (x & z) | (~x & ~z) which is ~(x ^ z). Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both Advanced SIMD and SVE modes when TARGET_SVE2. This patch does that. For code like: uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); } svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); } We now generate: eon_q: bsl2n z0.d, z0.d, z0.d, z1.d ret eon_z: bsl2n z0.d, z0.d, z0.d, z1.d ret instead of the previous: eon_q: eor v0.16b, v0.16b, v1.16b not v0.16b, v0.16b ret eon_z: eor z0.d, z0.d, z1.d ptrue p3.b, all not z0.d, p3/m, z0.d ret Bootstrapped and tested on aarch64-none-linux-gnu. Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com> gcc/ * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>): New pattern. (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise. gcc/testsuite/ * gcc.target/aarch64/sve2/eon_bsl2n.c: New test. Diff: --- gcc/config/aarch64/aarch64-sve2.md | 34 +++++++++++++++ gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c | 52 +++++++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 7148f54b363f..8c03e28cb084 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -1956,6 +1956,40 @@ } ) +;; Vector EON (~(x ^ y)) using BSL2N. 
+(define_insn_and_rewrite "*aarch64_sve2_bsl2n_eon<mode>" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand 3) + (not:SVE_FULL_I + (xor:SVE_FULL_I + (match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand")))] + UNSPEC_PRED_X))] + "TARGET_SVE2" + {@ [ cons: =0, 1, 2 ; attrs: movprfx ] + [ w , 0, w ; * ] bsl2n\t%0.d, %0.d, %0.d, %2.d + [ ?&w, w, w ; yes ] movprfx\t%0, %1\;bsl2n\t%0.d, %0.d, %1.d, %2.d + } + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (<VPRED>mode); + } +) + +(define_insn "*aarch64_sve2_eon_bsl2n_unpred<mode>" + [(set (match_operand:VDQ_I 0 "register_operand") + (not:VDQ_I + (xor:VDQ_I + (match_operand:VDQ_I 1 "register_operand") + (match_operand:VDQ_I 2 "register_operand"))))] + "TARGET_SVE2" + {@ [ cons: =0, 1, 2 ; attrs: movprfx ] + [ w , 0, w ; * ] bsl2n\t%Z0.d, %Z0.d, %Z0.d, %Z2.d + [ ?&w, w, w ; yes ] movprfx\t%Z0, %Z1\;bsl2n\t%Z0.d, %Z0.d, %Z1.d, %Z2.d + } +) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Shift-and-accumulate operations ;; ------------------------------------------------------------------------- diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c new file mode 100644 index 000000000000..74b463763735 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c @@ -0,0 +1,52 @@ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include <arm_neon.h> +#include <arm_sve.h> + +#define EON(x, y) (~((x) ^ (y))) + +/* +** eon_d: +** bsl2n z0.d, z0.d, z0.d, z1.d +** ret +*/ +uint32x2_t eon_d(uint32x2_t a, uint32x2_t b) { return EON(a, b); } + +/* +** eon_d_mp: +** movprfx z0, z1 +** bsl2n z0.d, z0.d, z1.d, z2.d +** ret +*/ +uint32x2_t eon_d_mp(uint32x2_t c, uint32x2_t a, uint32x2_t b) { return EON(a, b); } + +/* +** eon_q: +** bsl2n z0.d, z0.d, z0.d, z1.d +** ret +*/ +uint64x2_t 
eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); } + +/* +** eon_q_mp: +** movprfx z0, z1 +** bsl2n z0.d, z0.d, z1.d, z2.d +** ret +*/ +uint64x2_t eon_q_mp(uint64x2_t c, uint64x2_t a, uint64x2_t b) { return EON(a, b); } + +/* +** eon_z: +** bsl2n z0.d, z0.d, z0.d, z1.d +** ret +*/ +svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); } + +/* +** eon_z_mp: +** movprfx z0, z1 +** bsl2n z0.d, z0.d, z1.d, z2.d +** ret +*/ +svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, b); }