Kyrylo Tkachov <ktkac...@nvidia.com> writes: > Hi all, > > SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes: > (x & z) | (~x & ~z) which is ~(x ^ z). > Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both > Advanced SIMD and SVE modes when TARGET_SVE2. > This patch does that. The tied register requirements of BSL2N and the MOVPRFX > rules mean we can't use the MOVPRFX form here so I have not included that > alternative. Correct me if I'm wrong on this.
I think we can still use BSL2N, similarly to the patch from the other day. It's just that the asm would need to be: movprfx\t%0, %1\;bsl2n\t%0, %0, %1, %2 (with constraints &w/w/w). Thanks, Richard > > For code like: > > uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); } > svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); } > svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, > b); } > > We now generate: > eon_q: > bsl2n z0.d, z0.d, z0.d, z1.d > ret > > eon_z: > bsl2n z0.d, z0.d, z0.d, z1.d > ret > > eon_z_mp: > bsl2n z1.d, z1.d, z1.d, z2.d > mov z0.d, z1.d > ret > > instead of the previous: > eon_q: > eor v0.16b, v0.16b, v1.16b > not v0.16b, v0.16b > ret > > eon_z: > eor z0.d, z0.d, z1.d > ptrue p3.b, all > not z0.d, p3/m, z0.d > ret > > eon_z_mp: > eor z0.d, z1.d, z2.d > ptrue p3.b, all > not z0.d, p3/m, z0.d > ret > > Bootstrapped and tested on aarch64-none-linux-gnu. > Ok for trunk? > Thanks, > Kyrill > > Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com> > > gcc/ > > * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>): > New pattern. > (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise. > > gcc/testsuite/ > > * gcc.target/aarch64/sve2/eon_bsl2n.c: New test. > > From 761b14804c8bbeae745cb7a2ab58e26a3775b096 Mon Sep 17 00:00:00 2001 > From: Kyrylo Tkachov <ktkac...@nvidia.com> > Date: Fri, 11 Jul 2025 07:23:16 -0700 > Subject: [PATCH 2/2] aarch64: Use SVE2 BSL2N for vector EON > > SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes: > (x & z) | (~x & ~z) which is ~(x ^ z). > Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both > Advanced SIMD and SVE modes when TARGET_SVE2. > This patch does that. The tied register requirements of BSL2N and the MOVPRFX > rules mean we can't use the MOVPRFX form here so I have not included that > alternative. Correct me if I'm wrong on this. 
> > For code like: > > uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); } > svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); } > svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, > b); } > > We now generate: > eon_q: > bsl2n z0.d, z0.d, z0.d, z1.d > ret > > eon_z: > bsl2n z0.d, z0.d, z0.d, z1.d > ret > > eon_z_mp: > bsl2n z1.d, z1.d, z1.d, z2.d > mov z0.d, z1.d > ret > > instead of the previous: > eon_q: > eor v0.16b, v0.16b, v1.16b > not v0.16b, v0.16b > ret > > eon_z: > eor z0.d, z0.d, z1.d > ptrue p3.b, all > not z0.d, p3/m, z0.d > ret > > eon_z_mp: > eor z0.d, z1.d, z2.d > ptrue p3.b, all > not z0.d, p3/m, z0.d > ret > > Bootstrapped and tested on aarch64-none-linux-gnu. > > Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com> > > gcc/ > > * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>): > New pattern. > (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise. > > gcc/testsuite/ > > * gcc.target/aarch64/sve2/eon_bsl2n.c: New test. > --- > gcc/config/aarch64/aarch64-sve2.md | 28 ++++++++++ > .../gcc.target/aarch64/sve2/eon_bsl2n.c | 52 +++++++++++++++++++ > 2 files changed, 80 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c > > diff --git a/gcc/config/aarch64/aarch64-sve2.md > b/gcc/config/aarch64/aarch64-sve2.md > index 6d6dc94cd81..a011b947f51 100644 > --- a/gcc/config/aarch64/aarch64-sve2.md > +++ b/gcc/config/aarch64/aarch64-sve2.md > @@ -2053,6 +2053,34 @@ > } > ) > > +;; Vector EON (~(x ^ y)) using BSL2N. 
> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n_eon<mode>" > + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") > + (unspec:SVE_FULL_I > + [(match_operand 3) > + (not:SVE_FULL_I > + (xor:SVE_FULL_I > + (match_operand:SVE_FULL_I 1 "register_operand" "%0") > + (match_operand:SVE_FULL_I 2 "register_operand" "w")))] > + UNSPEC_PRED_X))] > + "TARGET_SVE2" > + "bsl2n\t%0.d, %0.d, %0.d, %2.d" > + "&& !CONSTANT_P (operands[3])" > + { > + operands[3] = CONSTM1_RTX (<VPRED>mode); > + } > +) > + > +(define_insn "*aarch64_sve2_eon_bsl2n_unpred<mode>" > + [(set (match_operand:VDQ_I 0 "register_operand" "=w") > + (not:VDQ_I > + (xor:VDQ_I > + (match_operand:VDQ_I 1 "register_operand" "%0") > + (match_operand:VDQ_I 2 "register_operand" "w"))))] > + "TARGET_SVE2" > + "bsl2n\t%Z0.d, %Z0.d, %Z0.d, %Z2.d" > +) > + > ;; ------------------------------------------------------------------------- > ;; ---- [INT] Shift-and-accumulate operations > ;; ------------------------------------------------------------------------- > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c > b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c > new file mode 100644 > index 00000000000..d547e3152d4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c > @@ -0,0 +1,52 @@ > +/* { dg-options "-O2" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > + > +#include <arm_neon.h> > +#include <arm_sve.h> > + > +#define EON(x, y) (~((x) ^ (y))) > + > +/* > +** eon_d: > +** bsl2n z0.d, z0.d, z0.d, z1.d > +** ret > +*/ > +uint32x2_t eon_d(uint32x2_t a, uint32x2_t b) { return EON(a, b); } > + > +/* > +** eon_d_mp: > +** bsl2n z1.d, z1.d, z1.d, z2.d > +** ... 
> +** ret > +*/ > +uint32x2_t eon_d_mp(uint32x2_t c, uint32x2_t a, uint32x2_t b) { return > EON(a, b); } > + > +/* > +** eon_q: > +** bsl2n z0.d, z0.d, z0.d, z1.d > +** ret > +*/ > +uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); } > + > +/* > +** eon_q_mp: > +** bsl2n z1.d, z1.d, z1.d, z2.d > +** ... > +** ret > +*/ > +uint64x2_t eon_q_mp(uint64x2_t c, uint64x2_t a, uint64x2_t b) { return > EON(a, b); } > + > +/* > +** eon_z: > +** bsl2n z0.d, z0.d, z0.d, z1.d > +** ret > +*/ > +svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); } > + > +/* > +** eon_z_mp: > +** bsl2n z1.d, z1.d, z1.d, z2.d > +** ... > +** ret > +*/ > +svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return > EON(a, b); }