Kyrylo Tkachov <ktkac...@nvidia.com> writes: >> On 15 Jul 2025, at 15:50, Richard Sandiford <richard.sandif...@arm.com> >> wrote: >> >> Kyrylo Tkachov <ktkac...@nvidia.com> writes: >>> Hi all, >>> >>> SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes: >>> (x & z) | (~x & ~z) which is ~(x ^ z). >>> Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both >>> Advanced SIMD and SVE modes when TARGET_SVE2. >>> This patch does that. The tied register requirements of BSL2N and the >>> MOVPRFX >>> rules mean we can't use the MOVPRFX form here so I have not included that >>> alternative. Correct me if I'm wrong on this. >> >> I think we can still use BSL2N, similarly to the patch from the other day. >> It's just that the asm would need to be: >> >> movprfx\t%0, %1\;bsl2n\t%0, %0, %1, %2 >> >> (with constraints &w/w/w). > > Thanks, something like the attach seems to work. > I’ll do wider testing…
LGTM, thanks. OK if testing passes. Richard > > Kyrill > >> >> Thanks, >> Richard >> >>> >>> For code like: >>> >>> uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); } >>> svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); } >>> svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return >>> EON(a, b); } >>> >>> We now generate: >>> eon_q: >>> bsl2n z0.d, z0.d, z0.d, z1.d >>> ret >>> >>> eon_z: >>> bsl2n z0.d, z0.d, z0.d, z1.d >>> ret >>> >>> eon_z_mp: >>> bsl2n z1.d, z1.d, z1.d, z2.d >>> mov z0.d, z1.d >>> ret >>> >>> instead of the previous: >>> eon_q: >>> eor v0.16b, v0.16b, v1.16b >>> not v0.16b, v0.16b >>> ret >>> >>> eon_z: >>> eor z0.d, z0.d, z1.d >>> ptrue p3.b, all >>> not z0.d, p3/m, z0.d >>> ret >>> >>> eon_z_mp: >>> eor z0.d, z1.d, z2.d >>> ptrue p3.b, all >>> not z0.d, p3/m, z0.d >>> ret >>> >>> Bootstrapped and tested on aarch64-none-linux-gnu. >>> Ok for trunk? >>> Thanks, >>> Kyrill >>> >>> Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com> >>> >>> gcc/ >>> >>> * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>): >>> New pattern. >>> (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise. >>> >>> gcc/testsuite/ >>> >>> * gcc.target/aarch64/sve2/eon_bsl2n.c: New test. >>> >>> From 761b14804c8bbeae745cb7a2ab58e26a3775b096 Mon Sep 17 00:00:00 2001 >>> From: Kyrylo Tkachov <ktkac...@nvidia.com> >>> Date: Fri, 11 Jul 2025 07:23:16 -0700 >>> Subject: [PATCH 2/2] aarch64: Use SVE2 BSL2N for vector EON >>> >>> SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes: >>> (x & z) | (~x & ~z) which is ~(x ^ z). >>> Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both >>> Advanced SIMD and SVE modes when TARGET_SVE2. >>> This patch does that. The tied register requirements of BSL2N and the >>> MOVPRFX >>> rules mean we can't use the MOVPRFX form here so I have not included that >>> alternative. Correct me if I'm wrong on this. 
>>> >>> For code like: >>> >>> uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); } >>> svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); } >>> svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return >>> EON(a, b); } >>> >>> We now generate: >>> eon_q: >>> bsl2n z0.d, z0.d, z0.d, z1.d >>> ret >>> >>> eon_z: >>> bsl2n z0.d, z0.d, z0.d, z1.d >>> ret >>> >>> eon_z_mp: >>> bsl2n z1.d, z1.d, z1.d, z2.d >>> mov z0.d, z1.d >>> ret >>> >>> instead of the previous: >>> eon_q: >>> eor v0.16b, v0.16b, v1.16b >>> not v0.16b, v0.16b >>> ret >>> >>> eon_z: >>> eor z0.d, z0.d, z1.d >>> ptrue p3.b, all >>> not z0.d, p3/m, z0.d >>> ret >>> >>> eon_z_mp: >>> eor z0.d, z1.d, z2.d >>> ptrue p3.b, all >>> not z0.d, p3/m, z0.d >>> ret >>> >>> Bootstrapped and tested on aarch64-none-linux-gnu. >>> >>> Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com> >>> >>> gcc/ >>> >>> * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>): >>> New pattern. >>> (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise. >>> >>> gcc/testsuite/ >>> >>> * gcc.target/aarch64/sve2/eon_bsl2n.c: New test. >>> --- >>> gcc/config/aarch64/aarch64-sve2.md | 28 ++++++++++ >>> .../gcc.target/aarch64/sve2/eon_bsl2n.c | 52 +++++++++++++++++++ >>> 2 files changed, 80 insertions(+) >>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c >>> >>> diff --git a/gcc/config/aarch64/aarch64-sve2.md >>> b/gcc/config/aarch64/aarch64-sve2.md >>> index 6d6dc94cd81..a011b947f51 100644 >>> --- a/gcc/config/aarch64/aarch64-sve2.md >>> +++ b/gcc/config/aarch64/aarch64-sve2.md >>> @@ -2053,6 +2053,34 @@ >>> } >>> ) >>> >>> +;; Vector EON (~(x ^ y)) using BSL2N. 
>>> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n_eon<mode>" >>> + [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") >>> + (unspec:SVE_FULL_I >>> + [(match_operand 3) >>> + (not:SVE_FULL_I >>> + (xor:SVE_FULL_I >>> + (match_operand:SVE_FULL_I 1 "register_operand" "%0") >>> + (match_operand:SVE_FULL_I 2 "register_operand" "w")))] >>> + UNSPEC_PRED_X))] >>> + "TARGET_SVE2" >>> + "bsl2n\t%0.d, %0.d, %0.d, %2.d" >>> + "&& !CONSTANT_P (operands[3])" >>> + { >>> + operands[3] = CONSTM1_RTX (<VPRED>mode); >>> + } >>> +) >>> + >>> +(define_insn "*aarch64_sve2_eon_bsl2n_unpred<mode>" >>> + [(set (match_operand:VDQ_I 0 "register_operand" "=w") >>> + (not:VDQ_I >>> + (xor:VDQ_I >>> + (match_operand:VDQ_I 1 "register_operand" "%0") >>> + (match_operand:VDQ_I 2 "register_operand" "w"))))] >>> + "TARGET_SVE2" >>> + "bsl2n\t%Z0.d, %Z0.d, %Z0.d, %Z2.d" >>> +) >>> + >>> ;; ------------------------------------------------------------------------- >>> ;; ---- [INT] Shift-and-accumulate operations >>> ;; ------------------------------------------------------------------------- >>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c >>> b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c >>> new file mode 100644 >>> index 00000000000..d547e3152d4 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c >>> @@ -0,0 +1,52 @@ >>> +/* { dg-options "-O2" } */ >>> +/* { dg-final { check-function-bodies "**" "" "" } } */ >>> + >>> +#include <arm_neon.h> >>> +#include <arm_sve.h> >>> + >>> +#define EON(x, y) (~((x) ^ (y))) >>> + >>> +/* >>> +** eon_d: >>> +** bsl2n z0.d, z0.d, z0.d, z1.d >>> +** ret >>> +*/ >>> +uint32x2_t eon_d(uint32x2_t a, uint32x2_t b) { return EON(a, b); } >>> + >>> +/* >>> +** eon_d_mp: >>> +** bsl2n z1.d, z1.d, z1.d, z2.d >>> +** ... 
>>> +** ret >>> +*/ >>> +uint32x2_t eon_d_mp(uint32x2_t c, uint32x2_t a, uint32x2_t b) { return >>> EON(a, b); } >>> + >>> +/* >>> +** eon_q: >>> +** bsl2n z0.d, z0.d, z0.d, z1.d >>> +** ret >>> +*/ >>> +uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); } >>> + >>> +/* >>> +** eon_q_mp: >>> +** bsl2n z1.d, z1.d, z1.d, z2.d >>> +** ... >>> +** ret >>> +*/ >>> +uint64x2_t eon_q_mp(uint64x2_t c, uint64x2_t a, uint64x2_t b) { return >>> EON(a, b); } >>> + >>> +/* >>> +** eon_z: >>> +** bsl2n z0.d, z0.d, z0.d, z1.d >>> +** ret >>> +*/ >>> +svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); } >>> + >>> +/* >>> +** eon_z_mp: >>> +** bsl2n z1.d, z1.d, z1.d, z2.d >>> +** ... >>> +** ret >>> +*/ >>> +svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return >>> EON(a, b); } > > > From 5ef38e6ce210cb54f1fe1bb3ca78fde1db76f35a Mon Sep 17 00:00:00 2001 > From: Kyrylo Tkachov <ktkac...@nvidia.com> > Date: Fri, 11 Jul 2025 07:23:16 -0700 > Subject: [PATCH] aarch64: Use SVE2 BSL2N for vector EON > > SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes: > (x & z) | (~x & ~z) which is ~(x ^ z). > Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both > Advanced SIMD and SVE modes when TARGET_SVE2. > This patch does that. > For code like: > > uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); } > svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); } > > We now generate: > eon_q: > bsl2n z0.d, z0.d, z0.d, z1.d > ret > > eon_z: > bsl2n z0.d, z0.d, z0.d, z1.d > ret > > instead of the previous: > eon_q: > eor v0.16b, v0.16b, v1.16b > not v0.16b, v0.16b > ret > > eon_z: > eor z0.d, z0.d, z1.d > ptrue p3.b, all > not z0.d, p3/m, z0.d > ret > > Bootstrapped and tested on aarch64-none-linux-gnu. > > Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com> > > gcc/ > > * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>): > New pattern. 
> (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise. > > gcc/testsuite/ > > * gcc.target/aarch64/sve2/eon_bsl2n.c: New test. > --- > gcc/config/aarch64/aarch64-sve2.md | 34 ++++++++++++ > .../gcc.target/aarch64/sve2/eon_bsl2n.c | 52 +++++++++++++++++++ > 2 files changed, 86 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c > > diff --git a/gcc/config/aarch64/aarch64-sve2.md > b/gcc/config/aarch64/aarch64-sve2.md > index 3d460c73ae7..28bd680b5d5 100644 > --- a/gcc/config/aarch64/aarch64-sve2.md > +++ b/gcc/config/aarch64/aarch64-sve2.md > @@ -2053,6 +2053,40 @@ > } > ) > > +;; Vector EON (~(x ^ y)) using BSL2N. > +(define_insn_and_rewrite "*aarch64_sve2_bsl2n_eon<mode>" > + [(set (match_operand:SVE_FULL_I 0 "register_operand") > + (unspec:SVE_FULL_I > + [(match_operand 3) > + (not:SVE_FULL_I > + (xor:SVE_FULL_I > + (match_operand:SVE_FULL_I 1 "register_operand") > + (match_operand:SVE_FULL_I 2 "register_operand")))] > + UNSPEC_PRED_X))] > + "TARGET_SVE2" > + {@ [ cons: =0, 1, 2 ; attrs: movprfx ] > + [ w , 0, w ; * ] bsl2n\t%0.d, %0.d, %0.d, %2.d > + [ ?&w, w, w ; yes ] movprfx\t%0, %1\;bsl2n\t%0.d, %0.d, > %1.d, %2.d > + } > + "&& !CONSTANT_P (operands[3])" > + { > + operands[3] = CONSTM1_RTX (<VPRED>mode); > + } > +) > + > +(define_insn "*aarch64_sve2_eon_bsl2n_unpred<mode>" > + [(set (match_operand:VDQ_I 0 "register_operand") > + (not:VDQ_I > + (xor:VDQ_I > + (match_operand:VDQ_I 1 "register_operand") > + (match_operand:VDQ_I 2 "register_operand"))))] > + "TARGET_SVE2" > + {@ [ cons: =0, 1, 2 ; attrs: movprfx ] > + [ w , 0, w ; * ] bsl2n\t%Z0.d, %Z0.d, %Z0.d, %Z2.d > + [ ?&w, w, w ; yes ] movprfx\t%Z0, %Z1\;bsl2n\t%Z0.d, > %Z0.d, %Z1.d, %Z2.d > + } > +) > + > ;; ------------------------------------------------------------------------- > ;; ---- [INT] Shift-and-accumulate operations > ;; ------------------------------------------------------------------------- > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c > 
b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c > new file mode 100644 > index 00000000000..74b46376373 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c > @@ -0,0 +1,52 @@ > +/* { dg-options "-O2" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > + > +#include <arm_neon.h> > +#include <arm_sve.h> > + > +#define EON(x, y) (~((x) ^ (y))) > + > +/* > +** eon_d: > +** bsl2n z0.d, z0.d, z0.d, z1.d > +** ret > +*/ > +uint32x2_t eon_d(uint32x2_t a, uint32x2_t b) { return EON(a, b); } > + > +/* > +** eon_d_mp: > +** movprfx z0, z1 > +** bsl2n z0.d, z0.d, z1.d, z2.d > +** ret > +*/ > +uint32x2_t eon_d_mp(uint32x2_t c, uint32x2_t a, uint32x2_t b) { return > EON(a, b); } > + > +/* > +** eon_q: > +** bsl2n z0.d, z0.d, z0.d, z1.d > +** ret > +*/ > +uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); } > + > +/* > +** eon_q_mp: > +** movprfx z0, z1 > +** bsl2n z0.d, z0.d, z1.d, z2.d > +** ret > +*/ > +uint64x2_t eon_q_mp(uint64x2_t c, uint64x2_t a, uint64x2_t b) { return > EON(a, b); } > + > +/* > +** eon_z: > +** bsl2n z0.d, z0.d, z0.d, z1.d > +** ret > +*/ > +svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); } > + > +/* > +** eon_z_mp: > +** movprfx z0, z1 > +** bsl2n z0.d, z0.d, z1.d, z2.d > +** ret > +*/ > +svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return > EON(a, b); }