> On 15 Jul 2025, at 15:50, Richard Sandiford <richard.sandif...@arm.com> wrote:
> 
> Kyrylo Tkachov <ktkac...@nvidia.com> writes:
>> Hi all,
>> 
>> SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes:
>> (x & z) | (~x & ~z) which is ~(x ^ z).
>> Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both
>> Advanced SIMD and SVE modes when TARGET_SVE2.
>> This patch does that. The tied register requirements of BSL2N and the MOVPRFX
>> rules mean we can't use the MOVPRFX form here so I have not included that
>> alternative. Correct me if I'm wrong on this.
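As a quick sanity check of the identity above, here is a minimal scalar C sketch (bsl2n_scalar is only an illustrative model of the instruction, not anything in the patch):

#include <assert.h>
#include <stdint.h>

/* Scalar model of BSL2N: (x & z) | (~y & ~z).  */
static uint64_t bsl2n_scalar (uint64_t x, uint64_t y, uint64_t z)
{
  return (x & z) | (~y & ~z);
}

int main (void)
{
  uint64_t vals[] = { 0, ~0ULL, 0x0123456789abcdefULL, 0xf0f0f0f0f0f0f0f0ULL };
  for (unsigned i = 0; i < 4; i++)
    for (unsigned j = 0; j < 4; j++)
      /* Tying the first two operands turns BSL2N into EON: ~(x ^ z).  */
      assert (bsl2n_scalar (vals[i], vals[i], vals[j])
	      == ~(vals[i] ^ vals[j]));
  return 0;
}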
> 
> I think we can still use BSL2N, similarly to the patch from the other day.
> It's just that the asm would need to be:
> 
>     movprfx\t%0, %1\;bsl2n\t%0, %0, %1, %2
> 
> (with constraints &w/w/w).

Thanks, something like the attached seems to work.
I’ll do wider testing…
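If I've understood the suggestion correctly, with the extra &w/w/w alternative the non-tied case should end up along these lines (the expected sequence in the comment is my guess rather than tested output):

#include <arm_sve.h>

/* Destination not tied to either input: the movprfx alternative should
   give something like
     movprfx z0, z1
     bsl2n   z0.d, z0.d, z1.d, z2.d
     ret
   instead of a BSL2N into z1 followed by a MOV into z0.  */
svuint64_t eon_z_mp (svuint64_t c, svuint64_t a, svuint64_t b)
{
  return ~(a ^ b);
}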

Kyrill

> 
> Thanks,
> Richard
> 
>> 
>> For code like:
>> 
>> uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
>> svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
>> svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, b); }
>> 
>> We now generate:
>> eon_q:
>> bsl2n z0.d, z0.d, z0.d, z1.d
>> ret
>> 
>> eon_z:
>> bsl2n z0.d, z0.d, z0.d, z1.d
>> ret
>> 
>> eon_z_mp:
>> bsl2n z1.d, z1.d, z1.d, z2.d
>> mov z0.d, z1.d
>> ret
>> 
>> instead of the previous:
>> eon_q:
>> eor v0.16b, v0.16b, v1.16b
>> not v0.16b, v0.16b
>> ret
>> 
>> eon_z:
>> eor z0.d, z0.d, z1.d
>> ptrue p3.b, all
>> not z0.d, p3/m, z0.d
>> ret
>> 
>> eon_z_mp:
>> eor z0.d, z1.d, z2.d
>> ptrue p3.b, all
>> not z0.d, p3/m, z0.d
>> ret
>> 
>> Bootstrapped and tested on aarch64-none-linux-gnu.
>> Ok for trunk?
>> Thanks,
>> Kyrill
>> 
>> Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
>> 
>> gcc/
>> 
>> * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>):
>> New pattern.
>> (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise.
>> 
>> gcc/testsuite/
>> 
>> * gcc.target/aarch64/sve2/eon_bsl2n.c: New test.
>> 
>> From 761b14804c8bbeae745cb7a2ab58e26a3775b096 Mon Sep 17 00:00:00 2001
>> From: Kyrylo Tkachov <ktkac...@nvidia.com>
>> Date: Fri, 11 Jul 2025 07:23:16 -0700
>> Subject: [PATCH 2/2] aarch64: Use SVE2 BSL2N for vector EON
>> 
>> SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes:
>> (x & z) | (~x & ~z) which is ~(x ^ z).
>> Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both
>> Advanced SIMD and SVE modes when TARGET_SVE2.
>> This patch does that.  The tied register requirements of BSL2N and the MOVPRFX
>> rules mean we can't use the MOVPRFX form here so I have not included that
>> alternative.  Correct me if I'm wrong on this.
>> 
>> For code like:
>> 
>> uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
>> svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
>> svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, b); }
>> 
>> We now generate:
>> eon_q:
>>        bsl2n   z0.d, z0.d, z0.d, z1.d
>>        ret
>> 
>> eon_z:
>>        bsl2n   z0.d, z0.d, z0.d, z1.d
>>        ret
>> 
>> eon_z_mp:
>>        bsl2n   z1.d, z1.d, z1.d, z2.d
>>        mov     z0.d, z1.d
>>        ret
>> 
>> instead of the previous:
>> eon_q:
>>        eor     v0.16b, v0.16b, v1.16b
>>        not     v0.16b, v0.16b
>>        ret
>> 
>> eon_z:
>>        eor     z0.d, z0.d, z1.d
>>        ptrue   p3.b, all
>>        not     z0.d, p3/m, z0.d
>>        ret
>> 
>> eon_z_mp:
>>        eor     z0.d, z1.d, z2.d
>>        ptrue   p3.b, all
>>        not     z0.d, p3/m, z0.d
>>        ret
>> 
>> Bootstrapped and tested on aarch64-none-linux-gnu.
>> 
>> Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
>> 
>> gcc/
>> 
>> * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>):
>> New pattern.
>> (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise.
>> 
>> gcc/testsuite/
>> 
>> * gcc.target/aarch64/sve2/eon_bsl2n.c: New test.
>> ---
>> gcc/config/aarch64/aarch64-sve2.md            | 28 ++++++++++
>> .../gcc.target/aarch64/sve2/eon_bsl2n.c       | 52 +++++++++++++++++++
>> 2 files changed, 80 insertions(+)
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
>> 
>> diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
>> index 6d6dc94cd81..a011b947f51 100644
>> --- a/gcc/config/aarch64/aarch64-sve2.md
>> +++ b/gcc/config/aarch64/aarch64-sve2.md
>> @@ -2053,6 +2053,34 @@
>>   }
>> )
>> 
>> +;; Vector EON (~(x ^ y)) using BSL2N.
>> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n_eon<mode>"
>> +  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
>> + (unspec:SVE_FULL_I
>> +   [(match_operand 3)
>> +    (not:SVE_FULL_I
>> +      (xor:SVE_FULL_I
>> + (match_operand:SVE_FULL_I 1 "register_operand" "%0")
>> + (match_operand:SVE_FULL_I 2 "register_operand" "w")))]
>> +     UNSPEC_PRED_X))]
>> +  "TARGET_SVE2"
>> +  "bsl2n\t%0.d, %0.d, %0.d, %2.d"
>> +  "&& !CONSTANT_P (operands[3])"
>> +  {
>> +    operands[3] = CONSTM1_RTX (<VPRED>mode);
>> +  }
>> +)
>> +
>> +(define_insn "*aarch64_sve2_eon_bsl2n_unpred<mode>"
>> +  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
>> +       (not:VDQ_I
>> +         (xor:VDQ_I
>> +           (match_operand:VDQ_I 1 "register_operand" "%0")
>> +           (match_operand:VDQ_I 2 "register_operand" "w"))))]
>> +  "TARGET_SVE2"
>> +  "bsl2n\t%Z0.d, %Z0.d, %Z0.d, %Z2.d"
>> +)
>> +
>> ;; -------------------------------------------------------------------------
>> ;; ---- [INT] Shift-and-accumulate operations
>> ;; -------------------------------------------------------------------------
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
>> new file mode 100644
>> index 00000000000..d547e3152d4
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
>> @@ -0,0 +1,52 @@
>> +/* { dg-options "-O2" } */
>> +/* { dg-final { check-function-bodies "**" "" "" } } */
>> +
>> +#include <arm_neon.h>
>> +#include <arm_sve.h>
>> +
>> +#define EON(x, y)   (~((x) ^ (y)))
>> +
>> +/*
>> +** eon_d:
>> +**  bsl2n z0.d, z0.d, z0.d, z1.d
>> +**  ret
>> +*/
>> +uint32x2_t eon_d(uint32x2_t a, uint32x2_t b) { return EON(a, b); }
>> +
>> +/*
>> +** eon_d_mp:
>> +**  bsl2n z1.d, z1.d, z1.d, z2.d
>> +**  ...
>> +**  ret
>> +*/
>> +uint32x2_t eon_d_mp(uint32x2_t c, uint32x2_t a, uint32x2_t b) { return EON(a, b); }
>> +
>> +/*
>> +** eon_q:
>> +**  bsl2n z0.d, z0.d, z0.d, z1.d
>> +**  ret
>> +*/
>> +uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
>> +
>> +/*
>> +** eon_q_mp:
>> +**  bsl2n z1.d, z1.d, z1.d, z2.d
>> +**  ...
>> +**  ret
>> +*/
>> +uint64x2_t eon_q_mp(uint64x2_t c, uint64x2_t a, uint64x2_t b) { return EON(a, b); }
>> +
>> +/*
>> +** eon_z:
>> +**  bsl2n z0.d, z0.d, z0.d, z1.d
>> +**  ret
>> +*/
>> +svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
>> +
>> +/*
>> +** eon_z_mp:
>> +**  bsl2n z1.d, z1.d, z1.d, z2.d
>> +**  ...
>> +**  ret
>> +*/
>> +svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, b); }


Attachment: 0001-aarch64-Use-SVE2-BSL2N-for-vector-EON.patch
