Kyrylo Tkachov <ktkac...@nvidia.com> writes:
>> On 15 Jul 2025, at 15:50, Richard Sandiford <richard.sandif...@arm.com> wrote:
>> 
>> Kyrylo Tkachov <ktkac...@nvidia.com> writes:
>>> Hi all,
>>> 
>>> SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z).  When x == y this computes
>>> (x & z) | (~x & ~z), which is ~(x ^ z).
>>> Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both
>>> Advanced SIMD and SVE modes when TARGET_SVE2.
>>> This patch does that.  The tied-register requirements of BSL2N and the
>>> MOVPRFX rules mean we can't use the MOVPRFX form here, so I have not
>>> included that alternative.  Correct me if I'm wrong on this.
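>>>
>>> As a quick sanity check of the identity (a standalone sketch, not part
>>> of the patch), a small C program can assert the equivalence on a few
>>> sample values:
>>>
>>> #include <assert.h>
>>> #include <stdint.h>
>>>
>>> int main(void)
>>> {
>>>   const uint64_t vals[] = { 0, ~(uint64_t) 0, 0x5555555555555555ULL,
>>>                             0x123456789abcdef0ULL };
>>>   for (unsigned i = 0; i < 4; i++)
>>>     for (unsigned j = 0; j < 4; j++)
>>>       {
>>>         uint64_t x = vals[i], z = vals[j];
>>>         /* BSL2N with the first two inputs tied: (x & z) | (~x & ~z)...  */
>>>         uint64_t bsl2n_tied = (x & z) | (~x & ~z);
>>>         /* ...which is exactly EON: ~(x ^ z).  */
>>>         assert (bsl2n_tied == ~(x ^ z));
>>>       }
>>>   return 0;
>>> }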
>> 
>> I think we can still use BSL2N, similarly to the patch from the other day.
>> It's just that the asm would need to be:
>> 
>>     movprfx\t%0, %1\;bsl2n\t%0, %0, %1, %2
>> 
>> (with constraints &w/w/w).
>
> Thanks, something like the attached seems to work.
> I’ll do wider testing…

LGTM, thanks.  OK if testing passes.

Richard

>
> Kyrill
>
>> 
>> Thanks,
>> Richard
>> 
>>> 
>>> For code like:
>>> 
>>> uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
>>> svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
>>> svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, b); }
>>> 
>>> We now generate:
>>> eon_q:
>>> bsl2n z0.d, z0.d, z0.d, z1.d
>>> ret
>>> 
>>> eon_z:
>>> bsl2n z0.d, z0.d, z0.d, z1.d
>>> ret
>>> 
>>> eon_z_mp:
>>> bsl2n z1.d, z1.d, z1.d, z2.d
>>> mov z0.d, z1.d
>>> ret
>>> 
>>> instead of the previous:
>>> eon_q:
>>> eor v0.16b, v0.16b, v1.16b
>>> not v0.16b, v0.16b
>>> ret
>>> 
>>> eon_z:
>>> eor z0.d, z0.d, z1.d
>>> ptrue p3.b, all
>>> not z0.d, p3/m, z0.d
>>> ret
>>> 
>>> eon_z_mp:
>>> eor z0.d, z1.d, z2.d
>>> ptrue p3.b, all
>>> not z0.d, p3/m, z0.d
>>> ret
>>> 
>>> Bootstrapped and tested on aarch64-none-linux-gnu.
>>> Ok for trunk?
>>> Thanks,
>>> Kyrill
>>> 
>>> Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
>>> 
>>> gcc/
>>> 
>>> * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>):
>>> New pattern.
>>> (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise.
>>> 
>>> gcc/testsuite/
>>> 
>>> * gcc.target/aarch64/sve2/eon_bsl2n.c: New test.
>>> 
>>> From 761b14804c8bbeae745cb7a2ab58e26a3775b096 Mon Sep 17 00:00:00 2001
>>> From: Kyrylo Tkachov <ktkac...@nvidia.com>
>>> Date: Fri, 11 Jul 2025 07:23:16 -0700
>>> Subject: [PATCH 2/2] aarch64: Use SVE2 BSL2N for vector EON
>>> 
>>> SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z).  When x == y this computes
>>> (x & z) | (~x & ~z), which is ~(x ^ z).
>>> Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both
>>> Advanced SIMD and SVE modes when TARGET_SVE2.
>>> This patch does that.  The tied-register requirements of BSL2N and the
>>> MOVPRFX rules mean we can't use the MOVPRFX form here, so I have not
>>> included that alternative.  Correct me if I'm wrong on this.
>>> 
>>> For code like:
>>> 
>>> uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
>>> svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
>>> svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, b); }
>>> 
>>> We now generate:
>>> eon_q:
>>>        bsl2n   z0.d, z0.d, z0.d, z1.d
>>>        ret
>>> 
>>> eon_z:
>>>        bsl2n   z0.d, z0.d, z0.d, z1.d
>>>        ret
>>> 
>>> eon_z_mp:
>>>        bsl2n   z1.d, z1.d, z1.d, z2.d
>>>        mov     z0.d, z1.d
>>>        ret
>>> 
>>> instead of the previous:
>>> eon_q:
>>>        eor     v0.16b, v0.16b, v1.16b
>>>        not     v0.16b, v0.16b
>>>        ret
>>> 
>>> eon_z:
>>>        eor     z0.d, z0.d, z1.d
>>>        ptrue   p3.b, all
>>>        not     z0.d, p3/m, z0.d
>>>        ret
>>> 
>>> eon_z_mp:
>>>        eor     z0.d, z1.d, z2.d
>>>        ptrue   p3.b, all
>>>        not     z0.d, p3/m, z0.d
>>>        ret
>>> 
>>> Bootstrapped and tested on aarch64-none-linux-gnu.
>>> 
>>> Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
>>> 
>>> gcc/
>>> 
>>> * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>):
>>> New pattern.
>>> (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise.
>>> 
>>> gcc/testsuite/
>>> 
>>> * gcc.target/aarch64/sve2/eon_bsl2n.c: New test.
>>> ---
>>> gcc/config/aarch64/aarch64-sve2.md            | 28 ++++++++++
>>> .../gcc.target/aarch64/sve2/eon_bsl2n.c       | 52 +++++++++++++++++++
>>> 2 files changed, 80 insertions(+)
>>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
>>> 
>>> diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
>>> index 6d6dc94cd81..a011b947f51 100644
>>> --- a/gcc/config/aarch64/aarch64-sve2.md
>>> +++ b/gcc/config/aarch64/aarch64-sve2.md
>>> @@ -2053,6 +2053,34 @@
>>>   }
>>> )
>>> 
>>> +;; Vector EON (~(x ^ y)) using BSL2N.
>>> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n_eon<mode>"
>>> +  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
>>> + (unspec:SVE_FULL_I
>>> +   [(match_operand 3)
>>> +    (not:SVE_FULL_I
>>> +      (xor:SVE_FULL_I
>>> + (match_operand:SVE_FULL_I 1 "register_operand" "%0")
>>> + (match_operand:SVE_FULL_I 2 "register_operand" "w")))]
>>> +     UNSPEC_PRED_X))]
>>> +  "TARGET_SVE2"
>>> +  "bsl2n\t%0.d, %0.d, %0.d, %2.d"
>>> +  "&& !CONSTANT_P (operands[3])"
>>> +  {
>>> +    operands[3] = CONSTM1_RTX (<VPRED>mode);
>>> +  }
>>> +)
>>> +
>>> +(define_insn "*aarch64_sve2_eon_bsl2n_unpred<mode>"
>>> +  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
>>> +       (not:VDQ_I
>>> +         (xor:VDQ_I
>>> +           (match_operand:VDQ_I 1 "register_operand" "%0")
>>> +           (match_operand:VDQ_I 2 "register_operand" "w"))))]
>>> +  "TARGET_SVE2"
>>> +  "bsl2n\t%Z0.d, %Z0.d, %Z0.d, %Z2.d"
>>> +)
>>> +
>>> ;; -------------------------------------------------------------------------
>>> ;; ---- [INT] Shift-and-accumulate operations
>>> ;; -------------------------------------------------------------------------
>>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
>>> new file mode 100644
>>> index 00000000000..d547e3152d4
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
>>> @@ -0,0 +1,52 @@
>>> +/* { dg-options "-O2" } */
>>> +/* { dg-final { check-function-bodies "**" "" "" } } */
>>> +
>>> +#include <arm_neon.h>
>>> +#include <arm_sve.h>
>>> +
>>> +#define EON(x, y)   (~((x) ^ (y)))
>>> +
>>> +/*
>>> +** eon_d:
>>> +**  bsl2n z0.d, z0.d, z0.d, z1.d
>>> +**  ret
>>> +*/
>>> +uint32x2_t eon_d(uint32x2_t a, uint32x2_t b) { return EON(a, b); }
>>> +
>>> +/*
>>> +** eon_d_mp:
>>> +**  bsl2n z1.d, z1.d, z1.d, z2.d
>>> +**  ...
>>> +**  ret
>>> +*/
>>> +uint32x2_t eon_d_mp(uint32x2_t c, uint32x2_t a, uint32x2_t b) { return EON(a, b); }
>>> +
>>> +/*
>>> +** eon_q:
>>> +**  bsl2n z0.d, z0.d, z0.d, z1.d
>>> +**  ret
>>> +*/
>>> +uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
>>> +
>>> +/*
>>> +** eon_q_mp:
>>> +**  bsl2n z1.d, z1.d, z1.d, z2.d
>>> +**  ...
>>> +**  ret
>>> +*/
>>> +uint64x2_t eon_q_mp(uint64x2_t c, uint64x2_t a, uint64x2_t b) { return EON(a, b); }
>>> +
>>> +/*
>>> +** eon_z:
>>> +**  bsl2n z0.d, z0.d, z0.d, z1.d
>>> +**  ret
>>> +*/
>>> +svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
>>> +
>>> +/*
>>> +** eon_z_mp:
>>> +**  bsl2n z1.d, z1.d, z1.d, z2.d
>>> +**  ...
>>> +**  ret
>>> +*/
>>> +svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, b); }
>
>
> From 5ef38e6ce210cb54f1fe1bb3ca78fde1db76f35a Mon Sep 17 00:00:00 2001
> From: Kyrylo Tkachov <ktkac...@nvidia.com>
> Date: Fri, 11 Jul 2025 07:23:16 -0700
> Subject: [PATCH] aarch64: Use SVE2 BSL2N for vector EON
>
> SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z).  When x == y this computes
> (x & z) | (~x & ~z), which is ~(x ^ z).
> Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both
> Advanced SIMD and SVE modes when TARGET_SVE2.
> This patch does that.
> For code like:
>
> uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
> svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
>
> We now generate:
> eon_q:
>         bsl2n   z0.d, z0.d, z0.d, z1.d
>         ret
>
> eon_z:
>         bsl2n   z0.d, z0.d, z0.d, z1.d
>         ret
>
> instead of the previous:
> eon_q:
>         eor     v0.16b, v0.16b, v1.16b
>         not     v0.16b, v0.16b
>         ret
>
> eon_z:
>         eor     z0.d, z0.d, z1.d
>         ptrue   p3.b, all
>         not     z0.d, p3/m, z0.d
>         ret
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
>
> Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
>
> gcc/
>
>       * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>):
>       New pattern.
>       (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise.
>
> gcc/testsuite/
>
>       * gcc.target/aarch64/sve2/eon_bsl2n.c: New test.
> ---
>  gcc/config/aarch64/aarch64-sve2.md            | 34 ++++++++++++
>  .../gcc.target/aarch64/sve2/eon_bsl2n.c       | 52 +++++++++++++++++++
>  2 files changed, 86 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
> index 3d460c73ae7..28bd680b5d5 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -2053,6 +2053,40 @@
>    }
>  )
>  
> +;; Vector EON (~(x ^ y)) using BSL2N.
> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n_eon<mode>"
> +  [(set (match_operand:SVE_FULL_I 0 "register_operand")
> +     (unspec:SVE_FULL_I
> +       [(match_operand 3)
> +        (not:SVE_FULL_I
> +          (xor:SVE_FULL_I
> +             (match_operand:SVE_FULL_I 1 "register_operand")
> +             (match_operand:SVE_FULL_I 2 "register_operand")))]
> +         UNSPEC_PRED_X))]
> +  "TARGET_SVE2"
> +  {@ [ cons: =0, 1, 2 ; attrs: movprfx ]
> +     [ w  ,      0, w ; *              ] bsl2n\t%0.d, %0.d, %0.d, %2.d
> +     [ ?&w,      w, w ; yes            ] movprfx\t%0, %1\;bsl2n\t%0.d, %0.d, %1.d, %2.d
> +  }
> +  "&& !CONSTANT_P (operands[3])"
> +  {
> +    operands[3] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +)
> +
> +(define_insn "*aarch64_sve2_eon_bsl2n_unpred<mode>"
> +  [(set (match_operand:VDQ_I 0 "register_operand")
> +       (not:VDQ_I
> +         (xor:VDQ_I
> +           (match_operand:VDQ_I 1 "register_operand")
> +           (match_operand:VDQ_I 2 "register_operand"))))]
> +  "TARGET_SVE2"
> +  {@ [ cons: =0, 1, 2 ; attrs: movprfx ]
> +     [ w  ,      0, w ; *              ] bsl2n\t%Z0.d, %Z0.d, %Z0.d, %Z2.d
> +     [ ?&w,      w, w ; yes            ] movprfx\t%Z0, %Z1\;bsl2n\t%Z0.d, %Z0.d, %Z1.d, %Z2.d
> +  }
> +)
> +
>  ;; -------------------------------------------------------------------------
>  ;; ---- [INT] Shift-and-accumulate operations
>  ;; -------------------------------------------------------------------------
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
> new file mode 100644
> index 00000000000..74b46376373
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
> @@ -0,0 +1,52 @@
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#include <arm_neon.h>
> +#include <arm_sve.h>
> +
> +#define EON(x, y)   (~((x) ^ (y)))
> +
> +/*
> +** eon_d:
> +**   bsl2n   z0.d, z0.d, z0.d, z1.d
> +**   ret
> +*/
> +uint32x2_t eon_d(uint32x2_t a, uint32x2_t b) { return EON(a, b); }
> +
> +/*
> +** eon_d_mp:
> +**   movprfx z0, z1
> +**   bsl2n   z0.d, z0.d, z1.d, z2.d
> +**   ret
> +*/
> +uint32x2_t eon_d_mp(uint32x2_t c, uint32x2_t a, uint32x2_t b) { return EON(a, b); }
> +
> +/*
> +** eon_q:
> +**   bsl2n   z0.d, z0.d, z0.d, z1.d
> +**   ret
> +*/
> +uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
> +
> +/*
> +** eon_q_mp:
> +**   movprfx z0, z1
> +**   bsl2n   z0.d, z0.d, z1.d, z2.d
> +**   ret
> +*/
> +uint64x2_t eon_q_mp(uint64x2_t c, uint64x2_t a, uint64x2_t b) { return EON(a, b); }
> +
> +/*
> +** eon_z:
> +**   bsl2n   z0.d, z0.d, z0.d, z1.d
> +**   ret
> +*/
> +svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
> +
> +/*
> +** eon_z_mp:
> +**   movprfx z0, z1
> +**   bsl2n   z0.d, z0.d, z1.d, z2.d
> +**   ret
> +*/
> +svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, b); }
