Kyrylo Tkachov <ktkac...@nvidia.com> writes:
> Hi all,
>
> SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes:
> (x & z) | (~x & ~z) which is ~(x ^ z).
> Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both
> Advanced SIMD and SVE modes when TARGET_SVE2.
> This patch does that. The tied register requirements of BSL2N and the MOVPRFX
> rules mean we can't use the MOVPRFX form here so I have not included that
> alternative. Correct me if I'm wrong on this.

I think we can still use BSL2N, similarly to the patch from the other day.
It's just that the asm would need to be:

     movprfx\t%0, %1\;bsl2n\t%0, %0, %1, %2

(with constraints &w/w/w).

Thanks,
Richard

>
> For code like:
>
> uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
> svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
> svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, 
> b); }
>
> We now generate:
> eon_q:
>       bsl2n z0.d, z0.d, z0.d, z1.d
>       ret
>
> eon_z:
>       bsl2n z0.d, z0.d, z0.d, z1.d
>       ret
>
> eon_z_mp:
>       bsl2n z1.d, z1.d, z1.d, z2.d
>       mov z0.d, z1.d
>       ret
>
> instead of the previous:
> eon_q:
>       eor v0.16b, v0.16b, v1.16b
>       not v0.16b, v0.16b
>       ret
>
> eon_z:
>       eor z0.d, z0.d, z1.d
>       ptrue p3.b, all
>       not z0.d, p3/m, z0.d
>       ret
>
> eon_z_mp:
>       eor z0.d, z1.d, z2.d
>       ptrue p3.b, all
>       not z0.d, p3/m, z0.d
>       ret
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
> Ok for trunk?
> Thanks,
> Kyrill
>
> Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
>
> gcc/
>
>       * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>):
>       New pattern.
>       (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise.
>
> gcc/testsuite/
>
>       * gcc.target/aarch64/sve2/eon_bsl2n.c: New test.
>
> From 761b14804c8bbeae745cb7a2ab58e26a3775b096 Mon Sep 17 00:00:00 2001
> From: Kyrylo Tkachov <ktkac...@nvidia.com>
> Date: Fri, 11 Jul 2025 07:23:16 -0700
> Subject: [PATCH 2/2] aarch64: Use SVE2 BSL2N for vector EON
>
> SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes:
> (x & z) | (~x & ~z) which is ~(x ^ z).
> Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both
> Advanced SIMD and SVE modes when TARGET_SVE2.
> This patch does that.  The tied register requirements of BSL2N and the MOVPRFX
> rules mean we can't use the MOVPRFX form here so I have not included that
> alternative.  Correct me if I'm wrong on this.
>
> For code like:
>
> uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
> svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
> svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, 
> b); }
>
> We now generate:
> eon_q:
>         bsl2n   z0.d, z0.d, z0.d, z1.d
>         ret
>
> eon_z:
>         bsl2n   z0.d, z0.d, z0.d, z1.d
>         ret
>
> eon_z_mp:
>         bsl2n   z1.d, z1.d, z1.d, z2.d
>         mov     z0.d, z1.d
>         ret
>
> instead of the previous:
> eon_q:
>         eor     v0.16b, v0.16b, v1.16b
>         not     v0.16b, v0.16b
>         ret
>
> eon_z:
>         eor     z0.d, z0.d, z1.d
>         ptrue   p3.b, all
>         not     z0.d, p3/m, z0.d
>         ret
>
> eon_z_mp:
>         eor     z0.d, z1.d, z2.d
>         ptrue   p3.b, all
>         not     z0.d, p3/m, z0.d
>         ret
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
>
> Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
>
> gcc/
>
>       * config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>):
>       New pattern.
>       (*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise.
>
> gcc/testsuite/
>
>       * gcc.target/aarch64/sve2/eon_bsl2n.c: New test.
> ---
>  gcc/config/aarch64/aarch64-sve2.md            | 28 ++++++++++
>  .../gcc.target/aarch64/sve2/eon_bsl2n.c       | 52 +++++++++++++++++++
>  2 files changed, 80 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve2.md 
> b/gcc/config/aarch64/aarch64-sve2.md
> index 6d6dc94cd81..a011b947f51 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -2053,6 +2053,34 @@
>    }
>  )
>  
> +;; Vector EON (~(x ^ y)) using BSL2N.
> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n_eon<mode>"
> +  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
> +     (unspec:SVE_FULL_I
> +       [(match_operand 3)
> +        (not:SVE_FULL_I
> +          (xor:SVE_FULL_I
> +             (match_operand:SVE_FULL_I 1 "register_operand" "%0")
> +             (match_operand:SVE_FULL_I 2 "register_operand" "w")))]
> +         UNSPEC_PRED_X))]
> +  "TARGET_SVE2"
> +  "bsl2n\t%0.d, %0.d, %0.d, %2.d"
> +  "&& !CONSTANT_P (operands[3])"
> +  {
> +    operands[3] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +)
> +
> +(define_insn "*aarch64_sve2_eon_bsl2n_unpred<mode>"
> +  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
> +       (not:VDQ_I
> +         (xor:VDQ_I
> +           (match_operand:VDQ_I 1 "register_operand" "%0")
> +           (match_operand:VDQ_I 2 "register_operand" "w"))))]
> +  "TARGET_SVE2"
> +  "bsl2n\t%Z0.d, %Z0.d, %Z0.d, %Z2.d"
> +)
> +
>  ;; -------------------------------------------------------------------------
>  ;; ---- [INT] Shift-and-accumulate operations
>  ;; -------------------------------------------------------------------------
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c 
> b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
> new file mode 100644
> index 00000000000..d547e3152d4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
> @@ -0,0 +1,52 @@
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#include <arm_neon.h>
> +#include <arm_sve.h>
> +
> +#define EON(x, y)   (~((x) ^ (y)))
> +
> +/*
> +** eon_d:
> +**   bsl2n   z0.d, z0.d, z0.d, z1.d
> +**   ret
> +*/
> +uint32x2_t eon_d(uint32x2_t a, uint32x2_t b) { return EON(a, b); }
> +
> +/*
> +** eon_d_mp:
> +**   bsl2n   z1.d, z1.d, z1.d, z2.d
> +**   ...
> +**   ret
> +*/
> +uint32x2_t eon_d_mp(uint32x2_t c, uint32x2_t a, uint32x2_t b) { return 
> EON(a, b); }
> +
> +/*
> +** eon_q:
> +**   bsl2n   z0.d, z0.d, z0.d, z1.d
> +**   ret
> +*/
> +uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
> +
> +/*
> +** eon_q_mp:
> +**   bsl2n   z1.d, z1.d, z1.d, z2.d
> +**   ...
> +**   ret
> +*/
> +uint64x2_t eon_q_mp(uint64x2_t c, uint64x2_t a, uint64x2_t b) { return 
> EON(a, b); }
> +
> +/*
> +** eon_z:
> +**   bsl2n   z0.d, z0.d, z0.d, z1.d
> +**   ret
> +*/
> +svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
> +
> +/*
> +** eon_z_mp:
> +**   bsl2n   z1.d, z1.d, z1.d, z2.d
> +**   ...
> +**   ret
> +*/
> +svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return 
> EON(a, b); }

Reply via email to