Hi Tamar,

> -----Original Message-----
> From: Tamar Christina <tamar.christ...@arm.com>
> Sent: Monday, November 6, 2023 7:43 AM
> To: gcc-patches@gcc.gnu.org
> Cc: nd <n...@arm.com>; Ramana Radhakrishnan
> <ramana.radhakrish...@arm.com>; Richard Earnshaw
> <richard.earns...@arm.com>; ni...@redhat.com; Kyrylo Tkachov
> <kyrylo.tkac...@arm.com>
> Subject: [PATCH 21/21]Arm: Add MVE cbranch implementation
> 
> Hi All,
> 
> This adds an implementation for conditional branch optab for MVE.
> 
> Unfortunately MVE has rather limited operations on VPT.P0, we are missing the
> ability to do P0 comparisons and logical OR on P0.
> 
> For that reason we can only support cbranch with 0, as for comparing to a 0
> predicate we don't need to actually do a comparison, we only have to check 
> that
> any bit is set within P0.
> 
> Because we can only do P0 comparisons with 0, the costing of the comparison 
> was
> reduced in order for the compiler not to try to push 0 to a register thinking
> it's too expensive.  For the cbranch implementation to be safe we must see the
> constant 0 vector.
> 
> We can't really work around the lack of logical OR on P0.  This means MVE
> can't support cases where the sizes of operands in the comparison don't match,
> i.e. when one operand has been unpacked.
> 
> For example:
> 
> void f1 ()
> {
>   for (int i = 0; i < N; i++)
>     {
>       b[i] += a[i];
>       if (a[i] > 0)
>       break;
>     }
> }
> 
> For 128-bit vectors we generate:
> 
>         vcmp.s32        gt, q3, q1
>         vmrs    r3, p0  @ movhi
>         cbnz    r3, .L2
> 
> MVE does not have 64-bit vector comparisons, as such that is also not 
> supported.
> 
> Bootstrapped arm-none-linux-gnueabihf and regtested with
> -march=armv8.1-m.main+mve -mfpu=auto and no issues.
> 
> Ok for master?
> 

This is okay once the rest goes in.
Thanks,
Kyrill

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       * config/arm/arm.cc (arm_rtx_costs_internal): Update costs for pred 0
>       compares.
>       * config/arm/mve.md (cbranch<mode>4): New.
> 
> gcc/testsuite/ChangeLog:
> 
>       * lib/target-supports.exp (vect_early_break): Add MVE.
>       * gcc.target/arm/mve/vect-early-break-cbranch.c: New test.
> 
> --- inline copy of patch --
> diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
> index
> 38f0839de1c75547c259ac3d655fcfc14e7208a2..15e65c15cb3cb6f70161787e84
> b255a24eb51e32 100644
> --- a/gcc/config/arm/arm.cc
> +++ b/gcc/config/arm/arm.cc
> @@ -11883,6 +11883,15 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code,
> enum rtx_code outer_code,
>          || TARGET_HAVE_MVE)
>         && simd_immediate_valid_for_move (x, mode, NULL, NULL))
>       *cost = COSTS_N_INSNS (1);
> +      else if (TARGET_HAVE_MVE
> +            && outer_code == COMPARE
> +            && VALID_MVE_PRED_MODE (mode))
> +     /* MVE allows very limited instructions on VPT.P0,  however comparisons
> +        to 0 do not require us to materialize this constant or require a
> +        predicate comparison as we can go through SImode.  For that reason
> +        allow P0 CMP 0 as a cheap operation such that the 0 isn't forced to
> +        registers as we can't compare two predicates.  */
> +     *cost = COSTS_N_INSNS (1);
>        else
>       *cost = COSTS_N_INSNS (4);
>        return true;
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index
> 74909ce47e132c22a94f7d9cd3a0921b38e33051..95d40770ecc25f9eb251eba38
> 306dd43cbebfb3f 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -6880,6 +6880,21 @@ (define_expand
> "vcond_mask_<mode><MVE_vpred>"
>    DONE;
>  })
> 
> +(define_expand "cbranch<mode>4"
> +  [(set (pc) (if_then_else
> +           (match_operator 0 "expandable_comparison_operator"
> +            [(match_operand:MVE_7 1 "register_operand")
> +             (match_operand:MVE_7 2 "zero_operand")])
> +           (label_ref (match_operand 3 "" ""))
> +           (pc)))]
> +  "TARGET_HAVE_MVE"
> +{
> +  rtx val = gen_reg_rtx (SImode);
> +  emit_move_insn (val, gen_lowpart (SImode, operands[1]));
> +  emit_jump_insn (gen_cbranchsi4 (operands[0], val, const0_rtx, 
> operands[3]));
> +  DONE;
> +})
> +
>  ;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
>  (define_expand "@arm_mve_reinterpret<mode>"
>    [(set (match_operand:MVE_vecs 0 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/arm/mve/vect-early-break-cbranch.c
> b/gcc/testsuite/gcc.target/arm/mve/vect-early-break-cbranch.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..c3b8506dca0b2b044e6869a6
> c8259d663c1ff930
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/vect-early-break-cbranch.c
> @@ -0,0 +1,117 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#define N 640
> +int a[N] = {0};
> +int b[N] = {0};
> +
> +/*
> +** f1:
> +**   ...
> +**   vcmp.s32        gt, q[0-9]+, q[0-9]+
> +**   vmrs    r[0-9]+, p0     @ movhi
> +**   cbnz    r[0-9]+, \.L[0-9]+
> +**   ...
> +*/
> +void f1 ()
> +{
> +  for (int i = 0; i < N; i++)
> +    {
> +      b[i] += a[i];
> +      if (a[i] > 0)
> +     break;
> +    }
> +}
> +
> +/*
> +** f2:
> +**   ...
> +**   vcmp.s32        ge, q[0-9]+, q[0-9]+
> +**   vmrs    r[0-9]+, p0     @ movhi
> +**   cbnz    r[0-9]+, \.L[0-9]+
> +**   ...
> +*/
> +void f2 ()
> +{
> +  for (int i = 0; i < N; i++)
> +    {
> +      b[i] += a[i];
> +      if (a[i] >= 0)
> +     break;
> +    }
> +}
> +
> +/*
> +** f3:
> +**   ...
> +**   vcmp.i32        eq, q[0-9]+, q[0-9]+
> +**   vmrs    r[0-9]+, p0     @ movhi
> +**   cbnz    r[0-9]+, \.L[0-9]+
> +**   ...
> +*/
> +void f3 ()
> +{
> +  for (int i = 0; i < N; i++)
> +    {
> +      b[i] += a[i];
> +      if (a[i] == 0)
> +     break;
> +    }
> +}
> +
> +/*
> +** f4:
> +**   ...
> +**   vcmp.i32        ne, q[0-9]+, q[0-9]+
> +**   vmrs    r[0-9]+, p0     @ movhi
> +**   cbnz    r[0-9]+, \.L[0-9]+
> +**   ...
> +*/
> +void f4 ()
> +{
> +  for (int i = 0; i < N; i++)
> +    {
> +      b[i] += a[i];
> +      if (a[i] != 0)
> +     break;
> +    }
> +}
> +
> +/*
> +** f5:
> +**   ...
> +**   vcmp.s32        lt, q[0-9]+, q[0-9]+
> +**   vmrs    r[0-9]+, p0     @ movhi
> +**   cbnz    r[0-9]+, \.L[0-9]+
> +**   ...
> +*/
> +void f5 ()
> +{
> +  for (int i = 0; i < N; i++)
> +    {
> +      b[i] += a[i];
> +      if (a[i] < 0)
> +     break;
> +    }
> +}
> +
> +/*
> +** f6:
> +**   ...
> +**   vcmp.s32        le, q[0-9]+, q[0-9]+
> +**   vmrs    r[0-9]+, p0     @ movhi
> +**   cbnz    r[0-9]+, \.L[0-9]+
> +**   ...
> +*/
> +void f6 ()
> +{
> +  for (int i = 0; i < N; i++)
> +    {
> +      b[i] += a[i];
> +      if (a[i] <= 0)
> +     break;
> +    }
> +}
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-
> supports.exp
> index
> 8f58671e6cfd3546c6a98e40341fe31c6492594b..1eef764542a782786e27ed935a
> 06243e319ae3fc 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -3785,6 +3785,8 @@ proc check_effective_target_vect_early_break { } {
>        expr {
>       [istarget aarch64*-*-*]
>       || [check_effective_target_arm_neon_ok]
> +     || ([check_effective_target_arm_v8_1m_mve_fp_ok]
> +          && [check_effective_target_arm_little_endian])
>       }}]
>  }
>  # Return 1 if the target supports hardware vectorization of complex 
> additions of
> 
> 
> 
> 
> --

Reply via email to