[PATCH] RISC-V: Adjust loop len by costing 1 when NITER < VF [GCC 14 regression]

Juzhe-Zhong Fri, 12 Jan 2024 17:51:53 -0800

This patch fixes the regression between GCC 13.2.0 and trunk GCC (GCC 14).

GCC 13.2.0:


        lui     a5,%hi(a)
        li      a4,19
        sb      a4,%lo(a)(a5)
        li      a0,0
        ret

Trunk GCC:

        vsetvli a5,zero,e8,mf2,ta,ma
        li      a4,-32768
        vid.v   v1
        vsetvli zero,zero,e16,m1,ta,ma
        addiw   a4,a4,104
        vmv.v.i v3,15
        lui     a1,%hi(a)
        li      a0,19
        vsetvli zero,zero,e8,mf2,ta,ma
        vadd.vi v1,v1,1
        sb      a0,%lo(a)(a1)
        vsetvli zero,zero,e16,m1,ta,ma
        vzext.vf2       v2,v1
        vmv.v.x v1,a4
        vminu.vv        v2,v2,v3
        vsrl.vv v1,v1,v2
        vslidedown.vi   v1,v1,17
        vmv.x.s a0,v1
        snez    a0,a0
        ret

The root cause is that we vectorize this code inefficiently because we don't
cost the loop len when NITERS < VF.
Following the loop-control costing of mask targets and rs6000 fixes the regression.

Tested with no regressions. OK for trunk?

        PR target/113281

gcc/ChangeLog:

        * config/riscv/riscv-vector-costs.cc
        (costs::adjust_vect_cost_per_loop): New function.
        (costs::finish_cost): Adjust cost.
        * config/riscv/riscv-vector-costs.h: New function.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c: New test.
        * gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c: New test.

---
 gcc/config/riscv/riscv-vector-costs.cc        | 61 +++++++++++++++++++
 gcc/config/riscv/riscv-vector-costs.h         |  2 +
 .../vect/costmodel/riscv/rvv/pr113281-3.c     | 18 ++++++
 .../vect/costmodel/riscv/rvv/pr113281-4.c     | 18 ++++++
 4 files changed, 99 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c

diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 1c3708f23a0..9c0b9a874de 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -1110,9 +1110,70 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
   return record_stmt_cost (stmt_info, where, count * stmt_cost);
 }
 
+/* For some target specific vectorization cost which can't be handled per stmt,
+   we check the requisite conditions and adjust the vectorization cost
+   accordingly if satisfied.  One typical example is to model and adjust
+   loop_len cost for known_lt (NITERS, VF).  */
+
+void
+costs::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
+{
+  if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
+      && !LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
+      && m_num_vector_iterations == 1
+      && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      && known_le (LOOP_VINFO_INT_NITERS (loop_vinfo),
+                  LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+    {
+      /* In middle-end loop vectorizer, we don't count the loop_len cost in
+        vect_estimate_min_profitable_iters when NITERS < VF, that is, we only
+        count the len cost when the loop iterates more than once with VF
+        (m_num_vector_iterations > 1).  It's correct for most of the cases:
+
+        E.g. VF = [4, 4]
+          for (int i = 0; i < 3; i ++)
+            a[i] += b[i];
+
+        We don't need to cost MIN_EXPR or SELECT_VL for the case above.
+
+        However, for some inefficient vectorized cases, it does use MIN_EXPR
+        to generate len.
+
+        E.g. VF = [256, 256]
+
+        Loop body:
+          # loop_len_110 = PHI <18(2), _119(11)>
+          ...
+          _117 = MIN_EXPR <ivtmp_114, 18>;
+          _118 = 18 - _117;
+          _119 = MIN_EXPR <_118, POLY_INT_CST [256, 256]>;
+          ...
+
+        Epilogue:
+          ...
+          _112 = .VEC_EXTRACT (vect_patt_27.14_109, _111);
+
+        We cost 1 unconditionally for this situation like other targets which
+        apply mask as the loop control.  */
+      rgroup_controls *rgc;
+      unsigned int num_vectors_m1;
+      unsigned int body_stmts = 0;
+      FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
+       if (rgc->type)
+         body_stmts += num_vectors_m1 + 1;
+
+      add_stmt_cost (body_stmts, scalar_stmt, NULL, NULL, NULL_TREE, 0,
+                    vect_body);
+    }
+}
+
 void
 costs::finish_cost (const vector_costs *scalar_costs)
 {
+  if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
+    {
+      adjust_vect_cost_per_loop (loop_vinfo);
+    }
   vector_costs::finish_cost (scalar_costs);
 }
 
diff --git a/gcc/config/riscv/riscv-vector-costs.h b/gcc/config/riscv/riscv-vector-costs.h
index 9bf041bb65c..3defd45fd4c 100644
--- a/gcc/config/riscv/riscv-vector-costs.h
+++ b/gcc/config/riscv/riscv-vector-costs.h
@@ -101,6 +101,8 @@ private:
      V_REGS spills according to the analysis.  */
   bool m_has_unexpected_spills_p = false;
   void record_potential_unexpected_spills (loop_vec_info);
+
+  void adjust_vect_cost_per_loop (loop_vec_info);
 };
 
 } // namespace riscv_vector
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c
new file mode 100644
index 00000000000..706e19116c9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-lmul=m8" } */
+
+unsigned char a;
+
+int main() {
+  short b = a = 0;
+  for (; a != 19; a++)
+    if (a)
+      b = 32872 >> a;
+
+  if (b == 0)
+    return 0;
+  else
+    return 1;
+}
+
+/* { dg-final { scan-assembler-not {vset} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c
new file mode 100644
index 00000000000..b0305db2d48
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-lmul=m8 --param=riscv-autovec-preference=fixed-vlmax" } */
+
+unsigned char a;
+
+int main() {
+  short b = a = 0;
+  for (; a != 19; a++)
+    if (a)
+      b = 32872 >> a;
+
+  if (b == 0)
+    return 0;
+  else
+    return 1;
+}
+
+/* { dg-final { scan-assembler-not {vset} } } */
-- 
2.36.3

[PATCH] RISC-V: Adjust loop len by costing 1 when NITER < VF [GCC 14 regression]

Reply via email to