This patch fixes the regression between GCC 13.2.0 and trunk GCC (GCC-14). GCC 13.2.0:
lui a5,%hi(a) li a4,19 sb a4,%lo(a)(a5) li a0,0 ret Trunk GCC: vsetvli a5,zero,e8,mf2,ta,ma li a4,-32768 vid.v v1 vsetvli zero,zero,e16,m1,ta,ma addiw a4,a4,104 vmv.v.i v3,15 lui a1,%hi(a) li a0,19 vsetvli zero,zero,e8,mf2,ta,ma vadd.vi v1,v1,1 sb a0,%lo(a)(a1) vsetvli zero,zero,e16,m1,ta,ma vzext.vf2 v2,v1 vmv.v.x v1,a4 vminu.vv v2,v2,v3 vsrl.vv v1,v1,v2 vslidedown.vi v1,v1,17 vmv.x.s a0,v1 snez a0,a0 ret The root cause is that we vectorize the code inefficiently because we don't cost the loop len when NITERS < VF. Leveraging the loop-control costing of mask-based targets or rs6000 fixes the regression. Tested with no regressions. OK for trunk? PR target/113281 gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (costs::adjust_vect_cost_per_loop): New function. (costs::finish_cost): Adjust cost. * config/riscv/riscv-vector-costs.h: New function. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c: New test. --- gcc/config/riscv/riscv-vector-costs.cc | 61 +++++++++++++++++++ gcc/config/riscv/riscv-vector-costs.h | 2 + .../vect/costmodel/riscv/rvv/pr113281-3.c | 18 ++++++ .../vect/costmodel/riscv/rvv/pr113281-4.c | 18 ++++++ 4 files changed, 99 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 1c3708f23a0..9c0b9a874de 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -1110,9 +1110,70 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt kind, return record_stmt_cost (stmt_info, where, count * stmt_cost); } +/* For some target specific vectorization cost which can't be handled per stmt, + we check the requisite conditions and adjust the vectorization cost + accordingly if satisfied.
One typical example is to model and adjust + loop_len cost for known_lt (NITERS, VF). */ + +void +costs::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo) +{ + if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) + && !LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo) + && m_num_vector_iterations == 1 + && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && known_le (LOOP_VINFO_INT_NITERS (loop_vinfo), + LOOP_VINFO_VECT_FACTOR (loop_vinfo))) + { + /* In middle-end loop vectorizer, we don't count the loop_len cost in + vect_estimate_min_profitable_iters when NITERS < VF, that is, we only + count the cost of len when we need to iterate the loop more than once with VF + (m_num_vector_iterations > 1). It's correct for most of the cases: + + E.g. VF = [4, 4] + for (int i = 0; i < 3; i ++) + a[i] += b[i]; + + We don't need to cost MIN_EXPR or SELECT_VL for the case above. + + However, for some inefficient vectorized cases, it does use MIN_EXPR + to generate len. + + E.g. VF = [256, 256] + + Loop body: + # loop_len_110 = PHI <18(2), _119(11)> + ... + _117 = MIN_EXPR <ivtmp_114, 18>; + _118 = 18 - _117; + _119 = MIN_EXPR <_118, POLY_INT_CST [256, 256]>; + ... + + Epilogue: + ... + _112 = .VEC_EXTRACT (vect_patt_27.14_109, _111); + + We cost 1 unconditionally for this situation like other targets which + apply mask as the loop control.
*/ + rgroup_controls *rgc; + unsigned int num_vectors_m1; + unsigned int body_stmts = 0; + FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc) + if (rgc->type) + body_stmts += num_vectors_m1 + 1; + + add_stmt_cost (body_stmts, scalar_stmt, NULL, NULL, NULL_TREE, 0, + vect_body); + } +} + void costs::finish_cost (const vector_costs *scalar_costs) { + if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo)) + { + adjust_vect_cost_per_loop (loop_vinfo); + } vector_costs::finish_cost (scalar_costs); } diff --git a/gcc/config/riscv/riscv-vector-costs.h b/gcc/config/riscv/riscv-vector-costs.h index 9bf041bb65c..3defd45fd4c 100644 --- a/gcc/config/riscv/riscv-vector-costs.h +++ b/gcc/config/riscv/riscv-vector-costs.h @@ -101,6 +101,8 @@ private: V_REGS spills according to the analysis. */ bool m_has_unexpected_spills_p = false; void record_potential_unexpected_spills (loop_vec_info); + + void adjust_vect_cost_per_loop (loop_vec_info); }; } // namespace riscv_vector diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c new file mode 100644 index 00000000000..706e19116c9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl4096b -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-lmul=m8" } */ + +unsigned char a; + +int main() { + short b = a = 0; + for (; a != 19; a++) + if (a) + b = 32872 >> a; + + if (b == 0) + return 0; + else + return 1; +} + +/* { dg-final { scan-assembler-not {vset} } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c new file mode 100644 index 00000000000..b0305db2d48 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl4096b -mabi=lp64d -O3 
-ftree-vectorize --param=riscv-autovec-lmul=m8 --param=riscv-autovec-preference=fixed-vlmax" } */ + +unsigned char a; + +int main() { + short b = a = 0; + for (; a != 19; a++) + if (a) + b = 32872 >> a; + + if (b == 0) + return 0; + else + return 1; +} + +/* { dg-final { scan-assembler-not {vset} } } */ -- 2.36.3