On 12/28/23 18:21, Juzhe-Zhong wrote:
This patch fixes the following issue: an unexpectedly big LMUL is chosen, which
causes register spilling.

Before this patch, choosing LMUL = 4:

        addi    sp,sp,-160
        addiw   t1,a2,-1
        li      a5,7
        bleu    t1,a5,.L16
        vsetivli        zero,8,e64,m4,ta,ma
        vmv.v.x v4,a0
        vs4r.v  v4,0(sp)                        ---> spill to the stack.
        vmv.v.x v4,a1
        addi    a5,sp,64
        vs4r.v  v4,0(a5)                        ---> spill to the stack.

The root cause is the following code:

                   if (poly_int_tree_p (var)
                       || (is_gimple_val (var)
                          && !POINTER_TYPE_P (TREE_TYPE (var))))

We count the variable as consuming an RVV register group when it is not POINTER_TYPE.

This is right for a load/store STMT, for example:

_1 = (MEM)*addr -->  addr won't be allocated an RVV vector group.

However, we find it is not right for non-load/store STMT:

_3 = _1 == x_8(D);

_1 is a pointer type too, but we do allocate an RVV register group for it.

So after this patch, we are choosing the perfect LMUL for the testcase in this 
patch:

        ble     a2,zero,.L17
        addiw   a7,a2,-1
        li      a5,3
        bleu    a7,a5,.L15
        srliw   a5,a7,2
        slli    a6,a5,1
        add     a6,a6,a5
        lui     a5,%hi(replacements)
        addi    t1,a5,%lo(replacements)
        slli    a6,a6,5
        lui     t4,%hi(.LANCHOR0)
        lui     t3,%hi(.LANCHOR0+8)
        lui     a3,%hi(.LANCHOR0+16)
        lui     a4,%hi(.LC1)
        vsetivli        zero,4,e16,mf2,ta,ma
        addi    t4,t4,%lo(.LANCHOR0)
        addi    t3,t3,%lo(.LANCHOR0+8)
        addi    a3,a3,%lo(.LANCHOR0+16)
        addi    a4,a4,%lo(.LC1)
        add     a6,t1,a6
        addi    a5,a5,%lo(replacements)
        vle16.v v18,0(t4)
        vle16.v v17,0(t3)
        vle16.v v16,0(a3)
        vmsgeu.vi       v25,v18,4
        vadd.vi v24,v18,-4
        vmsgeu.vi       v23,v17,4
        vadd.vi v22,v17,-4
        vlm.v   v21,0(a4)
        vmsgeu.vi       v20,v16,4
        vadd.vi v19,v16,-4
        vsetvli zero,zero,e64,m2,ta,mu
        vmv.v.x v12,a0
        vmv.v.x v14,a1
.L4:
        vlseg3e64.v     v6,(a5)
        vmseq.vv        v2,v6,v12
        vmseq.vv        v0,v8,v12
        vmsne.vv        v1,v8,v12
        vmand.mm        v1,v1,v2
        vmerge.vvm      v2,v8,v14,v0
        vmv1r.v v0,v1
        addi    a4,a5,24
        vmerge.vvm      v6,v6,v14,v0
        vmerge.vim      v2,v2,0,v0
        vrgatherei16.vv v4,v6,v18
        vmv1r.v v0,v25
        vrgatherei16.vv v4,v2,v24,v0.t
        vs1r.v  v4,0(a5)
        addi    a3,a5,48
        vmv1r.v v0,v21
        vmv2r.v v4,v2
        vcompress.vm    v4,v6,v0
        vs1r.v  v4,0(a4)
        vmv1r.v v0,v23
        addi    a4,a5,72
        vrgatherei16.vv v4,v6,v17
        vrgatherei16.vv v4,v2,v22,v0.t
        vs1r.v  v4,0(a3)
        vmv1r.v v0,v20
        vrgatherei16.vv v4,v6,v16
        addi    a5,a5,96
        vrgatherei16.vv v4,v2,v19,v0.t
        vs1r.v  v4,0(a4)
        bne     a6,a5,.L4

No spillings, no "sp" register used.

Tested on both RV32 and RV64, no regression.

Ok for trunk ?

        PR target/113112

gcc/ChangeLog:

        * config/riscv/riscv-vector-costs.cc (compute_nregs_for_mode): Fix 
pointer type liveness count.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c: New test.

---
  gcc/config/riscv/riscv-vector-costs.cc        | 12 ++++++--
  .../vect/costmodel/riscv/rvv/pr113112-4.c     | 28 +++++++++++++++++++
  2 files changed, 37 insertions(+), 3 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index 0c485dc4f29..b41a79429d4 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -277,9 +277,12 @@ compute_local_live_ranges (
            {
              unsigned int point = program_point.point;
              gimple *stmt = program_point.stmt;
+             stmt_vec_info stmt_info = program_point.stmt_info;
              tree lhs = gimple_get_lhs (stmt);
              if (lhs != NULL_TREE && is_gimple_reg (lhs)
-                 && !POINTER_TYPE_P (TREE_TYPE (lhs)))
+                 && (!POINTER_TYPE_P (TREE_TYPE (lhs))
+                     || STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info))
+                          != store_vec_info_type))
                {
                  biggest_mode = get_biggest_mode (biggest_mode,
                                                   TYPE_MODE (TREE_TYPE (lhs)));
@@ -305,7 +308,10 @@ compute_local_live_ranges (
                     the future.  */
                  if (poly_int_tree_p (var)
                      || (is_gimple_val (var)
-                         && !POINTER_TYPE_P (TREE_TYPE (var))))
+                         && (!POINTER_TYPE_P (TREE_TYPE (var))
+                             || STMT_VINFO_TYPE (
+                                  vect_stmt_to_vectorize (stmt_info))
+                                  != load_vec_info_type)))
                    {
                      biggest_mode
                        = get_biggest_mode (biggest_mode,
Just a nit. Why not compute vect_stmt_to_vectorize (stmt_info) into a local to improve the bad line break? Or perhaps even compute STMT_VINFO_TYPE (...) into a local?

OK with or without a change for that nit.

jeff

Reply via email to