This patch fixes the following inefficient vectorized code:

        vsetvli a5,zero,e8,mf2,ta,ma
        li      a2,17
        vid.v   v1
        li      a4,-32768
        vsetvli zero,zero,e16,m1,ta,ma
        addiw   a4,a4,104
        vmv.v.i v3,15
        lui     a1,%hi(a)
        li      a0,19
        vsetvli zero,zero,e8,mf2,ta,ma
        vadd.vx v1,v1,a2
        sb      a0,%lo(a)(a1)
        vsetvli zero,zero,e16,m1,ta,ma
        vzext.vf2       v2,v1
        vmv.v.x v1,a4
        vminu.vv        v2,v2,v3
        vsrl.vv v1,v1,v2
        vslidedown.vi   v1,v1,1
        vmv.x.s a0,v1
        snez    a0,a0
        ret

The reason is that scalar_to_vec_cost is too low.

Consider in VEC_SET, we always have a slide + scalar move instruction,
scalar_to_vec_cost = 1 (current cost) is not reasonable.

I tried setting it to 2, but that was not enough to fix this case;
setting it to 3 is needed to fix it.

Whether it is a scalar move or a slide instruction, I believe both are more
costly than normal vector instructions (e.g. vadd.vv), so setting the cost
to 3 looks reasonable to me.

After this patch:

        lui     a5,%hi(a)
        li      a4,19
        sb      a4,%lo(a)(a5)
        li      a0,0
        ret

Tested on both RV32 and RV64 with no regressions. OK for trunk?

        PR target/113281

gcc/ChangeLog:

        * config/riscv/riscv.cc: Set scalar_to_vec_cost as 3.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/pr113209.c: Adapt test.
        * gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c: New test.

---
 gcc/config/riscv/riscv.cc                      |  4 ++--
 .../vect/costmodel/riscv/rvv/pr113281-1.c      | 18 ++++++++++++++++++
 .../gcc.target/riscv/rvv/autovec/pr113209.c    |  2 +-
 3 files changed, 21 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index df9799d9c5e..bcfb3c15a39 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -366,7 +366,7 @@ static const common_vector_cost rvv_vls_vector_cost = {
   1, /* gather_load_cost  */
   1, /* scatter_store_cost  */
   1, /* vec_to_scalar_cost  */
-  1, /* scalar_to_vec_cost  */
+  3, /* scalar_to_vec_cost  */
   1, /* permute_cost  */
   1, /* align_load_cost  */
   1, /* align_store_cost  */
@@ -382,7 +382,7 @@ static const scalable_vector_cost rvv_vla_vector_cost = {
     1, /* gather_load_cost  */
     1, /* scatter_store_cost  */
     1, /* vec_to_scalar_cost  */
-    1, /* scalar_to_vec_cost  */
+    3, /* scalar_to_vec_cost  */
     1, /* permute_cost  */
     1, /* align_load_cost  */
     1, /* align_store_cost  */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c
new file mode 100644
index 00000000000..331cf961a1f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3 -ftree-vectorize 
-fdump-tree-vect-details" } */
+
+unsigned char a;
+
+int main() {
+  short b = a = 0;
+  for (; a != 19; a++)
+    if (a)
+      b = 32872 >> a;
+
+  if (b == 0)
+    return 0;
+  else
+    return 1;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113209.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113209.c
index 081ee369394..70aae151000 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113209.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113209.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3" } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3 -fno-vect-cost-model" 
} */
 
 int b, c, d, f, i, a;
 int e[1] = {0};
-- 
2.36.3

Reply via email to