https://gcc.gnu.org/g:1f7bb5c73d588dd5da92d8673d0583a8431a81c5

commit r16-6498-g1f7bb5c73d588dd5da92d8673d0583a8431a81c5
Author: Pan Li <[email protected]>
Date:   Sun Dec 28 16:33:27 2025 +0800

    Vect: Adjust depth_limit of vec_slp_has_scalar_use from 2 to 3
    
    The RISC-V test case vx-6-u8.c has recently started failing the
    vaaddu.vx asm check when --param=gpr2vr-cost=2 is given.  After some
    investigation, it turns out that it fails to vectorize after some
    middle-end changes.  The depth_limit of the function
    vec_slp_has_scalar_use is 2, so the function returns -1 by design.  The
    slp_instance then ends up with a size of 12 and we may see a log similar
    to the one below:
    
    *_2 1 times vec_to_scalar costs 3 in epilogue
    *_2 1 times vec_to_scalar costs 3 in epilogue
    *_2 1 times vec_to_scalar costs 3 in epilogue
    *_2 1 times vec_to_scalar costs 3 in epilogue
    
      Vector cost: 18
       Scalar cost: 9
    
    As a result, the code cannot be vectorized due to cost considerations.
    
    This patch adjusts the depth_limit to 3, as suggested by Richard.
    
    gcc/ChangeLog:
    
            * tree-vect-slp.cc (vec_slp_has_scalar_use): Adjust the
            depth_limit from 2 to 3.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/autovec/sat_add-cost-1.c: New test.
    
    Signed-off-by: Pan Li <[email protected]>

Diff:
---
 .../gcc.target/riscv/rvv/autovec/sat_add-cost-1.c  | 59 ++++++++++++++++++++++
 gcc/tree-vect-slp.cc                               |  2 +-
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/sat_add-cost-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/sat_add-cost-1.c
new file mode 100644
index 000000000000..6f9ef08d3c2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/sat_add-cost-1.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl128b -mabi=lp64d --param=gpr2vr-cost=2 -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+#define T uint8_t
+
+T
+test_sat_add (T a, T b)
+{
+  return (a + b) | (-(T)((T)(a + b) < a));
+}
+
+void
+test_sat_add_cost_1 (T * restrict out, T * restrict in,
+                    T x, unsigned n)
+{
+  unsigned k = 0;
+  T tmp = x + 3;
+
+  while (k < n)
+    {
+      tmp = tmp ^ 0x82;
+
+      out[k + 0] = test_sat_add (in[k + 0], tmp);
+      out[k + 1] = test_sat_add (in[k + 1], tmp);
+      k += 2;
+
+      out[k + 0] = test_sat_add (in[k + 0], tmp);
+      out[k + 1] = test_sat_add (in[k + 1], tmp);
+      k += 2;
+
+      out[k + 0] = test_sat_add (in[k + 0], tmp);
+      out[k + 1] = test_sat_add (in[k + 1], tmp);
+      k += 2;
+
+      out[k + 0] = test_sat_add (in[k + 0], tmp);
+      out[k + 1] = test_sat_add (in[k + 1], tmp);
+      k += 2;
+
+      out[k + 0] = test_sat_add (in[k + 0], tmp);
+      out[k + 1] = test_sat_add (in[k + 1], tmp);
+      k += 2;
+
+      out[k + 0] = test_sat_add (in[k + 0], tmp);
+      out[k + 1] = test_sat_add (in[k + 1], tmp);
+      k += 2;
+
+      out[k + 0] = test_sat_add (in[k + 0], tmp);
+      out[k + 1] = test_sat_add (in[k + 1], tmp);
+      k += 2;
+
+      out[k + 0] = test_sat_add (in[k + 0], tmp);
+      out[k + 1] = test_sat_add (in[k + 1], tmp);
+      k += 2;
+    }
+}
+
+/* { dg-final { scan-tree-dump ".SAT_ADD " "optimized" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index c7b71342ffc4..4e01adf4cd30 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8888,7 +8888,7 @@ vec_slp_has_scalar_use (bb_vec_info bb_vinfo, tree def,
                        hash_map<tree, int> &scalar_use_map,
                        int depth = 0)
 {
-  const int depth_limit = 2;
+  const int depth_limit = 3;
   imm_use_iterator use_iter;
   gimple *use_stmt;

Reply via email to