[gcc r15-47] RISC-V: Refine the condition for add additional vars in RVV cost model

2024-04-29 Thread Demin Han via Gcc-cvs
https://gcc.gnu.org/g:ca2f531cc5db4f1020d4329976610356033e0246

commit r15-47-gca2f531cc5db4f1020d4329976610356033e0246
Author: demin.han 
Date:   Tue Mar 26 16:52:12 2024 +0800

RISC-V: Refine the condition for add additional vars in RVV cost model

adjacent_dr_p is a sufficient but not a necessary condition for contiguous
access, so unnecessary live ranges are added, which results in a smaller
LMUL.

This patch uses MEMORY_ACCESS_TYPE as condition and constrains segment
load/store.

Tested on RV64 and no regression.

PR target/114506

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc 
(non_contiguous_memory_access_p): Rename
(need_additional_vector_vars_p): Rename and refine condition

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/pr114506.c: New test.

Signed-off-by: demin.han 

Diff:
---
 gcc/config/riscv/riscv-vector-costs.cc | 23 ++
 .../gcc.dg/vect/costmodel/riscv/rvv/pr114506.c | 23 ++
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index d27bb68a7b9..4582b0db425 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -563,14 +563,24 @@ get_store_value (gimple *stmt)
 return gimple_assign_rhs1 (stmt);
 }
 
-/* Return true if it is non-contiguous load/store.  */
+/* Return true if additional vector vars are needed.  */
 static bool
-non_contiguous_memory_access_p (stmt_vec_info stmt_info)
+need_additional_vector_vars_p (stmt_vec_info stmt_info)
 {
   enum stmt_vec_info_type type
 = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
-  return ((type == load_vec_info_type || type == store_vec_info_type)
- && !adjacent_dr_p (STMT_VINFO_DATA_REF (stmt_info)));
+  if (type == load_vec_info_type || type == store_vec_info_type)
+{
+  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+ && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+   return true;
+
+  machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
+  int lmul = riscv_get_v_regno_alignment (mode);
+  if (DR_GROUP_SIZE (stmt_info) * lmul > RVV_M8)
+   return true;
+}
+  return false;
 }
 
 /* Return the LMUL of the current analysis.  */
@@ -739,10 +749,7 @@ update_local_live_ranges (
  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
  enum stmt_vec_info_type type
= STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
- if (non_contiguous_memory_access_p (stmt_info)
- /* LOAD_LANES/STORE_LANES doesn't need a perm indice.  */
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)
-  != VMAT_LOAD_STORE_LANES)
+ if (need_additional_vector_vars_p (stmt_info))
{
  /* For non-adjacent load/store STMT, we will potentially
 convert it into:
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c
new file mode 100644
index 000..a88d24b2d2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-mrvv-max-lmul=dynamic -fdump-tree-vect-details" } */
+
+float a[32000], b[32000], c[32000], d[32000];
+float aa[256][256], bb[256][256], cc[256][256];
+
+void
+s2275 ()
+{
+  for (int i = 0; i < 256; i++)
+{
+  for (int j = 0; j < 256; j++)
+   {
+ aa[j][i] = aa[j][i] + bb[j][i] * cc[j][i];
+   }
+  a[i] = b[i] + c[i] * d[i];
+}
+}
+
+/* { dg-final { scan-assembler-times {e32,m8} 1 } } */
+/* { dg-final { scan-assembler-not {e32,m4} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it 
has unexpected spills" "vect" } } */


[gcc r14-9826] RISC-V: Minor fix for max_point

2024-04-08 Thread Demin Han via Gcc-cvs
https://gcc.gnu.org/g:aa2ab7b79a87c25d113752401a6026c6823dfe57

commit r14-9826-gaa2ab7b79a87c25d113752401a6026c6823dfe57
Author: demin.han 
Date:   Mon Apr 1 16:20:46 2024 +0800

RISC-V: Minor fix for max_point

The program points start from 1, so max_point should be equal to
length().

Tested on RV64 and no regression.

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc: Use length()

Signed-off-by: demin.han 

Diff:
---
 gcc/config/riscv/riscv-vector-costs.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index f462c272a6e..5ceb313c118 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -752,7 +752,7 @@ update_local_live_ranges (
 
We will be likely using one more vector variable.  */
  unsigned int max_point
-   = (*program_points_per_bb.get (bb)).length () - 1;
+   = (*program_points_per_bb.get (bb)).length ();
  auto *live_ranges = live_ranges_per_bb.get (bb);
  bool existed_p = false;
  tree var = type == load_vec_info_type


[gcc r14-9380] RISC-V: Fix ICE in riscv vector costs

2024-03-07 Thread Demin Han via Gcc-cvs
https://gcc.gnu.org/g:42187c6968af9907de1f9b7563d87de739857684

commit r14-9380-g42187c6968af9907de1f9b7563d87de739857684
Author: demin.han 
Date:   Wed Mar 6 17:34:34 2024 +0800

RISC-V: Fix ICE in riscv vector costs

The following code can result in ICE:
-march=rv64gcv --param riscv-autovec-lmul=dynamic -O3

char *jpeg_difference7_input_buf;
void jpeg_difference7(int *diff_buf) {
  unsigned width;
  int samp, Rb;
  while (--width) {
Rb = samp = *jpeg_difference7_input_buf;
*diff_buf++ = -(int)(samp + (long)Rb >> 1);
  }
}

An update of biggest_mode was missed in one branch, which triggers this assertion failure:
gcc_assert (biggest_size >= mode_size);

Tested On RV64 and no regression.

PR target/114264

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc: Fix ICE

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/pr114264.c: New test.

Signed-off-by: demin.han 

Diff:
---
 gcc/config/riscv/riscv-vector-costs.cc   |  2 ++
 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114264.c | 15 +++
 2 files changed, 17 insertions(+)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index adf9c197df5..5ac8655b4d8 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -414,6 +414,8 @@ compute_local_live_ranges (
  auto *r = get_live_range (live_ranges, arg);
  gcc_assert (r);
  (*r).second = MAX (point, (*r).second);
+ biggest_mode = get_biggest_mode (
+   biggest_mode, TYPE_MODE (TREE_TYPE (arg)));
}
}
  else
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114264.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114264.c
new file mode 100644
index 000..7853f292af7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114264.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
--param=riscv-autovec-lmul=dynamic" } */
+
+char *jpeg_difference7_input_buf;
+void
+jpeg_difference7 (int *diff_buf)
+{
+  unsigned width;
+  int samp, Rb;
+  while (--width)
+{
+  Rb = samp = *jpeg_difference7_input_buf;
+  *diff_buf++ = -(int) (samp + (long) Rb >> 1);
+}
+}


[gcc r14-9349] RISC-V: Refactor expand_vec_cmp [NFC]

2024-03-06 Thread Demin Han via Gcc-cvs
https://gcc.gnu.org/g:1cd8254ebad7b73993d2acee80a7caf37c21878a

commit r14-9349-g1cd8254ebad7b73993d2acee80a7caf37c21878a
Author: demin.han 
Date:   Mon Feb 26 14:50:15 2024 +0800

RISC-V: Refactor expand_vec_cmp [NFC]

There are two expand_vec_cmp functions.
They have the same structure and similar code.
We can use default arguments instead of overloading.

Tested on RV32 and RV64.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (expand_vec_cmp): Change proto
* config/riscv/riscv-v.cc (expand_vec_cmp): Use default arguments
(expand_vec_cmp_float): Adapt arguments

Signed-off-by: demin.han 

Diff:
---
 gcc/config/riscv/riscv-protos.h |  2 +-
 gcc/config/riscv/riscv-v.cc | 44 +
 2 files changed, 15 insertions(+), 31 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 80efdf2b7e5..b8735593805 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -603,7 +603,7 @@ bool simm5_p (rtx);
 bool neg_simm5_p (rtx);
 #ifdef RTX_CODE
 bool has_vi_variant_p (rtx_code, rtx);
-void expand_vec_cmp (rtx, rtx_code, rtx, rtx);
+void expand_vec_cmp (rtx, rtx_code, rtx, rtx, rtx = nullptr, rtx = nullptr);
 bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
 void expand_cond_len_unop (unsigned, rtx *);
 void expand_cond_len_binop (unsigned, rtx *);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 2d32db06dd1..967f4e38287 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2775,7 +2775,8 @@ vectorize_related_mode (machine_mode vector_mode, 
scalar_mode element_mode,
 /* Expand an RVV comparison.  */
 
 void
-expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1)
+expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
+   rtx maskoff)
 {
   machine_mode mask_mode = GET_MODE (target);
   machine_mode data_mode = GET_MODE (op0);
@@ -2785,8 +2786,8 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx 
op1)
 {
   rtx lt = gen_reg_rtx (mask_mode);
   rtx gt = gen_reg_rtx (mask_mode);
-  expand_vec_cmp (lt, LT, op0, op1);
-  expand_vec_cmp (gt, GT, op0, op1);
+  expand_vec_cmp (lt, LT, op0, op1, mask, maskoff);
+  expand_vec_cmp (gt, GT, op0, op1, mask, maskoff);
   icode = code_for_pred (IOR, mask_mode);
   rtx ops[] = {target, lt, gt};
   emit_vlmax_insn (icode, BINARY_MASK_OP, ops);
@@ -2794,33 +2795,16 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx 
op1)
 }
 
   rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
-  rtx ops[] = {target, cmp, op0, op1};
-  emit_vlmax_insn (icode, COMPARE_OP, ops);
-}
-
-void
-expand_vec_cmp (rtx target, rtx_code code, rtx mask, rtx maskoff, rtx op0,
-   rtx op1)
-{
-  machine_mode mask_mode = GET_MODE (target);
-  machine_mode data_mode = GET_MODE (op0);
-  insn_code icode = get_cmp_insn_code (code, data_mode);
-
-  if (code == LTGT)
+  if (!mask && !maskoff)
 {
-  rtx lt = gen_reg_rtx (mask_mode);
-  rtx gt = gen_reg_rtx (mask_mode);
-  expand_vec_cmp (lt, LT, mask, maskoff, op0, op1);
-  expand_vec_cmp (gt, GT, mask, maskoff, op0, op1);
-  icode = code_for_pred (IOR, mask_mode);
-  rtx ops[] = {target, lt, gt};
-  emit_vlmax_insn (icode, BINARY_MASK_OP, ops);
-  return;
+  rtx ops[] = {target, cmp, op0, op1};
+  emit_vlmax_insn (icode, COMPARE_OP, ops);
+}
+  else
+{
+  rtx ops[] = {target, mask, maskoff, cmp, op0, op1};
+  emit_vlmax_insn (icode, COMPARE_OP_MU, ops);
 }
-
-  rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
-  rtx ops[] = {target, mask, maskoff, cmp, op0, op1};
-  emit_vlmax_insn (icode, COMPARE_OP_MU, ops);
 }
 
 /* Expand an RVV floating-point comparison:
@@ -2898,7 +2882,7 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, 
rtx op1,
   else
{
 /* vmfeq.vv v0, vb, vb, v0.t  */
- expand_vec_cmp (eq0, EQ, eq0, eq0, op1, op1);
+ expand_vec_cmp (eq0, EQ, op1, op1, eq0, eq0);
}
   break;
 default:
@@ -2916,7 +2900,7 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, 
rtx op1,
   if (code == ORDERED)
 emit_move_insn (target, eq0);
   else
-expand_vec_cmp (eq0, code, eq0, eq0, op0, op1);
+expand_vec_cmp (eq0, code, op0, op1, eq0, eq0);
 
   if (can_invert_p)
 {