This patch adjusts the cost handling for VMAT_ELEMENTWISE
and VMAT_STRIDED_SLP in function vectorizable_load, so that
we no longer call vect_model_load_cost for them.
As PR82255 shows, we don't always need a vector construction
there; moving the costing next to the transform lets us cost
the vector construction only when it's actually needed.
Besides, it lets us count the number of loads consistently
in some cases.
PR tree-optimization/82255
gcc/ChangeLog:
* tree-vect-stmts.cc (vectorizable_load): Adjust the cost handling
on VMAT_ELEMENTWISE and VMAT_STRIDED_SLP without calling
vect_model_load_cost.
(vect_model_load_cost): Assert it no longer gets VMAT_ELEMENTWISE
or VMAT_STRIDED_SLP, and remove the related handling.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/costmodel/ppc/costmodel-pr82255.c: New test.
2023-06-13 Bill Schmidt
Kewen Lin
---
.../vect/costmodel/ppc/costmodel-pr82255.c| 31
gcc/tree-vect-stmts.cc| 170 +++---
2 files changed, 134 insertions(+), 67 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-pr82255.c
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-pr82255.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-pr82255.c
new file mode 100644
index 00000000000..9317ee2e15b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-pr82255.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+/* PR82255: Ensure we don't require a vec_construct cost when we aren't
+ going to generate a strided load. */
+
+extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__))
+__attribute__ ((__const__));
+
+static int
+foo (unsigned char *w, int i, unsigned char *x, int j)
+{
+ int tot = 0;
+ for (int a = 0; a < 16; a++)
+{
+#pragma GCC unroll 16
+ for (int b = 0; b < 16; b++)
+ tot += abs (w[b] - x[b]);
+ w += i;
+ x += j;
+}
+ return tot;
+}
+
+void
+bar (unsigned char *w, unsigned char *x, int i, int *result)
+{
+ *result = foo (w, 16, x, i);
+}
+
+/* { dg-final { scan-tree-dump-times "vec_construct" 0 "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 19c61d703c8..651dc800380 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1136,7 +1136,9 @@ vect_model_load_cost (vec_info *vinfo,
stmt_vector_for_cost *cost_vec)
{
gcc_assert ((memory_access_type != VMAT_GATHER_SCATTER || !gs_info->decl)
- && memory_access_type != VMAT_INVARIANT);
+ && memory_access_type != VMAT_INVARIANT
+ && memory_access_type != VMAT_ELEMENTWISE
+ && memory_access_type != VMAT_STRIDED_SLP);
unsigned int inside_cost = 0, prologue_cost = 0;
bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
@@ -1221,8 +1223,7 @@ vect_model_load_cost (vec_info *vinfo,
}
/* The loads themselves. */
- if (memory_access_type == VMAT_ELEMENTWISE
- || memory_access_type == VMAT_GATHER_SCATTER)
+ if (memory_access_type == VMAT_GATHER_SCATTER)
{
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
@@ -1244,10 +1245,10 @@ vect_model_load_cost (vec_info *vinfo,
alignment_support_scheme, misalignment, first_stmt_p,
&inside_cost, &prologue_cost,
cost_vec, cost_vec, true);
- if (memory_access_type == VMAT_ELEMENTWISE
- || memory_access_type == VMAT_STRIDED_SLP
- || (memory_access_type == VMAT_GATHER_SCATTER
- && gs_info->ifn == IFN_LAST && !gs_info->decl))
+
+ if (memory_access_type == VMAT_GATHER_SCATTER
+ && gs_info->ifn == IFN_LAST
+ && !gs_info->decl)
inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
stmt_info, 0, vect_body);
@@ -9591,14 +9592,6 @@ vectorizable_load (vec_info *vinfo,
if (memory_access_type == VMAT_ELEMENTWISE
|| memory_access_type == VMAT_STRIDED_SLP)
{
- if (costing_p)
- {
- vect_model_load_cost (vinfo, stmt_info, ncopies, vf,
- memory_access_type, alignment_support_scheme,
- misalignment, &gs_info, slp_node, cost_vec);
- return true;
- }
-
gimple_stmt_iterator incr_gsi;
bool insert_after;
tree offvar;
@@ -9610,6 +9603,7 @@ vectorizable_load (vec_info *vinfo,
unsigned int const_nunits = nunits.to_constant ();
unsigned HOST_WIDE_INT cst_offset = 0;
tree dr_offset;
+ unsigned int inside_cost = 0;
gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
gcc_assert (!nested_in_vect_loop);
@@ -9624,6 +9618,7 @@ vectorizable_load (vec_info *vinfo,
first_stmt_info = stmt_info;
first_dr_info = dr_info;