Hi All,
With the move to SLP-only support we now pass the VMAT through the SLP node;
however, the majority of the costing calls inside vectorizable_load and
vectorizable_store do not pass the SLP node along.  Because of this the
backend costing never sees the VMAT for these cases anymore.
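For instance, vect_get_load_cost and vect_get_store_cost now take the SLP
node as an explicit argument, and the call sites in vectorizable_store and
vectorizable_load pass it through, e.g. (quoting one hunk from the patch
below):

  vect_get_store_cost (vinfo, stmt_info, slp_node, ncopies,
                       alignment_support_scheme, misalignment,
                       &inside_cost, cost_vec);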
Additionally, the record_stmt_cost helper that takes both an SLP node and a
stmt_vinfo would pass only the SLP node along.  However, the SLP node doesn't
contain all the info available in the stmt_vinfo and we'd have to go through
SLP_TREE_REPRESENTATIVE anyway.  As such I changed the function to always
pass both along.  Unlike the VMAT changes, I don't believe there is a
correctness issue here, but this minimizes churn in the backend costing
until vectorizer costing as a whole is revisited in GCC 16.
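Concretely, the inline record_stmt_cost wrapper in tree-vectorizer.h now
reduces to a single unconditional forward (full hunk in the patch below):

  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, node,
                           STMT_VINFO_VECTYPE (stmt_info), misalign, where);

so the backend costing always receives both the stmt_vec_info and the
slp_tree, rather than one or the other.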
These changes re-enable the cost model on AArch64 and also correctly
determine the VMATs for loads and stores, fixing testcases such as
sve_iters_low_2.c.
Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, and x86_64-pc-linux-gnu (-m32 and -m64) with no
issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* tree-vect-data-refs.cc (vect_get_data_access_cost): Pass NULL for SLP
node.
* tree-vect-stmts.cc (record_stmt_cost): Expose.
(vect_get_store_cost, vect_get_load_cost): Extend with SLP node.
(vectorizable_store, vectorizable_load): Pass SLP node to all costing.
* tree-vectorizer.h (record_stmt_cost): Always pass both SLP node and
stmt_vinfo to costing.
(vect_get_load_cost, vect_get_store_cost): Extend with SLP node.
---
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 3ea5fb883b1a5289195142171eb45fa422910a95..d87ca79b8e4c16d242e67431d1b527bdb8cb74e4 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -1729,12 +1729,14 @@ vect_get_data_access_cost (vec_info *vinfo, dr_vec_info *dr_info,
ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info));
if (DR_IS_READ (dr_info->dr))
- vect_get_load_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
- misalignment, true, inside_cost,
- outside_cost, prologue_cost_vec, body_cost_vec, false);
+ vect_get_load_cost (vinfo, stmt_info, NULL, ncopies,
+ alignment_support_scheme, misalignment, true,
+ inside_cost, outside_cost, prologue_cost_vec,
+ body_cost_vec, false);
else
- vect_get_store_cost (vinfo,stmt_info, ncopies, alignment_support_scheme,
- misalignment, inside_cost, body_cost_vec);
+ vect_get_store_cost (vinfo,stmt_info, NULL, ncopies,
+ alignment_support_scheme, misalignment, inside_cost,
+ body_cost_vec);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 7a92da00f7ddcfdf146fa1c2511f609e8bc40e9e..46543c15c00f00e5127d06446f58fce79951c3b0 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -93,7 +93,7 @@ stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
target model or by saving it in a vector for later processing.
Return a preliminary estimate of the statement's cost. */
-static unsigned
+unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
enum vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree node,
@@ -1008,8 +1008,8 @@ cfun_returns (tree decl)
/* Calculate cost of DR's memory access. */
void
-vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
- dr_alignment_support alignment_support_scheme,
+vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
+ int ncopies, dr_alignment_support alignment_support_scheme,
int misalignment,
unsigned int *inside_cost,
stmt_vector_for_cost *body_cost_vec)
@@ -1019,7 +1019,7 @@ vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
case dr_aligned:
{
*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
- vector_store, stmt_info, 0,
+ vector_store, stmt_info, slp_node, 0,
vect_body);
if (dump_enabled_p ())
@@ -1032,7 +1032,7 @@ vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
{
/* Here, we assign an additional cost for the unaligned store. */
*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
- unaligned_store, stmt_info,
+ unaligned_store, stmt_info, slp_node,
misalignment, vect_body);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -1058,8 +1058,8 @@ vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
/* Calculate cost of DR's memory access. */
void
-vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
- dr_alignment_support alignment_support_scheme,
+vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
+ int ncopies, dr_alignment_support alignment_support_scheme,
int misalignment,
bool add_realign_cost, unsigned int *inside_cost,
unsigned int *prologue_cost,
@@ -1072,7 +1072,7 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
case dr_aligned:
{
*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0, vect_body);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -1084,7 +1084,7 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
{
/* Here, we assign an additional cost for the unaligned load. */
*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
- unaligned_load, stmt_info,
+ unaligned_load, stmt_info, slp_node,
misalignment, vect_body);
if (dump_enabled_p ())
@@ -1097,16 +1097,18 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
case dr_explicit_realign:
{
*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
- vector_load, stmt_info, 0, vect_body);
+ vector_load, stmt_info, slp_node, 0,
+ vect_body);
*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
- vec_perm, stmt_info, 0, vect_body);
+ vec_perm, stmt_info, slp_node, 0,
+ vect_body);
/* FIXME: If the misalignment remains fixed across the iterations of
the containing loop, the following cost should be added to the
prologue costs. */
if (targetm.vectorize.builtin_mask_for_load)
*inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0, vect_body);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -1132,17 +1134,17 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
{
*prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
vector_stmt, stmt_info,
- 0, vect_prologue);
+ slp_node, 0, vect_prologue);
if (targetm.vectorize.builtin_mask_for_load)
*prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
vector_stmt, stmt_info,
- 0, vect_prologue);
+ slp_node, 0, vect_prologue);
}
*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0, vect_body);
*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0, vect_body);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -8551,7 +8553,7 @@ vectorizable_store (vec_info *vinfo,
if (vls_type == VLS_STORE_INVARIANT)
prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
stmt_info, 0, vect_prologue);
- vect_get_store_cost (vinfo, stmt_info, ncopies,
+ vect_get_store_cost (vinfo, stmt_info, slp_node, ncopies,
alignment_support_scheme, misalignment,
&inside_cost, cost_vec);
@@ -8623,7 +8625,7 @@ vectorizable_store (vec_info *vinfo,
else if (vls_type != VLS_STORE_INVARIANT)
return;
*prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
- 0, vect_prologue);
+ slp_node, 0, vect_prologue);
};
if (memory_access_type == VMAT_ELEMENTWISE
@@ -8890,7 +8892,7 @@ vectorizable_store (vec_info *vinfo,
if (costing_p)
{
if (n_adjacent_stores > 0)
- vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
+ vect_get_store_cost (vinfo, stmt_info, slp_node, n_adjacent_stores,
alignment_support_scheme, misalignment,
&inside_cost, cost_vec);
if (dump_enabled_p ())
@@ -9202,7 +9204,7 @@ vectorizable_store (vec_info *vinfo,
if (costing_p)
{
if (n_adjacent_stores > 0)
- vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
+ vect_get_store_cost (vinfo, stmt_info, slp_node, n_adjacent_stores,
alignment_support_scheme, misalignment,
&inside_cost, cost_vec);
if (dump_enabled_p ())
@@ -9227,7 +9229,8 @@ vectorizable_store (vec_info *vinfo,
{
if (costing_p && vls_type == VLS_STORE_INVARIANT)
prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
- stmt_info, 0, vect_prologue);
+ stmt_info, slp_node, 0,
+ vect_prologue);
else if (!costing_p)
{
/* Since the store is not grouped, DR_GROUP_SIZE is 1, and
@@ -9304,7 +9307,8 @@ vectorizable_store (vec_info *vinfo,
unsigned int cnunits = vect_nunits_for_cost (vectype);
inside_cost
+= record_stmt_cost (cost_vec, cnunits, scalar_store,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0,
+ vect_body);
continue;
}
@@ -9371,7 +9375,7 @@ vectorizable_store (vec_info *vinfo,
unsigned int cnunits = vect_nunits_for_cost (vectype);
inside_cost
+= record_stmt_cost (cost_vec, cnunits, scalar_store,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0, vect_body);
continue;
}
poly_uint64 offset_nunits
@@ -9478,14 +9482,14 @@ vectorizable_store (vec_info *vinfo,
consumed by the load). */
inside_cost
+= record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0, vect_body);
/* N scalar stores plus extracting the elements. */
inside_cost
+= record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0, vect_body);
inside_cost
+= record_stmt_cost (cost_vec, cnunits, scalar_store,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0, vect_body);
continue;
}
@@ -9679,7 +9683,8 @@ vectorizable_store (vec_info *vinfo,
int group_size = DR_GROUP_SIZE (first_stmt_info);
int nstmts = ceil_log2 (group_size) * group_size;
inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0,
+ vect_body);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vect_model_store_cost: "
@@ -9708,7 +9713,8 @@ vectorizable_store (vec_info *vinfo,
{
if (costing_p)
inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0,
+ vect_body);
else
{
tree perm_mask = perm_mask_for_reverse (vectype);
@@ -9901,7 +9907,7 @@ vectorizable_store (vec_info *vinfo,
if (costing_p)
{
if (n_adjacent_stores > 0)
- vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
+ vect_get_store_cost (vinfo, stmt_info, slp_node, n_adjacent_stores,
alignment_support_scheme, misalignment,
&inside_cost, cost_vec);
@@ -9927,11 +9933,11 @@ vectorizable_store (vec_info *vinfo,
/* Spill. */
prologue_cost
+= record_stmt_cost (cost_vec, ncopies, vector_store,
- stmt_info, 0, vect_epilogue);
+ stmt_info, slp_node, 0, vect_epilogue);
/* Loads. */
prologue_cost
+= record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
- stmt_info, 0, vect_epilogue);
+ stmt_info, slp_node, 0, vect_epilogue);
}
}
}
@@ -10502,9 +10508,10 @@ vectorizable_load (vec_info *vinfo,
enum vect_cost_model_location cost_loc
= hoist_p ? vect_prologue : vect_body;
unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
- stmt_info, 0, cost_loc);
- cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
- cost_loc);
+ stmt_info, slp_node, 0,
+ cost_loc);
+ cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
+ slp_node, 0, cost_loc);
unsigned int prologue_cost = hoist_p ? cost : 0;
unsigned int inside_cost = hoist_p ? 0 : cost;
if (dump_enabled_p ())
@@ -10725,7 +10732,8 @@ vectorizable_load (vec_info *vinfo,
n_adjacent_loads++;
else
inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0,
+ vect_body);
continue;
}
tree this_off = build_int_cst (TREE_TYPE (alias_off),
@@ -10763,7 +10771,8 @@ vectorizable_load (vec_info *vinfo,
{
if (costing_p)
inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0,
+ vect_body);
else
{
tree vec_inv = build_constructor (lvectype, v);
@@ -10809,7 +10818,8 @@ vectorizable_load (vec_info *vinfo,
vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
true, &n_perms, &n_loads);
inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
- first_stmt_info, 0, vect_body);
+ first_stmt_info, slp_node, 0,
+ vect_body);
}
else
vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
@@ -10819,7 +10829,7 @@ vectorizable_load (vec_info *vinfo,
if (costing_p)
{
if (n_adjacent_loads > 0)
- vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
+ vect_get_load_cost (vinfo, stmt_info, slp_node, n_adjacent_loads,
alignment_support_scheme, misalignment, false,
&inside_cost, nullptr, cost_vec, cost_vec,
true);
@@ -11173,7 +11183,7 @@ vectorizable_load (vec_info *vinfo,
"vect_model_load_cost: %d "
"unused vectors.\n",
gaps);
- vect_get_load_cost (vinfo, stmt_info, gaps,
+ vect_get_load_cost (vinfo, stmt_info, slp_node, gaps,
alignment_support_scheme,
misalignment, false, &inside_cost,
&prologue_cost, cost_vec, cost_vec,
@@ -11302,7 +11312,7 @@ vectorizable_load (vec_info *vinfo,
if (costing_p)
{
if (n_adjacent_loads > 0)
- vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
+ vect_get_load_cost (vinfo, stmt_info, slp_node, n_adjacent_loads,
alignment_support_scheme, misalignment, false,
&inside_cost, &prologue_cost, cost_vec,
cost_vec, true);
@@ -11379,7 +11389,7 @@ vectorizable_load (vec_info *vinfo,
unsigned int cnunits = vect_nunits_for_cost (vectype);
inside_cost
= record_stmt_cost (cost_vec, cnunits, scalar_load,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0, vect_body);
continue;
}
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
@@ -11455,7 +11465,7 @@ vectorizable_load (vec_info *vinfo,
unsigned int cnunits = vect_nunits_for_cost (vectype);
inside_cost
= record_stmt_cost (cost_vec, cnunits, scalar_load,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0, vect_body);
continue;
}
poly_uint64 offset_nunits
@@ -11590,7 +11600,7 @@ vectorizable_load (vec_info *vinfo,
vector. */
inside_cost
= record_stmt_cost (cost_vec, const_nunits, scalar_load,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0, vect_body);
inside_cost
= record_stmt_cost (cost_vec, 1, vec_construct,
stmt_info, slp_node, 0, vect_body);
@@ -12177,7 +12187,7 @@ vectorizable_load (vec_info *vinfo,
/* Leave realign cases alone to keep them simple. */
if (alignment_support_scheme == dr_explicit_realign_optimized
|| alignment_support_scheme == dr_explicit_realign)
- vect_get_load_cost (vinfo, stmt_info, 1,
+ vect_get_load_cost (vinfo, stmt_info, slp_node, 1,
alignment_support_scheme, misalignment,
add_realign_cost, &inside_cost,
&prologue_cost, cost_vec, cost_vec,
@@ -12250,7 +12260,8 @@ vectorizable_load (vec_info *vinfo,
{
if (costing_p)
inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0,
+ vect_body);
else
{
tree perm_mask = perm_mask_for_reverse (vectype);
@@ -12319,7 +12330,8 @@ vectorizable_load (vec_info *vinfo,
vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
true, &n_perms, nullptr);
inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0,
+ vect_body);
}
else
{
@@ -12346,7 +12358,8 @@ vectorizable_load (vec_info *vinfo,
int group_size = DR_GROUP_SIZE (first_stmt_info);
int nstmts = ceil_log2 (group_size) * group_size;
inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node, 0,
+ vect_body);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -12375,7 +12388,7 @@ vectorizable_load (vec_info *vinfo,
|| memory_access_type == VMAT_CONTIGUOUS_REVERSE
|| memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
if (n_adjacent_loads > 0)
- vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
+ vect_get_load_cost (vinfo, stmt_info, slp_node, n_adjacent_loads,
alignment_support_scheme, misalignment, false,
&inside_cost, &prologue_cost, cost_vec, cost_vec,
true);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index dcad41dcf182045e868a83276e39ca71a82738d5..7f69a3f57b492ad9ecbd63ecdea27e9abe386ac5 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2354,6 +2354,10 @@ extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
enum vect_cost_for_stmt,
enum vect_cost_model_location);
+extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
+ enum vect_cost_for_stmt, stmt_vec_info,
+ slp_tree, tree, int,
+ enum vect_cost_model_location);
/* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO. */
@@ -2375,12 +2379,8 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
slp_tree node,
int misalign, enum vect_cost_model_location where)
{
- if (node)
- return record_stmt_cost (body_cost_vec, count, kind, node,
- STMT_VINFO_VECTYPE (stmt_info), misalign, where);
- else
- return record_stmt_cost (body_cost_vec, count, kind, stmt_info,
- STMT_VINFO_VECTYPE (stmt_info), misalign, where);
+ return record_stmt_cost (body_cost_vec, count, kind, stmt_info, node,
+ STMT_VINFO_VECTYPE (stmt_info), misalign, where);
}
extern void vect_finish_replace_stmt (vec_info *, stmt_vec_info, gimple *);
@@ -2411,12 +2411,12 @@ extern bool vect_nop_conversion_p (stmt_vec_info);
extern opt_result vect_analyze_stmt (vec_info *, stmt_vec_info, bool *,
slp_tree,
slp_instance, stmt_vector_for_cost *);
-extern void vect_get_load_cost (vec_info *, stmt_vec_info, int,
+extern void vect_get_load_cost (vec_info *, stmt_vec_info, slp_tree, int,
dr_alignment_support, int, bool,
unsigned int *, unsigned int *,
stmt_vector_for_cost *,
stmt_vector_for_cost *, bool);
-extern void vect_get_store_cost (vec_info *, stmt_vec_info, int,
+extern void vect_get_store_cost (vec_info *, stmt_vec_info, slp_tree, int,
dr_alignment_support, int,
unsigned int *, stmt_vector_for_cost *);
extern bool vect_supportable_shift (vec_info *, enum tree_code, tree);