Attached is the proper version, which has been regtested on x86, aarch64,
and rv64gcv_zvl512b.
Regards
Robin
[PATCH v4] vect: Estimate prolog bound for VLA alignment.
Since peeling and versioning for alignment for VLA modes were introduced
(r16-3065-geee51f9a4b6) we have been seeing a lot of test suite failures
like
internal compiler error: in apply_scale, at profile-count.h:1187
This is because vect_gen_prolog_loop_niters sets the prolog bound to -1
when align_in_elems is a non-constant poly_int.
vect_do_peeling later passes bound - 1 to scale_loop_profile to scale the
loop profile, so we end up calculating with an assumed -2 iterations.
The rest of vect_do_peeling expects either -1 or 0 for an unknown
prolog bound. For scaling the loop we can use an estimated_poly_value of
the prolog bound instead.
Therefore this patch has vect_gen_prolog_loop_niters additionally return
a poly estimate and set bound_prolog to 0 for the poly case.
PR tree-optimization/121523
gcc/ChangeLog:
* tree-vect-loop-manip.cc (get_misalign_in_elems): Document
new argument.
(vect_gen_prolog_loop_niters): Set bound = 0 and bound_poly_est
to poly estimate.
(vect_do_peeling): Use bound_poly_est for frequency scaling.
---
gcc/tree-vect-loop-manip.cc | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 566308f4fe5..b6ffb4f5e6a 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2495,6 +2495,10 @@ get_misalign_in_elems (gimple **seq, loop_vec_info
loop_vinfo)
The computations will be emitted at the end of BB. We also compute and
store upper bound (included) of the result in BOUND.
+ If the number of peeled iterations is a poly_int we set BOUND to 0
+ and BOUND_POLY_EST to half of the estimated poly value, similar to
+ what we use for costing.
+
When the step of the data-ref in the loop is not 1 (as in interleaved data
and SLP), the number of iterations of the prolog must be divided by the step
(which is equal to the size of interleaved group).
@@ -2507,7 +2511,7 @@ get_misalign_in_elems (gimple **seq, loop_vec_info
loop_vinfo)
static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
- basic_block bb, int *bound)
+ basic_block bb, int *bound, int *bound_poly_est)
{
dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
tree var;
@@ -2517,6 +2521,7 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
stmt_vec_info stmt_info = dr_info->stmt;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
+ *bound_poly_est = -1;
if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
{
@@ -2559,7 +2564,10 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
if (align_in_elems.is_constant (&align_in_elems_c))
*bound = align_in_elems_c - 1;
else
- *bound = -1;
+ {
+ *bound = 0;
+ *bound_poly_est = estimated_poly_value (align_in_elems) / 2;
+ }
}
if (dump_enabled_p ())
@@ -3258,10 +3266,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters,
tree nitersm1,
so that we can also get the upper bound on the number of iterations. */
tree niters_prolog;
int bound_prolog = 0;
+ int bound_prolog_est = -1;
if (prolog_peeling)
{
niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
- &bound_prolog);
+ &bound_prolog,
+ &bound_prolog_est);
/* If algonment peeling is known, we will always execute prolog. */
if (TREE_CODE (niters_prolog) == INTEGER_CST)
prob_prolog = profile_probability::always ();
@@ -3269,6 +3279,9 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters,
tree nitersm1,
else
niters_prolog = build_int_cst (type, 0);
+ if (bound_prolog_est == -1)
+ bound_prolog_est = bound_prolog;
+
loop_vec_info epilogue_vinfo = loop_vinfo->epilogue_vinfo;
tree niters_vector_mult_vf = NULL_TREE;
/* Saving NITERs before the loop, as this may be changed by prologue. */
@@ -3404,7 +3417,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters,
tree nitersm1,
slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
- scale_loop_profile (prolog, prob_prolog, bound_prolog - 1);
+ scale_loop_profile (prolog, prob_prolog, bound_prolog_est - 1);
}
/* Update init address of DRs. */
--
2.50.0