Attached is the proper version that has been regtested on x86, regtested on aarch64 and rv64gcv_zvl512b.

Regards
Robin

[PATCH v4] vect: Estimate prolog bound for VLA alignment.

Since peeling and version for alignment for VLA modes was introduced
(r16-3065-geee51f9a4b6) we have been seeing a lot of test suite failures
like

 internal compiler error: in apply_scale, at profile-count.h:1187

This is because vect_gen_prolog_loop_niters sets the prolog bound to -1
in case align_in_elems is a non-constant poly_int.
bound - 1 is later used to scale the loop profile in scale_loop_profile
so we try to calculate with an assumed -2 iterations.

The rest of vect_do_peeling expects either -1 or 0 for an unknown
prolog bound.  For scaling the loop we can use an estimated_poly_value of
the prolog bound instead.

Therefore this patch has vect_gen_prolog_loop_niters additionally return
a poly estimate and set bound_prolog to 0 for the poly case.

        PR/tree-optimization 121523

gcc/ChangeLog:

        * tree-vect-loop-manip.cc (get_misalign_in_elems): Document
        new argument.
        (vect_gen_prolog_loop_niters): Set bound = 0 and bound_poly_est
        to poly estimate.
        (vect_do_peeling): Use bound_poly_est for frequency scaling.
---
gcc/tree-vect-loop-manip.cc | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 566308f4fe5..b6ffb4f5e6a 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2495,6 +2495,10 @@ get_misalign_in_elems (gimple **seq, loop_vec_info 
loop_vinfo)
   The computations will be emitted at the end of BB.  We also compute and
   store upper bound (included) of the result in BOUND.

+   If the number of peeled iterations is a poly_int we set BOUND to 0
+   and BOUND_POLY_EST to half of the estimated poly value, similar to
+   what we use for costing.
+
   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the step
   (which is equal to the size of interleaved group).
@@ -2507,7 +2511,7 @@ get_misalign_in_elems (gimple **seq, loop_vec_info 
loop_vinfo)

static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
-                            basic_block bb, int *bound)
+                            basic_block bb, int *bound, int *bound_poly_est)
{
  dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  tree var;
@@ -2517,6 +2521,7 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
  stmt_vec_info stmt_info = dr_info->stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
+  *bound_poly_est = -1;

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
@@ -2559,7 +2564,10 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
      if (align_in_elems.is_constant (&align_in_elems_c))
        *bound = align_in_elems_c - 1;
      else
-       *bound = -1;
+       {
+         *bound = 0;
+         *bound_poly_est = estimated_poly_value (align_in_elems) / 2;
+       }
    }

  if (dump_enabled_p ())
@@ -3258,10 +3266,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
     so that we can also get the upper bound on the number of iterations.  */
  tree niters_prolog;
  int bound_prolog = 0;
+  int bound_prolog_est = -1;
  if (prolog_peeling)
    {
      niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
-                                                   &bound_prolog);
+                                                  &bound_prolog,
+                                                  &bound_prolog_est);
      /* If algonment peeling is known, we will always execute prolog.  */
      if (TREE_CODE (niters_prolog) == INTEGER_CST)
        prob_prolog = profile_probability::always ();
@@ -3269,6 +3279,9 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
  else
    niters_prolog = build_int_cst (type, 0);

+  if (bound_prolog_est == -1)
+    bound_prolog_est = bound_prolog;
+
  loop_vec_info epilogue_vinfo = loop_vinfo->epilogue_vinfo;
  tree niters_vector_mult_vf = NULL_TREE;
  /* Saving NITERs before the loop, as this may be changed by prologue.  */
@@ -3404,7 +3417,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
          slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);

          scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
-         scale_loop_profile (prolog, prob_prolog, bound_prolog - 1);
+         scale_loop_profile (prolog, prob_prolog, bound_prolog_est - 1);
        }

      /* Update init address of DRs.  */
--
2.50.0

Reply via email to