Yeah, it's not a strict bound, so the function needs to return -1 aka UNKNOWN.
But how this -1 should be interpreted depends on the context.

But -1 certainly cannot be interpreted as an actual bound.  For frequency scaling
I'd use the same logic as for costing - use estimated_poly_value / 2

In the attached v3 I added a separate argument for the poly estimate that is used for scaling. At the same time, bound_prolog is set to 0 so it can be used with the existing semantics: record_niter_bound uses bound_prolog - 1 so will continue with -1 == unknown.

Regtested on rv64gcv_zvl512b.  aarch64 is running.

--
Regards
Robin

[PATCH v3] vect: Estimate prolog bound for VLA alignment.

Since peeling and versioning for alignment for VLA modes were introduced
(r16-3065-geee51f9a4b6) we have been seeing a lot of test suite failures
like

 internal compiler error: in apply_scale, at profile-count.h:1187

This is because vect_gen_prolog_loop_niters sets the prolog bound to -1
in case align_in_elems is a non-constant poly_int.
bound - 1 is later used to scale the loop profile in scale_loop_profile
so we try to calculate with an assumed -2 iterations.

The rest of vect_do_peeling expects either -1 or 0 for an unknown
prolog bound.  For scaling the loop we can use an estimated_poly_value of
the prolog bound instead.

Therefore this patch has vect_gen_prolog_loop_niters additionally return
a poly estimate and set bound_prolog to 0 for the poly case.

        PR tree-optimization/121523

gcc/ChangeLog:

        * tree-vect-loop-manip.cc (get_misalign_in_elems): Document
        new argument.
        (vect_gen_prolog_loop_niters): Set bound = 0 and bound_poly_est
        to poly estimate.
        (vect_do_peeling): Use bound_poly_est for frequency scaling.
---
gcc/tree-vect-loop-manip.cc | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 566308f4fe5..c01a75d9d4c 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2495,6 +2495,10 @@ get_misalign_in_elems (gimple **seq, loop_vec_info 
loop_vinfo)
   The computations will be emitted at the end of BB.  We also compute and
   store upper bound (included) of the result in BOUND.

+   If the number of peeled iterations is a poly_int we set BOUND to 0
+   and BOUND_POLY_EST to the estimated poly value.  The latter can be used
+   for frequency scaling.
+
   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the step
   (which is equal to the size of interleaved group).
@@ -2507,7 +2511,7 @@ get_misalign_in_elems (gimple **seq, loop_vec_info 
loop_vinfo)

static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
-                            basic_block bb, int *bound)
+                            basic_block bb, int *bound, int *bound_poly_est)
{
  dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  tree var;
@@ -2517,6 +2521,7 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
  stmt_vec_info stmt_info = dr_info->stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
+  *bound_poly_est = -1;

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
@@ -2557,9 +2562,12 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
      iters = fold_convert (niters_type, iters);
      unsigned HOST_WIDE_INT align_in_elems_c;
      if (align_in_elems.is_constant (&align_in_elems_c))
-       *bound = align_in_elems_c - 1;
+       *bound = align_in_elems_c;
      else
-       *bound = -1;
+       {
+         *bound = 0;
+         *bound_poly_est = estimated_poly_value (align_in_elems_c) / 2;
+       }
    }

  if (dump_enabled_p ())
@@ -3258,10 +3266,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
     so that we can also get the upper bound on the number of iterations.  */
  tree niters_prolog;
  int bound_prolog = 0;
+  int bound_prolog_est = -1;
  if (prolog_peeling)
    {
      niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
-                                                   &bound_prolog);
+                                                  &bound_prolog,
+                                                  &bound_prolog_est);
      /* If algonment peeling is known, we will always execute prolog.  */
      if (TREE_CODE (niters_prolog) == INTEGER_CST)
        prob_prolog = profile_probability::always ();
@@ -3269,6 +3279,9 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
  else
    niters_prolog = build_int_cst (type, 0);

+  if (bound_prolog_est == -1)
+    bound_prolog_est = bound_prolog;
+
  loop_vec_info epilogue_vinfo = loop_vinfo->epilogue_vinfo;
  tree niters_vector_mult_vf = NULL_TREE;
  /* Saving NITERs before the loop, as this may be changed by prologue.  */
@@ -3404,7 +3417,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
          slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);

          scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
-         scale_loop_profile (prolog, prob_prolog, bound_prolog - 1);
+         scale_loop_profile (prolog, prob_prolog, bound_prolog_est - 1);
        }

      /* Update init address of DRs.  */
--
2.50.0

Reply via email to