Hi,
This is a follow-up patch fixing a leftover issue in vect_do_peeling.  In
the first patch, I used scale_loop_frequencies to scale up the profiling
counters for epilog_loop because scale_loop_profile doesn't allow scaling
up for now.  This patch adds another call to scale_loop_profile after
scaling up.  The added call only bounds the counters with the niter bound
of the epilogue.  This is necessary in case the original loop runs for
many iterations; otherwise the epilogue loop would still have a very large
count/frequency compared to the vectorized loop.
BTW, this fix follows the existing code/logic; specifically,
scale_loop_profile itself computes a zero (too small) frequency/count for
the peeled loop when the original loop runs for many iterations.  The
consequence is that the prologue/epilogue loop will be marked as "possibly
never be executed".  I believe the bound logic in scale_loop_profile needs
to be changed just like in the patch for PR77536, i.e., scaling the loop's
profiling count w.r.t. the estimated niter.  Anyway, this is next stage 1
work.

Bootstrapping and testing on x86_64 and AArch64; is it OK if there are no failures?

Thanks,
bin

2017-02-21  Bin Cheng  <bin.ch...@arm.com>

        PR tree-optimization/79347
        * tree-vect-loop-manip.c (vect_do_peeling): Compute niter bound for
        epilogue loop and use it when scaling profile info.
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 5ee2c38..3cc7381 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1645,8 +1645,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
   tree type = TREE_TYPE (niters), guard_cond;
   basic_block guard_bb, guard_to;
   int prob_prolog, prob_vector, prob_epilog;
-  int bound_prolog = 0, bound_scalar = 0, bound = 0;
-  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  int bound_prolog = 0, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
   bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
                         || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
@@ -1683,7 +1682,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
   bool skip_epilog = (prolog_peeling < 0
                      || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
   /* PEELING_FOR_GAPS is special because epilog loop must be executed.  */
-  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+  bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
+  if (peel_for_gaps)
     skip_epilog = false;
 
   /* Record the anchor bb at which guard should be placed if scalar loop
@@ -1701,7 +1701,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
       basic_block bb_before_loop = loop_preheader_edge (loop)->src;
       scale_bbs_frequencies_int (&bb_before_loop, 1, prob_vector,
                                 REG_BR_PROB_BASE);
-      scale_loop_profile (loop, prob_vector, bound);
+      scale_loop_profile (loop, prob_vector, 0);
     }
 
   tree niters_prolog = build_int_cst (type, 0);
@@ -1772,6 +1772,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 
   if (epilog_peeling)
     {
+      int bound_epilog = peel_for_gaps ? vf - 1 : vf - 2;
+
       e = single_exit (loop);
       if (!slpeel_can_duplicate_loop_p (loop, e))
        {
@@ -1795,8 +1797,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
         won't be vectorized.  */
       if (skip_vector)
        {
+         int bound_scalar = 0;
          /* Additional epilogue iteration is peeled if gap exists.  */
-         bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
          tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
                                                bound_prolog,
                                                peel_for_gaps ? vf : vf - 1,
@@ -1813,14 +1815,22 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
          e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
          slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
 
+         /* We share epilog loop with scalar version loop.  */
+         bound_epilog = MAX (bound_epilog, bound_scalar - 1);
+
          /* Simply propagate profile info from guard_bb to guard_to which is
             a merge point of control flow.  */
          guard_to->frequency = guard_bb->frequency;
          guard_to->count = guard_bb->count;
          single_succ_edge (guard_to)->count = guard_to->count;
-         /* Scale probability of epilog loop back.  */
+         /* Scale probability of epilog loop back.  After scaling up, we
+            also need to call scale_loop_profile to bound counters for
+            epilog loop in case the original loop runs for many times.
+            We can't do it in one call because scale_loop_profile does
+            not allow scaling up for now.  */
          int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE / prob_vector;
          scale_loop_frequencies (epilog, scale_up, REG_BR_PROB_BASE);
+         scale_loop_profile (epilog, REG_BR_PROB_BASE, bound_epilog);
        }
 
       basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
@@ -1861,15 +1871,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
              scale_bbs_frequencies_int (&bb_before_epilog, 1, prob_epilog,
                                         REG_BR_PROB_BASE);
            }
-         scale_loop_profile (epilog, prob_epilog, bound);
+         scale_loop_profile (epilog, prob_epilog, bound_epilog);
        }
       else
        slpeel_update_phi_nodes_for_lcssa (epilog);
 
-      bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
-      /* We share epilog loop with scalar version loop.  */
-      bound = MAX (bound, bound_scalar - 1);
-      record_niter_bound (epilog, bound, false, true);
+      record_niter_bound (epilog, bound_epilog, false, true);
 
       delete_update_ssa ();
       adjust_vec_debug_stmts ();

Reply via email to