Hi! Honza, do you think you could have a look at this P1 fix?
Thanks. On Wed, Jan 31, 2018 at 10:03:51AM +0000, Bin Cheng wrote: > Hi, > This patch fixes invalid profile count information in vectorization peeling. > Current implementation is a bit confusing for me since it tries to compute > an overall probability based on scaling probability and change of estimated > niters. This patch does it in two steps. Firstly it does the scaling, then > it adjusts to new estimated niters by simply adjusting loop's latch count > information; scaling the loop's count information by the proportion > new_estimated_niters/old_estimate_niters. Of course we have to adjust loop > latch's count information back after scaling. > > Bootstrap and test on x86_64 and AArch64. gcc.dg/vect/pr79347.c is fixed > for both PR82965 and PR83991. Is this OK? > > Thanks, > bin > > 2018-01-30 Bin Cheng <bin.ch...@arm.com> > > PR tree-optimization/82965 > PR tree-optimization/83991 > * cfgloopmanip.c (scale_loop_profile): Further scale loop's profile > information if the loop was predicted to iterate too many times. > diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c > index b9b76d8..1f560b8 100644 > --- a/gcc/cfgloopmanip.c > +++ b/gcc/cfgloopmanip.c > @@ -509,7 +509,7 @@ scale_loop_profile (struct loop *loop, > profile_probability p, > gcov_type iteration_bound) > { > gcov_type iterations = expected_loop_iterations_unbounded (loop); > - edge e; > + edge e, preheader_e; > edge_iterator ei; > > if (dump_file && (dump_flags & TDF_DETAILS)) > @@ -521,77 +521,66 @@ scale_loop_profile (struct loop *loop, > profile_probability p, > (int)iteration_bound, (int)iterations); > } > > + /* Scale the probabilities. */ > + scale_loop_frequencies (loop, p); > + > /* See if loop is predicted to iterate too many times. */ > - if (iteration_bound && iterations > 0 > - && p.apply (iterations) > iteration_bound) > + if (iteration_bound == 0 || iterations <= 0 > + || p.apply (iterations) <= iteration_bound) > + return; > + > + e = single_exit (loop); > + preheader_e = loop_preheader_edge (loop); > + profile_count count_in = preheader_e->count (); > + if (e && preheader_e > + && count_in > profile_count::zero () > + && loop->header->count.initialized_p ()) > { > - /* Fixing loop profile for different trip count is not trivial; the > exit > - probabilities has to be updated to match and frequencies propagated > down > - to the loop body. > - > - We fully update only the simple case of loop with single exit that is > - either from the latch or BB just before latch and leads from BB with > - simple conditional jump. This is OK for use in vectorizer. */ > - e = single_exit (loop); > - if (e) > - { > - edge other_e; > - profile_count count_delta; > + edge other_e; > + profile_count count_delta; > > - FOR_EACH_EDGE (other_e, ei, e->src->succs) > - if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE)) > - && e != other_e) > - break; > + FOR_EACH_EDGE (other_e, ei, e->src->succs) > + if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE)) > + && e != other_e) > + break; > > - /* Probability of exit must be 1/iterations. */ > - count_delta = e->count (); > - e->probability = profile_probability::always () > + /* Probability of exit must be 1/iterations. */ > + count_delta = e->count (); > + e->probability = profile_probability::always () > .apply_scale (1, iteration_bound); > - other_e->probability = e->probability.invert (); > - count_delta -= e->count (); > - > - /* If latch exists, change its count, since we changed > - probability of exit. Theoretically we should update everything > from > - source of exit edge to latch, but for vectorizer this is enough. > */ > - if (loop->latch > - && loop->latch != e->src) > - { > - loop->latch->count += count_delta; > - } > - } > + other_e->probability = e->probability.invert (); > > /* Roughly speaking we want to reduce the loop body profile by the > difference of loop iterations. We however can do better if > we look at the actual profile, if it is available. */ > - p = p.apply_scale (iteration_bound, iterations); > - > - if (loop->header->count.initialized_p ()) > - { > - profile_count count_in = profile_count::zero (); > + p = profile_probability::always (); > > - FOR_EACH_EDGE (e, ei, loop->header->preds) > - if (e->src != loop->latch) > - count_in += e->count (); > - > - if (count_in > profile_count::zero () ) > - { > - p = count_in.probability_in (loop->header->count.apply_scale > - (iteration_bound, 1)); > - } > - } > + count_in = count_in.apply_scale (iteration_bound, 1); > + p = count_in.probability_in (loop->header->count); > if (!(p > profile_probability::never ())) > p = profile_probability::very_unlikely (); > - } > > - if (p >= profile_probability::always () > - || !p.initialized_p ()) > - return; > + if (p >= profile_probability::always () > + || !p.initialized_p ()) > + return; > > - /* Scale the actual probabilities. */ > - scale_loop_frequencies (loop, p); > - if (dump_file && (dump_flags & TDF_DETAILS)) > - fprintf (dump_file, ";; guessed iterations are now %i\n", > - (int)expected_loop_iterations_unbounded (loop)); > + /* If latch exists, change its count, since we changed > + probability of exit. Theoretically we should update everything from > + source of exit edge to latch, but for vectorizer this is enough. */ > + if (loop->latch && loop->latch != e->src) > + loop->latch->count += count_delta; > + > + /* Scale the probabilities. */ > + scale_loop_frequencies (loop, p); > + > + /* Change latch's count back. */ > + if (loop->latch && loop->latch != e->src) > + loop->latch->count -= count_delta; > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, ";; guessed iterations are now %i\n", > + (int)expected_loop_iterations_unbounded (loop)); > + } > } > > /* Recompute dominance information for basic blocks outside LOOP. */ Jakub