On Wed, Apr 27, 2011 at 20:52, Sharad Singhai <sing...@google.com> wrote: > This patch adds new parameters to control peeling when profile > feedback information is available. > > For google/main. > > Tested: > bootstrapped on x86_64. >
Looks OK with a couple of minor nits below. > 2011-04-27 Sharad Singhai <sing...@google.com> > > * gcc/params.def: Add new parameters to control peeling. > * gcc/tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Use > different peeling parameters when profile feedback is available. > * gcc/loop-unroll.c (decide_peel_once_rolling): Ditto. > (decide_peel_completely): Ditto. > * gcc/doc/invoke.texi: Document new peeling parameters. No need to include 'gcc/' as this will go in gcc/ChangeLog.google-main > * gcc/testsuite/gcc.dg/vect/O3-vect-pr34223.c: Add new peeling > parameters. > * gcc/testsuite/gcc.dg/vect/vect.exp: Allow reading flags in individual > tests. Likewise. No need to have 'gcc/testsuite'. These entries should go in testsuite/ChangeLog.google-main. (this looks like it was generated with mklog. It's not a very smart script :) > > > Index: gcc/doc/invoke.texi > =================================================================== > --- gcc/doc/invoke.texi (revision 172904) > +++ gcc/doc/invoke.texi (working copy) > @@ -8523,11 +8523,28 @@ > The maximum number of peelings of a single loop. > > @item max-completely-peeled-insns > +@item max-completely-peeled-insns-feedback > The maximum number of insns of a completely peeled loop. > > +The @option{max-completely-peeled-insns-feedback} is used only when profile > +feedback is available and the loop is hot. Because of the real profiles, this > +value may set to be larger for hot loops. > + > +@item max-once-peeled-insns > +@item max-once-peeled-insns-feedback > +The maximum number of insns of a peeled loop that rolls only once. > +The @option{max-once-peeled-insns-feedback} is used only when profile > feedback > +is available and the loop is hot. Because of the real profiles, this value > +may set to be larger for hot loops. > + > @item max-completely-peel-times > +@item max-completely-peel-times-feedback > The maximum number of iterations of a loop to be suitable for complete > peeling. > > +The @option{max-completely-peel-times-feedback} is used only when profile > feedback > +is available and the loop is hot. Because of the real profiles, this value > may > +set to be larger for hot loops. > + > @item max-completely-peel-loop-nest-depth > The maximum depth of a loop nest suitable for complete peeling. > > Index: gcc/testsuite/gcc.dg/vect/O3-vect-pr34223.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/O3-vect-pr34223.c (revision 172904) > +++ gcc/testsuite/gcc.dg/vect/O3-vect-pr34223.c (working copy) > @@ -1,4 +1,5 @@ > /* { dg-require-effective-target vect_int } */ > +/* { dg-options "[vect_cflags] --param max-completely-peel-times=16" } */ > > #include "tree-vect.h" > > Index: gcc/testsuite/gcc.dg/vect/vect.exp > =================================================================== > --- gcc/testsuite/gcc.dg/vect/vect.exp (revision 172904) > +++ gcc/testsuite/gcc.dg/vect/vect.exp (working copy) > @@ -24,6 +24,12 @@ > global DEFAULT_VECTCFLAGS > set DEFAULT_VECTCFLAGS "" > > +# So that we can read flags in individual tests. > +proc vect_cflags { } { > + global DEFAULT_VECTCFLAGS > + return $DEFAULT_VECTCFLAGS > +} > + > # If the target system supports vector instructions, the default action > # for a test is 'run', otherwise it's 'compile'. Save current default. > # Executing vector instructions on a system without hardware vector support > Index: gcc/tree-ssa-loop-ivcanon.c > =================================================================== > --- gcc/tree-ssa-loop-ivcanon.c (revision 172904) > +++ gcc/tree-ssa-loop-ivcanon.c (working copy) > @@ -326,6 +326,7 @@ > enum unroll_level ul) > { > unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns; > + unsigned HOST_WIDE_INT max_peeled_insns; > gimple cond; > struct loop_size size; > > @@ -336,9 +337,21 @@ > return false; > n_unroll = tree_low_cst (niter, 1); > > - max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); > - if (n_unroll > max_unroll) > + if (profile_info && flag_branch_probabilities && > + optimize_loop_for_speed_p (loop)) Align predicates vertically. '&&' should be at the start of the line. Something like this: if (profile_info && flag_branch_probabilities && ... > + max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK); > + else > + max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); > + > + if (n_unroll > max_unroll) { Brace should be on the next line. > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, " Not unrolling loop %d limited by max unroll" > + " (%d > %d)\n", > + loop->num, (int) n_unroll, (int) max_unroll); > + } > return false; > + } > > if (n_unroll) > { > @@ -356,14 +369,20 @@ > (int) unr_insns); > } > > - if (unr_insns > ninsns > - && (unr_insns > - > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))) > + if (profile_info && flag_branch_probabilities && > + optimize_loop_for_speed_p (loop)) Vertical alignment again. > + max_peeled_insns = > + PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK); > + else > + max_peeled_insns = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS); > + > + if (unr_insns > max_peeled_insns) > { > if (dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, "Not unrolling loop %d " > - "(--param max-completely-peeled-insns limit reached).\n", > - loop->num); > + "(--param max-completely-peeled-insns(-feedback) limit. " > + "(%u > %u)).\n", > + loop->num, (unsigned) unr_insns, (unsigned) > max_peeled_insns); > return false; > } > > @@ -371,7 +390,8 @@ > && unr_insns > ninsns) > { > if (dump_file && (dump_flags & TDF_DETAILS)) > - fprintf (dump_file, "Not unrolling loop %d.\n", loop->num); > + fprintf (dump_file, "Not unrolling loop %d (NO_GROWTH %d > > %d).\n", > + loop->num, (int) unr_insns, (int) ninsns); > return false; > } > } > @@ -418,8 +438,9 @@ > update_stmt (cond); > update_ssa (TODO_update_ssa); > > - if (dump_file && (dump_flags & TDF_DETAILS)) > - fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num); > + if (dump_file) > + fprintf (dump_file, "Unrolled loop %d completely by factor %d.\n", > + loop->num, (int) n_unroll); > > return true; > } > Index: gcc/loop-unroll.c > =================================================================== > --- gcc/loop-unroll.c (revision 172904) > +++ gcc/loop-unroll.c (working copy) > @@ -324,15 +324,23 @@ > decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) > { > struct niter_desc *desc; > + unsigned max_peeled_insns; > > + if (profile_info && flag_branch_probabilities) > + max_peeled_insns = > + (unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS_FEEDBACK); > + else > + max_peeled_insns = (unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS); > + > if (dump_file) > fprintf (dump_file, "\n;; Considering peeling once rolling loop\n"); > > /* Is the loop small enough? */ > - if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns) > + if (max_peeled_insns < loop->ninsns) > { > if (dump_file) > - fprintf (dump_file, ";; Not considering loop, is too big\n"); > + fprintf (dump_file, ";; Not considering loop, is too big (%d > %u)\n", > + loop->ninsns, max_peeled_insns); > return; > } > > @@ -362,7 +370,7 @@ > static void > decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED) > { > - unsigned npeel; > + unsigned npeel, max_insns, max_peel; > struct niter_desc *desc; > > if (dump_file) > @@ -393,16 +401,30 @@ > return; > } > > + if (profile_info && flag_branch_probabilities) > + { > + max_insns = > + (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK); > + max_peel = > + (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK); > + } > + else > + { > + max_insns = (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS); > + max_peel = (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); > + } > + > /* npeel = number of iterations to peel. */ > - npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns; > - if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) > - npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); > + npeel = max_insns / loop->ninsns; > + if (npeel > max_peel) > + npeel = max_peel; > > /* Is the loop small enough? */ > if (!npeel) > { > if (dump_file) > - fprintf (dump_file, ";; Not considering loop, is too big\n"); > + fprintf (dump_file, ";; Not considering loop, is too big, > npeel=%u.\n", > + npeel); > return; > } > > @@ -435,7 +457,7 @@ > > /* Success. */ > if (dump_file) > - fprintf (dump_file, ";; Decided to peel loop completely\n"); > + fprintf (dump_file, ";; Decided to peel loop completely npeel %u\n", > npeel); > loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; > } > > Index: gcc/params.def > =================================================================== > --- gcc/params.def (revision 172904) > +++ gcc/params.def (working copy) > @@ -299,16 +299,37 @@ > "max-completely-peeled-insns", > "The maximum number of insns of a completely peeled loop", > 400, 0, 0) > +/* The maximum number of insns of a peeled loop, when feedback > + information is available. */ > +DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK, > + "max-completely-peeled-insns-feedback", > + "The maximum number of insns of a completely peeled loop when profile > " > + "feedback is available", > + 600, 0, 0) > /* The maximum number of peelings of a single loop that is peeled > completely. */ > DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES, > "max-completely-peel-times", > "The maximum number of peelings of a single loop that is peeled > completely", > - 16, 0, 0) > + 8, 0, 0) > +/* The maximum number of peelings of a single loop that is peeled > + completely, when feedback information is available. */ > +DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK, > + "max-completely-peel-times-feedback", > + "The maximum number of peelings of a single loop that is peeled " > + "completely, when profile feedback is available", > + 16, 0, 0) > /* The maximum number of insns of a peeled loop that rolls only once. */ > DEFPARAM(PARAM_MAX_ONCE_PEELED_INSNS, > "max-once-peeled-insns", > "The maximum number of insns of a peeled loop that rolls only once", > 400, 0, 0) > +/* The maximum number of insns of a peeled loop that rolls only once, > + when feedback information is available. */ > +DEFPARAM(PARAM_MAX_ONCE_PEELED_INSNS_FEEDBACK, > + "max-once-peeled-insns-feedback", > + "The maximum number of insns of a peeled loop that rolls only once, " > + "when profile feedback is available", > + 600, 0, 0) > /* The maximum depth of a loop nest we completely peel. */ > DEFPARAM(PARAM_MAX_UNROLL_ITERATIONS, > "max-completely-peel-loop-nest-depth", > > -- > This patch is available for review at http://codereview.appspot.com/4438079 >