Re: [PATCH V2] correct COUNT and PROB for unrolled loop
Jiufu Guo writes: Hi, I'd like to ping this patch for trunk on stage 1. This patch could fix the issue on incorrect COUNT/FREQUENCES of loop unrolled blocks, and also could help the improve the cold/hot issue of the unrolled loops. patch is also at https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html Thanks, Jiufu > Jiufu Guo writes: > > Hi! > > I'd like to ping following patch. As near end of gcc10 stage 4, it seems > I would ask approval for GCC11 trunk. > > Thanks, > Jiufu Guo > >> Hi Honza and all, >> >> I updated the patch a little as below. Bootstrap and regtest are ok >> on powerpc64le. >> >> Is OK for trunk? >> >> Thanks for comments. >> Jiufu >> >> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c >> index 727e951..ded0046 100644 >> --- a/gcc/cfgloopmanip.c >> +++ b/gcc/cfgloopmanip.c >> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see >> #include "gimplify-me.h" >> #include "tree-ssa-loop-manip.h" >> #include "dumpfile.h" >> +#include "cfgrtl.h" >> >> static void copy_loops_to (class loop **, int, >> class loop *); >> @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, >> edge e, >>/* If original loop is executed COUNT_IN times, the unrolled >> loop will account SCALE_MAIN_DEN times. */ >>scale_main = count_in.probability_in (scale_main_den); >> + >> + /* If we are guessing at the number of iterations and count_in >> + becomes unrealistically small, reset probability. */ >> + if (!(count_in.reliable_p () || loop->any_estimate)) >> +{ >> + profile_count new_count_in = count_in.apply_probability >> (scale_main); >> + profile_count preheader_count = loop_preheader_edge (loop)->count >> (); >> + if (new_count_in.apply_scale (1, 10) < preheader_count) >> +scale_main = profile_probability::likely (); >> +} >> + >>scale_act = scale_main * prob_pass_main; >> } >>else >> { >> + profile_count new_loop_count; >>profile_count preheader_count = e->count (); >> - for (i = 0; i < ndupl; i++) >> -scale_main = scale_main * scale_step[i]; >>scale_act = preheader_count.probability_in (count_in); >> + /* Compute final preheader count after peeling NDUPL copies. */ >> + for (i = 0; i < ndupl; i++) >> +preheader_count = preheader_count.apply_probability (scale_step[i]); >> + /* Subtract out exit(s) from peeled copies. */ >> + new_loop_count = count_in - (e->count () - preheader_count); >> + scale_main = new_loop_count.probability_in (count_in); >> } >> } >> >> @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge >> e, >>scale_bbs_frequencies (new_bbs, n, scale_act); >>scale_act = scale_act * scale_step[j]; >> } >> + >> + /* Need to update PROB of exit edge and corresponding COUNT. */ >> + if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1)) >> + && bbs_to_scale) >> +{ >> + edge new_exit = new_spec_edges[SE_ORIG]; >> + profile_count new_count_in = new_exit->src->count; >> + profile_count preheader_count = loop_preheader_edge (loop)->count (); >> + edge e; >> + edge_iterator ei; >> + >> + FOR_EACH_EDGE (e, ei, new_exit->src->succs) >> +if (e != new_exit) >> + break; >> + >> + gcc_assert (e && e != new_exit); >> + >> + new_exit->probability = preheader_count.probability_in (new_count_in); >> + e->probability = new_exit->probability.invert (); >> + >> + profile_count new_latch_count >> += new_exit->src->count.apply_probability (e->probability); >> + profile_count old_latch_count = e->dest->count; >> + >> + EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) >> +scale_bbs_frequencies_profile_count (new_bbs + i, 1, >> + new_latch_count, >> + old_latch_count); >> + >> + if (current_ir_type () != IR_GIMPLE) >> +update_br_prob_note (e->src); >> +} >> } >>free (new_bbs); >>free (orig_loops); >> diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c >> new file mode 100644 >> index 000..f3b7c22 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/pr68212.c >> @@ -0,0 +1,13 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param >> max-unroll-times=4 -fdump-rtl-alignments" } */ >> + >> +void foo(long int *a, long int *b, long int n) >> +{ >> + long int i; >> + >> + for (i = 0; i < n; i++) >> +a[i] = *b; >> +} >> + >> +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 >> "alignments"} } */ >> +
Re: [PATCH V2] correct COUNT and PROB for unrolled loop
Jiufu Guo writes: Hi! I'd like to ping following patch. As near end of gcc10 stage 4, it seems I would ask approval for GCC11 trunk. Thanks, Jiufu Guo > Hi Honza and all, > > I updated the patch a little as below. Bootstrap and regtest are ok > on powerpc64le. > > Is OK for trunk? > > Thanks for comments. > Jiufu > > diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c > index 727e951..ded0046 100644 > --- a/gcc/cfgloopmanip.c > +++ b/gcc/cfgloopmanip.c > @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see > #include "gimplify-me.h" > #include "tree-ssa-loop-manip.h" > #include "dumpfile.h" > +#include "cfgrtl.h" > > static void copy_loops_to (class loop **, int, > class loop *); > @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge > e, > /* If original loop is executed COUNT_IN times, the unrolled >loop will account SCALE_MAIN_DEN times. */ > scale_main = count_in.probability_in (scale_main_den); > + > + /* If we are guessing at the number of iterations and count_in > + becomes unrealistically small, reset probability. */ > + if (!(count_in.reliable_p () || loop->any_estimate)) > + { > + profile_count new_count_in = count_in.apply_probability > (scale_main); > + profile_count preheader_count = loop_preheader_edge (loop)->count > (); > + if (new_count_in.apply_scale (1, 10) < preheader_count) > + scale_main = profile_probability::likely (); > + } > + > scale_act = scale_main * prob_pass_main; > } >else > { > + profile_count new_loop_count; > profile_count preheader_count = e->count (); > - for (i = 0; i < ndupl; i++) > - scale_main = scale_main * scale_step[i]; > scale_act = preheader_count.probability_in (count_in); > + /* Compute final preheader count after peeling NDUPL copies. */ > + for (i = 0; i < ndupl; i++) > + preheader_count = preheader_count.apply_probability (scale_step[i]); > + /* Subtract out exit(s) from peeled copies. */ > + new_loop_count = count_in - (e->count () - preheader_count); > + scale_main = new_loop_count.probability_in (count_in); > } > } > > @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge > e, > scale_bbs_frequencies (new_bbs, n, scale_act); > scale_act = scale_act * scale_step[j]; > } > + > + /* Need to update PROB of exit edge and corresponding COUNT. */ > + if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1)) > + && bbs_to_scale) > + { > + edge new_exit = new_spec_edges[SE_ORIG]; > + profile_count new_count_in = new_exit->src->count; > + profile_count preheader_count = loop_preheader_edge (loop)->count (); > + edge e; > + edge_iterator ei; > + > + FOR_EACH_EDGE (e, ei, new_exit->src->succs) > + if (e != new_exit) > + break; > + > + gcc_assert (e && e != new_exit); > + > + new_exit->probability = preheader_count.probability_in (new_count_in); > + e->probability = new_exit->probability.invert (); > + > + profile_count new_latch_count > + = new_exit->src->count.apply_probability (e->probability); > + profile_count old_latch_count = e->dest->count; > + > + EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) > + scale_bbs_frequencies_profile_count (new_bbs + i, 1, > + new_latch_count, > + old_latch_count); > + > + if (current_ir_type () != IR_GIMPLE) > + update_br_prob_note (e->src); > + } > } >free (new_bbs); >free (orig_loops); > diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c > new file mode 100644 > index 000..f3b7c22 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr68212.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param > max-unroll-times=4 -fdump-rtl-alignments" } */ > + > +void foo(long int *a, long int *b, long int n) > +{ > + long int i; > + > + for (i = 0; i < n; i++) > +a[i] = *b; > +} > + > +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 > "alignments"} } */ > +
Re: [PATCH V2] correct COUNT and PROB for unrolled loop
Jiufu Guo writes: Hi! I'd like to ping following patch. just in case it may make sense to include in GCC 10. Thanks! https://gcc.gnu.org/ml/gcc-patches/2020-02/msg00927.html Jiufu > Hi Honza and all, > > I updated the patch a little as below. Bootstrap and regtest are ok > on powerpc64le. > > Is OK for trunk? > > Thanks for comments. > Jiufu > > diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c > index 727e951..ded0046 100644 > --- a/gcc/cfgloopmanip.c > +++ b/gcc/cfgloopmanip.c > @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see > #include "gimplify-me.h" > #include "tree-ssa-loop-manip.h" > #include "dumpfile.h" > +#include "cfgrtl.h" > > static void copy_loops_to (class loop **, int, > class loop *); > @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge > e, > /* If original loop is executed COUNT_IN times, the unrolled >loop will account SCALE_MAIN_DEN times. */ > scale_main = count_in.probability_in (scale_main_den); > + > + /* If we are guessing at the number of iterations and count_in > + becomes unrealistically small, reset probability. */ > + if (!(count_in.reliable_p () || loop->any_estimate)) > + { > + profile_count new_count_in = count_in.apply_probability > (scale_main); > + profile_count preheader_count = loop_preheader_edge (loop)->count > (); > + if (new_count_in.apply_scale (1, 10) < preheader_count) > + scale_main = profile_probability::likely (); > + } > + > scale_act = scale_main * prob_pass_main; > } >else > { > + profile_count new_loop_count; > profile_count preheader_count = e->count (); > - for (i = 0; i < ndupl; i++) > - scale_main = scale_main * scale_step[i]; > scale_act = preheader_count.probability_in (count_in); > + /* Compute final preheader count after peeling NDUPL copies. */ > + for (i = 0; i < ndupl; i++) > + preheader_count = preheader_count.apply_probability (scale_step[i]); > + /* Subtract out exit(s) from peeled copies. */ > + new_loop_count = count_in - (e->count () - preheader_count); > + scale_main = new_loop_count.probability_in (count_in); > } > } > > @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge > e, > scale_bbs_frequencies (new_bbs, n, scale_act); > scale_act = scale_act * scale_step[j]; > } > + > + /* Need to update PROB of exit edge and corresponding COUNT. */ > + if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1)) > + && bbs_to_scale) > + { > + edge new_exit = new_spec_edges[SE_ORIG]; > + profile_count new_count_in = new_exit->src->count; > + profile_count preheader_count = loop_preheader_edge (loop)->count (); > + edge e; > + edge_iterator ei; > + > + FOR_EACH_EDGE (e, ei, new_exit->src->succs) > + if (e != new_exit) > + break; > + > + gcc_assert (e && e != new_exit); > + > + new_exit->probability = preheader_count.probability_in (new_count_in); > + e->probability = new_exit->probability.invert (); > + > + profile_count new_latch_count > + = new_exit->src->count.apply_probability (e->probability); > + profile_count old_latch_count = e->dest->count; > + > + EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) > + scale_bbs_frequencies_profile_count (new_bbs + i, 1, > + new_latch_count, > + old_latch_count); > + > + if (current_ir_type () != IR_GIMPLE) > + update_br_prob_note (e->src); > + } > } >free (new_bbs); >free (orig_loops); > diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c > new file mode 100644 > index 000..f3b7c22 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr68212.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param > max-unroll-times=4 -fdump-rtl-alignments" } */ > + > +void foo(long int *a, long int *b, long int n) > +{ > + long int i; > + > + for (i = 0; i < n; i++) > +a[i] = *b; > +} > + > +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 > "alignments"} } */ > +
Re: [PATCH V2] correct COUNT and PROB for unrolled loop
Hi Honza and all, I updated the patch a little as below. Bootstrap and regtest are ok on powerpc64le. Is OK for trunk? Thanks for comments. Jiufu diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index 727e951..ded0046 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see #include "gimplify-me.h" #include "tree-ssa-loop-manip.h" #include "dumpfile.h" +#include "cfgrtl.h" static void copy_loops_to (class loop **, int, class loop *); @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e, /* If original loop is executed COUNT_IN times, the unrolled loop will account SCALE_MAIN_DEN times. */ scale_main = count_in.probability_in (scale_main_den); + + /* If we are guessing at the number of iterations and count_in +becomes unrealistically small, reset probability. */ + if (!(count_in.reliable_p () || loop->any_estimate)) + { + profile_count new_count_in = count_in.apply_probability (scale_main); + profile_count preheader_count = loop_preheader_edge (loop)->count (); + if (new_count_in.apply_scale (1, 10) < preheader_count) + scale_main = profile_probability::likely (); + } + scale_act = scale_main * prob_pass_main; } else { + profile_count new_loop_count; profile_count preheader_count = e->count (); - for (i = 0; i < ndupl; i++) - scale_main = scale_main * scale_step[i]; scale_act = preheader_count.probability_in (count_in); + /* Compute final preheader count after peeling NDUPL copies. */ + for (i = 0; i < ndupl; i++) + preheader_count = preheader_count.apply_probability (scale_step[i]); + /* Subtract out exit(s) from peeled copies. */ + new_loop_count = count_in - (e->count () - preheader_count); + scale_main = new_loop_count.probability_in (count_in); } } @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e, scale_bbs_frequencies (new_bbs, n, scale_act); scale_act = scale_act * scale_step[j]; } + + /* Need to update PROB of exit edge and corresponding COUNT. */ + if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1)) + && bbs_to_scale) + { + edge new_exit = new_spec_edges[SE_ORIG]; + profile_count new_count_in = new_exit->src->count; + profile_count preheader_count = loop_preheader_edge (loop)->count (); + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, new_exit->src->succs) + if (e != new_exit) + break; + + gcc_assert (e && e != new_exit); + + new_exit->probability = preheader_count.probability_in (new_count_in); + e->probability = new_exit->probability.invert (); + + profile_count new_latch_count + = new_exit->src->count.apply_probability (e->probability); + profile_count old_latch_count = e->dest->count; + + EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) + scale_bbs_frequencies_profile_count (new_bbs + i, 1, +new_latch_count, +old_latch_count); + + if (current_ir_type () != IR_GIMPLE) + update_br_prob_note (e->src); + } } free (new_bbs); free (orig_loops); diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c new file mode 100644 index 000..f3b7c22 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr68212.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments" } */ + +void foo(long int *a, long int *b, long int n) +{ + long int i; + + for (i = 0; i < n; i++) +a[i] = *b; +} + +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */ + -- 2.7.4