Hi, PR68212 mentioned that the COUNT of unrolled loop was not correct, and comments of this PR also mentioned that loop become 'cold'.
This patch fixes the wrong COUNT/PROB of unrolled loop. And the patch handles the case where unrolling in unreliable count number can cause a loop to no longer look hot and therefor not get aligned. This patch scale by profile_probability::likely () if unrolled count gets unrealistically small. Bootstrap/regtest on powerpc64le with no new regressions. Ok for trunk? Jiufu Guo gcc/ChangeLog: 2020-10-09 Jiufu Guo <guoji...@linux.ibm.com> Pat Haugen <pthau...@us.ibm.com> PR rtl-optimization/68212 * cfgloopmanip.c (duplicate_loop_to_header_edge): Reset probablity of unrolled/peeled loop. testsuite/ChangeLog: 2020-10-09 Jiufu Guo <guoji...@linux.ibm.com> Pat Haugen <pthau...@us.ibm.com> PR rtl-optimization/68212 * gcc.dg/pr68212.c: New test. --- gcc/cfgloopmanip.c | 31 +++++++++++++++++++++++++++++-- gcc/testsuite/gcc.dg/pr68212.c | 13 +++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr68212.c diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index b0ca82a67fd..d3c95498402 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -1260,14 +1260,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e, /* If original loop is executed COUNT_IN times, the unrolled loop will account SCALE_MAIN_DEN times. */ scale_main = count_in.probability_in (scale_main_den); + + /* If we are guessing at the number of iterations and count_in + becomes unrealistically small, reset probability. */ + if (!(count_in.reliable_p () || loop->any_estimate)) + { + profile_count new_count_in = count_in.apply_probability (scale_main); + profile_count preheader_count = loop_preheader_edge (loop)->count (); + if (new_count_in.apply_scale (1, 10) < preheader_count) + scale_main = profile_probability::likely (); + } + scale_act = scale_main * prob_pass_main; } else { + profile_count new_loop_count; profile_count preheader_count = e->count (); - for (i = 0; i < ndupl; i++) - scale_main = scale_main * scale_step[i]; scale_act = preheader_count.probability_in (count_in); + /* Compute final preheader count after peeling NDUPL copies. */ + for (i = 0; i < ndupl; i++) + preheader_count = preheader_count.apply_probability (scale_step[i]); + /* Subtract out exit(s) from peeled copies. */ + new_loop_count = count_in - (e->count () - preheader_count); + scale_main = new_loop_count.probability_in (count_in); } } @@ -1383,6 +1399,17 @@ duplicate_loop_to_header_edge (class loop *loop, edge e, scale_bbs_frequencies (new_bbs, n, scale_act); scale_act = scale_act * scale_step[j]; } + + /* Need to update PROB of exit edge and corresponding COUNT. */ + if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1)) + && bbs_to_scale) + { + edge new_exit = new_spec_edges[SE_ORIG]; + profile_count new_count = new_exit->src->count; + profile_count exit_count = loop_preheader_edge (loop)->count (); + profile_probability prob = exit_count.probability_in (new_count); + recompute_loop_frequencies (loop, prob); + } } free (new_bbs); free (orig_loops); diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c new file mode 100644 index 00000000000..e0cf71d5202 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr68212.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments" } */ + +void foo(long int *a, long int *b, long int n) +{ + long int i; + + for (i = 0; i < n; i++) + a[i] = *b; +} + +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */ + -- 2.25.1