Hi Martin,

Thanks so much for your time and kind help!

I hope Richi, Bin or Honza will have time to review this patch. ;-)

--- Here is a summary ---

PR68212 mentioned that the COUNT of the unrolled loop was not correct, and
comments on this PR also mentioned that the loop becomes 'cold'.

The following patch fixes the wrong COUNT/PROB of the unrolled loop.  The
patch also resets the COUNT in the case where unrolling with an unreliable
count can cause a loop to no longer look hot and therefore not get aligned.

The following messages are referenced:
(https://gcc.gnu.org/ml/gcc-patches/2018-11/msg02368.html) and the comments
(https://gcc.gnu.org/ml/gcc-patches/2018-11/msg02380.html,
https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00044.html).

Bootstrap and regtest are ok on powerpc64le.  Is this ok for trunk?

ChangeLog:
2020-07-10  Jiufu Guo   <guoji...@cn.ibm.com>
            Pat Haugen  <pthau...@us.ibm.com>

        PR rtl-optimization/68212
        * cfgloopmanip.c (duplicate_loop_to_header_edge): Correct COUNT/PROB
        for unrolled/peeled blocks.

testsuite/ChangeLog:
2020-07-10  Jiufu Guo   <guoji...@cn.ibm.com>
            Pat Haugen  <pthau...@us.ibm.com>
        PR rtl-optimization/68212
        * gcc.dg/pr68212.c: New test.

diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
index 727e951..ded0046 100644
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify-me.h"
 #include "tree-ssa-loop-manip.h"
 #include "dumpfile.h"
+#include "cfgrtl.h"
 
 static void copy_loops_to (class loop **, int,
                           class loop *);
@@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
          /* If original loop is executed COUNT_IN times, the unrolled
             loop will account SCALE_MAIN_DEN times.  */
          scale_main = count_in.probability_in (scale_main_den);
+
+         /* If we are guessing at the number of iterations and count_in
+            becomes unrealistically small, reset probability.  */
+         if (!(count_in.reliable_p () || loop->any_estimate))
+           {
+             profile_count new_count_in = count_in.apply_probability 
(scale_main);
+             profile_count preheader_count = loop_preheader_edge (loop)->count 
();
+             if (new_count_in.apply_scale (1, 10) < preheader_count)
+               scale_main = profile_probability::likely ();
+           }
+
          scale_act = scale_main * prob_pass_main;
        }
       else
        {
+         profile_count new_loop_count;
          profile_count preheader_count = e->count ();
-         for (i = 0; i < ndupl; i++)
-           scale_main = scale_main * scale_step[i];
          scale_act = preheader_count.probability_in (count_in);
+         /* Compute final preheader count after peeling NDUPL copies.  */
+         for (i = 0; i < ndupl; i++)
+           preheader_count = preheader_count.apply_probability (scale_step[i]);
+         /* Subtract out exit(s) from peeled copies.  */
+         new_loop_count = count_in - (e->count () - preheader_count);
+         scale_main = new_loop_count.probability_in (count_in);
        }
     }
 
@@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
          scale_bbs_frequencies (new_bbs, n, scale_act);
          scale_act = scale_act * scale_step[j];
        }
+
+      /* Need to update PROB of exit edge and corresponding COUNT.  */
+      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
+         && bbs_to_scale)
+       {
+         edge new_exit = new_spec_edges[SE_ORIG];
+         profile_count new_count_in = new_exit->src->count;
+         profile_count preheader_count = loop_preheader_edge (loop)->count ();
+         edge e;
+         edge_iterator ei;
+
+         FOR_EACH_EDGE (e, ei, new_exit->src->succs)
+           if (e != new_exit)
+             break;
+
+         gcc_assert (e && e != new_exit);
+
+         new_exit->probability = preheader_count.probability_in (new_count_in);
+         e->probability = new_exit->probability.invert ();
+
+         profile_count new_latch_count
+           = new_exit->src->count.apply_probability (e->probability);
+         profile_count old_latch_count = e->dest->count;
+
+         EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
+           scale_bbs_frequencies_profile_count (new_bbs + i, 1,
+                                                new_latch_count,
+                                                old_latch_count);
+
+         if (current_ir_type () != IR_GIMPLE)
+           update_br_prob_note (e->src);
+       }
     }
   free (new_bbs);
   free (orig_loops);
diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
new file mode 100644
index 0000000..f3b7c22
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr68212.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments" } */
+
+void foo(long int *a, long int *b, long int n)
+{
+  long int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = *b;
+}
+
+/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */
+
-- 
2.7.4

Thanks!

Jiufu Guo.


Martin Liška <mli...@suse.cz> writes:

> On 7/2/20 4:35 AM, Jiufu Guo via Gcc-patches wrote:
>> I would like to reping this patch.
>> Since this is correcting COUNT and PROB for hot blocks, it helps some
>> cases.
>>
>> https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html
>
> Hey.
>
> I've just briefly looked at the patch and I don't feel the right person
> to make a review for it.
>
> I believe Richi, Bin or Honza can help us here?
> Martin

Reply via email to