Re: [PATCH V2] correct COUNT and PROB for unrolled loop

2020-05-18 Thread Jiufu Guo via Gcc-patches
Jiufu Guo  writes:

Hi,

I'd like to ping this patch for trunk on stage 1.

This patch could fix the issue on incorrect COUNT/FREQUENCES of loop
unrolled blocks, and also could help the improve the cold/hot issue of
the unrolled loops.

patch is also at
https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html

Thanks,
Jiufu

> Jiufu Guo  writes:
>
> Hi!
>
> I'd like to ping following patch. As near end of gcc10 stage 4, it seems
> I would ask approval for GCC11 trunk.
>
> Thanks,
> Jiufu Guo
>
>> Hi Honza and all,
>>
>> I updated the patch a little as below. Bootstrap and regtest are ok
>> on powerpc64le.
>>
>> Is OK for trunk?
>>
>> Thanks for comments.
>> Jiufu
>>
>> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
>> index 727e951..ded0046 100644
>> --- a/gcc/cfgloopmanip.c
>> +++ b/gcc/cfgloopmanip.c
>> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "gimplify-me.h"
>>  #include "tree-ssa-loop-manip.h"
>>  #include "dumpfile.h"
>> +#include "cfgrtl.h"
>>  
>>  static void copy_loops_to (class loop **, int,
>> class loop *);
>> @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, 
>> edge e,
>>/* If original loop is executed COUNT_IN times, the unrolled
>>   loop will account SCALE_MAIN_DEN times.  */
>>scale_main = count_in.probability_in (scale_main_den);
>> +
>> +  /* If we are guessing at the number of iterations and count_in
>> + becomes unrealistically small, reset probability.  */
>> +  if (!(count_in.reliable_p () || loop->any_estimate))
>> +{
>> +  profile_count new_count_in = count_in.apply_probability 
>> (scale_main);
>> +  profile_count preheader_count = loop_preheader_edge (loop)->count 
>> ();
>> +  if (new_count_in.apply_scale (1, 10) < preheader_count)
>> +scale_main = profile_probability::likely ();
>> +}
>> +
>>scale_act = scale_main * prob_pass_main;
>>  }
>>else
>>  {
>> +  profile_count new_loop_count;
>>profile_count preheader_count = e->count ();
>> -  for (i = 0; i < ndupl; i++)
>> -scale_main = scale_main * scale_step[i];
>>scale_act = preheader_count.probability_in (count_in);
>> +  /* Compute final preheader count after peeling NDUPL copies.  */
>> +  for (i = 0; i < ndupl; i++)
>> +preheader_count = preheader_count.apply_probability (scale_step[i]);
>> +  /* Subtract out exit(s) from peeled copies.  */
>> +  new_loop_count = count_in - (e->count () - preheader_count);
>> +  scale_main = new_loop_count.probability_in (count_in);
>>  }
>>  }
>>  
>> @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge 
>> e,
>>scale_bbs_frequencies (new_bbs, n, scale_act);
>>scale_act = scale_act * scale_step[j];
>>  }
>> +
>> +  /* Need to update PROB of exit edge and corresponding COUNT.  */
>> +  if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
>> +  && bbs_to_scale)
>> +{
>> +  edge new_exit = new_spec_edges[SE_ORIG];
>> +  profile_count new_count_in = new_exit->src->count;
>> +  profile_count preheader_count = loop_preheader_edge (loop)->count ();
>> +  edge e;
>> +  edge_iterator ei;
>> +
>> +  FOR_EACH_EDGE (e, ei, new_exit->src->succs)
>> +if (e != new_exit)
>> +  break;
>> +
>> +  gcc_assert (e && e != new_exit);
>> +
>> +  new_exit->probability = preheader_count.probability_in (new_count_in);
>> +  e->probability = new_exit->probability.invert ();
>> +
>> +  profile_count new_latch_count
>> += new_exit->src->count.apply_probability (e->probability);
>> +  profile_count old_latch_count = e->dest->count;
>> +
>> +  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
>> +scale_bbs_frequencies_profile_count (new_bbs + i, 1,
>> + new_latch_count,
>> + old_latch_count);
>> +
>> +  if (current_ir_type () != IR_GIMPLE)
>> +update_br_prob_note (e->src);
>> +}
>>  }
>>free (new_bbs);
>>free (orig_loops);
>> diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
>> new file mode 100644
>> index 000..f3b7c22
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/pr68212.c
>> @@ -0,0 +1,13 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param 
>> max-unroll-times=4 -fdump-rtl-alignments" } */
>> +
>> +void foo(long int *a, long int *b, long int n)
>> +{
>> +  long int i;
>> +
>> +  for (i = 0; i < n; i++)
>> +a[i] = *b;
>> +}
>> +
>> +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 
>> "alignments"} } */
>> +


Re: [PATCH V2] correct COUNT and PROB for unrolled loop

2020-03-18 Thread Jiufu Guo via Gcc-patches
Jiufu Guo  writes:

Hi!

I'd like to ping following patch. As near end of gcc10 stage 4, it seems
I would ask approval for GCC11 trunk.

Thanks,
Jiufu Guo

> Hi Honza and all,
>
> I updated the patch a little as below. Bootstrap and regtest are ok
> on powerpc64le.
>
> Is OK for trunk?
>
> Thanks for comments.
> Jiufu
>
> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
> index 727e951..ded0046 100644
> --- a/gcc/cfgloopmanip.c
> +++ b/gcc/cfgloopmanip.c
> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimplify-me.h"
>  #include "tree-ssa-loop-manip.h"
>  #include "dumpfile.h"
> +#include "cfgrtl.h"
>  
>  static void copy_loops_to (class loop **, int,
>  class loop *);
> @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge 
> e,
> /* If original loop is executed COUNT_IN times, the unrolled
>loop will account SCALE_MAIN_DEN times.  */
> scale_main = count_in.probability_in (scale_main_den);
> +
> +   /* If we are guessing at the number of iterations and count_in
> +  becomes unrealistically small, reset probability.  */
> +   if (!(count_in.reliable_p () || loop->any_estimate))
> + {
> +   profile_count new_count_in = count_in.apply_probability 
> (scale_main);
> +   profile_count preheader_count = loop_preheader_edge (loop)->count 
> ();
> +   if (new_count_in.apply_scale (1, 10) < preheader_count)
> + scale_main = profile_probability::likely ();
> + }
> +
> scale_act = scale_main * prob_pass_main;
>   }
>else
>   {
> +   profile_count new_loop_count;
> profile_count preheader_count = e->count ();
> -   for (i = 0; i < ndupl; i++)
> - scale_main = scale_main * scale_step[i];
> scale_act = preheader_count.probability_in (count_in);
> +   /* Compute final preheader count after peeling NDUPL copies.  */
> +   for (i = 0; i < ndupl; i++)
> + preheader_count = preheader_count.apply_probability (scale_step[i]);
> +   /* Subtract out exit(s) from peeled copies.  */
> +   new_loop_count = count_in - (e->count () - preheader_count);
> +   scale_main = new_loop_count.probability_in (count_in);
>   }
>  }
>  
> @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge 
> e,
> scale_bbs_frequencies (new_bbs, n, scale_act);
> scale_act = scale_act * scale_step[j];
>   }
> +
> +  /* Need to update PROB of exit edge and corresponding COUNT.  */
> +  if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
> +   && bbs_to_scale)
> + {
> +   edge new_exit = new_spec_edges[SE_ORIG];
> +   profile_count new_count_in = new_exit->src->count;
> +   profile_count preheader_count = loop_preheader_edge (loop)->count ();
> +   edge e;
> +   edge_iterator ei;
> +
> +   FOR_EACH_EDGE (e, ei, new_exit->src->succs)
> + if (e != new_exit)
> +   break;
> +
> +   gcc_assert (e && e != new_exit);
> +
> +   new_exit->probability = preheader_count.probability_in (new_count_in);
> +   e->probability = new_exit->probability.invert ();
> +
> +   profile_count new_latch_count
> + = new_exit->src->count.apply_probability (e->probability);
> +   profile_count old_latch_count = e->dest->count;
> +
> +   EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
> + scale_bbs_frequencies_profile_count (new_bbs + i, 1,
> +  new_latch_count,
> +  old_latch_count);
> +
> +   if (current_ir_type () != IR_GIMPLE)
> + update_br_prob_note (e->src);
> + }
>  }
>free (new_bbs);
>free (orig_loops);
> diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
> new file mode 100644
> index 000..f3b7c22
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr68212.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param 
> max-unroll-times=4 -fdump-rtl-alignments" } */
> +
> +void foo(long int *a, long int *b, long int n)
> +{
> +  long int i;
> +
> +  for (i = 0; i < n; i++)
> +a[i] = *b;
> +}
> +
> +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 
> "alignments"} } */
> +


Re: [PATCH V2] correct COUNT and PROB for unrolled loop

2020-02-27 Thread Jiufu Guo
Jiufu Guo  writes:

Hi!

I'd like to ping following patch. just in case it may make sense to
include in GCC 10. Thanks!

https://gcc.gnu.org/ml/gcc-patches/2020-02/msg00927.html

Jiufu

> Hi Honza and all,
>
> I updated the patch a little as below. Bootstrap and regtest are ok
> on powerpc64le.
>
> Is OK for trunk?
>
> Thanks for comments.
> Jiufu
>
> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
> index 727e951..ded0046 100644
> --- a/gcc/cfgloopmanip.c
> +++ b/gcc/cfgloopmanip.c
> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimplify-me.h"
>  #include "tree-ssa-loop-manip.h"
>  #include "dumpfile.h"
> +#include "cfgrtl.h"
>  
>  static void copy_loops_to (class loop **, int,
>  class loop *);
> @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge 
> e,
> /* If original loop is executed COUNT_IN times, the unrolled
>loop will account SCALE_MAIN_DEN times.  */
> scale_main = count_in.probability_in (scale_main_den);
> +
> +   /* If we are guessing at the number of iterations and count_in
> +  becomes unrealistically small, reset probability.  */
> +   if (!(count_in.reliable_p () || loop->any_estimate))
> + {
> +   profile_count new_count_in = count_in.apply_probability 
> (scale_main);
> +   profile_count preheader_count = loop_preheader_edge (loop)->count 
> ();
> +   if (new_count_in.apply_scale (1, 10) < preheader_count)
> + scale_main = profile_probability::likely ();
> + }
> +
> scale_act = scale_main * prob_pass_main;
>   }
>else
>   {
> +   profile_count new_loop_count;
> profile_count preheader_count = e->count ();
> -   for (i = 0; i < ndupl; i++)
> - scale_main = scale_main * scale_step[i];
> scale_act = preheader_count.probability_in (count_in);
> +   /* Compute final preheader count after peeling NDUPL copies.  */
> +   for (i = 0; i < ndupl; i++)
> + preheader_count = preheader_count.apply_probability (scale_step[i]);
> +   /* Subtract out exit(s) from peeled copies.  */
> +   new_loop_count = count_in - (e->count () - preheader_count);
> +   scale_main = new_loop_count.probability_in (count_in);
>   }
>  }
>  
> @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge 
> e,
> scale_bbs_frequencies (new_bbs, n, scale_act);
> scale_act = scale_act * scale_step[j];
>   }
> +
> +  /* Need to update PROB of exit edge and corresponding COUNT.  */
> +  if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
> +   && bbs_to_scale)
> + {
> +   edge new_exit = new_spec_edges[SE_ORIG];
> +   profile_count new_count_in = new_exit->src->count;
> +   profile_count preheader_count = loop_preheader_edge (loop)->count ();
> +   edge e;
> +   edge_iterator ei;
> +
> +   FOR_EACH_EDGE (e, ei, new_exit->src->succs)
> + if (e != new_exit)
> +   break;
> +
> +   gcc_assert (e && e != new_exit);
> +
> +   new_exit->probability = preheader_count.probability_in (new_count_in);
> +   e->probability = new_exit->probability.invert ();
> +
> +   profile_count new_latch_count
> + = new_exit->src->count.apply_probability (e->probability);
> +   profile_count old_latch_count = e->dest->count;
> +
> +   EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
> + scale_bbs_frequencies_profile_count (new_bbs + i, 1,
> +  new_latch_count,
> +  old_latch_count);
> +
> +   if (current_ir_type () != IR_GIMPLE)
> + update_br_prob_note (e->src);
> + }
>  }
>free (new_bbs);
>free (orig_loops);
> diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
> new file mode 100644
> index 000..f3b7c22
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr68212.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param 
> max-unroll-times=4 -fdump-rtl-alignments" } */
> +
> +void foo(long int *a, long int *b, long int n)
> +{
> +  long int i;
> +
> +  for (i = 0; i < n; i++)
> +a[i] = *b;
> +}
> +
> +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 
> "alignments"} } */
> +


Re: [PATCH V2] correct COUNT and PROB for unrolled loop

2020-02-16 Thread Jiufu Guo


Hi Honza and all,

I updated the patch a little as below. Bootstrap and regtest are ok
on powerpc64le.

Is OK for trunk?

Thanks for comments.
Jiufu

diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
index 727e951..ded0046 100644
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify-me.h"
 #include "tree-ssa-loop-manip.h"
 #include "dumpfile.h"
+#include "cfgrtl.h"
 
 static void copy_loops_to (class loop **, int,
   class loop *);
@@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
  /* If original loop is executed COUNT_IN times, the unrolled
 loop will account SCALE_MAIN_DEN times.  */
  scale_main = count_in.probability_in (scale_main_den);
+
+ /* If we are guessing at the number of iterations and count_in
+becomes unrealistically small, reset probability.  */
+ if (!(count_in.reliable_p () || loop->any_estimate))
+   {
+ profile_count new_count_in = count_in.apply_probability 
(scale_main);
+ profile_count preheader_count = loop_preheader_edge (loop)->count 
();
+ if (new_count_in.apply_scale (1, 10) < preheader_count)
+   scale_main = profile_probability::likely ();
+   }
+
  scale_act = scale_main * prob_pass_main;
}
   else
{
+ profile_count new_loop_count;
  profile_count preheader_count = e->count ();
- for (i = 0; i < ndupl; i++)
-   scale_main = scale_main * scale_step[i];
  scale_act = preheader_count.probability_in (count_in);
+ /* Compute final preheader count after peeling NDUPL copies.  */
+ for (i = 0; i < ndupl; i++)
+   preheader_count = preheader_count.apply_probability (scale_step[i]);
+ /* Subtract out exit(s) from peeled copies.  */
+ new_loop_count = count_in - (e->count () - preheader_count);
+ scale_main = new_loop_count.probability_in (count_in);
}
 }
 
@@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
  scale_bbs_frequencies (new_bbs, n, scale_act);
  scale_act = scale_act * scale_step[j];
}
+
+  /* Need to update PROB of exit edge and corresponding COUNT.  */
+  if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
+ && bbs_to_scale)
+   {
+ edge new_exit = new_spec_edges[SE_ORIG];
+ profile_count new_count_in = new_exit->src->count;
+ profile_count preheader_count = loop_preheader_edge (loop)->count ();
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, new_exit->src->succs)
+   if (e != new_exit)
+ break;
+
+ gcc_assert (e && e != new_exit);
+
+ new_exit->probability = preheader_count.probability_in (new_count_in);
+ e->probability = new_exit->probability.invert ();
+
+ profile_count new_latch_count
+   = new_exit->src->count.apply_probability (e->probability);
+ profile_count old_latch_count = e->dest->count;
+
+ EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
+   scale_bbs_frequencies_profile_count (new_bbs + i, 1,
+new_latch_count,
+old_latch_count);
+
+ if (current_ir_type () != IR_GIMPLE)
+   update_br_prob_note (e->src);
+   }
 }
   free (new_bbs);
   free (orig_loops);
diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
new file mode 100644
index 000..f3b7c22
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr68212.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param 
max-unroll-times=4 -fdump-rtl-alignments" } */
+
+void foo(long int *a, long int *b, long int n)
+{
+  long int i;
+
+  for (i = 0; i < n; i++)
+a[i] = *b;
+}
+
+/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 
"alignments"} } */
+
-- 
2.7.4