Jiufu Guo <guoji...@linux.ibm.com> writes:

Hi!

I'd like to ping the following patch, in case it makes sense to
include it in GCC 10. Thanks!

https://gcc.gnu.org/ml/gcc-patches/2020-02/msg00927.html

Jiufu

> Hi Honza and all,
>
> I updated the patch a little as below. Bootstrap and regtest are ok
> on powerpc64le.
>
> Is it OK for trunk?
>
> Thanks for comments.
> Jiufu
>
> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
> index 727e951..ded0046 100644
> --- a/gcc/cfgloopmanip.c
> +++ b/gcc/cfgloopmanip.c
> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimplify-me.h"
>  #include "tree-ssa-loop-manip.h"
>  #include "dumpfile.h"
> +#include "cfgrtl.h"
>  
>  static void copy_loops_to (class loop **, int,
>                          class loop *);
> @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>         /* If original loop is executed COUNT_IN times, the unrolled
>            loop will account SCALE_MAIN_DEN times.  */
>         scale_main = count_in.probability_in (scale_main_den);
> +
> +       /* If we are guessing at the number of iterations and count_in
> +          becomes unrealistically small, reset probability.  */
> +       if (!(count_in.reliable_p () || loop->any_estimate))
> +         {
> +           profile_count new_count_in = count_in.apply_probability (scale_main);
> +           profile_count preheader_count = loop_preheader_edge (loop)->count ();
> +           if (new_count_in.apply_scale (1, 10) < preheader_count)
> +             scale_main = profile_probability::likely ();
> +         }
> +
>         scale_act = scale_main * prob_pass_main;
>       }
>        else
>       {
> +       profile_count new_loop_count;
>         profile_count preheader_count = e->count ();
> -       for (i = 0; i < ndupl; i++)
> -         scale_main = scale_main * scale_step[i];
>         scale_act = preheader_count.probability_in (count_in);
> +       /* Compute final preheader count after peeling NDUPL copies.  */
> +       for (i = 0; i < ndupl; i++)
> +         preheader_count = preheader_count.apply_probability (scale_step[i]);
> +       /* Subtract out exit(s) from peeled copies.  */
> +       new_loop_count = count_in - (e->count () - preheader_count);
> +       scale_main = new_loop_count.probability_in (count_in);
>       }
>      }
>  
> @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>         scale_bbs_frequencies (new_bbs, n, scale_act);
>         scale_act = scale_act * scale_step[j];
>       }
> +
> +      /* Need to update PROB of exit edge and corresponding COUNT.  */
> +      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
> +       && bbs_to_scale)
> +     {
> +       edge new_exit = new_spec_edges[SE_ORIG];
> +       profile_count new_count_in = new_exit->src->count;
> +       profile_count preheader_count = loop_preheader_edge (loop)->count ();
> +       edge e;
> +       edge_iterator ei;
> +
> +       FOR_EACH_EDGE (e, ei, new_exit->src->succs)
> +         if (e != new_exit)
> +           break;
> +
> +       gcc_assert (e && e != new_exit);
> +
> +       new_exit->probability = preheader_count.probability_in (new_count_in);
> +       e->probability = new_exit->probability.invert ();
> +
> +       profile_count new_latch_count
> +         = new_exit->src->count.apply_probability (e->probability);
> +       profile_count old_latch_count = e->dest->count;
> +
> +       EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
> +         scale_bbs_frequencies_profile_count (new_bbs + i, 1,
> +                                              new_latch_count,
> +                                              old_latch_count);
> +
> +       if (current_ir_type () != IR_GIMPLE)
> +         update_br_prob_note (e->src);
> +     }
>      }
>    free (new_bbs);
>    free (orig_loops);
> diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
> new file mode 100644
> index 0000000..f3b7c22
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr68212.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments" } */
> +
> +void foo(long int *a, long int *b, long int n)
> +{
> +  long int i;
> +
> +  for (i = 0; i < n; i++)
> +    a[i] = *b;
> +}
> +
> +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */
> +

Reply via email to