Jiufu Guo <guoji...@linux.ibm.com> writes:

Hi,

I'd like to ping this patch for trunk on stage 1.

This patch could fix the issue on incorrect COUNT/FREQUENCES of loop
unrolled blocks, and also could help the improve the cold/hot issue of
the unrolled loops.

patch is also at
https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html

Thanks,
Jiufu

> Jiufu Guo <guoji...@linux.ibm.com> writes:
>
> Hi!
>
> I'd like to ping following patch. As near end of gcc10 stage 4, it seems
> I would ask approval for GCC11 trunk.
>
> Thanks,
> Jiufu Guo
>
>> Hi Honza and all,
>>
>> I updated the patch a little as below. Bootstrap and regtest are ok
>> on powerpc64le.
>>
>> Is OK for trunk?
>>
>> Thanks for comments.
>> Jiufu
>>
>> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
>> index 727e951..ded0046 100644
>> --- a/gcc/cfgloopmanip.c
>> +++ b/gcc/cfgloopmanip.c
>> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "gimplify-me.h"
>>  #include "tree-ssa-loop-manip.h"
>>  #include "dumpfile.h"
>> +#include "cfgrtl.h"
>>  
>>  static void copy_loops_to (class loop **, int,
>>                         class loop *);
>> @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, 
>> edge e,
>>        /* If original loop is executed COUNT_IN times, the unrolled
>>           loop will account SCALE_MAIN_DEN times.  */
>>        scale_main = count_in.probability_in (scale_main_den);
>> +
>> +      /* If we are guessing at the number of iterations and count_in
>> +         becomes unrealistically small, reset probability.  */
>> +      if (!(count_in.reliable_p () || loop->any_estimate))
>> +        {
>> +          profile_count new_count_in = count_in.apply_probability 
>> (scale_main);
>> +          profile_count preheader_count = loop_preheader_edge (loop)->count 
>> ();
>> +          if (new_count_in.apply_scale (1, 10) < preheader_count)
>> +            scale_main = profile_probability::likely ();
>> +        }
>> +
>>        scale_act = scale_main * prob_pass_main;
>>      }
>>        else
>>      {
>> +      profile_count new_loop_count;
>>        profile_count preheader_count = e->count ();
>> -      for (i = 0; i < ndupl; i++)
>> -        scale_main = scale_main * scale_step[i];
>>        scale_act = preheader_count.probability_in (count_in);
>> +      /* Compute final preheader count after peeling NDUPL copies.  */
>> +      for (i = 0; i < ndupl; i++)
>> +        preheader_count = preheader_count.apply_probability (scale_step[i]);
>> +      /* Subtract out exit(s) from peeled copies.  */
>> +      new_loop_count = count_in - (e->count () - preheader_count);
>> +      scale_main = new_loop_count.probability_in (count_in);
>>      }
>>      }
>>  
>> @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge 
>> e,
>>        scale_bbs_frequencies (new_bbs, n, scale_act);
>>        scale_act = scale_act * scale_step[j];
>>      }
>> +
>> +      /* Need to update PROB of exit edge and corresponding COUNT.  */
>> +      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
>> +      && bbs_to_scale)
>> +    {
>> +      edge new_exit = new_spec_edges[SE_ORIG];
>> +      profile_count new_count_in = new_exit->src->count;
>> +      profile_count preheader_count = loop_preheader_edge (loop)->count ();
>> +      edge e;
>> +      edge_iterator ei;
>> +
>> +      FOR_EACH_EDGE (e, ei, new_exit->src->succs)
>> +        if (e != new_exit)
>> +          break;
>> +
>> +      gcc_assert (e && e != new_exit);
>> +
>> +      new_exit->probability = preheader_count.probability_in (new_count_in);
>> +      e->probability = new_exit->probability.invert ();
>> +
>> +      profile_count new_latch_count
>> +        = new_exit->src->count.apply_probability (e->probability);
>> +      profile_count old_latch_count = e->dest->count;
>> +
>> +      EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
>> +        scale_bbs_frequencies_profile_count (new_bbs + i, 1,
>> +                                             new_latch_count,
>> +                                             old_latch_count);
>> +
>> +      if (current_ir_type () != IR_GIMPLE)
>> +        update_br_prob_note (e->src);
>> +    }
>>      }
>>    free (new_bbs);
>>    free (orig_loops);
>> diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
>> new file mode 100644
>> index 0000000..f3b7c22
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/pr68212.c
>> @@ -0,0 +1,13 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param 
>> max-unroll-times=4 -fdump-rtl-alignments" } */
>> +
>> +void foo(long int *a, long int *b, long int n)
>> +{
>> +  long int i;
>> +
>> +  for (i = 0; i < n; i++)
>> +    a[i] = *b;
>> +}
>> +
>> +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 
>> "alignments"} } */
>> +

Reply via email to