On Wed, 30 Oct 2019, Jiufu Guo wrote:

> Hi,
> 
> In this patch, loop unroll adjust hook is introduced for powerpc. In this 
> hook,
> we can do target related hueristic adjustment. For this patch, we tunned for
> O2 to unroll small loops with small unroll factor (2 times), for other
> optimization, default unroll factor is used.
> 
> Bootstrapped and regtested on powerpc64le.  Is this ok for trunk?

This looks good to me, needs ppc maintainer approval though.

Thanks,
Richard.

> 
> Jiufu
> BR.
> 
> 
> gcc/
> 2019-10-30  Jiufu Guo  <guoji...@linux.ibm.com>           
> 
>       PR tree-optimization/88760
>       * config/rs6000/rs6000.c (rs6000_option_override_internal): Remove
>       code which changes PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS.
>       (TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook.
>       (rs6000_loop_unroll_adjust): New hook for loop unroll adjust. Unrolling
>       small loop 2 times for -O2.
> 
>       
> gcc.testsuite/
> 2019-10-29  Jiufu Guo  <guoji...@linux.ibm.com>
> 
>       PR tree-optimization/88760
>       * gcc.dg/pr59643.c: Update back to r277550.
>       * gcc.dg/unroll-8.c: Update cases.
>       * gcc.dg/var-expand3.c: Update cases.
> 
> ---
>  gcc/config/rs6000/rs6000.c         | 37 ++++++++++++++++++++++++++-----------
>  gcc/testsuite/gcc.dg/pr59643.c     |  3 ---
>  gcc/testsuite/gcc.dg/unroll-8.c    |  4 ++++
>  gcc/testsuite/gcc.dg/var-expand3.c |  2 ++
>  4 files changed, 32 insertions(+), 14 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 9ed5151..183dceb 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -1428,6 +1428,9 @@ static const struct attribute_spec 
> rs6000_attribute_table[] =
>  #undef TARGET_VECTORIZE_DESTROY_COST_DATA
>  #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
>  
> +#undef TARGET_LOOP_UNROLL_ADJUST
> +#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
> +
>  #undef TARGET_INIT_BUILTINS
>  #define TARGET_INIT_BUILTINS rs6000_init_builtins
>  #undef TARGET_BUILTIN_DECL
> @@ -4540,20 +4543,11 @@ rs6000_option_override_internal (bool global_init_p)
>                            global_options.x_param_values,
>                            global_options_set.x_param_values);
>  
> -      /* unroll very small loops 2 time if no -funroll-loops.  */
> +      /* If funroll-loops is implicitly enabled, do not turn fweb or
> +      frename-registers on implicitly.  */
>        if (!global_options_set.x_flag_unroll_loops
>         && !global_options_set.x_flag_unroll_all_loops)
>       {
> -       maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
> -                              global_options.x_param_values,
> -                              global_options_set.x_param_values);
> -
> -       maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
> -                              global_options.x_param_values,
> -                              global_options_set.x_param_values);
> -
> -       /* If fweb or frename-registers are not specificed in command-line,
> -          do not turn them on implicitly.  */
>         if (!global_options_set.x_flag_web)
>           global_options.x_flag_web = 0;
>         if (!global_options_set.x_flag_rename_registers)
> @@ -5101,6 +5095,27 @@ rs6000_destroy_cost_data (void *data)
>    free (data);
>  }
>  
> +/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
> +   a new heristic number struct loop *loop should be unrolled.  */
> +
> +static unsigned
> +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)
> +{
> +  /* For -O2, we only unroll small loops with small unroll factor.  */
> +  if (optimize == 2)
> +    {
> +      /* If the loop contains few insns, treated it as small loops.
> +      TODO: Uing 10 hard code for now.  Continue to refine, For example,
> +      if loop constians only 1-2 insns, we may unroll more times(4).
> +      And we may use PARAM to control kinds of loop size.  */
> +      if (loop->ninsns <= 10)
> +     return MIN (2, nunroll);
> +      else
> +     return 0;
> +    }
> +  return nunroll;
> +}
> +
>  /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
>     library with vectorized intrinsics.  */
>  
> diff --git a/gcc/testsuite/gcc.dg/pr59643.c b/gcc/testsuite/gcc.dg/pr59643.c
> index 4446f6e..de78d60 100644
> --- a/gcc/testsuite/gcc.dg/pr59643.c
> +++ b/gcc/testsuite/gcc.dg/pr59643.c
> @@ -1,9 +1,6 @@
>  /* PR tree-optimization/59643 */
>  /* { dg-do compile } */
>  /* { dg-options "-O3 -fdump-tree-pcom-details" } */
> -/* { dg-additional-options "--param max-unrolled-insns=400" { target { 
> powerpc*-*-* } } } */
> -/* Implicit threashold of max-unrolled-insn on ppc at O3 is too small for the
> -   loop of this case.  */
>  
>  void
>  foo (double *a, double *b, double *c, double d, double e, int n)
> diff --git a/gcc/testsuite/gcc.dg/unroll-8.c b/gcc/testsuite/gcc.dg/unroll-8.c
> index b16df67..b1d38a6 100644
> --- a/gcc/testsuite/gcc.dg/unroll-8.c
> +++ b/gcc/testsuite/gcc.dg/unroll-8.c
> @@ -4,6 +4,10 @@ struct a {int a[7];};
>  int t(struct a *a, int n)
>  {
>    int i;
> +
> +  /* Testing unroller message if arrary size is smaller than unroll factor.  
> */
> +  /* Using pragma unroll to make sure unroll factor is large enough.  */
> +  #pragma GCC unroll 8
>    for (i=0;i<n;i++)
>      a->a[i]++;
>  }
> diff --git a/gcc/testsuite/gcc.dg/var-expand3.c 
> b/gcc/testsuite/gcc.dg/var-expand3.c
> index dce6ec1..d2ca6df 100644
> --- a/gcc/testsuite/gcc.dg/var-expand3.c
> +++ b/gcc/testsuite/gcc.dg/var-expand3.c
> @@ -21,6 +21,8 @@ foo (int n)
>  
>    vaccum = vzero;
>  
> +  /* Using pragma unroll to make sure unroll enough times.  */
> +  #pragma GCC unroll 8
>    for (i = 0; i < n; i++)
>      {
>        vp1 = vec_ld (i * 16, in1);
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to