On Tue, Mar 3, 2015 at 10:06 AM, Wilco Dijkstra <wdijk...@arm.com> wrote:
> This patch makes aarch64_min_divisions_for_recip_mul configurable for float 
> and double. This allows
> CPUs with really fast or multiple dividers to return 3 (or even 4) if that 
> happens to be faster
> overall. No code generation change - bootstrap & regression OK.

Are you planning on doing the optimization where you turn the divide
into a reciprocal estimate followed by a few steps?
Because if so, then this should be changed to handle that case too.

Thanks,
Andrew


>
> ChangeLog:
> 2015-03-03  Wilco Dijkstra  <wdijk...@arm.com>
>
>         * gcc/config/aarch64/aarch64-protos.h (tune_params):
>         Add min_div_recip_mul_sf and min_div_recip_mul_df fields.
>         * gcc/config/aarch64/aarch64.c (aarch64_min_divisions_for_recip_mul):
>         Return value depending on target.
>         (generic_tunings): Initialize new target settings.
>         (cortexa53_tunings): Likewise.
>         (cortexa57_tunings): Likewise.
>         (thunderx_tunings): Likewise.
>         (xgene1_tunings): Likewise.
>
> ---
>  gcc/config/aarch64/aarch64-protos.h |  2 ++
>  gcc/config/aarch64/aarch64.c        | 26 +++++++++++++++++++-------
>  2 files changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 59c5824..4331e5c 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -177,6 +177,8 @@ struct tune_params
>    const int int_reassoc_width;
>    const int fp_reassoc_width;
>    const int vec_reassoc_width;
> +  const int min_div_recip_mul_sf;
> +  const int min_div_recip_mul_df;
>  };
>
>  HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index e22d72e..42a96f6 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -353,7 +353,9 @@ static const struct tune_params generic_tunings =
>    4,   /* loop_align.  */
>    2,   /* int_reassoc_width.  */
>    4,   /* fp_reassoc_width.  */
> -  1    /* vec_reassoc_width.  */
> +  1,   /* vec_reassoc_width.  */
> +  2,   /* min_div_recip_mul_sf.  */
> +  2    /* min_div_recip_mul_df.  */
>  };
>
>  static const struct tune_params cortexa53_tunings =
> @@ -371,7 +373,9 @@ static const struct tune_params cortexa53_tunings =
>    4,   /* loop_align.  */
>    2,   /* int_reassoc_width.  */
>    4,   /* fp_reassoc_width.  */
> -  1    /* vec_reassoc_width.  */
> +  1,   /* vec_reassoc_width.  */
> +  2,   /* min_div_recip_mul_sf.  */
> +  2    /* min_div_recip_mul_df.  */
>  };
>
>  static const struct tune_params cortexa57_tunings =
> @@ -389,7 +393,9 @@ static const struct tune_params cortexa57_tunings =
>    4,   /* loop_align.  */
>    2,   /* int_reassoc_width.  */
>    4,   /* fp_reassoc_width.  */
> -  1    /* vec_reassoc_width.  */
> +  1,   /* vec_reassoc_width.  */
> +  2,   /* min_div_recip_mul_sf.  */
> +  2    /* min_div_recip_mul_df.  */
>  };
>
>  static const struct tune_params thunderx_tunings =
> @@ -406,7 +412,9 @@ static const struct tune_params thunderx_tunings =
>    8,   /* loop_align.  */
>    2,   /* int_reassoc_width.  */
>    4,   /* fp_reassoc_width.  */
> -  1    /* vec_reassoc_width.  */
> +  1,   /* vec_reassoc_width.  */
> +  2,   /* min_div_recip_mul_sf.  */
> +  2    /* min_div_recip_mul_df.  */
>  };
>
>  static const struct tune_params xgene1_tunings =
> @@ -423,7 +431,9 @@ static const struct tune_params xgene1_tunings =
>    16,  /* loop_align.  */
>    2,   /* int_reassoc_width.  */
>    4,   /* fp_reassoc_width.  */
> -  1    /* vec_reassoc_width.  */
> +  1,   /* vec_reassoc_width.  */
> +  2,   /* min_div_recip_mul_sf.  */
> +  2    /* min_div_recip_mul_df.  */
>  };
>
>  /* A processor implementing AArch64.  */
> @@ -512,9 +522,11 @@ static const char * const aarch64_condition_codes[] =
>  };
>
>  static unsigned int
> -aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
> +aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
>  {
> -  return 2;
> +  if (GET_MODE_UNIT_SIZE (mode) == 4)
> +    return aarch64_tune_params->min_div_recip_mul_sf;
> +  return aarch64_tune_params->min_div_recip_mul_df;
>  }
>
>  static int
> --
> 1.9.1
>
>
>
>

Reply via email to