On Tue, Mar 3, 2015 at 10:06 AM, Wilco Dijkstra <wdijk...@arm.com> wrote: > This patch makes aarch64_min_divisions_for_recip_mul configurable for float > and double. This allows > CPUs with really fast or multiple dividers to return 3 (or even 4) if that > happens to be faster > overall. No code generation change - bootstrap & regression OK.
Are you planning on doing the optimization where you turn the divide into recip est followed by a few steps? Because if so then this should be changed to handle that case too. Thanks, Andrew > > ChangeLog: > 2015-03-03 Wilco Dijkstra <wdijk...@arm.com> > > * gcc/config/aarch64/aarch64-protos.h (tune_params): > Add min_div_recip_mul_sf and min_div_recip_mul_df fields. > * gcc/config/aarch64/aarch64.c (aarch64_min_divisions_for_recip_mul): > Return value depending on target. > (generic_tunings): Initialize new target settings. > (cortexa53_tunings): Likewise. > (cortexa57_tunings): Likewise. > (thunderx_tunings): Likewise. > (xgene1_tunings): Likewise. > > --- > gcc/config/aarch64/aarch64-protos.h | 2 ++ > gcc/config/aarch64/aarch64.c | 26 +++++++++++++++++++------- > 2 files changed, 21 insertions(+), 7 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64-protos.h > b/gcc/config/aarch64/aarch64-protos.h > index 59c5824..4331e5c 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -177,6 +177,8 @@ struct tune_params > const int int_reassoc_width; > const int fp_reassoc_width; > const int vec_reassoc_width; > + const int min_div_recip_mul_sf; > + const int min_div_recip_mul_df; > }; > > HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index e22d72e..42a96f6 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -353,7 +353,9 @@ static const struct tune_params generic_tunings = > 4, /* loop_align. */ > 2, /* int_reassoc_width. */ > 4, /* fp_reassoc_width. */ > - 1 /* vec_reassoc_width. */ > + 1, /* vec_reassoc_width. */ > + 2, /* min_div_recip_mul_sf. */ > + 2 /* min_div_recip_mul_df. */ > }; > > static const struct tune_params cortexa53_tunings = > @@ -371,7 +373,9 @@ static const struct tune_params cortexa53_tunings = > 4, /* loop_align. */ > 2, /* int_reassoc_width. 
*/ > 4, /* fp_reassoc_width. */ > - 1 /* vec_reassoc_width. */ > + 1, /* vec_reassoc_width. */ > + 2, /* min_div_recip_mul_sf. */ > + 2 /* min_div_recip_mul_df. */ > }; > > static const struct tune_params cortexa57_tunings = > @@ -389,7 +393,9 @@ static const struct tune_params cortexa57_tunings = > 4, /* loop_align. */ > 2, /* int_reassoc_width. */ > 4, /* fp_reassoc_width. */ > - 1 /* vec_reassoc_width. */ > + 1, /* vec_reassoc_width. */ > + 2, /* min_div_recip_mul_sf. */ > + 2 /* min_div_recip_mul_df. */ > }; > > static const struct tune_params thunderx_tunings = > @@ -406,7 +412,9 @@ static const struct tune_params thunderx_tunings = > 8, /* loop_align. */ > 2, /* int_reassoc_width. */ > 4, /* fp_reassoc_width. */ > - 1 /* vec_reassoc_width. */ > + 1, /* vec_reassoc_width. */ > + 2, /* min_div_recip_mul_sf. */ > + 2 /* min_div_recip_mul_df. */ > }; > > static const struct tune_params xgene1_tunings = > @@ -423,7 +431,9 @@ static const struct tune_params xgene1_tunings = > 16, /* loop_align. */ > 2, /* int_reassoc_width. */ > 4, /* fp_reassoc_width. */ > - 1 /* vec_reassoc_width. */ > + 1, /* vec_reassoc_width. */ > + 2, /* min_div_recip_mul_sf. */ > + 2 /* min_div_recip_mul_df. */ > }; > > /* A processor implementing AArch64. */ > @@ -512,9 +522,11 @@ static const char * const aarch64_condition_codes[] = > }; > > static unsigned int > -aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED) > +aarch64_min_divisions_for_recip_mul (enum machine_mode mode) > { > - return 2; > + if (GET_MODE_UNIT_SIZE (mode) == 4) > + return aarch64_tune_params->min_div_recip_mul_sf; > + return aarch64_tune_params->min_div_recip_mul_df; > } > > static int > -- > 1.9.1 > > > >