Make check for GCC passed.
On Mon, Oct 27, 2014 at 11:10 AM, Evgeny Stupachenko <evstu...@gmail.com> wrote: > The results are the same for Silvermont. > There are no significant changes on Haswell. > So I agree with Richard, let's enable this x86-wide. > > Bootstrap passed. > Make check in progress. > Is it ok? > > 2014-10-25 Evgeny Stupachenko <evstu...@gmail.com> > * config/i386/i386.c (ix86_option_override_internal): Increase > PARAM_MAX_COMPLETELY_PEELED_INSNS. > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 6337aa5..5ac10eb 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -4081,6 +4081,12 @@ ix86_option_override_internal (bool main_args_p, > opts->x_param_values, > opts_set->x_param_values); > > + /* Extend full peel max insns parameter for x86. */ > + maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, > + 120, > + opts->x_param_values, > + opts_set->x_param_values); > + > /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */ > if (opts->x_flag_prefetch_loop_arrays < 0 > && HAVE_prefetch > > On Mon, Oct 13, 2014 at 4:23 PM, Jan Hubicka <hubi...@ucw.cz> wrote: >>> On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko <evstu...@gmail.com> >>> wrote: >>> > Hi, >>> > >>> > The patch increases PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with >>> > high branch cost. >>> > Bootstrap and make check are in progress. >>> > The patch boosts (up to a 2.5x improvement) several benchmarks compiled >>> > with "-Ofast" on Silvermont >>> > Spec2000: >>> > +5% gain on 173.applu >>> > +1% gain on 255.vortex >>> > >>> > Is it ok for trunk once it passes bootstrap and make check? >>> >>> This is only a 20% increase - from 100 to 120. I would instead suggest >>> to explore doing this change unconditionally if it helps that much. >> >> Agreed, I think the value of 100 was set a decade ago by Zdenek and me >> completely >> artificially. I do not recall any serious tuning of this flag. 
>> >> Note that I plan to update >> https://gcc.gnu.org/ml/gcc-patches/2013-11/msg02270.html to current tree so >> PARAM_MAX_COMPLETELY_PEELED_INSNS will be used at gimple level rather than >> tree >> changing its meaning somewhat. >> >> Perhaps I could try to find time this or next week to update the patch so we >> do >> not need to do the tuning twice. >> >> Honza >> >>> >>> Richard. >>> >>> > Thanks, >>> > Evgeny >>> > >>> > 2014-10-10 Evgeny Stupachenko <evstu...@gmail.com> >>> > * config/i386/i386.c (ix86_option_override_internal): Increase >>> > PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. >>> > * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New. >>> > * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates >>> > CPUs with high branch cost. >>> > >>> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c >>> > index 6337aa5..5ac10eb 100644 >>> > --- a/gcc/config/i386/i386.c >>> > +++ b/gcc/config/i386/i386.c >>> > @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p, >>> > opts->x_param_values, >>> > opts_set->x_param_values); >>> > >>> > + /* Extend full peel max insns parameter for CPUs with high branch >>> > cost. */ >>> > + if (TARGET_HIGH_BRANCH_COST) >>> > + maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, >>> > + 120, >>> > + opts->x_param_values, >>> > + opts_set->x_param_values); >>> > + >>> > + >>> > /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. 
>>> > */ >>> > if (opts->x_flag_prefetch_loop_arrays < 0 >>> > && HAVE_prefetch >>> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h >>> > index 2c64162..da0c57b 100644 >>> > --- a/gcc/config/i386/i386.h >>> > +++ b/gcc/config/i386/i386.h >>> > @@ -415,6 +415,7 @@ extern unsigned char >>> > ix86_tune_features[X86_TUNE_LAST]; >>> > #define TARGET_INTER_UNIT_CONVERSIONS \ >>> > ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] >>> > #define TARGET_FOUR_JUMP_LIMIT >>> > ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] >>> > +#define TARGET_HIGH_BRANCH_COST >>> > ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST] >>> > #define TARGET_SCHEDULE >>> > ix86_tune_features[X86_TUNE_SCHEDULE] >>> > #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] >>> > #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC] >>> > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def >>> > index b6b210e..04d8bf8 100644 >>> > --- a/gcc/config/i386/x86-tune.def >>> > +++ b/gcc/config/i386/x86-tune.def >>> > @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, >>> > "four_jump_limit", >>> > m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL | >>> > m_ATHLON_K8 | m_AMDFAM10) >>> > >>> > +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost. This >>> > could be >>> > + used to tune unroll, if-cvt, inline... heuristics. 
*/ >>> > +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, "high_branch_cost", >>> > + m_BONNELL | m_SILVERMONT | m_INTEL) >>> > + >>> > >>> > /*****************************************************************************/ >>> > /* Integer instruction selection tuning >>> > */ >>> > >>> > /*****************************************************************************/ > > > On Mon, Oct 13, 2014 at 3:23 PM, Jan Hubicka <hubi...@ucw.cz> wrote: >>> On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko <evstu...@gmail.com> >>> wrote: >>> > Hi, >>> > >>> > The patch increases PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with >>> > high branch cost. >>> > Bootstrap and make check are in progress. >>> > The patch boosts (up to a 2.5x improvement) several benchmarks compiled >>> > with "-Ofast" on Silvermont >>> > Spec2000: >>> > +5% gain on 173.applu >>> > +1% gain on 255.vortex >>> > >>> > Is it ok for trunk once it passes bootstrap and make check? >>> >>> This is only a 20% increase - from 100 to 120. I would instead suggest >>> to explore doing this change unconditionally if it helps that much. >> >> Agreed, I think the value of 100 was set a decade ago by Zdenek and me >> completely >> artificially. I do not recall any serious tuning of this flag. >> >> Note that I plan to update >> https://gcc.gnu.org/ml/gcc-patches/2013-11/msg02270.html to current tree so >> PARAM_MAX_COMPLETELY_PEELED_INSNS will be used at gimple level rather than >> tree >> changing its meaning somewhat. >> >> Perhaps I could try to find time this or next week to update the patch so we >> do >> not need to do the tuning twice. >> >> Honza >> >>> >>> Richard. >>> >>> > Thanks, >>> > Evgeny >>> > >>> > 2014-10-10 Evgeny Stupachenko <evstu...@gmail.com> >>> > * config/i386/i386.c (ix86_option_override_internal): Increase >>> > PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. >>> > * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New. 
>>> > * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates >>> > CPUs with high branch cost. >>> > >>> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c >>> > index 6337aa5..5ac10eb 100644 >>> > --- a/gcc/config/i386/i386.c >>> > +++ b/gcc/config/i386/i386.c >>> > @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p, >>> > opts->x_param_values, >>> > opts_set->x_param_values); >>> > >>> > + /* Extend full peel max insns parameter for CPUs with high branch >>> > cost. */ >>> > + if (TARGET_HIGH_BRANCH_COST) >>> > + maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, >>> > + 120, >>> > + opts->x_param_values, >>> > + opts_set->x_param_values); >>> > + >>> > + >>> > /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. >>> > */ >>> > if (opts->x_flag_prefetch_loop_arrays < 0 >>> > && HAVE_prefetch >>> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h >>> > index 2c64162..da0c57b 100644 >>> > --- a/gcc/config/i386/i386.h >>> > +++ b/gcc/config/i386/i386.h >>> > @@ -415,6 +415,7 @@ extern unsigned char >>> > ix86_tune_features[X86_TUNE_LAST]; >>> > #define TARGET_INTER_UNIT_CONVERSIONS \ >>> > ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] >>> > #define TARGET_FOUR_JUMP_LIMIT >>> > ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] >>> > +#define TARGET_HIGH_BRANCH_COST >>> > ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST] >>> > #define TARGET_SCHEDULE >>> > ix86_tune_features[X86_TUNE_SCHEDULE] >>> > #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] >>> > #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC] >>> > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def >>> > index b6b210e..04d8bf8 100644 >>> > --- a/gcc/config/i386/x86-tune.def >>> > +++ b/gcc/config/i386/x86-tune.def >>> > @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, >>> > "four_jump_limit", >>> > m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL | >>> > m_ATHLON_K8 | 
m_AMDFAM10) >>> > >>> > +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost. This >>> > could be >>> > + used to tune unroll, if-cvt, inline... heuristics. */ >>> > +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, "high_branch_cost", >>> > + m_BONNELL | m_SILVERMONT | m_INTEL) >>> > + >>> > >>> > /*****************************************************************************/ >>> > /* Integer instruction selection tuning >>> > */ >>> > >>> > /*****************************************************************************/