Hi, This patch increases PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. Bootstrap and make check are in progress. The patch boosts several benchmarks compiled with "-Ofast" on Silvermont (by up to 2.5 times). Spec2000: +5% gain on 173.applu, +1% gain on 255.vortex.
Is it OK for trunk once bootstrap and make check pass? Thanks, Evgeny 2014-10-10 Evgeny Stupachenko <evstu...@gmail.com> * config/i386/i386.c (ix86_option_override_internal): Increase PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New. * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates CPUs with high branch cost. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6337aa5..5ac10eb 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p, opts->x_param_values, opts_set->x_param_values); + /* Extend full peel max insns parameter for CPUs with high branch cost. */ + if (TARGET_HIGH_BRANCH_COST) + maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, + 120, + opts->x_param_values, + opts_set->x_param_values); + + /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */ if (opts->x_flag_prefetch_loop_arrays < 0 && HAVE_prefetch diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 2c64162..da0c57b 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_INTER_UNIT_CONVERSIONS \ ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] +#define TARGET_HIGH_BRANCH_COST ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST] #define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE] #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC] diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index b6b210e..04d8bf8 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL | 
m_ATHLON_K8 | m_AMDFAM10) +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost. This could be + used to tune unroll, if-cvt, inline... heuristics. */ +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, "high_branch_cost", + m_BONNELL | m_SILVERMONT | m_INTEL) + /*****************************************************************************/ /* Integer instruction selection tuning */ /*****************************************************************************/