Hi,

The patch increases PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with
high branch cost.
Bootstrap and make check are in progress.
The patch speeds up several benchmarks compiled with "-Ofast" on
Silvermont (by up to 2.5 times).
Spec2000:
+5% gain on 173.applu
+1% gain on 255.vortex

Is it OK for trunk once bootstrap and make check pass?

Thanks,
Evgeny

2014-10-10  Evgeny Stupachenko  <evstu...@gmail.com>
        * config/i386/i386.c (ix86_option_override_internal): Increase
        PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost.
        * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New.
        * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates
        CPUs with high branch cost.

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6337aa5..5ac10eb 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p,
                         opts->x_param_values,
                         opts_set->x_param_values);

+  /* Extend full peel max insns parameter for CPUs with high branch cost.  */
+  if (TARGET_HIGH_BRANCH_COST)
+    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS,
+                          120,
+                          opts->x_param_values,
+                          opts_set->x_param_values);
+
+
   /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful.  */
   if (opts->x_flag_prefetch_loop_arrays < 0
       && HAVE_prefetch
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 2c64162..da0c57b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_INTER_UNIT_CONVERSIONS \
        ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
 #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
+#define TARGET_HIGH_BRANCH_COST ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST]
 #define TARGET_SCHEDULE                ix86_tune_features[X86_TUNE_SCHEDULE]
 #define TARGET_USE_BT          ix86_tune_features[X86_TUNE_USE_BT]
 #define TARGET_USE_INCDEC      ix86_tune_features[X86_TUNE_USE_INCDEC]
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index b6b210e..04d8bf8 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit",
           m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL |
          m_ATHLON_K8 | m_AMDFAM10)

+/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost.  This could be
+   used to tune unroll, if-cvt, inline... heuristics.  */
+DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, "high_branch_cost",
+          m_BONNELL | m_SILVERMONT | m_INTEL)
+
 /*****************************************************************************/
 /* Integer instruction selection tuning                                      */
 /*****************************************************************************/

Reply via email to