Hi, GCC/i386 currently has about 73 boolean parameters/knobs (defined
in ix86_tune_features[], indexed by ix86_tune_indices) to perform
micro-arch specific performance tuning. However, such settings are
hard-coded (fixed for a given -mtune setting), which makes it very hard
to run performance experiments.

The attached patch fixes the problem. The patch introduces a new
option -mtune-ctrl=. Its parameter is a comma-separated list of
feature names; each named feature is turned on. A feature name can be
prefixed by ^ to turn that feature off instead. For instance,

  -mtune-ctrl=prologue_using_move,epilogue_using_move,^pad_returns

tells the compiler to use move instructions in prologue/epilogue
(instead of push/pop), and *not* pad return instructions.

To facilitate the change, the feature tuning enums defined in i386.h
are moved to a new file, x86-tune.def, which is used to generate both
the enums and the names of the features.


Ok for trunk?


thanks,

David

2013-08-03  Xinliang David Li  <davi...@google.com>

        * config/i386/i386.opt: New option -mtune-ctrl=.
        * config/i386/x86-tune.def: New file.
        * config/i386/i386.h: Include x86-tune.def.
        * config/i386/i386.c (ix86_option_override_internal):
        Parse -mtune-ctrl= option and set tune features.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 201458)
+++ config/i386/i386.c  (working copy)
@@ -1833,6 +1833,13 @@ const struct processor_costs *ix86_cost
    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
 
+const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
+#undef DEF_TUNE
+#define DEF_TUNE(tune, name) name,
+#include "x86-tune.def"
+#undef DEF_TUNE
+};
+
 /* Feature tests against the various tunings.  */
 unsigned char ix86_tune_features[X86_TUNE_LAST];
 
@@ -3550,6 +3557,40 @@ ix86_option_override_internal (bool main
   for (i = 0; i < X86_TUNE_LAST; ++i)
     ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
 
+  if (ix86_tune_ctrl_string)
+    {
+      /* Parse the tune ctrl string in the following form:
+         [^]tune_name1,[^]tune_name2,...  */
+      char *next_feature_string = NULL;
+      char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
+      char *orig = curr_feature_string;
+      do {
+        bool clear = false;
+
+        next_feature_string = strchr (curr_feature_string, ',');
+       if (next_feature_string)
+          *next_feature_string++ = '\0';
+        if (*curr_feature_string == '^')
+         {
+           curr_feature_string++;
+           clear = true;
+         }
+        for (i = 0; i < X86_TUNE_LAST; i++)
+         {
+            if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
+             {
+                ix86_tune_features[i] = !clear;
+                break;
+              }
+         }
+        if (i == X86_TUNE_LAST)
+         warning (0, "Unknown parameter to option -mtune-ctrl: %s",
+                  clear ? curr_feature_string - 1 : curr_feature_string);
+       curr_feature_string = next_feature_string;    
+      } while (curr_feature_string);
+      free (orig);
+    }
+
 #ifndef USE_IX86_FRAME_POINTER
 #define USE_IX86_FRAME_POINTER 0
 #endif
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h  (revision 201458)
+++ config/i386/i386.h  (working copy)
@@ -261,81 +261,11 @@ extern const struct processor_costs ix86
 
 /* Feature tests against the various tunings.  */
 enum ix86_tune_indices {
-  X86_TUNE_USE_LEAVE,
-  X86_TUNE_PUSH_MEMORY,
-  X86_TUNE_ZERO_EXTEND_WITH_AND,
-  X86_TUNE_UNROLL_STRLEN,
-  X86_TUNE_BRANCH_PREDICTION_HINTS,
-  X86_TUNE_DOUBLE_WITH_ADD,
-  X86_TUNE_USE_SAHF,
-  X86_TUNE_MOVX,
-  X86_TUNE_PARTIAL_REG_STALL,
-  X86_TUNE_PARTIAL_FLAG_REG_STALL,
-  X86_TUNE_LCP_STALL,
-  X86_TUNE_USE_HIMODE_FIOP,
-  X86_TUNE_USE_SIMODE_FIOP,
-  X86_TUNE_USE_MOV0,
-  X86_TUNE_USE_CLTD,
-  X86_TUNE_USE_XCHGB,
-  X86_TUNE_SPLIT_LONG_MOVES,
-  X86_TUNE_READ_MODIFY_WRITE,
-  X86_TUNE_READ_MODIFY,
-  X86_TUNE_PROMOTE_QIMODE,
-  X86_TUNE_FAST_PREFIX,
-  X86_TUNE_SINGLE_STRINGOP,
-  X86_TUNE_QIMODE_MATH,
-  X86_TUNE_HIMODE_MATH,
-  X86_TUNE_PROMOTE_QI_REGS,
-  X86_TUNE_PROMOTE_HI_REGS,
-  X86_TUNE_SINGLE_POP,
-  X86_TUNE_DOUBLE_POP,
-  X86_TUNE_SINGLE_PUSH,
-  X86_TUNE_DOUBLE_PUSH,
-  X86_TUNE_INTEGER_DFMODE_MOVES,
-  X86_TUNE_PARTIAL_REG_DEPENDENCY,
-  X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY,
-  X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL,
-  X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL,
-  X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL,
-  X86_TUNE_SSE_SPLIT_REGS,
-  X86_TUNE_SSE_TYPELESS_STORES,
-  X86_TUNE_SSE_LOAD0_BY_PXOR,
-  X86_TUNE_MEMORY_MISMATCH_STALL,
-  X86_TUNE_PROLOGUE_USING_MOVE,
-  X86_TUNE_EPILOGUE_USING_MOVE,
-  X86_TUNE_SHIFT1,
-  X86_TUNE_USE_FFREEP,
-  X86_TUNE_INTER_UNIT_MOVES_TO_VEC,
-  X86_TUNE_INTER_UNIT_MOVES_FROM_VEC,
-  X86_TUNE_INTER_UNIT_CONVERSIONS,
-  X86_TUNE_FOUR_JUMP_LIMIT,
-  X86_TUNE_SCHEDULE,
-  X86_TUNE_USE_BT,
-  X86_TUNE_USE_INCDEC,
-  X86_TUNE_PAD_RETURNS,
-  X86_TUNE_PAD_SHORT_FUNCTION,
-  X86_TUNE_EXT_80387_CONSTANTS,
-  X86_TUNE_AVOID_VECTOR_DECODE,
-  X86_TUNE_PROMOTE_HIMODE_IMUL,
-  X86_TUNE_SLOW_IMUL_IMM32_MEM,
-  X86_TUNE_SLOW_IMUL_IMM8,
-  X86_TUNE_MOVE_M1_VIA_OR,
-  X86_TUNE_NOT_UNPAIRABLE,
-  X86_TUNE_NOT_VECTORMODE,
-  X86_TUNE_USE_VECTOR_FP_CONVERTS,
-  X86_TUNE_USE_VECTOR_CONVERTS,
-  X86_TUNE_FUSE_CMP_AND_BRANCH,
-  X86_TUNE_OPT_AGU,
-  X86_TUNE_VECTORIZE_DOUBLE,
-  X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL,
-  X86_TUNE_AVX128_OPTIMAL,
-  X86_TUNE_REASSOC_INT_TO_PARALLEL,
-  X86_TUNE_REASSOC_FP_TO_PARALLEL,
-  X86_TUNE_GENERAL_REGS_SSE_SPILL,
-  X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE,
-  X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS,
-
-  X86_TUNE_LAST
+#undef DEF_TUNE
+#define DEF_TUNE(tune, name) tune,
+#include "x86-tune.def"
+#undef DEF_TUNE
+X86_TUNE_LAST
 };
 
 extern unsigned char ix86_tune_features[X86_TUNE_LAST];
Index: config/i386/i386.opt
===================================================================
--- config/i386/i386.opt        (revision 201458)
+++ config/i386/i386.opt        (working copy)
@@ -370,6 +370,10 @@ mtune=
 Target RejectNegative Joined Var(ix86_tune_string)
 Schedule code for given CPU
 
+mtune-ctrl=
+Target RejectNegative Joined Var(ix86_tune_ctrl_string)
+Fine grain control of tune features
+
 mabi=
 Target RejectNegative Joined Var(ix86_abi) Enum(calling_abi) Init(SYSV_ABI)
 Generate code that conforms to the given ABI
Index: config/i386/x86-tune.def
===================================================================
--- config/i386/x86-tune.def    (revision 0)
+++ config/i386/x86-tune.def    (revision 0)
@@ -0,0 +1,97 @@
+/* Definitions of target machine for GCC for IA-32.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave")
+DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory")
+DEF_TUNE (X86_TUNE_ZERO_EXTEND_WITH_AND, "zero_extend_with_and")
+DEF_TUNE (X86_TUNE_UNROLL_STRLEN, "unroll_strlen")
+DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS, "branch_prediction_hints")
+DEF_TUNE (X86_TUNE_DOUBLE_WITH_ADD, "double_with_add")
+DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf")
+DEF_TUNE (X86_TUNE_MOVX, "movx")
+DEF_TUNE (X86_TUNE_PARTIAL_REG_STALL, "partial_reg_stall")
+DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall")
+DEF_TUNE (X86_TUNE_LCP_STALL, "lcp_stall")
+DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop")
+DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop")
+DEF_TUNE (X86_TUNE_USE_MOV0, "use_mov0")
+DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd")
+DEF_TUNE (X86_TUNE_USE_XCHGB, "use_xchgb")
+DEF_TUNE (X86_TUNE_SPLIT_LONG_MOVES, "split_long_moves")
+DEF_TUNE (X86_TUNE_READ_MODIFY_WRITE, "read_modify_write")
+DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify")
+DEF_TUNE (X86_TUNE_PROMOTE_QIMODE, "promote_qimode")
+DEF_TUNE (X86_TUNE_FAST_PREFIX, "fast_prefix")
+DEF_TUNE (X86_TUNE_SINGLE_STRINGOP, "single_stringop")
+DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math")
+DEF_TUNE (X86_TUNE_HIMODE_MATH, "himode_math")
+DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs")
+DEF_TUNE (X86_TUNE_PROMOTE_HI_REGS, "promote_hi_regs")
+DEF_TUNE (X86_TUNE_SINGLE_POP, "single_pop")
+DEF_TUNE (X86_TUNE_DOUBLE_POP, "double_pop")
+DEF_TUNE (X86_TUNE_SINGLE_PUSH, "single_push")
+DEF_TUNE (X86_TUNE_DOUBLE_PUSH, "double_push")
+DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves")
+DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency")
+DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency")
+DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal")
+DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal")
+DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optimal")
+DEF_TUNE (X86_TUNE_SSE_SPLIT_REGS, "sse_split_regs")
+DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores")
+DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor")
+DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall")
+DEF_TUNE (X86_TUNE_PROLOGUE_USING_MOVE, "prologue_using_move")
+DEF_TUNE (X86_TUNE_EPILOGUE_USING_MOVE, "epilogue_using_move")
+DEF_TUNE (X86_TUNE_SHIFT1, "shift1")
+DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep")
+DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_TO_VEC, "inter_unit_moves_to_vec")
+DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, "inter_unit_moves_from_vec")
+DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions")
+DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit")
+DEF_TUNE (X86_TUNE_SCHEDULE, "schedule")
+DEF_TUNE (X86_TUNE_USE_BT, "use_bt")
+DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec")
+DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns")
+DEF_TUNE (X86_TUNE_PAD_SHORT_FUNCTION, "pad_short_function")
+DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants")
+DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode")
+DEF_TUNE (X86_TUNE_PROMOTE_HIMODE_IMUL, "promote_himode_imul")
+DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM32_MEM, "slow_imul_imm32_mem")
+DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM8, "slow_imul_imm8")
+DEF_TUNE (X86_TUNE_MOVE_M1_VIA_OR, "move_m1_via_or")
+DEF_TUNE (X86_TUNE_NOT_UNPAIRABLE, "not_unpairable")
+DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode")
+DEF_TUNE (X86_TUNE_USE_VECTOR_FP_CONVERTS, "use_vector_fp_converts")
+DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts")
+DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch")
+DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu")
+DEF_TUNE (X86_TUNE_VECTORIZE_DOUBLE, "vectorize_double")
+DEF_TUNE (X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL, "software_prefetching_beneficial")
+DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal")
+DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, "reassoc_int_to_parallel")
+DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel")
+DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill")
+DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove")
+DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts")

Reply via email to