On Tue, Aug 16, 2011 at 10:50 PM, Sriraman Tallam <tmsri...@google.com> wrote: > Support for getting CPU type and feature information at run-time. > > The following patch provides support for determining the platform type at > run-time, such as the CPU type and the features it supports. The multi-versioning > framework will use the added builtins to dispatch to the right function version. > Please refer to http://gcc.gnu.org/ml/gcc/2011-08/msg00298.html for details > on function multi-versioning usability.
Please provide an overview of why you need the new builtins, why you need a separate pass to fold them (instead of just expanding them), and why you are creating vars behind the back of GCC: + /* Set finalized to 1, otherwise it asserts in function "write_symbol" in + lto-streamer-out.c. */ + vnode->finalized = 1; where I think you are missing a varpool_finalize_node call somewhere. Why isn't this all done at target init time? If you didn't mark the variable as preserved, as you do now, cgraph would optimize it all away whenever it isn't needed. Richard. > * tree-pass.h (pass_tree_fold_builtin_target): New pass. > * builtins.def (BUILT_IN_TARGET_SUPPORTS_CMOV): New builtin. > (BUILT_IN_TARGET_SUPPORTS_MMX): New builtin. > (BUILT_IN_TARGET_SUPPORTS_POPCOUNT): New builtin. > (BUILT_IN_TARGET_SUPPORTS_SSE): New builtin. > (BUILT_IN_TARGET_SUPPORTS_SSE2): New builtin. > (BUILT_IN_TARGET_SUPPORTS_SSE3): New builtin. > (BUILT_IN_TARGET_SUPPORTS_SSSE3): New builtin. > (BUILT_IN_TARGET_SUPPORTS_SSE4_1): New builtin. > (BUILT_IN_TARGET_SUPPORTS_SSE4_2): New builtin. > (BUILT_IN_TARGET_IS_AMD): New builtin. > (BUILT_IN_TARGET_IS_INTEL): New builtin. > (BUILT_IN_TARGET_IS_COREI7_NEHALEM): New builtin. > (BUILT_IN_TARGET_IS_COREI7_WESTMERE): New builtin. > (BUILT_IN_TARGET_IS_COREI7_SANDYBRIDGE): New builtin. > (BUILT_IN_TARGET_IS_AMDFAM10_BARCELONA): New builtin. > (BUILT_IN_TARGET_IS_AMDFAM10_SHANGHAI): New builtin. > (BUILT_IN_TARGET_IS_AMDFAM10_ISTANBUL): New builtin. > * mversn-dispatch.c (do_fold_builtin_target): New function. > (gate_fold_builtin_target): New function. > (pass_tree_fold_builtin_target): New pass. > * timevar.def (TV_FOLD_BUILTIN_TARGET): New var. > * passes.c (init_optimization_passes): Add new pass to pass list. > * config/i386/i386.c (build_struct_with_one_bit_fields): New function. > (make_var_decl): New function. > (get_field_from_struct): New function. > (make_constructor_to_get_target_type): New function. > (fold_builtin_target): New function. 
> (ix86_fold_builtin): New function. > (TARGET_FOLD_BUILTIN): New macro. > > * gcc.dg/builtin_target.c: New test. > > * config/i386/i386-cpuinfo.c: New file. > * config/i386/t-cpuinfo: New file. > * config.host: Add t-cpuinfo to link i386-cpuinfo.o with libgcc > > Index: libgcc/config.host > =================================================================== > --- libgcc/config.host (revision 177767) > +++ libgcc/config.host (working copy) > @@ -609,7 +609,7 @@ case ${host} in > i[34567]86-*-linux* | x86_64-*-linux* | \ > i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | \ > i[34567]86-*-gnu*) > - tmake_file="${tmake_file} t-tls" > + tmake_file="${tmake_file} t-tls i386/t-cpuinfo" > if test "$libgcc_cv_cfi" = "yes"; then > tmake_file="${tmake_file} t-stack i386/t-stack-i386" > fi > Index: libgcc/config/i386/t-cpuinfo > =================================================================== > --- libgcc/config/i386/t-cpuinfo (revision 0) > +++ libgcc/config/i386/t-cpuinfo (revision 0) > @@ -0,0 +1,2 @@ > +# This is an endfile > +LIB2ADD += $(srcdir)/config/i386/i386-cpuinfo.c > Index: libgcc/config/i386/i386-cpuinfo.c > =================================================================== > --- libgcc/config/i386/i386-cpuinfo.c (revision 0) > +++ libgcc/config/i386/i386-cpuinfo.c (revision 0) > @@ -0,0 +1,275 @@ > +/* Copyright (C) 2011 Free Software Foundation, Inc. > + * Contributed by Sriraman Tallam <tmsri...@google.com>. > + * > + * This file is free software; you can redistribute it and/or modify it > + * under the terms of the GNU General Public License as published by the > + * Free Software Foundation; either version 3, or (at your option) any > + * later version. > + * > + * This file is distributed in the hope that it will be useful, but > + * WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * General Public License for more details. 
> + * > + * Under Section 7 of GPL version 3, you are granted additional > + * permissions described in the GCC Runtime Library Exception, version > + * 3.1, as published by the Free Software Foundation. > + * > + * You should have received a copy of the GNU General Public License and > + * a copy of the GCC Runtime Library Exception along with this program; > + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > + * <http://www.gnu.org/licenses/>. > + * > + * > + * This code is adapted from gcc/config/i386/driver-i386.c. The CPUID > + * instruction is used to figure out the cpu type and supported features. > + * GCC runs __cpu_indicator_init from a constructor which sets the members > + * of __cpu_model and __cpu_features. > + */ > + > +#include <string.h> > + > +#ifdef __GNUC__ > +#include "cpuid.h" > + > +enum processor_type > +{ > + PROCESSOR_PENTIUM = 0, > + PROCESSOR_CORE2, > + PROCESSOR_COREI7_NEHALEM, > + PROCESSOR_COREI7_WESTMERE, > + PROCESSOR_COREI7_SANDYBRIDGE, > + PROCESSOR_INTEL_GENERIC, > + PROCESSOR_AMDFAM10_BARCELONA, > + PROCESSOR_AMDFAM10_SHANGHAI, > + PROCESSOR_AMDFAM10_ISTANBUL, > + PROCESSOR_AMDFAM10_GENERIC, > + PROCESSOR_AMD_GENERIC, > + PROCESSOR_GENERIC, > + PROCESSOR_max > +}; > + > +enum vendor_signatures > +{ > + SIG_INTEL = 0x756e6547 /* Genu */, > + SIG_AMD = 0x68747541 /* Auth */ > +}; > + > + > +/* Features supported. */ > + > +struct __processor_features > +{ > + unsigned int __cpu_cmov : 1; > + unsigned int __cpu_mmx : 1; > + unsigned int __cpu_popcnt : 1; > + unsigned int __cpu_sse : 1; > + unsigned int __cpu_sse2 : 1; > + unsigned int __cpu_sse3 : 1; > + unsigned int __cpu_ssse3 : 1; > + unsigned int __cpu_sse4_1 : 1; > + unsigned int __cpu_sse4_2 : 1; > +}; > + > +/* Flags exported. 
*/ > + > +struct __processor_model > +{ > + unsigned int __cpu_is_amd : 1; > + unsigned int __cpu_is_intel : 1; > + unsigned int __cpu_is_corei7_nehalem : 1; > + unsigned int __cpu_is_corei7_westmere : 1; > + unsigned int __cpu_is_corei7_sandybridge : 1; > + unsigned int __cpu_is_amdfam10_barcelona : 1; > + unsigned int __cpu_is_amdfam10_shanghai : 1; > + unsigned int __cpu_is_amdfam10_istanbul : 1; > +}; > + > +enum processor_type __cpu_type = PROCESSOR_GENERIC; > +struct __processor_features __cpu_features; > +struct __processor_model __cpu_model; > + > +static void > +get_amd_cpu (unsigned int family, unsigned int model) > +{ > + switch (family) > + { > + case 0x10: > + switch (model) > + { > + case 0x2: > + __cpu_type = PROCESSOR_AMDFAM10_BARCELONA; > + __cpu_model.__cpu_is_amdfam10_barcelona = 1; > + break; > + case 0x4: > + __cpu_type = PROCESSOR_AMDFAM10_SHANGHAI; > + __cpu_model.__cpu_is_amdfam10_shanghai = 1; > + break; > + case 0x8: > + __cpu_type = PROCESSOR_AMDFAM10_ISTANBUL; > + __cpu_model.__cpu_is_amdfam10_istanbul = 1; > + break; > + default: > + __cpu_type = PROCESSOR_AMDFAM10_GENERIC; > + break; > + } > + break; > + default: > + __cpu_type = PROCESSOR_AMD_GENERIC; > + } > +} > + > +static void > +get_intel_cpu (unsigned int family, unsigned int model, unsigned int > brand_id) > +{ > + /* Parse family and model only if brand ID is 0. */ > + if (brand_id == 0) > + { > + switch (family) > + { > + case 0x5: > + __cpu_type = PROCESSOR_PENTIUM; > + break; > + case 0x6: > + switch (model) > + { > + case 0x1a: > + case 0x1e: > + case 0x1f: > + case 0x2e: > + /* Nehalem. */ > + __cpu_type = PROCESSOR_COREI7_NEHALEM; > + __cpu_model.__cpu_is_corei7_nehalem = 1; > + break; > + case 0x25: > + case 0x2c: > + case 0x2f: > + /* Westmere. */ > + __cpu_type = PROCESSOR_COREI7_WESTMERE; > + __cpu_model.__cpu_is_corei7_westmere = 1; > + break; > + case 0x2a: > + /* Sandy Bridge. 
*/ > + __cpu_type = PROCESSOR_COREI7_SANDYBRIDGE; > + __cpu_model.__cpu_is_corei7_sandybridge = 1; > + break; > + case 0x17: > + case 0x1d: > + /* Penryn. */ > + case 0x0f: > + /* Merom. */ > + __cpu_type = PROCESSOR_CORE2; > + break; > + default: > + __cpu_type = PROCESSOR_INTEL_GENERIC; > + break; > + } > + break; > + default: > + /* We have no idea. */ > + __cpu_type = PROCESSOR_INTEL_GENERIC; > + break; > + } > + } > +} > + > +static void > +get_available_features (unsigned int ecx, unsigned int edx) > +{ > + __cpu_features.__cpu_cmov = (edx & bit_CMOV) ? 1 : 0; > + __cpu_features.__cpu_mmx = (edx & bit_MMX) ? 1 : 0; > + __cpu_features.__cpu_sse = (edx & bit_SSE) ? 1 : 0; > + __cpu_features.__cpu_sse2 = (edx & bit_SSE2) ? 1 : 0; > + __cpu_features.__cpu_popcnt = (ecx & bit_POPCNT) ? 1 : 0; > + __cpu_features.__cpu_sse3 = (ecx & bit_SSE3) ? 1 : 0; > + __cpu_features.__cpu_ssse3 = (ecx & bit_SSSE3) ? 1 : 0; > + __cpu_features.__cpu_sse4_1 = (ecx & bit_SSE4_1) ? 1 : 0; > + __cpu_features.__cpu_sse4_2 = (ecx & bit_SSE4_2) ? 1 : 0; > +} > + > +/* A noinline function calling __get_cpuid. Having many calls to > + cpuid in one function in 32-bit mode causes GCC to complain: > + "can’t find a register in class ‘CLOBBERED_REGS’". This is > + related to PR rtl-optimization 44174. */ > + > +static int __attribute__ ((noinline)) > +__get_cpuid_output (unsigned int __level, > + unsigned int *__eax, unsigned int *__ebx, > + unsigned int *__ecx, unsigned int *__edx) > +{ > + return __get_cpuid (__level, __eax, __ebx, __ecx, __edx); > +} > + > +/* This function will be linked in to binaries that need to look up > + CPU information. 
*/ > + > +void > +__cpu_indicator_init(void) > +{ > + unsigned int eax, ebx, ecx, edx; > + > + int max_level = 5; > + unsigned int vendor; > + unsigned int model, family, brand_id; > + > + memset (&__cpu_features, 0, sizeof (struct __processor_features)); > + memset (&__cpu_model, 0, sizeof (struct __processor_model)); > + > + /* Assume cpuid insn present. Run in level 0 to get vendor id. */ > + if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx)) > + return; > + > + vendor = ebx; > + max_level = eax; > + > + if (max_level < 1) > + return; > + > + if (!__get_cpuid_output (1, &eax, &ebx, &ecx, &edx)) > + return; > + > + model = (eax >> 4) & 0x0f; > + family = (eax >> 8) & 0x0f; > + brand_id = ebx & 0xff; > + > + /* Adjust model and family for Intel CPUS. */ > + if (vendor == SIG_INTEL) > + { > + unsigned int extended_model, extended_family; > + > + extended_model = (eax >> 12) & 0xf0; > + extended_family = (eax >> 20) & 0xff; > + if (family == 0x0f) > + { > + family += extended_family; > + model += extended_model; > + } > + else if (family == 0x06) > + model += extended_model; > + } > + > + /* Find CPU model. */ > + > + if (vendor == SIG_AMD) > + { > + __cpu_model.__cpu_is_amd = 1; > + get_amd_cpu (family, model); > + } > + else if (vendor == SIG_INTEL) > + { > + __cpu_model.__cpu_is_intel = 1; > + get_intel_cpu (family, model, brand_id); > + } > + > + /* Find available features. 
*/ > + get_available_features (ecx, edx); > +} > + > +#else > + > +void > +__cpu_indicator_init(void) > +{ > +} > + > +#endif /* __GNUC__ */ > Index: gcc/tree-pass.h > =================================================================== > --- gcc/tree-pass.h (revision 177767) > +++ gcc/tree-pass.h (working copy) > @@ -449,6 +449,7 @@ extern struct gimple_opt_pass pass_split_functions > extern struct gimple_opt_pass pass_feedback_split_functions; > extern struct gimple_opt_pass pass_threadsafe_analyze; > extern struct gimple_opt_pass pass_tree_convert_builtin_dispatch; > +extern struct gimple_opt_pass pass_tree_fold_builtin_target; > > /* IPA Passes */ > extern struct simple_ipa_opt_pass pass_ipa_lower_emutls; > Index: gcc/testsuite/gcc.dg/builtin_target.c > =================================================================== > --- gcc/testsuite/gcc.dg/builtin_target.c (revision 0) > +++ gcc/testsuite/gcc.dg/builtin_target.c (revision 0) > @@ -0,0 +1,49 @@ > +/* This test checks if the __builtin_target_* calls are recognized. 
*/ > + > +/* { dg-do run } */ > + > +int > +fn1 () > +{ > + if (__builtin_target_supports_cmov () < 0) > + return -1; > + if (__builtin_target_supports_mmx () < 0) > + return -1; > + if (__builtin_target_supports_popcount () < 0) > + return -1; > + if (__builtin_target_supports_sse () < 0) > + return -1; > + if (__builtin_target_supports_sse2 () < 0) > + return -1; > + if (__builtin_target_supports_sse3 () < 0) > + return -1; > + if (__builtin_target_supports_ssse3 () < 0) > + return -1; > + if (__builtin_target_supports_sse4_1 () < 0) > + return -1; > + if (__builtin_target_supports_sse4_2 () < 0) > + return -1; > + if (__builtin_target_is_amd () < 0) > + return -1; > + if (__builtin_target_is_intel () < 0) > + return -1; > + if (__builtin_target_is_corei7_nehalem () < 0) > + return -1; > + if (__builtin_target_is_corei7_westmere () < 0) > + return -1; > + if (__builtin_target_is_corei7_sandybridge () < 0) > + return -1; > + if (__builtin_target_is_amdfam10_barcelona () < 0) > + return -1; > + if (__builtin_target_is_amdfam10_shanghai () < 0) > + return -1; > + if (__builtin_target_is_amdfam10_istanbul () < 0) > + return -1; > + > + return 0; > +} > + > +int main () > +{ > + return fn1 (); > +} > Index: gcc/builtins.def > =================================================================== > --- gcc/builtins.def (revision 177767) > +++ gcc/builtins.def (working copy) > @@ -763,6 +763,25 @@ DEF_BUILTIN (BUILT_IN_EMUTLS_REGISTER_COMMON, > /* Multiversioning builtin dispatch hook. */ > DEF_GCC_BUILTIN (BUILT_IN_DISPATCH, "dispatch", > BT_FN_INT_PTR_FN_INT_PTR_PTR_VAR, ATTR_NULL) > > +/* Builtins to determine target type and features at run-time. 
*/ > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_CMOV, "target_supports_cmov", > BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_MMX, "target_supports_mmx", > BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_POPCOUNT, > "target_supports_popcount", BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE, "target_supports_sse", > BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE2, "target_supports_sse2", > BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE3, "target_supports_sse3", > BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSSE3, "target_supports_ssse3", > BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE4_1, "target_supports_sse4_1", > BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE4_2, "target_supports_sse4_2", > BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_AMD, "target_is_amd", BT_FN_INT, > ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_INTEL, "target_is_intel", BT_FN_INT, > ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_COREI7_NEHALEM, > "target_is_corei7_nehalem", BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_COREI7_WESTMERE, > "target_is_corei7_westmere", BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_COREI7_SANDYBRIDGE, > "target_is_corei7_sandybridge", BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_AMDFAM10_BARCELONA, > "target_is_amdfam10_barcelona", BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_AMDFAM10_SHANGHAI, > "target_is_amdfam10_shanghai", BT_FN_INT, ATTR_NULL) > +DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_AMDFAM10_ISTANBUL, > "target_is_amdfam10_istanbul", BT_FN_INT, ATTR_NULL) > + > /* Exception support. 
*/ > DEF_BUILTIN_STUB (BUILT_IN_UNWIND_RESUME, "__builtin_unwind_resume") > DEF_BUILTIN_STUB (BUILT_IN_CXA_END_CLEANUP, "__builtin_cxa_end_cleanup") > Index: gcc/mversn-dispatch.c > =================================================================== > --- gcc/mversn-dispatch.c (revision 177767) > +++ gcc/mversn-dispatch.c (working copy) > @@ -135,6 +135,7 @@ along with GCC; see the file COPYING3. If not see > #include "output.h" > #include "vecprim.h" > #include "gimple-pretty-print.h" > +#include "target.h" > > typedef struct cgraph_node* NODEPTR; > DEF_VEC_P (NODEPTR); > @@ -1764,3 +1765,103 @@ struct gimple_opt_pass pass_tree_convert_builtin_d > TODO_update_ssa | TODO_verify_ssa > } > }; > + > +/* Fold calls to __builtin_target_* */ > + > +static unsigned int > +do_fold_builtin_target (void) > +{ > + basic_block bb; > + gimple_stmt_iterator gsi; > + > + /* Go through each stmt looking for __builtin_target_* calls */ > + FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (current_function_decl)) > + { > + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > + { > + gimple stmt = gsi_stmt (gsi); > + gimple assign_stmt; > + tree call_decl; > + tree lhs_retval; > + tree folded_val; > + > + tree ssa_var, tmp_var; > + gimple init_stmt; > + > + if (!is_gimple_call (stmt)) > + continue; > + > + call_decl = gimple_call_fndecl (stmt); > + > + /* Check if it is a __builtin_target_* call. */ > + > + if (call_decl == NULL > + || DECL_NAME (call_decl) == NULL_TREE > + || DECL_BUILT_IN_CLASS (call_decl) != BUILT_IN_NORMAL > + || strstr (IDENTIFIER_POINTER (DECL_NAME (call_decl)), > + "__builtin_target") == NULL) > + continue; > + > + /* If the lhs is NULL there is no need to fold the call. */ > + lhs_retval = gimple_call_lhs(stmt); > + if (lhs_retval == NULL) > + continue; > + > + /* Call the target hook to fold the builtin */ > + folded_val = targetm.fold_builtin(call_decl, 0, NULL, false); > + > + /* If the target does not support the builtin then fold it to zero. 
> */ > + if (folded_val == NULL_TREE) > + folded_val = build_zero_cst (unsigned_type_node); > + > + /* Type cast unsigned value to integer */ > + tmp_var = create_tmp_var (unsigned_type_node, NULL); > + init_stmt = gimple_build_assign (tmp_var, folded_val); > + ssa_var = make_ssa_name (tmp_var, init_stmt); > + gimple_assign_set_lhs (init_stmt, ssa_var); > + mark_symbols_for_renaming (init_stmt); > + > + assign_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs_retval, > ssa_var, 0); > + mark_symbols_for_renaming(assign_stmt); > + > + gsi_insert_after_without_update (&gsi, assign_stmt, GSI_SAME_STMT); > + gsi_insert_after_without_update (&gsi, init_stmt, GSI_SAME_STMT); > + /* Delete the original call. */ > + gsi_remove(&gsi, true); > + } > + } > + > + return 0; > +} > + > +static bool > +gate_fold_builtin_target (void) > +{ > + return true; > +} > + > +/* Pass to fold __builtin_target_* functions */ > + > +struct gimple_opt_pass pass_tree_fold_builtin_target = > +{ > + { > + GIMPLE_PASS, > + "fold_builtin_target", /* name */ > + gate_fold_builtin_target, /* gate */ > + do_fold_builtin_target, /* execute */ > + NULL, /* sub */ > + NULL, /* next */ > + 0, /* static_pass_number */ > + TV_FOLD_BUILTIN_TARGET, /* tv_id */ > + PROP_cfg, /* properties_required */ > + PROP_cfg, /* properties_provided */ > + 0, /* properties_destroyed */ > + 0, /* todo_flags_start */ > + TODO_dump_func | /* todo_flags_finish */ > + TODO_cleanup_cfg | > + TODO_update_ssa | > + TODO_verify_ssa > + } > +}; > + > + > Index: gcc/timevar.def > =================================================================== > --- gcc/timevar.def (revision 177767) > +++ gcc/timevar.def (working copy) > @@ -124,6 +124,7 @@ DEFTIMEVAR (TV_PARSE_INMETH , "parser inl > DEFTIMEVAR (TV_TEMPLATE_INST , "template instantiation") > DEFTIMEVAR (TV_INLINE_HEURISTICS , "inline heuristics") > DEFTIMEVAR (TV_MVERSN_DISPATCH , "multiversion dispatch") > +DEFTIMEVAR (TV_FOLD_BUILTIN_TARGET , "fold __builtin_target calls") > 
DEFTIMEVAR (TV_INTEGRATION , "integration") > DEFTIMEVAR (TV_TREE_GIMPLIFY , "tree gimplify") > DEFTIMEVAR (TV_TREE_EH , "tree eh") > Index: gcc/passes.c > =================================================================== > --- gcc/passes.c (revision 177767) > +++ gcc/passes.c (working copy) > @@ -1249,6 +1249,8 @@ init_optimization_passes (void) > { > struct opt_pass **p = &pass_ipa_multiversion_dispatch.pass.sub; > NEXT_PASS (pass_tree_convert_builtin_dispatch); > + /* Fold calls to __builtin_target_*. */ > + NEXT_PASS (pass_tree_fold_builtin_target); > /* Rebuilding cgraph edges is necessary as the above passes change > the call graph. Otherwise, future optimizations use the old > call graph and make wrong decisions sometimes.*/ > Index: gcc/config/i386/i386.c > =================================================================== > --- gcc/config/i386/i386.c (revision 177767) > +++ gcc/config/i386/i386.c (working copy) > @@ -58,6 +58,8 @@ along with GCC; see the file COPYING3. If not see > #include "sched-int.h" > #include "sbitmap.h" > #include "fibheap.h" > +#include "tree-flow.h" > +#include "tree-pass.h" > > enum upper_128bits_state > { > @@ -7867,6 +7869,338 @@ ix86_build_builtin_va_list (void) > return ret; > } > > +/* Returns a struct type with name NAME and number of fields equal to > + NUM_FIELDS. Each field is a unsigned int bit field of length 1 bit. */ > + > +static tree > +build_struct_with_one_bit_fields (int num_fields, const char *name) > +{ > + int i; > + char field_name [10]; > + tree field = NULL_TREE, field_chain = NULL_TREE; > + tree type = make_node (RECORD_TYPE); > + > + strcpy (field_name, "k_field"); > + > + for (i = 0; i < num_fields; i++) > + { > + /* Name the fields, 0_field, 1_field, ... 
*/ > + field_name [0] = '0' + i; > + field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, > + get_identifier (field_name), unsigned_type_node); > + DECL_BIT_FIELD (field) = 1; > + DECL_SIZE (field) = bitsize_one_node; > + if (field_chain != NULL_TREE) > + DECL_CHAIN (field) = field_chain; > + field_chain = field; > + } > + finish_builtin_struct (type, name, field_chain, NULL_TREE); > + return type; > +} > + > +/* Returns a VAR_DECL of type TYPE and name NAME. */ > + > +static tree > +make_var_decl (tree type, const char *name) > +{ > + tree new_decl; > + struct varpool_node *vnode; > + > + new_decl = build_decl (UNKNOWN_LOCATION, > + VAR_DECL, > + get_identifier(name), > + type); > + > + DECL_EXTERNAL (new_decl) = 1; > + TREE_STATIC (new_decl) = 1; > + TREE_PUBLIC (new_decl) = 1; > + DECL_INITIAL (new_decl) = 0; > + DECL_ARTIFICIAL (new_decl) = 0; > + DECL_PRESERVE_P (new_decl) = 1; > + > + make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl)); > + assemble_variable (new_decl, 0, 0, 0); > + > + vnode = varpool_node (new_decl); > + gcc_assert (vnode != NULL); > + /* Set finalized to 1, otherwise it asserts in function "write_symbol" in > + lto-streamer-out.c. */ > + vnode->finalized = 1; > + > + return new_decl; > +} > + > +/* Traverses the chain of fields in STRUCT_TYPE and returns the FIELD_NUM > + numbered field. */ > + > +static tree > +get_field_from_struct (tree struct_type, int field_num) > +{ > + int i; > + tree field = TYPE_FIELDS (struct_type); > + > + for (i = 0; i < field_num; i++, field = DECL_CHAIN(field)) > + { > + gcc_assert (field != NULL_TREE); > + } > + > + return field; > +} > + > +/* Create a new static constructor that calls __cpu_indicator_init () > + function defined in libgcc/config/i386-cpuinfo.c which runs cpuid > + to figure out the type of the target. 
*/ > + > +static tree > +make_constructor_to_get_target_type (const char *name) > +{ > + tree decl, type, t; > + gimple_seq seq; > + basic_block new_bb; > + tree old_current_function_decl; > + > + tree __cpu_indicator_int_decl; > + gimple constructor_body; > + > + > + type = build_function_type_list (void_type_node, NULL_TREE); > + > + /* Make a call stmt to __cpu_indicator_init */ > + __cpu_indicator_int_decl = build_fn_decl ("__cpu_indicator_init", type); > + constructor_body = gimple_build_call (__cpu_indicator_int_decl, 0); > + DECL_EXTERNAL (__cpu_indicator_int_decl) = 1; > + > + decl = build_fn_decl (name, type); > + > + DECL_NAME (decl) = get_identifier (name); > + SET_DECL_ASSEMBLER_NAME (decl, DECL_NAME (decl)); > + gcc_assert (cgraph_node (decl) != NULL); > + > + TREE_USED (decl) = 1; > + DECL_ARTIFICIAL (decl) = 1; > + DECL_IGNORED_P (decl) = 0; > + TREE_PUBLIC (decl) = 0; > + DECL_UNINLINABLE (decl) = 1; > + DECL_EXTERNAL (decl) = 0; > + DECL_CONTEXT (decl) = NULL_TREE; > + DECL_INITIAL (decl) = make_node (BLOCK); > + DECL_STATIC_CONSTRUCTOR (decl) = 1; > + TREE_READONLY (decl) = 0; > + DECL_PURE_P (decl) = 0; > + > + /* This is a comdat. */ > + make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); > + > + /* Build result decl and add to function_decl. */ > + t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node); > + DECL_ARTIFICIAL (t) = 1; > + DECL_IGNORED_P (t) = 1; > + DECL_RESULT (decl) = t; > + > + gimplify_function_tree (decl); > + > + /* Build CFG for this function. 
*/ > + > + old_current_function_decl = current_function_decl; > + push_cfun (DECL_STRUCT_FUNCTION (decl)); > + current_function_decl = decl; > + init_empty_tree_cfg_for_function (DECL_STRUCT_FUNCTION (decl)); > + cfun->curr_properties |= > + (PROP_gimple_lcf | PROP_gimple_leh | PROP_cfg | PROP_referenced_vars | > + PROP_ssa); > + new_bb = create_empty_bb (ENTRY_BLOCK_PTR); > + make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU); > + > + /* XXX: Not sure if the edge commented below is necessary. If I add this > + edge, it fails in gimple_verify_flow_info in tree-cfg.c in condition : > + " if (e->flags & EDGE_FALLTHRU)" > + during -fprofile-generate. > + Otherwise, it is fine. Deleting this edge does not break anything. > + Commenting this so that it is clear I am intentionally not doing this.*/ > + /* make_edge (new_bb, EXIT_BLOCK_PTR, EDGE_FALLTHRU); */ > + > + seq = gimple_seq_alloc_with_stmt (constructor_body); > + > + set_bb_seq (new_bb, seq); > + gimple_set_bb (constructor_body, new_bb); > + > + /* Set the lexical block of the constructor body. Fails the inliner > + other wise. */ > + gimple_set_block (constructor_body, DECL_INITIAL (decl)); > + > + /* This call is very important if this pass runs when the IR is in > + SSA form. It breaks things in strange ways otherwise. 
*/ > + init_tree_ssa (DECL_STRUCT_FUNCTION (decl)); > + /* add_referenced_var (version_selector_var); */ > + > + cgraph_add_new_function (decl, true); > + cgraph_call_function_insertion_hooks (cgraph_node (decl)); > + cgraph_mark_needed_node (cgraph_node (decl)); > + > + pop_cfun (); > + current_function_decl = old_current_function_decl; > + return decl; > +} > + > +/* FNDECL is a __builtin_target_* call that is folded into an integer defined > + in libgcc/config/i386/i386-cpuinfo.c */ > + > +static tree > +fold_builtin_target (tree fndecl) > +{ > + /* This is the order of bit-fields in __processor_features in > + i386-cpuinfo.c */ > + enum processor_features > + { > + F_CMOV = 0, > + F_MMX, > + F_POPCNT, > + F_SSE, > + F_SSE2, > + F_SSE3, > + F_SSSE3, > + F_SSE4_1, > + F_SSE4_2, > + F_MAX > + }; > + > + /* This is the order of bit-fields in __processor_model in > + i386-cpuinfo.c */ > + enum processor_model > + { > + M_AMD = 0, > + M_INTEL, > + M_COREI7_NEHALEM, > + M_COREI7_WESTMERE, > + M_COREI7_SANDYBRIDGE, > + M_AMDFAM10_BARCELONA, > + M_AMDFAM10_SHANGHAI, > + M_AMDFAM10_ISTANBUL, > + M_MAX > + }; > + > + static tree __processor_features_type = NULL_TREE; > + static tree __cpu_features_var = NULL_TREE; > + static tree __processor_model_type = NULL_TREE; > + static tree __cpu_model_var = NULL_TREE; > + static tree ctor_decl = NULL_TREE; > + static tree field; > + static tree which_struct; > + > + /* Make a call to __cpu_indicatior_init in a constructor. > + Function __cpu_indicator_init is defined in i386-cpuinfo.c. 
*/ > + if (ctor_decl == NULL_TREE) > + ctor_decl = make_constructor_to_get_target_type > + ("__cpu_indicator_init_ctor"); > + > + if (__processor_features_type == NULL_TREE) > + __processor_features_type = build_struct_with_one_bit_fields (F_MAX, > + "__processor_features"); > + > + if (__processor_model_type == NULL_TREE) > + __processor_model_type = build_struct_with_one_bit_fields (M_MAX, > + "__processor_model"); > + > + if (__cpu_features_var == NULL_TREE) > + __cpu_features_var = make_var_decl (__processor_features_type, > + "__cpu_features"); > + > + if (__cpu_model_var == NULL_TREE) > + __cpu_model_var = make_var_decl (__processor_model_type, > + "__cpu_model"); > + > + /* Look at fndecl code to identify the field requested. */ > + switch (DECL_FUNCTION_CODE (fndecl)) > + { > + case BUILT_IN_TARGET_SUPPORTS_CMOV: > + field = get_field_from_struct (__processor_features_type, F_CMOV); > + which_struct = __cpu_features_var; > + break; > + case BUILT_IN_TARGET_SUPPORTS_MMX: > + field = get_field_from_struct (__processor_features_type, F_MMX); > + which_struct = __cpu_features_var; > + break; > + case BUILT_IN_TARGET_SUPPORTS_POPCOUNT: > + field = get_field_from_struct (__processor_features_type, F_POPCNT); > + which_struct = __cpu_features_var; > + break; > + case BUILT_IN_TARGET_SUPPORTS_SSE: > + field = get_field_from_struct (__processor_features_type, F_SSE); > + which_struct = __cpu_features_var; > + break; > + case BUILT_IN_TARGET_SUPPORTS_SSE2: > + field = get_field_from_struct (__processor_features_type, F_SSE2); > + which_struct = __cpu_features_var; > + break; > + case BUILT_IN_TARGET_SUPPORTS_SSE3: > + field = get_field_from_struct (__processor_features_type, F_SSE3); > + which_struct = __cpu_features_var; > + break; > + case BUILT_IN_TARGET_SUPPORTS_SSSE3: > + field = get_field_from_struct (__processor_features_type, F_SSE3); > + which_struct = __cpu_features_var; > + break; > + case BUILT_IN_TARGET_SUPPORTS_SSE4_1: > + field = get_field_from_struct 
(__processor_features_type, F_SSE4_1); > + which_struct = __cpu_features_var; > + break; > + case BUILT_IN_TARGET_SUPPORTS_SSE4_2: > + field = get_field_from_struct (__processor_features_type, F_SSE4_2); > + which_struct = __cpu_features_var; > + break; > + case BUILT_IN_TARGET_IS_AMD: > + field = get_field_from_struct (__processor_model_type, M_AMD);; > + which_struct = __cpu_model_var; > + break; > + case BUILT_IN_TARGET_IS_INTEL: > + field = get_field_from_struct (__processor_model_type, M_INTEL);; > + which_struct = __cpu_model_var; > + break; > + case BUILT_IN_TARGET_IS_COREI7_NEHALEM: > + field = get_field_from_struct (__processor_model_type, > M_COREI7_NEHALEM);; > + which_struct = __cpu_model_var; > + break; > + case BUILT_IN_TARGET_IS_COREI7_WESTMERE: > + field = get_field_from_struct (__processor_model_type, > M_COREI7_WESTMERE);; > + which_struct = __cpu_model_var; > + break; > + case BUILT_IN_TARGET_IS_COREI7_SANDYBRIDGE: > + field = get_field_from_struct (__processor_model_type, > M_COREI7_SANDYBRIDGE);; > + which_struct = __cpu_model_var; > + break; > + case BUILT_IN_TARGET_IS_AMDFAM10_BARCELONA: > + field = get_field_from_struct (__processor_model_type, > M_AMDFAM10_BARCELONA);; > + which_struct = __cpu_model_var; > + break; > + case BUILT_IN_TARGET_IS_AMDFAM10_SHANGHAI: > + field = get_field_from_struct (__processor_model_type, > M_AMDFAM10_SHANGHAI);; > + which_struct = __cpu_model_var; > + break; > + case BUILT_IN_TARGET_IS_AMDFAM10_ISTANBUL: > + field = get_field_from_struct (__processor_model_type, > M_AMDFAM10_ISTANBUL);; > + which_struct = __cpu_model_var; > + break; > + default: > + return NULL_TREE; > + } > + > + return build3 (COMPONENT_REF, TREE_TYPE (field), which_struct, field, > NULL_TREE); > +} > + > +/* Folds __builtin_target_* builtins. 
*/ > + > +static tree > +ix86_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, > + tree *args ATTRIBUTE_UNUSED, bool ignore ATTRIBUTE_UNUSED) > +{ > + const char *decl_name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); > + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL > + && strstr(decl_name, "__builtin_target") != NULL) > + return fold_builtin_target (fndecl); > + > + return NULL_TREE; > +} > + > /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */ > > static void > @@ -35097,6 +35431,9 @@ ix86_autovectorize_vector_sizes (void) > #undef TARGET_BUILD_BUILTIN_VA_LIST > #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list > > +#undef TARGET_FOLD_BUILTIN > +#define TARGET_FOLD_BUILTIN ix86_fold_builtin > + > #undef TARGET_ENUM_VA_LIST_P > #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list > > > -- > This patch is available for review at http://codereview.appspot.com/4893046 >