Committed the attached patch to google/main. Will send a patch for trunk soon.
On Thu, Apr 28, 2011 at 10:03 PM, Xinliang David Li <davi...@google.com> wrote: > Please add regression test cases for the feature. Address the comments > when available. Ok for google/main. > > Thanks, > > David > > On Thu, Apr 28, 2011 at 4:42 PM, Easwaran Raman <era...@google.com> wrote: >> This patch from Silvius Rus adds support for sampled edge profile >> collection to reduce instrumentation run overhead. Bootstraps and no test >> regressions. Ok for google/main? >> >> 2011-04-28 Silvius Rus <silvius....@gmail.com> >> >> * doc/invoke.texi: Document -fprofile-generate-sampling option. >> * gcov-io.h (__gcov_set_sampling_rate): New declaration. >> * profile.c (branch_prob): Add support for sampled profile >> collection. >> * profile.h (add_sampling_to_edge_counters): New declaration. >> * common.opt (fprofile-generate-sampling): New option. >> * tree-profile: Include header files; define EDGE_COUNTER_STMT_COUNT. >> (instrumentation_to_be_sampled, gcov_sample_counter_decl) >> (gcov_sampling_rate_decl): New globals. >> (insert_if_then, add_sampling_wrapper, >> is_instrumentation_to_be_sampled) >> (add_sampling_to_edge_counters, gimple_init_instrumentation_sampling): >> New functions. >> (gimple_init_edge_profiler): Call >> gimple_init_instrumentation_sampling. >> (gimple_gen_edge_profiler): Mark start of instrumentation block. >> * libgcov.c (__gcov_sampling_rate): New extern declaration. >> (gcov_sampling_rate_initialized, __gcov_sample_counter): New globals. >> (gcov_exit): Set sampling rate; minor coding style fixes. >> * params.def (PARAM_PROFILE_GENERATE_SAMPLING_RATE): New parameter. >> >> Index: gcc/doc/invoke.texi >> =================================================================== >> --- gcc/doc/invoke.texi (revision 173136) >> +++ gcc/doc/invoke.texi (working copy) >> @@ -375,7 +375,7 @@ Objective-C and Objective-C++ Dialects}. 
>> -fpartial-inlining -fpeel-loops -fpredictive-commoning @gol >> -fprefetch-loop-arrays @gol >> -fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol >> --fprofile-generate=@var{path} @gol >> +-fprofile-generate=@var{path} -fprofile-generate-sampling @gol >> -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol >> -freciprocal-math -fregmove -frename-registers -freorder-blocks @gol >> -freorder-blocks-and-partition -freorder-functions @gol >> @@ -7923,6 +7923,20 @@ The following options are enabled: @code{-fprofile >> If @var{path} is specified, GCC will look at the @var{path} to find >> the profile feedback data files. See @option{-fprofile-dir}. >> >> +@item -fprofile-generate-sampling >> +@opindex -fprofile-generate-sampling >> + >> +Enable sampling for instrumented binaries. Instead of recording every >> event, >> +record only every N-th event, where N (the sampling rate) can be set either >> +at compile time using >> +@option{--param profile-generate-sampling-rate=@var{value}}, or >> +at execution start time through environment variable >> @samp{GCOV_SAMPLING_RATE}. >> + >> +At this time sampling applies only to branch counters. A sampling rate of >> 100 >> +decreases instrumentated binary slowdown from up to 20x for heavily threaded >> +applications down to around 2x. @option{-fprofile-correction} is always >> +needed with sampling. >> + >> @item -fprofile-use >> @itemx -fprofile-use=@var{path} >> @opindex fprofile-use >> @@ -9138,6 +9152,9 @@ recognize. >> If you want to pass an option that takes an argument, you must use >> @option{-Xassembler} twice, once for the option and once for the argument. >> >> +@item profile-generate-sampling-rate >> +Set the sampling rate with @option{-fprofile-generate-sampling}. 
>> + >> @end table >> >> @node Link Options >> Index: gcc/gcov-io.h >> =================================================================== >> --- gcc/gcov-io.h (revision 173136) >> +++ gcc/gcov-io.h (working copy) >> @@ -544,6 +544,9 @@ struct dyn_imp_mod >> /* Register a new object file module. */ >> extern void __gcov_init (struct gcov_info *) ATTRIBUTE_HIDDEN; >> >> +/* Set sampling rate to RATE. */ >> +extern void __gcov_set_sampling_rate (unsigned int rate); >> + >> /* Called before fork, to avoid double counting. */ >> extern void __gcov_flush (void) ATTRIBUTE_HIDDEN; >> >> Index: gcc/profile.c >> =================================================================== >> --- gcc/profile.c (revision 173136) >> +++ gcc/profile.c (working copy) >> @@ -1210,6 +1210,9 @@ branch_prob (void) >> >> /* Commit changes done by instrumentation. */ >> gsi_commit_edge_inserts (); >> + >> + if (flag_profile_generate_sampling) >> + add_sampling_to_edge_counters (); >> } >> >> free_aux_for_edges (); >> Index: gcc/profile.h >> =================================================================== >> --- gcc/profile.h (revision 173136) >> +++ gcc/profile.h (working copy) >> @@ -47,4 +47,10 @@ extern gcov_type sum_edge_counts (VEC (edge, gc) * >> extern void init_node_map (void); >> extern void del_node_map (void); >> >> +/* Implement sampling to avoid writing to edge counters very often. >> + Many concurrent writes to the same counters, or to counters that share >> + the same cache line leads to up to 30x slowdown on an application running >> + on 8 CPUs. With sampling, the slowdown reduced to 2x. 
*/ >> +extern void add_sampling_to_edge_counters (void); >> + >> #endif /* PROFILE_H */ >> Index: gcc/common.opt >> =================================================================== >> --- gcc/common.opt (revision 173136) >> +++ gcc/common.opt (working copy) >> @@ -1605,6 +1605,10 @@ fprofile-generate= >> Common Joined RejectNegative >> Enable common options for generating profile info for profile feedback >> directed optimizations, and set -fprofile-dir= >> >> +fprofile-generate-sampling >> +Common Var(flag_profile_generate_sampling) >> +Turn on instrumentation sampling with -fprofile-generate with rate set by >> --param profile-generate-sampling-rate or environment variable >> GCOV_SAMPLING_RATE >> + >> fprofile-use >> Common Var(flag_profile_use) >> Enable common options for performing profile feedback directed optimizations >> Index: gcc/tree-profile.c >> =================================================================== >> --- gcc/tree-profile.c (revision 173136) >> +++ gcc/tree-profile.c (working copy) >> @@ -31,6 +31,8 @@ along with GCC; see the file COPYING3. If not see >> #include "coretypes.h" >> #include "tm.h" >> #include "flags.h" >> +#include "target.h" >> +#include "output.h" >> #include "regs.h" >> #include "function.h" >> #include "basic-block.h" >> @@ -44,9 +46,14 @@ along with GCC; see the file COPYING3. If not see >> #include "value-prof.h" >> #include "cgraph.h" >> #include "output.h" >> +#include "params.h" >> +#include "profile.h" >> #include "l-ipo.h" >> #include "profile.h" >> >> +/* Number of statements inserted for each edge counter increment. */ >> +#define EDGE_COUNTER_STMT_COUNT 3 >> + >> static GTY(()) tree gcov_type_node; >> static GTY(()) tree gcov_type_tmp_var; >> static GTY(()) tree tree_interval_profiler_fn; >> @@ -136,7 +143,179 @@ init_ic_make_global_vars (void) >> } >> } >> >> +/* A set of the first statement in each block of statements that need to >> + be applied a sampling wrapper. 
*/ >> +static htab_t instrumentation_to_be_sampled = NULL; >> + >> +/* extern __thread gcov_unsigned_t __gcov_sample_counter */ >> +static tree gcov_sample_counter_decl = NULL_TREE; >> + >> +/* extern gcov_unsigned_t __gcov_sampling_rate */ >> +static tree gcov_sampling_rate_decl = NULL_TREE; >> + >> +/* Insert STMT_IF around given sequence of consecutive statements in the >> + same basic block starting with STMT_START, ending with STMT_END. */ >> + >> +static void >> +insert_if_then (gimple stmt_start, gimple stmt_end, gimple stmt_if) >> +{ >> + gimple_stmt_iterator gsi; >> + basic_block bb_original, bb_before_if, bb_after_if; >> + edge e_if_taken, e_then_join; >> + >> + gsi = gsi_for_stmt (stmt_start); >> + gsi_insert_before (&gsi, stmt_if, GSI_SAME_STMT); >> + bb_original = gsi_bb (gsi); >> + e_if_taken = split_block (bb_original, stmt_if); >> + e_if_taken->flags &= ~EDGE_FALLTHRU; >> + e_if_taken->flags |= EDGE_TRUE_VALUE; >> + e_then_join = split_block (e_if_taken->dest, stmt_end); >> + bb_before_if = e_if_taken->src; >> + bb_after_if = e_then_join->dest; >> + make_edge (bb_before_if, bb_after_if, EDGE_FALSE_VALUE); >> +} >> + >> +/* Transform: >> + >> + ORIGINAL CODE >> + >> + Into: >> + >> + __gcov_sample_counter++; >> + if (__gcov_sample_counter >= __gcov_sampling_rate) >> + { >> + __gcov_sample_counter = 0; >> + ORIGINAL CODE >> + } >> + >> + The original code block starts with STMT_START, is made of STMT_COUNT >> + consecutive statements in the same basic block. 
*/ >> + >> +static void >> +add_sampling_wrapper (gimple stmt_start, int stmt_count) >> +{ >> + int i; >> + tree zero, one, tmp_var, tmp1, tmp2, tmp3; >> + gimple stmt_block_end; >> + gimple stmt_inc_counter1, stmt_inc_counter2, stmt_inc_counter3; >> + gimple stmt_reset_counter, stmt_assign_rate, stmt_if; >> + gimple_stmt_iterator gsi; >> + >> + tmp_var = create_tmp_var (get_gcov_unsigned_t (), "PROF_sample_counter"); >> + tmp1 = make_ssa_name (tmp_var, NULL); >> + tmp2 = make_ssa_name (tmp_var, NULL); >> + >> + /* Create all the new statements needed. */ >> + stmt_inc_counter1 = gimple_build_assign (tmp1, gcov_sample_counter_decl); >> + one = build_int_cst (get_gcov_unsigned_t (), 1); >> + stmt_inc_counter2 = gimple_build_assign_with_ops ( >> + PLUS_EXPR, tmp2, tmp1, one); >> + stmt_inc_counter3 = gimple_build_assign (gcov_sample_counter_decl, tmp2); >> + zero = build_int_cst (get_gcov_unsigned_t (), 0); >> + stmt_reset_counter = gimple_build_assign (gcov_sample_counter_decl, zero); >> + tmp_var = create_tmp_var (get_gcov_unsigned_t (), "PROF_sample_counter"); >> + tmp3 = make_ssa_name (tmp_var, NULL); >> + stmt_assign_rate = gimple_build_assign (tmp3, gcov_sampling_rate_decl); >> + stmt_if = gimple_build_cond (GE_EXPR, tmp2, tmp3, NULL_TREE, NULL_TREE); >> + >> + /* Insert them for now in the original basic block. */ >> + gsi = gsi_for_stmt (stmt_start); >> + gsi_insert_before (&gsi, stmt_inc_counter1, GSI_SAME_STMT); >> + gsi_insert_before (&gsi, stmt_inc_counter2, GSI_SAME_STMT); >> + gsi_insert_before (&gsi, stmt_inc_counter3, GSI_SAME_STMT); >> + gsi_insert_before (&gsi, stmt_assign_rate, GSI_SAME_STMT); >> + gsi_insert_before (&gsi, stmt_reset_counter, GSI_SAME_STMT); >> + >> + /* Move to last statement. */ >> + for (i = 0; i < stmt_count - 1; i++) >> + gsi_next (&gsi); >> + >> + stmt_block_end = gsi_stmt (gsi); >> + gcc_assert (stmt_block_end); >> + >> + /* Insert IF block. 
*/ >> + insert_if_then (stmt_reset_counter, stmt_block_end, stmt_if); >> +} >> + >> +/* Return whether STMT is the beginning of an instrumentation block to be >> + applied sampling. */ >> + >> +static bool >> +is_instrumentation_to_be_sampled (gimple stmt) >> +{ >> + return (htab_find_slot_with_hash (instrumentation_to_be_sampled, stmt, >> + htab_hash_pointer (stmt), NO_INSERT) >> + != NULL); >> +} >> + >> +/* Add sampling wrappers around edge counter code in current function. */ >> + >> void >> +add_sampling_to_edge_counters (void) >> +{ >> + gimple_stmt_iterator gsi; >> + basic_block bb; >> + >> + FOR_EACH_BB_REVERSE (bb) >> + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) >> + { >> + gimple stmt = gsi_stmt (gsi); >> + if (is_instrumentation_to_be_sampled (stmt)) >> + { >> + add_sampling_wrapper (stmt, EDGE_COUNTER_STMT_COUNT); >> + break; >> + } >> + } >> + >> + /* Empty the set of statements performing the edge counter increment. */ >> + if (instrumentation_to_be_sampled) >> + htab_empty (instrumentation_to_be_sampled); >> +} >> + >> +static void >> +gimple_init_instrumentation_sampling (void) >> +{ >> + if (!gcov_sampling_rate_decl) >> + { >> + /* Define __gcov_sampling_rate regardless of >> -fprofile-generate-sampling. >> + Otherwise the extern reference to it from libgcov becomes >> unmatched. 
>> + */ >> + gcov_sampling_rate_decl = build_decl ( >> + UNKNOWN_LOCATION, >> + VAR_DECL, >> + get_identifier ("__gcov_sampling_rate"), >> + get_gcov_unsigned_t ()); >> + TREE_PUBLIC (gcov_sampling_rate_decl) = 1; >> + DECL_ARTIFICIAL (gcov_sampling_rate_decl) = 1; >> + DECL_COMDAT_GROUP (gcov_sampling_rate_decl) >> + = DECL_ASSEMBLER_NAME (gcov_sampling_rate_decl); >> + TREE_STATIC (gcov_sampling_rate_decl) = 1; >> + DECL_INITIAL (gcov_sampling_rate_decl) = build_int_cst ( >> + get_gcov_unsigned_t (), >> + PARAM_VALUE (PARAM_PROFILE_GENERATE_SAMPLING_RATE)); >> + assemble_variable (gcov_sampling_rate_decl, 0, 0, 0); >> + } >> + >> + if (flag_profile_generate_sampling && !instrumentation_to_be_sampled) >> + { >> + instrumentation_to_be_sampled = htab_create (100, htab_hash_pointer, >> + htab_eq_pointer, NULL); >> + gcov_sample_counter_decl = build_decl ( >> + UNKNOWN_LOCATION, >> + VAR_DECL, >> + get_identifier ("__gcov_sample_counter"), >> + get_gcov_unsigned_t ()); >> + TREE_PUBLIC (gcov_sample_counter_decl) = 1; >> + DECL_EXTERNAL (gcov_sample_counter_decl) = 1; >> + DECL_ARTIFICIAL (gcov_sample_counter_decl) = 1; >> + if (targetm.have_tls) >> + DECL_TLS_MODEL (gcov_sample_counter_decl) = >> + decl_default_tls_model (gcov_sample_counter_decl); >> + assemble_variable (gcov_sample_counter_decl, 0, 0, 0); >> + } >> +} >> + >> +void >> gimple_init_edge_profiler (void) >> { >> tree interval_profiler_fn_type; >> @@ -148,6 +327,8 @@ gimple_init_edge_profiler (void) >> tree dc_profiler_fn_type; >> tree average_profiler_fn_type; >> >> + gimple_init_instrumentation_sampling (); >> + >> if (!gcov_type_node) >> { >> char name_buf[32]; >> @@ -277,6 +458,7 @@ gimple_init_edge_profiler (void) >> void >> gimple_gen_edge_profiler (int edgeno, edge e) >> { >> + void** slot; >> tree ref, one; >> gimple stmt1, stmt2, stmt3; >> >> @@ -292,6 +474,15 @@ gimple_gen_edge_profiler (int edgeno, edge e) >> gimple_assign_lhs (stmt1), one); >> gimple_assign_set_lhs (stmt2, make_ssa_name 
(gcov_type_tmp_var, stmt2)); >> stmt3 = gimple_build_assign (unshare_expr (ref), gimple_assign_lhs >> (stmt2)); >> + >> + if (flag_profile_generate_sampling) >> + { >> + slot = htab_find_slot_with_hash (instrumentation_to_be_sampled, stmt1, >> + htab_hash_pointer (stmt1), INSERT); >> + gcc_assert (!*slot); >> + *slot = stmt1; >> + } >> + >> gsi_insert_on_edge (e, stmt1); >> gsi_insert_on_edge (e, stmt2); >> gsi_insert_on_edge (e, stmt3); >> Index: gcc/libgcov.c >> =================================================================== >> --- gcc/libgcov.c (revision 173136) >> +++ gcc/libgcov.c (working copy) >> @@ -83,6 +83,20 @@ void __gcov_merge_delta (gcov_type *counters __at >> #ifdef L_gcov >> #include "gcov-io.c" >> >> +/* Sampling rate. */ >> +extern gcov_unsigned_t __gcov_sampling_rate; >> +static int gcov_sampling_rate_initialized = 0; >> + >> +/* Set sampling rate to RATE. */ >> + >> +void __gcov_set_sampling_rate (unsigned int rate) >> +{ >> + __gcov_sampling_rate = rate; >> +} >> + >> +/* Per thread sample counter. */ >> +THREAD_PREFIX gcov_unsigned_t __gcov_sample_counter = 0; >> + >> /* Chain of per-object gcov structures. */ >> extern struct gcov_info *__gcov_list; >> >> @@ -365,7 +379,7 @@ gcov_exit (void) >> >> { >> /* Check if the level of dirs to strip off specified. */ >> - char *tmp = getenv("GCOV_PREFIX_STRIP"); >> + char *tmp = getenv ("GCOV_PREFIX_STRIP"); >> if (tmp) >> { >> gcov_prefix_strip = atoi (tmp); >> @@ -375,7 +389,7 @@ gcov_exit (void) >> } >> } >> /* Get file name relocation prefix. Non-absolute values are ignored. 
*/ >> - gcov_prefix = getenv("GCOV_PREFIX"); >> + gcov_prefix = getenv ("GCOV_PREFIX"); >> if (gcov_prefix) >> { >> prefix_length = strlen(gcov_prefix); >> @@ -757,6 +771,17 @@ gcov_exit (void) >> void >> __gcov_init (struct gcov_info *info) >> { >> + if (!gcov_sampling_rate_initialized) >> + { >> + const char* env_value_str = getenv ("GCOV_SAMPLING_RATE"); >> + if (env_value_str) >> + { >> + int env_value_int = atoi(env_value_str); >> + if (env_value_int >= 1) >> + __gcov_sampling_rate = env_value_int; >> + } >> + gcov_sampling_rate_initialized = 1; >> + } >> if (!info->version) >> return; >> if (gcov_version (info, info->version, 0)) >> Index: gcc/params.def >> =================================================================== >> --- gcc/params.def (revision 173136) >> +++ gcc/params.def (working copy) >> @@ -929,6 +929,11 @@ DEFPARAM (CXX_MAX_NAMESPACES_FOR_DIAGNOSTIC_HELP, >> "name lookup fails", >> 1000, 0, 0) >> >> +DEFPARAM (PARAM_PROFILE_GENERATE_SAMPLING_RATE, >> + "profile-generate-sampling-rate", >> + "sampling rate with -fprofile-generate-sampling", >> + 100, 0, 2000000000) >> + >> /* >> Local variables: >> mode:c >> >> -- >> This patch is available for review at http://codereview.appspot.com/4438083 >> >
Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi (revision 173392) +++ gcc/doc/invoke.texi (working copy) @@ -376,7 +376,7 @@ Objective-C and Objective-C++ Dialects}. -fpartial-inlining -fpeel-loops -fpredictive-commoning @gol -fprefetch-loop-arrays @gol -fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol --fprofile-generate=@var{path} @gol +-fprofile-generate=@var{path} -fprofile-generate-sampling @gol -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol -freciprocal-math -fregmove -frename-registers -freorder-blocks @gol -freorder-blocks-and-partition -freorder-functions @gol @@ -7977,6 +7977,20 @@ The following options are enabled: @code{-fprofile If @var{path} is specified, GCC will look at the @var{path} to find the profile feedback data files. See @option{-fprofile-dir}. +@item -fprofile-generate-sampling +@opindex fprofile-generate-sampling + +Enable sampling for instrumented binaries. Instead of recording every event, +record only every N-th event, where N (the sampling rate) can be set either +at compile time using +@option{--param profile-generate-sampling-rate=@var{value}}, or +at execution start time through environment variable @samp{GCOV_SAMPLING_RATE}. + +At this time sampling applies only to branch counters. A sampling rate of 100 +decreases instrumented binary slowdown from up to 20x for heavily threaded +applications down to around 2x. @option{-fprofile-correction} is always +needed with sampling. + @item -fprofile-use @itemx -fprofile-use=@var{path} @opindex fprofile-use @@ -9212,6 +9226,9 @@ recognize. If you want to pass an option that takes an argument, you must use @option{-Xassembler} twice, once for the option and once for the argument. +@item profile-generate-sampling-rate +Set the sampling rate with @option{-fprofile-generate-sampling}. 
+ @end table @node Link Options Index: gcc/gcov-io.h =================================================================== --- gcc/gcov-io.h (revision 173392) +++ gcc/gcov-io.h (working copy) @@ -548,6 +548,9 @@ struct dyn_imp_mod /* Register a new object file module. */ extern void __gcov_init (struct gcov_info *) ATTRIBUTE_HIDDEN; +/* Set sampling rate to RATE. */ +extern void __gcov_set_sampling_rate (unsigned int rate); + /* Called before fork, to avoid double counting. */ extern void __gcov_flush (void) ATTRIBUTE_HIDDEN; Index: gcc/profile.c =================================================================== --- gcc/profile.c (revision 173392) +++ gcc/profile.c (working copy) @@ -1221,6 +1221,9 @@ branch_prob (void) /* Commit changes done by instrumentation. */ gsi_commit_edge_inserts (); + + if (flag_profile_generate_sampling) + add_sampling_to_edge_counters (); } free_aux_for_edges (); Index: gcc/profile.h =================================================================== --- gcc/profile.h (revision 173392) +++ gcc/profile.h (working copy) @@ -47,4 +47,10 @@ extern gcov_type sum_edge_counts (VEC (edge, gc) * extern void init_node_map (void); extern void del_node_map (void); +/* Implement sampling to avoid writing to edge counters very often. + Many concurrent writes to the same counters, or to counters that share + the same cache line lead to up to 30x slowdown on an application running + on 8 CPUs. With sampling, the slowdown is reduced to 2x. 
*/ +extern void add_sampling_to_edge_counters (void); + #endif /* PROFILE_H */ Index: gcc/common.opt =================================================================== --- gcc/common.opt (revision 173392) +++ gcc/common.opt (working copy) @@ -1619,6 +1619,10 @@ fprofile-generate= Common Joined RejectNegative Enable common options for generating profile info for profile feedback directed optimizations, and set -fprofile-dir= +fprofile-generate-sampling +Common Var(flag_profile_generate_sampling) +Turn on instrumentation sampling with -fprofile-generate with rate set by --param profile-generate-sampling-rate or environment variable GCOV_SAMPLING_RATE + fprofile-use Common Var(flag_profile_use) Enable common options for performing profile feedback directed optimizations Index: gcc/tree-profile.c =================================================================== --- gcc/tree-profile.c (revision 173392) +++ gcc/tree-profile.c (working copy) @@ -31,6 +31,8 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "tm.h" #include "flags.h" +#include "target.h" +#include "output.h" #include "regs.h" #include "function.h" #include "basic-block.h" @@ -44,11 +46,16 @@ along with GCC; see the file COPYING3. If not see #include "value-prof.h" #include "cgraph.h" #include "output.h" +#include "params.h" +#include "profile.h" #include "l-ipo.h" #include "profile.h" #include "target.h" #include "output.h" +/* Number of statements inserted for each edge counter increment. */ +#define EDGE_COUNTER_STMT_COUNT 3 + static GTY(()) tree gcov_type_node; static GTY(()) tree gcov_type_tmp_var; static GTY(()) tree tree_interval_profiler_fn; @@ -146,7 +153,178 @@ init_ic_make_global_vars (void) } } +/* A pointer-set of the first statement in each block of statements that need to + be applied a sampling wrapper. 
*/ +static struct pointer_set_t *instrumentation_to_be_sampled = NULL; + +/* extern __thread gcov_unsigned_t __gcov_sample_counter */ +static tree gcov_sample_counter_decl = NULL_TREE; + +/* extern gcov_unsigned_t __gcov_sampling_rate */ +static tree gcov_sampling_rate_decl = NULL_TREE; + +/* Insert STMT_IF around given sequence of consecutive statements in the + same basic block starting with STMT_START, ending with STMT_END. */ + +static void +insert_if_then (gimple stmt_start, gimple stmt_end, gimple stmt_if) +{ + gimple_stmt_iterator gsi; + basic_block bb_original, bb_before_if, bb_after_if; + edge e_if_taken, e_then_join; + + gsi = gsi_for_stmt (stmt_start); + gsi_insert_before (&gsi, stmt_if, GSI_SAME_STMT); + bb_original = gsi_bb (gsi); + e_if_taken = split_block (bb_original, stmt_if); + e_if_taken->flags &= ~EDGE_FALLTHRU; + e_if_taken->flags |= EDGE_TRUE_VALUE; + e_then_join = split_block (e_if_taken->dest, stmt_end); + bb_before_if = e_if_taken->src; + bb_after_if = e_then_join->dest; + make_edge (bb_before_if, bb_after_if, EDGE_FALSE_VALUE); +} + +/* Transform: + + ORIGINAL CODE + + Into: + + __gcov_sample_counter++; + if (__gcov_sample_counter >= __gcov_sampling_rate) + { + __gcov_sample_counter = 0; + ORIGINAL CODE + } + + The original code block starts with STMT_START and ends with STMT_END, + consecutive statements in the same basic block. */ + +static void +add_sampling_wrapper (gimple stmt_start, gimple stmt_end) +{ + tree zero, one, tmp_var, tmp1, tmp2, tmp3; + gimple stmt_inc_counter1, stmt_inc_counter2, stmt_inc_counter3; + gimple stmt_reset_counter, stmt_assign_rate, stmt_if; + gimple_stmt_iterator gsi; + + tmp_var = create_tmp_reg (get_gcov_unsigned_t (), "PROF_sample"); + tmp1 = make_ssa_name (tmp_var, NULL); + tmp2 = make_ssa_name (tmp_var, NULL); + + /* Create all the new statements needed. 
*/ + stmt_inc_counter1 = gimple_build_assign (tmp1, gcov_sample_counter_decl); + one = build_int_cst (get_gcov_unsigned_t (), 1); + stmt_inc_counter2 = gimple_build_assign_with_ops ( + PLUS_EXPR, tmp2, tmp1, one); + stmt_inc_counter3 = gimple_build_assign (gcov_sample_counter_decl, tmp2); + zero = build_int_cst (get_gcov_unsigned_t (), 0); + stmt_reset_counter = gimple_build_assign (gcov_sample_counter_decl, zero); + tmp3 = make_ssa_name (tmp_var, NULL); + stmt_assign_rate = gimple_build_assign (tmp3, gcov_sampling_rate_decl); + stmt_if = gimple_build_cond (GE_EXPR, tmp2, tmp3, NULL_TREE, NULL_TREE); + + /* Insert them for now in the original basic block. */ + gsi = gsi_for_stmt (stmt_start); + gsi_insert_before (&gsi, stmt_inc_counter1, GSI_SAME_STMT); + gsi_insert_before (&gsi, stmt_inc_counter2, GSI_SAME_STMT); + gsi_insert_before (&gsi, stmt_inc_counter3, GSI_SAME_STMT); + gsi_insert_before (&gsi, stmt_assign_rate, GSI_SAME_STMT); + gsi_insert_before (&gsi, stmt_reset_counter, GSI_SAME_STMT); + + /* Insert IF block. */ + insert_if_then (stmt_reset_counter, stmt_end, stmt_if); +} + +/* Return whether STMT is the beginning of an instrumentation block to be + applied sampling. */ + +static bool +is_instrumentation_to_be_sampled (gimple stmt) +{ + return pointer_set_contains (instrumentation_to_be_sampled, stmt); +} + +/* Add sampling wrappers around edge counter code in current function. */ + void +add_sampling_to_edge_counters (void) +{ + gimple_stmt_iterator gsi; + basic_block bb; + + FOR_EACH_BB_REVERSE (bb) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + if (is_instrumentation_to_be_sampled (stmt)) + { + gimple stmt_end; + int i; + /* The code for edge counter increment has EDGE_COUNTER_STMT_COUNT + gimple statements. Advance that many statements to find the + last statement. 
*/ + for (i = 0; i < EDGE_COUNTER_STMT_COUNT - 1; i++) + gsi_next (&gsi); + stmt_end = gsi_stmt (gsi); + gcc_assert (stmt_end); + add_sampling_wrapper (stmt, stmt_end); + break; + } + } + + /* Free the pointer set. */ + if (instrumentation_to_be_sampled) + { + pointer_set_destroy (instrumentation_to_be_sampled); + instrumentation_to_be_sampled = NULL; + } +} + +static void +gimple_init_instrumentation_sampling (void) +{ + if (!gcov_sampling_rate_decl) + { + /* Define __gcov_sampling_rate regardless of -fprofile-generate-sampling. + Otherwise the extern reference to it from libgcov becomes unmatched. + */ + gcov_sampling_rate_decl = build_decl ( + UNKNOWN_LOCATION, + VAR_DECL, + get_identifier ("__gcov_sampling_rate"), + get_gcov_unsigned_t ()); + TREE_PUBLIC (gcov_sampling_rate_decl) = 1; + DECL_ARTIFICIAL (gcov_sampling_rate_decl) = 1; + DECL_COMDAT_GROUP (gcov_sampling_rate_decl) + = DECL_ASSEMBLER_NAME (gcov_sampling_rate_decl); + TREE_STATIC (gcov_sampling_rate_decl) = 1; + DECL_INITIAL (gcov_sampling_rate_decl) = build_int_cst ( + get_gcov_unsigned_t (), + PARAM_VALUE (PARAM_PROFILE_GENERATE_SAMPLING_RATE)); + assemble_variable (gcov_sampling_rate_decl, 0, 0, 0); + } + + if (flag_profile_generate_sampling && !instrumentation_to_be_sampled) + { + instrumentation_to_be_sampled = pointer_set_create (); + gcov_sample_counter_decl = build_decl ( + UNKNOWN_LOCATION, + VAR_DECL, + get_identifier ("__gcov_sample_counter"), + get_gcov_unsigned_t ()); + TREE_PUBLIC (gcov_sample_counter_decl) = 1; + DECL_EXTERNAL (gcov_sample_counter_decl) = 1; + DECL_ARTIFICIAL (gcov_sample_counter_decl) = 1; + if (targetm.have_tls) + DECL_TLS_MODEL (gcov_sample_counter_decl) = + decl_default_tls_model (gcov_sample_counter_decl); + assemble_variable (gcov_sample_counter_decl, 0, 0, 0); + } +} + +void gimple_init_edge_profiler (void) { tree interval_profiler_fn_type; @@ -158,6 +336,8 @@ gimple_init_edge_profiler (void) tree dc_profiler_fn_type; tree average_profiler_fn_type; + 
gimple_init_instrumentation_sampling (); + if (!gcov_type_node) { char name_buf[32]; @@ -302,6 +482,10 @@ gimple_gen_edge_profiler (int edgeno, edge e) gimple_assign_lhs (stmt1), one); gimple_assign_set_lhs (stmt2, make_ssa_name (gcov_type_tmp_var, stmt2)); stmt3 = gimple_build_assign (unshare_expr (ref), gimple_assign_lhs (stmt2)); + + if (flag_profile_generate_sampling) + pointer_set_insert (instrumentation_to_be_sampled, stmt1); + gsi_insert_on_edge (e, stmt1); gsi_insert_on_edge (e, stmt2); gsi_insert_on_edge (e, stmt3); Index: gcc/libgcov.c =================================================================== --- gcc/libgcov.c (revision 173392) +++ gcc/libgcov.c (working copy) @@ -83,6 +83,20 @@ void __gcov_merge_delta (gcov_type *counters __at #ifdef L_gcov #include "gcov-io.c" +/* Sampling rate. */ +extern gcov_unsigned_t __gcov_sampling_rate; +static int gcov_sampling_rate_initialized = 0; + +/* Set sampling rate to RATE. */ + +void __gcov_set_sampling_rate (unsigned int rate) +{ + __gcov_sampling_rate = rate; +} + +/* Per thread sample counter. */ +THREAD_PREFIX gcov_unsigned_t __gcov_sample_counter = 0; + /* Chain of per-object gcov structures. */ extern struct gcov_info *__gcov_list; @@ -365,7 +379,7 @@ gcov_exit (void) { /* Check if the level of dirs to strip off specified. */ - char *tmp = getenv("GCOV_PREFIX_STRIP"); + char *tmp = getenv ("GCOV_PREFIX_STRIP"); if (tmp) { gcov_prefix_strip = atoi (tmp); @@ -375,7 +389,7 @@ gcov_exit (void) } } /* Get file name relocation prefix. Non-absolute values are ignored. 
*/ - gcov_prefix = getenv("GCOV_PREFIX"); + gcov_prefix = getenv ("GCOV_PREFIX"); if (gcov_prefix) { prefix_length = strlen(gcov_prefix); @@ -759,6 +773,17 @@ gcov_exit (void) void __gcov_init (struct gcov_info *info) { + if (!gcov_sampling_rate_initialized) + { + const char* env_value_str = getenv ("GCOV_SAMPLING_RATE"); + if (env_value_str) + { + int env_value_int = atoi(env_value_str); + if (env_value_int >= 1) + __gcov_sampling_rate = env_value_int; + } + gcov_sampling_rate_initialized = 1; + } if (!info->version) return; if (gcov_version (info, info->version, 0)) Index: gcc/params.def =================================================================== --- gcc/params.def (revision 173392) +++ gcc/params.def (working copy) @@ -880,6 +880,11 @@ DEFPARAM (PARAM_MAX_LIPO_MEMORY, "don't import aux files if memory consumption exceeds this value", 2400000, 0, 0) +DEFPARAM (PARAM_PROFILE_GENERATE_SAMPLING_RATE, + "profile-generate-sampling-rate", + "sampling rate with -fprofile-generate-sampling", + 100, 0, 2000000000) + /* Used for debugging purpose. Tell the compiler to find the gcda file in the current directory. */ DEFPARAM (PARAM_GCOV_DEBUG,