Committed the attached patch to google/main. Will send a patch for trunk soon.


On Thu, Apr 28, 2011 at 10:03 PM, Xinliang David Li <davi...@google.com> wrote:
> Please add regression test cases for the feature. Address the comments
> when available. Ok for google/main.
>
> Thanks,
>
> David
>
> On Thu, Apr 28, 2011 at 4:42 PM, Easwaran Raman <era...@google.com> wrote:
>> This patch from Silvius Rus  adds support for sampled edge profile 
>> collection to reduce instrumentation run overhead. Bootstraps and no test 
>> regressions. Ok for google/main?
>>
>> 2011-04-28  Silvius Rus  <silvius....@gmail.com>
>>
>>        * doc/invoke.texi: Document -fprofile-generate-sampling option.
>>        * gcov-io.h (__gcov_set_sampling_rate): New declaration.
>>        * profile.c (branch_prob): Add support for sampled profile
>>        collection.
>>        * profile.h (add_sampling_to_edge_counters): New declaration.
>>        * common.opt (fprofile-generate-sampling): New option.
>>        * tree-profile: Include header files; define EDGE_COUNTER_STMT_COUNT.
>>        (instrumentation_to_be_sampled, gcov_sample_counter_decl)
>>        (gcov_sampling_rate_decl): New globals.
>>        (insert_if_then, add_sampling_wrapper, 
>> is_instrumentation_to_be_sampled)
>>        (add_sampling_to_edge_counters, gimple_init_instrumentation_sampling):
>>        New functions.
>>        (gimple_init_edge_profiler): Call 
>> gimple_init_instrumentation_sampling.
>>        (gimple_gen_edge_profiler): Mark start of instrumentation block.
>>        * libgcov.c (__gcov_sampling_rate): New extern declaration.
>>        (gcov_sampling_rate_initialized, __gcov_sample_counter): New globals.
>>        (gcov_exit): Set sampling rate; minor coding style fixes.
>>        * params.def (PARAM_PROFILE_GENERATE_SAMPLING_RATE): New parameter.
>>
>> Index: gcc/doc/invoke.texi
>> ===================================================================
>> --- gcc/doc/invoke.texi (revision 173136)
>> +++ gcc/doc/invoke.texi (working copy)
>> @@ -375,7 +375,7 @@ Objective-C and Objective-C++ Dialects}.
>>  -fpartial-inlining -fpeel-loops -fpredictive-commoning @gol
>>  -fprefetch-loop-arrays @gol
>>  -fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
>> --fprofile-generate=@var{path} @gol
>> +-fprofile-generate=@var{path} -fprofile-generate-sampling @gol
>>  -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
>>  -freciprocal-math -fregmove -frename-registers -freorder-blocks @gol
>>  -freorder-blocks-and-partition -freorder-functions @gol
>> @@ -7923,6 +7923,20 @@ The following options are enabled: @code{-fprofile
>>  If @var{path} is specified, GCC will look at the @var{path} to find
>>  the profile feedback data files. See @option{-fprofile-dir}.
>>
>> +@item -fprofile-generate-sampling
>> +@opindex -fprofile-generate-sampling
>> +
>> +Enable sampling for instrumented binaries.  Instead of recording every 
>> event,
>> +record only every N-th event, where N (the sampling rate) can be set either
>> +at compile time using
>> +@option{--param profile-generate-sampling-rate=@var{value}}, or
>> +at execution start time through environment variable 
>> @samp{GCOV_SAMPLING_RATE}.
>> +
>> +At this time sampling applies only to branch counters.  A sampling rate of 
>> 100
>> +decreases instrumentated binary slowdown from up to 20x for heavily threaded
>> +applications down to around 2x.  @option{-fprofile-correction} is always
>> +needed with sampling.
>> +
>>  @item -fprofile-use
>>  @itemx -fprofile-use=@var{path}
>>  @opindex fprofile-use
>> @@ -9138,6 +9152,9 @@ recognize.
>>  If you want to pass an option that takes an argument, you must use
>>  @option{-Xassembler} twice, once for the option and once for the argument.
>>
>> +@item profile-generate-sampling-rate
>> +Set the sampling rate with @option{-fprofile-generate-sampling}.
>> +
>>  @end table
>>
>>  @node Link Options
>> Index: gcc/gcov-io.h
>> ===================================================================
>> --- gcc/gcov-io.h       (revision 173136)
>> +++ gcc/gcov-io.h       (working copy)
>> @@ -544,6 +544,9 @@ struct dyn_imp_mod
>>  /* Register a new object file module.  */
>>  extern void __gcov_init (struct gcov_info *) ATTRIBUTE_HIDDEN;
>>
>> +/* Set sampling rate to RATE.  */
>> +extern void __gcov_set_sampling_rate (unsigned int rate);
>> +
>>  /* Called before fork, to avoid double counting.  */
>>  extern void __gcov_flush (void) ATTRIBUTE_HIDDEN;
>>
>> Index: gcc/profile.c
>> ===================================================================
>> --- gcc/profile.c       (revision 173136)
>> +++ gcc/profile.c       (working copy)
>> @@ -1210,6 +1210,9 @@ branch_prob (void)
>>
>>       /* Commit changes done by instrumentation.  */
>>       gsi_commit_edge_inserts ();
>> +
>> +      if (flag_profile_generate_sampling)
>> +        add_sampling_to_edge_counters ();
>>     }
>>
>>   free_aux_for_edges ();
>> Index: gcc/profile.h
>> ===================================================================
>> --- gcc/profile.h       (revision 173136)
>> +++ gcc/profile.h       (working copy)
>> @@ -47,4 +47,10 @@ extern gcov_type sum_edge_counts (VEC (edge, gc) *
>>  extern void init_node_map (void);
>>  extern void del_node_map (void);
>>
>> +/* Implement sampling to avoid writing to edge counters very often.
>> +   Many concurrent writes to the same counters, or to counters that share
>> +   the same cache line leads to up to 30x slowdown on an application running
>> +   on 8 CPUs.  With sampling, the slowdown reduced to 2x.  */
>> +extern void add_sampling_to_edge_counters (void);
>> +
>>  #endif /* PROFILE_H */
>> Index: gcc/common.opt
>> ===================================================================
>> --- gcc/common.opt      (revision 173136)
>> +++ gcc/common.opt      (working copy)
>> @@ -1605,6 +1605,10 @@ fprofile-generate=
>>  Common Joined RejectNegative
>>  Enable common options for generating profile info for profile feedback 
>> directed optimizations, and set -fprofile-dir=
>>
>> +fprofile-generate-sampling
>> +Common Var(flag_profile_generate_sampling)
>> +Turn on instrumentation sampling with -fprofile-generate with rate set by 
>> --param profile-generate-sampling-rate or environment variable 
>> GCOV_SAMPLING_RATE
>> +
>>  fprofile-use
>>  Common Var(flag_profile_use)
>>  Enable common options for performing profile feedback directed optimizations
>> Index: gcc/tree-profile.c
>> ===================================================================
>> --- gcc/tree-profile.c  (revision 173136)
>> +++ gcc/tree-profile.c  (working copy)
>> @@ -31,6 +31,8 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "coretypes.h"
>>  #include "tm.h"
>>  #include "flags.h"
>> +#include "target.h"
>> +#include "output.h"
>>  #include "regs.h"
>>  #include "function.h"
>>  #include "basic-block.h"
>> @@ -44,9 +46,14 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "value-prof.h"
>>  #include "cgraph.h"
>>  #include "output.h"
>> +#include "params.h"
>> +#include "profile.h"
>>  #include "l-ipo.h"
>>  #include "profile.h"
>>
>> +/* Number of statements inserted for each edge counter increment.  */
>> +#define EDGE_COUNTER_STMT_COUNT 3
>> +
>>  static GTY(()) tree gcov_type_node;
>>  static GTY(()) tree gcov_type_tmp_var;
>>  static GTY(()) tree tree_interval_profiler_fn;
>> @@ -136,7 +143,179 @@ init_ic_make_global_vars (void)
>>     }
>>  }
>>
>> +/* A set of the first statement in each block of statements that need to
>> +   be applied a sampling wrapper.  */
>> +static htab_t instrumentation_to_be_sampled = NULL;
>> +
>> +/* extern __thread gcov_unsigned_t __gcov_sample_counter  */
>> +static tree gcov_sample_counter_decl = NULL_TREE;
>> +
>> +/* extern gcov_unsigned_t __gcov_sampling_rate  */
>> +static tree gcov_sampling_rate_decl = NULL_TREE;
>> +
>> +/* Insert STMT_IF around given sequence of consecutive statements in the
>> +   same basic block starting with STMT_START, ending with STMT_END.  */
>> +
>> +static void
>> +insert_if_then (gimple stmt_start, gimple stmt_end, gimple stmt_if)
>> +{
>> +  gimple_stmt_iterator gsi;
>> +  basic_block bb_original, bb_before_if, bb_after_if;
>> +  edge e_if_taken, e_then_join;
>> +
>> +  gsi = gsi_for_stmt (stmt_start);
>> +  gsi_insert_before (&gsi, stmt_if, GSI_SAME_STMT);
>> +  bb_original = gsi_bb (gsi);
>> +  e_if_taken = split_block (bb_original, stmt_if);
>> +  e_if_taken->flags &= ~EDGE_FALLTHRU;
>> +  e_if_taken->flags |= EDGE_TRUE_VALUE;
>> +  e_then_join = split_block (e_if_taken->dest, stmt_end);
>> +  bb_before_if = e_if_taken->src;
>> +  bb_after_if = e_then_join->dest;
>> +  make_edge (bb_before_if, bb_after_if, EDGE_FALSE_VALUE);
>> +}
>> +
>> +/* Transform:
>> +
>> +   ORIGINAL CODE
>> +
>> +   Into:
>> +
>> +   __gcov_sample_counter++;
>> +   if (__gcov_sample_counter >= __gcov_sampling_rate)
>> +     {
>> +       __gcov_sample_counter = 0;
>> +       ORIGINAL CODE
>> +     }
>> +
>> +   The original code block starts with STMT_START, is made of STMT_COUNT
>> +   consecutive statements in the same basic block.  */
>> +
>> +static void
>> +add_sampling_wrapper (gimple stmt_start, int stmt_count)
>> +{
>> +  int i;
>> +  tree zero, one, tmp_var, tmp1, tmp2, tmp3;
>> +  gimple stmt_block_end;
>> +  gimple stmt_inc_counter1, stmt_inc_counter2, stmt_inc_counter3;
>> +  gimple stmt_reset_counter, stmt_assign_rate, stmt_if;
>> +  gimple_stmt_iterator gsi;
>> +
>> +  tmp_var = create_tmp_var (get_gcov_unsigned_t (), "PROF_sample_counter");
>> +  tmp1 = make_ssa_name (tmp_var, NULL);
>> +  tmp2 = make_ssa_name (tmp_var, NULL);
>> +
>> +  /* Create all the new statements needed.  */
>> +  stmt_inc_counter1 = gimple_build_assign (tmp1, gcov_sample_counter_decl);
>> +  one = build_int_cst (get_gcov_unsigned_t (), 1);
>> +  stmt_inc_counter2 = gimple_build_assign_with_ops (
>> +      PLUS_EXPR, tmp2, tmp1, one);
>> +  stmt_inc_counter3 = gimple_build_assign (gcov_sample_counter_decl, tmp2);
>> +  zero = build_int_cst (get_gcov_unsigned_t (), 0);
>> +  stmt_reset_counter = gimple_build_assign (gcov_sample_counter_decl, zero);
>> +  tmp_var = create_tmp_var (get_gcov_unsigned_t (), "PROF_sample_counter");
>> +  tmp3 = make_ssa_name (tmp_var, NULL);
>> +  stmt_assign_rate = gimple_build_assign (tmp3, gcov_sampling_rate_decl);
>> +  stmt_if = gimple_build_cond (GE_EXPR, tmp2, tmp3, NULL_TREE, NULL_TREE);
>> +
>> +  /* Insert them for now in the original basic block.  */
>> +  gsi = gsi_for_stmt (stmt_start);
>> +  gsi_insert_before (&gsi, stmt_inc_counter1, GSI_SAME_STMT);
>> +  gsi_insert_before (&gsi, stmt_inc_counter2, GSI_SAME_STMT);
>> +  gsi_insert_before (&gsi, stmt_inc_counter3, GSI_SAME_STMT);
>> +  gsi_insert_before (&gsi, stmt_assign_rate, GSI_SAME_STMT);
>> +  gsi_insert_before (&gsi, stmt_reset_counter, GSI_SAME_STMT);
>> +
>> +  /* Move to last statement.  */
>> +  for (i = 0; i < stmt_count - 1; i++)
>> +    gsi_next (&gsi);
>> +
>> +  stmt_block_end = gsi_stmt (gsi);
>> +  gcc_assert (stmt_block_end);
>> +
>> +  /* Insert IF block.  */
>> +  insert_if_then (stmt_reset_counter, stmt_block_end, stmt_if);
>> +}
>> +
>> +/* Return whether STMT is the beginning of an instrumentation block to be
>> +   applied sampling.  */
>> +
>> +static bool
>> +is_instrumentation_to_be_sampled (gimple stmt)
>> +{
>> +  return (htab_find_slot_with_hash (instrumentation_to_be_sampled, stmt,
>> +                                    htab_hash_pointer (stmt), NO_INSERT)
>> +          != NULL);
>> +}
>> +
>> +/* Add sampling wrappers around edge counter code in current function.  */
>> +
>>  void
>> +add_sampling_to_edge_counters (void)
>> +{
>> +  gimple_stmt_iterator gsi;
>> +  basic_block bb;
>> +
>> +  FOR_EACH_BB_REVERSE (bb)
>> +    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> +      {
>> +        gimple stmt = gsi_stmt (gsi);
>> +        if (is_instrumentation_to_be_sampled (stmt))
>> +          {
>> +            add_sampling_wrapper (stmt, EDGE_COUNTER_STMT_COUNT);
>> +            break;
>> +          }
>> +      }
>> +
>> +  /* Empty the set of statements performing the edge counter increment.  */
>> +  if (instrumentation_to_be_sampled)
>> +    htab_empty (instrumentation_to_be_sampled);
>> +}
>> +
>> +static void
>> +gimple_init_instrumentation_sampling (void)
>> +{
>> +  if (!gcov_sampling_rate_decl)
>> +    {
>> +      /* Define __gcov_sampling_rate regardless of 
>> -fprofile-generate-sampling.
>> +         Otherwise the extern reference to it from libgcov becomes 
>> unmatched.
>> +      */
>> +      gcov_sampling_rate_decl = build_decl (
>> +          UNKNOWN_LOCATION,
>> +          VAR_DECL,
>> +          get_identifier ("__gcov_sampling_rate"),
>> +          get_gcov_unsigned_t ());
>> +      TREE_PUBLIC (gcov_sampling_rate_decl) = 1;
>> +      DECL_ARTIFICIAL (gcov_sampling_rate_decl) = 1;
>> +      DECL_COMDAT_GROUP (gcov_sampling_rate_decl)
>> +          = DECL_ASSEMBLER_NAME (gcov_sampling_rate_decl);
>> +      TREE_STATIC (gcov_sampling_rate_decl) = 1;
>> +      DECL_INITIAL (gcov_sampling_rate_decl) = build_int_cst (
>> +          get_gcov_unsigned_t (),
>> +          PARAM_VALUE (PARAM_PROFILE_GENERATE_SAMPLING_RATE));
>> +      assemble_variable (gcov_sampling_rate_decl, 0, 0, 0);
>> +    }
>> +
>> +  if (flag_profile_generate_sampling && !instrumentation_to_be_sampled)
>> +    {
>> +      instrumentation_to_be_sampled = htab_create (100, htab_hash_pointer,
>> +                                                   htab_eq_pointer, NULL);
>> +      gcov_sample_counter_decl = build_decl (
>> +          UNKNOWN_LOCATION,
>> +          VAR_DECL,
>> +          get_identifier ("__gcov_sample_counter"),
>> +          get_gcov_unsigned_t ());
>> +      TREE_PUBLIC (gcov_sample_counter_decl) = 1;
>> +      DECL_EXTERNAL (gcov_sample_counter_decl) = 1;
>> +      DECL_ARTIFICIAL (gcov_sample_counter_decl) = 1;
>> +      if (targetm.have_tls)
>> +        DECL_TLS_MODEL (gcov_sample_counter_decl) =
>> +            decl_default_tls_model (gcov_sample_counter_decl);
>> +      assemble_variable (gcov_sample_counter_decl, 0, 0, 0);
>> +    }
>> +}
>> +
>> +void
>>  gimple_init_edge_profiler (void)
>>  {
>>   tree interval_profiler_fn_type;
>> @@ -148,6 +327,8 @@ gimple_init_edge_profiler (void)
>>   tree dc_profiler_fn_type;
>>   tree average_profiler_fn_type;
>>
>> +  gimple_init_instrumentation_sampling ();
>> +
>>   if (!gcov_type_node)
>>     {
>>       char name_buf[32];
>> @@ -277,6 +458,7 @@ gimple_init_edge_profiler (void)
>>  void
>>  gimple_gen_edge_profiler (int edgeno, edge e)
>>  {
>> +  void** slot;
>>   tree ref, one;
>>   gimple stmt1, stmt2, stmt3;
>>
>> @@ -292,6 +474,15 @@ gimple_gen_edge_profiler (int edgeno, edge e)
>>                                        gimple_assign_lhs (stmt1), one);
>>   gimple_assign_set_lhs (stmt2, make_ssa_name (gcov_type_tmp_var, stmt2));
>>   stmt3 = gimple_build_assign (unshare_expr (ref), gimple_assign_lhs 
>> (stmt2));
>> +
>> +  if (flag_profile_generate_sampling)
>> +    {
>> +      slot = htab_find_slot_with_hash (instrumentation_to_be_sampled, stmt1,
>> +                                       htab_hash_pointer (stmt1), INSERT);
>> +      gcc_assert (!*slot);
>> +      *slot = stmt1;
>> +    }
>> +
>>   gsi_insert_on_edge (e, stmt1);
>>   gsi_insert_on_edge (e, stmt2);
>>   gsi_insert_on_edge (e, stmt3);
>> Index: gcc/libgcov.c
>> ===================================================================
>> --- gcc/libgcov.c       (revision 173136)
>> +++ gcc/libgcov.c       (working copy)
>> @@ -83,6 +83,20 @@ void __gcov_merge_delta (gcov_type *counters  __at
>>  #ifdef L_gcov
>>  #include "gcov-io.c"
>>
>> +/* Sampling rate.  */
>> +extern gcov_unsigned_t __gcov_sampling_rate;
>> +static int gcov_sampling_rate_initialized = 0;
>> +
>> +/* Set sampling rate to RATE.  */
>> +
>> +void __gcov_set_sampling_rate (unsigned int rate)
>> +{
>> +  __gcov_sampling_rate = rate;
>> +}
>> +
>> +/* Per thread sample counter.  */
>> +THREAD_PREFIX gcov_unsigned_t __gcov_sample_counter = 0;
>> +
>>  /* Chain of per-object gcov structures.  */
>>  extern struct gcov_info *__gcov_list;
>>
>> @@ -365,7 +379,7 @@ gcov_exit (void)
>>
>>   {
>>     /* Check if the level of dirs to strip off specified. */
>> -    char *tmp = getenv("GCOV_PREFIX_STRIP");
>> +    char *tmp = getenv ("GCOV_PREFIX_STRIP");
>>     if (tmp)
>>       {
>>        gcov_prefix_strip = atoi (tmp);
>> @@ -375,7 +389,7 @@ gcov_exit (void)
>>       }
>>   }
>>   /* Get file name relocation prefix.  Non-absolute values are ignored. */
>> -  gcov_prefix = getenv("GCOV_PREFIX");
>> +  gcov_prefix = getenv ("GCOV_PREFIX");
>>   if (gcov_prefix)
>>     {
>>       prefix_length = strlen(gcov_prefix);
>> @@ -757,6 +771,17 @@ gcov_exit (void)
>>  void
>>  __gcov_init (struct gcov_info *info)
>>  {
>> +  if (!gcov_sampling_rate_initialized)
>> +    {
>> +      const char* env_value_str = getenv ("GCOV_SAMPLING_RATE");
>> +      if (env_value_str)
>> +        {
>> +          int env_value_int = atoi(env_value_str);
>> +          if (env_value_int >= 1)
>> +            __gcov_sampling_rate = env_value_int;
>> +        }
>> +      gcov_sampling_rate_initialized = 1;
>> +    }
>>   if (!info->version)
>>     return;
>>   if (gcov_version (info, info->version, 0))
>> Index: gcc/params.def
>> ===================================================================
>> --- gcc/params.def      (revision 173136)
>> +++ gcc/params.def      (working copy)
>> @@ -929,6 +929,11 @@ DEFPARAM (CXX_MAX_NAMESPACES_FOR_DIAGNOSTIC_HELP,
>>          "name lookup fails",
>>          1000, 0, 0)
>>
>> +DEFPARAM (PARAM_PROFILE_GENERATE_SAMPLING_RATE,
>> +         "profile-generate-sampling-rate",
>> +         "sampling rate with -fprofile-generate-sampling",
>> +         100, 0, 2000000000)
>> +
>>  /*
>>  Local variables:
>>  mode:c
>>
>> --
>> This patch is available for review at http://codereview.appspot.com/4438083
>>
>
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi (revision 173392)
+++ gcc/doc/invoke.texi (working copy)
@@ -376,7 +376,7 @@ Objective-C and Objective-C++ Dialects}.
 -fpartial-inlining -fpeel-loops -fpredictive-commoning @gol
 -fprefetch-loop-arrays @gol
 -fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
--fprofile-generate=@var{path} @gol
+-fprofile-generate=@var{path} -fprofile-generate-sampling @gol
 -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
 -freciprocal-math -fregmove -frename-registers -freorder-blocks @gol
 -freorder-blocks-and-partition -freorder-functions @gol
@@ -7977,6 +7977,20 @@ The following options are enabled: @code{-fprofile
 If @var{path} is specified, GCC will look at the @var{path} to find
 the profile feedback data files. See @option{-fprofile-dir}.
 
+@item -fprofile-generate-sampling
+@opindex -fprofile-generate-sampling
+
+Enable sampling for instrumented binaries.  Instead of recording every event,
+record only every N-th event, where N (the sampling rate) can be set either
+at compile time using
+@option{--param profile-generate-sampling-rate=@var{value}}, or
+at execution start time through environment variable @samp{GCOV_SAMPLING_RATE}.
+
+At this time sampling applies only to branch counters.  A sampling rate of 100
+decreases instrumentated binary slowdown from up to 20x for heavily threaded
+applications down to around 2x.  @option{-fprofile-correction} is always
+needed with sampling.
+
 @item -fprofile-use
 @itemx -fprofile-use=@var{path}
 @opindex fprofile-use
@@ -9212,6 +9226,9 @@ recognize.
 If you want to pass an option that takes an argument, you must use
 @option{-Xassembler} twice, once for the option and once for the argument.
 
+@item profile-generate-sampling-rate
+Set the sampling rate with @option{-fprofile-generate-sampling}.
+
 @end table
 
 @node Link Options
Index: gcc/gcov-io.h
===================================================================
--- gcc/gcov-io.h       (revision 173392)
+++ gcc/gcov-io.h       (working copy)
@@ -548,6 +548,9 @@ struct dyn_imp_mod
 /* Register a new object file module.  */
 extern void __gcov_init (struct gcov_info *) ATTRIBUTE_HIDDEN;
 
+/* Set sampling rate to RATE.  */
+extern void __gcov_set_sampling_rate (unsigned int rate);
+
 /* Called before fork, to avoid double counting.  */
 extern void __gcov_flush (void) ATTRIBUTE_HIDDEN;
 
Index: gcc/profile.c
===================================================================
--- gcc/profile.c       (revision 173392)
+++ gcc/profile.c       (working copy)
@@ -1221,6 +1221,9 @@ branch_prob (void)
 
       /* Commit changes done by instrumentation.  */
       gsi_commit_edge_inserts ();
+
+      if (flag_profile_generate_sampling)
+        add_sampling_to_edge_counters ();
     }
 
   free_aux_for_edges ();
Index: gcc/profile.h
===================================================================
--- gcc/profile.h       (revision 173392)
+++ gcc/profile.h       (working copy)
@@ -47,4 +47,10 @@ extern gcov_type sum_edge_counts (VEC (edge, gc) *
 extern void init_node_map (void);
 extern void del_node_map (void);
 
+/* Implement sampling to avoid writing to edge counters very often.
+   Many concurrent writes to the same counters, or to counters that share
+   the same cache line leads to up to 30x slowdown on an application running
+   on 8 CPUs.  With sampling, the slowdown reduced to 2x.  */
+extern void add_sampling_to_edge_counters (void);
+
 #endif /* PROFILE_H */
Index: gcc/common.opt
===================================================================
--- gcc/common.opt      (revision 173392)
+++ gcc/common.opt      (working copy)
@@ -1619,6 +1619,10 @@ fprofile-generate=
 Common Joined RejectNegative
 Enable common options for generating profile info for profile feedback 
directed optimizations, and set -fprofile-dir=
 
+fprofile-generate-sampling
+Common Var(flag_profile_generate_sampling)
+Turn on instrumentation sampling with -fprofile-generate with rate set by 
--param profile-generate-sampling-rate or environment variable 
GCOV_SAMPLING_RATE
+
 fprofile-use
 Common Var(flag_profile_use)
 Enable common options for performing profile feedback directed optimizations
Index: gcc/tree-profile.c
===================================================================
--- gcc/tree-profile.c  (revision 173392)
+++ gcc/tree-profile.c  (working copy)
@@ -31,6 +31,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "tm.h"
 #include "flags.h"
+#include "target.h"
+#include "output.h"
 #include "regs.h"
 #include "function.h"
 #include "basic-block.h"
@@ -44,11 +46,16 @@ along with GCC; see the file COPYING3.  If not see
 #include "value-prof.h"
 #include "cgraph.h"
 #include "output.h"
+#include "params.h"
+#include "profile.h"
 #include "l-ipo.h"
 #include "profile.h"
 #include "target.h"
 #include "output.h"
 
+/* Number of statements inserted for each edge counter increment.  */
+#define EDGE_COUNTER_STMT_COUNT 3
+
 static GTY(()) tree gcov_type_node;
 static GTY(()) tree gcov_type_tmp_var;
 static GTY(()) tree tree_interval_profiler_fn;
@@ -146,7 +153,178 @@ init_ic_make_global_vars (void)
     }
 }
 
+/* A pointer-set of the first statement in each block of statements that need 
to
+   be applied a sampling wrapper.  */
+static struct pointer_set_t *instrumentation_to_be_sampled = NULL;
+
+/* extern __thread gcov_unsigned_t __gcov_sample_counter  */
+static tree gcov_sample_counter_decl = NULL_TREE;
+
+/* extern gcov_unsigned_t __gcov_sampling_rate  */
+static tree gcov_sampling_rate_decl = NULL_TREE;
+
+/* Insert STMT_IF around given sequence of consecutive statements in the
+   same basic block starting with STMT_START, ending with STMT_END.  */
+
+static void
+insert_if_then (gimple stmt_start, gimple stmt_end, gimple stmt_if)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb_original, bb_before_if, bb_after_if;
+  edge e_if_taken, e_then_join;
+
+  gsi = gsi_for_stmt (stmt_start);
+  gsi_insert_before (&gsi, stmt_if, GSI_SAME_STMT);
+  bb_original = gsi_bb (gsi);
+  e_if_taken = split_block (bb_original, stmt_if);
+  e_if_taken->flags &= ~EDGE_FALLTHRU;
+  e_if_taken->flags |= EDGE_TRUE_VALUE;
+  e_then_join = split_block (e_if_taken->dest, stmt_end);
+  bb_before_if = e_if_taken->src;
+  bb_after_if = e_then_join->dest;
+  make_edge (bb_before_if, bb_after_if, EDGE_FALSE_VALUE);
+}
+
+/* Transform:
+
+   ORIGINAL CODE
+
+   Into:
+
+   __gcov_sample_counter++;
+   if (__gcov_sample_counter >= __gcov_sampling_rate)
+     {
+       __gcov_sample_counter = 0;
+       ORIGINAL CODE
+     }
+
+   The original code block starts with STMT_START, is made of STMT_COUNT
+   consecutive statements in the same basic block.  */
+
+static void
+add_sampling_wrapper (gimple stmt_start, gimple stmt_end)
+{
+  tree zero, one, tmp_var, tmp1, tmp2, tmp3;
+  gimple stmt_inc_counter1, stmt_inc_counter2, stmt_inc_counter3;
+  gimple stmt_reset_counter, stmt_assign_rate, stmt_if;
+  gimple_stmt_iterator gsi;
+
+  tmp_var = create_tmp_reg (get_gcov_unsigned_t (), "PROF_sample");
+  tmp1 = make_ssa_name (tmp_var, NULL);
+  tmp2 = make_ssa_name (tmp_var, NULL);
+
+  /* Create all the new statements needed.  */
+  stmt_inc_counter1 = gimple_build_assign (tmp1, gcov_sample_counter_decl);
+  one = build_int_cst (get_gcov_unsigned_t (), 1);
+  stmt_inc_counter2 = gimple_build_assign_with_ops (
+      PLUS_EXPR, tmp2, tmp1, one);
+  stmt_inc_counter3 = gimple_build_assign (gcov_sample_counter_decl, tmp2);
+  zero = build_int_cst (get_gcov_unsigned_t (), 0);
+  stmt_reset_counter = gimple_build_assign (gcov_sample_counter_decl, zero);
+  tmp3 = make_ssa_name (tmp_var, NULL);
+  stmt_assign_rate = gimple_build_assign (tmp3, gcov_sampling_rate_decl);
+  stmt_if = gimple_build_cond (GE_EXPR, tmp2, tmp3, NULL_TREE, NULL_TREE);
+
+  /* Insert them for now in the original basic block.  */
+  gsi = gsi_for_stmt (stmt_start);
+  gsi_insert_before (&gsi, stmt_inc_counter1, GSI_SAME_STMT);
+  gsi_insert_before (&gsi, stmt_inc_counter2, GSI_SAME_STMT);
+  gsi_insert_before (&gsi, stmt_inc_counter3, GSI_SAME_STMT);
+  gsi_insert_before (&gsi, stmt_assign_rate, GSI_SAME_STMT);
+  gsi_insert_before (&gsi, stmt_reset_counter, GSI_SAME_STMT);
+
+  /* Insert IF block.  */
+  insert_if_then (stmt_reset_counter, stmt_end, stmt_if);
+}
+
+/* Return whether STMT is the beginning of an instrumentation block to be
+   applied sampling.  */
+
+static bool
+is_instrumentation_to_be_sampled (gimple stmt)
+{
+  return pointer_set_contains (instrumentation_to_be_sampled, stmt);
+}
+
+/* Add sampling wrappers around edge counter code in current function.  */
+
 void
+add_sampling_to_edge_counters (void)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+
+  FOR_EACH_BB_REVERSE (bb)
+    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+        gimple stmt = gsi_stmt (gsi);
+        if (is_instrumentation_to_be_sampled (stmt))
+          {
+            gimple stmt_end;
+            int i;
+            /* The code for edge counter increment has EDGE_COUNTER_STMT_COUNT
+               gimple statements. Advance that many statements to find the
+               last statement.  */
+            for (i = 0; i < EDGE_COUNTER_STMT_COUNT - 1; i++)
+              gsi_next (&gsi);
+            stmt_end = gsi_stmt (gsi);
+            gcc_assert (stmt_end);
+            add_sampling_wrapper (stmt, stmt_end);
+            break;
+          }
+      }
+
+  /* Free the bitmap.  */
+  if (instrumentation_to_be_sampled)
+    {
+      pointer_set_destroy (instrumentation_to_be_sampled);
+      instrumentation_to_be_sampled = NULL;
+    }
+}
+
+static void
+gimple_init_instrumentation_sampling (void)
+{
+  if (!gcov_sampling_rate_decl)
+    {
+      /* Define __gcov_sampling_rate regardless of -fprofile-generate-sampling.
+         Otherwise the extern reference to it from libgcov becomes unmatched.
+      */
+      gcov_sampling_rate_decl = build_decl (
+          UNKNOWN_LOCATION,
+          VAR_DECL,
+          get_identifier ("__gcov_sampling_rate"),
+          get_gcov_unsigned_t ());
+      TREE_PUBLIC (gcov_sampling_rate_decl) = 1;
+      DECL_ARTIFICIAL (gcov_sampling_rate_decl) = 1;
+      DECL_COMDAT_GROUP (gcov_sampling_rate_decl)
+          = DECL_ASSEMBLER_NAME (gcov_sampling_rate_decl);
+      TREE_STATIC (gcov_sampling_rate_decl) = 1;
+      DECL_INITIAL (gcov_sampling_rate_decl) = build_int_cst (
+          get_gcov_unsigned_t (),
+          PARAM_VALUE (PARAM_PROFILE_GENERATE_SAMPLING_RATE));
+      assemble_variable (gcov_sampling_rate_decl, 0, 0, 0);
+    }
+
+  if (flag_profile_generate_sampling && !instrumentation_to_be_sampled)
+    {
+      instrumentation_to_be_sampled = pointer_set_create ();
+      gcov_sample_counter_decl = build_decl (
+          UNKNOWN_LOCATION,
+          VAR_DECL,
+          get_identifier ("__gcov_sample_counter"),
+          get_gcov_unsigned_t ());
+      TREE_PUBLIC (gcov_sample_counter_decl) = 1;
+      DECL_EXTERNAL (gcov_sample_counter_decl) = 1;
+      DECL_ARTIFICIAL (gcov_sample_counter_decl) = 1;
+      if (targetm.have_tls)
+        DECL_TLS_MODEL (gcov_sample_counter_decl) =
+            decl_default_tls_model (gcov_sample_counter_decl);
+      assemble_variable (gcov_sample_counter_decl, 0, 0, 0);
+    }
+}
+
+void
 gimple_init_edge_profiler (void)
 {
   tree interval_profiler_fn_type;
@@ -158,6 +336,8 @@ gimple_init_edge_profiler (void)
   tree dc_profiler_fn_type;
   tree average_profiler_fn_type;
 
+  gimple_init_instrumentation_sampling ();
+
   if (!gcov_type_node)
     {
       char name_buf[32];
@@ -302,6 +482,10 @@ gimple_gen_edge_profiler (int edgeno, edge e)
                                        gimple_assign_lhs (stmt1), one);
   gimple_assign_set_lhs (stmt2, make_ssa_name (gcov_type_tmp_var, stmt2));
   stmt3 = gimple_build_assign (unshare_expr (ref), gimple_assign_lhs (stmt2));
+
+  if (flag_profile_generate_sampling)
+    pointer_set_insert (instrumentation_to_be_sampled, stmt1);
+
   gsi_insert_on_edge (e, stmt1);
   gsi_insert_on_edge (e, stmt2);
   gsi_insert_on_edge (e, stmt3);
Index: gcc/libgcov.c
===================================================================
--- gcc/libgcov.c       (revision 173392)
+++ gcc/libgcov.c       (working copy)
@@ -83,6 +83,20 @@ void __gcov_merge_delta (gcov_type *counters  __at
 #ifdef L_gcov
 #include "gcov-io.c"
 
+/* Sampling rate.  */
+extern gcov_unsigned_t __gcov_sampling_rate;
+static int gcov_sampling_rate_initialized = 0;
+
+/* Set sampling rate to RATE.  */
+
+void __gcov_set_sampling_rate (unsigned int rate)
+{
+  __gcov_sampling_rate = rate;
+}
+
+/* Per thread sample counter.  */
+THREAD_PREFIX gcov_unsigned_t __gcov_sample_counter = 0;
+
 /* Chain of per-object gcov structures.  */
 extern struct gcov_info *__gcov_list;
 
@@ -365,7 +379,7 @@ gcov_exit (void)
 
   {
     /* Check if the level of dirs to strip off specified. */
-    char *tmp = getenv("GCOV_PREFIX_STRIP");
+    char *tmp = getenv ("GCOV_PREFIX_STRIP");
     if (tmp)
       {
        gcov_prefix_strip = atoi (tmp);
@@ -375,7 +389,7 @@ gcov_exit (void)
       }
   }
   /* Get file name relocation prefix.  Non-absolute values are ignored. */
-  gcov_prefix = getenv("GCOV_PREFIX");
+  gcov_prefix = getenv ("GCOV_PREFIX");
   if (gcov_prefix)
     {
       prefix_length = strlen(gcov_prefix);
@@ -759,6 +773,17 @@ gcov_exit (void)
 void
 __gcov_init (struct gcov_info *info)
 {
+  if (!gcov_sampling_rate_initialized)
+    {
+      const char* env_value_str = getenv ("GCOV_SAMPLING_RATE");
+      if (env_value_str)
+        {
+          int env_value_int = atoi(env_value_str);
+          if (env_value_int >= 1)
+            __gcov_sampling_rate = env_value_int;
+        }
+      gcov_sampling_rate_initialized = 1;
+    }
   if (!info->version)
     return;
   if (gcov_version (info, info->version, 0))
Index: gcc/params.def
===================================================================
--- gcc/params.def      (revision 173392)
+++ gcc/params.def      (working copy)
@@ -880,6 +880,11 @@ DEFPARAM (PARAM_MAX_LIPO_MEMORY,
          "don't import aux files if memory consumption exceeds this value",
          2400000, 0, 0)
 
+DEFPARAM (PARAM_PROFILE_GENERATE_SAMPLING_RATE,
+         "profile-generate-sampling-rate",
+         "sampling rate with -fprofile-generate-sampling",
+         100, 0, 2000000000)
+
 /* Used for debugging purpose. Tell the compiler to find
    the gcda file in the current directory.  */
 DEFPARAM (PARAM_GCOV_DEBUG,

Reply via email to