On Thu, Sep 6, 2012 at 5:34 PM, Chris Manghane <cm...@google.com> wrote:
>
>
> On Thu, Sep 6, 2012 at 5:08 PM, Teresa Johnson <tejohn...@google.com> wrote:
>>
>> On Thu, Sep 6, 2012 at 2:49 PM, Chris Manghane <cm...@google.com> wrote:
>> > This patch adds a new dump flag, -pmu, that prints PMU profile
>> > information in the tree and gimple dumps.
>> >
>> > This patch should be applied to google/main.
>> >
>> > Tested with crosstools.
>> >
>> > 2012-09-06  Chris Manghane  <cm...@google.com>
>> >
>> >         * gcc/doc/invoke.texi: Modified pmu-profile-use option.
>> >         * gcc/tree-dump.c: Added new dump flag.
>> >         * gcc/tree-pretty-print.c
>> >         (dump_load_latency_details): New function.
>> >         (dump_pmu): New function.
>> >         (dump_generic_node): Added support for new dump flag.
>> >         * gcc/tree-pretty-print.h: Added new function to global header.
>> >         * gcc/tree-pass.h (enum tree_dump_index): Added new dump flag.
>> >         * gcc/gcov.c:
>> >         (process_pmu_profile): Fixed assertion conditions.
>> >         * gcc/gcov-io.h (struct gcov_pmu_summary): Added new struct.
>> >         * gcc/opts.c (common_handle_option): Added support for modified
>> > option.
>> >         * gcc/gimple-pretty-print.c
>> >         (dump_gimple_phi): Added support for new dump flag.
>> >         (dump_gimple_stmt): Ditto.
>> >         * gcc/coverage.c
>> >         (htab_counts_entry_hash): Added new hash table for PMU info.
>> >         (htab_pmu_entry_hash): Ditto.
>> >         (htab_counts_entry_eq): Ditto.
>> >         (htab_pmu_entry_eq): Ditto.
>> >         (htab_counts_entry_del): Ditto.
>> >         (htab_pmu_entry_del): Ditto.
>> >         (read_counts_file): Ditto.
>> >         (read_pmu_file): New function.
>> >         (get_coverage_pmu_latency): Ditto.
>> >         (get_coverage_pmu_branch_mispredict): Ditto.
>> >         (pmu_data_present): Added new function.
>> >         (coverage_init): Added pmu file reading support.
>> >         * gcc/coverage.h: Added pmu functions to global header.
>> >         * gcc/common.opt: Modified pmu-profile-use option.
>> >
>> > Index: gcc/doc/invoke.texi
>> > ===================================================================
>> > --- gcc/doc/invoke.texi (revision 190817)
>> > +++ gcc/doc/invoke.texi (working copy)
>> > @@ -399,7 +399,7 @@ Objective-C and Objective-C++ Dialects}.
>> >  -fprofile-generate=@var{path} -fprofile-generate-sampling @gol
>> >  -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
>> >  -fpmu-profile-generate=@var{pmuoption} @gol
>> > --fpmu-profile-use=@var{pmuoption} @gol
>> > +-fpmu-profile-use=@var{pmudata} @gol
>> >  -freciprocal-math -free -fregmove -frename-registers -freorder-blocks
>> > @gol
>> >  -frecord-gcc-switches-in-elf@gol
>> >  -freorder-blocks-and-partition -freorder-functions @gol
>> > @@ -8381,12 +8381,11 @@ displayed using coverage tool gcov. The params
>> > var
>> >  "pmu_profile_n_addresses" can be used to restrict PMU data collection
>> >  to only this many addresses.
>> >
>> > -@item -fpmu-profile-use=@var{pmuoption}
>> > +@item -fpmu-profile-use=@var{pmudata}
>> >  @opindex fpmu-profile-use
>> >
>> > -Enable performance monitoring unit (PMU) profiling based
>> > -optimizations.  Currently only @var{load-latency} and
>> > -@var{branch-mispredict} are supported.
>> > +If @var{pmudata} is specified, GCC will read PMU data from
>> > @var{pmudata}. If
>> > +unspecified, PMU data will be read from 'pmuprofile.gcda'.
>> >
>> >  @item -fprofile-strip=@var{base_suffix}
>> >  @opindex fprofile-strip
>> > Index: gcc/tree-dump.c
>> > ===================================================================
>> > --- gcc/tree-dump.c     (revision 190817)
>> > +++ gcc/tree-dump.c     (working copy)
>> > @@ -824,9 +824,11 @@ static const struct dump_option_value_info dump_op
>> >    {"nouid", TDF_NOUID},
>> >    {"enumerate_locals", TDF_ENUMERATE_LOCALS},
>> >    {"scev", TDF_SCEV},
>> > +  {"pmu", TDF_PMU},
>> >    {"all", ~(TDF_RAW | TDF_SLIM | TDF_LINENO | TDF_TREE | TDF_RTL |
>> > TDF_IPA
>> >             | TDF_STMTADDR | TDF_GRAPH | TDF_DIAGNOSTIC | TDF_VERBOSE
>> > -           | TDF_RHS_ONLY | TDF_NOUID | TDF_ENUMERATE_LOCALS |
>> > TDF_SCEV)},
>> > +           | TDF_RHS_ONLY | TDF_NOUID | TDF_ENUMERATE_LOCALS | TDF_SCEV
>> > +            | TDF_PMU)},
>> >    {NULL, 0}
>> >  };
>> >
>> > Index: gcc/tree-pretty-print.c
>> > ===================================================================
>> > --- gcc/tree-pretty-print.c     (revision 190817)
>> > +++ gcc/tree-pretty-print.c     (working copy)
>> > @@ -25,6 +25,9 @@ along with GCC; see the file COPYING3.  If not see
>> >  #include "tm.h"
>> >  #include "tree.h"
>> >  #include "output.h"
>> > +#include "basic-block.h"
>> > +#include "gcov-io.h"
>> > +#include "coverage.h"
>> >  #include "tree-pretty-print.h"
>> >  #include "hashtab.h"
>> >  #include "tree-flow.h"
>> > @@ -51,6 +54,7 @@ static void do_niy (pretty_printer *, const_tree);
>> >
>> >  static pretty_printer buffer;
>> >  static int initialized = 0;
>> > +static char *file_prefix = NULL;
>> >
>> >  /* Try to print something for an unknown tree code.  */
>> >
>> > @@ -461,7 +465,32 @@ dump_omp_clauses (pretty_printer *buffer, tree cla
>> >      }
>> >  }
>> >
>> > +/* Dump detailed information about pmu load latency events */
>> >
>> > +void
>> > +dump_load_latency_details (pretty_printer *buffer, gcov_pmu_ll_info_t
>> > *ll_info)
>> > +{
>> > +  if (ll_info == NULL)
>> > +    return;
>> > +
>> > +  pp_string (buffer, "\n[load latency contribution: ");
>> > +  pp_scalar (buffer, "%.2f%%\n", ll_info->self / 100.f);
>> > +  pp_string (buffer, "average cycle distribution:\n");
>> > +  pp_scalar (buffer, "%.2f%% <= 10 cycles\n",
>> > +             ll_info->lt_10 / 100.f);
>> > +  pp_scalar (buffer, "%.2f%% <= 32 cycles\n",
>> > +             ll_info->lt_32 / 100.f);
>> > +  pp_scalar (buffer, "%.2f%% <= 64 cycles\n",
>> > +             ll_info->lt_64 / 100.f);
>> > +  pp_scalar (buffer, "%.2f%% <= 256 cycles\n",
>> > +             ll_info->lt_256 / 100.f);
>> > +  pp_scalar (buffer, "%.2f%% <= 1024 cycles\n",
>> > +             ll_info->lt_1024 / 100.f);
>> > +  pp_scalar (buffer, "%.2f%% > 1024 cycles\n",
>> > +             ll_info->gt_1024 / 100.f);
>> > +  pp_string (buffer, "] ");
>> > +}
>> > +
>> >  /* Dump location LOC to BUFFER.  */
>> >
>> >  static void
>> > @@ -485,7 +514,51 @@ dump_location (pretty_printer *buffer, location_t
>> >    pp_string (buffer, "] ");
>> >  }
>> >
>> > +/* Dump PMU info about LOC to BUFFER.  */
>> >
>> > +static void
>> > +dump_pmu (pretty_printer *buffer, location_t loc)
>> > +{
>> > +  expanded_location xloc = expand_location (loc);
>> > +  gcov_pmu_ll_info_t *ll_info;
>> > +  gcov_pmu_brm_info_t *brm_info;
>> > +  char *src;
>> > +  uint64_t src_size;
>> > +
>> > +  if (!xloc.file)
>> > +    return;
>> > +
>> > +  if (!file_prefix)
>> > +    file_prefix = getpwd();
>>
>> Missing space before "(".
>>
>> > +
>> > +  if (!IS_ABSOLUTE_PATH (xloc.file))
>> > +    {
>> > +      src_size = strlen (xloc.file) + strlen (file_prefix) + 1;
>> > +      src = XCNEWVEC (char, src_size + 1);
>> > +      strcpy (src, file_prefix);
>> > +      strcat (src, "/");
>> > +      strcat (src, xloc.file);
>> > +    }
>> > +  else
>> > +    src = xstrdup (xloc.file);
>> > +
>> > +  ll_info = get_coverage_pmu_latency (src, xloc.line);
>> > +  brm_info =
>> > +      get_coverage_pmu_branch_mispredict (src, xloc.line);
>> > +
>> > +  if (ll_info)
>> > +    dump_load_latency_details (buffer, ll_info);
>> > +
>> > +  if (brm_info)
>> > +    {
>> > +      pp_string (buffer, "[branch misprediction contribution: ");
>> > +      pp_scalar (buffer, "%.2f%%", brm_info->self / 100.f);
>> > +      pp_string (buffer, "] ");
>> > +    }
>> > +
>> > +  XDELETE (src);
>> > +}
>> > +
>> >  /* Dump lexical block BLOCK.  BUFFER, SPC and FLAGS are as in
>> >     dump_generic_node.  */
>> >
>> > @@ -622,6 +695,9 @@ dump_generic_node (pretty_printer *buffer, tree no
>> >    if ((flags & TDF_LINENO) && EXPR_HAS_LOCATION (node))
>> >      dump_location (buffer, EXPR_LOCATION (node));
>> >
>> > +  if ((flags & TDF_PMU) && pmu_data_present () && EXPR_HAS_LOCATION
>> > (node))
>> > +    dump_pmu (buffer, EXPR_LOCATION (node));
>> > +
>> >    switch (TREE_CODE (node))
>> >      {
>> >      case ERROR_MARK:
>> > Index: gcc/tree-pretty-print.h
>> > ===================================================================
>> > --- gcc/tree-pretty-print.h     (revision 190817)
>> > +++ gcc/tree-pretty-print.h     (working copy)
>> > @@ -24,6 +24,8 @@ along with GCC; see the file COPYING3.  If not see
>> >  #define GCC_TREE_PRETTY_PRINT_H
>> >
>> >  #include "pretty-print.h"
>> > +#include "basic-block.h"
>> > +#include "gcov-io.h"
>> >
>> >  #define pp_tree_identifier(PP, T)                      \
>> >    pp_base_tree_identifier (pp_base (PP), T)
>> > @@ -45,6 +47,7 @@ extern void print_generic_expr (FILE *, tree, int)
>> >  extern void print_generic_decl (FILE *, tree, int);
>> >  extern void debug_c_tree (tree);
>> >  extern void dump_omp_clauses (pretty_printer *, tree, int, int);
>> > +extern void dump_load_latency_details (pretty_printer *,
>> > gcov_pmu_ll_info_t *);
>> >  extern void print_call_name (pretty_printer *, tree, int);
>> >  extern void debug_generic_expr (tree);
>> >  extern void debug_generic_stmt (tree);
>> > Index: gcc/tree-pass.h
>> > ===================================================================
>> > --- gcc/tree-pass.h     (revision 190817)
>> > +++ gcc/tree-pass.h     (working copy)
>> > @@ -84,8 +84,8 @@ enum tree_dump_index
>> >  #define TDF_ENUMERATE_LOCALS (1 << 22) /* Enumerate locals by uid.  */
>> >  #define TDF_CSELIB     (1 << 23)       /* Dump cselib details.  */
>> >  #define TDF_SCEV       (1 << 24)       /* Dump SCEV details.  */
>> > +#define TDF_PMU         (1 << 25)       /* Dump PMU Profiling details
>> > */
>> >
>> > -
>> >  /* In tree-dump.c */
>> >
>> >  extern char *get_dump_file_name (int);
>> > Index: gcc/gcov.c
>> > ===================================================================
>> > --- gcc/gcov.c  (revision 190817)
>> > +++ gcc/gcov.c  (working copy)
>> > @@ -2350,6 +2350,7 @@ filter_pmu_data_lines (source_t *src)
>> >          }
>> >      }
>> >
>> > +
>> >    /* Sort the load latency data according to the line numbers because
>> >       we later iterate over sources in line number order. Normally we
>> >       expect the PMU tool to provide sorted data, but a few entries can
>> > @@ -3022,9 +3023,9 @@ static void process_pmu_profile (void)
>> >          {
>> >            gcov_pmu_st_entry_t *st_entry = XCNEW (gcov_pmu_st_entry_t);
>> >            gcov_read_pmu_string_table_entry (st_entry, length);
>> > +          string_table->st_count++;
>> >            /* Verify that we read string table entries in the right
>> > order */
>> >            gcc_assert (st_entry->index == string_table->st_count);
>> > -          string_table->st_count++;
>> >            if (string_table->st_count >= string_table->alloc_st_count)
>> >              {
>> >                string_table->alloc_st_count *= 2;
>> > Index: gcc/gcov-io.h
>> > ===================================================================
>> > --- gcc/gcov-io.h       (revision 190817)
>> > +++ gcc/gcov-io.h       (working copy)
>> > @@ -702,6 +702,14 @@ typedef struct string_table
>> >    gcov_pmu_tool_header_t *pmu_tool_header;
>> >  } string_table_t;
>> >
>> > +/* Cumulative pmu data */
>> > +struct gcov_pmu_summary
>> > +{
>> > +  ll_infos_t ll_infos;         /* load latency infos. */
>> > +  brm_infos_t brm_infos;       /* branch misprediction infos */
>> > +  string_table_t string_table; /* string table entries */
>> > +};
>> > +
>> >  /* Structures embedded in coveraged program.  The structures generated
>> >     by write_profile must match these.  */
>> >
>> > Index: gcc/opts.c
>> > ===================================================================
>> > --- gcc/opts.c  (revision 190817)
>> > +++ gcc/opts.c  (working copy)
>> > @@ -1645,6 +1645,11 @@ common_handle_option (struct gcc_options *opts,
>> >         opts->x_flag_gcse_after_reload = value;
>> >        break;
>> >
>> > +    case OPT_fpmu_profile_use_:
>> > +      opts->x_pmu_profile_data = xstrdup (arg);
>> > +      value = true;
>> > +      break;
>> > +
>> >      case OPT_fprofile_generate_:
>> >        opts->x_profile_data_prefix = xstrdup (arg);
>> >        value = true;
>> > Index: gcc/gimple-pretty-print.c
>> > ===================================================================
>> > --- gcc/gimple-pretty-print.c   (revision 190817)
>> > +++ gcc/gimple-pretty-print.c   (working copy)
>> > @@ -26,8 +26,11 @@ along with GCC; see the file COPYING3.  If not see
>> >  #include "tm.h"
>> >  #include "tree.h"
>> >  #include "diagnostic.h"
>> > +#include "basic-block.h"
>> >  #include "tree-pretty-print.h"
>> >  #include "gimple-pretty-print.h"
>> > +#include "gcov-io.h"
>> > +#include "coverage.h"
>> >  #include "hashtab.h"
>> >  #include "tree-flow.h"
>> >  #include "tree-pass.h"
>> > @@ -40,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
>> >
>> >  static pretty_printer buffer;
>> >  static bool initialized = false;
>> > +static char *file_prefix = NULL;
>> >
>> >  #define GIMPLE_NIY do_niy (buffer,gs)
>> >
>> > @@ -1629,6 +1633,51 @@ dump_gimple_phi (pretty_printer *buffer, gimple
>> > ph
>> >           pp_decimal_int (buffer, xloc.column);
>> >           pp_string (buffer, "] ");
>> >         }
>> > +      if ((flags & TDF_PMU) && pmu_data_present ()
>> > +          && (gimple_phi_arg_location (phi, i)))
>>
>> Combine this and similar block below into a helper.
>>
>
> By this, did you mean turning the if condition into a helper, or extracting
> the duplicated code in the if block?

I meant the code in the if block.

Thanks
Teresa

>
>>
>> > +        {
>> > +          expanded_location xloc;
>> > +          gcov_pmu_ll_info_t *ll_info;
>> > +          gcov_pmu_brm_info_t *brm_info;
>> > +          char *src;
>> > +          uint64_t src_size;
>> > +
>> > +          xloc = expand_location (gimple_phi_arg_location (phi, i));
>> > +          if (xloc.file)
>> > +            {
>> > +              if (!file_prefix)
>> > +                file_prefix = getpwd();
>>
>> Missing space before "(".
>>
>> > +
>> > +              if (!IS_ABSOLUTE_PATH (xloc.file))
>> > +                {
>> > +                  src_size = strlen (xloc.file) + strlen (file_prefix)
>> > + 1;
>> > +                  src = XCNEWVEC (char, src_size + 1);
>> > +                  strcpy (src, file_prefix);
>> > +                  strcat (src, "/");
>> > +                  strcat (src, xloc.file);
>> > +                }
>> > +              else
>> > +                src = xstrdup (xloc.file);
>> > +
>> > +              ll_info = get_coverage_pmu_latency (src, xloc.line);
>> > +              brm_info =
>> > +                  get_coverage_pmu_branch_mispredict (src, xloc.line);
>> > +
>> > +              if (ll_info)
>> > +                dump_load_latency_details (buffer, ll_info);
>> > +
>> > +              if (brm_info)
>> > +                {
>> > +                  pp_string (buffer, "\n[branch misprediction
>> > contribution: ");
>> > +                  pp_scalar (buffer, "%.2f%%", brm_info->self / 100.f);
>> > +                  pp_string (buffer, "] ");
>> > +                }
>> > +
>> > +              XDELETE (src);
>> > +            }
>> > +
>> > +        }
>> > +
>> >        dump_generic_node (buffer, gimple_phi_arg_def (phi, i), spc,
>> > flags,
>> >                          false);
>> >        pp_character (buffer, '(');
>> > @@ -1875,6 +1924,50 @@ dump_gimple_stmt (pretty_printer *buffer, gimple
>> > g
>> >        pp_string (buffer, "] ");
>> >      }
>> >
>> > +  if ((flags & TDF_PMU) && pmu_data_present () && gimple_has_location
>> > (gs))
>> > +    {
>> > +      expanded_location xloc;
>> > +      gcov_pmu_ll_info_t *ll_info;
>> > +      gcov_pmu_brm_info_t *brm_info;
>> > +      char *src;
>> > +      uint64_t src_size;
>> > +
>> > +      xloc = expand_location (gimple_location (gs));
>> > +      if (xloc.file)
>> > +        {
>> > +          if (!file_prefix)
>> > +            file_prefix = getpwd();
>>
>> Missing space before "(".
>>
>> > +
>> > +          if (!IS_ABSOLUTE_PATH (xloc.file))
>> > +            {
>> > +              src_size = strlen (xloc.file) + strlen (file_prefix) + 1;
>> > +              src = XCNEWVEC (char, src_size + 1);
>> > +              strcpy (src, file_prefix);
>> > +              strcat (src, "/");
>> > +              strcat (src, xloc.file);
>> > +            }
>> > +          else
>> > +            src = xstrdup (xloc.file);
>> > +
>> > +          ll_info = get_coverage_pmu_latency (src, xloc.line);
>> > +          brm_info =
>> > +              get_coverage_pmu_branch_mispredict (src, xloc.line);
>> > +
>> > +          if (ll_info)
>> > +            dump_load_latency_details (buffer, ll_info);
>> > +
>> > +          if (brm_info)
>> > +            {
>> > +              pp_string (buffer, "\n[branch misprediction contribution:
>> > ");
>> > +              pp_scalar (buffer, "%.2f%%", brm_info->self / 100.f);
>> > +              pp_string (buffer, "] ");
>> > +            }
>> > +
>> > +          XDELETE (src);
>> > +        }
>> > +    }
>> > +
>> > +
>> >    if (flags & TDF_EH)
>> >      {
>> >        int lp_nr = lookup_stmt_eh_lp (gs);
>> > Index: gcc/coverage.c
>> > ===================================================================
>> > --- gcc/coverage.c      (revision 190817)
>> > +++ gcc/coverage.c      (working copy)
>> > @@ -96,6 +96,17 @@ typedef struct counts_entry
>> >    struct gcov_ctr_summary summary;
>> >  } counts_entry_t;
>> >
>> > +typedef struct pmu_entry
>> > +{
>> > +  /* We hash by filename and line number.  */
>> > +  gcov_unsigned_t lineno;
>> > +  char *filename;
>> > +
>> > +  /* Store the load latency and branch misprediction info.  */
>> > +  gcov_pmu_ll_info_t *ll_info;
>> > +  gcov_pmu_brm_info_t *brm_info;
>> > +} pmu_entry_t;
>> > +
>> >  static GTY(()) struct coverage_data *functions_head = 0;
>> >  static struct coverage_data **functions_tail = &functions_head;
>> >  static unsigned no_coverage = 0;
>> > @@ -129,6 +140,9 @@ static char pmu_profile_filename[] = "pmuprofile";
>> >  /* Hash table of count data.  */
>> >  static htab_t counts_hash = NULL;
>> >
>> > +/* Hash table of pmu data.  */
>> > +static htab_t pmu_hash = NULL;
>> > +
>> >  /* The names of merge functions for counters.  */
>> >  static const char *const ctr_merge_functions[GCOV_COUNTERS] =
>> > GCOV_MERGE_FUNCTIONS;
>> >  static const char *const ctr_names[GCOV_COUNTERS] = GCOV_COUNTER_NAMES;
>> > @@ -159,11 +173,17 @@ static tree gcov_pmu_top_n_address_decl = NULL_TRE
>> >  /* To ensure that the above variables are initialized only once.  */
>> >  static int pmu_profiling_initialized = 0;
>> >
>> > +struct gcov_pmu_summary pmu_global_summary;
>> > +
>> >  /* Forward declarations.  */
>> >  static hashval_t htab_counts_entry_hash (const void *);
>> > +static hashval_t htab_pmu_entry_hash (const void *);
>> >  static int htab_counts_entry_eq (const void *, const void *);
>> > +static int htab_pmu_entry_eq (const void *, const void *);
>> >  static void htab_counts_entry_del (void *);
>> > +static void htab_pmu_entry_del (void *);
>> >  static void read_counts_file (const char *, unsigned);
>> > +static void read_pmu_file (const char*);
>> >  static tree build_var (tree, tree, int);
>> >  static void build_fn_info_type (tree, unsigned, tree);
>> >  static void build_info_type (tree, tree);
>> > @@ -211,6 +231,14 @@ htab_counts_entry_hash (const void *of)
>> >    return entry->ident * GCOV_COUNTERS + entry->ctr;
>> >  }
>> >
>> > +static hashval_t
>> > +htab_pmu_entry_hash (const void *of)
>> > +{
>> > +  const pmu_entry_t *const entry = (const pmu_entry_t *) of;
>> > +
>> > +  return htab_hash_string (entry->filename) + entry->lineno;
>> > +}
>> > +
>> >  static int
>> >  htab_counts_entry_eq (const void *of1, const void *of2)
>> >  {
>> > @@ -220,6 +248,16 @@ htab_counts_entry_eq (const void *of1, const void
>> >    return entry1->ident == entry2->ident && entry1->ctr == entry2->ctr;
>> >  }
>> >
>> > +static int
>> > +htab_pmu_entry_eq (const void *of1, const void *of2)
>> > +{
>> > +  const pmu_entry_t *const entry1 = (const pmu_entry_t *) of1;
>> > +  const pmu_entry_t *const entry2 = (const pmu_entry_t *) of2;
>> > +
>> > +  return strcmp (entry1->filename, entry2->filename) == 0 &&
>> > +      entry1->lineno == entry2->lineno;
>> > +}
>> > +
>> >  static void
>> >  htab_counts_entry_del (void *of)
>> >  {
>> > @@ -233,6 +271,17 @@ htab_counts_entry_del (void *of)
>> >      }
>> >  }
>> >
>> > +static void
>> > +htab_pmu_entry_del (void *of)
>> > +{
>> > +  pmu_entry_t *const entry = (pmu_entry_t *) of;
>> > +
>> > +  free (entry->filename);
>> > +  free (entry->ll_info);
>> > +  free (entry->brm_info);
>> > +  free (entry);
>> > +}
>> > +
>> >  /* Returns true if MOD_ID is the id of the last source module.  */
>> >
>> >  int
>> > @@ -722,6 +771,247 @@ read_counts_file (const char *da_file_name, unsign
>> >    gcov_close ();
>> >  }
>> >
>> > +/* Read in the pmu profiling file, if available. DA_FILE_NAME is the
>> > +   name of the gcda file. */
>> > +
>> > +static void read_pmu_file (const char* da_file_name)
>> > +{
>> > +  gcov_unsigned_t tag;
>> > +  ll_infos_t* ll_infos = &pmu_global_summary.ll_infos;
>> > +  brm_infos_t* brm_infos = &pmu_global_summary.brm_infos;
>> > +  string_table_t* string_table = &pmu_global_summary.string_table;
>> > +  int is_error = 0;
>> > +  unsigned i;
>> > +  pmu_entry_t **slot, *entry, elt;
>> > +  gcov_pmu_ll_info_t *ll_info;
>> > +  gcov_pmu_brm_info_t *brm_info;
>> > +  gcov_pmu_st_entry_t *st_entry;
>> > +
>> > +
>> > +  if (!gcov_open (da_file_name, 1))
>> > +    {
>> > +      if (PARAM_VALUE (PARAM_GCOV_DEBUG))
>> > +        {
>> > +          /* Try to find .gcda file in the current working dir.  */
>> > +          da_file_name = lbasename (da_file_name);
>> > +          if (!gcov_open (da_file_name, 1))
>> > +            return;
>> > +        }
>> > +      else
>> > +        return;
>> > +    }
>> > +
>> > +  if (!gcov_magic (gcov_read_unsigned (), GCOV_DATA_MAGIC))
>> > +    {
>> > +      warning (0, "%qs is not a gcov data file", da_file_name);
>> > +      gcov_close ();
>> > +      return;
>> > +    }
>> > +  else if ((tag = gcov_read_unsigned ()) != GCOV_VERSION)
>> > +    {
>> > +      char v[4], e[4];
>> > +
>> > +      GCOV_UNSIGNED2STRING (v, tag);
>> > +      GCOV_UNSIGNED2STRING (e, GCOV_VERSION);
>> > +
>> > +      warning (0, "%qs is version %q.*s, expected version %q.*s",
>> > +               da_file_name, 4, v, 4, e);
>> > +      gcov_close ();
>> > +      return;
>> > +    }
>> > +
>> > +  /* Read and discard the version. */
>> > +  tag = gcov_read_unsigned ();
>> > +
>> > +  /* Read and discard the stamp.  */
>> > +  tag = gcov_read_unsigned ();
>> > +
>> > +  /* Initialize PMU data fields. */
>> > +  ll_infos->ll_count = 0;
>> > +  ll_infos->alloc_ll_count = 64;
>> > +  ll_infos->ll_array = XCNEWVEC (gcov_pmu_ll_info_t *,
>> > ll_infos->alloc_ll_count);
>> > +
>> > +  brm_infos->brm_count = 0;
>> > +  brm_infos->alloc_brm_count = 64;
>> > +  brm_infos->brm_array = XCNEWVEC (gcov_pmu_brm_info_t *,
>> > +                                   brm_infos->alloc_brm_count);
>> > +
>> > +  string_table->st_count = 0;
>> > +  string_table->alloc_st_count = 64;
>> > +  string_table->st_array = XCNEWVEC (gcov_pmu_st_entry_t *,
>> > +                                     string_table->alloc_st_count);
>> > +
>> > +  while ((tag = gcov_read_unsigned ()))
>> > +    {
>> > +      unsigned length = gcov_read_unsigned ();
>> > +      unsigned long base = gcov_position ();
>> > +
>> > +      if (tag == GCOV_TAG_PMU_LOAD_LATENCY_INFO)
>> > +        {
>> > +          gcov_pmu_ll_info_t *ll_info = XCNEW (gcov_pmu_ll_info_t);
>> > +          gcov_read_pmu_load_latency_info (ll_info, length);
>> > +          ll_infos->ll_count++;
>> > +          if (ll_infos->ll_count >= ll_infos->alloc_ll_count)
>> > +            {
>> > +              /* need to realloc */
>> > +              ll_infos->ll_array = (gcov_pmu_ll_info_t **)
>> > +                xrealloc (ll_infos->ll_array, 2 *
>> > ll_infos->alloc_ll_count);
>> > +            }
>> > +          ll_infos->ll_array[ll_infos->ll_count - 1] = ll_info;
>> > +        }
>> > +      else if (tag == GCOV_TAG_PMU_BRANCH_MISPREDICT_INFO)
>> > +        {
>> > +          gcov_pmu_brm_info_t *brm_info = XCNEW (gcov_pmu_brm_info_t);
>> > +          gcov_read_pmu_branch_mispredict_info (brm_info, length);
>> > +          brm_infos->brm_count++;
>> > +          if (brm_infos->brm_count >= brm_infos->alloc_brm_count)
>> > +            {
>> > +              /* need to realloc */
>> > +              brm_infos->brm_array = (gcov_pmu_brm_info_t **)
>> > +                xrealloc (brm_infos->brm_array, 2 *
>> > brm_infos->alloc_brm_count);
>> > +            }
>> > +          brm_infos->brm_array[brm_infos->brm_count - 1] = brm_info;
>> > +        }
>> > +      else if (tag == GCOV_TAG_PMU_TOOL_HEADER)
>> > +        {
>> > +          gcov_pmu_tool_header_t *tool_header = XCNEW
>> > (gcov_pmu_tool_header_t);
>> > +          gcov_read_pmu_tool_header (tool_header, length);
>> > +          ll_infos->pmu_tool_header = tool_header;
>> > +          brm_infos->pmu_tool_header = tool_header;
>> > +        }
>> > +      else if (tag == GCOV_TAG_PMU_STRING_TABLE_ENTRY)
>> > +       {
>> > +         gcov_pmu_st_entry_t *st_entry = XCNEW (gcov_pmu_st_entry_t);
>> > +         gcov_read_pmu_string_table_entry(st_entry, length);
>>
>> Missing space before "(".
>>
>> > +         string_table->st_count++;
>> > +         if (string_table->st_count >= string_table->alloc_st_count)
>> > +           {
>> > +             string_table->alloc_st_count *= 2;
>> > +             string_table->st_array = (gcov_pmu_st_entry_t **)
>> > +                 xrealloc (string_table->st_array,
>> > +                           string_table->alloc_st_count);
>> > +           }
>> > +
>> > +         string_table->st_array[string_table->st_count - 1] = st_entry;
>> > +       }
>> > +
>> > +      gcov_sync (base, length);
>> > +      if ((is_error = gcov_is_error ()))
>> > +       {
>> > +         error (is_error < 0 ? "%qs has overflowed" : "%qs is
>> > corrupted",
>> > +                da_file_name);
>> > +          gcov_close();
>>
>> Missing space before "(".
>>
>> > +         break;
>> > +       }
>> > +    }
>> > +
>> > +  gcov_close();
>>
>> Missing space before "(".
>>
>> > +
>> > +  /* Construct hash table with information from gcda file. Entry keys
>> > are a
>> > +     unique combination of the filename and the line number for easy
>> > access */
>> > +  if (!pmu_hash)
>> > +    pmu_hash = htab_create (10,
>> > +                            htab_pmu_entry_hash, htab_pmu_entry_eq,
>> > +                            htab_pmu_entry_del);
>> > +
>> > +  gcc_assert (pmu_hash != NULL);
>> > +  gcc_assert (ll_infos->ll_count > 0);
>> > +  gcc_assert (brm_infos->brm_count > 0);
>> > +
>> > +  for (i = 0; i < ll_infos->ll_count; ++i)
>> > +    {
>> > +      ll_info = ll_infos->ll_array[i];
>> > +      st_entry = string_table->st_array[ll_info->filetag - 1];
>> > +      elt.lineno = ll_info->line;
>> > +      elt.filename = xstrdup (st_entry->str);
>> > +
>> > +      slot = (pmu_entry_t **) htab_find_slot
>> > +          (pmu_hash, &elt, INSERT);
>> > +      entry = *slot;
>> > +      XDELETE (elt.filename);
>> > +      if (!entry)
>> > +        {
>> > +          *slot = entry = XCNEW (pmu_entry_t);
>> > +          entry->lineno = elt.lineno;
>> > +          entry->filename = xstrdup (st_entry->str);
>> > +          entry->ll_info = ll_info;
>> > +        }
>> > +      /* No need to check for existing entries because
>> > +         there should only be one entry per filename and line number */
>> > +    }
>> > +
>> > +  for (i = 0; i < brm_infos->brm_count; ++i)
>> > +    {
>> > +      brm_info = brm_infos->brm_array[i];
>> > +      st_entry = string_table->st_array[brm_info->filetag - 1];
>> > +      elt.lineno = brm_info->line;
>> > +      elt.filename = xstrdup (st_entry->str);
>> > +      slot = (pmu_entry_t **) htab_find_slot
>> > +          (pmu_hash, &elt, INSERT);
>> > +      entry = *slot;
>> > +      XDELETE (elt.filename);
>> > +      if (!entry)
>> > +        {
>> > +          *slot = entry = XCNEW (pmu_entry_t);
>> > +          entry->lineno = elt.lineno;
>> > +          entry->filename = xstrdup(st_entry->str);
>>
>> Missing space before "(".
>>
>> > +          entry->brm_info = brm_info;
>> > +        }
>> > +      else
>> > +        {
>> > +          /* There already exists a pmu_entry_t that is partially
>> > filled
>> > +             with load latency info */
>> > +          entry->brm_info = brm_info;
>> > +        }
>> > +    }
>> > +}
>> > +
>> > +/* Returns the load latency info for line number LINENO of source file
>> > +   FILENAME. */
>> > +
>> > +gcov_pmu_ll_info_t *
>> > +get_coverage_pmu_latency (const char* filename, gcov_unsigned_t lineno)
>> > +{
>> > +  pmu_entry_t *entry, elt;
>> > +
>> > +  /* No hash table, no pmu data */
>> > +  if (pmu_hash == NULL)
>> > +    return NULL;
>> > +
>> > +  elt.filename = xstrdup (filename);
>> > +  elt.lineno = lineno;
>> > +
>> > +  entry = (pmu_entry_t *) htab_find(pmu_hash, &elt);
>>
>> Missing space before "(".
>>
>> > +  XDELETE (elt.filename);
>> > +  if (entry)
>> > +    return entry->ll_info;
>> > +
>> > +  return NULL;
>> > +}
>> > +
>> > +/* Returns the branch misprediction info for line number LINENO of
>> > source file
>> > +   FILENAME. */
>> > +
>> > +gcov_pmu_brm_info_t *
>> > +get_coverage_pmu_branch_mispredict (const char* filename,
>> > gcov_unsigned_t lineno)
>> > +{
>> > +  pmu_entry_t *entry, elt;
>> > +
>> > +  /* No hash table, no pmu data */
>> > +  if (pmu_hash == NULL)
>> > +    return NULL;
>> > +
>> > +  elt.filename = xstrdup(filename);
>>
>> Missing space before "(".
>>
>> > +  elt.lineno = lineno;
>> > +
>> > +  entry = (pmu_entry_t *) htab_find(pmu_hash, &elt);
>>
>> Missing space before "(".
>>
>> > +  XDELETE (elt.filename);
>> > +  if (entry)
>> > +    return entry->brm_info;
>> > +
>> > +  return NULL;
>> > +}
>> > +
>> >  /* Returns the coverage data entry for counter type COUNTER of function
>> >     FUNC. EXPECTED is the number of expected counter entries.  */
>> >
>> > @@ -1125,6 +1415,14 @@ coverage_function_present (unsigned fn_ident)
>> >    return item != NULL;
>> >  }
>> >
>> > +/* True if there is PMU data present in this compilation */
>> > +
>> > +bool
>> > +pmu_data_present (void)
>> > +{
>> > +  return (pmu_hash != NULL);
>> > +}
>> > +
>> >  /* Update function and program direct-call coverage counts.  */
>> >
>> >  void
>> > @@ -2271,6 +2569,10 @@ coverage_init (const char *filename, const char*
>> > s
>> >    if (flag_branch_probabilities)
>> >      read_counts_file (da_file_name, 0);
>> >
>> > +  /* Reads at most one auxiliary GCDA file since we don't support
>> > merging */
>> > +  if (pmu_profile_data != 0 && TDF_PMU)
>> > +    read_pmu_file (pmu_profile_data);
>> > +
>> >    /* Rebuild counts_hash and read the auxiliary GCDA files.  */
>> >    if (flag_profile_use && L_IPO_COMP_MODE)
>> >      {
>> > Index: gcc/coverage.h
>> > ===================================================================
>> > --- gcc/coverage.h      (revision 190817)
>> > +++ gcc/coverage.h      (working copy)
>> > @@ -45,7 +45,12 @@ extern int coverage_counter_alloc (unsigned /*coun
>> >  extern tree tree_coverage_counter_ref (unsigned /*counter*/,
>> > unsigned/*num*/);
>> >  /* Use a counter address from the most recent allocation.  */
>> >  extern tree tree_coverage_counter_addr (unsigned /*counter*/,
>> > unsigned/*num*/);
>> > -
>> > +/* Get the load latency info for the current file and line */
>> > +extern gcov_pmu_ll_info_t *get_coverage_pmu_latency (const char*,
>> > +                                                     gcov_unsigned_t);
>> > +/* Get the branch misprediction info for the current file and line */
>> > +extern gcov_pmu_brm_info_t *
>> > +get_coverage_pmu_branch_mispredict (const char*, gcov_unsigned_t);
>> >  /* Get all the counters for the current function.  */
>> >  extern gcov_type *get_coverage_counts (unsigned /*counter*/,
>> >                                        unsigned /*expected*/,
>> > @@ -70,6 +75,9 @@ extern void coverage_dc_end_function (void);
>> >     is present in the coverage internal data structures.  */
>> >  extern bool coverage_function_present (unsigned fn_ident);
>> >
>> > +/* True if there is PMU data present in this compilation. */
>> > +extern bool pmu_data_present (void);
>> > +
>> >  extern tree get_gcov_type (void);
>> >  extern tree get_gcov_unsigned_t (void);
>> >
>> > Index: gcc/common.opt
>> > ===================================================================
>> > --- gcc/common.opt      (revision 190817)
>> > +++ gcc/common.opt      (working copy)
>> > @@ -1684,8 +1684,8 @@ Common Joined RejectNegative Var(flag_pmu_profile_
>> >  -fpmu-profile-generate=[load-latency]  Generate pmu profile for cache
>> > misses. Currently only pfmon based load latency profiling is supported on
>> > Intel/PEBS and AMD/IBS platforms.
>> >
>> >  fpmu-profile-use=
>> > -Common Joined RejectNegative Var(flag_pmu_profile_use)
>> > --fpmu-profile-use=[load-latency]  Use pmu profile data while
>> > optimizing.  Currently only perfmon based load latency profiling is
>> > supported on Intel/PEBS and AMD/IBS platforms.
>> > +Common Joined RejectNegative Var(pmu_profile_data)
>> > +-fpmu-profile-use=[pmuprofile.gcda]  The pmu profile data file to use
>> > for pmu feedback.
>> >
>> >  fpredictive-commoning
>> >  Common Report Var(flag_predictive_commoning) Optimization
>> >
>> > --
>> > This patch is available for review at
>> > http://codereview.appspot.com/6489092
>>
>>
>>
>> --
>> Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413
>
>



-- 
Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413

Reply via email to