This patch adds a bunch of stats that will be used later in post-processing
to determine where and with what frequency the HITMs are coming from.

Most of the stats are decoded from the data source response.  The stats
also track which cpu each record came in on.

Credit to Dick Fowles for determining which bits are important and how to
properly track them.  Ported to perf by me.

V2: refresh with hist_entry

Original-by: Dick Fowles <rfow...@redhat.com>
Signed-off-by: Don Zickus <dzic...@redhat.com>
---
 tools/perf/builtin-c2c.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 187 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index b5742bd..55c5ce9 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -5,17 +5,58 @@
 #include "util/parse-options.h"
 #include "util/session.h"
 #include "util/tool.h"
+#include "util/stat.h"
+#include "util/cpumap.h"
 #include "util/debug.h"
 #include <api/fs/fs.h>
 #include "util/annotate.h"
 
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <sched.h>
+
+typedef struct {                 /* counters filled in by c2c_decode_stats() */
+       int  locks;               /* count of 'lock' transactions */
+       int  store;               /* count of all stores in trace */
+       int  st_uncache;          /* stores to uncacheable address */
+       int  st_noadrs;           /* stores with no data address */
+       int  st_l1hit;            /* count of stores that hit L1D */
+       int  st_l1miss;           /* count of stores that miss L1D */
+       int  load;                /* count of all loads in trace */
+       int  ld_excl;             /* exclusive loads, rmt/lcl DRAM - snp none/miss */
+       int  ld_shared;           /* shared loads, rmt/lcl DRAM - snp hit */
+       int  ld_uncache;          /* loads to uncacheable address */
+       int  ld_io;               /* loads to io address */
+       int  ld_miss;             /* loads with the MISS level bit set */
+       int  ld_noadrs;           /* loads with no data address */
+       int  ld_fbhit;            /* count of loads hitting Fill Buffer */
+       int  ld_l1hit;            /* count of loads that hit L1D */
+       int  ld_l2hit;            /* count of loads that hit L2 */
+       int  ld_llchit;           /* count of loads that hit LLC without HITM */
+       int  lcl_hitm;            /* count of loads with local HITM  */
+       int  rmt_hitm;            /* count of loads with remote HITM */
+       int  rmt_hit;             /* count of loads with remote hit clean */
+       int  lcl_dram;            /* count of loads miss to local DRAM */
+       int  rmt_dram;            /* count of loads miss to remote DRAM */
+       int  nomap;               /* load/stores whose data or inst map is missing */
+       int  noparse;             /* count of unparsable data sources */
+} trinfo_t;
+
+struct c2c_stats {
+       cpu_set_t               cpuset;         /* only CPU_ZERO()ed by perf_c2c__init() in this patch */
+       int                     nr_entries;     /* entries passed to c2c_decode_stats() */
+       u64                     total_period;   /* sum of entry->stat.period */
+       trinfo_t                t;              /* load/store transaction counters */
+       struct stats            stats;          /* updated with the weight of remote HITM loads */
+};
 
 struct perf_c2c {
         struct perf_tool tool;
         bool             raw_records;
         struct hists     hists;
+
+       /* stats filled in by c2c_decode_stats() from perf_c2c__process_load_store() */
+       struct c2c_stats        stats;
 };
 
 enum { OP, LVL, SNP, LCK, TLB };
@@ -26,6 +67,29 @@ static int lat_level = DEFAULT_LATENCY_THRES;
 static int prec_level = DEFAULT_PRECISION;
 static bool no_stores = false;
 
+#define RMT_RAM              (PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_REM_RAM2)
+#define RMT_LLC              (PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_REM_CCE2)
+/* below: (a) is a PERF_MEM_LVL_* mask, (b) is a PERF_MEM_SNOOP_* mask */
+#define L1CACHE_HIT(a)       (((a) & PERF_MEM_LVL_L1 ) && ((a) & PERF_MEM_LVL_HIT))
+#define FILLBUF_HIT(a)       (((a) & PERF_MEM_LVL_LFB) && ((a) & PERF_MEM_LVL_HIT))
+#define L2CACHE_HIT(a)       (((a) & PERF_MEM_LVL_L2 ) && ((a) & PERF_MEM_LVL_HIT))
+#define L3CACHE_HIT(a)       (((a) & PERF_MEM_LVL_L3 ) && ((a) & PERF_MEM_LVL_HIT))
+/* level bit combined with the MISS bit */
+#define L1CACHE_MISS(a)      (((a) & PERF_MEM_LVL_L1 ) && ((a) & PERF_MEM_LVL_MISS))
+#define L3CACHE_MISS(a)      (((a) & PERF_MEM_LVL_L3 ) && ((a) & PERF_MEM_LVL_MISS))
+/* uncached accesses; the load and the store test are the same expression */
+#define LD_UNCACHED(a)       (((a) & PERF_MEM_LVL_UNC) && ((a) & PERF_MEM_LVL_HIT))
+#define ST_UNCACHED(a)       (((a) & PERF_MEM_LVL_UNC) && ((a) & PERF_MEM_LVL_HIT))
+/* remote cache (REM_CCE1/2) and remote DRAM (REM_RAM1/2) tests */
+#define RMT_LLCHIT(a)        (((a) & RMT_LLC) && ((a) & PERF_MEM_LVL_HIT))
+#define RMT_HIT(a,b)         (((a) & RMT_LLC) && ((b) & PERF_MEM_SNOOP_HIT))
+#define RMT_HITM(a,b)        (((a) & RMT_LLC) && ((b) & PERF_MEM_SNOOP_HITM))
+#define RMT_MEM(a)           (((a) & RMT_RAM) && ((a) & PERF_MEM_LVL_HIT))
+/* local L3 hit qualified by the snoop result, and local DRAM test */
+#define LCL_HIT(a,b)         (L3CACHE_HIT(a) && ((b) & PERF_MEM_SNOOP_HIT))
+#define LCL_HITM(a,b)        (L3CACHE_HIT(a) && ((b) & PERF_MEM_SNOOP_HITM))
+#define LCL_MEM(a)           (((a) & PERF_MEM_LVL_LOC_RAM) && ((a) & PERF_MEM_LVL_HIT))
+
 static int perf_c2c__scnprintf_data_src(char *bf, size_t size, uint64_t val)
 {
 #define PREFIX       "["
@@ -148,6 +212,109 @@ static int perf_sample__fprintf(struct perf_sample *sample, char tag,
                       mi->iaddr.sym ? mi->iaddr.sym->name : "???");
 }
 
+/*
+ * Decode the data source of @entry into the counters of @stats.
+ * Returns 0 when the entry is usable and -1 when the caller should drop
+ * it: no data address, unparsable data source, or an unresolved map.
+ */
+static int c2c_decode_stats(struct c2c_stats *stats, struct hist_entry *entry)
+{
+       struct mem_info *mi = entry->mem_info;
+       union perf_mem_data_src *data_src = &mi->data_src;
+       u64 daddr = mi->daddr.addr;
+       u64 weight = entry->stat.weight;
+       u64 op = data_src->mem_op;
+       u64 lvl = data_src->mem_lvl;
+       u64 snoop = data_src->mem_snoop;
+       u64 lock = data_src->mem_lock;
+
+#define P(a,b) PERF_MEM_##a##_##b
+       stats->nr_entries++;
+       stats->total_period += entry->stat.period;
+
+       if (lock & P(LOCK,LOCKED)) stats->t.locks++;
+
+       if (op & P(OP,LOAD)) {
+               stats->t.load++;
+               if (!daddr) {
+                       stats->t.ld_noadrs++;
+                       return -1;
+               }
+
+               if (lvl & P(LVL,HIT)) {
+                       if (lvl & P(LVL,UNC)) stats->t.ld_uncache++;
+                       if (lvl & P(LVL,IO))  stats->t.ld_io++;
+                       if (lvl & P(LVL,LFB)) stats->t.ld_fbhit++;
+                       if (lvl & P(LVL,L1 )) stats->t.ld_l1hit++;
+                       if (lvl & P(LVL,L2 )) stats->t.ld_l2hit++;
+                       if (lvl & P(LVL,L3 )) {
+                               if (snoop & P(SNOOP,HITM))
+                                       stats->t.lcl_hitm++;
+                               else
+                                       stats->t.ld_llchit++;
+                       }
+
+                       if (lvl & P(LVL,LOC_RAM)) {
+                               stats->t.lcl_dram++;
+                               if (snoop & P(SNOOP,HIT))
+                                       stats->t.ld_shared++;
+                               else
+                                       stats->t.ld_excl++;
+                       }
+
+                       if ((lvl & P(LVL,REM_RAM1)) ||
+                           (lvl & P(LVL,REM_RAM2))) {
+                               stats->t.rmt_dram++;
+                               if (snoop & P(SNOOP,HIT))
+                                       stats->t.ld_shared++;
+                               else
+                                       stats->t.ld_excl++;
+                       }
+               }
+               /* remote cache: checked regardless of LVL_HIT */
+               if ((lvl & P(LVL,REM_CCE1)) ||
+                   (lvl & P(LVL,REM_CCE2))) {
+                       if (snoop & P(SNOOP, HIT))
+                               stats->t.rmt_hit++;
+                       else if (snoop & P(SNOOP, HITM)) {
+                               stats->t.rmt_hitm++;
+                               update_stats(&stats->stats, weight);
+                       }
+               }
+
+               if ((lvl & P(LVL,MISS)))
+                       stats->t.ld_miss++;
+
+       } else if (op & P(OP,STORE)) {
+               stats->t.store++;
+               if (!daddr) {
+                       stats->t.st_noadrs++;
+                       return -1;
+               }
+
+               if (lvl & P(LVL,HIT)) {
+                       if (lvl & P(LVL,UNC)) stats->t.st_uncache++;
+                       if (lvl & P(LVL,L1 )) stats->t.st_l1hit++;
+               }
+               if ((lvl & P(LVL,MISS)) && (lvl & P(LVL,L1)))
+                       stats->t.st_l1miss++;
+       } else {
+               /* unparsable data_src? */
+               stats->t.noparse++;
+               return -1;
+       }
+
+       if (!mi->daddr.map || !mi->iaddr.map) {
+               stats->t.nomap++;
+               pr_debug("Dropping data 0x%llx (%p) and inst 0x%llx (%p)\n",
+                        (unsigned long long)mi->daddr.addr, mi->daddr.map,
+                        (unsigned long long)mi->iaddr.addr, mi->iaddr.map);
+               return -1;
+       }
+
+       return 0;
+}
+
 static int perf_c2c__process_load_store(struct perf_c2c *c2c,
                                        struct addr_location *al,
                                        struct perf_sample *sample,
@@ -187,6 +354,14 @@ static int perf_c2c__process_load_store(struct perf_c2c *c2c,
                goto out_mem;
        }
 
+       err = c2c_decode_stats(&c2c->stats, he);
+       if (err < 0) {
+               err = 0;
+               rb_erase(&he->rb_node_in, c2c->hists.entries_in);
+               free(he);
+               goto out;
+       }
+
        c2c->hists.stats.total_period += cost;
        hists__inc_nr_events(&c2c->hists, PERF_RECORD_SAMPLE);
        return err;
@@ -280,6 +455,9 @@ static int perf_c2c__read_events(struct perf_c2c *c2c)
                goto out;
        }
 
+       if (symbol__init() < 0)
+               goto out_delete;
+
        /* setup the evsel handlers for each event type */
        evlist__for_each(session->evlist, evsel) {
                const char *name = perf_evsel__name(evsel);
@@ -294,12 +472,20 @@ static int perf_c2c__read_events(struct perf_c2c *c2c)
 
        err = perf_c2c__process_events(session, c2c);
 
+out_delete:
+       perf_session__delete(session);
 out:
        return err;
 }
 
 static int perf_c2c__init(struct perf_c2c *c2c)
 {
+       /* setup cpu map */
+       if (cpu__setup_cpunode_map() < 0) {
+               pr_err("can not setup cpu map\n");
+               return -1;
+       }
+
        sort__mode = SORT_MODE__PHYSID;
        sort__wants_unique = 1;
        sort_order = "daddr,iaddr,pid,tid";
@@ -310,6 +496,7 @@ static int perf_c2c__init(struct perf_c2c *c2c)
        }
 
        hists__init(&c2c->hists);
+       CPU_ZERO(&c2c->stats.cpuset);
 
        return 0;
 }
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to