From: Andi Kleen <a...@linux.intel.com>

Enable metrics printing in --per-core / --per-socket mode. We need
to save the shadow metrics in a unique place. Always use the first
CPU in the aggregation. Then use the same CPU to retrieve the
shadow value later.

Example output:

% perf stat --per-core -a ./BC1s

 Performance counter stats for 'system wide':

S0-C0           2        2966.020381      task-clock (msec)         #    2.004 CPUs utilized            (100.00%)
S0-C0           2                 49      context-switches          #    0.017 K/sec                    (100.00%)
S0-C0           2                  4      cpu-migrations            #    0.001 K/sec                    (100.00%)
S0-C0           2                467      page-faults               #    0.157 K/sec
S0-C0           2      4,599,061,773      cycles                    #    1.551 GHz                      (100.00%)
S0-C0           2      9,755,886,883      instructions              #    2.12  insn per cycle           (100.00%)
S0-C0           2      1,906,272,125      branches                  #  642.704 M/sec                    (100.00%)
S0-C0           2         81,180,867      branch-misses             #    4.26% of all branches
S0-C1           2        2965.995373      task-clock (msec)         #    2.003 CPUs utilized            (100.00%)
S0-C1           2                 62      context-switches          #    0.021 K/sec                    (100.00%)
S0-C1           2                  8      cpu-migrations            #    0.003 K/sec                    (100.00%)
S0-C1           2                281      page-faults               #    0.095 K/sec
S0-C1           2          6,347,290      cycles                    #    0.002 GHz                      (100.00%)
S0-C1           2          4,654,156      instructions              #    0.73  insn per cycle           (100.00%)
S0-C1           2            947,121      branches                  #    0.319 M/sec                    (100.00%)
S0-C1           2             37,322      branch-misses             #    3.94% of all branches

       1.480409747 seconds time elapsed

v2: Rebase to older patches
v3: Document shadow cpus. Fix aggr_get_id argument. Fix -A shadows (Jiri)
v4: Fix crash with --per-thread
Acked-by: Jiri Olsa <jo...@kernel.org>
Signed-off-by: Andi Kleen <a...@linux.intel.com>
---
 tools/perf/builtin-stat.c     | 64 +++++++++++++++++++++++++++++++++++++------
 tools/perf/util/stat-shadow.c |  7 +++++
 2 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2ffb822..9b5089c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -740,6 +740,8 @@ struct outstate {
        bool newline;
        const char *prefix;
        int  nfields;
+       int  id, nr;
+       struct perf_evsel *evsel;
 };
 
 #define METRIC_LEN  35
@@ -755,12 +757,9 @@ static void do_new_line_std(struct outstate *os)
 {
        fputc('\n', os->fh);
        fputs(os->prefix, os->fh);
+       aggr_printout(os->evsel, os->id, os->nr);
        if (stat_config.aggr_mode == AGGR_NONE)
                fprintf(os->fh, "        ");
-       if (stat_config.aggr_mode == AGGR_CORE)
-               fprintf(os->fh, "                  ");
-       if (stat_config.aggr_mode == AGGR_SOCKET)
-               fprintf(os->fh, "            ");
        fprintf(os->fh, "                                                 ");
 }
 
@@ -798,6 +797,7 @@ static void new_line_csv(void *ctx)
        fputc('\n', os->fh);
        if (os->prefix)
                fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+       aggr_printout(os->evsel, os->id, os->nr);
        for (i = 0; i < os->nfields; i++)
                fputs(csv_sep, os->fh);
 }
@@ -855,6 +855,28 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+       int i;
+
+       if (!aggr_get_id)
+               return 0;
+
+       if (stat_config.aggr_mode == AGGR_NONE)
+               return id;
+
+       if (stat_config.aggr_mode == AGGR_GLOBAL)
+               return 0;
+
+       for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+               int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+               if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+                       return cpu2;
+       }
+       return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
        FILE *output = stat_config.output;
@@ -891,7 +913,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
        struct perf_stat_output_ctx out;
        struct outstate os = {
                .fh = stat_config.output,
-               .prefix = prefix ? prefix : ""
+               .prefix = prefix ? prefix : "",
+               .id = id,
+               .nr = nr,
+               .evsel = counter,
        };
        print_metric_t pm = print_metric_std;
        void (*nl)(void *);
@@ -958,16 +983,37 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
        }
 
        perf_stat__print_shadow_stats(counter, uval,
-                               stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
-                               cpu_map__id_to_cpu(id),
+                               first_shadow_cpu(counter, id),
                                &out);
-
        if (!csv_output) {
                print_noise(counter, noise);
                print_running(run, ena);
        }
 }
 
+static void aggr_update_shadow(void)
+{
+       int cpu, s2, id, s;
+       u64 val;
+       struct perf_evsel *counter;
+
+       for (s = 0; s < aggr_map->nr; s++) {
+               id = aggr_map->map[s];
+               evlist__for_each(evsel_list, counter) {
+                       val = 0;
+                       for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+                               s2 = aggr_get_id(evsel_list->cpus, cpu);
+                               if (s2 != id)
+                                       continue;
+                               val += perf_counts(counter->counts, cpu, 0)->val;
+                       }
+                       val = val * counter->scale;
+                       perf_stat__update_shadow_stats(counter, &val,
+                                                      first_shadow_cpu(counter, id));
+               }
+       }
+}
+
 static void print_aggr(char *prefix)
 {
        FILE *output = stat_config.output;
@@ -979,6 +1025,8 @@ static void print_aggr(char *prefix)
        if (!(aggr_map || aggr_get_id))
                return;
 
+       aggr_update_shadow();
+
        for (s = 0; s < aggr_map->nr; s++) {
                id = aggr_map->map[s];
                evlist__for_each(evsel_list, counter) {
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 367e220..5e2d2e3 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -14,6 +14,13 @@ enum {
 
 #define NUM_CTX CTX_BIT_MAX
 
+/*
+ * AGGR_GLOBAL: Use CPU 0
+ * AGGR_SOCKET: Use first CPU of socket
+ * AGGR_CORE: Use first CPU of core
+ * AGGR_NONE: Use matching CPU
+ * AGGR_THREAD: Not supported?
+ */
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
-- 
2.5.0

Reply via email to