perf stat --per-thread doesn't support outputting metrics, like IPC.

We should support this because it would make it easy to collect metrics
for the individual threads of an application.

1. Current output, for example:

root@skl:/tmp# perf stat --per-thread -p 21623
^C
 Performance counter stats for process id '21623':

          vmstat-21623              0.517479      task-clock (msec)         #    0.000 CPUs utilized
          vmstat-21623                     1      context-switches
          vmstat-21623                     0      cpu-migrations
          vmstat-21623                     0      page-faults
          vmstat-21623               461,306      cycles
          vmstat-21623               630,724      instructions
          vmstat-21623               136,265      branches
          vmstat-21623                 2,520      branch-misses

       1.444020756 seconds time elapsed

root@skl:/tmp# perf stat --per-thread --metrics ipc -p 21623
^C
 Performance counter stats for process id '21623':

          vmstat-21623               631,185      inst_retired.any
          vmstat-21623               605,893      cpu_clk_unhalted.thread

       1.415679293 seconds time elapsed

2. With this patch, the result would be:

root@skl:/tmp# perf stat --per-thread -p 21623
^C
 Performance counter stats for process id '21623':

          vmstat-21623              0.533759      task-clock (msec)         #    0.000 CPUs utilized
          vmstat-21623                     1      context-switches          #    0.002 M/sec
          vmstat-21623                     0      cpu-migrations            #    0.000 K/sec
          vmstat-21623                     0      page-faults               #    0.000 K/sec
          vmstat-21623               473,896      cycles                    #    0.888 GHz
          vmstat-21623               631,072      instructions              #    1.33  insn per cycle
          vmstat-21623               136,307      branches                  #  255.372 M/sec
          vmstat-21623                 2,524      branch-misses             #    1.85% of all branches

       1.544862861 seconds time elapsed

root@skl:/tmp# perf stat --per-thread --metrics ipc -p 21623
^C
 Performance counter stats for process id '21623':

          vmstat-21623             1,259,104      inst_retired.any          #      1.2 IPC
          vmstat-21623             1,056,756      cpu_clk_unhalted.thread

       2.040954502 seconds time elapsed

Signed-off-by: Jin Yao <[email protected]>
---
 tools/perf/util/stat.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 35e9848..1164e68 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -314,6 +314,26 @@ static int process_counter_maps(struct perf_stat_config *config,
        return 0;
 }
 
+static int process_aggr_thread_counter(struct perf_evsel *counter)
+{
+       const int nr_threads = thread_map__nr(counter->threads);
+       const int nr_cpus = cpu_map__nr(counter->cpus);
+       int t;
+
+       /* Per thread: sum the counts of every CPU, scale, update shadow stats. */
+       for (t = 0; t < nr_threads; t++) {
+               u64 sum = 0, scaled;
+               int c;
+
+               for (c = 0; c < nr_cpus; c++)
+                       sum += perf_counts(counter->counts, c, t)->val;
+
+               scaled = sum * counter->scale;
+               perf_stat__update_shadow_stats(counter, &scaled, 0);
+       }
+       return 0;
+}
+
 int perf_stat_process_counter(struct perf_stat_config *config,
                              struct perf_evsel *counter)
 {
@@ -342,6 +362,9 @@ int perf_stat_process_counter(struct perf_stat_config *config,
        if (ret)
                return ret;
 
+       if (config->aggr_mode == AGGR_THREAD)
+               return process_aggr_thread_counter(counter);
+
        if (config->aggr_mode != AGGR_GLOBAL)
                return 0;
 
-- 
2.7.4

Reply via email to