From: Andi Kleen <a...@linux.intel.com> The uncore PMU has a lot of duplicated PMUs for different subsystems. When expanding an uncore alias we usually end up with a large number of identically named aliases, which makes perf stat output difficult to read.
Automatically sum them up in perf stat, unless --no-merge is specified. Signed-off-by: Andi Kleen <a...@linux.intel.com> --- tools/perf/Documentation/perf-stat.txt | 3 + tools/perf/builtin-stat.c | 125 +++++++++++++++++++++++++++------ tools/perf/util/evsel.h | 1 + 3 files changed, 106 insertions(+), 23 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index d96ccd4844df..320d8020bc5b 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -237,6 +237,9 @@ To interpret the results it is usually needed to know on which CPUs the workload runs on. If needed the CPUs can be forced using taskset. +--no-merge:: +Do not merge results from same PMUs. + EXAMPLES -------- diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 688dea7cb08f..76304f27c090 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -140,6 +140,7 @@ static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static bool metric_only = false; static bool force_metric_only = false; +static bool no_merge = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; @@ -1178,11 +1179,59 @@ static void aggr_update_shadow(void) } } +static void collect_aliases(struct perf_evsel *counter, + void (*cb)(struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + struct perf_evsel *alias; + + alias = list_prepare_entry(counter, &(evsel_list->entries), node); + cb(counter, data, true); + if (no_merge) + return; + list_for_each_entry_continue (alias, &evsel_list->entries, node) { + if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || + alias->scale != counter->scale || + alias->cgrp != counter->cgrp || + strcmp(alias->unit, counter->unit) || + nsec_counter(alias) != nsec_counter(counter)) + break; + alias->alias = true; + cb(alias, data, false); + } +} + +struct aggr_data { + u64 ena, run, val; + int id; + int nr; + int cpu; +}; + +static void aggr_cb(struct perf_evsel *counter, void *data, bool first) +{ + struct aggr_data *ad = data; + int cpu, cpu2, s2; + + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + cpu2 = perf_evsel__cpus(counter)->map[cpu]; + s2 = aggr_get_id(evsel_list->cpus, cpu2); + if (s2 != ad->id) + continue; + ad->val += perf_counts(counter->counts, cpu, 0)->val; + ad->ena += perf_counts(counter->counts, cpu, 0)->ena; + ad->run += perf_counts(counter->counts, cpu, 0)->run; + if (first) + ad->nr++; + } +} + static void print_aggr(char *prefix) { FILE *output = stat_config.output; struct perf_evsel *counter; - int cpu, s, s2, id, nr; + int s, id, nr; double uval; u64 ena, run, val; bool first; @@ -1197,23 +1246,22 @@ static void print_aggr(char *prefix) * Without each counter has its own line. */ for (s = 0; s < aggr_map->nr; s++) { + struct aggr_data ad; if (prefix && metric_only) fprintf(output, "%s", prefix); - id = aggr_map->map[s]; + ad.id = id = aggr_map->map[s]; first = true; evlist__for_each_entry(evsel_list, counter) { - val = ena = run = 0; - nr = 0; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); - if (s2 != id) - continue; - val += perf_counts(counter->counts, cpu, 0)->val; - ena += perf_counts(counter->counts, cpu, 0)->ena; - run += perf_counts(counter->counts, cpu, 0)->run; - nr++; - } + if (counter->alias) + continue; + ad.val = ad.ena = ad.run = 0; + ad.nr = 0; + collect_aliases(counter, aggr_cb, &ad); + nr = ad.nr; + ena = ad.ena; + run = ad.run; + val = ad.val; if (first && metric_only) { first = false; aggr_printout(counter, id, nr); @@ -1257,6 +1305,21 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) } } +struct caggr_data { + double avg, avg_enabled, avg_running; +}; + +static void counter_aggr_cb(struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct caggr_data *cd = data; + struct perf_stat_evsel *ps = counter->priv; + + cd->avg += avg_stats(&ps->res_stats[0]); + cd->avg_enabled += avg_stats(&ps->res_stats[1]); + cd->avg_running += avg_stats(&ps->res_stats[2]); +} + /* * Print out the results of a single counter: * aggregated counts in system-wide mode @@ -1264,23 +1327,32 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) static void print_counter_aggr(struct perf_evsel *counter, char *prefix) { FILE *output = stat_config.output; - struct perf_stat_evsel *ps = counter->priv; - double avg = avg_stats(&ps->res_stats[0]); double uval; - double avg_enabled, avg_running; + struct caggr_data cd = { .avg = 0.0 }; - avg_enabled = avg_stats(&ps->res_stats[1]); - avg_running = avg_stats(&ps->res_stats[2]); + if (counter->alias) + return; + collect_aliases(counter, counter_aggr_cb, &cd); if (prefix && !metric_only) fprintf(output, "%s", prefix); - uval = avg * counter->scale; - printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); + uval = cd.avg * counter->scale; + printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg); if (!metric_only) fprintf(output, "\n"); } +static void counter_cb(struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct aggr_data *ad = data; + + ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; + ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; + ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; +} + /* * Print out the results of a single counter: * does not use aggregated count in system-wide @@ -1292,10 +1364,16 @@ static void print_counter(struct perf_evsel *counter, char *prefix) double uval; int cpu; + if (counter->alias) + return; + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - val = perf_counts(counter->counts, cpu, 0)->val; - ena = perf_counts(counter->counts, cpu, 0)->ena; - run = perf_counts(counter->counts, cpu, 0)->run; + struct aggr_data ad = { .cpu = cpu }; + + collect_aliases(counter, counter_cb, &ad); + val = ad.val; + ena = ad.ena; + run = ad.run; if (prefix) fprintf(output, "%s", prefix); @@ -1633,6 +1711,7 @@ static const struct option stat_options[] = { "list of cpus to monitor in system-wide"), OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, "disable CPU count aggregation", AGGR_NONE), + OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), OPT_STRING('x', "field-separator", &csv_sep, "separator", "print counts with custom separator"), OPT_CALLBACK('G', "cgroup", &evsel_list, "name", diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index b1503b0ecdff..4e3158fe79c2 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -128,6 +128,7 @@ struct perf_evsel { bool cmdline_group_boundary; struct list_head config_terms; int bpf_fd; + bool alias; }; union u64_swap { -- 2.5.5