From: Kan Liang <kan.li...@intel.com>

Show the frequency, CPU utilization and percent performance for each
symbol in perf report when --stdio --show-freq-perf is given.

In a sampling group, only the group leader does the sampling, so only the
group leader's frequency needs to be printed when --group is used.

Here is an example.

$ perf report --stdio --group --show-freq-perf

                                 Overhead   FREQ MHz   CPU%  CORE_BUSY%
Command      Shared Object     Symbol
 ........................................  .........  .....  ..........
...........  ................  ......................

    99.54%  99.54%  99.53%  99.53%  99.53%       2301     96         99
tchain_edit  tchain_edit       [.] f3
     0.20%   0.20%   0.20%   0.20%   0.20%       2301     98         99
tchain_edit  tchain_edit       [.] f2
     0.05%   0.05%   0.05%   0.05%   0.05%       2300     98         99
tchain_edit  [kernel.vmlinux]  [k] read_tsc

Signed-off-by: Kan Liang <kan.li...@intel.com>
---
 tools/perf/Documentation/perf-report.txt | 12 ++++++
 tools/perf/builtin-report.c              | 22 +++++++++-
 tools/perf/ui/hist.c                     | 71 +++++++++++++++++++++++++++++---
 tools/perf/util/hist.h                   |  3 ++
 tools/perf/util/session.c                | 33 ++++++++-------
 tools/perf/util/sort.c                   |  3 ++
 tools/perf/util/symbol.h                 | 12 +++++-
 7 files changed, 134 insertions(+), 22 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt 
b/tools/perf/Documentation/perf-report.txt
index a18ba75..9f979a7 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -306,6 +306,18 @@ OPTIONS
        special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See
        'perf mem' for simpler access.
 
+--show-freq-perf::
+       Show CPU frequency and performance results from sample read.
+       To generate the frequency and performance output, the perf.data file
+       must have been recorded using group read with the special events
+       cycles, ref-cycles, msr/tsc/, msr/aperf/ or msr/mperf/.
+       FREQ MHz: The frequency during the sample interval. Needs the cycles
+                 and ref-cycles events.
+       CPU%: CPU utilization during the sample interval. Needs ref-cycles and
+             msr/tsc/ events.
+       CORE_BUSY%: actual percent performance (APERF/MPERF%) during the
+                   sample interval. Needs msr/aperf/ and msr/mperf/ events.
+
 --percent-limit::
        Do not show entries which have an overhead under that percent.
        (Default: 0).
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index e108729..76d6ea9 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -133,7 +133,8 @@ static int hist_iter__report_callback(struct 
hist_entry_iter *iter,
        struct branch_info *bi;
 
        if ((iter->ops == &hist_iter_normal) &&
-           perf_evsel__is_group_leader(evsel))
+           perf_evsel__is_group_leader(evsel) &&
+           symbol_conf.show_freq_perf)
                set_he_freq_perf(rep->session, iter);
 
        if (!ui__has_annotation())
@@ -772,6 +773,8 @@ int cmd_report(int argc, const char **argv, const char 
*prefix __maybe_unused)
        OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
                    "Enable kernel symbol demangling"),
        OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
+       OPT_BOOLEAN(0, "show-freq-perf", &symbol_conf.show_freq_perf,
+                   "show CPU freqency and performance info"),
        OPT_CALLBACK(0, "percent-limit", &report, "percent",
                     "Don't show entries under that percent", 
parse_percent_limit),
        OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
@@ -788,7 +791,9 @@ int cmd_report(int argc, const char **argv, const char 
*prefix __maybe_unused)
        struct perf_data_file file = {
                .mode  = PERF_DATA_MODE_READ,
        };
+       struct perf_evsel *pos;
        int ret = hists__init();
+       perf_freq_t freq_data_status = { 0 };
 
        if (ret < 0)
                return ret;
@@ -873,6 +878,21 @@ repeat:
                symbol_conf.cumulate_callchain = false;
        }
 
+
+       if (symbol_conf.show_freq_perf) {
+               symbol_conf.freq_perf_type = 0;
+               evlist__for_each(session->evlist, pos) {
+                       perf_freq__init(session->header.env.msr_pmu_type,
+                                       pos, freq_data_status, 1);
+               }
+               if (perf_freq__has_freq(freq_data_status))
+                       symbol_conf.freq_perf_type |= 1U << DISPLAY_FREQ;
+               if (perf_freq__has_cpu_util(freq_data_status))
+                       symbol_conf.freq_perf_type |= 1U << DISPLAY_CPU_UTIL;
+               if (perf_freq__has_core_busy(freq_data_status))
+                       symbol_conf.freq_perf_type |= 1U << DISPLAY_CORE_BUSY;
+       }
+
        if (setup_sorting() < 0) {
                if (sort_order)
                        parse_options_usage(report_usage, options, "s", 1);
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 25d6083..c2be455 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -17,7 +17,7 @@
 
 static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
                      hpp_field_fn get_field, const char *fmt, int len,
-                     hpp_snprint_fn print_fn, bool fmt_percent)
+                     hpp_snprint_fn print_fn, bool fmt_percent, bool single)
 {
        int ret;
        struct hists *hists = he->hists;
@@ -36,7 +36,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry 
*he,
        } else
                ret = hpp__call_print_fn(hpp, print_fn, fmt, len, 
get_field(he));
 
-       if (perf_evsel__is_group_event(evsel)) {
+       if (perf_evsel__is_group_event(evsel) && !single) {
                int prev_idx, idx_delta;
                struct hist_entry *pair;
                int nr_members = evsel->nr_members;
@@ -109,10 +109,16 @@ int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp 
*hpp,
             const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent)
 {
        int len = fmt->user_len ?: fmt->len;
+       bool single = false;
+
+       if (((fmt == &perf_hpp__format[PERF_HPP__FREQ]) ||
+            (fmt == &perf_hpp__format[PERF_HPP__CPU_UTIL]) ||
+            (fmt == &perf_hpp__format[PERF_HPP__CORE_BUSY])))
+               single = true;
 
        if (symbol_conf.field_sep) {
                return __hpp__fmt(hpp, he, get_field, fmtstr, 1,
-                                 print_fn, fmt_percent);
+                                 print_fn, fmt_percent, single);
        }
 
        if (fmt_percent)
@@ -120,7 +126,7 @@ int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
        else
                len -= 1;
 
-       return  __hpp__fmt(hpp, he, get_field, fmtstr, len, print_fn, 
fmt_percent);
+       return  __hpp__fmt(hpp, he, get_field, fmtstr, len, print_fn, 
fmt_percent, single);
 }
 
 int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
@@ -234,6 +240,30 @@ static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct 
perf_hpp *hpp,
        return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
 }
 
+static int hpp__single_width_fn(struct perf_hpp_fmt *fmt,
+                        struct perf_hpp *hpp __maybe_unused,
+                        struct perf_evsel *evsel)
+{
+       int len = fmt->user_len ?: fmt->len;
+
+       if (symbol_conf.event_group && !symbol_conf.show_freq_perf)
+               len = max(len, evsel->nr_members * fmt->len);
+
+       if (len < (int)strlen(fmt->name))
+               len = strlen(fmt->name);
+
+       return len;
+}
+
+static int hpp__single_header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp 
*hpp,
+                         struct perf_evsel *evsel)
+{
+       int len = hpp__single_width_fn(fmt, hpp, evsel);
+
+       return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
+}
+
+
 static int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
 {
        va_list args;
@@ -363,6 +393,9 @@ HPP_PERCENT_ACC_FNS(overhead_acc, period)
 
 HPP_RAW_FNS(samples, nr_events)
 HPP_RAW_FNS(period, period)
+HPP_RAW_FNS(freq, freq)
+HPP_RAW_FNS(cpu_util, cpu_util)
+HPP_RAW_FNS(core_busy, core_busy)
 
 static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
                            struct hist_entry *a __maybe_unused,
@@ -395,6 +428,17 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt 
__maybe_unused,
                .sort   = hpp__sort_ ## _fn,            \
        }
 
+#define HPP__SINGLE_PRINT_FNS(_name, _fn)              \
+       {                                               \
+               .name   = _name,                        \
+               .header = hpp__single_header_fn,        \
+               .width  = hpp__single_width_fn,         \
+               .entry  = hpp__entry_ ## _fn,           \
+               .cmp    = hpp__nop_cmp,                 \
+               .collapse = hpp__nop_cmp,               \
+               .sort   = hpp__sort_ ## _fn,            \
+       }
+
 #define HPP__PRINT_FNS(_name, _fn)                     \
        {                                               \
                .name   = _name,                        \
@@ -414,7 +458,10 @@ struct perf_hpp_fmt perf_hpp__format[] = {
        HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us),
        HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc),
        HPP__PRINT_FNS("Samples", samples),
-       HPP__PRINT_FNS("Period", period)
+       HPP__PRINT_FNS("Period", period),
+       HPP__SINGLE_PRINT_FNS("FREQ MHz", freq),
+       HPP__SINGLE_PRINT_FNS("CPU%", cpu_util),
+       HPP__SINGLE_PRINT_FNS("CORE_BUSY%", core_busy)
 };
 
 LIST_HEAD(perf_hpp__list);
@@ -485,6 +532,15 @@ void perf_hpp__init(void)
        if (symbol_conf.show_total_period)
                perf_hpp__column_enable(PERF_HPP__PERIOD);
 
+       if (symbol_conf.show_freq_perf) {
+               if (symbol_conf.freq_perf_type & (1U << DISPLAY_FREQ))
+                       perf_hpp__column_enable(PERF_HPP__FREQ);
+               if (symbol_conf.freq_perf_type & (1U << DISPLAY_CPU_UTIL))
+                       perf_hpp__column_enable(PERF_HPP__CPU_UTIL);
+               if (symbol_conf.freq_perf_type & (1U << DISPLAY_CORE_BUSY))
+                       perf_hpp__column_enable(PERF_HPP__CORE_BUSY);
+       }
+
        /* prepend overhead field for backward compatiblity.  */
        list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list;
        if (list_empty(list))
@@ -652,6 +708,9 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct 
hists *hists)
                return;
 
        switch (idx) {
+       case PERF_HPP__CPU_UTIL:
+               fmt->len = 5;
+               break;
        case PERF_HPP__OVERHEAD:
        case PERF_HPP__OVERHEAD_SYS:
        case PERF_HPP__OVERHEAD_US:
@@ -661,6 +720,8 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct 
hists *hists)
 
        case PERF_HPP__OVERHEAD_GUEST_SYS:
        case PERF_HPP__OVERHEAD_GUEST_US:
+       case PERF_HPP__FREQ:
+       case PERF_HPP__CORE_BUSY:
                fmt->len = 9;
                break;
 
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index de6d58e..df07f28 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -237,6 +237,9 @@ enum {
        PERF_HPP__OVERHEAD_ACC,
        PERF_HPP__SAMPLES,
        PERF_HPP__PERIOD,
+       PERF_HPP__FREQ,
+       PERF_HPP__CPU_UTIL,
+       PERF_HPP__CORE_BUSY,
 
        PERF_HPP__MAX_INDEX
 };
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 215ff73..19f6fc4 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -907,26 +907,29 @@ static void sample_read__printf(struct perf_sample 
*sample,
                        printf("..... id %016" PRIx64
                               ", value %016" PRIx64 "\n",
                               value->id, value->value);
-
-                       sid = perf_evlist__id2sid(evlist, value->id);
-                       evsel = sid->evsel;
-                       if (evsel != NULL)
-                               perf_freq__init(evlist->env->msr_pmu_type,
-                                               evsel, data, value->value);
+                       if (symbol_conf.show_freq_perf) {
+                               sid = perf_evlist__id2sid(evlist, value->id);
+                               evsel = sid->evsel;
+                               if (evsel != NULL)
+                                       
perf_freq__init(evlist->env->msr_pmu_type,
+                                                       evsel, data, 
value->value);
+                       }
                }
        } else
                printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n",
                        sample->read.one.id, sample->read.one.value);
 
-       if (perf_freq__has_freq(data))
-               printf("..... Freq %lu MHz\n",
-                      perf_freq__get_freq(data, cpu_max_freq/1000));
-       if (perf_freq__has_cpu_util(data))
-               printf("..... CPU%% %lu%%\n",
-                      perf_freq__get_cpu_util(data));
-       if (perf_freq__has_core_busy(data))
-               printf("..... CORE_BUSY%% %lu%%\n",
-                      perf_freq__get_core_busy(data));
+       if (symbol_conf.show_freq_perf) {
+               if (perf_freq__has_freq(data))
+                       printf("..... Freq %lu MHz\n",
+                              perf_freq__get_freq(data, cpu_max_freq/1000));
+               if (perf_freq__has_cpu_util(data))
+                       printf("..... CPU%% %lu%%\n",
+                              perf_freq__get_cpu_util(data));
+               if (perf_freq__has_core_busy(data))
+                       printf("..... CORE_BUSY%% %lu%%\n",
+                              perf_freq__get_core_busy(data));
+       }
 }
 
 static void dump_event(struct perf_evlist *evlist, union perf_event *event,
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 7e38716..8eb81db 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1303,6 +1303,9 @@ static struct hpp_dimension hpp_sort_dimensions[] = {
        DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
        DIM(PERF_HPP__SAMPLES, "sample"),
        DIM(PERF_HPP__PERIOD, "period"),
+       DIM(PERF_HPP__FREQ, "freq"),
+       DIM(PERF_HPP__CPU_UTIL, "cpu_u"),
+       DIM(PERF_HPP__CORE_BUSY, "core_busy"),
 };
 
 #undef DIM
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 440ba8a..8b21fb4 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -80,6 +80,14 @@ static inline size_t symbol__size(const struct symbol *sym)
 struct strlist;
 struct intlist;
 
+enum freq_perf_type_index {
+       DISPLAY_FREQ            = 0,
+       DISPLAY_CPU_UTIL,
+       DISPLAY_CORE_BUSY,
+
+       DISPLAY_MAX
+};
+
 struct symbol_conf {
        unsigned short  priv_size;
        unsigned short  nr_events;
@@ -107,7 +115,8 @@ struct symbol_conf {
                        show_hist_headers,
                        branch_callstack,
                        has_filter,
-                       show_ref_callgraph;
+                       show_ref_callgraph,
+                       show_freq_perf;
        const char      *vmlinux_name,
                        *kallsyms_name,
                        *source_prefix,
@@ -132,6 +141,7 @@ struct symbol_conf {
        struct intlist  *pid_list,
                        *tid_list;
        const char      *symfs;
+       u64             freq_perf_type;
 };
 
 extern struct symbol_conf symbol_conf;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to