It would be useful to support sorting for all blocks by the
sampled cycles percent per block. This is useful to concentrate
on the globally busiest/slowest blocks.

This patch implements a new sort option "total_cycles" which sorts
all blocks by 'Sampled Cycles%'. 'Sampled Cycles%' is the block's
aggregated sampled cycles divided by the total sampled cycles.

Note that this patch only supports "--stdio" mode.

For example,

perf record -b ./div
perf report -s total_cycles --stdio

 # To display the perf.data header info, please use --header/--header-only options.
 #
 #
 # Total Lost Samples: 0
 #
 # Samples: 2M of event 'cycles'
 # Event count (approx.): 2753248
 #
 # Sampled Cycles%  Sampled Cycles  Avg Cycles%  Avg Cycles                     
                         [Program Block Range]         Shared Object
 # ...............  ..............  ...........  ..........  
.................................................................  
....................
 #
            26.04%            2.8M        0.40%          18                     
                        [div.c:42 -> div.c:39]                   div
            15.17%            1.2M        0.16%           7                     
            [random_r.c:357 -> random_r.c:380]          libc-2.27.so
             5.11%          402.0K        0.04%           2                     
                        [div.c:27 -> div.c:28]                   div
             4.87%          381.6K        0.04%           2                     
                [random.c:288 -> random.c:291]          libc-2.27.so
             4.53%          381.0K        0.04%           2                     
                        [div.c:40 -> div.c:40]                   div
             3.85%          300.9K        0.02%           1                     
                        [div.c:22 -> div.c:25]                   div
             3.08%          241.1K        0.02%           1                     
                      [rand.c:26 -> rand.c:27]          libc-2.27.so
             3.06%          240.0K        0.02%           1                     
                [random.c:291 -> random.c:291]          libc-2.27.so
             2.78%          215.7K        0.02%           1                     
                [random.c:298 -> random.c:298]          libc-2.27.so
             2.52%          198.3K        0.02%           1                     
                [random.c:293 -> random.c:293]          libc-2.27.so
             2.36%          184.8K        0.02%           1                     
                      [rand.c:28 -> rand.c:28]          libc-2.27.so
             2.33%          180.5K        0.02%           1                     
                [random.c:295 -> random.c:295]          libc-2.27.so
             2.28%          176.7K        0.02%           1                     
                [random.c:295 -> random.c:295]          libc-2.27.so
             2.20%          168.8K        0.02%           1                     
                    [rand@plt+0 -> rand@plt+0]                   div
             1.98%          158.2K        0.02%           1                     
            [random_r.c:388 -> random_r.c:388]          libc-2.27.so
             1.57%          123.3K        0.02%           1                     
                        [div.c:42 -> div.c:44]                   div
             1.44%          116.0K        0.42%          19                     
            [random_r.c:357 -> random_r.c:394]          libc-2.27.so
             0.25%          182.5K        0.02%           1                     
            [random_r.c:388 -> random_r.c:391]          libc-2.27.so
             0.00%              48        1.07%          48                     
    [x86_pmu_enable+284 -> x86_pmu_enable+298]     [kernel.kallsyms]
             0.00%              74        1.64%          74                     
         [vm_mmap_pgoff+0 -> vm_mmap_pgoff+92]     [kernel.kallsyms]
             0.00%              73        1.62%          73                     
                     [vm_mmap+0 -> vm_mmap+48]     [kernel.kallsyms]
             0.00%              63        0.69%          31                     
                   [up_write+0 -> up_write+34]     [kernel.kallsyms]
             0.00%              13        0.29%          13                     
  [setup_arg_pages+396 -> setup_arg_pages+413]     [kernel.kallsyms]
             0.00%               3        0.07%           3                     
  [setup_arg_pages+418 -> setup_arg_pages+450]     [kernel.kallsyms]
             0.00%             616        6.84%         308                    
[security_mmap_file+0 -> security_mmap_file+72]     [kernel.kallsyms]
             0.00%              23        0.51%          23                   
[security_mmap_file+77 -> security_mmap_file+87]     [kernel.kallsyms]
             0.00%               4        0.02%           1                     
              [sched_clock+0 -> sched_clock+4]     [kernel.kallsyms]
             0.00%               4        0.02%           1                     
             [sched_clock+9 -> sched_clock+12]     [kernel.kallsyms]
             0.00%               1        0.02%           1                     
            [rcu_nmi_exit+0 -> rcu_nmi_exit+9]     [kernel.kallsyms]
             0.00%               1        0.02%           1                     
          [rcu_nmi_exit+14 -> rcu_nmi_exit+15]     [kernel.kallsyms]
             0.00%               5        0.11%           5                     
           [rcu_irq_exit+0 -> rcu_irq_exit+79]     [kernel.kallsyms]
             0.00%               2        0.04%           2                     
     [printk_nmi_exit+0 -> printk_nmi_exit+16]     [kernel.kallsyms]
             0.00%               3        0.07%           3            
[perf_sample_event_took+0 -> perf_sample_event_took+56]     [kernel.kallsyms]
             0.00%               1        0.02%           1         
[perf_sample_event_took+184 -> perf_sample_event_took+185]     [kernel.kallsyms]
             0.00%              72        1.60%          72                     
   [perf_iterate_ctx+0 -> perf_iterate_ctx+50]     [kernel.kallsyms]
             0.00%               1        0.02%           1           
[perf_event_nmi_handler+50 -> perf_event_nmi_handler+52]     [kernel.kallsyms]
             0.00%               1        0.02%           1           
[perf_event_nmi_handler+57 -> perf_event_nmi_handler+63]     [kernel.kallsyms]
             0.00%               1        0.02%           1           
[perf_event_nmi_handler+68 -> perf_event_nmi_handler+74]     [kernel.kallsyms]

Signed-off-by: Jin Yao <yao....@linux.intel.com>
---
 tools/perf/Documentation/perf-report.txt |  10 +
 tools/perf/builtin-report.c              | 423 ++++++++++++++++++++++-
 tools/perf/ui/stdio/hist.c               |  22 ++
 tools/perf/util/block.h                  |   1 +
 tools/perf/util/hist.c                   |   4 +
 tools/perf/util/sort.c                   |   5 +
 tools/perf/util/sort.h                   |   1 +
 tools/perf/util/symbol_conf.h            |   1 +
 8 files changed, 463 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt 
b/tools/perf/Documentation/perf-report.txt
index 7315f155803f..b1f9c93a91fd 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -124,6 +124,7 @@ OPTIONS
        - in_tx: branch in TSX transaction
        - abort: TSX transaction abort.
        - cycles: Cycles in basic block
+       - total_cycles: sort all blocks by the 'Sampled Cycles%'.
 
        And default sort keys are changed to comm, dso_from, symbol_from, dso_to
        and symbol_to, see '--branch-stack'.
@@ -136,6 +137,15 @@ OPTIONS
        executed, such as a memory access bottleneck. If a function has high 
overhead
        and low IPC, it's worth further analyzing it to optimize its 
performance.
 
+       When total_cycles is specified, all blocks are sorted by 'Sampled Cycles%'.
+       This is useful to concentrate on the globally slowest blocks.
+       The output then contains some new columns:
+       'Sampled Cycles%' - block sampled cycles aggregation / total sampled cycles
+       'Sampled Cycles' - block sampled cycles aggregation
+       'Avg Cycles%' - block average sampled cycles / sum of total block average
+       sampled cycles
+       'Avg Cycles' - block average sampled cycles
+
        If the --mem-mode option is used, the following sort keys are also 
available
        (incompatible with --branch-stack):
        symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 094bb43cbcf5..189fd1b75ac8 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -51,6 +51,7 @@
 #include "util/util.h" // perf_tip()
 #include "ui/ui.h"
 #include "ui/progress.h"
+#include "util/block.h"
 
 #include <dlfcn.h>
 #include <errno.h>
@@ -96,10 +97,64 @@ struct report {
        float                   min_percent;
        u64                     nr_entries;
        u64                     queue_size;
+       u64                     cycles_count;
+       u64                     block_cycles;
        int                     socket_filter;
        DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
        struct branch_type_stat brtype_stat;
        bool                    symbol_ipc;
+       bool                    total_cycles;
+       struct block_hist       block_hist;
+};
+
+struct block_fmt {
+       struct perf_hpp_fmt     fmt;
+       int                     idx;
+       int                     width;
+       const char              *header;
+       struct report           *rep;
+};
+
+enum {
+       PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV,
+       PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
+       PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
+       PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
+       PERF_HPP_REPORT__BLOCK_RANGE,
+       PERF_HPP_REPORT__BLOCK_DSO,
+       PERF_HPP_REPORT__BLOCK_MAX_INDEX
+};
+
+static struct block_fmt block_fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
+
+static struct block_header_column{
+       const char *name;
+       int width;
+} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
+       [PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV] = {
+               .name = "Sampled Cycles%",
+               .width = 15,
+       },
+       [PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
+               .name = "Sampled Cycles",
+               .width = 14,
+       },
+       [PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
+               .name = "Avg Cycles%",
+               .width = 11,
+       },
+       [PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
+               .name = "Avg Cycles",
+               .width = 10,
+       },
+       [PERF_HPP_REPORT__BLOCK_RANGE] = {
+               .name = "[Program Block Range]",
+               .width = 70,
+       },
+       [PERF_HPP_REPORT__BLOCK_DSO] = {
+               .name = "Shared Object",
+               .width = 20,
+       }
 };
 
 static int report__config(const char *var, const char *value, void *cb)
@@ -277,7 +332,8 @@ static int process_sample_event(struct perf_tool *tool,
                if (!sample->branch_stack)
                        goto out_put;
 
-               iter.add_entry_cb = hist_iter__branch_callback;
+               if (!rep->total_cycles)
+                       iter.add_entry_cb = hist_iter__branch_callback;
                iter.ops = &hist_iter_branch;
        } else if (rep->mem_mode) {
                iter.ops = &hist_iter_mem;
@@ -290,9 +346,10 @@ static int process_sample_event(struct perf_tool *tool,
        if (al.map != NULL)
                al.map->dso->hit = 1;
 
-       if (ui__has_annotation() || rep->symbol_ipc) {
+       if (ui__has_annotation() || rep->symbol_ipc || rep->total_cycles) {
                hist__account_cycles(sample->branch_stack, &al, sample,
-                                    rep->nonany_branch_mode, NULL);
+                                    rep->nonany_branch_mode,
+                                    &rep->cycles_count);
        }
 
        ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
@@ -473,6 +530,349 @@ static size_t hists__fprintf_nr_sample_events(struct 
hists *hists, struct report
        return ret + fprintf(fp, "\n#\n");
 }
 
+static int block_column_header(struct perf_hpp_fmt *fmt __maybe_unused,
+                              struct perf_hpp *hpp __maybe_unused,
+                              struct hists *hists __maybe_unused,
+                              int line __maybe_unused,
+                              int *span __maybe_unused)
+{
+       struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+
+       return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+                        block_fmt->header);
+}
+
+static int block_column_width(struct perf_hpp_fmt *fmt __maybe_unused,
+                             struct perf_hpp *hpp __maybe_unused,
+                             struct hists *hists __maybe_unused)
+{
+       struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+
+       return block_fmt->width;
+}
+
+static int block_cycles_cov_entry(struct perf_hpp_fmt *fmt,
+                                 struct perf_hpp *hpp, struct hist_entry *he)
+{
+       struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+       struct report *rep = block_fmt->rep;
+       struct block_info *bi = he->block_info;
+       double ratio = 0.0;
+       char buf[16];
+
+       if (rep->cycles_count)
+               ratio = (double)bi->cycles / (double)rep->cycles_count;
+
+       /* Bounded write: an unexpected ratio must not overrun buf. */
+       scnprintf(buf, sizeof(buf), "%.2f%%", 100.0 * ratio);
+       return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+}
+
+static int64_t block_cycles_cov_sort(struct perf_hpp_fmt *fmt,
+                                    struct hist_entry *left,
+                                    struct hist_entry *right)
+{
+       struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+       struct report *rep = block_fmt->rep;
+       struct block_info *bi_l = left->block_info;
+       struct block_info *bi_r = right->block_info;
+
+       /*
+        * Both ratios share rep->cycles_count as denominator, so compare the
+        * numerators directly; scaling and truncating hides small gaps.
+        */
+       if (!rep->cycles_count || bi_l->cycles == bi_r->cycles)
+               return 0;
+
+       return bi_l->cycles > bi_r->cycles ? 1 : -1;
+}
+
+static void cycles_string(u64 cycles, char *buf, int size)
+{
+       if (cycles >= 1000000)
+               scnprintf(buf, size, "%.1fM", (double)cycles / 1000000.0);
+       else if (cycles >= 1000)
+               scnprintf(buf, size, "%.1fK", (double)cycles / 1000.0);
+       else
+               scnprintf(buf, size, "%llu", (unsigned long long)cycles);
+}
+
+static int block_cycles_lbr_entry(struct perf_hpp_fmt *fmt,
+                                 struct perf_hpp *hpp, struct hist_entry *he)
+{
+       struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+       struct block_info *bi = he->block_info;
+       char cycles_buf[16];
+
+       cycles_string(bi->cycles_aggr, cycles_buf, sizeof(cycles_buf));
+
+       return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+                        cycles_buf);
+}
+
+static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
+                                 struct perf_hpp *hpp, struct hist_entry *he)
+{
+       struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+       struct report *rep = block_fmt->rep;
+       struct block_info *bi = he->block_info;
+       double ratio = 0.0;
+       u64 avg;
+       char buf[16];
+
+       if (rep->block_cycles && bi->num_aggr) {
+               avg = bi->cycles_aggr / bi->num_aggr;
+               ratio = (double)avg / (double)rep->block_cycles;
+       }
+
+       /* Bounded write: an unexpected ratio must not overrun buf. */
+       scnprintf(buf, sizeof(buf), "%.2f%%", 100.0 * ratio);
+       return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+}
+
+/* Print the block's average sampled cycles; 0 when nothing was aggregated. */
+static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt,
+                                 struct perf_hpp *hpp, struct hist_entry *he)
+{
+       struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+       struct block_info *bi = he->block_info;
+       char cycles_buf[16];
+
+       cycles_string(bi->num_aggr ? bi->cycles_aggr / bi->num_aggr : 0,
+                     cycles_buf, sizeof(cycles_buf));
+
+       return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+                        cycles_buf);
+}
+
+static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+                            struct hist_entry *he)
+{
+       struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+       struct block_info *bi = he->block_info;
+       char buf[128];
+       char *start_line, *end_line;
+
+       symbol_conf.disable_add2line_warn = true;
+
+       start_line = map__srcline(he->ms.map, bi->sym->start + bi->start,
+                                 he->ms.sym);
+
+       end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
+                               he->ms.sym);
+
+       if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) {
+               scnprintf(buf, sizeof(buf), "[%s -> %s]",
+                         start_line, end_line);
+       } else {
+               scnprintf(buf, sizeof(buf), "[%7lx -> %7lx]",
+                         bi->start, bi->end);
+       }
+
+       free_srcline(start_line);
+       free_srcline(end_line);
+
+       return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+}
+
+static int block_dso_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+                          struct hist_entry *he)
+{
+       struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+       struct map *map = he->ms.map;
+
+       if (map && map->dso) {
+               return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+                                map->dso->short_name);
+       }
+
+       return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+                        "[unknown]");
+}
+
+static void init_block_header(struct block_fmt *block_fmt)
+{
+       struct perf_hpp_fmt *fmt = &block_fmt->fmt;
+
+       BUG_ON(block_fmt->idx >= PERF_HPP_REPORT__BLOCK_MAX_INDEX);
+
+       block_fmt->header = block_columns[block_fmt->idx].name;
+       block_fmt->width = block_columns[block_fmt->idx].width;
+
+       fmt->header = block_column_header;
+       fmt->width = block_column_width;
+}
+
+static void block_hpp_register(struct block_fmt *block_fmt, int idx,
+                              struct perf_hpp_list *hpp_list,
+                              struct report *rep)
+{
+       struct perf_hpp_fmt *fmt = &block_fmt->fmt;
+
+       block_fmt->rep = rep;
+       block_fmt->idx = idx;
+       INIT_LIST_HEAD(&fmt->list);
+       INIT_LIST_HEAD(&fmt->sort_list);
+
+       switch (idx) {
+       case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV:
+               fmt->entry = block_cycles_cov_entry;
+               fmt->cmp = block_info__cmp;
+               fmt->sort = block_cycles_cov_sort;
+               break;
+       case PERF_HPP_REPORT__BLOCK_LBR_CYCLES:
+               fmt->entry = block_cycles_lbr_entry;
+               break;
+       case PERF_HPP_REPORT__BLOCK_CYCLES_PCT:
+               fmt->entry = block_cycles_pct_entry;
+               break;
+       case PERF_HPP_REPORT__BLOCK_AVG_CYCLES:
+               fmt->entry = block_avg_cycles_entry;
+               break;
+       case PERF_HPP_REPORT__BLOCK_RANGE:
+               fmt->entry = block_range_entry;
+               break;
+       case PERF_HPP_REPORT__BLOCK_DSO:
+               fmt->entry = block_dso_entry;
+               break;
+       default:
+               return;
+       }
+
+       init_block_header(block_fmt);
+       perf_hpp_list__column_register(hpp_list, fmt);
+}
+
+static void register_block_columns(struct perf_hpp_list *hpp_list,
+                                  struct report *rep)
+{
+       for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++)
+               block_hpp_register(&block_fmts[i], i, hpp_list, rep);
+}
+
+static void init_block_hist(struct block_hist *bh, struct report *rep)
+{
+       __hists__init(&bh->block_hists, &bh->block_list);
+       perf_hpp_list__init(&bh->block_list);
+       bh->block_list.nr_header_lines = 1;
+
+       register_block_columns(&bh->block_list, rep);
+
+       perf_hpp_list__register_sort_field(&bh->block_list,
+               &block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV].fmt);
+}
+
+static void init_block_info(struct block_info *bi, struct symbol *sym,
+                           struct cyc_hist *ch, int offset,
+                           u64 total_cycles)
+{
+       bi->sym = sym;
+       bi->start = ch->start;
+       bi->end = offset;
+       bi->cycles = ch->cycles;
+       bi->cycles_aggr = ch->cycles_aggr;
+       bi->num = ch->num;
+       bi->num_aggr = ch->num_aggr;
+       bi->total_cycles = total_cycles;
+}
+
+static int add_block_per_sym(struct hist_entry *he, struct block_hist *bh,
+                            u64 *block_cycles, u64 total_cycles)
+{
+       struct annotation *notes;
+       struct cyc_hist *ch;
+       static struct addr_location al;
+       u64 cycles = 0;
+
+       if (!he->ms.map || !he->ms.sym)
+               return 0;
+
+       memset(&al, 0, sizeof(al));
+       al.map = he->ms.map;
+       al.sym = he->ms.sym;
+
+       notes = symbol__annotation(he->ms.sym);
+       if (!notes || !notes->src || !notes->src->cycles_hist)
+               return 0;
+       ch = notes->src->cycles_hist;
+       for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) {
+               if (ch[i].num_aggr) {
+                       struct block_info *bi;
+                       struct hist_entry *he_block;
+
+                       bi = block_info__new();
+                       if (!bi)
+                               return -1;
+
+                       init_block_info(bi, he->ms.sym, &ch[i], i,
+                                       total_cycles);
+                       cycles += bi->cycles_aggr / bi->num_aggr;
+
+                       he_block = hists__add_entry_block(&bh->block_hists,
+                                                         &al, bi);
+                       if (!he_block) {
+                               block_info__put(bi);
+                               return -1;
+                       }
+               }
+       }
+
+       if (block_cycles)
+               *block_cycles += cycles;
+
+       return 0;
+}
+
+static int resort_cb(struct hist_entry *he, void *arg __maybe_unused)
+{
+       /* Skip the calculation of column length in output_resort */
+       he->filtered = true;
+       return 0;
+}
+
+static void hists__clear_filtered(struct hists *hists)
+{
+       struct rb_node *next = rb_first_cached(&hists->entries);
+       struct hist_entry *he;
+
+       while (next) {
+               he = rb_entry(next, struct hist_entry, rb_node);
+               he->filtered = false;
+               next = rb_next(&he->rb_node);
+       }
+}
+
+static void get_block_hists(struct hists *hists, struct block_hist *bh,
+                           struct report *rep)
+{
+       struct rb_node *next = rb_first_cached(&hists->entries);
+       struct hist_entry *he;
+
+       init_block_hist(bh, rep);
+
+       while (next) {
+               he = rb_entry(next, struct hist_entry, rb_node);
+               add_block_per_sym(he, bh, &rep->block_cycles,
+                                 rep->cycles_count);
+               next = rb_next(&he->rb_node);
+       }
+
+       hists__output_resort_cb(&bh->block_hists, NULL, resort_cb);
+       hists__clear_filtered(&bh->block_hists);
+}
+
+static int hists__fprintf_all_blocks(struct hists *hists, struct report *rep)
+{
+       struct block_hist *bh = &rep->block_hist;
+
+       get_block_hists(hists, bh, rep);
+       symbol_conf.report_individual_block = true;
+       hists__fprintf(&bh->block_hists, true, 0, 0, 0,
+                      stdout, true);
+       hists__delete_entries(&bh->block_hists);
+       return 0;
+}
+
 static int perf_evlist__tty_browse_hists(struct evlist *evlist,
                                         struct report *rep,
                                         const char *help)
@@ -493,6 +893,12 @@ static int perf_evlist__tty_browse_hists(struct evlist 
*evlist,
                        continue;
 
                hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
+
+               if (rep->total_cycles) {
+                       hists__fprintf_all_blocks(hists, rep);
+                       continue;
+               }
+
                hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout,
                               !(symbol_conf.use_callchain ||
                                 symbol_conf.show_branchflag_count));
@@ -1366,6 +1772,15 @@ int cmd_report(int argc, const char **argv)
                goto error;
        }
 
+       if (sort_order && strstr(sort_order, "total_cycles") &&
+           (sort__mode == SORT_MODE__BRANCH)) {
+               report.total_cycles = true;
+               if (!report.use_stdio) {
+                       pr_err("Error: -s total_cycles can be only used 
together with --stdio\n");
+                       goto error;
+               }
+       }
+
        if (strcmp(input_name, "-") != 0)
                setup_browser(true);
        else
@@ -1416,7 +1831,7 @@ int cmd_report(int argc, const char **argv)
         * so don't allocate extra space that won't be used in the stdio
         * implementation.
         */
-       if (ui__has_annotation() || report.symbol_ipc) {
+       if (ui__has_annotation() || report.symbol_ipc || report.total_cycles) {
                ret = symbol__annotation_init();
                if (ret < 0)
                        goto error;
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 5365606e9dad..655ef7708cd0 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -558,6 +558,25 @@ static int hist_entry__block_fprintf(struct hist_entry *he,
        return ret;
 }
 
+static int hist_entry__individual_block_fprintf(struct hist_entry *he,
+                                               char *bf, size_t size,
+                                               FILE *fp)
+{
+       int ret = 0;
+
+       struct perf_hpp hpp = {
+               .buf            = bf,
+               .size           = size,
+               .skip           = false,
+       };
+
+       hist_entry__snprintf(he, &hpp);
+       if (!hpp.skip)
+               ret += fprintf(fp, "%s\n", bf);
+
+       return ret;
+}
+
 static int hist_entry__fprintf(struct hist_entry *he, size_t size,
                               char *bf, size_t bfsz, FILE *fp,
                               bool ignore_callchains)
@@ -580,6 +599,9 @@ static int hist_entry__fprintf(struct hist_entry *he, 
size_t size,
        if (symbol_conf.report_block)
                return hist_entry__block_fprintf(he, bf, size, fp);
 
+       if (symbol_conf.report_individual_block)
+               return hist_entry__individual_block_fprintf(he, bf, size, fp);
+
        hist_entry__snprintf(he, &hpp);
 
        ret = fprintf(fp, "%s\n", bf);
diff --git a/tools/perf/util/block.h b/tools/perf/util/block.h
index 1aeef6dd2bf3..8031a8dd318a 100644
--- a/tools/perf/util/block.h
+++ b/tools/perf/util/block.h
@@ -13,6 +13,7 @@ struct block_info {
        u64                     end;
        u64                     cycles;
        u64                     cycles_aggr;
+       u64                     total_cycles;
        int                     num;
        int                     num_aggr;
        refcount_t              refcnt;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index af65ce950ba2..521f7185a94f 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -756,6 +756,10 @@ struct hist_entry *hists__add_entry_block(struct hists 
*hists,
        struct hist_entry entry = {
                .block_info = block_info,
                .hists = hists,
+               .ms = {
+                       .map = al->map,
+                       .sym = al->sym,
+               },
        }, *he = hists__findnew_entry(hists, &entry, al, false);
 
        return he;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 43d1d410854a..eb286700a8a9 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -492,6 +492,10 @@ struct sort_entry sort_sym_ipc_null = {
        .se_width_idx   = HISTC_SYMBOL_IPC,
 };
 
+struct sort_entry sort_block_cycles = {
+       .se_cmp         = sort__sym_cmp,
+};
+
 /* --sort srcfile */
 
 static char no_srcfile[1];
@@ -1695,6 +1699,7 @@ static struct sort_dimension bstack_sort_dimensions[] = {
        DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from),
        DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to),
        DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc),
+       DIM(SORT_BLOCK_CYCLES, "total_cycles", sort_block_cycles),
 };
 
 #undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 7b93f34ac1f4..7ed9a08751c3 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -235,6 +235,7 @@ enum sort_type {
        SORT_SRCLINE_FROM,
        SORT_SRCLINE_TO,
        SORT_SYM_IPC,
+       SORT_BLOCK_CYCLES,
 
        /* memory mode specific sort keys */
        __SORT_MEMORY_MODE,
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index e6880789864c..10f1ec3e0349 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -40,6 +40,7 @@ struct symbol_conf {
                        raw_trace,
                        report_hierarchy,
                        report_block,
+                       report_individual_block,
                        inline_name,
                        disable_add2line_warn;
        const char      *vmlinux_name,
-- 
2.17.1

Reply via email to