When a CPU is overwhelmed by sample processing, most of the time is
spent in the histogram code.

It seems we initialize a ~262 byte structure on the stack for every
histogram entry lookup.

This is a side effect of how the sorting code is shared with the code
that does lookups and insertions into the histogram tree(s).

I tried to change this so that lookups use a smaller key, but it gets
ugly real fast.

I don't know when I'd be able to work more on this so I'm posting this
hoping maybe someone else can move it forward, or maybe even find a
better way to do this.

The histogram code is really the limiting factor in how well perf can
handle high sample rates.

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index f96c005..f0265e4 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -81,6 +81,12 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry 
*right)
        return right->thread->tid - left->thread->tid;
 }
 
+static int64_t
+sort__thread_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       return key->al->thread->tid - entry->thread->tid;
+}
+
 static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
                                       size_t size, unsigned int width)
 {
@@ -104,6 +110,7 @@ static int hist_entry__thread_filter(struct hist_entry *he, 
int type, const void
 struct sort_entry sort_thread = {
        .se_header      = "    Pid:Command",
        .se_cmp         = sort__thread_cmp,
+       .se_cmp_key     = sort__thread_cmp_key,
        .se_snprintf    = hist_entry__thread_snprintf,
        .se_filter      = hist_entry__thread_filter,
        .se_width_idx   = HISTC_THREAD,
@@ -123,6 +130,13 @@ sort__comm_cmp(struct hist_entry *left, struct hist_entry 
*right)
 }
 
 static int64_t
+sort__comm_cmp_key(struct hist_entry *entry,
+              struct hist_entry_cmp_key *key)
+{
+       return strcmp(comm__str(key->comm), comm__str(entry->comm));
+}
+
+static int64_t
 sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
 {
        return strcmp(comm__str(right->comm), comm__str(left->comm));
@@ -143,6 +157,7 @@ static int hist_entry__comm_snprintf(struct hist_entry *he, 
char *bf,
 struct sort_entry sort_comm = {
        .se_header      = "Command",
        .se_cmp         = sort__comm_cmp,
+       .se_cmp_key     = sort__comm_cmp_key,
        .se_collapse    = sort__comm_collapse,
        .se_sort        = sort__comm_sort,
        .se_snprintf    = hist_entry__comm_snprintf,
@@ -178,6 +193,12 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry 
*right)
        return _sort__dso_cmp(right->ms.map, left->ms.map);
 }
 
+static int64_t
+sort__dso_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       return _sort__dso_cmp(key->al->map, entry->ms.map);
+}
+
 static int _hist_entry__dso_snprintf(struct map *map, char *bf,
                                     size_t size, unsigned int width)
 {
@@ -209,6 +230,7 @@ static int hist_entry__dso_filter(struct hist_entry *he, 
int type, const void *a
 struct sort_entry sort_dso = {
        .se_header      = "Shared Object",
        .se_cmp         = sort__dso_cmp,
+       .se_cmp_key     = sort__dso_cmp_key,
        .se_snprintf    = hist_entry__dso_snprintf,
        .se_filter      = hist_entry__dso_filter,
        .se_width_idx   = HISTC_DSO,
@@ -260,6 +282,25 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry 
*right)
 }
 
 static int64_t
+sort__sym_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       int64_t ret;
+
+       if (!entry->ms.sym && !key->al->sym)
+               return _sort__addr_cmp(entry->ip, key->al->addr);
+
+       /*
+        * comparing symbol address alone is not enough since it's a
+        * relative address within a dso.
+        */
+       ret = sort__dso_cmp_key(entry, key);
+       if (ret != 0)
+               return ret;
+
+       return _sort__sym_cmp(entry->ms.sym, key->al->sym);
+}
+
+static int64_t
 sort__sym_sort(struct hist_entry *left, struct hist_entry *right)
 {
        if (!left->ms.sym || !right->ms.sym)
@@ -323,6 +364,7 @@ static int hist_entry__sym_filter(struct hist_entry *he, 
int type, const void *a
 struct sort_entry sort_sym = {
        .se_header      = "Symbol",
        .se_cmp         = sort__sym_cmp,
+       .se_cmp_key     = sort__sym_cmp_key,
        .se_sort        = sort__sym_sort,
        .se_snprintf    = hist_entry__sym_snprintf,
        .se_filter      = hist_entry__sym_filter,
@@ -347,6 +389,18 @@ sort__srcline_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return strcmp(right->srcline, left->srcline);
 }
 
+static int64_t
+sort__srcline_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       if (!entry->srcline)
+               entry->srcline = hist_entry__srcline(entry);
+       if (!key->al->srcline)
+               key->al->srcline =
+                       map__srcline(key->al->map, key->al->addr, key->al->sym);
+
+       return strcmp(key->al->srcline, entry->srcline);
+}
+
 static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
                                        size_t size, unsigned int width)
 {
@@ -359,6 +413,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry 
*he, char *bf,
 struct sort_entry sort_srcline = {
        .se_header      = "Source:Line",
        .se_cmp         = sort__srcline_cmp,
+       .se_cmp_key     = sort__srcline_cmp_key,
        .se_snprintf    = hist_entry__srcline_snprintf,
        .se_width_idx   = HISTC_SRCLINE,
 };
@@ -382,6 +437,18 @@ sort__srcline_from_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return strcmp(right->branch_info->srcline_from, 
left->branch_info->srcline_from);
 }
 
+static int64_t
+sort__srcline_from_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key 
*key)
+{
+       if (!entry->branch_info->srcline_from)
+               entry->branch_info->srcline_from = 
addr_map_symbol__srcline(&entry->branch_info->from);
+
+       if (!key->bi->srcline_from)
+               key->bi->srcline_from = 
addr_map_symbol__srcline(&key->bi->from);
+
+       return strcmp(key->bi->srcline_from, entry->branch_info->srcline_from);
+}
+
 static int hist_entry__srcline_from_snprintf(struct hist_entry *he, char *bf,
                                        size_t size, unsigned int width)
 {
@@ -391,6 +458,7 @@ static int hist_entry__srcline_from_snprintf(struct 
hist_entry *he, char *bf,
 struct sort_entry sort_srcline_from = {
        .se_header      = "From Source:Line",
        .se_cmp         = sort__srcline_from_cmp,
+       .se_cmp_key     = sort__srcline_from_cmp_key,
        .se_snprintf    = hist_entry__srcline_from_snprintf,
        .se_width_idx   = HISTC_SRCLINE_FROM,
 };
@@ -409,6 +477,18 @@ sort__srcline_to_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return strcmp(right->branch_info->srcline_to, 
left->branch_info->srcline_to);
 }
 
+static int64_t
+sort__srcline_to_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key 
*key)
+{
+       if (!entry->branch_info->srcline_to)
+               entry->branch_info->srcline_to = 
addr_map_symbol__srcline(&entry->branch_info->to);
+
+       if (!key->bi->srcline_to)
+               key->bi->srcline_to = addr_map_symbol__srcline(&key->bi->to);
+
+       return strcmp(key->bi->srcline_to, entry->branch_info->srcline_to);
+}
+
 static int hist_entry__srcline_to_snprintf(struct hist_entry *he, char *bf,
                                        size_t size, unsigned int width)
 {
@@ -418,6 +498,7 @@ static int hist_entry__srcline_to_snprintf(struct 
hist_entry *he, char *bf,
 struct sort_entry sort_srcline_to = {
        .se_header      = "To Source:Line",
        .se_cmp         = sort__srcline_to_cmp,
+       .se_cmp_key     = sort__srcline_to_cmp_key,
        .se_snprintf    = hist_entry__srcline_to_snprintf,
        .se_width_idx   = HISTC_SRCLINE_TO,
 };
@@ -426,16 +507,16 @@ struct sort_entry sort_srcline_to = {
 
 static char no_srcfile[1];
 
-static char *hist_entry__get_srcfile(struct hist_entry *e)
+static char *__hist_entry__get_srcfile(struct map *map, struct symbol *sym,
+                                      u64 ip)
 {
        char *sf, *p;
-       struct map *map = e->ms.map;
 
        if (!map)
                return no_srcfile;
 
-       sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip),
-                        e->ms.sym, false, true, true, e->ip);
+       sf = __get_srcline(map->dso, map__rip_2objdump(map, ip),
+                        sym, false, true, true, ip);
        if (!strcmp(sf, SRCLINE_UNKNOWN))
                return no_srcfile;
        p = strchr(sf, ':');
@@ -447,6 +528,15 @@ static char *hist_entry__get_srcfile(struct hist_entry *e)
        return no_srcfile;
 }
 
+static char *hist_entry__get_srcfile(struct hist_entry *e)
+{
+       return __hist_entry__get_srcfile(e->ms.map, e->ms.sym, e->ip);
+}
+
+static char *hist_entry_key__get_srcfile(struct hist_entry_cmp_key *key)
+{
+       return __hist_entry__get_srcfile(key->al->map, key->al->sym, 
key->al->addr);
+}
 static int64_t
 sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -458,6 +548,17 @@ sort__srcfile_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return strcmp(right->srcfile, left->srcfile);
 }
 
+static int64_t
+sort__srcfile_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       if (!entry->srcfile)
+               entry->srcfile = hist_entry__get_srcfile(entry);
+       if (!key->srcfile)
+               key->srcfile = hist_entry_key__get_srcfile(key);
+
+       return strcmp(key->srcfile, entry->srcfile);
+}
+
 static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
                                        size_t size, unsigned int width)
 {
@@ -470,6 +571,7 @@ static int hist_entry__srcfile_snprintf(struct hist_entry 
*he, char *bf,
 struct sort_entry sort_srcfile = {
        .se_header      = "Source File",
        .se_cmp         = sort__srcfile_cmp,
+       .se_cmp_key     = sort__srcfile_cmp_key,
        .se_snprintf    = hist_entry__srcfile_snprintf,
        .se_width_idx   = HISTC_SRCFILE,
 };
@@ -488,6 +590,18 @@ sort__parent_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return strcmp(sym_r->name, sym_l->name);
 }
 
+static int64_t
+sort__parent_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       struct symbol *sym_l = entry->parent;
+       struct symbol *sym_r = key->sym_parent;
+
+       if (!sym_l || !sym_r)
+               return cmp_null(sym_l, sym_r);
+
+       return strcmp(sym_r->name, sym_l->name);
+}
+
 static int hist_entry__parent_snprintf(struct hist_entry *he, char *bf,
                                       size_t size, unsigned int width)
 {
@@ -498,6 +612,7 @@ static int hist_entry__parent_snprintf(struct hist_entry 
*he, char *bf,
 struct sort_entry sort_parent = {
        .se_header      = "Parent symbol",
        .se_cmp         = sort__parent_cmp,
+       .se_cmp_key     = sort__parent_cmp_key,
        .se_snprintf    = hist_entry__parent_snprintf,
        .se_width_idx   = HISTC_PARENT,
 };
@@ -510,6 +625,12 @@ sort__cpu_cmp(struct hist_entry *left, struct hist_entry 
*right)
        return right->cpu - left->cpu;
 }
 
+static int64_t
+sort__cpu_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       return key->al->cpu - entry->cpu;
+}
+
 static int hist_entry__cpu_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -519,6 +640,7 @@ static int hist_entry__cpu_snprintf(struct hist_entry *he, 
char *bf,
 struct sort_entry sort_cpu = {
        .se_header      = "CPU",
        .se_cmp         = sort__cpu_cmp,
+       .se_cmp_key     = sort__cpu_cmp_key,
        .se_snprintf    = hist_entry__cpu_snprintf,
        .se_width_idx   = HISTC_CPU,
 };
@@ -548,6 +670,22 @@ sort__cgroup_id_cmp(struct hist_entry *left, struct 
hist_entry *right)
                                       left->cgroup_id.ino);
 }
 
+static int64_t
+sort__cgroup_id_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key 
*key)
+{
+       struct namespaces *ns = thread__namespaces(key->al->thread);
+       int64_t ret;
+       u64 val;
+
+       val = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0;
+       ret = _sort__cgroup_dev_cmp(val, entry->cgroup_id.dev);
+       if (ret != 0)
+               return ret;
+
+       val = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0;
+       return _sort__cgroup_inode_cmp(val, entry->cgroup_id.ino);
+}
+
 static int hist_entry__cgroup_id_snprintf(struct hist_entry *he,
                                          char *bf, size_t size,
                                          unsigned int width __maybe_unused)
@@ -559,6 +697,7 @@ static int hist_entry__cgroup_id_snprintf(struct hist_entry 
*he,
 struct sort_entry sort_cgroup_id = {
        .se_header      = "cgroup id (dev/inode)",
        .se_cmp         = sort__cgroup_id_cmp,
+       .se_cmp_key     = sort__cgroup_id_cmp_key,
        .se_snprintf    = hist_entry__cgroup_id_snprintf,
        .se_width_idx   = HISTC_CGROUP_ID,
 };
@@ -571,6 +710,12 @@ sort__socket_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return right->socket - left->socket;
 }
 
+static int64_t
+sort__socket_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       return key->al->socket - entry->socket;
+}
+
 static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -590,6 +735,7 @@ static int hist_entry__socket_filter(struct hist_entry *he, 
int type, const void
 struct sort_entry sort_socket = {
        .se_header      = "Socket",
        .se_cmp         = sort__socket_cmp,
+       .se_cmp_key     = sort__socket_cmp_key,
        .se_snprintf    = hist_entry__socket_snprintf,
        .se_filter      = hist_entry__socket_filter,
        .se_width_idx   = HISTC_SOCKET,
@@ -597,20 +743,21 @@ struct sort_entry sort_socket = {
 
 /* --sort trace */
 
-static char *get_trace_output(struct hist_entry *he)
+static char *__get_trace_output(struct hists *hists, void *raw_data,
+                               u32 raw_size)
 {
        struct trace_seq seq;
        struct perf_evsel *evsel;
        struct tep_record rec = {
-               .data = he->raw_data,
-               .size = he->raw_size,
+               .data = raw_data,
+               .size = raw_size,
        };
 
-       evsel = hists_to_evsel(he->hists);
+       evsel = hists_to_evsel(hists);
 
        trace_seq_init(&seq);
        if (symbol_conf.raw_trace) {
-               tep_print_fields(&seq, he->raw_data, he->raw_size,
+               tep_print_fields(&seq, raw_data, raw_size,
                                 evsel->tp_format);
        } else {
                tep_event_info(&seq, evsel->tp_format, &rec);
@@ -622,6 +769,16 @@ static char *get_trace_output(struct hist_entry *he)
        return realloc(seq.buffer, seq.len + 1);
 }
 
+static char *get_trace_output(struct hist_entry *he)
+{
+       return __get_trace_output(he->hists, he->raw_data, he->raw_size);
+}
+
+static char *get_trace_output_key(struct hists *hists, struct 
hist_entry_cmp_key *key)
+{
+       return __get_trace_output(hists, key->sample->raw_data, 
key->sample->raw_size);
+}
+
 static int64_t
 sort__trace_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -639,6 +796,23 @@ sort__trace_cmp(struct hist_entry *left, struct hist_entry 
*right)
        return strcmp(right->trace_output, left->trace_output);
 }
 
+static int64_t
+sort__trace_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       struct perf_evsel *evsel;
+
+       evsel = hists_to_evsel(entry->hists);
+       if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+               return 0;
+
+       if (entry->trace_output == NULL)
+               entry->trace_output = get_trace_output(entry);
+       if (key->trace_output == NULL)
+               key->trace_output = get_trace_output_key(entry->hists, key);
+
+       return strcmp(key->trace_output, entry->trace_output);
+}
+
 static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -656,6 +830,7 @@ static int hist_entry__trace_snprintf(struct hist_entry 
*he, char *bf,
 struct sort_entry sort_trace = {
        .se_header      = "Trace output",
        .se_cmp         = sort__trace_cmp,
+       .se_cmp_key     = sort__trace_cmp_key,
        .se_snprintf    = hist_entry__trace_snprintf,
        .se_width_idx   = HISTC_TRACE,
 };
@@ -672,6 +847,16 @@ sort__dso_from_cmp(struct hist_entry *left, struct 
hist_entry *right)
                              right->branch_info->from.map);
 }
 
+static int64_t
+sort__dso_from_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key 
*key)
+{
+       if (!entry->branch_info || !key->bi)
+               return cmp_null(entry->branch_info, key->bi);
+
+       return _sort__dso_cmp(entry->branch_info->from.map,
+                             key->bi->from.map);
+}
+
 static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -704,6 +889,16 @@ sort__dso_to_cmp(struct hist_entry *left, struct 
hist_entry *right)
                              right->branch_info->to.map);
 }
 
+static int64_t
+sort__dso_to_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       if (!entry->branch_info || !key->bi)
+               return cmp_null(entry->branch_info, key->bi);
+
+       return _sort__dso_cmp(entry->branch_info->to.map,
+                             key->bi->to.map);
+}
+
 static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf,
                                       size_t size, unsigned int width)
 {
@@ -745,6 +940,24 @@ sort__sym_from_cmp(struct hist_entry *left, struct 
hist_entry *right)
 }
 
 static int64_t
+sort__sym_from_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key 
*key)
+{
+       struct addr_map_symbol *from_l, *from_r;
+
+       /* Either side may lack branch info; check before touching members. */
+       if (!entry->branch_info || !key->bi)
+               return cmp_null(entry->branch_info, key->bi);
+
+       from_l = &entry->branch_info->from;
+       from_r = &key->bi->from;
+
+       if (!from_l->sym && !from_r->sym)
+               return _sort__addr_cmp(from_l->addr, from_r->addr);
+
+       return _sort__sym_cmp(from_l->sym, from_r->sym);
+}
+
+static int64_t
 sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
 {
        struct addr_map_symbol *to_l, *to_r;
@@ -761,6 +974,23 @@ sort__sym_to_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return _sort__sym_cmp(to_l->sym, to_r->sym);
 }
 
+static int64_t
+sort__sym_to_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       struct addr_map_symbol *to_l, *to_r;
+
+       if (!entry->branch_info || !key->bi)
+               return cmp_null(entry->branch_info, key->bi);
+
+       to_l = &entry->branch_info->to;
+       to_r = &key->bi->to;
+
+       if (!to_l->sym && !to_r->sym)
+               return _sort__addr_cmp(to_l->addr, to_r->addr);
+
+       return _sort__sym_cmp(to_l->sym, to_r->sym);
+}
+
 static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
                                         size_t size, unsigned int width)
 {
@@ -814,6 +1044,7 @@ static int hist_entry__sym_to_filter(struct hist_entry 
*he, int type,
 struct sort_entry sort_dso_from = {
        .se_header      = "Source Shared Object",
        .se_cmp         = sort__dso_from_cmp,
+       .se_cmp_key     = sort__dso_from_cmp_key,
        .se_snprintf    = hist_entry__dso_from_snprintf,
        .se_filter      = hist_entry__dso_from_filter,
        .se_width_idx   = HISTC_DSO_FROM,
@@ -822,6 +1053,7 @@ struct sort_entry sort_dso_from = {
 struct sort_entry sort_dso_to = {
        .se_header      = "Target Shared Object",
        .se_cmp         = sort__dso_to_cmp,
+       .se_cmp_key     = sort__dso_to_cmp_key,
        .se_snprintf    = hist_entry__dso_to_snprintf,
        .se_filter      = hist_entry__dso_to_filter,
        .se_width_idx   = HISTC_DSO_TO,
@@ -830,6 +1062,7 @@ struct sort_entry sort_dso_to = {
 struct sort_entry sort_sym_from = {
        .se_header      = "Source Symbol",
        .se_cmp         = sort__sym_from_cmp,
+       .se_cmp_key     = sort__sym_from_cmp_key,
        .se_snprintf    = hist_entry__sym_from_snprintf,
        .se_filter      = hist_entry__sym_from_filter,
        .se_width_idx   = HISTC_SYMBOL_FROM,
@@ -838,6 +1071,7 @@ struct sort_entry sort_sym_from = {
 struct sort_entry sort_sym_to = {
        .se_header      = "Target Symbol",
        .se_cmp         = sort__sym_to_cmp,
+       .se_cmp_key     = sort__sym_to_cmp_key,
        .se_snprintf    = hist_entry__sym_to_snprintf,
        .se_filter      = hist_entry__sym_to_filter,
        .se_width_idx   = HISTC_SYMBOL_TO,
@@ -856,6 +1090,19 @@ sort__mispredict_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return mp || p;
 }
 
+static int64_t
+sort__mispredict_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key 
*key)
+{
+       unsigned char mp, p;
+
+       if (!entry->branch_info || !key->bi)
+               return cmp_null(entry->branch_info, key->bi);
+
+       mp = entry->branch_info->flags.mispred != key->bi->flags.mispred;
+       p  = entry->branch_info->flags.predicted != key->bi->flags.predicted;
+       return mp || p;
+}
+
 static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width){
        static const char *out = "N/A";
@@ -880,6 +1127,16 @@ sort__cycles_cmp(struct hist_entry *left, struct 
hist_entry *right)
                right->branch_info->flags.cycles;
 }
 
+static int64_t
+sort__cycles_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       if (!entry->branch_info || !key->bi)
+               return cmp_null(entry->branch_info, key->bi);
+
+       return entry->branch_info->flags.cycles -
+               key->bi->flags.cycles;
+}
+
 static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -894,6 +1151,7 @@ static int hist_entry__cycles_snprintf(struct hist_entry 
*he, char *bf,
 struct sort_entry sort_cycles = {
        .se_header      = "Basic Block Cycles",
        .se_cmp         = sort__cycles_cmp,
+       .se_cmp_key     = sort__cycles_cmp_key,
        .se_snprintf    = hist_entry__cycles_snprintf,
        .se_width_idx   = HISTC_CYCLES,
 };
@@ -912,6 +1170,19 @@ sort__daddr_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return (int64_t)(r - l);
 }
 
+static int64_t
+sort__daddr_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       uint64_t l = 0, r = 0;
+
+       if (entry->mem_info)
+               l = entry->mem_info->daddr.addr;
+       if (key->mem_info)
+               r = key->mem_info->daddr.addr;
+
+       return (int64_t)(r - l);
+}
+
 static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -941,6 +1212,19 @@ sort__iaddr_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return (int64_t)(r - l);
 }
 
+static int64_t
+sort__iaddr_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       uint64_t l = 0, r = 0;
+
+       if (entry->mem_info)
+               l = entry->mem_info->iaddr.addr;
+       if (key->mem_info)
+               r = key->mem_info->iaddr.addr;
+
+       return (int64_t)(r - l);
+}
+
 static int hist_entry__iaddr_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -971,6 +1255,20 @@ sort__dso_daddr_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return _sort__dso_cmp(map_l, map_r);
 }
 
+static int64_t
+sort__dso_daddr_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key 
*key)
+{
+       struct map *map_l = NULL;
+       struct map *map_r = NULL;
+
+       if (entry->mem_info)
+               map_l = entry->mem_info->daddr.map;
+       if (key->mem_info)
+               map_r = key->mem_info->daddr.map;
+
+       return _sort__dso_cmp(map_l, map_r);
+}
+
 static int hist_entry__dso_daddr_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -1001,6 +1299,25 @@ sort__locked_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return (int64_t)(data_src_r.mem_lock - data_src_l.mem_lock);
 }
 
+static int64_t
+sort__locked_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       union perf_mem_data_src data_src_l;
+       union perf_mem_data_src data_src_r;
+
+       if (entry->mem_info)
+               data_src_l = entry->mem_info->data_src;
+       else
+               data_src_l.mem_lock = PERF_MEM_LOCK_NA;
+
+       if (key->mem_info)
+               data_src_r = key->mem_info->data_src;
+       else
+               data_src_r.mem_lock = PERF_MEM_LOCK_NA;
+
+       return (int64_t)(data_src_r.mem_lock - data_src_l.mem_lock);
+}
+
 static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -1029,6 +1346,25 @@ sort__tlb_cmp(struct hist_entry *left, struct hist_entry 
*right)
        return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
 }
 
+static int64_t
+sort__tlb_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       union perf_mem_data_src data_src_l;
+       union perf_mem_data_src data_src_r;
+
+       if (entry->mem_info)
+               data_src_l = entry->mem_info->data_src;
+       else
+               data_src_l.mem_dtlb = PERF_MEM_TLB_NA;
+
+       if (key->mem_info)
+               data_src_r = key->mem_info->data_src;
+       else
+               data_src_r.mem_dtlb = PERF_MEM_TLB_NA;
+
+       return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
+}
+
 static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -1057,6 +1393,25 @@ sort__lvl_cmp(struct hist_entry *left, struct hist_entry 
*right)
        return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
 }
 
+static int64_t
+sort__lvl_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       union perf_mem_data_src data_src_l;
+       union perf_mem_data_src data_src_r;
+
+       if (entry->mem_info)
+               data_src_l = entry->mem_info->data_src;
+       else
+               data_src_l.mem_lvl = PERF_MEM_LVL_NA;
+
+       if (key->mem_info)
+               data_src_r = key->mem_info->data_src;
+       else
+               data_src_r.mem_lvl = PERF_MEM_LVL_NA;
+
+       return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
+}
+
 static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -1085,6 +1440,25 @@ sort__snoop_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
 }
 
+static int64_t
+sort__snoop_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       union perf_mem_data_src data_src_l;
+       union perf_mem_data_src data_src_r;
+
+       if (entry->mem_info)
+               data_src_l = entry->mem_info->data_src;
+       else
+               data_src_l.mem_snoop = PERF_MEM_SNOOP_NA;
+
+       if (key->mem_info)
+               data_src_r = key->mem_info->data_src;
+       else
+               data_src_r.mem_snoop = PERF_MEM_SNOOP_NA;
+
+       return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
+}
+
 static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -1158,6 +1532,70 @@ sort__dcacheline_cmp(struct hist_entry *left, struct 
hist_entry *right)
        return 0;
 }
 
+static int64_t
+sort__dcacheline_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key 
*key)
+{
+       u64 l, r;
+       struct map *l_map, *r_map;
+
+       if (!entry->mem_info)  return -1;
+       if (!key->mem_info) return 1;
+
+       /* group event types together */
+       if (entry->cpumode > key->al->cpumode) return -1;
+       if (entry->cpumode < key->al->cpumode) return 1;
+
+       l_map = entry->mem_info->daddr.map;
+       r_map = key->mem_info->daddr.map;
+
+       /* if both are NULL, jump to sort on al_addr instead */
+       if (!l_map && !r_map)
+               goto addr;
+
+       if (!l_map) return -1;
+       if (!r_map) return 1;
+
+       if (l_map->maj > r_map->maj) return -1;
+       if (l_map->maj < r_map->maj) return 1;
+
+       if (l_map->min > r_map->min) return -1;
+       if (l_map->min < r_map->min) return 1;
+
+       if (l_map->ino > r_map->ino) return -1;
+       if (l_map->ino < r_map->ino) return 1;
+
+       if (l_map->ino_generation > r_map->ino_generation) return -1;
+       if (l_map->ino_generation < r_map->ino_generation) return 1;
+
+       /*
+        * Addresses with no major/minor numbers are assumed to be
+        * anonymous in userspace.  Sort those on pid then address.
+        *
+        * The kernel and non-zero major/minor mapped areas are
+        * assumed to be unity mapped.  Sort those on address.
+        */
+
+       if ((entry->cpumode != PERF_RECORD_MISC_KERNEL) &&
+           (!(l_map->flags & MAP_SHARED)) &&
+           !l_map->maj && !l_map->min && !l_map->ino &&
+           !l_map->ino_generation) {
+               /* userspace anonymous */
+
+               if (entry->thread->pid_ > key->al->thread->pid_) return -1;
+               if (entry->thread->pid_ < key->al->thread->pid_) return 1;
+       }
+
+addr:
+       /* al_addr does all the right addr - start + offset calculations */
+       l = cl_address(entry->mem_info->daddr.al_addr);
+       r = cl_address(key->mem_info->daddr.al_addr);
+
+       if (l > r) return -1;
+       if (l < r) return 1;
+
+       return 0;
+}
+
 static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
                                          size_t size, unsigned int width)
 {
@@ -1189,6 +1627,7 @@ static int hist_entry__dcacheline_snprintf(struct 
hist_entry *he, char *bf,
 struct sort_entry sort_mispredict = {
        .se_header      = "Branch Mispredicted",
        .se_cmp         = sort__mispredict_cmp,
+       .se_cmp_key     = sort__mispredict_cmp_key,
        .se_snprintf    = hist_entry__mispredict_snprintf,
        .se_width_idx   = HISTC_MISPREDICT,
 };
@@ -1198,12 +1637,24 @@ static u64 he_weight(struct hist_entry *he)
        return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0;
 }
 
+static u64 key_weight(struct hist_entry_cmp_key *key)
+{
+       return key->sample->weight;
+}
+
 static int64_t
 sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right)
 {
        return he_weight(left) - he_weight(right);
 }
 
+static int64_t
+sort__local_weight_cmp_key(struct hist_entry *entry,
+                          struct hist_entry_cmp_key *key)
+{
+       return he_weight(entry) - key_weight(key);
+}
+
 static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -1213,6 +1664,7 @@ static int hist_entry__local_weight_snprintf(struct 
hist_entry *he, char *bf,
 struct sort_entry sort_local_weight = {
        .se_header      = "Local Weight",
        .se_cmp         = sort__local_weight_cmp,
+       .se_cmp_key     = sort__local_weight_cmp_key,
        .se_snprintf    = hist_entry__local_weight_snprintf,
        .se_width_idx   = HISTC_LOCAL_WEIGHT,
 };
@@ -1223,6 +1675,17 @@ sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right)
        return left->stat.weight - right->stat.weight;
 }
 
+/*
+ * Lookup-key counterpart of sort__global_weight_cmp(): compares the entry's
+ * stat.weight with the weight of the sample referenced by the key.
+ */
+static int64_t
+sort__global_weight_cmp_key(struct hist_entry *entry,
+                           struct hist_entry_cmp_key *key)
+{
+       return entry->stat.weight - key->sample->weight;
+}
+
 static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
                                               size_t size, unsigned int width)
 {
@@ -1232,6 +1691,7 @@ static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
 struct sort_entry sort_global_weight = {
        .se_header      = "Weight",
        .se_cmp         = sort__global_weight_cmp,
+       .se_cmp_key     = sort__global_weight_cmp_key,
        .se_snprintf    = hist_entry__global_weight_snprintf,
        .se_width_idx   = HISTC_GLOBAL_WEIGHT,
 };
@@ -1239,6 +1699,7 @@ struct sort_entry sort_global_weight = {
 struct sort_entry sort_mem_daddr_sym = {
        .se_header      = "Data Symbol",
        .se_cmp         = sort__daddr_cmp,
+       .se_cmp_key     = sort__daddr_cmp_key,
        .se_snprintf    = hist_entry__daddr_snprintf,
        .se_width_idx   = HISTC_MEM_DADDR_SYMBOL,
 };
@@ -1246,6 +1707,7 @@ struct sort_entry sort_mem_daddr_sym = {
 struct sort_entry sort_mem_iaddr_sym = {
        .se_header      = "Code Symbol",
        .se_cmp         = sort__iaddr_cmp,
+       .se_cmp_key     = sort__iaddr_cmp_key,
        .se_snprintf    = hist_entry__iaddr_snprintf,
        .se_width_idx   = HISTC_MEM_IADDR_SYMBOL,
 };
@@ -1253,6 +1715,7 @@ struct sort_entry sort_mem_iaddr_sym = {
 struct sort_entry sort_mem_daddr_dso = {
        .se_header      = "Data Object",
        .se_cmp         = sort__dso_daddr_cmp,
+       .se_cmp_key     = sort__dso_daddr_cmp_key,
        .se_snprintf    = hist_entry__dso_daddr_snprintf,
        .se_width_idx   = HISTC_MEM_DADDR_DSO,
 };
@@ -1260,6 +1723,7 @@ struct sort_entry sort_mem_daddr_dso = {
 struct sort_entry sort_mem_locked = {
        .se_header      = "Locked",
        .se_cmp         = sort__locked_cmp,
+       .se_cmp_key     = sort__locked_cmp_key,
        .se_snprintf    = hist_entry__locked_snprintf,
        .se_width_idx   = HISTC_MEM_LOCKED,
 };
@@ -1267,6 +1731,7 @@ struct sort_entry sort_mem_locked = {
 struct sort_entry sort_mem_tlb = {
        .se_header      = "TLB access",
        .se_cmp         = sort__tlb_cmp,
+       .se_cmp_key     = sort__tlb_cmp_key,
        .se_snprintf    = hist_entry__tlb_snprintf,
        .se_width_idx   = HISTC_MEM_TLB,
 };
@@ -1274,6 +1739,7 @@ struct sort_entry sort_mem_tlb = {
 struct sort_entry sort_mem_lvl = {
        .se_header      = "Memory access",
        .se_cmp         = sort__lvl_cmp,
+       .se_cmp_key     = sort__lvl_cmp_key,
        .se_snprintf    = hist_entry__lvl_snprintf,
        .se_width_idx   = HISTC_MEM_LVL,
 };
@@ -1281,6 +1747,7 @@ struct sort_entry sort_mem_lvl = {
 struct sort_entry sort_mem_snoop = {
        .se_header      = "Snoop",
        .se_cmp         = sort__snoop_cmp,
+       .se_cmp_key     = sort__snoop_cmp_key,
        .se_snprintf    = hist_entry__snoop_snprintf,
        .se_width_idx   = HISTC_MEM_SNOOP,
 };
@@ -1288,6 +1755,7 @@ struct sort_entry sort_mem_snoop = {
 struct sort_entry sort_mem_dcacheline = {
        .se_header      = "Data Cacheline",
        .se_cmp         = sort__dcacheline_cmp,
+       .se_cmp_key     = sort__dcacheline_cmp_key,
        .se_snprintf    = hist_entry__dcacheline_snprintf,
        .se_width_idx   = HISTC_MEM_DCACHELINE,
 };
@@ -1305,6 +1773,25 @@ sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
        return (int64_t)(r - l);
 }
 
+/*
+ * Lookup-key counterpart of sort__phys_daddr_cmp().  A missing mem_info on
+ * either side is treated as physical address 0.
+ */
+static int64_t
+sort__phys_daddr_cmp_key(struct hist_entry *entry,
+                        struct hist_entry_cmp_key *key)
+{
+       uint64_t l = 0, r = 0;
+
+       if (entry->mem_info)
+               l = entry->mem_info->daddr.phys_addr;
+       if (key->mem_info)
+               r = key->mem_info->daddr.phys_addr;
+
+       /* key minus entry, reinterpreted as signed */
+       return (int64_t)(r - l);
+}
+
 static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
                                            size_t size, unsigned int width)
 {
@@ -1329,6 +1810,7 @@ static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
 struct sort_entry sort_mem_phys_daddr = {
        .se_header      = "Data Physical Address",
        .se_cmp         = sort__phys_daddr_cmp,
+       .se_cmp_key     = sort__phys_daddr_cmp_key,
        .se_snprintf    = hist_entry__phys_daddr_snprintf,
        .se_width_idx   = HISTC_MEM_PHYS_DADDR,
 };
@@ -1343,6 +1825,20 @@ sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
                right->branch_info->flags.abort;
 }
 
+/*
+ * Lookup-key counterpart of sort__abort_cmp().  When both sides carry branch
+ * info the result is only 0 (abort flags equal) or 1 (flags differ).
+ */
+static int64_t
+sort__abort_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       if (!entry->branch_info || !key->bi)
+               return cmp_null(entry->branch_info, key->bi);
+
+       return entry->branch_info->flags.abort !=
+               key->bi->flags.abort;
+}
+
 static int hist_entry__abort_snprintf(struct hist_entry *he, char *bf,
                                     size_t size, unsigned int width)
 {
@@ -1361,6 +1853,7 @@ static int hist_entry__abort_snprintf(struct hist_entry *he, char *bf,
 struct sort_entry sort_abort = {
        .se_header      = "Transaction abort",
        .se_cmp         = sort__abort_cmp,
+       .se_cmp_key     = sort__abort_cmp_key,
        .se_snprintf    = hist_entry__abort_snprintf,
        .se_width_idx   = HISTC_ABORT,
 };
@@ -1375,6 +1868,20 @@ sort__in_tx_cmp(struct hist_entry *left, struct hist_entry *right)
                right->branch_info->flags.in_tx;
 }
 
+/*
+ * Lookup-key counterpart of sort__in_tx_cmp().  When both sides carry branch
+ * info the result is only 0 (in_tx flags equal) or 1 (flags differ).
+ */
+static int64_t
+sort__in_tx_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       if (!entry->branch_info || !key->bi)
+               return cmp_null(entry->branch_info, key->bi);
+
+       return entry->branch_info->flags.in_tx !=
+               key->bi->flags.in_tx;
+}
+
 static int hist_entry__in_tx_snprintf(struct hist_entry *he, char *bf,
                                     size_t size, unsigned int width)
 {
@@ -1393,6 +1896,7 @@ static int hist_entry__in_tx_snprintf(struct hist_entry *he, char *bf,
 struct sort_entry sort_in_tx = {
        .se_header      = "Branch in transaction",
        .se_cmp         = sort__in_tx_cmp,
+       .se_cmp_key     = sort__in_tx_cmp_key,
        .se_snprintf    = hist_entry__in_tx_snprintf,
        .se_width_idx   = HISTC_IN_TX,
 };
@@ -1403,6 +1907,14 @@ sort__transaction_cmp(struct hist_entry *left, struct hist_entry *right)
        return left->transaction - right->transaction;
 }
 
+/* Lookup-key counterpart of sort__transaction_cmp(). */
+static int64_t
+sort__transaction_cmp_key(struct hist_entry *entry,
+                         struct hist_entry_cmp_key *key)
+{
+       return entry->transaction - key->sample->transaction;
+}
+
 static inline char *add_str(char *p, const char *str)
 {
        strcpy(p, str);
@@ -1465,6 +1975,7 @@ static int hist_entry__transaction_snprintf(struct hist_entry *he, char *bf,
 struct sort_entry sort_transaction = {
        .se_header      = "Transaction                ",
        .se_cmp         = sort__transaction_cmp,
+       .se_cmp_key     = sort__transaction_cmp_key,
        .se_snprintf    = hist_entry__transaction_snprintf,
        .se_width_idx   = HISTC_TRANSACTION,
 };
@@ -1486,6 +1997,17 @@ sort__sym_size_cmp(struct hist_entry *left, struct hist_entry *right)
        return _sort__sym_size_cmp(right->ms.sym, left->ms.sym);
 }
 
+/*
+ * Lookup-key counterpart of sort__sym_size_cmp(): the key's symbol takes the
+ * place of the right-hand entry and is therefore passed first.
+ */
+static int64_t
+sort__sym_size_cmp_key(struct hist_entry *entry,
+                      struct hist_entry_cmp_key *key)
+{
+       return _sort__sym_size_cmp(key->al->sym, entry->ms.sym);
+}
+
 static int _hist_entry__sym_size_snprintf(struct symbol *sym, char *bf,
                                          size_t bf_size, unsigned int width)
 {
@@ -1504,6 +2021,7 @@ static int hist_entry__sym_size_snprintf(struct hist_entry *he, char *bf,
 struct sort_entry sort_sym_size = {
        .se_header      = "Symbol size",
        .se_cmp         = sort__sym_size_cmp,
+       .se_cmp_key     = sort__sym_size_cmp_key,
        .se_snprintf    = hist_entry__sym_size_snprintf,
        .se_width_idx   = HISTC_SYM_SIZE,
 };
@@ -1525,6 +2043,17 @@ sort__dso_size_cmp(struct hist_entry *left, struct hist_entry *right)
        return _sort__dso_size_cmp(right->ms.map, left->ms.map);
 }
 
+/*
+ * Lookup-key counterpart of sort__dso_size_cmp(): the key's map takes the
+ * place of the right-hand entry and is therefore passed first.
+ */
+static int64_t
+sort__dso_size_cmp_key(struct hist_entry *entry,
+                      struct hist_entry_cmp_key *key)
+{
+       return _sort__dso_size_cmp(key->al->map, entry->ms.map);
+}
+
 static int _hist_entry__dso_size_snprintf(struct map *map, char *bf,
                                          size_t bf_size, unsigned int width)
 {
@@ -1544,6 +2068,7 @@ static int hist_entry__dso_size_snprintf(struct hist_entry *he, char *bf,
 struct sort_entry sort_dso_size = {
        .se_header      = "DSO size",
        .se_cmp         = sort__dso_size_cmp,
+       .se_cmp_key     = sort__dso_size_cmp_key,
        .se_snprintf    = hist_entry__dso_size_snprintf,
        .se_width_idx   = HISTC_DSO_SIZE,
 };
@@ -1693,12 +2218,17 @@ static int __sort__hpp_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
 }
 
+/*
+ * Entry-vs-key comparison for sort_entry backed columns: forwards to the
+ * wrapped sort_entry's se_cmp_key() callback.
+ */
 static int64_t __sort__hpp_cmp(struct perf_hpp_fmt *fmt,
-                              struct hist_entry *a, struct hist_entry *b)
+                              struct hist_entry *entry,
+                              struct hist_entry_cmp_key *key)
 {
        struct hpp_sort_entry *hse;
 
        hse = container_of(fmt, struct hpp_sort_entry, hpp);
-       return hse->se->se_cmp(a, b);
+       return hse->se->se_cmp_key(entry, key);
 }
 
 static int64_t __sort__hpp_collapse(struct perf_hpp_fmt *fmt,
@@ -2089,9 +2615,44 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
        return memcmp(a->raw_data + offset, b->raw_data + offset, size);
 }
 
+/*
+ * Lookup-key counterpart of __sort__hde_cmp(): memcmp()s the field's bytes in
+ * the entry's raw data against the same window of the key sample's raw data.
+ * For TEP_FIELD_IS_DYNAMIC fields the offset and size are decoded from the
+ * entry's raw data only.
+ */
+static int64_t __sort__hde_cmp_key(struct perf_hpp_fmt *fmt,
+                                  struct hist_entry *a,
+                                  struct hist_entry_cmp_key *key)
+{
+       struct hpp_dynamic_entry *hde;
+       struct tep_format_field *field;
+       unsigned offset, size;
+
+       hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+       field = hde->field;
+       if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+               unsigned long long dyn;
+
+               tep_read_number_field(field, a->raw_data, &dyn);
+               offset = dyn & 0xffff;
+               size = (dyn >> 16) & 0xffff;
+
+               /* record max width for output */
+               if (size > hde->dynamic_len)
+                       hde->dynamic_len = size;
+       } else {
+               offset = field->offset;
+               size = field->size;
+       }
+
+       return memcmp(a->raw_data + offset,
+                     key->sample->raw_data + offset, size);
+}
+
 bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt)
 {
-       return fmt->cmp == __sort__hde_cmp;
+       return fmt->cmp == __sort__hde_cmp_key;
 }
 
 static bool __sort__hde_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
@@ -2138,7 +2692,7 @@ __alloc_dynamic_entry(struct perf_evsel *evsel, struct tep_format_field *field,
        hde->hpp.entry  = __sort__hde_entry;
        hde->hpp.color  = NULL;
 
-       hde->hpp.cmp = __sort__hde_cmp;
+       hde->hpp.cmp = __sort__hde_cmp_key;
        hde->hpp.collapse = __sort__hde_cmp;
        hde->hpp.sort = __sort__hde_cmp;
        hde->hpp.equal = __sort__hde_equal;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index a97cf8e..da85224 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -264,6 +264,7 @@ struct sort_entry {
        const char *se_header;
 
        int64_t (*se_cmp)(struct hist_entry *, struct hist_entry *);
+       int64_t (*se_cmp_key)(struct hist_entry *, struct hist_entry_cmp_key *);
        int64_t (*se_collapse)(struct hist_entry *, struct hist_entry *);
        int64_t (*se_sort)(struct hist_entry *, struct hist_entry *);
        int     (*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 3badd7f..78df16b 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -150,7 +150,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 struct perf_hpp;
 struct perf_hpp_fmt;
 
-int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
 int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
 int hist_entry__transaction_len(void);
 int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
@@ -238,6 +237,29 @@ struct perf_hpp {
        void *ptr;
 };
 
+struct addr_location;
+struct branch_info;
+struct comm;
+struct mem_info;
+struct perf_sample;
+struct symbol;
+
+/*
+ * Lookup key for the per-sample histogram search: it carries only pointers
+ * to data the caller already has, so no hist_entry has to be built on the
+ * stack just to be compared against existing entries.
+ */
+struct hist_entry_cmp_key {
+       struct addr_location *al;       /* thread, map, sym, cpu, ... */
+       struct comm *comm;              /* comm of al->thread */
+       struct branch_info *bi;         /* may be NULL */
+       struct symbol *sym_parent;
+       struct perf_sample *sample;     /* period, weight, raw data, ... */
+       struct mem_info *mem_info;      /* may be NULL */
+       char *srcfile;                  /* NULL at this patch's call sites */
+       char *trace_output;             /* NULL at this patch's call sites */
+};
+
 struct perf_hpp_fmt {
        const char *name;
        int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
@@ -249,7 +260,8 @@ struct perf_hpp_fmt {
        int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
                     struct hist_entry *he);
        int64_t (*cmp)(struct perf_hpp_fmt *fmt,
-                      struct hist_entry *a, struct hist_entry *b);
+                      struct hist_entry *entry,
+                      struct hist_entry_cmp_key *key);
        int64_t (*collapse)(struct perf_hpp_fmt *fmt,
                            struct hist_entry *a, struct hist_entry *b);
        int64_t (*sort)(struct perf_hpp_fmt *fmt,
@@ -525,4 +537,8 @@ static inline int hists__scnprintf_title(struct hists *hists, char *bf, size_t s
        return __hists__scnprintf_title(hists, bf, size, true);
 }
 
+extern unsigned long hist_lookups;
+extern unsigned long hist_hits;
+extern unsigned long hist_misses;
+
 #endif /* __PERF_HIST_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 828cb97..a4deb5d 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -364,16 +364,57 @@ void hists__delete_entries(struct hists *hists)
        }
 }
 
+/*
+ * Returns the HIST_FILTER__PARENT bit when symbol_conf.exclude_other is set
+ * and parent is NULL, 0 otherwise.
+ */
+static u8 symbol__parent_filter(const struct symbol *parent)
+{
+       if (symbol_conf.exclude_other && parent == NULL)
+               return 1 << HIST_FILTER__PARENT;
+       return 0;
+}
+
 /*
  * histogram, sorted on item, collects periods
  */
 
 static int hist_entry__init(struct hist_entry *he,
-                           struct hist_entry *template,
+                           struct hist_entry_cmp_key *key,
+                           struct hists *hists,
                            bool sample_self,
                            size_t callchain_size)
 {
-       *he = *template;
+       struct namespaces *ns = thread__namespaces(key->al->thread);
+
+       /*
+        * Fill the new entry directly from the lookup key instead of copying
+        * a template hist_entry.
+        */
+       he->thread = key->al->thread;
+       he->comm = thread__comm(he->thread);
+       he->cgroup_id.dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0;
+       he->cgroup_id.ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0;
+       he->ms.map = key->al->map;
+       he->ms.sym = key->al->sym;
+       he->srcline = key->al->srcline ? strdup(key->al->srcline) : NULL;
+       he->socket       = key->al->socket;
+       he->cpu  = key->al->cpu;
+       he->cpumode = key->al->cpumode;
+       he->ip   = key->al->addr;
+       he->level        = key->al->level;
+       he->stat.nr_events = 1;
+       he->stat.period = key->sample->period;
+       he->stat.weight = key->sample->weight;
+       he->parent = key->sym_parent;
+       he->filtered = symbol__parent_filter(key->sym_parent) | key->al->filtered;
+       he->hists = hists;
+       he->branch_info = key->bi;
+       he->mem_info = key->mem_info;
+       he->transaction = key->sample->transaction;
+       he->raw_data = key->sample->raw_data;
+       he->raw_size = key->sample->raw_size;
+
        he->callchain_size = callchain_size;
 
        if (symbol_conf.cumulate_callchain) {
@@ -400,7 +433,7 @@ static int hist_entry__init(struct hist_entry *he,
                        return -ENOMEM;
                }
 
-               memcpy(he->branch_info, template->branch_info,
+               memcpy(he->branch_info, key->bi,
                       sizeof(*he->branch_info));
 
                map__get(he->branch_info->from.map);
@@ -459,23 +492,29 @@ static struct hist_entry_ops default_ops = {
        .free   = hist_entry__free,
 };
 
-static struct hist_entry *hist_entry__new(struct hist_entry *template,
+/*
+ * Allocate an entry through @ops (default_ops when @ops is NULL) and fill it
+ * from @key; the entry is released through ops->free() if init fails.
+ */
+static struct hist_entry *hist_entry__new(struct hist_entry_cmp_key *key,
+                                         struct hists *hists,
+                                         struct hist_entry_ops *ops,
                                          bool sample_self)
 {
-       struct hist_entry_ops *ops = template->ops;
        size_t callchain_size = 0;
        struct hist_entry *he;
        int err = 0;
 
        if (!ops)
-               ops = template->ops = &default_ops;
+               ops = &default_ops;
 
        if (symbol_conf.use_callchain)
                callchain_size = sizeof(struct callchain_root);
 
        he = ops->new(callchain_size);
        if (he) {
-               err = hist_entry__init(he, template, sample_self, callchain_size);
+               he->ops = ops;
+               err = hist_entry__init(he, key, hists, sample_self, callchain_size);
                if (err) {
                        ops->free(he);
                        he = NULL;
@@ -485,13 +520,6 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
        return he;
 }
 
-static u8 symbol__parent_filter(const struct symbol *parent)
-{
-       if (symbol_conf.exclude_other && parent == NULL)
-               return 1 << HIST_FILTER__PARENT;
-       return 0;
-}
-
 static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
 {
        if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
@@ -502,17 +530,52 @@ static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
                he->hists->callchain_non_filtered_period += period;
 }
 
+/*
+ * Compare an existing entry with a lookup key by walking the sort list of the
+ * entry's hists.  Dynamic entries not defined for these hists are skipped;
+ * the first non-zero fmt->cmp() result is returned, or 0 when all keys match.
+ */
+static int64_t
+hist_entry__cmp(struct hist_entry *entry, struct hist_entry_cmp_key *key)
+{
+       struct hists *hists = entry->hists;
+       struct perf_hpp_fmt *fmt;
+       int64_t cmp = 0;
+
+       hists__for_each_sort_list(hists, fmt) {
+               if (perf_hpp__is_dynamic_entry(fmt) &&
+                   !perf_hpp__defined_dynamic_entry(fmt, hists))
+                       continue;
+
+               cmp = fmt->cmp(fmt, entry, key);
+               if (cmp)
+                       break;
+       }
+
+       return cmp;
+}
+
+/*
+ * Lookup statistics: calls to hists__findnew_entry(), lookups that matched an
+ * existing entry, and lookups that went on to allocate a new one.
+ */
+unsigned long hist_lookups;
+unsigned long hist_hits;
+unsigned long hist_misses;
+
 static struct hist_entry *hists__findnew_entry(struct hists *hists,
-                                              struct hist_entry *entry,
-                                              struct addr_location *al,
+                                              struct hist_entry_cmp_key *key,
+                                              struct hist_entry_ops *ops,
                                               bool sample_self)
 {
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct hist_entry *he;
        int64_t cmp;
-       u64 period = entry->stat.period;
-       u64 weight = entry->stat.weight;
+       u64 period = key->sample->period;
+       u64 weight = key->sample->weight;
+
+       hist_lookups++;
 
        p = &hists->entries_in->rb_node;
 
@@ -526,7 +580,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
                 * function when searching an entry regardless which sort
                 * keys were used.
                 */
-               cmp = hist_entry__cmp(he, entry);
+               cmp = hist_entry__cmp(he, key);
 
                if (!cmp) {
                        if (sample_self) {
@@ -540,7 +594,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
                         * This mem info was allocated from sample__resolve_mem
                         * and will not be used anymore.
                         */
-                       mem_info__zput(entry->mem_info);
+                       mem_info__zput(key->mem_info);
 
                        /* If the map of an existing hist_entry has
                         * become out-of-date due to an exec() or
@@ -548,10 +602,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
                         * mis-adjust symbol addresses when computing
                         * the history counter to increment.
                         */
-                       if (he->ms.map != entry->ms.map) {
+                       if (he->ms.map != key->al->map) {
                                map__put(he->ms.map);
-                               he->ms.map = map__get(entry->ms.map);
+                               he->ms.map = map__get(key->al->map);
                        }
+                       hist_hits++;
                        goto out;
                }
 
@@ -561,7 +616,8 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
                        p = &(*p)->rb_right;
        }
 
-       he = hist_entry__new(entry, sample_self);
+       hist_misses++;
+       he = hist_entry__new(key, hists, ops, sample_self);
        if (!he)
                return NULL;
 
@@ -573,9 +629,9 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
        rb_insert_color(&he->rb_node_in, hists->entries_in);
 out:
        if (sample_self)
-               he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
+               he_stat__add_cpumode_period(&he->stat, key->al->cpumode, period);
        if (symbol_conf.cumulate_callchain)
-               he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
+               he_stat__add_cpumode_period(he->stat_acc, key->al->cpumode, period);
        return he;
 }
 
@@ -589,39 +645,19 @@ __hists__add_entry(struct hists *hists,
                   bool sample_self,
                   struct hist_entry_ops *ops)
 {
-       struct namespaces *ns = thread__namespaces(al->thread);
-       struct hist_entry entry = {
-               .thread = al->thread,
-               .comm = thread__comm(al->thread),
-               .cgroup_id = {
-                       .dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0,
-                       .ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0,
-               },
-               .ms = {
-                       .map    = al->map,
-                       .sym    = al->sym,
-               },
-               .srcline = al->srcline ? strdup(al->srcline) : NULL,
-               .socket  = al->socket,
-               .cpu     = al->cpu,
-               .cpumode = al->cpumode,
-               .ip      = al->addr,
-               .level   = al->level,
-               .stat = {
-                       .nr_events = 1,
-                       .period = sample->period,
-                       .weight = sample->weight,
-               },
-               .parent = sym_parent,
-               .filtered = symbol__parent_filter(sym_parent) | al->filtered,
-               .hists  = hists,
-               .branch_info = bi,
-               .mem_info = mi,
-               .transaction = sample->transaction,
-               .raw_data = sample->raw_data,
-               .raw_size = sample->raw_size,
-               .ops = ops,
-       }, *he = hists__findnew_entry(hists, &entry, al, sample_self);
+       /* Pointers only; the hist_entry itself is built on a lookup miss. */
+       struct hist_entry_cmp_key key = {
+               .al             = al,
+               .comm           = thread__comm(al->thread),
+               .bi             = bi,
+               .sym_parent     = sym_parent,
+               .sample         = sample,
+               .mem_info       = mi,
+               .srcfile        = NULL,
+               .trace_output   = NULL,
+       };
+       struct hist_entry *he = hists__findnew_entry(hists, &key, ops,
+                                                    sample_self);
 
        if (!hists->has_callchains && he && he->callchain_size != 0)
                hists->has_callchains = true;
@@ -947,7 +983,9 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
        struct perf_evsel *evsel = iter->evsel;
        struct perf_sample *sample = iter->sample;
        struct hist_entry **he_cache = iter->priv;
+       struct hist_entry_cmp_key key;
        struct hist_entry *he;
+#if 0
        struct hist_entry he_tmp = {
                .hists = evsel__hists(evsel),
                .cpu = al->cpu,
@@ -963,6 +1001,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
                .raw_data = sample->raw_data,
                .raw_size = sample->raw_size,
        };
+#endif
        int i;
        struct callchain_cursor cursor;
 
@@ -974,8 +1013,16 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
         * Check if there's duplicate entries in the callchain.
         * It's possible that it has cycles or recursive calls.
         */
+       key.al = al;
+       key.comm = thread__comm(al->thread);
+       key.bi = NULL;
+       key.sym_parent = iter->parent;
+       key.sample = sample;
+       key.mem_info = NULL;
+       key.srcfile = NULL;
+       key.trace_output = NULL;
        for (i = 0; i < iter->curr; i++) {
-               if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
+               if (hist_entry__cmp(he_cache[i], &key) == 0) {
                        /* to avoid calling callback function */
                        iter->he = NULL;
                        return 0;
@@ -1088,26 +1135,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 }
 
 int64_t
-hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       struct hists *hists = left->hists;
-       struct perf_hpp_fmt *fmt;
-       int64_t cmp = 0;
-
-       hists__for_each_sort_list(hists, fmt) {
-               if (perf_hpp__is_dynamic_entry(fmt) &&
-                   !perf_hpp__defined_dynamic_entry(fmt, hists))
-                       continue;
-
-               cmp = fmt->cmp(fmt, left, right);
-               if (cmp)
-                       break;
-       }
-
-       return cmp;
-}
-
-int64_t
 hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
 {
        struct hists *hists = left->hists;
@@ -1312,7 +1339,11 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
                        p = &parent->rb_right;
        }
 
-       new = hist_entry__new(he, true);
+#if 1
+       new = NULL;
+#else
+       new = hist_entry__new(he, true); /* XXX fix XXX */
+#endif
        if (new == NULL)
                return NULL;
 
@@ -2168,7 +2199,11 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
                        p = &(*p)->rb_right;
        }
 
-       he = hist_entry__new(pair, true);
+#if 1
+       he = NULL;
+#else
+       he = hist_entry__new(pair, true); /* XXX fix XXX */
+#endif
        if (he) {
                memset(&he->stat, 0, sizeof(he->stat));
                he->hists = hists;
@@ -2213,7 +2248,11 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
                        p = &parent->rb_right;
        }
 
-       he = hist_entry__new(pair, true);
+#if 1
+       he = NULL;
+#else
+       he = hist_entry__new(pair, true); /* XXX fix XXX */
+#endif
        if (he) {
                rb_link_node(&he->rb_node_in, parent, p);
                rb_insert_color(&he->rb_node_in, root);
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index fe3dfaa..a3d66e1 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -372,8 +372,16 @@ HPP_RAW_FNS(samples, nr_events)
 HPP_RAW_FNS(period, period)
 
 static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
-                           struct hist_entry *a __maybe_unused,
-                           struct hist_entry *b __maybe_unused)
+                           struct hist_entry *entry __maybe_unused,
+                           struct hist_entry_cmp_key *key __maybe_unused)
+{
+       return 0;
+}
+
+/* Entry-vs-entry no-op for the collapse hook, which keeps the old signature. */
+static int64_t hpp__nop_collapse(struct perf_hpp_fmt *fmt __maybe_unused,
+                                struct hist_entry *left __maybe_unused,
+                                struct hist_entry *right __maybe_unused)
 {
        return 0;
 }
@@ -399,7 +406,7 @@ static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
                .color  = hpp__color_ ## _fn,           \
                .entry  = hpp__entry_ ## _fn,           \
                .cmp    = hpp__nop_cmp,                 \
-               .collapse = hpp__nop_cmp,               \
+               .collapse = hpp__nop_collapse,          \
                .sort   = hpp__sort_ ## _fn,            \
                .idx    = PERF_HPP__ ## _idx,           \
                .equal  = hpp__equal,                   \
@@ -413,7 +420,7 @@ static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
                .color  = hpp__color_ ## _fn,           \
                .entry  = hpp__entry_ ## _fn,           \
                .cmp    = hpp__nop_cmp,                 \
-               .collapse = hpp__nop_cmp,               \
+               .collapse = hpp__nop_collapse,          \
                .sort   = hpp__sort_ ## _fn,            \
                .idx    = PERF_HPP__ ## _idx,           \
                .equal  = hpp__equal,                   \
@@ -426,7 +433,7 @@ static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
                .width  = hpp__width_fn,                \
                .entry  = hpp__entry_ ## _fn,           \
                .cmp    = hpp__nop_cmp,                 \
-               .collapse = hpp__nop_cmp,               \
+               .collapse = hpp__nop_collapse,          \
                .sort   = hpp__sort_ ## _fn,            \
                .idx    = PERF_HPP__ ## _idx,           \
                .equal  = hpp__equal,                   \
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index f3aa9d0..190f5eb 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1717,12 +1717,19 @@ static int c2c_se_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
 }
 
+/*
+ * Entry-vs-key comparison for c2c columns backed by a sort_entry.
+ * NOTE(review): dim->se is dereferenced unconditionally, while get_format()
+ * now installs this callback for every dimension -- confirm that dimensions
+ * without a sort_entry cannot reach it.
+ */
 static int64_t c2c_se_cmp(struct perf_hpp_fmt *fmt,
-                         struct hist_entry *a, struct hist_entry *b)
+                         struct hist_entry *entry,
+                         struct hist_entry_cmp_key *key)
 {
        struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
        struct c2c_dimension *dim = c2c_fmt->dim;
 
-       return dim->se->se_cmp(a, b);
+       return dim->se->se_cmp_key(entry, key);
 }
 
 static int64_t c2c_se_collapse(struct perf_hpp_fmt *fmt,
@@ -1755,8 +1756,13 @@ static struct c2c_fmt *get_format(const char *name)
        INIT_LIST_HEAD(&fmt->list);
        INIT_LIST_HEAD(&fmt->sort_list);
 
+#if 1
+       fmt->cmp        = c2c_se_cmp;
+       fmt->sort       = dim->cmp;
+#else
        fmt->cmp        = dim->se ? c2c_se_cmp   : dim->cmp;
        fmt->sort       = dim->se ? c2c_se_cmp   : dim->cmp;
+#endif
        fmt->color      = dim->se ? NULL         : dim->color;
        fmt->entry      = dim->se ? c2c_se_entry : dim->entry;
        fmt->header     = c2c_header;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 39db2ee..2684efa 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -604,8 +604,17 @@ hist_entry__cmp_compute_idx(struct hist_entry *left, struct hist_entry *right,
 
 static int64_t
 hist_entry__cmp_nop(struct perf_hpp_fmt *fmt __maybe_unused,
-                   struct hist_entry *left __maybe_unused,
-                   struct hist_entry *right __maybe_unused)
+                   struct hist_entry *entry __maybe_unused,
+                   struct hist_entry_cmp_key *key __maybe_unused)
+{
+       return 0;
+}
+
+/* Entry-vs-entry no-op for hooks that keep the two-hist_entry signature. */
+static int64_t
+hist_entry__collapse_nop(struct perf_hpp_fmt *fmt __maybe_unused,
+                        struct hist_entry *a __maybe_unused,
+                        struct hist_entry *b __maybe_unused)
 {
        return 0;
 }
@@ -1141,7 +1149,7 @@ static void data__hpp_register(struct data__file *d, int idx)
        fmt->width  = hpp__width;
        fmt->entry  = hpp__entry_global;
        fmt->cmp    = hist_entry__cmp_nop;
-       fmt->collapse = hist_entry__cmp_nop;
+       fmt->collapse = hist_entry__collapse_nop;
 
        /* TODO more colors */
        switch (idx) {
@@ -1166,7 +1174,7 @@ static void data__hpp_register(struct data__file *d, int idx)
                fmt->sort  = hist_entry__cmp_delta_abs;
                break;
        default:
-               fmt->sort  = hist_entry__cmp_nop;
+               fmt->sort  = hist_entry__collapse_nop;
                break;
        }
 
@@ -1230,7 +1238,7 @@ static int ui_init(void)
        }
 
        fmt->cmp      = hist_entry__cmp_nop;
-       fmt->collapse = hist_entry__cmp_nop;
+       fmt->collapse = hist_entry__collapse_nop;
 
        switch (compute) {
        case COMPUTE_DELTA:

Reply via email to