Re: [PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

2019-10-21 Thread Jin, Yao




On 10/22/2019 12:08 AM, Jiri Olsa wrote:

On Tue, Oct 15, 2019 at 01:33:48PM +0800, Jin Yao wrote:

SNIP


+   cycles += bi->cycles_aggr / bi->num_aggr;
+
+   he_block = hists__add_entry_block(>block_hists,
+ , bi);
+   if (!he_block) {
+   block_info__put(bi);
+   return -1;
+   }
+   }
+   }
+
+   if (block_cycles)
+   *block_cycles += cycles;
+
+   return 0;
+}
+
+static int resort_cb(struct hist_entry *he, void *arg __maybe_unused)
+{
+   /* Skip the calculation of column length in output_resort */
+   he->filtered = true;


that's a nasty hack ;-) together with setting it back to false just below

why do you want to skip the columns calculation? we could add those columns
to the output as well no?

jirka



The columns calculation for this case causes the crash. The current 
columns calculation requires some information but we don't provide. So I 
just want to skip the columns calculation.


OK, I will check how to avoid this nasty hack.

Thanks
Jin Yao


+   return 0;
+}
+
+static void hists__clear_filtered(struct hists *hists)
+{
+   struct rb_node *next = rb_first_cached(>entries);
+   struct hist_entry *he;
+
+   while (next) {
+   he = rb_entry(next, struct hist_entry, rb_node);
+   he->filtered = false;
+   next = rb_next(>rb_node);
+   }
+}


SNIP

jirka



Re: [PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

2019-10-21 Thread Jin, Yao




On 10/22/2019 12:07 AM, Jiri Olsa wrote:

On Tue, Oct 15, 2019 at 01:33:48PM +0800, Jin Yao wrote:

SNIP


--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -51,6 +51,7 @@
  #include "util/util.h" // perf_tip()
  #include "ui/ui.h"
  #include "ui/progress.h"
+#include "util/block.h"
  
  #include 

  #include 
@@ -96,10 +97,64 @@ struct report {
float   min_percent;
u64 nr_entries;
u64 queue_size;
+   u64 cycles_count;
+   u64 block_cycles;
int socket_filter;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
struct branch_type_stat brtype_stat;
boolsymbol_ipc;
+   booltotal_cycles;
+   struct block_hist   block_hist;
+};
+


please put all this (everything that has *block* in the name ;-) )
to the block_info.[ch] and try to reuse some of the functions in
builtin-diff.c, looks like there's some duplicity like for
init_block_hist init_block_info

I understand that report and diff interface would be different
at some point, but there seems to be many common parts

jirka



OK, got it. I will try to reuse the common parts.

Thanks
Jin Yao


+struct block_fmt {
+   struct perf_hpp_fmt fmt;
+   int idx;
+   int width;
+   const char  *header;
+   struct report   *rep;
+};
+
+enum {
+   PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV,
+   PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
+   PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
+   PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
+   PERF_HPP_REPORT__BLOCK_RANGE,
+   PERF_HPP_REPORT__BLOCK_DSO,
+   PERF_HPP_REPORT__BLOCK_MAX_INDEX
+};
+
+static struct block_fmt block_fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
+
+static struct block_header_column{
+   const char *name;
+   int width;
+} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
+   [PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV] = {
+   .name = "Sampled Cycles%",
+   .width = 15,
+   },
+   [PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
+   .name = "Sampled Cycles",
+   .width = 14,
+   },
+   [PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
+   .name = "Avg Cycles%",
+   .width = 11,
+   },
+   [PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
+   .name = "Avg Cycles",
+   .width = 10,
+   },
+   [PERF_HPP_REPORT__BLOCK_RANGE] = {
+   .name = "[Program Block Range]",
+   .width = 70,
+   },
+   [PERF_HPP_REPORT__BLOCK_DSO] = {
+   .name = "Shared Object",
+   .width = 20,
+   }
  };
  
  static int report__config(const char *var, const char *value, void *cb)

@@ -277,7 +332,8 @@ static int process_sample_event(struct perf_tool *tool,
if (!sample->branch_stack)
goto out_put;
  
-		iter.add_entry_cb = hist_iter__branch_callback;

+   if (!rep->total_cycles)
+   iter.add_entry_cb = hist_iter__branch_callback;
iter.ops = _iter_branch;
} else if (rep->mem_mode) {
iter.ops = _iter_mem;
@@ -290,9 +346,10 @@ static int process_sample_event(struct perf_tool *tool,
if (al.map != NULL)
al.map->dso->hit = 1;
  
-	if (ui__has_annotation() || rep->symbol_ipc) {

+   if (ui__has_annotation() || rep->symbol_ipc || rep->total_cycles) {
hist__account_cycles(sample->branch_stack, , sample,
-rep->nonany_branch_mode, NULL);
+rep->nonany_branch_mode,
+>cycles_count);
}
  
  	ret = hist_entry_iter__add(, , rep->max_stack, rep);

@@ -480,6 +537,349 @@ static size_t hists__fprintf_nr_sample_events(struct 
hists *hists, struct report
return ret + fprintf(fp, "\n#\n");
  }
  
+static int block_column_header(struct perf_hpp_fmt *fmt __maybe_unused,

+  struct perf_hpp *hpp __maybe_unused,
+  struct hists *hists __maybe_unused,
+  int line __maybe_unused,
+  int *span __maybe_unused)
+{
+   struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+
+   return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+block_fmt->header);
+}
+
+static int block_column_width(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct perf_hpp *hpp __maybe_unused,
+ struct hists *hists __maybe_unused)
+{
+   struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+
+   return block_fmt->width;
+}
+
+static int block_cycles_cov_entry(struct perf_hpp_fmt *fmt,
+  

Re: [PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

2019-10-21 Thread Jiri Olsa
On Mon, Oct 21, 2019 at 04:04:39PM +0200, Jiri Olsa wrote:
> On Mon, Oct 21, 2019 at 02:56:57PM +0800, Jin, Yao wrote:
> 
> SNIP
> 
> > > > Does it seem like what the c2c does?
> > > 
> > > well c2c has its own data output with multiline column titles,
> > > hence it has its own separate dimension stuff, but your code
> > > output is within the standard perf report right? single column
> > > output.. why couldn't you use just sort_entry ?
> > > 
> > > jirka
> > > 
> > 
> > Hi Jiri,
> > 
> > I've being thinking how to use sort_entry but I have some troubles.
> > 
> > In v2, I used "struct perf_hpp_fmt" to pass extra argument. For example,
> > 
> > static int64_t block_cycles_cov_sort(struct perf_hpp_fmt *fmt,
> >  struct hist_entry *left,
> >  struct hist_entry *right)
> > {
> > struct block_fmt *block_fmt = container_of(fmt, ...);
> > struct report *rep = block_fmt->rep;
> > ...
> > }
> > 
> > But if I just use sort_entry, I can't pass extra argument (it's not a good
> > idea to add more fields in struct hist_entry).
> > 
> > int64_t sort__xxx_sort(struct hist_entry *left,
> >struct hist_entry *right)
> > 
> > And for entry print it's similar, I can't pass extra argument in.
> > 
> > In v2,
> > static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
> >   struct perf_hpp *hpp,
> >   struct hist_entry *he)
> > {
> > struct block_fmt *block_fmt = container_of(fmt,...);
> > struct report *rep = block_fmt->rep;
> > ...
> > }
> > 
> > But for se_snprintf, I can't pass extra argument in.
> > 
> > hist_entry__xxx_snprintf(struct hist_entry *he, char *bf,
> >  size_t size, unsigned int width)
> > 
> > That's why I feel headache for just using the sort_entry. :(
> 
> you might be right, I just want to omit another field output framework ;-) 
> I'm checking on this and will let you know if I find some way

ok, because it's based on branch data the sort entry would not be
convenient.. I'm sending some other comments for the patch

thanks,
jirka



Re: [PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

2019-10-21 Thread Jiri Olsa
On Tue, Oct 15, 2019 at 01:33:48PM +0800, Jin Yao wrote:

SNIP

> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -51,6 +51,7 @@
>  #include "util/util.h" // perf_tip()
>  #include "ui/ui.h"
>  #include "ui/progress.h"
> +#include "util/block.h"
>  
>  #include 
>  #include 
> @@ -96,10 +97,64 @@ struct report {
>   float   min_percent;
>   u64 nr_entries;
>   u64 queue_size;
> + u64 cycles_count;
> + u64 block_cycles;
>   int socket_filter;
>   DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
>   struct branch_type_stat brtype_stat;
>   boolsymbol_ipc;
> + booltotal_cycles;
> + struct block_hist   block_hist;
> +};
> +

please put all this (everything that has *block* in the name ;-) )
to the block_info.[ch] and try to reuse some of the functions in
builtin-diff.c, looks like there's some duplicity like for
init_block_hist init_block_info

I understand that report and diff interface would be different
at some point, but there seems to be many common parts

jirka

> +struct block_fmt {
> + struct perf_hpp_fmt fmt;
> + int idx;
> + int width;
> + const char  *header;
> + struct report   *rep;
> +};
> +
> +enum {
> + PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV,
> + PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
> + PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
> + PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
> + PERF_HPP_REPORT__BLOCK_RANGE,
> + PERF_HPP_REPORT__BLOCK_DSO,
> + PERF_HPP_REPORT__BLOCK_MAX_INDEX
> +};
> +
> +static struct block_fmt block_fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
> +
> +static struct block_header_column{
> + const char *name;
> + int width;
> +} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
> + [PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV] = {
> + .name = "Sampled Cycles%",
> + .width = 15,
> + },
> + [PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
> + .name = "Sampled Cycles",
> + .width = 14,
> + },
> + [PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
> + .name = "Avg Cycles%",
> + .width = 11,
> + },
> + [PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
> + .name = "Avg Cycles",
> + .width = 10,
> + },
> + [PERF_HPP_REPORT__BLOCK_RANGE] = {
> + .name = "[Program Block Range]",
> + .width = 70,
> + },
> + [PERF_HPP_REPORT__BLOCK_DSO] = {
> + .name = "Shared Object",
> + .width = 20,
> + }
>  };
>  
>  static int report__config(const char *var, const char *value, void *cb)
> @@ -277,7 +332,8 @@ static int process_sample_event(struct perf_tool *tool,
>   if (!sample->branch_stack)
>   goto out_put;
>  
> - iter.add_entry_cb = hist_iter__branch_callback;
> + if (!rep->total_cycles)
> + iter.add_entry_cb = hist_iter__branch_callback;
>   iter.ops = _iter_branch;
>   } else if (rep->mem_mode) {
>   iter.ops = _iter_mem;
> @@ -290,9 +346,10 @@ static int process_sample_event(struct perf_tool *tool,
>   if (al.map != NULL)
>   al.map->dso->hit = 1;
>  
> - if (ui__has_annotation() || rep->symbol_ipc) {
> + if (ui__has_annotation() || rep->symbol_ipc || rep->total_cycles) {
>   hist__account_cycles(sample->branch_stack, , sample,
> -  rep->nonany_branch_mode, NULL);
> +  rep->nonany_branch_mode,
> +  >cycles_count);
>   }
>  
>   ret = hist_entry_iter__add(, , rep->max_stack, rep);
> @@ -480,6 +537,349 @@ static size_t hists__fprintf_nr_sample_events(struct 
> hists *hists, struct report
>   return ret + fprintf(fp, "\n#\n");
>  }
>  
> +static int block_column_header(struct perf_hpp_fmt *fmt __maybe_unused,
> +struct perf_hpp *hpp __maybe_unused,
> +struct hists *hists __maybe_unused,
> +int line __maybe_unused,
> +int *span __maybe_unused)
> +{
> + struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
> +
> + return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
> +  block_fmt->header);
> +}
> +
> +static int block_column_width(struct perf_hpp_fmt *fmt __maybe_unused,
> +   struct perf_hpp *hpp __maybe_unused,
> +   struct hists *hists __maybe_unused)
> +{
> + struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
> +
> + return block_fmt->width;
> +}
> +
> +static int block_cycles_cov_entry(struct perf_hpp_fmt *fmt,
> +  

Re: [PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

2019-10-21 Thread Jiri Olsa
On Tue, Oct 15, 2019 at 01:33:48PM +0800, Jin Yao wrote:

SNIP

> + cycles += bi->cycles_aggr / bi->num_aggr;
> +
> + he_block = hists__add_entry_block(>block_hists,
> +   , bi);
> + if (!he_block) {
> + block_info__put(bi);
> + return -1;
> + }
> + }
> + }
> +
> + if (block_cycles)
> + *block_cycles += cycles;
> +
> + return 0;
> +}
> +
> +static int resort_cb(struct hist_entry *he, void *arg __maybe_unused)
> +{
> + /* Skip the calculation of column length in output_resort */
> + he->filtered = true;

that's a nasty hack ;-) together with setting it back to false just below

why do you want to skip the columns calculation? we could add those columns
to the output as well no?

jirka

> + return 0;
> +}
> +
> +static void hists__clear_filtered(struct hists *hists)
> +{
> + struct rb_node *next = rb_first_cached(>entries);
> + struct hist_entry *he;
> +
> + while (next) {
> + he = rb_entry(next, struct hist_entry, rb_node);
> + he->filtered = false;
> + next = rb_next(>rb_node);
> + }
> +}

SNIP

jirka



Re: [PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

2019-10-21 Thread Jiri Olsa
On Mon, Oct 21, 2019 at 02:56:57PM +0800, Jin, Yao wrote:

SNIP

> > > Does it seem like what the c2c does?
> > 
> > well c2c has its own data output with multiline column titles,
> > hence it has its own separate dimension stuff, but your code
> > output is within the standard perf report right? single column
> > output.. why couldn't you use just sort_entry ?
> > 
> > jirka
> > 
> 
> Hi Jiri,
> 
> I've being thinking how to use sort_entry but I have some troubles.
> 
> In v2, I used "struct perf_hpp_fmt" to pass extra argument. For example,
> 
> static int64_t block_cycles_cov_sort(struct perf_hpp_fmt *fmt,
>struct hist_entry *left,
>struct hist_entry *right)
> {
>   struct block_fmt *block_fmt = container_of(fmt, ...);
>   struct report *rep = block_fmt->rep;
>   ...
> }
> 
> But if I just use sort_entry, I can't pass extra argument (it's not a good
> idea to add more fields in struct hist_entry).
> 
> int64_t sort__xxx_sort(struct hist_entry *left,
>  struct hist_entry *right)
> 
> And for entry print it's similar, I can't pass extra argument in.
> 
> In v2,
> static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
> struct perf_hpp *hpp,
> struct hist_entry *he)
> {
>   struct block_fmt *block_fmt = container_of(fmt,...);
>   struct report *rep = block_fmt->rep;
>   ...
> }
> 
> But for se_snprintf, I can't pass extra argument in.
> 
> hist_entry__xxx_snprintf(struct hist_entry *he, char *bf,
>size_t size, unsigned int width)
> 
> That's why I feel headache for just using the sort_entry. :(

you might be right, I just want to omit another field output framework ;-) 
I'm checking on this and will let you know if I find some way

jirka



Re: [PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

2019-10-21 Thread Jin, Yao




On 10/16/2019 8:53 PM, Jiri Olsa wrote:

On Wed, Oct 16, 2019 at 06:51:07PM +0800, Jin, Yao wrote:



On 10/16/2019 6:15 PM, Jiri Olsa wrote:

On Tue, Oct 15, 2019 at 10:53:18PM +0800, Jin, Yao wrote:

SNIP


+static struct block_header_column{
+   const char *name;
+   int width;
+} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
+   [PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV] = {
+   .name = "Sampled Cycles%",
+   .width = 15,
+   },
+   [PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
+   .name = "Sampled Cycles",
+   .width = 14,
+   },
+   [PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
+   .name = "Avg Cycles%",
+   .width = 11,
+   },
+   [PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
+   .name = "Avg Cycles",
+   .width = 10,
+   },
+   [PERF_HPP_REPORT__BLOCK_RANGE] = {
+   .name = "[Program Block Range]",
+   .width = 70,
+   },
+   [PERF_HPP_REPORT__BLOCK_DSO] = {
+   .name = "Shared Object",
+   .width = 20,
+   }
};


so we already have support for multiple columns,
why don't you add those as 'struct sort_entry' objects?



For 'struct sort_entry' objects, do you mean I should reuse the "sort_dso"
which has been implemented yet in util/sort.c?

For other columns, it looks we can't reuse the existing sort_entry objects.


I did not mean reuse, just add new sort entries
to current sort framework



Does it seem like what the c2c does?


well c2c has its own data output with multiline column titles,
hence it has its own separate dimension stuff, but your code
output is within the standard perf report right? single column
output.. why couldn't you use just sort_entry ?

jirka



Hi Jiri,

I've being thinking how to use sort_entry but I have some troubles.

In v2, I used "struct perf_hpp_fmt" to pass extra argument. For example,

static int64_t block_cycles_cov_sort(struct perf_hpp_fmt *fmt,
 struct hist_entry *left,
 struct hist_entry *right)
{
struct block_fmt *block_fmt = container_of(fmt, ...);
struct report *rep = block_fmt->rep;
...
}

But if I just use sort_entry, I can't pass extra argument (it's not a 
good idea to add more fields in struct hist_entry).


int64_t sort__xxx_sort(struct hist_entry *left,
   struct hist_entry *right)

And for entry print it's similar, I can't pass extra argument in.

In v2,
static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
  struct perf_hpp *hpp,
  struct hist_entry *he)
{
struct block_fmt *block_fmt = container_of(fmt,...);
struct report *rep = block_fmt->rep;
...
}

But for se_snprintf, I can't pass extra argument in.

hist_entry__xxx_snprintf(struct hist_entry *he, char *bf,
 size_t size, unsigned int width)

That's why I feel headache for just using the sort_entry. :(

Thanks
Jin Yao


Re: [PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

2019-10-16 Thread Jin, Yao




On 10/16/2019 8:53 PM, Jiri Olsa wrote:

On Wed, Oct 16, 2019 at 06:51:07PM +0800, Jin, Yao wrote:



On 10/16/2019 6:15 PM, Jiri Olsa wrote:

On Tue, Oct 15, 2019 at 10:53:18PM +0800, Jin, Yao wrote:

SNIP


+static struct block_header_column{
+   const char *name;
+   int width;
+} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
+   [PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV] = {
+   .name = "Sampled Cycles%",
+   .width = 15,
+   },
+   [PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
+   .name = "Sampled Cycles",
+   .width = 14,
+   },
+   [PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
+   .name = "Avg Cycles%",
+   .width = 11,
+   },
+   [PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
+   .name = "Avg Cycles",
+   .width = 10,
+   },
+   [PERF_HPP_REPORT__BLOCK_RANGE] = {
+   .name = "[Program Block Range]",
+   .width = 70,
+   },
+   [PERF_HPP_REPORT__BLOCK_DSO] = {
+   .name = "Shared Object",
+   .width = 20,
+   }
};


so we already have support for multiple columns,
why don't you add those as 'struct sort_entry' objects?



For 'struct sort_entry' objects, do you mean I should reuse the "sort_dso"
which has been implemented yet in util/sort.c?

For other columns, it looks we can't reuse the existing sort_entry objects.


I did not mean reuse, just add new sort entries
to current sort framework



Does it seem like what the c2c does?


well c2c has its own data output with multiline column titles,
hence it has its own separate dimension stuff, but your code
output is within the standard perf report right? single column
output.. why couldn't you use just sort_entry ?

jirka



No, my output is probably not within standard perf report. They are
totally 6 columns but only one column ('Sampled Cycles%') needs to be 
sorted. Other columns are not sorted. They only output some information.


For sort_entry, maybe the sorted column ('Sampled Cycles%') can use 
sort_entry, but others may not need it. So I don't know if I really need 
the sort_entry for my case.


I just feel maybe I still misunderstand what you have suggested. :(

Thanks
Jin Yao


Re: [PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

2019-10-16 Thread Jiri Olsa
On Wed, Oct 16, 2019 at 06:51:07PM +0800, Jin, Yao wrote:
> 
> 
> On 10/16/2019 6:15 PM, Jiri Olsa wrote:
> > On Tue, Oct 15, 2019 at 10:53:18PM +0800, Jin, Yao wrote:
> > 
> > SNIP
> > 
> > > > > +static struct block_header_column{
> > > > > + const char *name;
> > > > > + int width;
> > > > > +} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
> > > > > + [PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV] = {
> > > > > + .name = "Sampled Cycles%",
> > > > > + .width = 15,
> > > > > + },
> > > > > + [PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
> > > > > + .name = "Sampled Cycles",
> > > > > + .width = 14,
> > > > > + },
> > > > > + [PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
> > > > > + .name = "Avg Cycles%",
> > > > > + .width = 11,
> > > > > + },
> > > > > + [PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
> > > > > + .name = "Avg Cycles",
> > > > > + .width = 10,
> > > > > + },
> > > > > + [PERF_HPP_REPORT__BLOCK_RANGE] = {
> > > > > + .name = "[Program Block Range]",
> > > > > + .width = 70,
> > > > > + },
> > > > > + [PERF_HPP_REPORT__BLOCK_DSO] = {
> > > > > + .name = "Shared Object",
> > > > > + .width = 20,
> > > > > + }
> > > > >};
> > > > 
> > > > so we already have support for multiple columns,
> > > > why don't you add those as 'struct sort_entry' objects?
> > > > 
> > > 
> > > For 'struct sort_entry' objects, do you mean I should reuse the "sort_dso"
> > > which has been implemented yet in util/sort.c?
> > > 
> > > For other columns, it looks we can't reuse the existing sort_entry 
> > > objects.
> > 
> > I did not mean reuse, just add new sort entries
> > to current sort framework
> > 
> 
> Does it seem like what the c2c does?

well c2c has its own data output with multiline column titles,
hence it has its own separate dimension stuff, but your code
output is within the standard perf report right? single column
output.. why couldn't you use just sort_entry ?

jirka


Re: [PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

2019-10-16 Thread Jin, Yao




On 10/16/2019 6:15 PM, Jiri Olsa wrote:

On Tue, Oct 15, 2019 at 10:53:18PM +0800, Jin, Yao wrote:

SNIP


+static struct block_header_column{
+   const char *name;
+   int width;
+} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
+   [PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV] = {
+   .name = "Sampled Cycles%",
+   .width = 15,
+   },
+   [PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
+   .name = "Sampled Cycles",
+   .width = 14,
+   },
+   [PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
+   .name = "Avg Cycles%",
+   .width = 11,
+   },
+   [PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
+   .name = "Avg Cycles",
+   .width = 10,
+   },
+   [PERF_HPP_REPORT__BLOCK_RANGE] = {
+   .name = "[Program Block Range]",
+   .width = 70,
+   },
+   [PERF_HPP_REPORT__BLOCK_DSO] = {
+   .name = "Shared Object",
+   .width = 20,
+   }
   };


so we already have support for multiple columns,
why don't you add those as 'struct sort_entry' objects?



For 'struct sort_entry' objects, do you mean I should reuse the "sort_dso"
which has been implemented yet in util/sort.c?

For other columns, it looks we can't reuse the existing sort_entry objects.


I did not mean reuse, just add new sort entries
to current sort framework



Does it seem like what the c2c does?

For example, my new update is like:

struct block_dimension {
   const char  *header;
   int idx;
   int width;
   struct sort_entry   *se;
   int64_t (*cmp)(struct perf_hpp_fmt *,
  struct hist_entry *, struct hist_entry *);
   int64_t (*sort)(struct perf_hpp_fmt *,
   struct hist_entry *, struct hist_entry *);
   int   (*entry)(struct perf_hpp_fmt *, struct perf_hpp *,
  struct hist_entry *);
};

struct block_fmt {
   struct perf_hpp_fmt fmt;
   struct report   *rep;
   struct block_dimension  *dim;
};

static struct block_dimension dim_total_cycles_pct {
   .header = "Sampled Cycles%",
   .idx = PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT,
   .width = 15,
   .cmp = block_info__cmp;
   .sort = block_cycles_cov_sort;
   .entry = block_cycles_cov_entry;
};

..

Is above new update correct?




SNIP


+{
+   struct block_hist *bh = >block_hist;
+
+   get_block_hists(hists, bh, rep);
+   symbol_conf.report_individual_block = true;
+   hists__fprintf(>block_hists, true, 0, 0, 0,
+  stdout, true);
+   hists__delete_entries(>block_hists);
+   return 0;
+}
+
   static int perf_evlist__tty_browse_hists(struct evlist *evlist,
 struct report *rep,
 const char *help)
@@ -500,6 +900,12 @@ static int perf_evlist__tty_browse_hists(struct evlist 
*evlist,
continue;
hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
+
+   if (rep->total_cycles) {
+   hists__fprintf_all_blocks(hists, rep);


so this call kicks all the block info setup/count/print, right?



Yes, all in this call.


I thingk it shouldn't be in the output code, but in the code before..
from what I see you could count block_info counts during the sample
processing, no?



In sample processing, we just get all symbols and account the cycles per
symbol. We need to create/count the block_info at some points after the
sample processing.


understand, but it needs to be outside display function



OK, I will move the code for block_info collection outside of display 
function.



also, can't you gather the block_info data gradually
during the sample processing?



Looks we have to gather the block_info after the sample processing. 
That's because in each sample processing, we will call 
hist__account_cycles(). Then finally __symbol__account_cycles() gets 
called for accounting one basic block in this symbol.


ch[offset].num_aggr++;
ch[offset].cycles_aggr += cycles;

So actually, after the sample processing, we will get the num_aggr and 
cycles_aggr for this basic block and compute the average cycles for this 
block (cycles_aggr / num_aggr).


That's why I think we should gather the block_info after the sample 
processing.


Thanks
Jin Yao


jirka



Maybe it's not very good to put block info setup/count/print in a call, but
it's really not easy to process the block_info during the sample processing.

Thanks
Jin Yao


jirka



Re: [PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

2019-10-16 Thread Jiri Olsa
On Tue, Oct 15, 2019 at 10:53:18PM +0800, Jin, Yao wrote:

SNIP

> > > +static struct block_header_column{
> > > + const char *name;
> > > + int width;
> > > +} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
> > > + [PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV] = {
> > > + .name = "Sampled Cycles%",
> > > + .width = 15,
> > > + },
> > > + [PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
> > > + .name = "Sampled Cycles",
> > > + .width = 14,
> > > + },
> > > + [PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
> > > + .name = "Avg Cycles%",
> > > + .width = 11,
> > > + },
> > > + [PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
> > > + .name = "Avg Cycles",
> > > + .width = 10,
> > > + },
> > > + [PERF_HPP_REPORT__BLOCK_RANGE] = {
> > > + .name = "[Program Block Range]",
> > > + .width = 70,
> > > + },
> > > + [PERF_HPP_REPORT__BLOCK_DSO] = {
> > > + .name = "Shared Object",
> > > + .width = 20,
> > > + }
> > >   };
> > 
> > so we already have support for multiple columns,
> > why don't you add those as 'struct sort_entry' objects?
> > 
> 
> For 'struct sort_entry' objects, do you mean I should reuse the "sort_dso"
> which has been implemented yet in util/sort.c?
> 
> For other columns, it looks we can't reuse the existing sort_entry objects.

I did not mean reuse, just add new sort entries
to current sort framework

> 
> > SNIP
> > 
> > > +{
> > > + struct block_hist *bh = >block_hist;
> > > +
> > > + get_block_hists(hists, bh, rep);
> > > + symbol_conf.report_individual_block = true;
> > > + hists__fprintf(>block_hists, true, 0, 0, 0,
> > > +stdout, true);
> > > + hists__delete_entries(>block_hists);
> > > + return 0;
> > > +}
> > > +
> > >   static int perf_evlist__tty_browse_hists(struct evlist *evlist,
> > >struct report *rep,
> > >const char *help)
> > > @@ -500,6 +900,12 @@ static int perf_evlist__tty_browse_hists(struct 
> > > evlist *evlist,
> > >   continue;
> > >   hists__fprintf_nr_sample_events(hists, rep, evname, 
> > > stdout);
> > > +
> > > + if (rep->total_cycles) {
> > > + hists__fprintf_all_blocks(hists, rep);
> > 
> > so this call kicks all the block info setup/count/print, right?
> > 
> 
> Yes, all in this call.
> 
> > I thingk it shouldn't be in the output code, but in the code before..
> > from what I see you could count block_info counts during the sample
> > processing, no?
> > 
> 
> In sample processing, we just get all symbols and account the cycles per
> symbol. We need to create/count the block_info at some points after the
> sample processing.

understand, but it needs to be outside display function

also, can't you gather the block_info data gradually
during the sample processing?

jirka

> 
> Maybe it's not very good to put block info setup/count/print in a call, but
> it's really not easy to process the block_info during the sample processing.
> 
> Thanks
> Jin Yao
> 
> > jirka
> > 


Re: [PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

2019-10-15 Thread Jin, Yao




On 10/15/2019 4:41 PM, Jiri Olsa wrote:

On Tue, Oct 15, 2019 at 01:33:48PM +0800, Jin Yao wrote:

SNIP


+enum {
+   PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV,
+   PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
+   PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
+   PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
+   PERF_HPP_REPORT__BLOCK_RANGE,
+   PERF_HPP_REPORT__BLOCK_DSO,
+   PERF_HPP_REPORT__BLOCK_MAX_INDEX
+};
+
+static struct block_fmt block_fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
+
+static struct block_header_column{
+   const char *name;
+   int width;
+} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
+   [PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV] = {
+   .name = "Sampled Cycles%",
+   .width = 15,
+   },
+   [PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
+   .name = "Sampled Cycles",
+   .width = 14,
+   },
+   [PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
+   .name = "Avg Cycles%",
+   .width = 11,
+   },
+   [PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
+   .name = "Avg Cycles",
+   .width = 10,
+   },
+   [PERF_HPP_REPORT__BLOCK_RANGE] = {
+   .name = "[Program Block Range]",
+   .width = 70,
+   },
+   [PERF_HPP_REPORT__BLOCK_DSO] = {
+   .name = "Shared Object",
+   .width = 20,
+   }
  };


so we already have support for multiple columns,
why don't you add those as 'struct sort_entry' objects?



For 'struct sort_entry' objects, do you mean I should reuse the 
"sort_dso" which has been implemented yet in util/sort.c?


For other columns, it looks we can't reuse the existing sort_entry objects.


SNIP


+{
+   struct block_hist *bh = >block_hist;
+
+   get_block_hists(hists, bh, rep);
+   symbol_conf.report_individual_block = true;
+   hists__fprintf(>block_hists, true, 0, 0, 0,
+  stdout, true);
+   hists__delete_entries(>block_hists);
+   return 0;
+}
+
  static int perf_evlist__tty_browse_hists(struct evlist *evlist,
 struct report *rep,
 const char *help)
@@ -500,6 +900,12 @@ static int perf_evlist__tty_browse_hists(struct evlist 
*evlist,
continue;
  
  		hists__fprintf_nr_sample_events(hists, rep, evname, stdout);

+
+   if (rep->total_cycles) {
+   hists__fprintf_all_blocks(hists, rep);


so this call kicks all the block info setup/count/print, right?



Yes, all in this call.


I thingk it shouldn't be in the output code, but in the code before..
from what I see you could count block_info counts during the sample
processing, no?



In sample processing, we just get all symbols and account the cycles per 
symbol. We need to create/count the block_info at some points after the 
sample processing.


Maybe it's not very good to put block info setup/count/print in a call, 
but it's really not easy to process the block_info during the sample 
processing.


Thanks
Jin Yao


jirka



Re: [PATCH v2 3/5] perf report: Sort by sampled cycles percent per block for stdio

2019-10-15 Thread Jiri Olsa
On Tue, Oct 15, 2019 at 01:33:48PM +0800, Jin Yao wrote:

SNIP

> +enum {
> + PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV,
> + PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
> + PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
> + PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
> + PERF_HPP_REPORT__BLOCK_RANGE,
> + PERF_HPP_REPORT__BLOCK_DSO,
> + PERF_HPP_REPORT__BLOCK_MAX_INDEX
> +};
> +
> +static struct block_fmt block_fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
> +
> +static struct block_header_column{
> + const char *name;
> + int width;
> +} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
> + [PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV] = {
> + .name = "Sampled Cycles%",
> + .width = 15,
> + },
> + [PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
> + .name = "Sampled Cycles",
> + .width = 14,
> + },
> + [PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
> + .name = "Avg Cycles%",
> + .width = 11,
> + },
> + [PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
> + .name = "Avg Cycles",
> + .width = 10,
> + },
> + [PERF_HPP_REPORT__BLOCK_RANGE] = {
> + .name = "[Program Block Range]",
> + .width = 70,
> + },
> + [PERF_HPP_REPORT__BLOCK_DSO] = {
> + .name = "Shared Object",
> + .width = 20,
> + }
>  };

so we already have support for multiple columns,
why don't you add those as 'struct sort_entry' objects?

SNIP

> +{
> + struct block_hist *bh = >block_hist;
> +
> + get_block_hists(hists, bh, rep);
> + symbol_conf.report_individual_block = true;
> + hists__fprintf(>block_hists, true, 0, 0, 0,
> +stdout, true);
> + hists__delete_entries(>block_hists);
> + return 0;
> +}
> +
>  static int perf_evlist__tty_browse_hists(struct evlist *evlist,
>struct report *rep,
>const char *help)
> @@ -500,6 +900,12 @@ static int perf_evlist__tty_browse_hists(struct evlist 
> *evlist,
>   continue;
>  
>   hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
> +
> + if (rep->total_cycles) {
> + hists__fprintf_all_blocks(hists, rep);

so this call kicks all the block info setup/count/print, right?

I thingk it shouldn't be in the output code, but in the code before..
from what I see you could count block_info counts during the sample
processing, no?

jirka