Re: [RFC] Add --show-total-period for perf annotate

2015-06-01 Thread Martin Liška
On 05/29/2015 05:55 PM, Andi Kleen wrote:
> Martin Liška  writes:
>>
>> Following patch sums samples that belong to a line in assembly language.
>> What do you think about it, would it be acceptable solution?
> 
> Basic patch looks good to me now. Thanks.
> 
> The only ugly thing is the global variable, perhaps that could be
> cleaned up.
> 
> -Andi
> 

Hello.

You are right, the global variable is no longer needed, as we no longer
compute any fraction from the samples that belong to a line in assembly language.

Please look at attached patch.

Thank you,
Martin
>From 285596ea221e1b6bb2fb8cb06ade6ae5f04a09a0 Mon Sep 17 00:00:00 2001
From: mliska 
Date: Wed, 27 May 2015 10:54:42 +0200
Subject: [PATCH] perf annotate: With --show-total-period, display total # of
 samples.

To compare two records on an instruction base, with --show-total-period
option provided, display total number of samples that belong to a line
in assembly language.

Signed-off-by: Martin Liska 
---
 tools/perf/builtin-annotate.c |  2 ++
 tools/perf/ui/browsers/annotate.c | 42 ---
 tools/perf/util/annotate.c| 28 --
 tools/perf/util/annotate.h|  3 ++-
 4 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index b57a027..cce19d6 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -326,6 +326,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "objdump binary to use for disassembly and annotations"),
 	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
 		"Show event group information together"),
+	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+		"Show a column with the sum of periods"),
 	OPT_END()
 	};
 	int ret = hists__init();
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index e5250eb..5c92fd5 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -11,16 +11,21 @@
 #include "../../util/evsel.h"
 #include <pthread.h>
 
+struct percent_tuple {
+double		percent;
+double		samples;
+};
+
 struct browser_disasm_line {
-	struct rb_node	rb_node;
-	u32		idx;
-	int		idx_asm;
-	int		jump_sources;
+	struct rb_node		rb_node;
+	u32			idx;
+	int			idx_asm;
+	int			jump_sources;
 	/*
 	 * actual length of this array is saved on the nr_events field
 	 * of the struct annotate_browser
 	 */
-	double		percent[1];
+	struct percent_tuple	tuples[1];
 };
 
 static struct annotate_browser_opt {
@@ -105,15 +110,18 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 	char bf[256];
 
 	for (i = 0; i < ab->nr_events; i++) {
-		if (bdl->percent[i] > percent_max)
-			percent_max = bdl->percent[i];
+		if (bdl->tuples[i].percent > percent_max)
+			percent_max = bdl->tuples[i].percent;
 	}
 
 	if (dl->offset != -1 && percent_max != 0.0) {
 		for (i = 0; i < ab->nr_events; i++) {
-			ui_browser__set_percent_color(browser, bdl->percent[i],
+			ui_browser__set_percent_color(browser, bdl->tuples[i].percent,
 		  current_entry);
-			slsmg_printf("%6.2f ", bdl->percent[i]);
+			if (symbol_conf.show_total_period)
+slsmg_printf("%7.0f ", bdl->tuples[i].samples);
+			else
+slsmg_printf("%6.2f ", bdl->tuples[i].percent);
 		}
 	} else {
 		ui_browser__set_percent_color(browser, 0, current_entry);
@@ -273,9 +281,9 @@ static int disasm__cmp(struct browser_disasm_line *a,
 	int i;
 
 	for (i = 0; i < nr_pcnt; i++) {
-		if (a->percent[i] == b->percent[i])
+		if (a->tuples[i].percent == b->tuples[i].percent)
 			continue;
-		return a->percent[i] < b->percent[i];
+		return a->tuples[i].percent < b->tuples[i].percent;
 	}
 	return 0;
 }
@@ -366,14 +374,16 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
 		next = disasm__get_next_ip_line(&notes->src->source, pos);
 
 		for (i = 0; i < browser->nr_events; i++) {
-			bpos->percent[i] = disasm__calc_percent(notes,
+			double samples;
+			bpos->tuples[i].percent = disasm__calc_percent(notes,
 		evsel->idx + i,
 		pos->offset,
 		next ? next->offset : len,
-	&path);
+	&path, &samples);
+			bpos->tuples[i].samples = samples;
 
-			if (max_percent < bpos->percent[i])
-max_percent = bpos->percent[i];
+			if (max_percent < bpos->tuples[i].percent)
+max_percent = bpos->tuples[i].percent;
 		}
 
 		if (max_percent < 0.01) {
@@ -925,7 +935,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 
 	if (perf_evsel__is_group_event(evsel)) {
 		nr_pcnt = evsel->nr_members;
-		sizeof_bdl += sizeof(double) * (nr_pcnt - 1);
+		sizeof_bdl += sizeof(struct percent_tuple) * (nr_pcnt - 1);
 	}
 
 	if (symbol__annotate(sym, map, sizeof_bdl) < 0) {
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 7f5bdfc..797ce62 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -647,10 +647,11 @@ struct disasm_line 

Re: [RFC] Add --show-total-period for perf annotate

2015-06-01 Thread Martin Liška
On 05/29/2015 05:55 PM, Andi Kleen wrote:
 Martin Liška mli...@suse.cz writes:

 Following patch sums samples that belong to a line in assembly language.
 What do you think about it, would it be acceptable solution?
 
 Basic patch looks good to me now. Thanks.
 
 The only ugly thing is the global variable, perhaps that could be
 cleaned up.
 
 -Andi
 

Hello.

You are right, the global variable is no longer needed, as we no longer
compute any fraction from the samples that belong to a line in assembly language.

Please look at attached patch.

Thank you,
Martin
From 285596ea221e1b6bb2fb8cb06ade6ae5f04a09a0 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Wed, 27 May 2015 10:54:42 +0200
Subject: [PATCH] perf annotate: With --show-total-period, display total # of
 samples.

To compare two records on an instruction base, with --show-total-period
option provided, display total number of samples that belong to a line
in assembly language.

Signed-off-by: Martin Liska mli...@suse.cz
---
 tools/perf/builtin-annotate.c |  2 ++
 tools/perf/ui/browsers/annotate.c | 42 ---
 tools/perf/util/annotate.c| 28 --
 tools/perf/util/annotate.h|  3 ++-
 4 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index b57a027..cce19d6 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -326,6 +326,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		   objdump binary to use for disassembly and annotations),
 	OPT_BOOLEAN(0, group, symbol_conf.event_group,
 		Show event group information together),
+	OPT_BOOLEAN(0, show-total-period, symbol_conf.show_total_period,
+		Show a column with the sum of periods),
 	OPT_END()
 	};
 	int ret = hists__init();
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index e5250eb..5c92fd5 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -11,16 +11,21 @@
 #include ../../util/evsel.h
 #include pthread.h
 
+struct percent_tuple {
+double		percent;
+double		samples;
+};
+
 struct browser_disasm_line {
-	struct rb_node	rb_node;
-	u32		idx;
-	int		idx_asm;
-	int		jump_sources;
+	struct rb_node		rb_node;
+	u32			idx;
+	int			idx_asm;
+	int			jump_sources;
 	/*
 	 * actual length of this array is saved on the nr_events field
 	 * of the struct annotate_browser
 	 */
-	double		percent[1];
+	struct percent_tupletuples[1];
 };
 
 static struct annotate_browser_opt {
@@ -105,15 +110,18 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 	char bf[256];
 
 	for (i = 0; i  ab-nr_events; i++) {
-		if (bdl-percent[i]  percent_max)
-			percent_max = bdl-percent[i];
+		if (bdl-tuples[i].percent  percent_max)
+			percent_max = bdl-tuples[i].percent;
 	}
 
 	if (dl-offset != -1  percent_max != 0.0) {
 		for (i = 0; i  ab-nr_events; i++) {
-			ui_browser__set_percent_color(browser, bdl-percent[i],
+			ui_browser__set_percent_color(browser, bdl-tuples[i].percent,
 		  current_entry);
-			slsmg_printf(%6.2f , bdl-percent[i]);
+			if (symbol_conf.show_total_period)
+slsmg_printf(%7.0f , bdl-tuples[i].samples);
+			else
+slsmg_printf(%6.2f , bdl-tuples[i].percent);
 		}
 	} else {
 		ui_browser__set_percent_color(browser, 0, current_entry);
@@ -273,9 +281,9 @@ static int disasm__cmp(struct browser_disasm_line *a,
 	int i;
 
 	for (i = 0; i  nr_pcnt; i++) {
-		if (a-percent[i] == b-percent[i])
+		if (a-tuples[i].percent == b-tuples[i].percent)
 			continue;
-		return a-percent[i]  b-percent[i];
+		return a-tuples[i].percent  b-tuples[i].percent;
 	}
 	return 0;
 }
@@ -366,14 +374,16 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
 		next = disasm__get_next_ip_line(notes-src-source, pos);
 
 		for (i = 0; i  browser-nr_events; i++) {
-			bpos-percent[i] = disasm__calc_percent(notes,
+			double samples;
+			bpos-tuples[i].percent = disasm__calc_percent(notes,
 		evsel-idx + i,
 		pos-offset,
 		next ? next-offset : len,
-	path);
+	path, samples);
+			bpos-tuples[i].samples = samples;
 
-			if (max_percent  bpos-percent[i])
-max_percent = bpos-percent[i];
+			if (max_percent  bpos-tuples[i].percent)
+max_percent = bpos-tuples[i].percent;
 		}
 
 		if (max_percent  0.01) {
@@ -925,7 +935,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 
 	if (perf_evsel__is_group_event(evsel)) {
 		nr_pcnt = evsel-nr_members;
-		sizeof_bdl += sizeof(double) * (nr_pcnt - 1);
+		sizeof_bdl += sizeof(struct percent_tuple) * (nr_pcnt - 1);
 	}
 
 	if (symbol__annotate(sym, map, sizeof_bdl)  0) {
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 7f5bdfc..797ce62 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -647,10 +647,11 @@ struct disasm_line 

Re: [RFC] Add --show-total-period for perf annotate

2015-05-29 Thread Andi Kleen
Martin Liška  writes:
>
> Following patch sums samples that belong to a line in assembly language.
> What do you think about it, would it be acceptable solution?

Basic patch looks good to me now. Thanks.

The only ugly thing is the global variable, perhaps that could be
cleaned up.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-29 Thread Martin Liška

On 05/27/2015 04:04 PM, Andi Kleen wrote:

If I understand correctly, is it just about division of the number of events
related to an instruction and total number of events?


Yes that's it. The information is already displayed in the main view
(or more fine grained if you do --sort ...,srcline; unfortunately not
sorted in this case)

-Andi


Hi.

Following patch sums samples that belong to a line in assembly language.
What do you think about it, would it be acceptable solution?

Thanks,
Martin


--
To unsubscribe from this list: send the line "unsubscribe linux-perf-users" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


>From 0435d25155315f30f2040d5adff3b5f2ca825089 Mon Sep 17 00:00:00 2001
From: mliska 
Date: Wed, 27 May 2015 10:54:42 +0200
Subject: [PATCH] perf annotate: With --show-total-period, display total # of
 samples.

To compare two records on an instruction base, with --show-total-period
option provided, display total number of samples that belong to a line
in assembly language.

Signed-off-by: Martin Liska 
---
 tools/perf/builtin-annotate.c |  5 -
 tools/perf/perf.h |  2 ++
 tools/perf/ui/browsers/annotate.c | 42 ---
 tools/perf/util/annotate.c| 28 --
 tools/perf/util/annotate.h|  3 ++-
 5 files changed, 56 insertions(+), 24 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index b57a027..38fc304 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -46,6 +46,8 @@ struct perf_annotate {
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
+unsigned long long total_nr_samples;
+
 static int perf_evsel__add_sample(struct perf_evsel *evsel,
   struct perf_sample *sample __maybe_unused,
   struct addr_location *al,
@@ -193,7 +195,6 @@ static int __cmd_annotate(struct perf_annotate *ann)
 	int ret;
 	struct perf_session *session = ann->session;
 	struct perf_evsel *pos;
-	u64 total_nr_samples;
 
 	machines__set_symbol_filter(&session->machines, symbol__annotate_init);
 
@@ -326,6 +327,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "objdump binary to use for disassembly and annotations"),
 	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
 		"Show event group information together"),
+	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+		"Show a column with the sum of periods"),
 	OPT_END()
 	};
 	int ret = hists__init();
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index aa79fb8..9b629fc 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -35,6 +35,8 @@ extern const char *input_name;
 extern bool perf_host, perf_guest;
 extern const char perf_version_string[];
 
+extern unsigned long long total_nr_samples;
+
 void pthread__unblock_sigwinch(void);
 
 #include "util/target.h"
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index e5250eb..5c92fd5 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -11,16 +11,21 @@
 #include "../../util/evsel.h"
 #include <pthread.h>
 
+struct percent_tuple {
+double		percent;
+double		samples;
+};
+
 struct browser_disasm_line {
-	struct rb_node	rb_node;
-	u32		idx;
-	int		idx_asm;
-	int		jump_sources;
+	struct rb_node		rb_node;
+	u32			idx;
+	int			idx_asm;
+	int			jump_sources;
 	/*
 	 * actual length of this array is saved on the nr_events field
 	 * of the struct annotate_browser
 	 */
-	double		percent[1];
+	struct percent_tuple	tuples[1];
 };
 
 static struct annotate_browser_opt {
@@ -105,15 +110,18 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 	char bf[256];
 
 	for (i = 0; i < ab->nr_events; i++) {
-		if (bdl->percent[i] > percent_max)
-			percent_max = bdl->percent[i];
+		if (bdl->tuples[i].percent > percent_max)
+			percent_max = bdl->tuples[i].percent;
 	}
 
 	if (dl->offset != -1 && percent_max != 0.0) {
 		for (i = 0; i < ab->nr_events; i++) {
-			ui_browser__set_percent_color(browser, bdl->percent[i],
+			ui_browser__set_percent_color(browser, bdl->tuples[i].percent,
 		  current_entry);
-			slsmg_printf("%6.2f ", bdl->percent[i]);
+			if (symbol_conf.show_total_period)
+slsmg_printf("%7.0f ", bdl->tuples[i].samples);
+			else
+slsmg_printf("%6.2f ", bdl->tuples[i].percent);
 		}
 	} else {
 		ui_browser__set_percent_color(browser, 0, current_entry);
@@ -273,9 +281,9 @@ static int disasm__cmp(struct browser_disasm_line *a,
 	int i;
 
 	for (i = 0; i < nr_pcnt; i++) {
-		if (a->percent[i] == b->percent[i])
+		if (a->tuples[i].percent == b->tuples[i].percent)
 			continue;
-		return a->percent[i] < b->percent[i];
+		return a->tuples[i].percent < b->tuples[i].percent;
 	}
 	return 0;
 }
@@ -366,14 +374,16 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
 		next = 

Re: [RFC] Add --show-total-period for perf annotate

2015-05-29 Thread Martin Liška

On 05/27/2015 04:04 PM, Andi Kleen wrote:

If I understand correctly, is it just about division of the number of events
related to an instruction and total number of events?


Yes that's it. The information is already displayed in the main view
(or more fine grained if you do --sort ...,srcline; unfortunately not
sorted in this case)

-Andi


Hi.

Following patch sums samples that belong to a line in assembly language.
What do you think about it, would it be acceptable solution?

Thanks,
Martin


--
To unsubscribe from this list: send the line unsubscribe linux-perf-users in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


From 0435d25155315f30f2040d5adff3b5f2ca825089 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Wed, 27 May 2015 10:54:42 +0200
Subject: [PATCH] perf annotate: With --show-total-period, display total # of
 samples.

To compare two records on an instruction base, with --show-total-period
option provided, display total number of samples that belong to a line
in assembly language.

Signed-off-by: Martin Liska mli...@suse.cz
---
 tools/perf/builtin-annotate.c |  5 -
 tools/perf/perf.h |  2 ++
 tools/perf/ui/browsers/annotate.c | 42 ---
 tools/perf/util/annotate.c| 28 --
 tools/perf/util/annotate.h|  3 ++-
 5 files changed, 56 insertions(+), 24 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index b57a027..38fc304 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -46,6 +46,8 @@ struct perf_annotate {
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
+unsigned long long total_nr_samples;
+
 static int perf_evsel__add_sample(struct perf_evsel *evsel,
   struct perf_sample *sample __maybe_unused,
   struct addr_location *al,
@@ -193,7 +195,6 @@ static int __cmd_annotate(struct perf_annotate *ann)
 	int ret;
 	struct perf_session *session = ann-session;
 	struct perf_evsel *pos;
-	u64 total_nr_samples;
 
 	machines__set_symbol_filter(session-machines, symbol__annotate_init);
 
@@ -326,6 +327,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		   objdump binary to use for disassembly and annotations),
 	OPT_BOOLEAN(0, group, symbol_conf.event_group,
 		Show event group information together),
+	OPT_BOOLEAN(0, show-total-period, symbol_conf.show_total_period,
+		Show a column with the sum of periods),
 	OPT_END()
 	};
 	int ret = hists__init();
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index aa79fb8..9b629fc 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -35,6 +35,8 @@ extern const char *input_name;
 extern bool perf_host, perf_guest;
 extern const char perf_version_string[];
 
+extern unsigned long long total_nr_samples;
+
 void pthread__unblock_sigwinch(void);
 
 #include util/target.h
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index e5250eb..5c92fd5 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -11,16 +11,21 @@
 #include ../../util/evsel.h
 #include pthread.h
 
+struct percent_tuple {
+double		percent;
+double		samples;
+};
+
 struct browser_disasm_line {
-	struct rb_node	rb_node;
-	u32		idx;
-	int		idx_asm;
-	int		jump_sources;
+	struct rb_node		rb_node;
+	u32			idx;
+	int			idx_asm;
+	int			jump_sources;
 	/*
 	 * actual length of this array is saved on the nr_events field
 	 * of the struct annotate_browser
 	 */
-	double		percent[1];
+	struct percent_tupletuples[1];
 };
 
 static struct annotate_browser_opt {
@@ -105,15 +110,18 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 	char bf[256];
 
 	for (i = 0; i  ab-nr_events; i++) {
-		if (bdl-percent[i]  percent_max)
-			percent_max = bdl-percent[i];
+		if (bdl-tuples[i].percent  percent_max)
+			percent_max = bdl-tuples[i].percent;
 	}
 
 	if (dl-offset != -1  percent_max != 0.0) {
 		for (i = 0; i  ab-nr_events; i++) {
-			ui_browser__set_percent_color(browser, bdl-percent[i],
+			ui_browser__set_percent_color(browser, bdl-tuples[i].percent,
 		  current_entry);
-			slsmg_printf(%6.2f , bdl-percent[i]);
+			if (symbol_conf.show_total_period)
+slsmg_printf(%7.0f , bdl-tuples[i].samples);
+			else
+slsmg_printf(%6.2f , bdl-tuples[i].percent);
 		}
 	} else {
 		ui_browser__set_percent_color(browser, 0, current_entry);
@@ -273,9 +281,9 @@ static int disasm__cmp(struct browser_disasm_line *a,
 	int i;
 
 	for (i = 0; i  nr_pcnt; i++) {
-		if (a-percent[i] == b-percent[i])
+		if (a-tuples[i].percent == b-tuples[i].percent)
 			continue;
-		return a-percent[i]  b-percent[i];
+		return a-tuples[i].percent  b-tuples[i].percent;
 	}
 	return 0;
 }
@@ -366,14 +374,16 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
 		next = 

Re: [RFC] Add --show-total-period for perf annotate

2015-05-29 Thread Andi Kleen
Martin Liška mli...@suse.cz writes:

 Following patch sums samples that belong to a line in assembly language.
 What do you think about it, would it be acceptable solution?

Basic patch looks good to me now. Thanks.

The only ugly thing is the global variable, perhaps that could be
cleaned up.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-27 Thread Andi Kleen
> If I understand correctly, is it just about division of the number of events
> related to an instruction and total number of events?

Yes that's it. The information is already displayed in the main view
(or more fine grained if you do --sort ...,srcline; unfortunately not
sorted in this case)

-Andi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-27 Thread Martin Liška

On 05/27/2015 10:46 AM, Martin Liška wrote:

On 05/26/2015 07:03 PM, Andi Kleen wrote:

Anyway, attached patch is capable of displaying milliseconds approximation for 
each instruction.


You realize that the events perf is not counting do not directly map to
wall time? Even if you count cycles, the cycles are either stopping in idle
or changing unit as the CPU's frequencies change. For other events the
relationship is even more remote, think what happens when counting cache or
TLB misses.

Also even if it was mapping to time somehow, it's just a hit, not a
duration, so it cannot say how long an individual instruction took.

So you cannot map a sample event to time.

To do what you want you would need to use something like processor
trace, which can do exact accounting.

I think the only thing that makes sense is to account it relative to
the event counts.

-Andi



Hello Andi.

I realize all aspects and capabilities of perf infrastructure. Even though
these numbers are not precise, they helped me a lot with debugging of a benchmark
which heavily utilizes a single CPU and runs in magnitude of seconds.

Ok, so let's convert the patch to feature that we can map an instruction
to a percentage number of events (cycles) it takes.

If I understand correctly, is it just about division of the number of events
related to an instruction and total number of events?

Thanks,
Martin

--
To unsubscribe from this list: send the line "unsubscribe linux-perf-users" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Hi.

Sample output to verify that we have the same idea in mind:

$ perf annotate --show-total-period

 Disassembly of section .text:
 :
 :  00038890 <random_r>:
 :  __random_r():
1695 :38890:   test   %rdi,%rdi
   0 :38893:   je     38918 <random_r+0x88>
   1 :38899:   test   %rsi,%rsi
   0 :3889c:   je     38918 <random_r+0x88>
   9 :3889e:   mov    0x18(%rdi),%eax
1833 :388a1:   mov    0x10(%rdi),%rdx
   2 :388a5:   test   %eax,%eax
   0 :388a7:   je     388f8 <random_r+0x68>
 168 :388a9:   mov    (%rdi),%rcx
   8 :388ac:   mov    0x8(%rdi),%r8
1325 :388b0:   mov    0x28(%rdi),%r9

Where:
$ perf report | head

# To display the perf.data header info, please use --header/--header-only 
options.
#
# Samples: 44K of event 'cycles'
# Event count (approx.): 42988831618

Thanks for the ideas,
Martin

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-27 Thread Martin Liška

On 05/26/2015 07:03 PM, Andi Kleen wrote:

Anyway, attached patch is capable of displaying milliseconds approximation for 
each instruction.


You realize that the events perf is not counting do not directly map to
wall time? Even if you count cycles, the cycles are either stopping in idle
or changing unit as the CPU's frequencies change. For other events the
relationship is even more remote, think what happens when counting cache or
TLB misses.

Also even if it was mapping to time somehow, it's just a hit, not a
duration, so it cannot say how long an individual instruction took.

So you cannot map a sample event to time.

To do what you want you would need to use something like processor
trace, which can do exact accounting.

I think the only thing that makes sense is to account it relative to
the event counts.

-Andi



Hello Andi.

I realize all aspects and capabilities of perf infrastructure. Even though
these numbers are not precise, they helped me a lot with debugging of a benchmark
which heavily utilizes a single CPU and runs in magnitude of seconds.

Ok, so let's convert the patch to feature that we can map an instruction
to a percentage number of events (cycles) it takes.

If I understand correctly, is it just about division of the number of events
related to an instruction and total number of events?

Thanks,
Martin

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-27 Thread Martin Liška

On 05/27/2015 10:46 AM, Martin Liška wrote:

On 05/26/2015 07:03 PM, Andi Kleen wrote:

Anyway, attached patch is capable of displaying milliseconds approximation for 
each instruction.


You realize that the events perf is not counting do not directly map to
wall time? Even if you count cycles, the cycles are either stopping in idle
or changing unit as the CPU's frequencies change. For other events the
relationship is even more remote, think what happens when counting cache or
TLB misses.

Also even if it was mapping to time somehow, it's just a hit, not a
duration, so it cannot say how long an individual instruction took.

So you cannot map a sample event to time.

To do what you want you would need to use something like processor
trace, which can do exact accounting.

I think the only thing that makes sense is to account it relative to
the event counts.

-Andi



Hello Andi.

I realize all aspects and capabilities of perf infrastructure. Even though
these numbers are not precise, they helped me a lot with debugging of a benchmark
which heavily utilizes a single CPU and runs in magnitude of seconds.

Ok, so let's convert the patch to feature that we can map an instruction
to a percentage number of events (cycles) it takes.

If I understand correctly, is it just about division of the number of events
related to an instruction and total number of events?

Thanks,
Martin

--
To unsubscribe from this list: send the line unsubscribe linux-perf-users in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Hi.

Sample output to verify that we have the same idea in mind:

$ perf annotate --show-total-period

 Disassembly of section .text:
 :
 :  00038890 random_r:
 :  __random_r():
1695 :38890:   test   %rdi,%rdi
   0 :38893:   je 38918 random_r+0x88
   1 :38899:   test   %rsi,%rsi
   0 :3889c:   je 38918 random_r+0x88
   9 :3889e:   mov0x18(%rdi),%eax
1833 :388a1:   mov0x10(%rdi),%rdx
   2 :388a5:   test   %eax,%eax
   0 :388a7:   je 388f8 random_r+0x68
 168 :388a9:   mov(%rdi),%rcx
   8 :388ac:   mov0x8(%rdi),%r8
1325 :388b0:   mov0x28(%rdi),%r9

Where:
$ perf report | head

# To display the perf.data header info, please use --header/--header-only 
options.
#
# Samples: 44K of event 'cycles'
# Event count (approx.): 42988831618

Thanks for the ideas,
Martin

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-27 Thread Martin Liška

On 05/26/2015 07:03 PM, Andi Kleen wrote:

Anyway, attached patch is capable of displaying milliseconds approximation for 
each instruction.


You realize that the events perf is not counting do not directly map to
wall time? Even if you count cycles, the cycles are either stopping in idle
or changing unit as the CPU's frequencies change. For other events the
relationship is even more remote, think what happens when counting cache or
TLB misses.

Also even if it was mapping to time somehow, it's just a hit, not a
duration, so it cannot say how long an individual instruction took.

So you cannot map a sample event to time.

To do what you want you would need to use something like processor
trace, which can do exact accounting.

I think the only thing that makes sense is to account it relative to
the event counts.

-Andi



Hello Andi.

I realize all aspects and capabilities of perf infrastructure. Even though
these numbers are not precise, they helped me a lot with debugging of a benchmark
which heavily utilizes a single CPU and runs in magnitude of seconds.

Ok, so let's convert the patch to feature that we can map an instruction
to a percentage number of events (cycles) it takes.

If I understand correctly, is it just about division of the number of events
related to an instruction and total number of events?

Thanks,
Martin

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-27 Thread Andi Kleen
 If I understand correctly, is it just about division of the number of events
 related to an instruction and total number of events?

Yes that's it. The information is already displayed in the main view
(or more fine grained if you do --sort ...,srcline; unfortunately not
sorted in this case)

-Andi
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-26 Thread Andi Kleen
> Anyway, attached patch is capable of displaying milliseconds approximation 
> for each instruction.

You realize that the events perf is not counting do not directly map to
wall time? Even if you count cycles, the cycles are either stopping in idle
or changing unit as the CPU's frequencies change. For other events the
relationship is even more remote, think what happens when counting cache or
TLB misses.

Also even if it was mapping to time somehow, it's just a hit, not a
duration, so it cannot say how long an individual instruction took.

So you cannot map a sample event to time.

To do what you want you would need to use something like processor
trace, which can do exact accounting.

I think the only thing that makes sense is to account it relative to 
the event counts.

-Andi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-26 Thread Martin Liška

On 05/25/2015 05:14 PM, Andi Kleen wrote:

perf diff does not handle this? Especially with the differential
profiling options it should.


It does not work if you, in my case, compare ICC and GCC, where ICC uses a 
different mangling
scheme for fortran modules. Moreover, situation can be more complicated if a 
compiler performs
a bit different inlining decisions.


I suppose it could be enhanced with an input file that describes
equivalent functions. But yes wouldn't work for inlining.


Good point. Can you please help me how to compute a function percentage usage 
in perf annotate ;) ?


I wouldn't use time at all. Just sum up periods and then compute the
percentage. The period sum computation already happens in the main view, and
is displayed there. So you only need to save that value somewhere and
then use it in the annotate display for another column.

# Samples: 24  of event 'cycles'
# Event count (approx.): 8856637

-Andi



Hello.

Are you talking about summing cycles and computing a global percentage for each
instruction?

Anyway, attached patch is capable of displaying milliseconds approximation for 
each instruction.

Example:

time ./perf record ./a.out
[ perf record: Woken up 7 times to write data ]
[ perf record: Captured and wrote 1.743 MB perf.data (45386 samples) ]

real	0m11.465s
user	0m11.424s
sys	0m0.045s

$ perf report --stdio

32.69%  a.out    libc-2.19.so   [.] __random_r
26.66%  a.out    libc-2.19.so   [.] __random
12.55%  a.out    a.out          [.] foo
10.14%  a.out    a.out          [.] bar
 9.27%  a.out    a.out          [.] baz
 7.22%  a.out    libc-2.19.so   [.] rand
 1.37%  a.out    a.out          [.] rand@plt

$ perf annotate --stdio

 Percent |  Source code & Disassembly of libc-2.19.so for cycles

 :
 :
 :
 :  Disassembly of section .text:
 :
 :  00038890 :
 :  __random_r():
   11.12 :38890:   test   %rdi,%rdi


$ perf annotate --stdio --show-total-period
 Percent |  Source code & Disassembly of libc-2.19.so for cycles

 :
 :
 :
 :  Disassembly of section .text:
 :
 :  00038890 :
 :  __random_r():
 413 :38890:   test   %rdi,%rdi

First `test` instruction: 11465*0.3269*0.1112 ~ 413ms.

Thanks for suggestions,
Martin


diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index b57a027..cce19d6 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -326,6 +326,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "objdump binary to use for disassembly and annotations"),
 	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
 		"Show event group information together"),
+	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+		"Show a column with the sum of periods"),
 	OPT_END()
 	};
 	int ret = hists__init();
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 5dfe913..dfd0c8c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -484,6 +484,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	struct perf_session *session;
 	bool disabled = false, draining = false;
 	int fd;
+	unsigned long long t0, t1;
 
 	rec->progname = argv[0];
 
@@ -623,6 +624,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	if (!target__none(&opts->target) && !opts->initial_delay)
 		perf_evlist__enable(rec->evlist);
 
+	t0 = rdclock();
+
 	/*
 	 * Let the child rip
 	 */
@@ -692,6 +695,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		goto out_child;
 	}
 
+	t1 = rdclock();
+	walltime_nsecs = t1 - t0;
+
 	if (!quiet)
 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
 
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index aa79fb8..0acdf4c 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -35,6 +35,8 @@ extern const char *input_name;
 extern bool perf_host, perf_guest;
 extern const char perf_version_string[];
 
+extern unsigned long long walltime_nsecs;
+
 void pthread__unblock_sigwinch(void);
 
 #include "util/target.h"
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index e5250eb..e7af8ec 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -113,7 +113,12 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 		for (i = 0; i < ab->nr_events; i++) {
 			ui_browser__set_percent_color(browser, bdl->percent[i],
 		  current_entry);
-			slsmg_printf("%6.2f ", bdl->percent[i]);
+
+			if (symbol_conf.show_total_period)
+			  slsmg_printf("%7.0f ", 

Re: [RFC] Add --show-total-period for perf annotate

2015-05-26 Thread Martin Liška

On 05/25/2015 05:14 PM, Andi Kleen wrote:

perf diff does not handle this? Especially with the differential
profiling options it should.


It does not work if you, in my case, compare ICC and GCC, where ICC uses a 
different mangling
scheme for fortran modules. Moreover, situation can be more complicated if a 
compiler performs
a bit different inlining decisions.


I suppose it could be enhanced with an input file that describes
equivalent functions. But yes wouldn't work for inlining.


Good point. Can you please help me how to compute a function percentage usage 
in perf annotate ;) ?


I wouldn't use time at all. Just sum up periods and then compute the
percentage. The period sum computation already happens in the main view, and
is displayed there. So you only need to save that value somewhere and
then use it in the annotate display for another column.

# Samples: 24  of event 'cycles'
# Event count (approx.): 8856637

-Andi



Hello.

Are you talking about summing cycles and computing a global percentage for each 
instruction?

Anyway, attached patch is capable of displaying milliseconds approximation for 
each instruction.

Example:

time ./perf record ./a.out
[ perf record: Woken up 7 times to write data ]
[ perf record: Captured and wrote 1.743 MB perf.data (45386 samples) ]

real    0m11.465s
user    0m11.424s
sys     0m0.045s

$ perf report --stdio

32.69%  a.out  libc-2.19.so   [.] __random_r
26.66%  a.out  libc-2.19.so   [.] __random
12.55%  a.out  a.out          [.] foo
10.14%  a.out  a.out          [.] bar
 9.27%  a.out  a.out          [.] baz
 7.22%  a.out  libc-2.19.so   [.] rand
 1.37%  a.out  a.out          [.] rand@plt

$ perf annotate --stdio

 Percent |  Source code & Disassembly of libc-2.19.so for cycles

 :
 :
 :
 :  Disassembly of section .text:
 :
 :  00038890 random_r:
 :  __random_r():
   11.12 :38890:   test   %rdi,%rdi


$ perf annotate --stdio --show-total-period
 Percent |  Source code & Disassembly of libc-2.19.so for cycles

 :
 :
 :
 :  Disassembly of section .text:
 :
 :  00038890 random_r:
 :  __random_r():
 413 :38890:   test   %rdi,%rdi

First `test` instruction: 11465*0.3269*0.1112 ~ 413ms.

Thanks for suggestions,
Martin


diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index b57a027..cce19d6 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -326,6 +326,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "objdump binary to use for disassembly and annotations"),
 	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
 		"Show event group information together"),
+	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+		"Show a column with the sum of periods"),
 	OPT_END()
 	};
 	int ret = hists__init();
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 5dfe913..dfd0c8c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -484,6 +484,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	struct perf_session *session;
 	bool disabled = false, draining = false;
 	int fd;
+	unsigned long long t0, t1;
 
 	rec->progname = argv[0];
 
@@ -623,6 +624,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	if (!target__none(&opts->target) && !opts->initial_delay)
 		perf_evlist__enable(rec->evlist);
 
+	t0 = rdclock();
+
 	/*
 	 * Let the child rip
 	 */
@@ -692,6 +695,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		goto out_child;
 	}
 
+	t1 = rdclock();
+	walltime_nsecs = t1 - t0;
+
 	if (!quiet)
 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
 
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index aa79fb8..0acdf4c 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -35,6 +35,8 @@ extern const char *input_name;
 extern bool perf_host, perf_guest;
 extern const char perf_version_string[];
 
+extern unsigned long long walltime_nsecs;
+
 void pthread__unblock_sigwinch(void);
 
 #include "util/target.h"
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index e5250eb..e7af8ec 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -113,7 +113,12 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 		for (i = 0; i < ab->nr_events; i++) {
 			ui_browser__set_percent_color(browser, bdl->percent[i],
 		  current_entry);
-			slsmg_printf("%6.2f ", bdl->percent[i]);
+
+			if (symbol_conf.show_total_period)
+			  slsmg_printf("%7.0f ", 

Re: [RFC] Add --show-total-period for perf annotate

2015-05-26 Thread Andi Kleen
> Anyway, attached patch is capable of displaying milliseconds approximation 
> for each instruction.

You realize that the events perf is counting do not directly map to
wall time? Even if you count cycles, the cycles are either stopping in idle
or changing unit as the CPU's frequencies change. For other events the
relationship is even more remote, think what happens when counting cache or
TLB misses.

Also even if it was mapping to time somehow, it's just a hit, not a
duration, so it cannot say how long an individual instruction took.

So you cannot map a sample event to time.

To do what you want you would need to use something like processor
trace, which can do exact accounting.

I think the only thing that makes sense is to account it relative to 
the event counts.

-Andi
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-25 Thread Andi Kleen
> >perf diff does not handle this? Especially with the differential
> >profiling options it should.
> 
> It does not work if you, in my case, compare ICC and GCC, where ICC uses a 
> different mangling
> scheme for fortran modules. Moreover, situation can be more complicated if a 
> compiler performs
> a bit different inlining decisions.

I suppose it could be enhanced with an input file that describes
equivalent functions. But yes wouldn't work for inlining.

> Good point. Can you please help me how to compute a function percentage usage 
> in perf annotate ;) ?

I wouldn't use time at all. Just sum up periods and then compute the
percentage. The period sum computation already happens in the main view, and
is displayed there. So you only need to save that value somewhere and
then use it in the annotate display for another column.

# Samples: 24  of event 'cycles'
# Event count (approx.): 8856637

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-25 Thread Martin Liška

On 05/23/2015 06:08 AM, Andi Kleen wrote:

Martin Liška  writes:


I've been working on a new feature for perf annotate, which should be able to 
annotate
instructions with total spent time (compared to percentage usage).

Let's consider following use-case. You want to compare two different compilers
on the same code base and let's assume 90% of wall-time is spent in a single 
function.
Moreover, let's say that these compilers produce assembly of a totally 
different size.

In such case, it's very useful to get an approximation of spent time on a bunch 
of instructions,
which can be compared among other compilers. Otherwise, one has to somehow sum 
percentages and compare
it to size of a function.


perf diff does not handle this? Especially with the differential
profiling options it should.


It does not work if you, in my case, compare ICC and GCC, where ICC uses a 
different mangling
scheme for fortran modules. Moreover, situation can be more complicated if a 
compiler performs
a bit different inlining decisions.




@@ -623,6 +624,8 @@ static int __cmd_record(struct record *rec, int argc, const 
char **argv)

if (!target__none(&opts->target) && !opts->initial_delay)
perf_evlist__enable(rec->evlist);

+   t0 = rdclock();
+
/*
 * Let the child rip
 */
@@ -692,6 +695,9 @@ static int __cmd_record(struct record *rec, int argc, const 
char **argv)
goto out_child;
}

+   t1 = rdclock();
+   walltime_nsecs = t1 - t0;


The walltime can be later computed by the difference of the first and
the last time stamp after sorting the events. So you don't need the new header.

-Andi



Good point. Can you please help me how to compute a function percentage usage 
in perf annotate ;) ?

Thanks,
Martin


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-25 Thread Martin Liška

On 05/23/2015 06:08 AM, Andi Kleen wrote:

Martin Liška mli...@suse.cz writes:


I've been working on a new feature for perf annotate, which should be able to 
annotate
instructions with total spent time (compared to percentage usage).

Let's consider following use-case. You want to compare two different compilers
on the same code base and let's assume 90% of wall-time is spent in a single 
function.
Moreover, let's say that these compilers produce assembly of a totally 
different size.

In such case, it's very useful to get an approximation of spent time on a bunch 
of instructions,
which can be compared among other compilers. Otherwise, one has to somehow sum 
percentages and compare
it to size of a function.


perf diff does not handle this? Especially with the differential
profiling options it should.


It does not work if you, in my case, compare ICC and GCC, where ICC uses a 
different mangling
scheme for fortran modules. Moreover, situation can be more complicated if a 
compiler performs
a bit different inlining decisions.




@@ -623,6 +624,8 @@ static int __cmd_record(struct record *rec, int argc, const 
char **argv)

if (!target__none(&opts->target) && !opts->initial_delay)
perf_evlist__enable(rec->evlist);

+   t0 = rdclock();
+
/*
 * Let the child rip
 */
@@ -692,6 +695,9 @@ static int __cmd_record(struct record *rec, int argc, const 
char **argv)
goto out_child;
}

+   t1 = rdclock();
+   walltime_nsecs = t1 - t0;


The walltime can be later computed by the difference of the first and
the last time stamp after sorting the events. So you don't need the new header.

-Andi



Good point. Can you please help me how to compute a function percentage usage 
in perf annotate ;) ?

Thanks,
Martin


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-25 Thread Andi Kleen
> >perf diff does not handle this? Especially with the differential
> >profiling options it should.
> 
> It does not work if you, in my case, compare ICC and GCC, where ICC uses a 
> different mangling
> scheme for fortran modules. Moreover, situation can be more complicated if a 
> compiler performs
> a bit different inlining decisions.

I suppose it could be enhanced with an input file that describes
equivalent functions. But yes wouldn't work for inlining.

> Good point. Can you please help me how to compute a function percentage usage 
> in perf annotate ;) ?

I wouldn't use time at all. Just sum up periods and then compute the
percentage. The period sum computation already happens in the main view, and
is displayed there. So you only need to save that value somewhere and
then use it in the annotate display for another column.

# Samples: 24  of event 'cycles'
# Event count (approx.): 8856637

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Add --show-total-period for perf annotate

2015-05-22 Thread Andi Kleen
Martin Liška  writes:

> I've been working on a new feature for perf annotate, which should be able to 
> annotate
> instructions with total spent time (compared to percentage usage).
>
> Let's consider following use-case. You want to compare two different compilers
> on the same code base and let's assume 90% of wall-time is spent in a single 
> function.
> Moreover, let's say that these compilers produce assembly of a totally 
> different size.
>
> In such case, it's very useful to get an approximation of spent time on a 
> bunch of instructions,
> which can be compared among other compilers. Otherwise, one has to somehow 
> sum percentages and compare
> it to size of a function.

perf diff does not handle this? Especially with the differential
profiling options it should.

>> @@ -623,6 +624,8 @@ static int __cmd_record(struct record *rec, int argc, 
>> const char **argv)
>   if (!target__none(&opts->target) && !opts->initial_delay)
>   perf_evlist__enable(rec->evlist);
>  
> + t0 = rdclock();
> +
>   /*
>* Let the child rip
>*/
> @@ -692,6 +695,9 @@ static int __cmd_record(struct record *rec, int argc, 
> const char **argv)
>   goto out_child;
>   }
>  
> + t1 = rdclock();
> + walltime_nsecs = t1 - t0;

The walltime can be later computed by the difference of the first and
the last time stamp after sorting the events. So you don't need the new header.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC] Add --show-total-period for perf annotate

2015-05-22 Thread Martin Liška

Hello.

I've been working on a new feature for perf annotate, which should be able to 
annotate
instructions with total spent time (compared to percentage usage).

Let's consider following use-case. You want to compare two different compilers
on the same code base and let's assume 90% of wall-time is spent in a single 
function.
Moreover, let's say that these compilers produce assembly of a totally 
different size.

In such case, it's very useful to get an approximation of spent time on a bunch 
of instructions,
which can be compared among other compilers. Otherwise, one has to somehow sum 
percentages and compare
it to size of a function.

Mail contains my experimental patch.

However, last missing thing is that I need to calculate portion of cycles a 
function
utilizes. May I ask you for help as perf annotate does not count these portions.

Thanks,
Martin
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index b57a027..cce19d6 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -326,6 +326,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "objdump binary to use for disassembly and annotations"),
 	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
 		"Show event group information together"),
+	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+		"Show a column with the sum of periods"),
 	OPT_END()
 	};
 	int ret = hists__init();
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 5dfe913..dfd0c8c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -484,6 +484,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	struct perf_session *session;
 	bool disabled = false, draining = false;
 	int fd;
+	unsigned long long t0, t1;
 
 	rec->progname = argv[0];
 
@@ -623,6 +624,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	if (!target__none(&opts->target) && !opts->initial_delay)
 		perf_evlist__enable(rec->evlist);
 
+	t0 = rdclock();
+
 	/*
 	 * Let the child rip
 	 */
@@ -692,6 +695,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		goto out_child;
 	}
 
+	t1 = rdclock();
+	walltime_nsecs = t1 - t0;
+
 	if (!quiet)
 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
 
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index aa79fb8..0acdf4c 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -35,6 +35,8 @@ extern const char *input_name;
 extern bool perf_host, perf_guest;
 extern const char perf_version_string[];
 
+extern unsigned long long walltime_nsecs;
+
 void pthread__unblock_sigwinch(void);
 
 #include "util/target.h"
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index e5250eb..e7af8ec 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -113,7 +113,12 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 		for (i = 0; i < ab->nr_events; i++) {
 			ui_browser__set_percent_color(browser, bdl->percent[i],
 		  current_entry);
-			slsmg_printf("%6.2f ", bdl->percent[i]);
+
+			if (symbol_conf.show_total_period)
+			  slsmg_printf("%7.0f ", bdl->percent[i] *
+   walltime_nsecs / (100 * 1e6));
+			else
+			  slsmg_printf("%7.2f ", bdl->percent[i]);
 		}
 	} else {
 		ui_browser__set_percent_color(browser, 0, current_entry);
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 7f5bdfc..263e1bf 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -753,7 +753,12 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 		for (i = 0; i < nr_percent; i++) {
 			percent = ppercents[i];
 			color = get_percent_color(percent);
-			color_fprintf(stdout, color, " %7.2f", percent);
+
+			if (symbol_conf.show_total_period)
+			  color_fprintf(stdout, color, " %7.0f",
+	percent * walltime_nsecs / (100 * 1e6));
+			else
+			  color_fprintf(stdout, color, " %7.2f", percent);
 		}
 
 		printf(" :	");
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 3f0d809..a1d254b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -47,6 +47,8 @@ struct perf_file_attr {
 	struct perf_file_section	ids;
 };
 
+unsigned long long walltime_nsecs;
+
 void perf_header__set_feat(struct perf_header *header, int feat)
 {
 	set_bit(feat, header->adds_features);
@@ -883,6 +885,14 @@ static int write_auxtrace(int fd, struct perf_header *h,
 	return err;
 }
 
+static int write_total_wall_time(int fd, struct perf_header *h __maybe_unused,
+			   struct perf_evlist *evlist __maybe_unused)
+{
+	do_write (fd, &walltime_nsecs, sizeof (walltime_nsecs));
+	return 0;
+}
+
+
 static void print_hostname(struct perf_header *ph, int fd __maybe_unused,
 			   FILE *fp)
 {
@@ -1171,6 +1181,13 @@ static void print_auxtrace(struct perf_header *ph __maybe_unused,
 	

Re: [RFC] Add --show-total-period for perf annotate

2015-05-22 Thread Andi Kleen
Martin Liška mli...@suse.cz writes:

 I've been working on a new feature for perf annotate, which should be able to 
 annotate
 instructions with total spent time (compared to percentage usage).

 Let's consider following use-case. You want to compare two different compilers
 on the same code base and let's assume 90% of wall-time is spent in a single 
 function.
 Moreover, let's say that these compilers produce assembly of a totally 
 different size.

 In such case, it's very useful to get an approximation of spent time on a 
 bunch of instructions,
 which can be compared among other compilers. Otherwise, one has to somehow 
 sum percentages and compare
 it to size of a function.

perf diff does not handle this? Especially with the differential
profiling options it should.

 @@ -623,6 +624,8 @@ static int __cmd_record(struct record *rec, int argc, 
 const char **argv)
   if (!target__none(opts-target)  !opts-initial_delay)
   perf_evlist__enable(rec-evlist);
  
 + t0 = rdclock();
 +
   /*
* Let the child rip
*/
 @@ -692,6 +695,9 @@ static int __cmd_record(struct record *rec, int argc, 
 const char **argv)
   goto out_child;
   }
  
 + t1 = rdclock();
 + walltime_nsecs = t1 - t0;

The walltime can be later computed by the difference of the first and
the last time stamp after sorting the events. So you don't need the new header.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC] Add --show-total-period for perf annotate

2015-05-22 Thread Martin Liška

Hello.

I've been working on a new feature for perf annotate, which should be able to 
annotate
instructions with total spent time (compared to percentage usage).

Let's consider following use-case. You want to compare two different compilers
on the same code base and let's assume 90% of wall-time is spent in a single 
function.
Moreover, let's say that these compilers produce assembly of a totally 
different size.

In such case, it's very useful to get an approximation of spent time on a bunch 
of instructions,
which can be compared among other compilers. Otherwise, one has to somehow sum 
percentages and compare
it to size of a function.

Mail contains my experimental patch.

However, last missing thing is that I need to calculate portion of cycles a 
function
utilizes. May I ask you for help as perf annotate does not count these portions.

Thanks,
Martin
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index b57a027..cce19d6 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -326,6 +326,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "objdump binary to use for disassembly and annotations"),
 	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
 		"Show event group information together"),
+	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+		"Show a column with the sum of periods"),
 	OPT_END()
 	};
 	int ret = hists__init();
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 5dfe913..dfd0c8c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -484,6 +484,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	struct perf_session *session;
 	bool disabled = false, draining = false;
 	int fd;
+	unsigned long long t0, t1;
 
 	rec->progname = argv[0];
 
@@ -623,6 +624,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	if (!target__none(&opts->target) && !opts->initial_delay)
 		perf_evlist__enable(rec->evlist);
 
+	t0 = rdclock();
+
 	/*
 	 * Let the child rip
 	 */
@@ -692,6 +695,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		goto out_child;
 	}
 
+	t1 = rdclock();
+	walltime_nsecs = t1 - t0;
+
 	if (!quiet)
 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
 
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index aa79fb8..0acdf4c 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -35,6 +35,8 @@ extern const char *input_name;
 extern bool perf_host, perf_guest;
 extern const char perf_version_string[];
 
+extern unsigned long long walltime_nsecs;
+
 void pthread__unblock_sigwinch(void);
 
 #include "util/target.h"
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index e5250eb..e7af8ec 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -113,7 +113,12 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 		for (i = 0; i < ab->nr_events; i++) {
 			ui_browser__set_percent_color(browser, bdl->percent[i],
 		  current_entry);
-			slsmg_printf("%6.2f ", bdl->percent[i]);
+
+			if (symbol_conf.show_total_period)
+			  slsmg_printf("%7.0f ", bdl->percent[i] *
+   walltime_nsecs / (100 * 1e6));
+			else
+			  slsmg_printf("%7.2f ", bdl->percent[i]);
 		}
 	} else {
 		ui_browser__set_percent_color(browser, 0, current_entry);
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 7f5bdfc..263e1bf 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -753,7 +753,12 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 		for (i = 0; i < nr_percent; i++) {
 			percent = ppercents[i];
 			color = get_percent_color(percent);
-			color_fprintf(stdout, color, " %7.2f", percent);
+
+			if (symbol_conf.show_total_period)
+			  color_fprintf(stdout, color, " %7.0f",
+	percent * walltime_nsecs / (100 * 1e6));
+			else
+			  color_fprintf(stdout, color, " %7.2f", percent);
 		}
 
 		printf(" :	");
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 3f0d809..a1d254b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -47,6 +47,8 @@ struct perf_file_attr {
 	struct perf_file_section	ids;
 };
 
+unsigned long long walltime_nsecs;
+
 void perf_header__set_feat(struct perf_header *header, int feat)
 {
 	set_bit(feat, header->adds_features);
@@ -883,6 +885,14 @@ static int write_auxtrace(int fd, struct perf_header *h,
 	return err;
 }
 
+static int write_total_wall_time(int fd, struct perf_header *h __maybe_unused,
+			   struct perf_evlist *evlist __maybe_unused)
+{
+	do_write (fd, &walltime_nsecs, sizeof (walltime_nsecs));
+	return 0;
+}
+
+
 static void print_hostname(struct perf_header *ph, int fd __maybe_unused,
 			   FILE *fp)
 {
@@ -1171,6 +1181,13 @@ static void print_auxtrace(struct perf_header *ph __maybe_unused,
 	fprintf(fp, #