Bug#911815: /usr/bin/perf_4.18: Please build perf against libbfd

2021-11-11 Thread Salvatore Bonaccorso
Hi,

On Fri, Sep 10, 2021 at 12:27:28PM +0200, Tony Garnock-Jones wrote:
> Just to follow up again, here's an improved version of the patch that
> doesn't hand-roll quite so much socketpair/fork/exec code, reusing existing
> tools/ code instead!
> 
> Also, here's the upstream discussion of the patch on the linux-perf-users
> mailing list: 
> https://lore.kernel.org/linux-perf-users/20210910102307.2055484-1-to...@leastfixedpoint.com/T/#u

Unless mistaken, then this has landed as
https://git.kernel.org/linus/be8ecc57f180415e8a7c1cc5620c5236be2a7e56
in mainline.

Regards,
Salvatore



Bug#911815: /usr/bin/perf_4.18: Please build perf against libbfd

2021-09-10 Thread Tony Garnock-Jones
Just to follow up again, here's an improved version of the patch that 
doesn't hand-roll quite so much socketpair/fork/exec code, reusing 
existing tools/ code instead!


Also, here's the upstream discussion of the patch on the 
linux-perf-users mailing list: 
https://lore.kernel.org/linux-perf-users/20210910102307.2055484-1-to...@leastfixedpoint.com/T/#u


Cheers,
  Tony
>From e3df73f35abf7485c351e541b6ab9280328cbbc9 Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones 
Date: Fri, 10 Sep 2021 12:19:46 +0200
Subject: [PATCH v2] tools/perf: Use long-running addr2line per dso

Invoking addr2line in a separate subprocess, one for each required
lookup, takes a terribly long time. This patch introduces a
long-running addr2line process for each dso, *DRAMATICALLY* speeding
up runs of perf. What used to take tens of minutes now takes tens of
seconds.

Debian bug report about this issue:
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=911815

Signed-off-by: Tony Garnock-Jones 

---
Changes since v1:
 - use "subcmd/run-command.h" machinery instead of socketpair/fork/exec directly


 tools/perf/util/srcline.c | 384 --
 1 file changed, 284 insertions(+), 100 deletions(-)

diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 5b7d6c16d33f..1e0b0be4de04 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -15,6 +17,7 @@
 #include "srcline.h"
 #include "string2.h"
 #include "symbol.h"
+#include "subcmd/run-command.h"
 
 bool srcline_full_filename;
 
@@ -119,6 +122,8 @@ static struct symbol *new_inline_sym(struct dso *dso,
 	return inline_sym;
 }
 
+#define MAX_INLINE_NEST 1024
+
 #ifdef HAVE_LIBBFD_SUPPORT
 
 /*
@@ -273,8 +278,6 @@ static void addr2line_cleanup(struct a2l_data *a2l)
 	free(a2l);
 }
 
-#define MAX_INLINE_NEST 1024
-
 static int inline_list__append_dso_a2l(struct dso *dso,
    struct inline_node *node,
    struct symbol *sym)
@@ -361,26 +364,14 @@ void dso__free_a2l(struct dso *dso)
 	dso->a2l = NULL;
 }
 
-static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
-	struct dso *dso, struct symbol *sym)
-{
-	struct inline_node *node;
-
-	node = zalloc(sizeof(*node));
-	if (node == NULL) {
-		perror("not enough memory for the inline node");
-		return NULL;
-	}
-
-	INIT_LIST_HEAD(&node->val);
-	node->addr = addr;
-
-	addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym);
-	return node;
-}
-
 #else /* HAVE_LIBBFD_SUPPORT */
 
+struct a2l_subprocess {
+	struct child_process addr2line;
+	FILE *to_child;
+	FILE *from_child;
+};
+
 static int filename_split(char *filename, unsigned int *line_nr)
 {
 	char *sep;
@@ -402,114 +393,307 @@ static int filename_split(char *filename, unsigned int *line_nr)
 	return 0;
 }
 
+static void addr2line_subprocess_cleanup(struct a2l_subprocess *a2l)
+{
+	if (a2l->addr2line.pid != -1) {
+		kill(a2l->addr2line.pid, SIGKILL);
+		finish_command(&a2l->addr2line); /* ignore result, we don't care */
+		a2l->addr2line.pid = -1;
+	}
+
+	if (a2l->to_child != NULL) {
+		fclose(a2l->to_child);
+		a2l->to_child = NULL;
+	}
+
+	if (a2l->from_child != NULL) {
+		fclose(a2l->from_child);
+		a2l->from_child = NULL;
+	}
+
+	free(a2l);
+}
+
+static struct a2l_subprocess *addr2line_subprocess_init(const char *path)
+{
+	const char *argv[] = { "addr2line", "-e", path, "-i", "-f", NULL };
+	struct a2l_subprocess *a2l = NULL;
+	int start_command_status = 0;
+
+	a2l = zalloc(sizeof(*a2l));
+	if (a2l == NULL)
+		goto out;
+
+	a2l->to_child = NULL;
+	a2l->from_child = NULL;
+
+	a2l->addr2line.pid = -1;
+	a2l->addr2line.in = -1;
+	a2l->addr2line.out = -1;
+	a2l->addr2line.err = 0;
+	a2l->addr2line.dir = NULL;
+	a2l->addr2line.env = NULL;
+	a2l->addr2line.no_stdin = 0;
+	a2l->addr2line.no_stdout = 0;
+	a2l->addr2line.no_stderr = 1;
+	a2l->addr2line.exec_cmd = 0;
+	a2l->addr2line.stdout_to_stderr = 0;
+	a2l->addr2line.preexec_cb = NULL;
+
+	a2l->addr2line.argv = argv;
+	start_command_status = start_command(&a2l->addr2line);
+	a2l->addr2line.argv = NULL; /* it's not used after start_command; avoid dangling pointers */
+
+	if (start_command_status != 0) {
+		pr_warning("could not start addr2line for %s: start_command return code %d\n",
+			   path,
+			   start_command_status);
+		goto out;
+	}
+
+	a2l->to_child = fdopen(a2l->addr2line.in, "w");
+	if (a2l->to_child == NULL) {
+		pr_warning("could not open write-stream to addr2line of %s\n", path);
+		goto out;
+	}
+
+	a2l->from_child = fdopen(a2l->addr2line.out, "r");
+	if (a2l->from_child == NULL) {
+		pr_warning("could not open read-stream from addr2line of %s\n", path);
+		goto out;
+	}
+
+	return a2l;
+
+out:
+	if (a2l)
+		addr2line_subprocess_cleanup(a2l);
+
+	return NULL;
+}
+
+static int read_addr2line_stanza(struct a2l_subprocess *a2l,
+ char **function,
+ char **filename,
+ unsigned i

Bug#911815: /usr/bin/perf_4.18: Please build perf against libbfd

2021-09-09 Thread Steinar H. Gunderson
On Thu, Sep 09, 2021 at 12:54:50PM +0200, Tony Garnock-Jones wrote:
> Thanks, Steinar, for your suggestion! I've written a patch against the
> non-libbfd code in perf to try it out.
> 
> It works very well. What used to take endless minutes now takes a few
> seconds.

Thanks for doing this! I can confirm; I tested this on “perf report” against
a perf.data with DWARF tracebacks, with perf 5.13.4 (the same file every
time), and here are the results:

  non-bfd, without patch:  7m59s
  non-bfd, with patch:   15s
  bfd:   15s 

/* Steinar */
-- 
Homepage: https://www.sesse.net/



Bug#911815: /usr/bin/perf_4.18: Please build perf against libbfd

2021-09-09 Thread Tony Garnock-Jones

Dear Steinar, Ben, Mike, others:

On Tue, 18 Aug 2020 00:35:53 +0200 "Steinar H. Gunderson" 
 wrote:

But we can probably make the addr2line solution much faster?
perf runs:
[...]
but the normal way of running addr2line is to run it and then start feeding
it addresses on stdin (ie. don't start the program anew for each and every
address we want to look up). I haven't tried, but it sounds like that would
reduce overhead significantly?


Thanks, Steinar, for your suggestion! I've written a patch against the 
non-libbfd code in perf to try it out.


It works very well. What used to take endless minutes now takes a few 
seconds.


Please find my small patch (against "apt source linux-perf-5.10") attached.

I have also started the process of submitting it upstream.

Best Regards,
  Tony
From 536e3e3f348c4bbf28810e8fa4b8bbbfc16d4372 Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones 
Date: Thu, 9 Sep 2021 12:39:59 +0200
Subject: [PATCH] tools/perf: Use long-running addr2line per dso

Invoking addr2line in a separate subprocess, one for each required
lookup, takes a terribly long time. This patch introduces a
long-running addr2line process for each dso, *DRAMATICALLY* speeding
up runs of perf. What used to take tens of minutes now takes tens of
seconds.

Debian bug report about this issue:
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=911815

Signed-off-by: Tony Garnock-Jones 

---
 tools/perf/util/srcline.c | 430 +-
 1 file changed, 330 insertions(+), 100 deletions(-)

diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 5b7d6c16d33f..767dc9af2789 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -1,8 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
 
 #include 
 #include 
@@ -119,6 +124,8 @@ static struct symbol *new_inline_sym(struct dso *dso,
 	return inline_sym;
 }
 
+#define MAX_INLINE_NEST 1024
+
 #ifdef HAVE_LIBBFD_SUPPORT
 
 /*
@@ -273,8 +280,6 @@ static void addr2line_cleanup(struct a2l_data *a2l)
 	free(a2l);
 }
 
-#define MAX_INLINE_NEST 1024
-
 static int inline_list__append_dso_a2l(struct dso *dso,
    struct inline_node *node,
    struct symbol *sym)
@@ -361,26 +366,14 @@ void dso__free_a2l(struct dso *dso)
 	dso->a2l = NULL;
 }
 
-static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
-	struct dso *dso, struct symbol *sym)
-{
-	struct inline_node *node;
-
-	node = zalloc(sizeof(*node));
-	if (node == NULL) {
-		perror("not enough memory for the inline node");
-		return NULL;
-	}
-
-	INIT_LIST_HEAD(&node->val);
-	node->addr = addr;
-
-	addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym);
-	return node;
-}
-
 #else /* HAVE_LIBBFD_SUPPORT */
 
+struct a2l_subprocess {
+	pid_t addr2line_pid;
+	FILE *to_child;
+	FILE *from_child;
+};
+
 static int filename_split(char *filename, unsigned int *line_nr)
 {
 	char *sep;
@@ -402,114 +395,351 @@ static int filename_split(char *filename, unsigned int *line_nr)
 	return 0;
 }
 
+static FILE *dup_fdopen(int fd, const char *mode)
+{
+	FILE *f = NULL;
+	int nfd = dup(fd);
+
+	if (nfd == -1)
+		goto out;
+
+	f = fdopen(nfd, mode);
+	if (f == NULL) {
+		close(nfd);
+		goto out;
+	}
+
+out:
+	return f;
+}
+
+static void addr2line_subprocess_cleanup(struct a2l_subprocess *a2l)
+{
+	int _wstatus = 0;
+
+	if (a2l->addr2line_pid != -1) {
+		kill(a2l->addr2line_pid, SIGKILL);
+		waitpid(a2l->addr2line_pid, &_wstatus, 0);
+		a2l->addr2line_pid = -1;
+	}
+
+	if (a2l->to_child != NULL) {
+		fclose(a2l->to_child);
+		a2l->to_child = NULL;
+	}
+
+	if (a2l->from_child != NULL) {
+		fclose(a2l->from_child);
+		a2l->from_child = NULL;
+	}
+
+	free(a2l);
+}
+
+static struct a2l_subprocess *addr2line_subprocess_init(const char *path)
+{
+	struct a2l_subprocess *a2l = NULL;
+	int sockets[2] = {-1, -1};
+
+	a2l = zalloc(sizeof(*a2l));
+	if (a2l == NULL)
+		goto out;
+
+	a2l->addr2line_pid = -1;
+	a2l->to_child = NULL;
+	a2l->from_child = NULL;
+
+	/* Our convention: sockets[0] is for the parent, sockets[1] for the child */
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1)
+		goto out;
+
+	a2l->addr2line_pid = fork();
+
+	switch (a2l->addr2line_pid) {
+	case -1:
+		pr_warning("fork failed for addr2line of %s\n", path);
+		goto out;
+	case 0:
+		/* We are in the child process. */
+		if (sockets[1] != STDIN_FILENO) {
+			if (dup2(sockets[1], STDIN_FILENO) == -1) {
+pr_warning("could not set stdin for addr2line of %s\n", path);
+close(sockets[1]);
+goto out;
+			}
+			close(sockets[1]);
+			sockets[1] = STDIN_FILENO;
+		}
+		if (sockets[1] != STDOUT_FILENO) {
+			if (dup2(sockets[1], STDOUT_FILENO) == -1) {
+pr_warning("could not set stdout for addr2line of %s\n", path);
+close(sockets[1]);
+goto out;
+			}
+		}
+		close(sockets[0]);
+		execlp("addr2line", "addr2line", "-e", path, "-i", "-f", NULL);
+		abort(); /* If execute reaches h

Bug#911815: /usr/bin/perf_4.18: Please build perf against libbfd

2020-08-17 Thread Steinar H. Gunderson
On Fri, Oct 26, 2018 at 01:12:51AM +0100, Ben Hutchings wrote:
> For future reference, that's the comment:
> 
> # perf can link against libbfd if available, but the result is
> # undistributable as they are licenced under GPL v2 and v3+
> # respectively.  Override detection of libbfd and insist that
> # cplus_demangle() can be found in libiberty (LGPL v2.1+).
> 
> Tagging this wontfix since we can't fix that problem.

But we can probably make the addr2line solution much faster?
perf runs:

scnprintf(cmd, sizeof(cmd), "addr2line -e %s %016"PRIx64,
  dso_name, addr);

fp = popen(cmd, "r");

but the normal way of running addr2line is to run it and then start feeding
it addresses on stdin (ie. don't start the program anew for each and every
address we want to look up). I haven't tried, but it sounds like that would
reduce overhead significantly?

I also don't know if there's a cache somewhere in front of this? It seems to
look up the same addresses over and over and over again, at least in my case
(decoding a processor trace).

/* Steinar */
-- 
Homepage: https://www.sesse.net/



Bug#911815: /usr/bin/perf_4.18: Please build perf against libbfd

2018-10-25 Thread Ben Hutchings
Control: tag -1 wontfix

On Thu, 2018-10-25 at 18:55 +0900, Mike Hommey wrote:
[...]
> I found 
> https://salsa.debian.org/kernel-team/linux/blob/master/debian/rules.d/tools/perf/Makefile#L27-31
[...]

For future reference, that's the comment:

# perf can link against libbfd if available, but the result is
# undistributable as they are licenced under GPL v2 and v3+
# respectively.  Override detection of libbfd and insist that
# cplus_demangle() can be found in libiberty (LGPL v2.1+).

Tagging this wontfix since we can't fix that problem.

Ben.

-- 
Ben Hutchings
The obvious mathematical breakthrough [to break modern encryption]
would be development of an easy way to factor large prime numbers.
   - Bill Gates



signature.asc
Description: This is a digitally signed message part


Bug#911815: /usr/bin/perf_4.18: Please build perf against libbfd

2018-10-25 Thread Mike Hommey
On Thu, Oct 25, 2018 at 03:20:11PM +0900, Mike Hommey wrote:
> Package: linux-perf-4.18
> Version: 4.18.10-2
> Severity: wishlist
> File: /usr/bin/perf_4.18
> 
> Dear Maintainer,
> 
> Running e.g. perf report with dwarf call graph info can take a long time
> depending on the size of the profile and the size of dwarf info in the
> binaries being profiled. That's because each address in each library is
> handled by forking and executing a new addr2line process. Each addr2line
> process has to parse the dwarf info of the library it's given just to
> find the location of one address. Multiply by the number of addresses,
> and this can quickly become ridiculous.
> 
> Perf, however, has an alternative implementation that just uses libbfd,
> so it would be much faster than spawning a large amount of new
> processes, each with a large overhead.

I found 
https://salsa.debian.org/kernel-team/linux/blob/master/debian/rules.d/tools/perf/Makefile#L27-31

This sucks badly :(
I have some massive perf data that take *hours* to deal with without
libbfd. With a reduced perf data, this is the kind of difference this
makes:

$ time perf script > /dev/null   # addr2line
real3m8.718s
user1m12.606s
sys 1m56.649s

$ time perf script > /dev/null   # libbfd
real0m4.141s
user0m3.425s
sys 0m0.894s

Mike



Bug#911815: /usr/bin/perf_4.18: Please build perf against libbfd

2018-10-24 Thread Mike Hommey
Package: linux-perf-4.18
Version: 4.18.10-2
Severity: wishlist
File: /usr/bin/perf_4.18

Dear Maintainer,

Running e.g. perf report with dwarf call graph info can take a long time
depending on the size of the profile and the size of dwarf info in the
binaries being profiled. That's because each address in each library is
handled by forking and executing a new addr2line process. Each addr2line
process has to parse the dwarf info of the library it's given just to
find the location of one address. Multiply by the number of addresses,
and this can quickly become ridiculous.

Perf, however, has an alternative implementation that just uses libbfd,
so it would be much faster than spawning a large amount of new
processes, each with a large overhead.

Mike


-- System Information:
Debian Release: buster/sid
  APT prefers unstable-debug
  APT policy: (500, 'unstable-debug'), (500, 'unstable'), (500, 'testing'), (1, 
'experimental-debug'), (1, 'experimental')
Architecture: amd64 (x86_64)
Foreign Architectures: i386

Kernel: Linux 4.18.0-2-amd64 (SMP w/4 CPU cores)
Locale: LANG=ja_JP.UTF-8, LC_CTYPE=ja_JP.UTF-8 (charmap=UTF-8), 
LANGUAGE=ja_JP.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled

Versions of packages linux-perf-4.18 depends on:
ii  libbabeltrace1  1.5.6-1
ii  libc6   2.27-6
ii  libdw1  0.170-0.5
ii  libelf1 0.170-0.5
ii  liblzma55.2.2-1.3
ii  libnuma12.0.12-1
ii  libperl5.26 5.26.2-7+b1
ii  libpython3.63.6.7-1
ii  libslang2   2.3.2-1+b1
ii  libunwind8  1.2.1-8
ii  zlib1g  1:1.2.11.dfsg-1

Versions of packages linux-perf-4.18 recommends:
ii  linux-base  4.5

Versions of packages linux-perf-4.18 suggests:
pn  linux-doc-4.18  

-- no debconf information