Re: [PATCH v4 05/15] perf report: create real callchain entries for inlined frames

2017-10-08 Thread Milian Wolff
On Donnerstag, 5. Oktober 2017 05:35:29 CEST Namhyung Kim wrote:
> On Sun, Oct 01, 2017 at 04:30:50PM +0200, Milian Wolff wrote:
> > The inline_node structs are maintained by the new dso->inlines
> > tree. This in turn keeps ownership of the fake symbols and
> > srcline string representing an inline frame.
> > 
> > This tree is always sorted by name. All other entries of the symbol
> 
> Isn't it sorted by address?

True. The tree of inlines is sorted by address. The sorting by name is part
of a different patch, I'll revise the commit message.

> > beside the function name are unused for inline frames. The advantage
> > of this approach is that all existing users of the callchain API can
> > now transparently display inlined frames without having to patch
> > their code.
> > 
> > Cc: Jiri Olsa 
> > Cc: Arnaldo Carvalho de Melo 
> > Cc: David Ahern 
> > Cc: Namhyung Kim 
> > Cc: Peter Zijlstra 
> > Cc: Yao Jin 
> > Signed-off-by: Milian Wolff 
> > ---
> > 
> >  tools/perf/util/dso.c |  3 +++
> >  tools/perf/util/dso.h |  1 +
> >  tools/perf/util/machine.c | 37 ++
> >  tools/perf/util/srcline.c | 51
> >  +++
> >  tools/perf/util/srcline.h |  9 +
> >  5 files changed, 101 insertions(+)
> > 
> > diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
> > index 339e52971380..6e743dffc487 100644
> > --- a/tools/perf/util/dso.c
> > +++ b/tools/perf/util/dso.c
> > @@ -10,6 +10,7 @@
> > 
> >  #include "compress.h"
> >  #include "path.h"
> >  #include "symbol.h"
> > 
> > +#include "srcline.h"
> > 
> >  #include "dso.h"
> >  #include "machine.h"
> >  #include "auxtrace.h"
> > 
> > @@ -1201,6 +1202,7 @@ struct dso *dso__new(const char *name)
> > 
> > for (i = 0; i < MAP__NR_TYPES; ++i)
> > 
> > dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
> > 
> > dso->data.cache = RB_ROOT;
> > 
> > +   dso->inlined_nodes = RB_ROOT;
> > 
> > dso->data.fd = -1;
> > dso->data.status = DSO_DATA_STATUS_UNKNOWN;
> > dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
> > 
> > @@ -1234,6 +1236,7 @@ void dso__delete(struct dso *dso)
> > 
> >dso->long_name);
> > 
> > for (i = 0; i < MAP__NR_TYPES; ++i)
> > 
> > symbols__delete(&dso->symbols[i]);
> > 
> > +   inlines__tree_delete(&dso->inlined_nodes);
> 
> I'm still curious why it doesn't make a trouble (I think there's
> use-after-free for non-inlined symbols).  Anyway moving the
> inlines__tree_delete() above the symbols__delete() will solve the
> concern IMHO.

I just tested this again. The reason why I'm not seeing the issue is the 
following: The executable DSO is never freed in my case, probably due to a 
refcounting bug that is unrelated to the actual patch series here.

Adding some simple debug output to `dso__delete`, I see that it's never being 
called for my main executable - only for libraries libm, kernel modules and 
pseudo sections like stack/anon/heap etc.

If I'm using a more elaborate example binary with libraries that contain 
inlines, I still cannot reproduce an issue with valgrind though...

But with ASAN it finally shows up:

==11080==ERROR: AddressSanitizer: heap-use-after-free on address 
0x608000395e4b at pc 0x55a7017388a9 bp 0x7ffe1c99c450 sp 0x7ffe1c99c440
READ of size 1 at 0x608000395e4b thread T0
#0 0x55a7017388a8 in inline_node__delete util/srcline.c:579
#1 0x55a7017388a8 in inlines__tree_delete util/srcline.c:634
#2 0x55a70157c757 in dso__delete util/dso.c:1239
#3 0x55a701614d6b in dsos__purge util/machine.c:139
#4 0x55a701614d6b in dsos__exit util/machine.c:147
#5 0x55a701614d6b in machine__exit util/machine.c:176
#6 0x55a701641ce0 in perf_session__delete util/session.c:203
#7 0x55a70135c50f in cmd_script /home/milian/projects/src/linux/tools/
perf/builtin-script.c:3117
#8 0x55a701472b4c in run_builtin /home/milian/projects/src/linux/tools/
perf/perf.c:296
#9 0x55a70147354a in handle_internal_command /home/milian/projects/src/
linux/tools/perf/perf.c:348
#10 0x55a70128e4c5 in run_argv /home/milian/projects/src/linux/tools/perf/
perf.c:392
#11 0x55a70128e4c5 in main /home/milian/projects/src/linux/tools/perf/
perf.c:536
#12 0x7f402c989f69 in __libc_start_main (/usr/lib/libc.so.6+0x20f69)
#13 0x55a701292989 in _start (/home/milian/projects/src/linux/tools/perf/
perf+0xadd989)

0x608000395e4b is located 43 bytes inside of 86-byte region 
[0x608000395e20,0x608000395e76)
freed by thread T0 here:
#0 0x7f40300f2711 in __interceptor_free /build/gcc-multilib/src/gcc/
libsanitizer/asan/asan_malloc_linux.cc:45
#1 0x55a701588a2f in symbol__delete util/symbol.c:282
#2 0x55a701588a2f in symbols__delete util/symbol.c:294
#3 0x55a70157c745 in dso__delete util/dso.c:1238
#4 0x55a701614d6b in dsos__purge util/machine.c:139
#5 0x55a701614d6b in dsos__exit u

Re: [PATCH v4 05/15] perf report: create real callchain entries for inlined frames

2017-10-04 Thread Namhyung Kim
On Sun, Oct 01, 2017 at 04:30:50PM +0200, Milian Wolff wrote:
> The inline_node structs are maintained by the new dso->inlines
> tree. This in turn keeps ownership of the fake symbols and
> srcline string representing an inline frame.
> 
> This tree is always sorted by name. All other entries of the symbol

Isn't it sorted by address?

> beside the function name are unused for inline frames. The advantage
> of this approach is that all existing users of the callchain API can
> now transparently display inlined frames without having to patch
> their code.
> 
> Cc: Jiri Olsa 
> Cc: Arnaldo Carvalho de Melo 
> Cc: David Ahern 
> Cc: Namhyung Kim 
> Cc: Peter Zijlstra 
> Cc: Yao Jin 
> Signed-off-by: Milian Wolff 
> ---
>  tools/perf/util/dso.c |  3 +++
>  tools/perf/util/dso.h |  1 +
>  tools/perf/util/machine.c | 37 ++
>  tools/perf/util/srcline.c | 51 
> +++
>  tools/perf/util/srcline.h |  9 +
>  5 files changed, 101 insertions(+)
> 
> diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
> index 339e52971380..6e743dffc487 100644
> --- a/tools/perf/util/dso.c
> +++ b/tools/perf/util/dso.c
> @@ -10,6 +10,7 @@
>  #include "compress.h"
>  #include "path.h"
>  #include "symbol.h"
> +#include "srcline.h"
>  #include "dso.h"
>  #include "machine.h"
>  #include "auxtrace.h"
> @@ -1201,6 +1202,7 @@ struct dso *dso__new(const char *name)
>   for (i = 0; i < MAP__NR_TYPES; ++i)
>   dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
>   dso->data.cache = RB_ROOT;
> + dso->inlined_nodes = RB_ROOT;
>   dso->data.fd = -1;
>   dso->data.status = DSO_DATA_STATUS_UNKNOWN;
>   dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
> @@ -1234,6 +1236,7 @@ void dso__delete(struct dso *dso)
>  dso->long_name);
>   for (i = 0; i < MAP__NR_TYPES; ++i)
>   symbols__delete(&dso->symbols[i]);
> + inlines__tree_delete(&dso->inlined_nodes);

I'm still curious why it doesn't make a trouble (I think there's
use-after-free for non-inlined symbols).  Anyway moving the
inlines__tree_delete() above the symbols__delete() will solve the
concern IMHO.

Thanks,
Namhyung


>  
>   if (dso->short_name_allocated) {
>   zfree((char **)&dso->short_name);

[SNIP]

> diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
> index 07d1a058d7c4..69241d805275 100644
> --- a/tools/perf/util/srcline.c
> +++ b/tools/perf/util/srcline.c
> @@ -583,3 +583,54 @@ void inline_node__delete(struct inline_node *node)
>  
>   free(node);
>  }
> +
> +void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines)
> +{
> + struct rb_node **p = &tree->rb_node;
> + struct rb_node *parent = NULL;
> + const u64 addr = inlines->addr;
> + struct inline_node *i;
> +
> + while (*p != NULL) {
> + parent = *p;
> + i = rb_entry(parent, struct inline_node, rb_node);
> + if (addr < i->addr)
> + p = &(*p)->rb_left;
> + else
> + p = &(*p)->rb_right;
> + }
> + rb_link_node(&inlines->rb_node, parent, p);
> + rb_insert_color(&inlines->rb_node, tree);
> +}
> +
> +struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr)
> +{
> + struct rb_node *n = tree->rb_node;
> +
> + while (n) {
> + struct inline_node *i = rb_entry(n, struct inline_node,
> +  rb_node);
> +
> + if (addr < i->addr)
> + n = n->rb_left;
> + else if (addr > i->addr)
> + n = n->rb_right;
> + else
> + return i;
> + }
> +
> + return NULL;
> +}
> +
> +void inlines__tree_delete(struct rb_root *tree)
> +{
> + struct inline_node *pos;
> + struct rb_node *next = rb_first(tree);
> +
> + while (next) {
> + pos = rb_entry(next, struct inline_node, rb_node);
> + next = rb_next(&pos->rb_node);
> + rb_erase(&pos->rb_node, tree);
> + inline_node__delete(pos);
> + }
> +}
> diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h
> index 0201ed2c0b9c..ebe38cd22294 100644
> --- a/tools/perf/util/srcline.h
> +++ b/tools/perf/util/srcline.h
> @@ -2,6 +2,7 @@
>  #define PERF_SRCLINE_H
>  
>  #include 
> +#include 
>  #include 
>  
>  struct dso;
> @@ -25,6 +26,7 @@ struct inline_list {
>  struct inline_node {
>   u64 addr;
>   struct list_headval;
> + struct rb_node  rb_node;
>  };
>  
>  /* parse inlined frames for the given address */
> @@ -33,4 +35,11 @@ struct inline_node *dso__parse_addr_inlines(struct dso 
> *dso, u64 addr,
>  /* free resources associated to the inline node list */
>  void inline_node__delete(struct inline_node *node);
>  
> +/* insert the inline

[PATCH v4 05/15] perf report: create real callchain entries for inlined frames

2017-10-01 Thread Milian Wolff
The inline_node structs are maintained by the new dso->inlines
tree. This in turn keeps ownership of the fake symbols and
srcline string representing an inline frame.

This tree is always sorted by name. All other entries of the symbol
beside the function name are unused for inline frames. The advantage
of this approach is that all existing users of the callchain API can
now transparently display inlined frames without having to patch
their code.

Cc: Jiri Olsa 
Cc: Arnaldo Carvalho de Melo 
Cc: David Ahern 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Yao Jin 
Signed-off-by: Milian Wolff 
---
 tools/perf/util/dso.c |  3 +++
 tools/perf/util/dso.h |  1 +
 tools/perf/util/machine.c | 37 ++
 tools/perf/util/srcline.c | 51 +++
 tools/perf/util/srcline.h |  9 +
 5 files changed, 101 insertions(+)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 339e52971380..6e743dffc487 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -10,6 +10,7 @@
 #include "compress.h"
 #include "path.h"
 #include "symbol.h"
+#include "srcline.h"
 #include "dso.h"
 #include "machine.h"
 #include "auxtrace.h"
@@ -1201,6 +1202,7 @@ struct dso *dso__new(const char *name)
for (i = 0; i < MAP__NR_TYPES; ++i)
dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
dso->data.cache = RB_ROOT;
+   dso->inlined_nodes = RB_ROOT;
dso->data.fd = -1;
dso->data.status = DSO_DATA_STATUS_UNKNOWN;
dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
@@ -1234,6 +1236,7 @@ void dso__delete(struct dso *dso)
   dso->long_name);
for (i = 0; i < MAP__NR_TYPES; ++i)
symbols__delete(&dso->symbols[i]);
+   inlines__tree_delete(&dso->inlined_nodes);
 
if (dso->short_name_allocated) {
zfree((char **)&dso->short_name);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index a2bbb21f301c..122eca0d242d 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -141,6 +141,7 @@ struct dso {
struct rb_root   *root; /* root of rbtree that rb_node is in */
struct rb_root   symbols[MAP__NR_TYPES];
struct rb_root   symbol_names[MAP__NR_TYPES];
+   struct rb_root   inlined_nodes;
struct {
u64 addr;
struct symbol   *symbol;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 59424c95435d..e6663b200f42 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2109,6 +2109,40 @@ static int thread__resolve_callchain_sample(struct 
thread *thread,
return 0;
 }
 
+static int append_inlines(struct callchain_cursor *cursor,
+ struct map *map, struct symbol *sym, u64 ip)
+{
+   struct inline_node *inline_node;
+   struct inline_list *ilist;
+   u64 addr;
+
+   if (!symbol_conf.inline_name || !map || !sym)
+   return 1;
+
+   addr = map__rip_2objdump(map, ip);
+
+   inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr);
+   if (!inline_node) {
+   inline_node = dso__parse_addr_inlines(map->dso, addr, sym);
+   if (!inline_node)
+   return 1;
+
+   inlines__tree_insert(&map->dso->inlined_nodes, inline_node);
+   }
+
+   list_for_each_entry(ilist, &inline_node->val, list) {
+   int ret = callchain_cursor_append(cursor, ip, map,
+ ilist->symbol, false,
+ NULL, 0, 0, 0,
+ ilist->srcline);
+
+   if (ret != 0)
+   return ret;
+   }
+
+   return 0;
+}
+
 static int unwind_entry(struct unwind_entry *entry, void *arg)
 {
struct callchain_cursor *cursor = arg;
@@ -2117,6 +2151,9 @@ static int unwind_entry(struct unwind_entry *entry, void 
*arg)
if (symbol_conf.hide_unresolved && entry->sym == NULL)
return 0;
 
+   if (append_inlines(cursor, entry->map, entry->sym, entry->ip) == 0)
+   return 0;
+
srcline = callchain_srcline(entry->map, entry->sym, entry->ip);
return callchain_cursor_append(cursor, entry->ip,
   entry->map, entry->sym,
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 07d1a058d7c4..69241d805275 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -583,3 +583,54 @@ void inline_node__delete(struct inline_node *node)
 
free(node);
 }
+
+void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines)
+{
+   struct rb_node **p = &tree->rb_node;
+   struct rb_node *parent = NULL;
+   const u64 addr = inlines->addr;
+   struct inline_node *i;