Re: [PATCH v4 05/15] perf report: create real callchain entries for inlined frames
On Donnerstag, 5. Oktober 2017 05:35:29 CEST Namhyung Kim wrote: > On Sun, Oct 01, 2017 at 04:30:50PM +0200, Milian Wolff wrote: > > The inline_node structs are maintained by the new dso->inlines > > tree. This in turn keeps ownership of the fake symbols and > > srcline string representing an inline frame. > > > > This tree is always sorted by name. All other entries of the symbol > > Isn't it sorted by address? True. The tree of inlines is sorted by address. The sorting by name is part of a different patch, I'll revise the commit message. > > beside the function name are unused for inline frames. The advantage > > of this approach is that all existing users of the callchain API can > > now transparently display inlined frames without having to patch > > their code. > > > > Cc: Jiri Olsa > > Cc: Arnaldo Carvalho de Melo > > Cc: David Ahern > > Cc: Namhyung Kim > > Cc: Peter Zijlstra > > Cc: Yao Jin > > Signed-off-by: Milian Wolff > > --- > > > > tools/perf/util/dso.c | 3 +++ > > tools/perf/util/dso.h | 1 + > > tools/perf/util/machine.c | 37 ++ > > tools/perf/util/srcline.c | 51 > > +++ > > tools/perf/util/srcline.h | 9 + > > 5 files changed, 101 insertions(+) > > > > diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c > > index 339e52971380..6e743dffc487 100644 > > --- a/tools/perf/util/dso.c > > +++ b/tools/perf/util/dso.c > > @@ -10,6 +10,7 @@ > > > > #include "compress.h" > > #include "path.h" > > #include "symbol.h" > > > > +#include "srcline.h" > > > > #include "dso.h" > > #include "machine.h" > > #include "auxtrace.h" > > > > @@ -1201,6 +1202,7 @@ struct dso *dso__new(const char *name) > > > > for (i = 0; i < MAP__NR_TYPES; ++i) > > > > dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; > > > > dso->data.cache = RB_ROOT; > > > > + dso->inlined_nodes = RB_ROOT; > > > > dso->data.fd = -1; > > dso->data.status = DSO_DATA_STATUS_UNKNOWN; > > dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; > > > > @@ -1234,6 +1236,7 @@ void dso__delete(struct dso *dso) > > > >dso->long_name); > > > > for (i = 0; i < MAP__NR_TYPES; ++i) > > > > symbols__delete(&dso->symbols[i]); > > > > + inlines__tree_delete(&dso->inlined_nodes); > > I'm still curious why it doesn't make a trouble (I think there's > use-after-free for non-inlined symbols). Anyway moving the > inlines__tree_delete() above the symbols__delete() will solve the > concern IMHO. I just tested this again. The reason why I'm not seeing the issue is the following: The executable DSO is never freed in my case, probably due to a refcounting bug that is unrelated to the actual patch series here. Adding some simple debug output to `dso__delete`, I see that it's never being called for my main executable - only for libraries libm, kernel modules and pseudo sections like stack/anon/heap etc. If I'm using a more elaborate example binary with libraries that contain inlines, I still cannot reproduce an issue with valgrind though... But with ASAN it finally shows up: ==11080==ERROR: AddressSanitizer: heap-use-after-free on address 0x608000395e4b at pc 0x55a7017388a9 bp 0x7ffe1c99c450 sp 0x7ffe1c99c440 READ of size 1 at 0x608000395e4b thread T0 #0 0x55a7017388a8 in inline_node__delete util/srcline.c:579 #1 0x55a7017388a8 in inlines__tree_delete util/srcline.c:634 #2 0x55a70157c757 in dso__delete util/dso.c:1239 #3 0x55a701614d6b in dsos__purge util/machine.c:139 #4 0x55a701614d6b in dsos__exit util/machine.c:147 #5 0x55a701614d6b in machine__exit util/machine.c:176 #6 0x55a701641ce0 in perf_session__delete util/session.c:203 #7 0x55a70135c50f in cmd_script /home/milian/projects/src/linux/tools/ perf/builtin-script.c:3117 #8 0x55a701472b4c in run_builtin /home/milian/projects/src/linux/tools/ perf/perf.c:296 #9 0x55a70147354a in handle_internal_command /home/milian/projects/src/ linux/tools/perf/perf.c:348 #10 0x55a70128e4c5 in run_argv /home/milian/projects/src/linux/tools/perf/ perf.c:392 #11 0x55a70128e4c5 in main /home/milian/projects/src/linux/tools/perf/ perf.c:536 #12 0x7f402c989f69 in __libc_start_main (/usr/lib/libc.so.6+0x20f69) #13 0x55a701292989 in _start (/home/milian/projects/src/linux/tools/perf/ perf+0xadd989) 0x608000395e4b is located 43 bytes inside of 86-byte region [0x608000395e20,0x608000395e76) freed by thread T0 here: #0 0x7f40300f2711 in __interceptor_free /build/gcc-multilib/src/gcc/ libsanitizer/asan/asan_malloc_linux.cc:45 #1 0x55a701588a2f in symbol__delete util/symbol.c:282 #2 0x55a701588a2f in symbols__delete util/symbol.c:294 #3 0x55a70157c745 in dso__delete util/dso.c:1238 #4 0x55a701614d6b in dsos__purge util/machine.c:139 #5 0x55a701614d6b in dsos__exit u
Re: [PATCH v4 05/15] perf report: create real callchain entries for inlined frames
On Sun, Oct 01, 2017 at 04:30:50PM +0200, Milian Wolff wrote: > The inline_node structs are maintained by the new dso->inlines > tree. This in turn keeps ownership of the fake symbols and > srcline string representing an inline frame. > > This tree is always sorted by name. All other entries of the symbol Isn't it sorted by address? > beside the function name are unused for inline frames. The advantage > of this approach is that all existing users of the callchain API can > now transparently display inlined frames without having to patch > their code. > > Cc: Jiri Olsa > Cc: Arnaldo Carvalho de Melo > Cc: David Ahern > Cc: Namhyung Kim > Cc: Peter Zijlstra > Cc: Yao Jin > Signed-off-by: Milian Wolff > --- > tools/perf/util/dso.c | 3 +++ > tools/perf/util/dso.h | 1 + > tools/perf/util/machine.c | 37 ++ > tools/perf/util/srcline.c | 51 > +++ > tools/perf/util/srcline.h | 9 + > 5 files changed, 101 insertions(+) > > diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c > index 339e52971380..6e743dffc487 100644 > --- a/tools/perf/util/dso.c > +++ b/tools/perf/util/dso.c > @@ -10,6 +10,7 @@ > #include "compress.h" > #include "path.h" > #include "symbol.h" > +#include "srcline.h" > #include "dso.h" > #include "machine.h" > #include "auxtrace.h" > @@ -1201,6 +1202,7 @@ struct dso *dso__new(const char *name) > for (i = 0; i < MAP__NR_TYPES; ++i) > dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; > dso->data.cache = RB_ROOT; > + dso->inlined_nodes = RB_ROOT; > dso->data.fd = -1; > dso->data.status = DSO_DATA_STATUS_UNKNOWN; > dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; > @@ -1234,6 +1236,7 @@ void dso__delete(struct dso *dso) > dso->long_name); > for (i = 0; i < MAP__NR_TYPES; ++i) > symbols__delete(&dso->symbols[i]); > + inlines__tree_delete(&dso->inlined_nodes); I'm still curious why it doesn't make a trouble (I think there's use-after-free for non-inlined symbols). Anyway moving the inlines__tree_delete() above the symbols__delete() will solve the concern IMHO. Thanks, Namhyung > > if (dso->short_name_allocated) { > zfree((char **)&dso->short_name); [SNIP] > diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c > index 07d1a058d7c4..69241d805275 100644 > --- a/tools/perf/util/srcline.c > +++ b/tools/perf/util/srcline.c > @@ -583,3 +583,54 @@ void inline_node__delete(struct inline_node *node) > > free(node); > } > + > +void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines) > +{ > + struct rb_node **p = &tree->rb_node; > + struct rb_node *parent = NULL; > + const u64 addr = inlines->addr; > + struct inline_node *i; > + > + while (*p != NULL) { > + parent = *p; > + i = rb_entry(parent, struct inline_node, rb_node); > + if (addr < i->addr) > + p = &(*p)->rb_left; > + else > + p = &(*p)->rb_right; > + } > + rb_link_node(&inlines->rb_node, parent, p); > + rb_insert_color(&inlines->rb_node, tree); > +} > + > +struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr) > +{ > + struct rb_node *n = tree->rb_node; > + > + while (n) { > + struct inline_node *i = rb_entry(n, struct inline_node, > + rb_node); > + > + if (addr < i->addr) > + n = n->rb_left; > + else if (addr > i->addr) > + n = n->rb_right; > + else > + return i; > + } > + > + return NULL; > +} > + > +void inlines__tree_delete(struct rb_root *tree) > +{ > + struct inline_node *pos; > + struct rb_node *next = rb_first(tree); > + > + while (next) { > + pos = rb_entry(next, struct inline_node, rb_node); > + next = rb_next(&pos->rb_node); > + rb_erase(&pos->rb_node, tree); > + inline_node__delete(pos); > + } > +} > diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h > index 0201ed2c0b9c..ebe38cd22294 100644 > --- a/tools/perf/util/srcline.h > +++ b/tools/perf/util/srcline.h > @@ -2,6 +2,7 @@ > #define PERF_SRCLINE_H > > #include > +#include > #include > > struct dso; > @@ -25,6 +26,7 @@ struct inline_list { > struct inline_node { > u64 addr; > struct list_headval; > + struct rb_node rb_node; > }; > > /* parse inlined frames for the given address */ > @@ -33,4 +35,11 @@ struct inline_node *dso__parse_addr_inlines(struct dso > *dso, u64 addr, > /* free resources associated to the inline node list */ > void inline_node__delete(struct inline_node *node); > > +/* insert the inline
[PATCH v4 05/15] perf report: create real callchain entries for inlined frames
The inline_node structs are maintained by the new dso->inlines tree. This in turn keeps ownership of the fake symbols and srcline string representing an inline frame. This tree is always sorted by name. All other entries of the symbol beside the function name are unused for inline frames. The advantage of this approach is that all existing users of the callchain API can now transparently display inlined frames without having to patch their code. Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yao Jin Signed-off-by: Milian Wolff --- tools/perf/util/dso.c | 3 +++ tools/perf/util/dso.h | 1 + tools/perf/util/machine.c | 37 ++ tools/perf/util/srcline.c | 51 +++ tools/perf/util/srcline.h | 9 + 5 files changed, 101 insertions(+) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 339e52971380..6e743dffc487 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -10,6 +10,7 @@ #include "compress.h" #include "path.h" #include "symbol.h" +#include "srcline.h" #include "dso.h" #include "machine.h" #include "auxtrace.h" @@ -1201,6 +1202,7 @@ struct dso *dso__new(const char *name) for (i = 0; i < MAP__NR_TYPES; ++i) dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; dso->data.cache = RB_ROOT; + dso->inlined_nodes = RB_ROOT; dso->data.fd = -1; dso->data.status = DSO_DATA_STATUS_UNKNOWN; dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; @@ -1234,6 +1236,7 @@ void dso__delete(struct dso *dso) dso->long_name); for (i = 0; i < MAP__NR_TYPES; ++i) symbols__delete(&dso->symbols[i]); + inlines__tree_delete(&dso->inlined_nodes); if (dso->short_name_allocated) { zfree((char **)&dso->short_name); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index a2bbb21f301c..122eca0d242d 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -141,6 +141,7 @@ struct dso { struct rb_root *root; /* root of rbtree that rb_node is in */ struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; + struct rb_root inlined_nodes; struct { u64 addr; struct symbol *symbol; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 59424c95435d..e6663b200f42 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2109,6 +2109,40 @@ static int thread__resolve_callchain_sample(struct thread *thread, return 0; } +static int append_inlines(struct callchain_cursor *cursor, + struct map *map, struct symbol *sym, u64 ip) +{ + struct inline_node *inline_node; + struct inline_list *ilist; + u64 addr; + + if (!symbol_conf.inline_name || !map || !sym) + return 1; + + addr = map__rip_2objdump(map, ip); + + inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr); + if (!inline_node) { + inline_node = dso__parse_addr_inlines(map->dso, addr, sym); + if (!inline_node) + return 1; + + inlines__tree_insert(&map->dso->inlined_nodes, inline_node); + } + + list_for_each_entry(ilist, &inline_node->val, list) { + int ret = callchain_cursor_append(cursor, ip, map, + ilist->symbol, false, + NULL, 0, 0, 0, + ilist->srcline); + + if (ret != 0) + return ret; + } + + return 0; +} + static int unwind_entry(struct unwind_entry *entry, void *arg) { struct callchain_cursor *cursor = arg; @@ -2117,6 +2151,9 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) if (symbol_conf.hide_unresolved && entry->sym == NULL) return 0; + if (append_inlines(cursor, entry->map, entry->sym, entry->ip) == 0) + return 0; + srcline = callchain_srcline(entry->map, entry->sym, entry->ip); return callchain_cursor_append(cursor, entry->ip, entry->map, entry->sym, diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 07d1a058d7c4..69241d805275 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -583,3 +583,54 @@ void inline_node__delete(struct inline_node *node) free(node); } + +void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines) +{ + struct rb_node **p = &tree->rb_node; + struct rb_node *parent = NULL; + const u64 addr = inlines->addr; + struct inline_node *i;