On arm64 in frame pointer mode (e.g. perf record --call-graph fp),
use DWARF unwind info to check whether the link register holds the
return address, in order to inject it into the frame pointer stack.

Write the following application:

        int a = 10;

        void f2(void)
        {
                for (int i = 0; i < 1000000; i++)
                        a *= a;
        }

        void f1()
        {
                f2();
        }

        int main (void)
        {
                f1();
                return 0;
        }

with the following compilation flags:
        gcc -g -fno-omit-frame-pointer -fno-inline -O1

The compiler omits the frame pointer for f2 on arm. This is a problem
with any leaf call: for example, an application with many different
calls to malloc() would always omit the calling frame, even if it
can be determined.

        ./perf record --call-graph fp ./a.out
        ./perf report

currently gives the following stack:

0xffffea52f361
_start
__libc_start_main
main
f2

After this change, perf report correctly shows f1() calling f2(),
even though it was missing from the frame pointer unwind:

        ./perf report

0xffffea52f361
_start
__libc_start_main
main
f1
f2

Signed-off-by: Alexandre Truong <[email protected]>
Cc: John Garry <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Mathieu Poirier <[email protected]>
Cc: Leo Yan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Kemeng Shi <[email protected]>
Cc: Ian Rogers <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Jin Yao <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Suzuki K Poulose <[email protected]>
Cc: Al Grant <[email protected]>
Cc: James Clark <[email protected]>
Cc: Wilco Dijkstra <[email protected]>
---
 tools/perf/util/Build                         |  1 +
 .../util/arm-frame-pointer-unwind-support.c   | 43 +++++++++++++++++++
 .../util/arm-frame-pointer-unwind-support.h   |  7 +++
 tools/perf/util/machine.c                     |  9 ++--
 4 files changed, 57 insertions(+), 3 deletions(-)
 create mode 100644 tools/perf/util/arm-frame-pointer-unwind-support.c
 create mode 100644 tools/perf/util/arm-frame-pointer-unwind-support.h

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e2563d0154eb..2009d5f02972 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,3 +1,4 @@
+perf-y += arm-frame-pointer-unwind-support.o
 perf-y += annotate.o
 perf-y += block-info.o
 perf-y += block-range.o
diff --git a/tools/perf/util/arm-frame-pointer-unwind-support.c b/tools/perf/util/arm-frame-pointer-unwind-support.c
new file mode 100644
index 000000000000..2901ae2917e9
--- /dev/null
+++ b/tools/perf/util/arm-frame-pointer-unwind-support.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../arch/arm64/include/uapi/asm/perf_regs.h"
+#include "arch/arm64/include/perf_regs.h"
+#include "event.h"
+#include "arm-frame-pointer-unwind-support.h"
+#include "callchain.h"
+#include "unwind.h"
+
+/* Frames requested from the unwinder: the sampled frame and its caller. */
+#define LEAF_FRAME_ENTRIES 2
+
+/* IPs handed to add_entry(), stored in the order they were reported. */
+struct entries {
+       u64 stack[LEAF_FRAME_ENTRIES];
+       unsigned int length;
+};
+
+/*
+ * Return true when the leaf frame caller lookup can run: the callchain was
+ * recorded in frame pointer mode and the sample carries user registers with
+ * exactly the PERF_REGS_MASK register set.
+ */
+static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
+{
+       return callchain_param.record_mode == CALLCHAIN_FP &&
+              sample->user_regs.regs &&
+              sample->user_regs.mask == PERF_REGS_MASK;
+}
+
+/*
+ * unwind__get_entries() callback: append one IP to the struct entries passed
+ * in @arg. Returns non-zero instead of writing past the end of the array.
+ */
+static int add_entry(struct unwind_entry *entry, void *arg)
+{
+       struct entries *entries = arg;
+
+       if (entries->length >= LEAF_FRAME_ENTRIES)
+               return -1;
+
+       entries->stack[entries->length++] = entry->ip;
+       return 0;
+}
+
+/*
+ * Unwind two frames with unwind__get_entries() and compare the entry picked
+ * as the leaf frame's caller with the sampled link register.
+ *
+ * Returns the link register value when caller IP + 1 equals it, so that it
+ * can be injected into the frame pointer callchain. Returns 0 when the lookup
+ * is not enabled for this sample, when unwinding fails or yields fewer than
+ * two entries, or when the link register does not match.
+ */
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample,
+                                  struct thread *thread)
+{
+       struct entries entries = { .length = 0 };
+       u64 caller, lr;
+
+       if (!get_leaf_frame_caller_enabled(sample))
+               return 0;
+
+       /* Never compare against slots the unwinder did not fill in. */
+       if (unwind__get_entries(add_entry, &entries, thread, sample,
+                               LEAF_FRAME_ENTRIES) ||
+           entries.length != LEAF_FRAME_ENTRIES)
+               return 0;
+
+       /* Which slot holds the caller depends on the callchain order. */
+       caller = callchain_param.order == ORDER_CALLER ?
+               entries.stack[0] : entries.stack[1];
+       lr = sample->user_regs.regs[PERF_REG_ARM64_LR];
+
+       return caller + 1 == lr ? lr : 0;
+}
diff --git a/tools/perf/util/arm-frame-pointer-unwind-support.h b/tools/perf/util/arm-frame-pointer-unwind-support.h
new file mode 100644
index 000000000000..16dc03fa9abe
--- /dev/null
+++ b/tools/perf/util/arm-frame-pointer-unwind-support.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
+
+#include <linux/types.h>
+
+struct perf_sample;
+struct thread;
+
+/*
+ * Check, by unwinding two frames, whether the sampled arm64 link register is
+ * the return address of the leaf frame. Returns the link register value when
+ * it is, and 0 otherwise.
+ */
+u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample,
+                                  struct thread *thread);
+
+#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 40082d70eec1..bc6147e46c89 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -34,6 +34,7 @@
 #include "bpf-event.h"
 #include <internal/lib.h> // page_size
 #include "cgroup.h"
+#include "arm-frame-pointer-unwind-support.h"
 
 #include <linux/ctype.h>
 #include <symbol/kallsyms.h>
@@ -2671,10 +2672,20 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
        return err;
 }
 
-static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused,
-               struct thread *thread __maybe_unused)
+/*
+ * Return the leaf frame's caller taken from the link register, or 0 when it
+ * cannot be determined. Only implemented for aarch64.
+ */
+static u64 get_leaf_frame_caller(struct perf_sample *sample,
+                                 struct thread *thread)
 {
-       return 0;
+       struct perf_env *env = thread->maps->machine->env;
+
+       /* strncmp() must not be handed a NULL arch string. */
+       if (env && env->arch && !strncmp(env->arch, "aarch64", 7))
+               return get_leaf_frame_caller_aarch64(sample, thread);
+
+       return 0;
 }
 
 static int thread__resolve_callchain_sample(struct thread *thread,
-- 
2.23.0

Reply via email to