[PATCH] tracing: Fix memory leak in fgraph storage selftest

2024-08-19 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

With ftrace boot-time selftest, kmemleak reported some memory leaks in
the new test case for function graph storage for multiple tracers.

unreferenced object 0x888005060080 (size 32):
  comm "swapper/0", pid 1, jiffies 4294676440
  hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 20 10 06 05 80 88 ff ff   ...
54 0c 1e 81 ff ff ff ff 00 00 00 00 00 00 00 00  T...
  backtrace (crc 7c93416c):
[<0238ee6f>] __kmalloc_cache_noprof+0x11f/0x2a0
[<33d2b6c5>] enter_record+0xe8/0x150
[<54c38424>] match_records+0x1cd/0x230
[<c775b63d>] ftrace_set_hash+0xff/0x380
[<7bf7208c>] ftrace_set_filter+0x70/0x90
[<a5c08dda>] test_graph_storage_multi+0x2e/0xf0
[<6ba028ca>] trace_selftest_startup_function_graph+0x1e8/0x260
[<a715d3eb>] run_tracer_selftest+0x111/0x190
[<395cbf90>] register_tracer+0xdf/0x1f0
[<93e67f7b>] do_one_initcall+0x141/0x3b0
[<c591b682>] do_initcall_level+0x82/0xa0
[<4e4c6600>] do_initcalls+0x43/0x70
[<34f3c4e4>] kernel_init_freeable+0x170/0x1f0
[<c7a5dab2>] kernel_init+0x1a/0x1a0
[<ea105947>] ret_from_fork+0x3a/0x50
[<a1932e84>] ret_from_fork_asm+0x1a/0x30
...

This means the filter hashes allocated for the fixtures are not correctly
released after the test.

Free those hash lists after the tests are done, and split the loop for
initializing fixtures from the loop for registering fixtures so that
registration can be rolled back.

Fixes: dd120af2d5f8 ("ftrace: Add multiple fgraph storage selftest")
Cc: sta...@vger.kernel.org
Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/trace_selftest.c |   23 ++-
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 97f1e4bc47dc..c4ad7cd7e778 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -942,7 +942,7 @@ static __init int test_graph_storage_multi(void)
 {
struct fgraph_fixture *fixture;
bool printed = false;
-   int i, ret;
+   int i, j, ret;
 
pr_cont("PASSED\n");
pr_info("Testing multiple fgraph storage on a function: ");
@@ -953,22 +953,35 @@ static __init int test_graph_storage_multi(void)
if (ret && ret != -ENODEV) {
pr_cont("*Could not set filter* ");
printed = true;
-   goto out;
+   goto out2;
}
+   }
 
+   for (j = 0; j < ARRAY_SIZE(store_bytes); j++) {
+   fixture = &store_bytes[j];
ret = register_ftrace_graph(&fixture->gops);
if (ret) {
pr_warn("Failed to init store_bytes fgraph tracing\n");
printed = true;
-   goto out;
+   goto out1;
}
}
 
DYN_FTRACE_TEST_NAME();
-out:
+out1:
+   while (--j >= 0) {
+   fixture = &store_bytes[j];
+   unregister_ftrace_graph(&fixture->gops);
+
+   if (fixture->error_str && !printed) {
+   pr_cont("*** %s ***", fixture->error_str);
+   printed = true;
+   }
+   }
+out2:
while (--i >= 0) {
fixture = &store_bytes[i];
-   unregister_ftrace_graph(&fixture->gops);
+   ftrace_free_filter(&fixture->gops.ops);
 
if (fixture->error_str && !printed) {
pr_cont("*** %s ***", fixture->error_str);




[PATCH v13 20/20] fgraph: Skip recording calltime/rettime if it is not needed

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Skip recording calltime and rettime if the fgraph_ops does not need it.
This is a kind of performance optimization for fprobe. Since the fprobe
user does not use these entries, recording timestamps in fgraph is just
an overhead (e.g. for eBPF, ftrace). So introduce the skip_timestamp flag;
if all registered fgraph_ops set this flag, recording calltime and rettime
is skipped.

Here is the performance results measured by
 tools/testing/selftests/bpf/benchs/run_bench_trigger.sh

Without this:
kprobe-multi   :5.700 ± 0.065M/s
kretprobe-multi:4.239 ± 0.006M/s

With skip-timestamp:
kprobe-multi   :6.265 ± 0.033M/s+9.91%
kretprobe-multi:4.758 ± 0.009M/s+12.24%

Suggested-by: Jiri Olsa 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v11:
  - Simplify it to be symmetric on push and pop. (Thus the timestamp
getting place is a bit shifted.)
 Changes in v10:
  - Add likely() to skipping timestamp.
 Changes in v9:
  - Newly added.
---
 include/linux/ftrace.h |2 ++
 kernel/trace/fgraph.c  |   36 +---
 kernel/trace/fprobe.c  |1 +
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 63fb91088a23..bab6fabb3fa1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1160,6 +1160,8 @@ struct fgraph_ops {
void*private;
trace_func_graph_ent_t  saved_func;
int idx;
+   /* If skip_timestamp is true, this does not record timestamps. */
+   boolskip_timestamp;
 };
 
 void *fgraph_reserve_data(int idx, int size_bytes);
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 6a3e2db16aa4..c116a92839ae 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -174,6 +174,7 @@ int ftrace_graph_active;
 
 static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE];
 static unsigned long fgraph_array_bitmask;
+static bool fgraph_skip_timestamp;
 
 /* LRU index table for fgraph_array */
 static int fgraph_lru_table[FGRAPH_ARRAY_SIZE];
@@ -557,7 +558,11 @@ ftrace_push_return_trace(unsigned long ret, unsigned long 
func,
return -EBUSY;
}
 
-   calltime = trace_clock_local();
+   /* This is not really 'likely' but for keeping the least path to be 
faster. */
+   if (likely(fgraph_skip_timestamp))
+   calltime = 0LL;
+   else
+   calltime = trace_clock_local();
 
offset = READ_ONCE(current->curr_ret_stack);
ret_stack = RET_STACK(current, offset);
@@ -728,6 +733,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, 
unsigned long *ret,
*ret = ret_stack->ret;
trace->func = ret_stack->func;
trace->calltime = ret_stack->calltime;
+   /* This is not really 'likely' but for keeping the least path to be 
faster. */
+   if (likely(!trace->calltime))
+   trace->rettime = 0LL;
+   else
+   trace->rettime = trace_clock_local();
+
trace->overrun = atomic_read(¤t->trace_overrun);
trace->depth = current->curr_ret_depth;
/*
@@ -788,7 +799,6 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, 
unsigned long frame_pointe
return (unsigned long)panic;
}
 
-   trace.rettime = trace_clock_local();
if (fregs)
ftrace_regs_set_instruction_pointer(fregs, ret);
 
@@ -1248,6 +1258,24 @@ static void ftrace_graph_disable_direct(bool 
disable_branch)
fgraph_direct_gops = &fgraph_stub;
 }
 
+static void update_fgraph_skip_timestamp(void)
+{
+   int i;
+
+   for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) {
+   struct fgraph_ops *gops = fgraph_array[i];
+
+   if (gops == &fgraph_stub)
+   continue;
+
+   if (!gops->skip_timestamp) {
+   fgraph_skip_timestamp = false;
+   return;
+   }
+   }
+   fgraph_skip_timestamp = true;
+}
+
 int register_ftrace_graph(struct fgraph_ops *gops)
 {
int command = 0;
@@ -1271,6 +1299,7 @@ int register_ftrace_graph(struct fgraph_ops *gops)
gops->idx = i;
 
ftrace_graph_active++;
+   update_fgraph_skip_timestamp();
 
if (ftrace_graph_active == 2)
ftrace_graph_disable_direct(true);
@@ -1303,6 +1332,7 @@ int register_ftrace_graph(struct fgraph_ops *gops)
ftrace_graph_active--;
gops->saved_func = NULL;
fgraph_lru_release_index(i);
+   update_fgraph_skip_timestamp();
}
 out:
mutex_unlock(&ftrace_lock);
@@ -1326,8 +1356,8 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
goto out;
 
fgraph_array[gops->idx] = &fgraph_stub;
-
ftrace_

[PATCH v13 19/20] Documentation: probes: Update fprobe on function-graph tracer

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Update fprobe documentation for the new fprobe on function-graph
tracer. This includes some behavior changes and the pt_regs to
ftrace_regs interface change.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Update @fregs parameter explanation.
---
 Documentation/trace/fprobe.rst |   42 ++--
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/Documentation/trace/fprobe.rst b/Documentation/trace/fprobe.rst
index 196f52386aaa..f58bdc64504f 100644
--- a/Documentation/trace/fprobe.rst
+++ b/Documentation/trace/fprobe.rst
@@ -9,9 +9,10 @@ Fprobe - Function entry/exit probe
 Introduction
 
 
-Fprobe is a function entry/exit probe mechanism based on ftrace.
-Instead of using ftrace full feature, if you only want to attach callbacks
-on function entry and exit, similar to the kprobes and kretprobes, you can
+Fprobe is a function entry/exit probe mechanism based on the function-graph
+tracer.
+Instead of tracing all functions, if you want to attach callbacks on specific
+function entry and exit, similar to the kprobes and kretprobes, you can
 use fprobe. Compared with kprobes and kretprobes, fprobe gives faster
 instrumentation for multiple functions with single handler. This document
 describes how to use fprobe.
@@ -91,12 +92,14 @@ The prototype of the entry/exit callback function are as 
follows:
 
 .. code-block:: c
 
- int entry_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct pt_regs *regs, void *entry_data);
+ int entry_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct ftrace_regs *fregs, void *entry_data);
 
- void exit_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct pt_regs *regs, void *entry_data);
+ void exit_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct ftrace_regs *fregs, void *entry_data);
 
-Note that the @entry_ip is saved at function entry and passed to exit handler.
-If the entry callback function returns !0, the corresponding exit callback 
will be cancelled.
+Note that the @entry_ip is saved at function entry and passed to exit
+handler.
+If the entry callback function returns !0, the corresponding exit callback
+will be cancelled.
 
 @fp
 This is the address of `fprobe` data structure related to this handler.
@@ -112,12 +115,10 @@ If the entry callback function returns !0, the 
corresponding exit callback will
 This is the return address that the traced function will return to,
 somewhere in the caller. This can be used at both entry and exit.
 
-@regs
-This is the `pt_regs` data structure at the entry and exit. Note that
-the instruction pointer of @regs may be different from the @entry_ip
-in the entry_handler. If you need traced instruction pointer, you need
-to use @entry_ip. On the other hand, in the exit_handler, the 
instruction
-pointer of @regs is set to the current return address.
+@fregs
+This is the `ftrace_regs` data structure at the entry and exit. This
+includes the function parameters, or the return values. So user can
+access thos values via appropriate `ftrace_regs_*` APIs.
 
 @entry_data
 This is a local storage to share the data between entry and exit 
handlers.
@@ -125,6 +126,17 @@ If the entry callback function returns !0, the 
corresponding exit callback will
 and `entry_data_size` field when registering the fprobe, the storage is
 allocated and passed to both `entry_handler` and `exit_handler`.
 
+Entry data size and exit handlers on the same function
+==
+
+Since the entry data is passed via per-task stack and it is has limited size,
+the entry data size per probe is limited to `15 * sizeof(long)`. You also need
+to take care that the different fprobes are probing on the same function, this
+limit becomes smaller. The entry data size is aligned to `sizeof(long)` and
+each fprobe which has exit handler uses a `sizeof(long)` space on the stack,
+you should keep the number of fprobes on the same function as small as
+possible.
+
 Share the callbacks with kprobes
 
 
@@ -165,8 +177,8 @@ This counter counts up when;
  - fprobe fails to take ftrace_recursion lock. This usually means that a 
function
which is traced by other ftrace users is called from the entry_handler.
 
- - fprobe fails to setup the function exit because of the shortage of rethook
-   (the shadow stack for hooking the function return.)
+ - fprobe fails to setup the function exit because of failing to allocate the
+   data buffer from the per-task shadow stack.
 
 The `fprobe::nmissed` field counts up in both cases. Therefore, the former
 skips both of entry and exit callback and the latter skips the exit




[PATCH v13 18/20] selftests/ftrace: Add a test case for repeating register/unregister fprobe

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

This test case repeats define and undefine the fprobe dynamic event to
ensure that the fprobe does not cause any issue with such operations.

Signed-off-by: Masami Hiramatsu (Google) 
---
 .../test.d/dynevent/add_remove_fprobe_repeat.tc|   19 +++
 1 file changed, 19 insertions(+)
 create mode 100644 
tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc

diff --git 
a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc 
b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
new file mode 100644
index ..b4ad09237e2a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - Repeating add/remove fprobe events
+# requires: dynamic_events "f[:[/][]] [%return] 
[]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+PLACE=$FUNCTION_FORK
+REPEAT_TIMES=64
+
+for i in `seq 1 $REPEAT_TIMES`; do
+  echo "f:myevent $PLACE" >> dynamic_events
+  grep -q myevent dynamic_events
+  test -d events/fprobes/myevent
+  echo > dynamic_events
+done
+
+clear_trace




[PATCH v13 17/20] selftests: ftrace: Remove obsolete maxactive syntax check

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Since the fprobe event does not support maxactive anymore, stop
testing the maxactive syntax error checking.

Signed-off-by: Masami Hiramatsu (Google) 
---
 .../ftrace/test.d/dynevent/fprobe_syntax_errors.tc |4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git 
a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc 
b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
index 61877d166451..c9425a34fae3 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -16,9 +16,7 @@ aarch64)
   REG=%r0 ;;
 esac
 
-check_error 'f^100 vfs_read'   # MAXACT_NO_KPROBE
-check_error 'f^1a111 vfs_read' # BAD_MAXACT
-check_error 'f^10 vfs_read'# MAXACT_TOO_BIG
+check_error 'f^100 vfs_read'   # BAD_MAXACT
 
 check_error 'f ^non_exist_func'# BAD_PROBE_ADDR (enoent)
 check_error 'f ^vfs_read+10'   # BAD_PROBE_ADDR




[PATCH v13 16/20] tracing/fprobe: Remove nr_maxactive from fprobe

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Remove the deprecated fprobe::nr_maxactive. This causes fprobe events to
reject the maxactive number.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Newly added.
---
 include/linux/fprobe.h  |2 --
 kernel/trace/trace_fprobe.c |   44 ++-
 2 files changed, 6 insertions(+), 40 deletions(-)

diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index 2d06bbd99601..a86b3e4df2a0 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -54,7 +54,6 @@ struct fprobe_hlist {
  * @nmissed: The counter for missing events.
  * @flags: The status flag.
  * @entry_data_size: The private data storage size.
- * @nr_maxactive: The max number of active functions. (*deprecated)
  * @entry_handler: The callback function for function entry.
  * @exit_handler: The callback function for function exit.
  * @hlist_array: The fprobe_hlist for fprobe search from IP hash table.
@@ -63,7 +62,6 @@ struct fprobe {
unsigned long   nmissed;
unsigned intflags;
size_t  entry_data_size;
-   int nr_maxactive;
 
fprobe_entry_cb entry_handler;
fprobe_exit_cb  exit_handler;
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 86cd6a8c806a..20ef5cd5d419 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -422,7 +422,6 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
   const char *event,
   const char *symbol,
   struct tracepoint *tpoint,
-  int maxactive,
   int nargs, bool is_return)
 {
struct trace_fprobe *tf;
@@ -442,7 +441,6 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
tf->fp.entry_handler = fentry_dispatcher;
 
tf->tpoint = tpoint;
-   tf->fp.nr_maxactive = maxactive;
 
ret = trace_probe_init(&tf->tp, event, group, false, nargs);
if (ret < 0)
@@ -1021,12 +1019,11 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
 */
struct trace_fprobe *tf = NULL;
-   int i, len, new_argc = 0, ret = 0;
+   int i, new_argc = 0, ret = 0;
bool is_return = false;
char *symbol = NULL;
const char *event = NULL, *group = FPROBE_EVENT_SYSTEM;
const char **new_argv = NULL;
-   int maxactive = 0;
char buf[MAX_EVENT_NAME_LEN];
char gbuf[MAX_EVENT_NAME_LEN];
char sbuf[KSYM_NAME_LEN];
@@ -1048,33 +1045,13 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
 
trace_probe_log_init("trace_fprobe", argc, argv);
 
-   event = strchr(&argv[0][1], ':');
-   if (event)
-   event++;
-
-   if (isdigit(argv[0][1])) {
-   if (event)
-   len = event - &argv[0][1] - 1;
-   else
-   len = strlen(&argv[0][1]);
-   if (len > MAX_EVENT_NAME_LEN - 1) {
-   trace_probe_log_err(1, BAD_MAXACT);
-   goto parse_error;
-   }
-   memcpy(buf, &argv[0][1], len);
-   buf[len] = '\0';
-   ret = kstrtouint(buf, 0, &maxactive);
-   if (ret || !maxactive) {
+   if (argv[0][1] != '\0') {
+   if (argv[0][1] != ':') {
+   trace_probe_log_set_index(0);
trace_probe_log_err(1, BAD_MAXACT);
goto parse_error;
}
-   /* fprobe rethook instances are iterated over via a list. The
-* maximum should stay reasonable.
-*/
-   if (maxactive > RETHOOK_MAXACTIVE_MAX) {
-   trace_probe_log_err(1, MAXACT_TOO_BIG);
-   goto parse_error;
-   }
+   event = &argv[0][2];
}
 
trace_probe_log_set_index(1);
@@ -1084,12 +1061,6 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
if (ret < 0)
goto parse_error;
 
-   if (!is_return && maxactive) {
-   trace_probe_log_set_index(0);
-   trace_probe_log_err(1, BAD_MAXACT_TYPE);
-   goto parse_error;
-   }
-
trace_probe_log_set_index(0);
if (event) {
ret = traceprobe_parse_event_name(&event, &group, gbuf,
@@ -1147,8 +1118,7 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
goto out;
 
/* setup a probe */
-   tf = al

[PATCH v13 15/20] tracing: Fix function timing profiler to initialize hashtable

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Since the new fgraph requires fgraph_ops.ops.func_hash to be initialized
before calling register_ftrace_graph(), initialize it with the default
(tracing all functions) parameter.

Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/ftrace.c |4 
 1 file changed, 4 insertions(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fd6c5a50c5e5..c55cf21fd53c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -885,6 +885,10 @@ static void profile_graph_return(struct ftrace_graph_ret 
*trace,
 }
 
 static struct fgraph_ops fprofiler_ops = {
+   .ops = {
+   .flags = FTRACE_OPS_FL_INITIALIZED,
+   INIT_OPS_HASH(fprofiler_ops.ops)
+   },
.entryfunc = &profile_graph_entry,
.retfunc = &profile_graph_return,
 };




[PATCH v13 14/20] fprobe: Rewrite fprobe on function-graph tracer

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Rewrite fprobe implementation on function-graph tracer.
Major API changes are:
 -  'nr_maxactive' field is deprecated.
 -  This depends on CONFIG_DYNAMIC_FTRACE_WITH_ARGS or
!CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS, and
CONFIG_HAVE_FUNCTION_GRAPH_FREGS. So currently works only
on x86_64.
 -  Currently the entry size is limited in 15 * sizeof(long).
 -  If there is too many fprobe exit handler set on the same
function, it will fail to probe.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v12:
  - Skip updating ftrace hash if not required.
 Changes in v9:
  - Remove unneeded prototype of ftrace_regs_get_return_address().
  - Fix entry data address calculation.
  - Remove DIV_ROUND_UP() from hotpath.
 Changes in v8:
  - Use trace_func_graph_ret/ent_t for fgraph_ops.
  - Update CONFIG_FPROBE dependencies.
  - Add ftrace_regs_get_return_address() for each arch.
 Changes in v3:
  - Update for new reserve_data/retrieve_data API.
  - Fix internal push/pop on fgraph data logic so that it can
correctly save/restore the returning fprobes.
 Changes in v2:
  - Add more lockdep_assert_held(fprobe_mutex)
  - Use READ_ONCE() and WRITE_ONCE() for fprobe_hlist_node::fp.
  - Add NOKPROBE_SYMBOL() for the functions which is called from
entry/exit callback.
---
 arch/arm64/include/asm/ftrace.h |6 
 arch/loongarch/include/asm/ftrace.h |6 
 arch/powerpc/include/asm/ftrace.h   |6 
 arch/s390/include/asm/ftrace.h  |6 
 arch/x86/include/asm/ftrace.h   |6 
 include/linux/fprobe.h  |   53 ++-
 kernel/trace/Kconfig|8 
 kernel/trace/fprobe.c   |  639 +--
 lib/test_fprobe.c   |   45 --
 9 files changed, 530 insertions(+), 245 deletions(-)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 14ecb9a418d9..27e32f323048 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -132,6 +132,12 @@ ftrace_regs_get_frame_pointer(const struct ftrace_regs 
*fregs)
return fregs->fp;
 }
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
+{
+   return fregs->lr;
+}
+
 static __always_inline struct pt_regs *
 ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
 {
diff --git a/arch/loongarch/include/asm/ftrace.h 
b/arch/loongarch/include/asm/ftrace.h
index 1a73f35ea9af..c021aa3194f3 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -80,6 +80,12 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs 
*fregs, unsigned long ip)
 #define ftrace_regs_get_frame_pointer(fregs) \
((fregs)->regs.regs[22])
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+   return *(unsigned long *)(fregs->regs.regs[1]);
+}
+
 #define ftrace_graph_func ftrace_graph_func
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
   struct ftrace_ops *op, struct ftrace_regs *fregs);
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index e6ff6834bf7e..2a2d070dd23c 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -75,6 +75,12 @@ ftrace_regs_get_instruction_pointer(struct ftrace_regs 
*fregs)
 #define ftrace_regs_query_register_offset(name) \
regs_query_register_offset(name)
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+   return fregs->regs.link;
+}
+
 struct ftrace_ops;
 
 #define ftrace_graph_func ftrace_graph_func
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 0d9f6df21f81..7b80ff4d3386 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -84,6 +84,12 @@ ftrace_regs_get_frame_pointer(struct ftrace_regs *fregs)
return sp[0];   /* return backchain */
 }
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
+{
+   return fregs->regs.gprs[14];
+}
+
 #define arch_ftrace_fill_perf_regs(fregs, _regs)do {   \
(_regs)->psw.addr = (fregs)->regs.psw.addr; \
(_regs)->gprs[15] = (fregs)->regs.gprs[15]; \
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 1f4d1f7b19ed..8472ba394091 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -74,6 +74,12 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
 #define ftrace_regs_get_frame_pointer(fregs) \
frame_pointer(&(fregs)->regs)
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+   return *(unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+}
+
 struct ftrace_op

[PATCH v13 13/20] ftrace: Add CONFIG_HAVE_FTRACE_GRAPH_FUNC

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add the CONFIG_HAVE_FTRACE_GRAPH_FUNC kconfig in addition to the
ftrace_graph_func macro check. This is so that other features (e.g. FPROBE)
which require access to ftrace_regs from fgraph_ops::entryfunc() can avoid
being compiled if the fgraph cannot pass valid ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v8:
  - Newly added.
---
 arch/arm64/Kconfig |1 +
 arch/loongarch/Kconfig |1 +
 arch/powerpc/Kconfig   |1 +
 arch/riscv/Kconfig |1 +
 arch/x86/Kconfig   |1 +
 kernel/trace/Kconfig   |5 +
 6 files changed, 10 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 17947f625b06..53eb9f36842d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -208,6 +208,7 @@ config ARM64
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_GUP_FAST
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 73cb657496c8..9f7adca388ec 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -129,6 +129,7 @@ config LOONGARCH
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
select HAVE_EXIT_THREAD
select HAVE_GUP_FAST
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d7b09b064a8a..aa2669f5b314 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -238,6 +238,7 @@ config PPC
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_GUP_FAST
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_DESCRIPTORSif PPC64_ELF_ABI_V1
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 6e8422269ba4..8f05e9fb7803 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -138,6 +138,7 @@ config RISCV
select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && 
(CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_FREGS
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 59788d8b220e..02863509ebd1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -228,6 +228,7 @@ config X86
select HAVE_EXIT_THREAD
select HAVE_GUP_FAST
select HAVE_FENTRY  if X86_64 || DYNAMIC_FTRACE
+   select HAVE_FTRACE_GRAPH_FUNC   if HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_FREGSif HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_TRACER   if X86_32 || (X86_64 && 
DYNAMIC_FTRACE)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 0fc4c3129c19..c8dfd3a233c6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -34,6 +34,11 @@ config HAVE_FUNCTION_GRAPH_TRACER
 config HAVE_FUNCTION_GRAPH_FREGS
bool
 
+config HAVE_FTRACE_GRAPH_FUNC
+   bool
+   help
+ True if ftrace_graph_func() is defined.
+
 config HAVE_DYNAMIC_FTRACE
bool
help




[PATCH v13 12/20] bpf: Enable kprobe_multi feature if CONFIG_FPROBE is enabled

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Enable kprobe_multi feature if CONFIG_FPROBE is enabled. The pt_regs is
converted from ftrace_regs by ftrace_partial_regs(), thus some registers
may always returns 0. But it should be enough for function entry (access
arguments) and exit (access return value).

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Florent Revest 
---
 Changes in v9:
  - Avoid wasting memory for bpf_kprobe_multi_pt_regs when
CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y
---
 kernel/trace/bpf_trace.c |   27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index cdba9981b048..deb629f4a510 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2605,7 +2605,7 @@ struct bpf_session_run_ctx {
void *data;
 };
 
-#if defined(CONFIG_FPROBE) && defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
+#ifdef CONFIG_FPROBE
 struct bpf_kprobe_multi_link {
struct bpf_link link;
struct fprobe fp;
@@ -2628,6 +2628,13 @@ struct user_syms {
char *buf;
 };
 
+#ifndef CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST
+static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs);
+#define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs)
+#else
+#define bpf_kprobe_multi_pt_regs_ptr() (NULL)
+#endif
+
 static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, 
u32 cnt)
 {
unsigned long __user usymbol;
@@ -2822,7 +2829,7 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx 
*ctx)
 
 static int
 kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
-  unsigned long entry_ip, struct pt_regs *regs,
+  unsigned long entry_ip, struct ftrace_regs *fregs,
   bool is_return, void *data)
 {
struct bpf_kprobe_multi_run_ctx run_ctx = {
@@ -2834,6 +2841,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link 
*link,
.entry_ip = entry_ip,
};
struct bpf_run_ctx *old_run_ctx;
+   struct pt_regs *regs;
int err;
 
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
@@ -2844,6 +2852,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link 
*link,
 
migrate_disable();
rcu_read_lock();
+   regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
@@ -2860,15 +2869,11 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned 
long fentry_ip,
  unsigned long ret_ip, struct ftrace_regs *fregs,
  void *data)
 {
-   struct pt_regs *regs = ftrace_get_regs(fregs);
struct bpf_kprobe_multi_link *link;
int err;
 
-   if (!regs)
-   return 0;
-
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
-   err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, 
false, data);
+   err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), fregs, 
false, data);
return is_kprobe_session(link->link.prog) ? err : 0;
 }
 
@@ -2878,13 +2883,9 @@ kprobe_multi_link_exit_handler(struct fprobe *fp, 
unsigned long fentry_ip,
   void *data)
 {
struct bpf_kprobe_multi_link *link;
-   struct pt_regs *regs = ftrace_get_regs(fregs);
-
-   if (!regs)
-   return;
 
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
-   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, true, 
data);
+   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), fregs, true, 
data);
 }
 
 static int symbols_cmp_r(const void *a, const void *b, const void *priv)
@@ -3145,7 +3146,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr 
*attr, struct bpf_prog *pr
kvfree(cookies);
return err;
 }
-#else /* !CONFIG_FPROBE || !CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+#else /* !CONFIG_FPROBE */
 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog 
*prog)
 {
return -EOPNOTSUPP;




[PATCH v13 11/20] tracing/fprobe: Enable fprobe events with CONFIG_DYNAMIC_FTRACE_WITH_ARGS

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Allow fprobe events to be enabled with CONFIG_DYNAMIC_FTRACE_WITH_ARGS.
With this change, fprobe events mostly use ftrace_regs instead of pt_regs.
Note that if the arch doesn't enable HAVE_PT_REGS_COMPAT_FTRACE_REGS,
fprobe events will not be able to be used from perf.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v9:
  - Copy store_trace_entry_data() as store_fprobe_entry_data() for
fprobe.
 Changes in v3:
  - Use ftrace_regs_get_return_value().
 Changes in v2:
  - Define ftrace_regs_get_kernel_stack_nth() for
!CONFIG_HAVE_REGS_AND_STACK_ACCESS_API.
 Changes from previous series: Update against the new series.
---
 include/linux/ftrace.h  |   17 ++
 kernel/trace/Kconfig|1 
 kernel/trace/trace_fprobe.c |  107 +--
 kernel/trace/trace_probe_tmpl.h |2 -
 4 files changed, 86 insertions(+), 41 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 3edf2427ae73..63fb91088a23 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -255,6 +255,23 @@ static __always_inline bool ftrace_regs_has_args(struct 
ftrace_regs *fregs)
frame_pointer(&(fregs)->regs)
 #endif
 
+#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API
+static __always_inline unsigned long
+ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth)
+{
+   unsigned long *stackp;
+
+   stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+   if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) ==
+   ((unsigned long)stackp & ~(THREAD_SIZE - 1)))
+   return *(stackp + nth);
+
+   return 0;
+}
+#else /* !CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+#define ftrace_regs_get_kernel_stack_nth(fregs, nth)   (0L)
+#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  struct ftrace_ops *op, struct ftrace_regs *fregs);
 
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2e2b39699542..0fc4c3129c19 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -680,7 +680,6 @@ config FPROBE_EVENTS
select TRACING
select PROBE_EVENTS
select DYNAMIC_EVENTS
-   depends on DYNAMIC_FTRACE_WITH_REGS
default y
help
  This allows user to add tracing events on the function entry and
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 273cdf3cf70c..86cd6a8c806a 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -133,7 +133,7 @@ static int
 process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
   void *dest, void *base)
 {
-   struct pt_regs *regs = rec;
+   struct ftrace_regs *fregs = rec;
unsigned long val;
int ret;
 
@@ -141,17 +141,17 @@ process_fetch_insn(struct fetch_insn *code, void *rec, 
void *edata,
/* 1st stage: get value from context */
switch (code->op) {
case FETCH_OP_STACK:
-   val = regs_get_kernel_stack_nth(regs, code->param);
+   val = ftrace_regs_get_kernel_stack_nth(fregs, code->param);
break;
case FETCH_OP_STACKP:
-   val = kernel_stack_pointer(regs);
+   val = ftrace_regs_get_stack_pointer(fregs);
break;
case FETCH_OP_RETVAL:
-   val = regs_return_value(regs);
+   val = ftrace_regs_get_return_value(fregs);
break;
 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
case FETCH_OP_ARG:
-   val = regs_get_kernel_argument(regs, code->param);
+   val = ftrace_regs_get_argument(fregs, code->param);
break;
case FETCH_OP_EDATA:
val = *(unsigned long *)((unsigned long)edata + code->offset);
@@ -174,7 +174,7 @@ NOKPROBE_SYMBOL(process_fetch_insn)
 /* function entry handler */
 static nokprobe_inline void
 __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
-   struct pt_regs *regs,
+   struct ftrace_regs *fregs,
struct trace_event_file *trace_file)
 {
struct fentry_trace_entry_head *entry;
@@ -188,41 +188,71 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned 
long entry_ip,
if (trace_trigger_soft_disabled(trace_file))
return;
 
-   dsize = __get_data_size(&tf->tp, regs, NULL);
+   dsize = __get_data_size(&tf->tp, fregs, NULL);
 
entry = trace_event_buffer_reserve(&fbuffer, trace_file,
   sizeof(*entry) + tf->tp.size + 
dsize);
if (!entry)
return;
 
-   fbuffer.regs = regs;
+   fbuffer.regs = ftrace_get_regs(fregs);
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
en

[PATCH v13 10/20] tracing: Add ftrace_fill_perf_regs() for perf event

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add ftrace_fill_perf_regs() which should be compatible with the
perf_fetch_caller_regs(). In other words, the pt_regs returned from the
ftrace_fill_perf_regs() must satisfy 'user_mode(regs) == false' and can be
used for stack tracing.

Signed-off-by: Masami Hiramatsu (Google) 
---
  Changes from previous series: NOTHING, just forward ported.
---
 arch/arm64/include/asm/ftrace.h   |7 +++
 arch/powerpc/include/asm/ftrace.h |7 +++
 arch/s390/include/asm/ftrace.h|5 +
 arch/x86/include/asm/ftrace.h |7 +++
 include/linux/ftrace.h|   31 +++
 5 files changed, 57 insertions(+)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 5cd587afab6d..14ecb9a418d9 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -143,6 +143,13 @@ ftrace_partial_regs(const struct ftrace_regs *fregs, 
struct pt_regs *regs)
return regs;
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {  \
+   (_regs)->pc = (fregs)->pc;  \
+   (_regs)->regs[29] = (fregs)->fp;\
+   (_regs)->sp = (fregs)->sp;  \
+   (_regs)->pstate = PSR_MODE_EL1h;\
+   } while (0)
+
 int ftrace_regs_query_register_offset(const char *name);
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index 23d26f3afae4..e6ff6834bf7e 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -42,6 +42,13 @@ static __always_inline struct pt_regs 
*arch_ftrace_get_regs(struct ftrace_regs *
return fregs->regs.msr ? &fregs->regs : NULL;
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {  \
+   (_regs)->result = 0;\
+   (_regs)->nip = (fregs)->regs.nip;   \
+   (_regs)->gpr[1] = (fregs)->regs.gpr[1]; \
+   asm volatile("mfmsr %0" : "=r" ((_regs)->msr)); \
+   } while (0)
+
 static __always_inline void
 ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
unsigned long ip)
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 9cdd48a46bf7..0d9f6df21f81 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -84,6 +84,11 @@ ftrace_regs_get_frame_pointer(struct ftrace_regs *fregs)
return sp[0];   /* return backchain */
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs)do {   \
+   (_regs)->psw.addr = (fregs)->regs.psw.addr; \
+   (_regs)->gprs[15] = (fregs)->regs.gprs[15]; \
+   } while (0)
+
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 /*
  * When an ftrace registered caller is tracing a function that is
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 669771ef3b5b..1f4d1f7b19ed 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -46,6 +46,13 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
return &fregs->regs;
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {  \
+   (_regs)->ip = (fregs)->regs.ip; \
+   (_regs)->sp = (fregs)->regs.sp; \
+   (_regs)->cs = __KERNEL_CS;  \
+   (_regs)->flags = 0; \
+   } while (0)
+
 #define ftrace_regs_set_instruction_pointer(fregs, _ip)\
do { (fregs)->regs.ip = (_ip); } while (0)
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index bd9a26bdf660..3edf2427ae73 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -193,6 +193,37 @@ ftrace_partial_regs(struct ftrace_regs *fregs, struct 
pt_regs *regs)
 
 #endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || 
CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
 
+#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+
+/*
+ * Please define arch dependent pt_regs which compatible to the
+ * perf_arch_fetch_caller_regs() but based on ftrace_regs.
+ * This requires
+ *   - user_mode(_regs) returns false (always kernel mode).
+ *   - able to use the _regs for stack trace.
+ */
+#ifndef arch_ftrace_fill_perf_regs
+/* As same as perf_arch_fetch_caller_regs(), do nothing by default */
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {} while (0)
+#endif
+
+static __always_inline struct pt_regs *
+ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+   arch_ftrace_fill_perf_regs(fregs, regs);
+   return regs;
+}
+
+#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
+
+static __always_inline struct pt_regs

[PATCH v13 09/20] tracing: Add ftrace_partial_regs() for converting ftrace_regs to pt_regs

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add ftrace_partial_regs() which converts the ftrace_regs to pt_regs.
This is for the eBPF which needs this to keep the same pt_regs interface
to access registers.
Thus when replacing the pt_regs with ftrace_regs in fprobes (which is
used by kprobe_multi eBPF event), this will be used.

If the architecture defines its own ftrace_regs, this copies partial
registers to pt_regs and returns it. If not, ftrace_regs is the same as
pt_regs and ftrace_partial_regs() will return ftrace_regs::regs.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Florent Revest 
---
 Changes in v8:
  - Add the reason why this is required in the changelog.
 Changes from previous series: NOTHING, just forward ported.
---
 arch/arm64/include/asm/ftrace.h |   11 +++
 include/linux/ftrace.h  |   17 +
 2 files changed, 28 insertions(+)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index dffaab3dd1f1..5cd587afab6d 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -132,6 +132,17 @@ ftrace_regs_get_frame_pointer(const struct ftrace_regs 
*fregs)
return fregs->fp;
 }
 
+static __always_inline struct pt_regs *
+ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+   memcpy(regs->regs, fregs->regs, sizeof(u64) * 9);
+   regs->sp = fregs->sp;
+   regs->pc = fregs->pc;
+   regs->regs[29] = fregs->fp;
+   regs->regs[30] = fregs->lr;
+   return regs;
+}
+
 int ftrace_regs_query_register_offset(const char *name);
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a1b2ef492c7f..bd9a26bdf660 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -176,6 +176,23 @@ static __always_inline struct pt_regs 
*ftrace_get_regs(struct ftrace_regs *fregs
return arch_ftrace_get_regs(fregs);
 }
 
+#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \
+   defined(CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST)
+
+static __always_inline struct pt_regs *
+ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+   /*
+* If CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y, ftrace_regs memory
+* layout is the same as pt_regs. So always returns that address.
+* Since arch_ftrace_get_regs() will check some members and may return
+* NULL, we can not use it.
+*/
+   return &fregs->regs;
+}
+
+#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || 
CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
+
 /*
  * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs.
  * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs.




[PATCH v13 08/20] fprobe: Use ftrace_regs in fprobe exit handler

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Change the fprobe exit handler to use ftrace_regs structure instead of
pt_regs. This also introduces HAVE_PT_REGS_TO_FTRACE_REGS_CAST, which means
the ftrace_regs's memory layout is equal to that of pt_regs, so the two can
be cast to each other. Fprobe introduces a new dependency on that.

Signed-off-by: Masami Hiramatsu (Google) 
---
  Changes in v3:
   - Use ftrace_regs_get_return_value()
  Changes from previous series: NOTHING, just forward ported.
---
 arch/loongarch/Kconfig  |1 +
 arch/s390/Kconfig   |1 +
 arch/x86/Kconfig|1 +
 include/linux/fprobe.h  |2 +-
 include/linux/ftrace.h  |6 ++
 kernel/trace/Kconfig|8 
 kernel/trace/bpf_trace.c|6 +-
 kernel/trace/fprobe.c   |3 ++-
 kernel/trace/trace_fprobe.c |6 +-
 lib/test_fprobe.c   |6 +++---
 samples/fprobe/fprobe_example.c |2 +-
 11 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 974f08f65f63..73cb657496c8 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -122,6 +122,7 @@ config LOONGARCH
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+   select HAVE_PT_REGS_TO_FTRACE_REGS_CAST
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 12e942cfbcde..eebc299f424b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -175,6 +175,7 @@ config S390
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+   select HAVE_PT_REGS_TO_FTRACE_REGS_CAST
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 047384e4d93a..59788d8b220e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -218,6 +218,7 @@ config X86
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_DYNAMIC_FTRACE_WITH_ARGSif X86_64
+   select HAVE_PT_REGS_TO_FTRACE_REGS_CAST if X86_64
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_SAMPLE_FTRACE_DIRECTif X86_64
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI  if X86_64
diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index ca64ee5e45d2..ef609bcca0f9 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -14,7 +14,7 @@ typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned 
long entry_ip,
   void *entry_data);
 
 typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip,
-  unsigned long ret_ip, struct pt_regs *regs,
+  unsigned long ret_ip, struct ftrace_regs *regs,
   void *entry_data);
 
 /**
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e7c41d9988e1..a1b2ef492c7f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -162,6 +162,12 @@ struct ftrace_regs {
 #define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0)
 #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
 
+#ifdef CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST
+
+static_assert(sizeof(struct pt_regs) == sizeof(struct ftrace_regs));
+
+#endif /* CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
+
 static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs 
*fregs)
 {
if (!fregs)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 0c6a03554c13..2e2b39699542 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -57,6 +57,13 @@ config HAVE_DYNAMIC_FTRACE_WITH_ARGS
 This allows for use of ftrace_regs_get_argument() and
 ftrace_regs_get_stack_pointer().
 
+config HAVE_PT_REGS_TO_FTRACE_REGS_CAST
+   bool
+   help
+If this is set, the memory layout of the ftrace_regs data structure
+is the same as the pt_regs. So the pt_regs is possible to be casted
+to ftrace_regs.
+
 config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
bool
help
@@ -288,6 +295,7 @@ config FPROBE
bool "Kernel Function Probe (fprobe)"
depends on FUNCTION_TRACER
depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS
+   depends on HAVE_PT_REGS_TO_FTRACE_REGS_CAST || 
!HAVE_DYNAMIC_FTRACE_WITH_ARGS
depends on HAVE_RETHOOK
select RETHOOK
default n
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 947808a002d0..cdba9981b048 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2874,10 +2874,14 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned 
long fentry_ip,

[PATCH v13 07/20] fprobe: Use ftrace_regs in fprobe entry handler

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

This allows fprobes to be available with CONFIG_DYNAMIC_FTRACE_WITH_ARGS
instead of CONFIG_DYNAMIC_FTRACE_WITH_REGS, then we can enable fprobe
on arm64.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Florent Revest 
---
 Changes in v6:
  - Keep using SAVE_REGS flag to avoid breaking bpf kprobe-multi test.
---
 include/linux/fprobe.h  |2 +-
 kernel/trace/Kconfig|3 ++-
 kernel/trace/bpf_trace.c|   10 +++---
 kernel/trace/fprobe.c   |3 ++-
 kernel/trace/trace_fprobe.c |6 +-
 lib/test_fprobe.c   |4 ++--
 samples/fprobe/fprobe_example.c |2 +-
 7 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index f39869588117..ca64ee5e45d2 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -10,7 +10,7 @@
 struct fprobe;
 
 typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned long entry_ip,
-  unsigned long ret_ip, struct pt_regs *regs,
+  unsigned long ret_ip, struct ftrace_regs *regs,
   void *entry_data);
 
 typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index ab277eff80dc..0c6a03554c13 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -287,7 +287,7 @@ config DYNAMIC_FTRACE_WITH_ARGS
 config FPROBE
bool "Kernel Function Probe (fprobe)"
depends on FUNCTION_TRACER
-   depends on DYNAMIC_FTRACE_WITH_REGS
+   depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS
depends on HAVE_RETHOOK
select RETHOOK
default n
@@ -672,6 +672,7 @@ config FPROBE_EVENTS
select TRACING
select PROBE_EVENTS
select DYNAMIC_EVENTS
+   depends on DYNAMIC_FTRACE_WITH_REGS
default y
help
  This allows user to add tracing events on the function entry and
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index cd098846e251..947808a002d0 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2605,7 +2605,7 @@ struct bpf_session_run_ctx {
void *data;
 };
 
-#ifdef CONFIG_FPROBE
+#if defined(CONFIG_FPROBE) && defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
 struct bpf_kprobe_multi_link {
struct bpf_link link;
struct fprobe fp;
@@ -2857,12 +2857,16 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link 
*link,
 
 static int
 kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
  void *data)
 {
+   struct pt_regs *regs = ftrace_get_regs(fregs);
struct bpf_kprobe_multi_link *link;
int err;
 
+   if (!regs)
+   return 0;
+
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, 
false, data);
return is_kprobe_session(link->link.prog) ? err : 0;
@@ -3137,7 +3141,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr 
*attr, struct bpf_prog *pr
kvfree(cookies);
return err;
 }
-#else /* !CONFIG_FPROBE */
+#else /* !CONFIG_FPROBE || !CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog 
*prog)
 {
return -EOPNOTSUPP;
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 9ff018245840..3d3789283873 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -46,7 +46,7 @@ static inline void __fprobe_handler(unsigned long ip, 
unsigned long parent_ip,
}
 
if (fp->entry_handler)
-   ret = fp->entry_handler(fp, ip, parent_ip, 
ftrace_get_regs(fregs), entry_data);
+   ret = fp->entry_handler(fp, ip, parent_ip, fregs, entry_data);
 
/* If entry_handler returns !0, nmissed is not counted. */
if (rh) {
@@ -182,6 +182,7 @@ static void fprobe_init(struct fprobe *fp)
fp->ops.func = fprobe_kprobe_handler;
else
fp->ops.func = fprobe_handler;
+
fp->ops.flags |= FTRACE_OPS_FL_SAVE_REGS;
 }
 
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 62e6a8f4aae9..b2c20d4fdfd7 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -338,12 +338,16 @@ NOKPROBE_SYMBOL(fexit_perf_func);
 #endif /* CONFIG_PERF_EVENTS */
 
 static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
-unsigned long ret_ip, struct pt_regs *regs,
+unsigned long ret_ip, struct ftrace_regs *fregs,
 void *entry_data)
 {
struct trace_fprobe *tf = container_of(fp, s

[PATCH v13 06/20] function_graph: Pass ftrace_regs to retfunc

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Pass ftrace_regs to the fgraph_ops::retfunc(). If ftrace_regs is not
available, it passes a NULL instead. User callback function can access
some registers (including return address) via this ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v8:
  - Pass ftrace_regs to retfunc, instead of adding retregfunc.
 Changes in v6:
  - update to use ftrace_regs_get_return_value() because of reordering
patches.
 Changes in v3:
  - Update for new multiple fgraph.
  - Save the return address to instruction pointer in ftrace_regs.
---
 include/linux/ftrace.h   |3 ++-
 kernel/trace/fgraph.c|   16 +++-
 kernel/trace/ftrace.c|3 ++-
 kernel/trace/trace.h |3 ++-
 kernel/trace/trace_functions_graph.c |7 ---
 kernel/trace/trace_irqsoff.c |3 ++-
 kernel/trace/trace_sched_wakeup.c|3 ++-
 kernel/trace/trace_selftest.c|3 ++-
 8 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 13987cd63553..e7c41d9988e1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1069,7 +1069,8 @@ struct fgraph_ops;
 
 /* Type of the callback handlers for tracing function graph*/
 typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *,
-  struct fgraph_ops *); /* return */
+  struct fgraph_ops *,
+  struct ftrace_regs *); /* return */
 typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *,
  struct fgraph_ops *,
  struct ftrace_regs *); /* entry */
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 30bebe43607d..6a3e2db16aa4 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -297,7 +297,8 @@ static int entry_run(struct ftrace_graph_ent *trace, struct 
fgraph_ops *ops,
 }
 
 /* ftrace_graph_return set to this to tell some archs to run function graph */
-static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops)
+static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops,
+  struct ftrace_regs *fregs)
 {
 }
 
@@ -491,7 +492,8 @@ int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace,
 }
 
 static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
 {
 }
 
@@ -787,6 +789,9 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, 
unsigned long frame_pointe
}
 
trace.rettime = trace_clock_local();
+   if (fregs)
+   ftrace_regs_set_instruction_pointer(fregs, ret);
+
 #ifdef CONFIG_FUNCTION_GRAPH_RETVAL
trace.retval = ftrace_regs_get_return_value(fregs);
 #endif
@@ -796,7 +801,7 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, 
unsigned long frame_pointe
 #ifdef CONFIG_HAVE_STATIC_CALL
if (static_branch_likely(&fgraph_do_direct)) {
if (test_bit(fgraph_direct_gops->idx, &bitmap))
-   static_call(fgraph_retfunc)(&trace, fgraph_direct_gops);
+   static_call(fgraph_retfunc)(&trace, fgraph_direct_gops, 
fregs);
} else
 #endif
{
@@ -806,7 +811,7 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, 
unsigned long frame_pointe
if (gops == &fgraph_stub)
continue;
 
-   gops->retfunc(&trace, gops);
+   gops->retfunc(&trace, gops, fregs);
}
}
 
@@ -956,7 +961,8 @@ void ftrace_graph_sleep_time_control(bool enable)
  * Simply points to ftrace_stub, but with the proper protocol.
  * Defined by the linker script in linux/vmlinux.lds.h
  */
-void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops 
*gops);
+void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops,
+  struct ftrace_regs *fregs);
 
 /* The callbacks that hook a function */
 trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 775040a9f541..fd6c5a50c5e5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -840,7 +840,8 @@ static int profile_graph_entry(struct ftrace_graph_ent 
*trace,
 }
 
 static void profile_graph_return(struct ftrace_graph_ret *trace,
-struct fgraph_ops *gops)
+struct fgraph_ops *gops,
+struct ftrace_regs *fregs)
 {
struct ftrace_ret_stack *ret_stack;
struct ftrace_profile_stat *stat;
diff --git a/kernel/trace/trace.h b/ke

[PATCH v13 05/20] function_graph: Replace fgraph_ret_regs with ftrace_regs

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Use ftrace_regs instead of fgraph_ret_regs for tracing the return value
on the function_graph tracer, to simplify the callback interface.

The CONFIG_HAVE_FUNCTION_GRAPH_RETVAL is also replaced by
CONFIG_HAVE_FUNCTION_GRAPH_FREGS.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v8:
  - Newly added.
---
 arch/arm64/Kconfig  |1 +
 arch/arm64/include/asm/ftrace.h |   23 ++-
 arch/arm64/kernel/asm-offsets.c |   12 
 arch/arm64/kernel/entry-ftrace.S|   32 ++--
 arch/loongarch/Kconfig  |2 +-
 arch/loongarch/include/asm/ftrace.h |   24 ++--
 arch/loongarch/kernel/asm-offsets.c |   12 
 arch/loongarch/kernel/mcount.S  |   17 ++---
 arch/loongarch/kernel/mcount_dyn.S  |   14 +++---
 arch/riscv/Kconfig  |2 +-
 arch/riscv/include/asm/ftrace.h |   26 +-
 arch/riscv/kernel/mcount.S  |   24 +---
 arch/s390/Kconfig   |2 +-
 arch/s390/include/asm/ftrace.h  |   26 +-
 arch/s390/kernel/asm-offsets.c  |6 --
 arch/s390/kernel/mcount.S   |9 +
 arch/x86/Kconfig|2 +-
 arch/x86/include/asm/ftrace.h   |   22 ++
 arch/x86/kernel/ftrace_32.S |   15 +--
 arch/x86/kernel/ftrace_64.S |   17 +
 include/linux/ftrace.h  |   14 +++---
 kernel/trace/Kconfig|4 ++--
 kernel/trace/fgraph.c   |   21 +
 23 files changed, 122 insertions(+), 205 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a2f8ff354ca6..17947f625b06 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -211,6 +211,7 @@ config ARM64
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
+   select HAVE_FUNCTION_GRAPH_FREGS
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_RETVAL
select HAVE_GCC_PLUGINS
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index dc9cf0bd2a4c..dffaab3dd1f1 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -126,6 +126,12 @@ ftrace_override_function_with_return(struct ftrace_regs 
*fregs)
fregs->pc = fregs->lr;
 }
 
+static __always_inline unsigned long
+ftrace_regs_get_frame_pointer(const struct ftrace_regs *fregs)
+{
+   return fregs->fp;
+}
+
 int ftrace_regs_query_register_offset(const char *name);
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
@@ -183,23 +189,6 @@ static inline bool arch_syscall_match_sym_name(const char 
*sym,
 
 #ifndef __ASSEMBLY__
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-struct fgraph_ret_regs {
-   /* x0 - x7 */
-   unsigned long regs[8];
-
-   unsigned long fp;
-   unsigned long __unused;
-};
-
-static inline unsigned long fgraph_ret_regs_return_value(struct 
fgraph_ret_regs *ret_regs)
-{
-   return ret_regs->regs[0];
-}
-
-static inline unsigned long fgraph_ret_regs_frame_pointer(struct 
fgraph_ret_regs *ret_regs)
-{
-   return ret_regs->fp;
-}
 
 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
   unsigned long frame_pointer);
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 27de1dddb0ab..9e03c9a7e5c3 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -201,18 +201,6 @@ int main(void)
   DEFINE(FTRACE_OPS_FUNC,  offsetof(struct ftrace_ops, func));
 #endif
   BLANK();
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-  DEFINE(FGRET_REGS_X0,offsetof(struct 
fgraph_ret_regs, regs[0]));
-  DEFINE(FGRET_REGS_X1,offsetof(struct 
fgraph_ret_regs, regs[1]));
-  DEFINE(FGRET_REGS_X2,offsetof(struct 
fgraph_ret_regs, regs[2]));
-  DEFINE(FGRET_REGS_X3,offsetof(struct 
fgraph_ret_regs, regs[3]));
-  DEFINE(FGRET_REGS_X4,offsetof(struct 
fgraph_ret_regs, regs[4]));
-  DEFINE(FGRET_REGS_X5,offsetof(struct 
fgraph_ret_regs, regs[5]));
-  DEFINE(FGRET_REGS_X6,offsetof(struct 
fgraph_ret_regs, regs[6]));
-  DEFINE(FGRET_REGS_X7,offsetof(struct 
fgraph_ret_regs, regs[7]));
-  DEFINE(FGRET_REGS_FP,offsetof(struct 
fgraph_ret_regs, fp));
-  DEFINE(FGRET_REGS_SIZE,  sizeof(struct fgraph_ret_regs));
-#endif
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
   DEFINE(FTRACE_OPS_DIRECT_CALL,   offsetof(struct ftrace_ops, 
direct_call));
 #endif
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry

[PATCH v13 04/20] function_graph: Pass ftrace_regs to entryfunc

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Pass ftrace_regs to the fgraph_ops::entryfunc(). If ftrace_regs is not
available, it passes a NULL instead. User callback function can access
some registers (including return address) via this ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v11:
  - Update for the latest for-next branch.
 Changes in v8:
  - Just pass ftrace_regs to the handler instead of adding a new
entryregfunc.
  - Update riscv ftrace_graph_func().
 Changes in v3:
  - Update for new multiple fgraph.
---
 arch/arm64/kernel/ftrace.c   |   20 +++-
 arch/loongarch/kernel/ftrace_dyn.c   |   10 +-
 arch/powerpc/kernel/trace/ftrace.c   |2 +
 arch/powerpc/kernel/trace/ftrace_64_pg.c |   10 --
 arch/riscv/kernel/ftrace.c   |   17 ++
 arch/x86/kernel/ftrace.c |   50 +-
 include/linux/ftrace.h   |   18 ---
 kernel/trace/fgraph.c|   23 --
 kernel/trace/ftrace.c|3 +-
 kernel/trace/trace.h |3 +-
 kernel/trace/trace_functions_graph.c |3 +-
 kernel/trace/trace_irqsoff.c |3 +-
 kernel/trace/trace_sched_wakeup.c|3 +-
 kernel/trace/trace_selftest.c|8 +++--
 14 files changed, 128 insertions(+), 45 deletions(-)

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index a650f5e11fc5..bc647b725e6a 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -481,7 +481,25 @@ void prepare_ftrace_return(unsigned long self_addr, 
unsigned long *parent,
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
   struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
-   prepare_ftrace_return(ip, &fregs->lr, fregs->fp);
+   unsigned long return_hooker = (unsigned long)&return_to_handler;
+   unsigned long frame_pointer = fregs->fp;
+   unsigned long *parent = &fregs->lr;
+   unsigned long old;
+
+   if (unlikely(atomic_read(¤t->tracing_graph_pause)))
+   return;
+
+   /*
+* Note:
+* No protection against faulting at *parent, which may be seen
+* on other archs. It's unlikely on AArch64.
+*/
+   old = *parent;
+
+   if (!function_graph_enter_regs(old, ip, frame_pointer,
+  (void *)frame_pointer, fregs)) {
+   *parent = return_hooker;
+   }
 }
 #else
 /*
diff --git a/arch/loongarch/kernel/ftrace_dyn.c 
b/arch/loongarch/kernel/ftrace_dyn.c
index bff058317062..966e0f7f7aca 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -243,8 +243,16 @@ void ftrace_graph_func(unsigned long ip, unsigned long 
parent_ip,
 {
struct pt_regs *regs = &fregs->regs;
unsigned long *parent = (unsigned long *)®s->regs[1];
+   unsigned long return_hooker = (unsigned long)&return_to_handler;
+   unsigned long old;
+
+   if (unlikely(atomic_read(¤t->tracing_graph_pause)))
+   return;
+
+   old = *parent;
 
-   prepare_ftrace_return(ip, (unsigned long *)parent);
+   if (!function_graph_enter_regs(old, ip, 0, parent, fregs))
+   *parent = return_hooker;
 }
 #else
 static int ftrace_modify_graph_caller(bool enable)
diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index d8d6b4fd9a14..a1a0e0b57662 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -434,7 +434,7 @@ void ftrace_graph_func(unsigned long ip, unsigned long 
parent_ip,
if (bit < 0)
goto out;
 
-   if (!function_graph_enter(parent_ip, ip, 0, (unsigned long *)sp))
+   if (!function_graph_enter_regs(parent_ip, ip, 0, (unsigned long *)sp, 
fregs))
parent_ip = ppc_function_entry(return_to_handler);
 
ftrace_test_recursion_unlock(bit);
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c 
b/arch/powerpc/kernel/trace/ftrace_64_pg.c
index 12fab1803bcf..4ae9eeb1c8f1 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.c
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -800,7 +800,8 @@ int ftrace_disable_ftrace_graph_caller(void)
  * in current thread info. Return the address we want to divert to.
  */
 static unsigned long
-__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long 
sp)
+__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long 
sp,
+   struct ftrace_regs *fregs)
 {
unsigned long return_hooker;
int bit;
@@ -817,7 +818,7 @@ __prepare_ftrace_return(unsigned long parent, unsigned long 
ip, unsigned long sp
 
return_hooker = ppc_function_entry(return_to_handler);
 
-   if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
+   if (!function

[PATCH v13 03/20] tracing: Rename ftrace_regs_return_value to ftrace_regs_get_return_value

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Rename ftrace_regs_return_value to ftrace_regs_get_return_value as same as
other ftrace_regs_get/set_* APIs.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Mark Rutland 
---
 Changes in v6:
  - Moved to top of the series.
 Changes in v3:
  - Newly added.
---
 arch/loongarch/include/asm/ftrace.h |2 +-
 arch/powerpc/include/asm/ftrace.h   |2 +-
 arch/s390/include/asm/ftrace.h  |2 +-
 arch/x86/include/asm/ftrace.h   |2 +-
 include/linux/ftrace.h  |2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/loongarch/include/asm/ftrace.h 
b/arch/loongarch/include/asm/ftrace.h
index c0a682808e07..6f8517d59954 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -69,7 +69,7 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs 
*fregs, unsigned long ip)
regs_get_kernel_argument(&(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(&(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(&(fregs)->regs, ret)
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index 559560286e6d..23d26f3afae4 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -59,7 +59,7 @@ ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs)
regs_get_kernel_argument(&(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(&(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(&(fregs)->regs, ret)
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index fbadca645af7..de76c21eb4a3 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -83,7 +83,7 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
regs_get_kernel_argument(&(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(&(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(&(fregs)->regs, ret)
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 0152a81d9b4a..78f6a200e15b 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -56,7 +56,7 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
regs_get_kernel_argument(&(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(&(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(&(fregs)->regs, ret)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 42106b3de396..f84fb9635fb0 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -183,7 +183,7 @@ static __always_inline bool ftrace_regs_has_args(struct 
ftrace_regs *fregs)
regs_get_kernel_argument(ftrace_get_regs(fregs), n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(ftrace_get_regs(fregs))
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(ftrace_get_regs(fregs))
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(ftrace_get_regs(fregs), ret)




[PATCH v13 02/20] tracing: Add a comment about ftrace_regs definition

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

To clarify what will be expected on ftrace_regs, add a comment to the
architecture independent definition of the ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Mark Rutland 
---
 Changes in v8:
  - Update that the saved registers depends on the context.
 Changes in v3:
  - Add instruction pointer
 Changes in v2:
  - newly added.
---
 include/linux/ftrace.h |   26 ++
 1 file changed, 26 insertions(+)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fd5e84d0ec47..42106b3de396 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -117,6 +117,32 @@ extern int ftrace_enabled;
 
 #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
 
+/**
+ * ftrace_regs - ftrace partial/optimal register set
+ *
+ * ftrace_regs represents a group of registers which is used at the
+ * function entry and exit. There are three types of registers.
+ *
+ * - Registers for passing the parameters to callee, including the stack
+ *   pointer. (e.g. rcx, rdx, rdi, rsi, r8, r9 and rsp on x86_64)
+ * - Registers for passing the return values to caller.
+ *   (e.g. rax and rdx on x86_64)
+ * - Registers for hooking the function call and return including the
+ *   frame pointer (the frame pointer is architecture/config dependent)
+ *   (e.g. rip, rbp and rsp for x86_64)
+ *
+ * Also, architecture dependent fields can be used for internal process.
+ * (e.g. orig_ax on x86_64)
+ *
+ * On the function entry, those registers will be restored except for
+ * the stack pointer, so that user can change the function parameters
+ * and instruction pointer (e.g. live patching.)
+ * On the function exit, only registers which is used for return values
+ * are restored.
+ *
+ * NOTE: user *must not* access regs directly, only do it via APIs, because
+ * the member can be changed according to the architecture.
+ */
 struct ftrace_regs {
struct pt_regs  regs;
 };




[PATCH v13 01/20] tracing: fgraph: Fix to add new fgraph_ops to array after ftrace_startup_subops()

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Since the register_ftrace_graph() assigns a new fgraph_ops to
fgraph_array before registering it by ftrace_startup_subops(), the new
fgraph_ops can be used in function_graph_enter().

In most cases, it is still OK because those fgraph_ops's hashtable is
already initialized by ftrace_set_filter*() etc.

But if a user registers a new fgraph_ops which does not initialize the
hash list, ftrace_ops_test() in function_graph_enter() causes a NULL
pointer dereference BUG because fgraph_ops->ops.func_hash is NULL.

This can be reproduced by the below commands because function profiler's
fgraph_ops does not initialize the hash list;

 # cd /sys/kernel/tracing
 # echo function_graph > current_tracer
 # echo 1 > function_profile_enabled

To fix this problem, add a new fgraph_ops to fgraph_array after
ftrace_startup_subops(). Thus, until the new fgraph_ops is initialized,
we will see fgraph_stub on the corresponding fgraph_array entry.

Fixes: c132be2c4fcc ("function_graph: Have the instances use their own 
ftrace_ops for filtering")
Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/fgraph.c |   31 ++-
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index d1d5ea2d0a1b..d7d4fb403f6f 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -1206,18 +1206,24 @@ static void init_task_vars(int idx)
read_unlock(&tasklist_lock);
 }
 
-static void ftrace_graph_enable_direct(bool enable_branch)
+static void ftrace_graph_enable_direct(bool enable_branch, struct fgraph_ops 
*gops)
 {
trace_func_graph_ent_t func = NULL;
trace_func_graph_ret_t retfunc = NULL;
int i;
 
-   for_each_set_bit(i, &fgraph_array_bitmask,
-sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) {
-   func = fgraph_array[i]->entryfunc;
-   retfunc = fgraph_array[i]->retfunc;
-   fgraph_direct_gops = fgraph_array[i];
-}
+   if (gops) {
+   func = gops->entryfunc;
+   retfunc = gops->retfunc;
+   fgraph_direct_gops = gops;
+   } else {
+   for_each_set_bit(i, &fgraph_array_bitmask,
+sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) {
+   func = fgraph_array[i]->entryfunc;
+   retfunc = fgraph_array[i]->retfunc;
+   fgraph_direct_gops = fgraph_array[i];
+   }
+   }
if (WARN_ON_ONCE(!func))
return;
 
@@ -1256,8 +1262,6 @@ int register_ftrace_graph(struct fgraph_ops *gops)
ret = -ENOSPC;
goto out;
}
-
-   fgraph_array[i] = gops;
gops->idx = i;
 
ftrace_graph_active++;
@@ -1266,7 +1270,7 @@ int register_ftrace_graph(struct fgraph_ops *gops)
ftrace_graph_disable_direct(true);
 
if (ftrace_graph_active == 1) {
-   ftrace_graph_enable_direct(false);
+   ftrace_graph_enable_direct(false, gops);
register_pm_notifier(&ftrace_suspend_notifier);
ret = start_graph_tracing();
if (ret)
@@ -1281,14 +1285,15 @@ int register_ftrace_graph(struct fgraph_ops *gops)
} else {
init_task_vars(gops->idx);
}
-
/* Always save the function, and reset at unregistering */
gops->saved_func = gops->entryfunc;
 
ret = ftrace_startup_subops(&graph_ops, &gops->ops, command);
+   if (!ret)
+   fgraph_array[i] = gops;
+
 error:
if (ret) {
-   fgraph_array[i] = &fgraph_stub;
ftrace_graph_active--;
gops->saved_func = NULL;
fgraph_lru_release_index(i);
@@ -1324,7 +1329,7 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
ftrace_shutdown_subops(&graph_ops, &gops->ops, command);
 
if (ftrace_graph_active == 1)
-   ftrace_graph_enable_direct(true);
+   ftrace_graph_enable_direct(true, NULL);
else if (!ftrace_graph_active)
ftrace_graph_disable_direct(false);
 




[PATCH v13 00/20] tracing: fprobe: function_graph: Multi-function graph and fprobe on fgraph

2024-08-18 Thread Masami Hiramatsu (Google)
Hi,

Here is the 13th version of the series to re-implement the fprobe on
function-graph tracer. The previous version is;

https://lore.kernel.org/all/172000134410.63468.1374887213469474.stgit@devnote2/

This version is based on v6.11-rc3.
In this version, I added a bugfix as [1/20], which should go to urgent
branch, and dropped the performance improvement patch which was introduced
in v12 because I found that does not work with new kernel.

Overview

This series rewrites the fprobe on this function-graph.
The purposes of this change are;

 1) Remove dependency of the rethook from fprobe so that we can reduce
   the return hook code and shadow stack.

 2) Make 'ftrace_regs' the common trace interface for the function
   boundary.

1) Currently we have 2(or 3) different function return hook codes,
 the function-graph tracer and rethook (and legacy kretprobe).
 But since this  is redundant and needs double maintenance cost,
 I would like to unify those. From the user's viewpoint, function-
 graph tracer is very useful to grasp the execution path. For this
 purpose, it is hard to use the rethook in the function-graph
 tracer, but the opposite is possible. (Strictly speaking, kretprobe
 can not use it because it requires 'pt_regs' for historical reasons.)

2) Now the fprobe provides the 'pt_regs' for its handler, but that is
 wrong for the function entry and exit. Moreover, depending on the
 architecture, there is no way to accurately reproduce 'pt_regs'
 outside of interrupt or exception handlers. This means fprobe should
 not use 'pt_regs' because it does not use such exceptions.
 (Conversely, kprobe should use 'pt_regs' because it is an abstract
  interface of the software breakpoint exception.)

This series changes fprobe to use function-graph tracer for tracing
function entry and exit, instead of mixture of ftrace and rethook.
Unlike the rethook which is a per-task list of system-wide allocated
nodes, the function graph's ret_stack is a per-task shadow stack.
Thus it does not need to set 'nr_maxactive' (which is the number of
pre-allocated nodes).
Also the handlers will get the 'ftrace_regs' instead of 'pt_regs'.
Since eBPF mulit_kprobe/multi_kretprobe events still use 'pt_regs' as
their register interface, this changes it to convert 'ftrace_regs' to
'pt_regs'. Of course this conversion makes an incomplete 'pt_regs',
so users must access only registers for function parameters or
return value. 

Design
--
Instead of using ftrace's function entry hook directly, the new fprobe
is built on top of the function-graph's entry and return callbacks
with 'ftrace_regs'.

Since the fprobe requires access to 'ftrace_regs', the architecture
must support CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS and
CONFIG_HAVE_FTRACE_GRAPH_FUNC, which enables to call function-graph
entry callback with 'ftrace_regs', and also
CONFIG_HAVE_FUNCTION_GRAPH_FREGS, which passes the ftrace_regs to
return_to_handler.

All fprobes share a single function-graph ops (means shares a common
ftrace filter) similar to the kprobe-on-ftrace. This needs another
layer to find corresponding fprobe in the common function-graph
callbacks, but has much better scalability, since the number of
registered function-graph ops is limited.

In the entry callback, the fprobe runs its entry_handler and saves the
address of 'fprobe' on the function-graph's shadow stack as data. The
return callback decodes the data to get the 'fprobe' address, and runs
the exit_handler.

The fprobe introduces two hash-tables, one is for entry callback which
searches fprobes related to the given function address passed by entry
callback. The other is for a return callback which checks if the given
'fprobe' data structure pointer is still valid. Note that it is
possible to unregister fprobe before the return callback runs. Thus
the address validation must be done before using it in the return
callback.

Download

This series can be applied against the ftrace/for-next branch in
linux-trace tree.

This series can also be found below branch.

https://git.kernel.org/pub/scm/linux/kernel/git/mhiramat/linux.git/log/?h=topic/fprobe-on-fgraph

Thank you,

---

Masami Hiramatsu (Google) (20):
  tracing: fgraph: Fix to add new fgraph_ops to array after 
ftrace_startup_subops()
  tracing: Add a comment about ftrace_regs definition
  tracing: Rename ftrace_regs_return_value to ftrace_regs_get_return_value
  function_graph: Pass ftrace_regs to entryfunc
  function_graph: Replace fgraph_ret_regs with ftrace_regs
  function_graph: Pass ftrace_regs to retfunc
  fprobe: Use ftrace_regs in fprobe entry handler
  fprobe: Use ftrace_regs in fprobe exit handler
  tracing: Add ftrace_partial_regs() for converting ftrace_regs to pt_regs
  t

[PATCH v3 5/5] selftests/tracing: Add a test for tracepoint events on modules

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add a test case for tracepoint events on modules. This checks if it can add
and remove the events correctly.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v3:
  - Add not-loaded module test.
---
 tools/testing/selftests/ftrace/config  |1 
 .../test.d/dynevent/add_remove_tprobe_module.tc|   61 
 2 files changed, 62 insertions(+)
 create mode 100644 
tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc

diff --git a/tools/testing/selftests/ftrace/config 
b/tools/testing/selftests/ftrace/config
index 048a312abf40..544de0db5f58 100644
--- a/tools/testing/selftests/ftrace/config
+++ b/tools/testing/selftests/ftrace/config
@@ -20,6 +20,7 @@ CONFIG_PREEMPT_TRACER=y
 CONFIG_PROBE_EVENTS_BTF_ARGS=y
 CONFIG_SAMPLES=y
 CONFIG_SAMPLE_FTRACE_DIRECT=m
+CONFIG_SAMPLE_TRACE_EVENTS=m
 CONFIG_SAMPLE_TRACE_PRINTK=m
 CONFIG_SCHED_TRACER=y
 CONFIG_STACK_TRACER=y
diff --git 
a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc 
b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc
new file mode 100644
index ..d319d5ed4226
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc
@@ -0,0 +1,61 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove tracepoint probe events on 
module
+# requires: dynamic_events "t[:[/][]]  
[]":README
+
+rmmod trace-events-sample ||:
+if ! modprobe trace-events-sample ; then
+  echo "No trace-events sample module - please make 
CONFIG_SAMPLE_TRACE_EVENTS=m"
+  exit_unresolved;
+fi
+trap "rmmod trace-events-sample" EXIT
+
+echo 0 > events/enable
+echo > dynamic_events
+
+TRACEPOINT1=foo_bar
+TRACEPOINT2=foo_bar_with_cond
+
+echo "t:myevent1 $TRACEPOINT1" >> dynamic_events
+echo "t:myevent2 $TRACEPOINT2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+grep -q myevent2 dynamic_events
+test -d events/tracepoints/myevent1
+test -d events/tracepoints/myevent2
+
+echo "-:myevent2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+! grep -q myevent2 dynamic_events
+
+echo > dynamic_events
+
+clear_trace
+
+:;: "Try to put a probe on a tracepoint in non-loaded module" ;:
+rmmod trace-events-sample
+
+echo "t:myevent1 $TRACEPOINT1" >> dynamic_events
+echo "t:myevent2 $TRACEPOINT2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+grep -q myevent2 dynamic_events
+test -d events/tracepoints/myevent1
+test -d events/tracepoints/myevent2
+
+echo 1 > events/tracepoints/enable
+
+modprobe trace-events-sample
+
+sleep 2
+
+grep -q "myevent1" trace
+grep -q "myevent2" trace
+
+rmmod trace-events-sample
+trap "" EXIT
+
+echo 0 > events/tracepoints/enable
+echo > dynamic_events
+clear_trace




[PATCH v3 4/5] tracing/fprobe: Support raw tracepoints on future loaded modules

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Support raw tracepoint events on future loaded (unloaded) modules.
This allows user to create raw tracepoint events which can be used from
module's __init functions.

Note: since the kernel does not have any information about the tracepoints
in the unloaded modules, fprobe events can not check whether the tracepoint
exists nor extend the BTF based arguments.

Suggested-by: Mathieu Desnoyers 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v3:
  - Newly added.
---
 kernel/trace/trace_fprobe.c|  151 +---
 .../ftrace/test.d/dynevent/tprobe_syntax_errors.tc |1 
 2 files changed, 101 insertions(+), 51 deletions(-)

diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 8b1127e37da5..a079abd8955b 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -21,6 +21,7 @@
 #define FPROBE_EVENT_SYSTEM "fprobes"
 #define TRACEPOINT_EVENT_SYSTEM "tracepoints"
 #define RETHOOK_MAXACTIVE_MAX 4096
+#define TRACEPOINT_STUB ERR_PTR(-ENOENT)
 
 static int trace_fprobe_create(const char *raw_command);
 static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev);
@@ -674,6 +675,24 @@ static int unregister_fprobe_event(struct trace_fprobe *tf)
return trace_probe_unregister_event_call(&tf->tp);
 }
 
+static int __regsiter_tracepoint_fprobe(struct trace_fprobe *tf)
+{
+   struct tracepoint *tpoint = tf->tpoint;
+   unsigned long ip = (unsigned long)tpoint->probestub;
+   int ret;
+
+   /*
+* Here, we do 2 steps to enable fprobe on a tracepoint.
+* At first, put __probestub_##TP function on the tracepoint
+* and put a fprobe on the stub function.
+*/
+   ret = tracepoint_probe_register_prio_may_exist(tpoint,
+   tpoint->probestub, NULL, 0);
+   if (ret < 0)
+   return ret;
+   return register_fprobe_ips(&tf->fp, &ip, 1);
+}
+
 /* Internal register function - just handle fprobe and flags */
 static int __register_trace_fprobe(struct trace_fprobe *tf)
 {
@@ -700,18 +719,12 @@ static int __register_trace_fprobe(struct trace_fprobe 
*tf)
tf->fp.flags |= FPROBE_FL_DISABLED;
 
if (trace_fprobe_is_tracepoint(tf)) {
-   struct tracepoint *tpoint = tf->tpoint;
-   unsigned long ip = (unsigned long)tpoint->probestub;
-   /*
-* Here, we do 2 steps to enable fprobe on a tracepoint.
-* At first, put __probestub_##TP function on the tracepoint
-* and put a fprobe on the stub function.
-*/
-   ret = tracepoint_probe_register_prio_may_exist(tpoint,
-   tpoint->probestub, NULL, 0);
-   if (ret < 0)
-   return ret;
-   return register_fprobe_ips(&tf->fp, &ip, 1);
+
+   /* This tracepoint is not loaded yet */
+   if (tf->tpoint == TRACEPOINT_STUB)
+   return 0;
+
+   return __regsiter_tracepoint_fprobe(tf);
}
 
/* TODO: handle filter, nofilter or symbol list */
@@ -864,36 +877,6 @@ static int register_trace_fprobe(struct trace_fprobe *tf)
return ret;
 }
 
-#ifdef CONFIG_MODULES
-static int __tracepoint_probe_module_cb(struct notifier_block *self,
-   unsigned long val, void *data)
-{
-   struct tp_module *tp_mod = data;
-   struct trace_fprobe *tf;
-   struct dyn_event *pos;
-
-   if (val != MODULE_STATE_GOING)
-   return NOTIFY_DONE;
-
-   mutex_lock(&event_mutex);
-   for_each_trace_fprobe(tf, pos) {
-   if (tp_mod->mod == tf->mod) {
-   tracepoint_probe_unregister(tf->tpoint,
-   tf->tpoint->probestub, NULL);
-   tf->tpoint = NULL;
-   tf->mod = NULL;
-   }
-   }
-   mutex_unlock(&event_mutex);
-
-   return NOTIFY_DONE;
-}
-
-static struct notifier_block tracepoint_module_nb = {
-   .notifier_call = __tracepoint_probe_module_cb,
-};
-#endif /* CONFIG_MODULES */
-
 struct __find_tracepoint_cb_data {
const char *tp_name;
struct tracepoint *tpoint;
@@ -906,10 +889,12 @@ static void __find_tracepoint_module_cb(struct tracepoint 
*tp, struct module *mo
 
if (!data->tpoint && !strcmp(data->tp_name, tp->name)) {
data->tpoint = tp;
-   data->mod = mod;
-   if (!try_module_get(data->mod)) {
-   data->tpoint = NULL;
-   data->mod = NULL;
+   if (!data->mod) {
+   data->mod = mod;
+   

[PATCH v3 3/5] tracing/fprobe: Support raw tracepoint events on modules

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Support raw tracepoint event on module by fprobe events.
Since it only uses for_each_kernel_tracepoint() to find a tracepoint,
the tracepoints on modules are not handled. Thus if user specified a
tracepoint on a module, it shows an error.
This adds a new for_each_module_tracepoint() API to the tracepoint subsystem,
and uses it to find tracepoints on modules.

Reported-by: don 
Closes: 
https://lore.kernel.org/all/20240530215718.aeec973a1d0bf058d39cb...@kernel.org/
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Fix build errors with CONFIG_MODULES=y.
---
 kernel/trace/trace_fprobe.c |   46 ---
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 62e6a8f4aae9..8b1127e37da5 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -385,6 +385,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
   const char *event,
   const char *symbol,
   struct tracepoint *tpoint,
+  struct module *mod,
   int maxactive,
   int nargs, bool is_return)
 {
@@ -405,6 +406,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
tf->fp.entry_handler = fentry_dispatcher;
 
tf->tpoint = tpoint;
+   tf->mod = mod;
tf->fp.nr_maxactive = maxactive;
 
ret = trace_probe_init(&tf->tp, event, group, false, nargs);
@@ -895,8 +897,23 @@ static struct notifier_block tracepoint_module_nb = {
 struct __find_tracepoint_cb_data {
const char *tp_name;
struct tracepoint *tpoint;
+   struct module *mod;
 };
 
+static void __find_tracepoint_module_cb(struct tracepoint *tp, struct module 
*mod, void *priv)
+{
+   struct __find_tracepoint_cb_data *data = priv;
+
+   if (!data->tpoint && !strcmp(data->tp_name, tp->name)) {
+   data->tpoint = tp;
+   data->mod = mod;
+   if (!try_module_get(data->mod)) {
+   data->tpoint = NULL;
+   data->mod = NULL;
+   }
+   }
+}
+
 static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
 {
struct __find_tracepoint_cb_data *data = priv;
@@ -905,14 +922,28 @@ static void __find_tracepoint_cb(struct tracepoint *tp, 
void *priv)
data->tpoint = tp;
 }
 
-static struct tracepoint *find_tracepoint(const char *tp_name)
+/*
+ * Find a tracepoint from kernel and module. If the tracepoint is in a module,
+ * this increments the module refcount to prevent unloading until the
+ * trace_fprobe is registered to the list. After registering the trace_fprobe
+ * on the trace_fprobe list, the module refcount is decremented because
+ * tracepoint_probe_module_cb will handle it.
+ */
+static struct tracepoint *find_tracepoint(const char *tp_name,
+ struct module **tp_mod)
 {
struct __find_tracepoint_cb_data data = {
.tp_name = tp_name,
+   .mod = NULL,
};
 
for_each_kernel_tracepoint(__find_tracepoint_cb, &data);
 
+   if (!data.tpoint && IS_ENABLED(CONFIG_MODULES)) {
+   for_each_module_tracepoint(__find_tracepoint_module_cb, &data);
+   *tp_mod = data.mod;
+   }
+
return data.tpoint;
 }
 
@@ -996,6 +1027,7 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
char abuf[MAX_BTF_ARGS_LEN];
char *dbuf = NULL;
bool is_tracepoint = false;
+   struct module *tp_mod = NULL;
struct tracepoint *tpoint = NULL;
struct traceprobe_parse_context ctx = {
.flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
@@ -1080,7 +1112,7 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
 
if (is_tracepoint) {
ctx.flags |= TPARG_FL_TPOINT;
-   tpoint = find_tracepoint(symbol);
+   tpoint = find_tracepoint(symbol, &tp_mod);
if (!tpoint) {
trace_probe_log_set_index(1);
trace_probe_log_err(0, NO_TRACEPOINT);
@@ -1110,8 +1142,8 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
goto out;
 
/* setup a probe */
-   tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive,
-   argc, is_return);
+   tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod,
+   maxactive, argc, is_return);
if (IS_ERR(tf)) {
ret = PTR_ERR(tf);
/* This must return -ENOMEM, else

[PATCH v3 2/5] tracepoint: Support iterating tracepoints in a loading module

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add for_each_tracepoint_in_module() function to iterate tracepoints in
a module. This API is needed for handling tracepoints in a loading
module from tracepoint_module_notifier callback function.
This also updates for_each_module_tracepoint() to pass the module to
callback function so that it can find module easily.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v3:
  - Newly added.
---
 include/linux/tracepoint.h |   17 +++--
 kernel/tracepoint.c|   37 +
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 837fcf8ec0d5..93a9f3070b48 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -64,8 +64,13 @@ struct tp_module {
 bool trace_module_has_bad_taint(struct module *mod);
 extern int register_tracepoint_module_notifier(struct notifier_block *nb);
 extern int unregister_tracepoint_module_notifier(struct notifier_block *nb);
-void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *),
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *,
+   struct module *, void *),
void *priv);
+void for_each_tracepoint_in_module(struct module *,
+  void (*fct)(struct tracepoint *,
+   struct module *, void *),
+  void *priv);
 #else
 static inline bool trace_module_has_bad_taint(struct module *mod)
 {
@@ -82,10 +87,18 @@ int unregister_tracepoint_module_notifier(struct 
notifier_block *nb)
return 0;
 }
 static inline
-void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *),
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *,
+   struct module *, void *),
void *priv)
 {
 }
+static inline
+void for_each_tracepoint_in_module(struct module *mod,
+  void (*fct)(struct tracepoint *,
+   struct module *, void *),
+  void *priv)
+{
+}
 #endif /* CONFIG_MODULES */
 
 /*
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index bed4aad36d92..8879da16ef4d 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -736,24 +736,45 @@ static __init int init_tracepoints(void)
 }
 __initcall(init_tracepoints);
 
+/**
+ * for_each_tracepoint_in_module - iteration on all tracepoints in a module
+ * @mod: module
+ * @fct: callback
+ * @priv: private data
+ */
+void for_each_tracepoint_in_module(struct module *mod,
+  void (*fct)(struct tracepoint *tp,
+   struct module *mod, void *priv),
+  void *priv)
+{
+   tracepoint_ptr_t *begin, *end, *iter;
+
+   lockdep_assert_held(&tracepoint_module_list_mutex);
+
+   if (!mod)
+   return;
+
+   begin = mod->tracepoints_ptrs;
+   end = mod->tracepoints_ptrs + mod->num_tracepoints;
+
+   for (iter = begin; iter < end; iter++)
+   fct(tracepoint_ptr_deref(iter), mod, priv);
+}
+
 /**
  * for_each_module_tracepoint - iteration on all tracepoints in all modules
  * @fct: callback
  * @priv: private data
  */
-void for_each_module_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *tp,
+struct module *mod, void *priv),
void *priv)
 {
struct tp_module *tp_mod;
-   struct module *mod;
 
mutex_lock(&tracepoint_module_list_mutex);
-   list_for_each_entry(tp_mod, &tracepoint_module_list, list) {
-   mod = tp_mod->mod;
-   for_each_tracepoint_range(mod->tracepoints_ptrs,
-   mod->tracepoints_ptrs + mod->num_tracepoints,
-   fct, priv);
-   }
+   list_for_each_entry(tp_mod, &tracepoint_module_list, list)
+   for_each_tracepoint_in_module(tp_mod->mod, fct, priv);
mutex_unlock(&tracepoint_module_list_mutex);
 }
 #endif /* CONFIG_MODULES */




[PATCH v3 1/5] tracepoint: Support iterating over tracepoints on modules

2024-08-18 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add for_each_module_tracepoint() for iterating over tracepoints
on modules. This is similar to the for_each_kernel_tracepoint()
but only for the tracepoints on modules (not including kernel
built-in tracepoints).

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v3:
  - Add kerneldoc for for_each_module_tracepoint.
---
 include/linux/tracepoint.h |7 +++
 kernel/tracepoint.c|   21 +
 2 files changed, 28 insertions(+)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 6be396bb4297..837fcf8ec0d5 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -64,6 +64,8 @@ struct tp_module {
 bool trace_module_has_bad_taint(struct module *mod);
 extern int register_tracepoint_module_notifier(struct notifier_block *nb);
 extern int unregister_tracepoint_module_notifier(struct notifier_block *nb);
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *),
+   void *priv);
 #else
 static inline bool trace_module_has_bad_taint(struct module *mod)
 {
@@ -79,6 +81,11 @@ int unregister_tracepoint_module_notifier(struct 
notifier_block *nb)
 {
return 0;
 }
+static inline
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *),
+   void *priv)
+{
+}
 #endif /* CONFIG_MODULES */
 
 /*
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 8d1507dd0724..bed4aad36d92 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -735,6 +735,27 @@ static __init int init_tracepoints(void)
return ret;
 }
 __initcall(init_tracepoints);
+
+/**
+ * for_each_module_tracepoint - iteration on all tracepoints in all modules
+ * @fct: callback
+ * @priv: private data
+ */
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
+   void *priv)
+{
+   struct tp_module *tp_mod;
+   struct module *mod;
+
+   mutex_lock(&tracepoint_module_list_mutex);
+   list_for_each_entry(tp_mod, &tracepoint_module_list, list) {
+   mod = tp_mod->mod;
+   for_each_tracepoint_range(mod->tracepoints_ptrs,
+   mod->tracepoints_ptrs + mod->num_tracepoints,
+   fct, priv);
+   }
+   mutex_unlock(&tracepoint_module_list_mutex);
+}
 #endif /* CONFIG_MODULES */
 
 /**




[PATCH v3 0/5] tracing/probes: Support tracepoint events on modules

2024-08-18 Thread Masami Hiramatsu (Google)
Hi,

This is the 3rd version of the raw tracepoint events on modules.
The previous version is here;

https://lore.kernel.org/all/fbfec8d9-d0ed-4384-bbd2-dd5c1e568...@efficios.com/

This version supports tracepoint events on unloaded modules according
to Mathieu's suggestion ([2/5], [4/5] and part of [5/5]).
The concern about blocking module unload by instrumentation is TBD.

Note that, to support tracepoints in unloaded modules, the tracepoint
event cannot check whether the given tracepoint is really defined or not.
So unless CONFIG_MODULES=n, it does not check the tracepoint's existence.
IOW, a user can specify any tracepoint name for tracepoint events; if it
does not exist, it will just be ignored.

You can download this series from;

https://git.kernel.org/pub/scm/linux/kernel/git/mhiramat/linux.git 
topic/tprobe-on-module

Thank you,

---

Masami Hiramatsu (Google) (5):
  tracepoint: Support iterating over tracepoints on modules
  tracepoint: Support iterating tracepoints in a loading module
  tracing/fprobe: Support raw tracepoint events on modules
  tracing/fprobe: Support raw tracepoints on future loaded modules
  selftests/tracing: Add a test for tracepoint events on modules


 include/linux/tracepoint.h |   20 ++
 kernel/trace/trace_fprobe.c|  179 +++-
 kernel/tracepoint.c|   42 +
 tools/testing/selftests/ftrace/config  |1 
 .../test.d/dynevent/add_remove_tprobe_module.tc|   61 +++
 .../ftrace/test.d/dynevent/tprobe_syntax_errors.tc |1 
 6 files changed, 254 insertions(+), 50 deletions(-)
 create mode 100644 
tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc

--
Masami Hiramatsu (Google) 



[PATCH] kprobes: Fix to check symbol prefixes correctly

2024-08-02 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Since str_has_prefix() takes the prefix as the 2nd argument and the string
as the first, is_cfi_preamble_symbol() always fails to check the prefix.
Fix the function parameter order so that it correctly checks the prefix.

Fixes: de02f2ac5d8c ("kprobes: Prohibit probing on CFI preamble symbol")
Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/kprobes.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e85de37d9e1e..da59c68df841 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1557,8 +1557,8 @@ static bool is_cfi_preamble_symbol(unsigned long addr)
if (lookup_symbol_name(addr, symbuf))
return false;
 
-   return str_has_prefix("__cfi_", symbuf) ||
-   str_has_prefix("__pfx_", symbuf);
+   return str_has_prefix(symbuf, "__cfi_") ||
+   str_has_prefix(symbuf, "__pfx_");
 }
 
 static int check_kprobe_address_safe(struct kprobe *p,




[PATCH] MAINTAINERS: Add uprobes entry

2024-07-11 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add uprobes entry to MAINTAINERS to clarify the maintainers.

Suggested-by: Peter Zijlstra 
Signed-off-by: Masami Hiramatsu (Google) 
---
 MAINTAINERS |   13 +
 1 file changed, 13 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index da5352dbd4f3..ae731fa2328c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -23105,6 +23105,19 @@ F: drivers/mtd/ubi/
 F: include/linux/mtd/ubi.h
 F: include/uapi/mtd/ubi-user.h
 
+UPROBES
+M: Masami Hiramatsu 
+M: Oleg Nesterov 
+M: Peter Zijlstra 
+L: linux-kernel@vger.kernel.org
+L: linux-trace-ker...@vger.kernel.org
+S: Maintained
+F: arch/*/include/asm/uprobes.h
+F: arch/*/kernel/probes/uprobes.c
+F: arch/*/kernel/uprobes.c
+F: include/linux/uprobes.h
+F: kernel/events/uprobes.c
+
 USB "USBNET" DRIVER FRAMEWORK
 M: Oliver Neukum 
 L: net...@vger.kernel.org




[PATCH] tracing/kprobes: Fix build error when find_module() is not available

2024-07-09 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

The kernel test robot reported that the find_module() is not available
if CONFIG_MODULES=n.
Fix this error by hiding find_module() in #ifdef CONFIG_MODULES, together
with the related RCU locks, inside try_module_get_by_name().

Reported-by: kernel test robot 
Closes: 
https://lore.kernel.org/oe-kbuild-all/202407070744.rclkn8sq-...@intel.com/
Closes: 
https://lore.kernel.org/oe-kbuild-all/202407070917.vvucblas-...@intel.com/
Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/trace_kprobe.c |   25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 4cee3442bcce..61a6da808203 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -794,6 +794,24 @@ static int validate_module_probe_symbol(const char 
*modname, const char *symbol)
return 0;
 }
 
+#ifdef CONFIG_MODULES
+/* Return NULL if the module is not loaded or under unloading. */
+static struct module *try_module_get_by_name(const char *name)
+{
+   struct module *mod;
+
+   rcu_read_lock_sched();
+   mod = find_module(name);
+   if (mod && !try_module_get(mod))
+   mod = NULL;
+   rcu_read_unlock_sched();
+
+   return mod;
+}
+#else
+#define try_module_get_by_name(name)   (NULL)
+#endif
+
 static int validate_probe_symbol(char *symbol)
 {
struct module *mod = NULL;
@@ -805,12 +823,7 @@ static int validate_probe_symbol(char *symbol)
modname = symbol;
symbol = p + 1;
*p = '\0';
-   /* Return 0 (defer) if the module does not exist yet. */
-   rcu_read_lock_sched();
-   mod = find_module(modname);
-   if (mod && !try_module_get(mod))
-   mod = NULL;
-   rcu_read_unlock_sched();
+   mod = try_module_get_by_name(modname);
if (!mod)
goto out;
}




[PATCH] tracing/kprobes: Fix build error when find_module() is not available

2024-07-09 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

The kernel test robot reported that the find_module() is not available
if CONFIG_MODULES=n.
Fix this error by hiding find_module() in #ifdef CONFIG_MODULES, together
with the related RCU locks, inside try_module_get_by_name().

Reported-by: kernel test robot 
Closes: 
https://lore.kernel.org/oe-kbuild-all/202407070744.rclkn8sq-...@intel.com/
Closes: 
https://lore.kernel.org/oe-kbuild-all/202407070917.vvucblas-...@intel.com/
Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/trace_kprobe.c |   25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 4cee3442bcce..61a6da808203 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -794,6 +794,24 @@ static int validate_module_probe_symbol(const char 
*modname, const char *symbol)
return 0;
 }
 
+#ifdef CONFIG_MODULES
+/* Return NULL if the module is not loaded or under unloading. */
+static struct module *try_module_get_by_name(const char *name)
+{
+   struct module *mod;
+
+   rcu_read_lock_sched();
+   mod = find_module(name);
+   if (mod && !try_module_get(mod))
+   mod = NULL;
+   rcu_read_unlock_sched();
+
+   return mod;
+}
+#else
+#define try_module_get_by_name(name)   (NULL)
+#endif
+
 static int validate_probe_symbol(char *symbol)
 {
struct module *mod = NULL;
@@ -805,12 +823,7 @@ static int validate_probe_symbol(char *symbol)
modname = symbol;
symbol = p + 1;
*p = '\0';
-   /* Return 0 (defer) if the module does not exist yet. */
-   rcu_read_lock_sched();
-   mod = find_module(modname);
-   if (mod && !try_module_get(mod))
-   mod = NULL;
-   rcu_read_unlock_sched();
+   mod = try_module_get_by_name(modname);
if (!mod)
goto out;
}




[PATCH for-next v4] tracing/kprobes: Add symbol counting check when module loads

2024-07-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Currently, kprobe event checks whether the target symbol name is unique
or not, so that it does not put a probe on an unexpected place. But this
skips the check if the target is on a module because the module may not
be loaded.

To fix this issue, this patch checks the number of probe target symbols
in a target module when the module is loaded. If the probe is not on the
unique name symbols in the module, it will be rejected at that point.

Note that a symbol which has a unique name in the target module will be
accepted even if there are same-name symbols in the kernel or other
modules.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v4:
  - Hide find_module() in try_module_get_by_name().
  - Add bpf ML.
 Changes in v3:
  - Update the patch description.
  - Update for latest probe/for-next
 Updated from last October post, which was dropped by test failure:

https://lore.kernel.org/linux-trace-kernel/169854904604.132316.12500381416261460174.stgit@devnote2/
 Changes in v2:
  - Fix to skip checking uniqueness if the target module is not loaded.
  - Fix register_module_trace_kprobe() to pass correct symbol name.
  - Fix to call __register_trace_kprobe() from module callback.
---
 kernel/trace/trace_kprobe.c |  138 +--
 1 file changed, 94 insertions(+), 44 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7fd0f8576e4c..61a6da808203 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -678,6 +678,21 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
 }
 
 #ifdef CONFIG_MODULES
+static int validate_module_probe_symbol(const char *modname, const char 
*symbol);
+
+static int register_module_trace_kprobe(struct module *mod, struct 
trace_kprobe *tk)
+{
+   const char *p;
+   int ret = 0;
+
+   p = strchr(trace_kprobe_symbol(tk), ':');
+   if (p)
+   ret = validate_module_probe_symbol(module_name(mod), p + 1);
+   if (!ret)
+   ret = __register_trace_kprobe(tk);
+   return ret;
+}
+
 /* Module notifier call back, checking event on the module */
 static int trace_kprobe_module_callback(struct notifier_block *nb,
   unsigned long val, void *data)
@@ -696,7 +711,7 @@ static int trace_kprobe_module_callback(struct 
notifier_block *nb,
if (trace_kprobe_within_module(tk, mod)) {
/* Don't need to check busy - this should have gone. */
__unregister_trace_kprobe(tk);
-   ret = __register_trace_kprobe(tk);
+   ret = register_module_trace_kprobe(mod, tk);
if (ret)
pr_warn("Failed to re-register probe %s on %s: 
%d\n",
trace_probe_name(&tk->tp),
@@ -747,17 +762,81 @@ static int count_mod_symbols(void *data, const char 
*name, unsigned long unused)
return 0;
 }
 
-static unsigned int number_of_same_symbols(char *func_name)
+static unsigned int number_of_same_symbols(const char *mod, const char 
*func_name)
 {
struct sym_count_ctx ctx = { .count = 0, .name = func_name };
 
-   kallsyms_on_each_match_symbol(count_symbols, func_name, &ctx.count);
+   if (!mod)
+   kallsyms_on_each_match_symbol(count_symbols, func_name, 
&ctx.count);
 
-   module_kallsyms_on_each_symbol(NULL, count_mod_symbols, &ctx);
+   module_kallsyms_on_each_symbol(mod, count_mod_symbols, &ctx);
 
return ctx.count;
 }
 
+static int validate_module_probe_symbol(const char *modname, const char 
*symbol)
+{
+   unsigned int count = number_of_same_symbols(modname, symbol);
+
+   if (count > 1) {
+   /*
+* Users should use ADDR to remove the ambiguity of
+* using KSYM only.
+*/
+   return -EADDRNOTAVAIL;
+   } else if (count == 0) {
+   /*
+* We can return ENOENT earlier than when register the
+* kprobe.
+*/
+   return -ENOENT;
+   }
+   return 0;
+}
+
+#ifdef CONFIG_MODULES
+/* Return NULL if the module is not loaded or under unloading. */
+static struct module *try_module_get_by_name(const char *name)
+{
+   struct module *mod;
+
+   rcu_read_lock_sched();
+   mod = find_module(name);
+   if (mod && !try_module_get(mod))
+   mod = NULL;
+   rcu_read_unlock_sched();
+
+   return mod;
+}
+#else
+#define try_module_get_by_name(name)   (NULL)
+#endif
+
+static int validate_probe_symbol(char *symbol)
+{
+   struct module *mod = NULL;
+   char *modname = NULL, *p;
+   int ret = 0;
+
+   p = strchr(symbol, ':');
+   if (p) {
+   modname = symbol;
+

[PATCH for-next v3] tracing/kprobes: Add symbol counting check when module loads

2024-07-05 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Currently, kprobe event checks whether the target symbol name is unique
or not, so that it does not put a probe on an unexpected place. But this
skips the check if the target is on a module because the module may not
be loaded.

To fix this issue, this patch checks the number of probe target symbols
in a target module when the module is loaded. If the probe is not on the
unique name symbols in the module, it will be rejected at that point.

Note that a symbol which has a unique name in the target module will be
accepted even if there are same-name symbols in the kernel or other
modules.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v3:
  - Update the patch description.
  - Update for latest probe/for-next
 Updated from last October post, which was dropped by test failure:

https://lore.kernel.org/linux-trace-kernel/169854904604.132316.12500381416261460174.stgit@devnote2/
 Changes in v2:
  - Fix to skip checking uniqueness if the target module is not loaded.
  - Fix register_module_trace_kprobe() to pass correct symbol name.
  - Fix to call __register_trace_kprobe() from module callback.
---
 kernel/trace/trace_kprobe.c |  125 ---
 1 file changed, 81 insertions(+), 44 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7fd0f8576e4c..4cee3442bcce 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -678,6 +678,21 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
 }
 
 #ifdef CONFIG_MODULES
+static int validate_module_probe_symbol(const char *modname, const char 
*symbol);
+
+static int register_module_trace_kprobe(struct module *mod, struct 
trace_kprobe *tk)
+{
+   const char *p;
+   int ret = 0;
+
+   p = strchr(trace_kprobe_symbol(tk), ':');
+   if (p)
+   ret = validate_module_probe_symbol(module_name(mod), p + 1);
+   if (!ret)
+   ret = __register_trace_kprobe(tk);
+   return ret;
+}
+
 /* Module notifier call back, checking event on the module */
 static int trace_kprobe_module_callback(struct notifier_block *nb,
   unsigned long val, void *data)
@@ -696,7 +711,7 @@ static int trace_kprobe_module_callback(struct 
notifier_block *nb,
if (trace_kprobe_within_module(tk, mod)) {
/* Don't need to check busy - this should have gone. */
__unregister_trace_kprobe(tk);
-   ret = __register_trace_kprobe(tk);
+   ret = register_module_trace_kprobe(mod, tk);
if (ret)
pr_warn("Failed to re-register probe %s on %s: 
%d\n",
trace_probe_name(&tk->tp),
@@ -747,17 +762,68 @@ static int count_mod_symbols(void *data, const char 
*name, unsigned long unused)
return 0;
 }
 
-static unsigned int number_of_same_symbols(char *func_name)
+static unsigned int number_of_same_symbols(const char *mod, const char 
*func_name)
 {
struct sym_count_ctx ctx = { .count = 0, .name = func_name };
 
-   kallsyms_on_each_match_symbol(count_symbols, func_name, &ctx.count);
+   if (!mod)
+   kallsyms_on_each_match_symbol(count_symbols, func_name, 
&ctx.count);
 
-   module_kallsyms_on_each_symbol(NULL, count_mod_symbols, &ctx);
+   module_kallsyms_on_each_symbol(mod, count_mod_symbols, &ctx);
 
return ctx.count;
 }
 
+static int validate_module_probe_symbol(const char *modname, const char 
*symbol)
+{
+   unsigned int count = number_of_same_symbols(modname, symbol);
+
+   if (count > 1) {
+   /*
+* Users should use ADDR to remove the ambiguity of
+* using KSYM only.
+*/
+   return -EADDRNOTAVAIL;
+   } else if (count == 0) {
+   /*
+* We can return ENOENT earlier than when register the
+* kprobe.
+*/
+   return -ENOENT;
+   }
+   return 0;
+}
+
+static int validate_probe_symbol(char *symbol)
+{
+   struct module *mod = NULL;
+   char *modname = NULL, *p;
+   int ret = 0;
+
+   p = strchr(symbol, ':');
+   if (p) {
+   modname = symbol;
+   symbol = p + 1;
+   *p = '\0';
+   /* Return 0 (defer) if the module does not exist yet. */
+   rcu_read_lock_sched();
+   mod = find_module(modname);
+   if (mod && !try_module_get(mod))
+   mod = NULL;
+   rcu_read_unlock_sched();
+   if (!mod)
+   goto out;
+   }
+
+   ret = validate_module_probe_symbol(modname, symbol);
+out:
+   if (p)
+   *p = ':'

[PATCH v12 19/19] fgraph: Skip push operation if no retfunc is registered

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Skip push operation only when there is no fgraph_ops which sets retfunc.

This is for optimizing the performance of fprobe on fgraph. The major
use case of fprobe is putting a probe on function entry and another
probe on exit. Since these probes are independent, if the user only uses
fprobe on function entry, we don't need to push frame information onto
the shadow stack.

Here is the performance improvement results;

Without this:
kprobe-multi   :6.265 ± 0.033M/s
kretprobe-multi:4.758 ± 0.009M/s

With this:
kprobe-multi   :6.377 ± 0.054M/s+1.79%
kretprobe-multi:4.815 ± 0.007M/s+1.20%

Signed-off-by: Masami Hiramatsu (Google) 
---
 include/linux/ftrace.h |1 +
 kernel/trace/fgraph.c  |   33 +
 kernel/trace/fprobe.c  |   25 -
 3 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fabf1a0979d4..d08e5e6e725f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1220,6 +1220,7 @@ unsigned long *fgraph_get_task_var(struct fgraph_ops 
*gops);
 #define FTRACE_RETFUNC_DEPTH 50
 #define FTRACE_RETSTACK_ALLOC_SIZE 32
 
+void ftrace_graph_update_flags(void);
 extern int register_ftrace_graph(struct fgraph_ops *ops);
 extern void unregister_ftrace_graph(struct fgraph_ops *ops);
 
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index cf3ae59a436e..3a23d4e5738c 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -175,6 +175,7 @@ int ftrace_graph_active;
 static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE];
 static unsigned long fgraph_array_bitmask;
 static bool fgraph_skip_timestamp;
+static bool fgraph_skip_all;
 
 /* LRU index table for fgraph_array */
 static int fgraph_lru_table[FGRAPH_ARRAY_SIZE];
@@ -349,6 +350,9 @@ void *fgraph_reserve_data(int idx, int size_bytes)
int curr_ret_stack = current->curr_ret_stack;
int data_size;
 
+   if (unlikely(fgraph_skip_all))
+   return NULL;
+
if (size_bytes > FGRAPH_MAX_DATA_SIZE)
return NULL;
 
@@ -632,9 +636,11 @@ int function_graph_enter_regs(unsigned long ret, unsigned 
long func,
trace.func = func;
trace.depth = ++current->curr_ret_depth;
 
-   offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0);
-   if (offset < 0)
-   goto out;
+   if (likely(!fgraph_skip_all)) {
+   offset = ftrace_push_return_trace(ret, func, frame_pointer, 
retp, 0);
+   if (offset < 0)
+   goto out;
+   }
 
 #ifdef CONFIG_HAVE_STATIC_CALL
if (static_branch_likely(&fgraph_do_direct)) {
@@ -665,6 +671,8 @@ int function_graph_enter_regs(unsigned long ret, unsigned 
long func,
current->curr_ret_stack = save_curr_ret_stack;
}
}
+   if (unlikely(fgraph_skip_all))
+   goto out;
 
if (!bitmap)
goto out_ret;
@@ -1254,6 +1262,7 @@ static void ftrace_graph_disable_direct(bool 
disable_branch)
 
 static void update_fgraph_skip_timestamp(void)
 {
+   bool skip_all = true, skip_ts = true;
int i;
 
for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) {
@@ -1262,12 +1271,20 @@ static void update_fgraph_skip_timestamp(void)
if (gops == &fgraph_stub)
continue;
 
-   if (!gops->skip_timestamp) {
-   fgraph_skip_timestamp = false;
-   return;
-   }
+   if (!gops->skip_timestamp)
+   skip_ts = false;
+   if (gops->retfunc)
+   skip_all = false;
}
-   fgraph_skip_timestamp = true;
+   fgraph_skip_timestamp = skip_ts;
+   fgraph_skip_all = skip_all;
+}
+
+void ftrace_graph_update_flags(void)
+{
+   mutex_lock(&ftrace_lock);
+   update_fgraph_skip_timestamp();
+   mutex_unlock(&ftrace_lock);
 }
 
 int register_ftrace_graph(struct fgraph_ops *gops)
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index b108d26d7ee5..188a38ac3153 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -42,6 +42,9 @@ static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
 static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE];
 static DEFINE_MUTEX(fprobe_mutex);
 
+/* Count the number of fprobe which has the exit_handler. */
+static int fprobe_nr_exit_handlers;
+
 /*
  * Find first fprobe in the hlist. It will be iterated twice in the entry
  * probe, once for correcting the total required size, the second time is
@@ -344,11 +347,18 @@ NOKPROBE_SYMBOL(fprobe_return);
 
 static struct fgraph_ops fprobe_graph_ops = {
.entryfunc  = fprobe_entry,
-   .retfunc= fprobe_return,
+   /* retfunc is set only if any fprobe.exit_handler is set. */
.skip_ti

[PATCH v12 18/19] fgraph: Skip recording calltime/rettime if it is not needed

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Skip recording calltime and rettime if the fgraph_ops does not need it.
This is a kind of performance optimization for fprobe. Since the fprobe
user does not use these entries, recording the timestamp in fgraph is
just an overhead (e.g. for eBPF, ftrace). So introduce the skip_timestamp
flag, and if all fgraph_ops set this flag, skip recording calltime and rettime.

Here is the performance results measured by
 tools/testing/selftests/bpf/benchs/run_bench_trigger.sh

Without this:
kprobe-multi   :5.700 ± 0.065M/s
kretprobe-multi:4.239 ± 0.006M/s

With skip-timestamp:
kprobe-multi   :6.265 ± 0.033M/s+9.91%
kretprobe-multi:4.758 ± 0.009M/s+12.24%

Suggested-by: Jiri Olsa 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v11:
  - Simplify it to be symmetric on push and pop. (Thus the timestamp
getting place is a bit shifted.)
 Changes in v10:
  - Add likely() to skipping timestamp.
 Changes in v9:
  - Newly added.
---
 include/linux/ftrace.h |2 ++
 kernel/trace/fgraph.c  |   36 +---
 kernel/trace/fprobe.c  |1 +
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index d8a58b940d81..fabf1a0979d4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1160,6 +1160,8 @@ struct fgraph_ops {
void*private;
trace_func_graph_ent_t  saved_func;
int idx;
+   /* If skip_timestamp is true, this does not record timestamps. */
+   boolskip_timestamp;
 };
 
 void *fgraph_reserve_data(int idx, int size_bytes);
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index d735a8c872bb..cf3ae59a436e 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -174,6 +174,7 @@ int ftrace_graph_active;
 
 static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE];
 static unsigned long fgraph_array_bitmask;
+static bool fgraph_skip_timestamp;
 
 /* LRU index table for fgraph_array */
 static int fgraph_lru_table[FGRAPH_ARRAY_SIZE];
@@ -557,7 +558,11 @@ ftrace_push_return_trace(unsigned long ret, unsigned long 
func,
return -EBUSY;
}
 
-   calltime = trace_clock_local();
+   /* This is not really 'likely' but for keeping the least path to be 
faster. */
+   if (likely(fgraph_skip_timestamp))
+   calltime = 0LL;
+   else
+   calltime = trace_clock_local();
 
offset = READ_ONCE(current->curr_ret_stack);
ret_stack = RET_STACK(current, offset);
@@ -728,6 +733,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, 
unsigned long *ret,
*ret = ret_stack->ret;
trace->func = ret_stack->func;
trace->calltime = ret_stack->calltime;
+   /* This is not really 'likely' but for keeping the least path to be 
faster. */
+   if (likely(!trace->calltime))
+   trace->rettime = 0LL;
+   else
+   trace->rettime = trace_clock_local();
+
trace->overrun = atomic_read(¤t->trace_overrun);
trace->depth = current->curr_ret_depth;
/*
@@ -788,7 +799,6 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, 
unsigned long frame_pointe
return (unsigned long)panic;
}
 
-   trace.rettime = trace_clock_local();
if (fregs)
ftrace_regs_set_instruction_pointer(fregs, ret);
 
@@ -1242,6 +1252,24 @@ static void ftrace_graph_disable_direct(bool 
disable_branch)
fgraph_direct_gops = &fgraph_stub;
 }
 
+static void update_fgraph_skip_timestamp(void)
+{
+   int i;
+
+   for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) {
+   struct fgraph_ops *gops = fgraph_array[i];
+
+   if (gops == &fgraph_stub)
+   continue;
+
+   if (!gops->skip_timestamp) {
+   fgraph_skip_timestamp = false;
+   return;
+   }
+   }
+   fgraph_skip_timestamp = true;
+}
+
 int register_ftrace_graph(struct fgraph_ops *gops)
 {
int command = 0;
@@ -1267,6 +1295,7 @@ int register_ftrace_graph(struct fgraph_ops *gops)
gops->idx = i;
 
ftrace_graph_active++;
+   update_fgraph_skip_timestamp();
 
if (ftrace_graph_active == 2)
ftrace_graph_disable_direct(true);
@@ -1298,6 +1327,7 @@ int register_ftrace_graph(struct fgraph_ops *gops)
ftrace_graph_active--;
gops->saved_func = NULL;
fgraph_lru_release_index(i);
+   update_fgraph_skip_timestamp();
}
 out:
mutex_unlock(&ftrace_lock);
@@ -1321,8 +1351,8 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
goto out;
 
fgraph_array[gops->idx] = &fgraph_stub;
-
ftrace_

[PATCH v12 17/19] Documentation: probes: Update fprobe on function-graph tracer

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Update fprobe documentation for the new fprobe on function-graph
tracer. This includes some behavior changes and the pt_regs to
ftrace_regs interface change.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Update @fregs parameter explanation.
---
 Documentation/trace/fprobe.rst |   42 ++--
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/Documentation/trace/fprobe.rst b/Documentation/trace/fprobe.rst
index 196f52386aaa..f58bdc64504f 100644
--- a/Documentation/trace/fprobe.rst
+++ b/Documentation/trace/fprobe.rst
@@ -9,9 +9,10 @@ Fprobe - Function entry/exit probe
 Introduction
 
 
-Fprobe is a function entry/exit probe mechanism based on ftrace.
-Instead of using ftrace full feature, if you only want to attach callbacks
-on function entry and exit, similar to the kprobes and kretprobes, you can
+Fprobe is a function entry/exit probe mechanism based on the function-graph
+tracer.
+Instead of tracing all functions, if you want to attach callbacks on specific
+function entry and exit, similar to the kprobes and kretprobes, you can
 use fprobe. Compared with kprobes and kretprobes, fprobe gives faster
 instrumentation for multiple functions with single handler. This document
 describes how to use fprobe.
@@ -91,12 +92,14 @@ The prototype of the entry/exit callback function are as 
follows:
 
 .. code-block:: c
 
- int entry_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct pt_regs *regs, void *entry_data);
+ int entry_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct ftrace_regs *fregs, void *entry_data);
 
- void exit_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct pt_regs *regs, void *entry_data);
+ void exit_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct ftrace_regs *fregs, void *entry_data);
 
-Note that the @entry_ip is saved at function entry and passed to exit handler.
-If the entry callback function returns !0, the corresponding exit callback 
will be cancelled.
+Note that the @entry_ip is saved at function entry and passed to exit
+handler.
+If the entry callback function returns !0, the corresponding exit callback
+will be cancelled.
 
 @fp
 This is the address of `fprobe` data structure related to this handler.
@@ -112,12 +115,10 @@ If the entry callback function returns !0, the 
corresponding exit callback will
 This is the return address that the traced function will return to,
 somewhere in the caller. This can be used at both entry and exit.
 
-@regs
-This is the `pt_regs` data structure at the entry and exit. Note that
-the instruction pointer of @regs may be different from the @entry_ip
-in the entry_handler. If you need traced instruction pointer, you need
-to use @entry_ip. On the other hand, in the exit_handler, the 
instruction
-pointer of @regs is set to the current return address.
+@fregs
+This is the `ftrace_regs` data structure at the entry and exit. This
+includes the function parameters, or the return values. So user can
+access thos values via appropriate `ftrace_regs_*` APIs.
 
 @entry_data
 This is a local storage to share the data between entry and exit 
handlers.
@@ -125,6 +126,17 @@ If the entry callback function returns !0, the 
corresponding exit callback will
 and `entry_data_size` field when registering the fprobe, the storage is
 allocated and passed to both `entry_handler` and `exit_handler`.
 
+Entry data size and exit handlers on the same function
+==
+
+Since the entry data is passed via per-task stack and it is has limited size,
+the entry data size per probe is limited to `15 * sizeof(long)`. You also need
+to take care that the different fprobes are probing on the same function, this
+limit becomes smaller. The entry data size is aligned to `sizeof(long)` and
+each fprobe which has exit handler uses a `sizeof(long)` space on the stack,
+you should keep the number of fprobes on the same function as small as
+possible.
+
 Share the callbacks with kprobes
 
 
@@ -165,8 +177,8 @@ This counter counts up when;
  - fprobe fails to take ftrace_recursion lock. This usually means that a 
function
which is traced by other ftrace users is called from the entry_handler.
 
- - fprobe fails to setup the function exit because of the shortage of rethook
-   (the shadow stack for hooking the function return.)
+ - fprobe fails to setup the function exit because of failing to allocate the
+   data buffer from the per-task shadow stack.
 
 The `fprobe::nmissed` field counts up in both cases. Therefore, the former
 skips both of entry and exit callback and the latter skips the exit




[PATCH v12 16/19] selftests/ftrace: Add a test case for repeating register/unregister fprobe

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

This test case repeats define and undefine the fprobe dynamic event to
ensure that the fprobe does not cause any issue with such operations.

Signed-off-by: Masami Hiramatsu (Google) 
---
 .../test.d/dynevent/add_remove_fprobe_repeat.tc|   19 +++
 1 file changed, 19 insertions(+)
 create mode 100644 
tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc

diff --git 
a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc 
b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
new file mode 100644
index ..b4ad09237e2a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - Repeating add/remove fprobe events
+# requires: dynamic_events "f[:[/][]] [%return] 
[]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+PLACE=$FUNCTION_FORK
+REPEAT_TIMES=64
+
+for i in `seq 1 $REPEAT_TIMES`; do
+  echo "f:myevent $PLACE" >> dynamic_events
+  grep -q myevent dynamic_events
+  test -d events/fprobes/myevent
+  echo > dynamic_events
+done
+
+clear_trace




[PATCH v12 15/19] selftests: ftrace: Remove obsolete maxactive syntax check

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Since the fprobe event does not support maxactive anymore, stop
testing the maxactive syntax error checking.

Signed-off-by: Masami Hiramatsu (Google) 
---
 .../ftrace/test.d/dynevent/fprobe_syntax_errors.tc |4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git 
a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc 
b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
index 61877d166451..c9425a34fae3 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -16,9 +16,7 @@ aarch64)
   REG=%r0 ;;
 esac
 
-check_error 'f^100 vfs_read'   # MAXACT_NO_KPROBE
-check_error 'f^1a111 vfs_read' # BAD_MAXACT
-check_error 'f^10 vfs_read'# MAXACT_TOO_BIG
+check_error 'f^100 vfs_read'   # BAD_MAXACT
 
 check_error 'f ^non_exist_func'# BAD_PROBE_ADDR (enoent)
 check_error 'f ^vfs_read+10'   # BAD_PROBE_ADDR




[PATCH v12 14/19] tracing/fprobe: Remove nr_maxactive from fprobe

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Remove the deprecated fprobe::nr_maxactive. This makes fprobe events
reject the maxactive number.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Newly added.
---
 include/linux/fprobe.h  |2 --
 kernel/trace/trace_fprobe.c |   44 ++-
 2 files changed, 6 insertions(+), 40 deletions(-)

diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index 2d06bbd99601..a86b3e4df2a0 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -54,7 +54,6 @@ struct fprobe_hlist {
  * @nmissed: The counter for missing events.
  * @flags: The status flag.
  * @entry_data_size: The private data storage size.
- * @nr_maxactive: The max number of active functions. (*deprecated)
  * @entry_handler: The callback function for function entry.
  * @exit_handler: The callback function for function exit.
  * @hlist_array: The fprobe_hlist for fprobe search from IP hash table.
@@ -63,7 +62,6 @@ struct fprobe {
unsigned long   nmissed;
unsigned intflags;
size_t  entry_data_size;
-   int nr_maxactive;
 
fprobe_entry_cb entry_handler;
fprobe_exit_cb  exit_handler;
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 86cd6a8c806a..20ef5cd5d419 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -422,7 +422,6 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
   const char *event,
   const char *symbol,
   struct tracepoint *tpoint,
-  int maxactive,
   int nargs, bool is_return)
 {
struct trace_fprobe *tf;
@@ -442,7 +441,6 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
tf->fp.entry_handler = fentry_dispatcher;
 
tf->tpoint = tpoint;
-   tf->fp.nr_maxactive = maxactive;
 
ret = trace_probe_init(&tf->tp, event, group, false, nargs);
if (ret < 0)
@@ -1021,12 +1019,11 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
 */
struct trace_fprobe *tf = NULL;
-   int i, len, new_argc = 0, ret = 0;
+   int i, new_argc = 0, ret = 0;
bool is_return = false;
char *symbol = NULL;
const char *event = NULL, *group = FPROBE_EVENT_SYSTEM;
const char **new_argv = NULL;
-   int maxactive = 0;
char buf[MAX_EVENT_NAME_LEN];
char gbuf[MAX_EVENT_NAME_LEN];
char sbuf[KSYM_NAME_LEN];
@@ -1048,33 +1045,13 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
 
trace_probe_log_init("trace_fprobe", argc, argv);
 
-   event = strchr(&argv[0][1], ':');
-   if (event)
-   event++;
-
-   if (isdigit(argv[0][1])) {
-   if (event)
-   len = event - &argv[0][1] - 1;
-   else
-   len = strlen(&argv[0][1]);
-   if (len > MAX_EVENT_NAME_LEN - 1) {
-   trace_probe_log_err(1, BAD_MAXACT);
-   goto parse_error;
-   }
-   memcpy(buf, &argv[0][1], len);
-   buf[len] = '\0';
-   ret = kstrtouint(buf, 0, &maxactive);
-   if (ret || !maxactive) {
+   if (argv[0][1] != '\0') {
+   if (argv[0][1] != ':') {
+   trace_probe_log_set_index(0);
trace_probe_log_err(1, BAD_MAXACT);
goto parse_error;
}
-   /* fprobe rethook instances are iterated over via a list. The
-* maximum should stay reasonable.
-*/
-   if (maxactive > RETHOOK_MAXACTIVE_MAX) {
-   trace_probe_log_err(1, MAXACT_TOO_BIG);
-   goto parse_error;
-   }
+   event = &argv[0][2];
}
 
trace_probe_log_set_index(1);
@@ -1084,12 +1061,6 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
if (ret < 0)
goto parse_error;
 
-   if (!is_return && maxactive) {
-   trace_probe_log_set_index(0);
-   trace_probe_log_err(1, BAD_MAXACT_TYPE);
-   goto parse_error;
-   }
-
trace_probe_log_set_index(0);
if (event) {
ret = traceprobe_parse_event_name(&event, &group, gbuf,
@@ -1147,8 +1118,7 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
goto out;
 
/* setup a probe */
-   tf = al

[PATCH v12 13/19] fprobe: Rewrite fprobe on function-graph tracer

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Rewrite fprobe implementation on function-graph tracer.
Major API changes are:
 -  'nr_maxactive' field is deprecated.
 -  This depends on CONFIG_DYNAMIC_FTRACE_WITH_ARGS or
!CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS, and
CONFIG_HAVE_FUNCTION_GRAPH_FREGS. So currently works only
on x86_64.
 -  Currently the entry size is limited in 15 * sizeof(long).
 -  If there are too many fprobe exit handlers set on the same
    function, it will fail to probe.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v12:
  - Skip updating ftrace hash if not required.
 Changes in v9:
  - Remove unneeded prototype of ftrace_regs_get_return_address().
  - Fix entry data address calculation.
  - Remove DIV_ROUND_UP() from hotpath.
 Changes in v8:
  - Use trace_func_graph_ret/ent_t for fgraph_ops.
  - Update CONFIG_FPROBE dependencies.
  - Add ftrace_regs_get_return_address() for each arch.
 Changes in v3:
  - Update for new reserve_data/retrieve_data API.
  - Fix internal push/pop on fgraph data logic so that it can
correctly save/restore the returning fprobes.
 Changes in v2:
  - Add more lockdep_assert_held(fprobe_mutex)
  - Use READ_ONCE() and WRITE_ONCE() for fprobe_hlist_node::fp.
  - Add NOKPROBE_SYMBOL() for the functions which is called from
entry/exit callback.
---
 arch/arm64/include/asm/ftrace.h |6 
 arch/loongarch/include/asm/ftrace.h |6 
 arch/powerpc/include/asm/ftrace.h   |6 
 arch/s390/include/asm/ftrace.h  |6 
 arch/x86/include/asm/ftrace.h   |6 
 include/linux/fprobe.h  |   53 ++-
 kernel/trace/Kconfig|8 
 kernel/trace/fprobe.c   |  639 +--
 lib/test_fprobe.c   |   45 --
 9 files changed, 530 insertions(+), 245 deletions(-)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 14ecb9a418d9..27e32f323048 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -132,6 +132,12 @@ ftrace_regs_get_frame_pointer(const struct ftrace_regs 
*fregs)
return fregs->fp;
 }
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
+{
+   return fregs->lr;
+}
+
 static __always_inline struct pt_regs *
 ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
 {
diff --git a/arch/loongarch/include/asm/ftrace.h 
b/arch/loongarch/include/asm/ftrace.h
index 1a73f35ea9af..c021aa3194f3 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -80,6 +80,12 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs 
*fregs, unsigned long ip)
 #define ftrace_regs_get_frame_pointer(fregs) \
((fregs)->regs.regs[22])
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+   return *(unsigned long *)(fregs->regs.regs[1]);
+}
+
 #define ftrace_graph_func ftrace_graph_func
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
   struct ftrace_ops *op, struct ftrace_regs *fregs);
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index e6ff6834bf7e..2a2d070dd23c 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -75,6 +75,12 @@ ftrace_regs_get_instruction_pointer(struct ftrace_regs 
*fregs)
 #define ftrace_regs_query_register_offset(name) \
regs_query_register_offset(name)
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+   return fregs->regs.link;
+}
+
 struct ftrace_ops;
 
 #define ftrace_graph_func ftrace_graph_func
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 0d9f6df21f81..7b80ff4d3386 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -84,6 +84,12 @@ ftrace_regs_get_frame_pointer(struct ftrace_regs *fregs)
return sp[0];   /* return backchain */
 }
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
+{
+   return fregs->regs.gprs[14];
+}
+
 #define arch_ftrace_fill_perf_regs(fregs, _regs)do {   \
(_regs)->psw.addr = (fregs)->regs.psw.addr; \
(_regs)->gprs[15] = (fregs)->regs.gprs[15]; \
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 1f4d1f7b19ed..8472ba394091 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -74,6 +74,12 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
 #define ftrace_regs_get_frame_pointer(fregs) \
frame_pointer(&(fregs)->regs)
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+   return *(unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+}
+
 struct ftrace_op

[PATCH v12 12/19] ftrace: Add CONFIG_HAVE_FTRACE_GRAPH_FUNC

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add a CONFIG_HAVE_FTRACE_GRAPH_FUNC kconfig option in addition to the
ftrace_graph_func macro check. This is so that other features (e.g. FPROBE)
which require access to ftrace_regs from fgraph_ops::entryfunc() can avoid
being compiled when fgraph cannot pass valid ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v8:
  - Newly added.
---
 arch/arm64/Kconfig |1 +
 arch/loongarch/Kconfig |1 +
 arch/powerpc/Kconfig   |1 +
 arch/riscv/Kconfig |1 +
 arch/x86/Kconfig   |1 +
 kernel/trace/Kconfig   |5 +
 6 files changed, 10 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8691683d782e..e99a3fd53efd 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -207,6 +207,7 @@ config ARM64
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_GUP_FAST
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 0f1b2057507b..f1439c42c46a 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -126,6 +126,7 @@ config LOONGARCH
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
select HAVE_EXIT_THREAD
select HAVE_GUP_FAST
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c88c6d46a5bc..910118faedaa 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -239,6 +239,7 @@ config PPC
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_GUP_FAST
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_DESCRIPTORSif PPC64_ELF_ABI_V1
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 1904393bc399..83e8c8c64b99 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -130,6 +130,7 @@ config RISCV
select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && 
(CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_FREGS
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d4655b72e6d7..7213e27b5b2b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -228,6 +228,7 @@ config X86
select HAVE_EXIT_THREAD
select HAVE_GUP_FAST
select HAVE_FENTRY  if X86_64 || DYNAMIC_FTRACE
+   select HAVE_FTRACE_GRAPH_FUNC   if HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_FREGSif HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_TRACER   if X86_32 || (X86_64 && 
DYNAMIC_FTRACE)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4a3dd81f749b..a1fa9cba0ef3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -34,6 +34,11 @@ config HAVE_FUNCTION_GRAPH_TRACER
 config HAVE_FUNCTION_GRAPH_FREGS
bool
 
+config HAVE_FTRACE_GRAPH_FUNC
+   bool
+   help
+ True if ftrace_graph_func() is defined.
+
 config HAVE_DYNAMIC_FTRACE
bool
help




[PATCH v12 11/19] bpf: Enable kprobe_multi feature if CONFIG_FPROBE is enabled

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Enable the kprobe_multi feature if CONFIG_FPROBE is enabled. The pt_regs is
converted from ftrace_regs by ftrace_partial_regs(), thus some registers
may always return 0. But it should be enough for function entry (accessing
arguments) and exit (accessing the return value).

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Florent Revest 
---
 Changes in v9:
  - Avoid wasting memory for bpf_kprobe_multi_pt_regs when
CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y
---
 kernel/trace/bpf_trace.c |   27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f72b421abe9b..77fd63027286 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2602,7 +2602,7 @@ struct bpf_session_run_ctx {
void *data;
 };
 
-#if defined(CONFIG_FPROBE) && defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
+#ifdef CONFIG_FPROBE
 struct bpf_kprobe_multi_link {
struct bpf_link link;
struct fprobe fp;
@@ -2625,6 +2625,13 @@ struct user_syms {
char *buf;
 };
 
+#ifndef CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST
+static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs);
+#define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs)
+#else
+#define bpf_kprobe_multi_pt_regs_ptr() (NULL)
+#endif
+
 static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, 
u32 cnt)
 {
unsigned long __user usymbol;
@@ -2819,7 +2826,7 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx 
*ctx)
 
 static int
 kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
-  unsigned long entry_ip, struct pt_regs *regs,
+  unsigned long entry_ip, struct ftrace_regs *fregs,
   bool is_return, void *data)
 {
struct bpf_kprobe_multi_run_ctx run_ctx = {
@@ -2831,6 +2838,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link 
*link,
.entry_ip = entry_ip,
};
struct bpf_run_ctx *old_run_ctx;
+   struct pt_regs *regs;
int err;
 
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
@@ -2841,6 +2849,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link 
*link,
 
migrate_disable();
rcu_read_lock();
+   regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
@@ -2857,15 +2866,11 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned 
long fentry_ip,
  unsigned long ret_ip, struct ftrace_regs *fregs,
  void *data)
 {
-   struct pt_regs *regs = ftrace_get_regs(fregs);
struct bpf_kprobe_multi_link *link;
int err;
 
-   if (!regs)
-   return 0;
-
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
-   err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, 
false, data);
+   err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), fregs, 
false, data);
return is_kprobe_session(link->link.prog) ? err : 0;
 }
 
@@ -2875,13 +2880,9 @@ kprobe_multi_link_exit_handler(struct fprobe *fp, 
unsigned long fentry_ip,
   void *data)
 {
struct bpf_kprobe_multi_link *link;
-   struct pt_regs *regs = ftrace_get_regs(fregs);
-
-   if (!regs)
-   return;
 
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
-   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, true, 
data);
+   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), fregs, true, 
data);
 }
 
 static int symbols_cmp_r(const void *a, const void *b, const void *priv)
@@ -3142,7 +3143,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr 
*attr, struct bpf_prog *pr
kvfree(cookies);
return err;
 }
-#else /* !CONFIG_FPROBE || !CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+#else /* !CONFIG_FPROBE */
 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog 
*prog)
 {
return -EOPNOTSUPP;




[PATCH v12 10/19] tracing/fprobe: Enable fprobe events with CONFIG_DYNAMIC_FTRACE_WITH_ARGS

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Allow fprobe events to be enabled with CONFIG_DYNAMIC_FTRACE_WITH_ARGS.
With this change, fprobe events mostly use ftrace_regs instead of pt_regs.
Note that if the arch doesn't enable HAVE_PT_REGS_COMPAT_FTRACE_REGS,
fprobe events will not be able to be used from perf.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v9:
  - Copy store_trace_entry_data() as store_fprobe_entry_data() for
fprobe.
 Changes in v3:
  - Use ftrace_regs_get_return_value().
 Changes in v2:
  - Define ftrace_regs_get_kernel_stack_nth() for
!CONFIG_HAVE_REGS_AND_STACK_ACCESS_API.
 Changes from previous series: Update against the new series.
---
 include/linux/ftrace.h  |   17 ++
 kernel/trace/Kconfig|1 
 kernel/trace/trace_fprobe.c |  107 +--
 kernel/trace/trace_probe_tmpl.h |2 -
 4 files changed, 86 insertions(+), 41 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index d9a3723f987d..d8a58b940d81 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -255,6 +255,23 @@ static __always_inline bool ftrace_regs_has_args(struct 
ftrace_regs *fregs)
frame_pointer(&(fregs)->regs)
 #endif
 
+#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API
+static __always_inline unsigned long
+ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth)
+{
+   unsigned long *stackp;
+
+   stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+   if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) ==
+   ((unsigned long)stackp & ~(THREAD_SIZE - 1)))
+   return *(stackp + nth);
+
+   return 0;
+}
+#else /* !CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+#define ftrace_regs_get_kernel_stack_nth(fregs, nth)   (0L)
+#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  struct ftrace_ops *op, struct ftrace_regs *fregs);
 
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 15e340a865f5..4a3dd81f749b 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -680,7 +680,6 @@ config FPROBE_EVENTS
select TRACING
select PROBE_EVENTS
select DYNAMIC_EVENTS
-   depends on DYNAMIC_FTRACE_WITH_REGS
default y
help
  This allows user to add tracing events on the function entry and
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 273cdf3cf70c..86cd6a8c806a 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -133,7 +133,7 @@ static int
 process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
   void *dest, void *base)
 {
-   struct pt_regs *regs = rec;
+   struct ftrace_regs *fregs = rec;
unsigned long val;
int ret;
 
@@ -141,17 +141,17 @@ process_fetch_insn(struct fetch_insn *code, void *rec, 
void *edata,
/* 1st stage: get value from context */
switch (code->op) {
case FETCH_OP_STACK:
-   val = regs_get_kernel_stack_nth(regs, code->param);
+   val = ftrace_regs_get_kernel_stack_nth(fregs, code->param);
break;
case FETCH_OP_STACKP:
-   val = kernel_stack_pointer(regs);
+   val = ftrace_regs_get_stack_pointer(fregs);
break;
case FETCH_OP_RETVAL:
-   val = regs_return_value(regs);
+   val = ftrace_regs_get_return_value(fregs);
break;
 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
case FETCH_OP_ARG:
-   val = regs_get_kernel_argument(regs, code->param);
+   val = ftrace_regs_get_argument(fregs, code->param);
break;
case FETCH_OP_EDATA:
val = *(unsigned long *)((unsigned long)edata + code->offset);
@@ -174,7 +174,7 @@ NOKPROBE_SYMBOL(process_fetch_insn)
 /* function entry handler */
 static nokprobe_inline void
 __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
-   struct pt_regs *regs,
+   struct ftrace_regs *fregs,
struct trace_event_file *trace_file)
 {
struct fentry_trace_entry_head *entry;
@@ -188,41 +188,71 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned 
long entry_ip,
if (trace_trigger_soft_disabled(trace_file))
return;
 
-   dsize = __get_data_size(&tf->tp, regs, NULL);
+   dsize = __get_data_size(&tf->tp, fregs, NULL);
 
entry = trace_event_buffer_reserve(&fbuffer, trace_file,
   sizeof(*entry) + tf->tp.size + 
dsize);
if (!entry)
return;
 
-   fbuffer.regs = regs;
+   fbuffer.regs = ftrace_get_regs(fregs);
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
en

[PATCH v12 09/19] tracing: Add ftrace_fill_perf_regs() for perf event

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add ftrace_fill_perf_regs() which should be compatible with the
perf_fetch_caller_regs(). In other words, the pt_regs returned from the
ftrace_fill_perf_regs() must satisfy 'user_mode(regs) == false' and can be
used for stack tracing.

Signed-off-by: Masami Hiramatsu (Google) 
---
  Changes from previous series: NOTHING, just forward ported.
---
 arch/arm64/include/asm/ftrace.h   |7 +++
 arch/powerpc/include/asm/ftrace.h |7 +++
 arch/s390/include/asm/ftrace.h|5 +
 arch/x86/include/asm/ftrace.h |7 +++
 include/linux/ftrace.h|   31 +++
 5 files changed, 57 insertions(+)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 5cd587afab6d..14ecb9a418d9 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -143,6 +143,13 @@ ftrace_partial_regs(const struct ftrace_regs *fregs, 
struct pt_regs *regs)
return regs;
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {  \
+   (_regs)->pc = (fregs)->pc;  \
+   (_regs)->regs[29] = (fregs)->fp;\
+   (_regs)->sp = (fregs)->sp;  \
+   (_regs)->pstate = PSR_MODE_EL1h;\
+   } while (0)
+
 int ftrace_regs_query_register_offset(const char *name);
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index 23d26f3afae4..e6ff6834bf7e 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -42,6 +42,13 @@ static __always_inline struct pt_regs 
*arch_ftrace_get_regs(struct ftrace_regs *
return fregs->regs.msr ? &fregs->regs : NULL;
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {  \
+   (_regs)->result = 0;\
+   (_regs)->nip = (fregs)->regs.nip;   \
+   (_regs)->gpr[1] = (fregs)->regs.gpr[1]; \
+   asm volatile("mfmsr %0" : "=r" ((_regs)->msr)); \
+   } while (0)
+
 static __always_inline void
 ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
unsigned long ip)
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 9cdd48a46bf7..0d9f6df21f81 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -84,6 +84,11 @@ ftrace_regs_get_frame_pointer(struct ftrace_regs *fregs)
return sp[0];   /* return backchain */
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs)do {   \
+   (_regs)->psw.addr = (fregs)->regs.psw.addr; \
+   (_regs)->gprs[15] = (fregs)->regs.gprs[15]; \
+   } while (0)
+
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 /*
  * When an ftrace registered caller is tracing a function that is
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 669771ef3b5b..1f4d1f7b19ed 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -46,6 +46,13 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
return &fregs->regs;
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {  \
+   (_regs)->ip = (fregs)->regs.ip; \
+   (_regs)->sp = (fregs)->regs.sp; \
+   (_regs)->cs = __KERNEL_CS;  \
+   (_regs)->flags = 0; \
+   } while (0)
+
 #define ftrace_regs_set_instruction_pointer(fregs, _ip)\
do { (fregs)->regs.ip = (_ip); } while (0)
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8e5da4dfb669..d9a3723f987d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -193,6 +193,37 @@ ftrace_partial_regs(struct ftrace_regs *fregs, struct 
pt_regs *regs)
 
 #endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || 
CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
 
+#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+
+/*
+ * Please define arch dependent pt_regs which compatible to the
+ * perf_arch_fetch_caller_regs() but based on ftrace_regs.
+ * This requires
+ *   - user_mode(_regs) returns false (always kernel mode).
+ *   - able to use the _regs for stack trace.
+ */
+#ifndef arch_ftrace_fill_perf_regs
+/* As same as perf_arch_fetch_caller_regs(), do nothing by default */
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {} while (0)
+#endif
+
+static __always_inline struct pt_regs *
+ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+   arch_ftrace_fill_perf_regs(fregs, regs);
+   return regs;
+}
+
+#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
+
+static __always_inline struct pt_regs

[PATCH v12 08/19] tracing: Add ftrace_partial_regs() for converting ftrace_regs to pt_regs

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add ftrace_partial_regs() which converts the ftrace_regs to pt_regs.
This is for the eBPF which needs this to keep the same pt_regs interface
to access registers.
Thus when replacing the pt_regs with ftrace_regs in fprobes (which is
used by kprobe_multi eBPF event), this will be used.

If the architecture defines its own ftrace_regs, this copies partial
registers to pt_regs and returns it. If not, ftrace_regs is the same as
pt_regs and ftrace_partial_regs() will return ftrace_regs::regs.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Florent Revest 
---
 Changes in v8:
  - Add the reason why this required in changelog.
 Changes from previous series: NOTHING, just forward ported.
---
 arch/arm64/include/asm/ftrace.h |   11 +++
 include/linux/ftrace.h  |   17 +
 2 files changed, 28 insertions(+)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index dffaab3dd1f1..5cd587afab6d 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -132,6 +132,17 @@ ftrace_regs_get_frame_pointer(const struct ftrace_regs 
*fregs)
return fregs->fp;
 }
 
+static __always_inline struct pt_regs *
+ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+   memcpy(regs->regs, fregs->regs, sizeof(u64) * 9);
+   regs->sp = fregs->sp;
+   regs->pc = fregs->pc;
+   regs->regs[29] = fregs->fp;
+   regs->regs[30] = fregs->lr;
+   return regs;
+}
+
 int ftrace_regs_query_register_offset(const char *name);
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fa578748f7d2..8e5da4dfb669 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -176,6 +176,23 @@ static __always_inline struct pt_regs 
*ftrace_get_regs(struct ftrace_regs *fregs
return arch_ftrace_get_regs(fregs);
 }
 
+#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \
+   defined(CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST)
+
+static __always_inline struct pt_regs *
+ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+   /*
+* If CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y, ftrace_regs memory
+* layout is the same as pt_regs. So always returns that address.
+* Since arch_ftrace_get_regs() will check some members and may return
+* NULL, we can not use it.
+*/
+   return &fregs->regs;
+}
+
+#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || 
CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
+
 /*
  * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs.
  * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs.




[PATCH v12 07/19] fprobe: Use ftrace_regs in fprobe exit handler

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Change the fprobe exit handler to use the ftrace_regs structure instead of
pt_regs. This also introduces HAVE_PT_REGS_TO_FTRACE_REGS_CAST, which means
the ftrace_regs memory layout is equal to that of pt_regs, so that one can
be cast to the other. Fprobe introduces a new dependency for that.

Signed-off-by: Masami Hiramatsu (Google) 
---
  Changes in v3:
   - Use ftrace_regs_get_return_value()
  Changes from previous series: NOTHING, just forward ported.
---
 arch/loongarch/Kconfig  |1 +
 arch/s390/Kconfig   |1 +
 arch/x86/Kconfig|1 +
 include/linux/fprobe.h  |2 +-
 include/linux/ftrace.h  |6 ++
 kernel/trace/Kconfig|8 
 kernel/trace/bpf_trace.c|6 +-
 kernel/trace/fprobe.c   |3 ++-
 kernel/trace/trace_fprobe.c |6 +-
 lib/test_fprobe.c   |6 +++---
 samples/fprobe/fprobe_example.c |2 +-
 11 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 23014d5f0047..0f1b2057507b 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -119,6 +119,7 @@ config LOONGARCH
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+   select HAVE_PT_REGS_TO_FTRACE_REGS_CAST
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 33688d43fd14..adc8f6620525 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -173,6 +173,7 @@ config S390
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+   select HAVE_PT_REGS_TO_FTRACE_REGS_CAST
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5fc3a2997977..d4655b72e6d7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -218,6 +218,7 @@ config X86
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_DYNAMIC_FTRACE_WITH_ARGSif X86_64
+   select HAVE_PT_REGS_TO_FTRACE_REGS_CAST if X86_64
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_SAMPLE_FTRACE_DIRECTif X86_64
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI  if X86_64
diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index ca64ee5e45d2..ef609bcca0f9 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -14,7 +14,7 @@ typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned 
long entry_ip,
   void *entry_data);
 
 typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip,
-  unsigned long ret_ip, struct pt_regs *regs,
+  unsigned long ret_ip, struct ftrace_regs *regs,
   void *entry_data);
 
 /**
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 85394b9fb630..fa578748f7d2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -162,6 +162,12 @@ struct ftrace_regs {
 #define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0)
 #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
 
+#ifdef CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST
+
+static_assert(sizeof(struct pt_regs) == sizeof(struct ftrace_regs));
+
+#endif /* CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
+
 static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs 
*fregs)
 {
if (!fregs)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 78b0da6fda1a..15e340a865f5 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -57,6 +57,13 @@ config HAVE_DYNAMIC_FTRACE_WITH_ARGS
 This allows for use of ftrace_regs_get_argument() and
 ftrace_regs_get_stack_pointer().
 
+config HAVE_PT_REGS_TO_FTRACE_REGS_CAST
+   bool
+   help
+If this is set, the memory layout of the ftrace_regs data structure
+is the same as the pt_regs. So the pt_regs is possible to be casted
+to ftrace_regs.
+
 config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
bool
help
@@ -288,6 +295,7 @@ config FPROBE
bool "Kernel Function Probe (fprobe)"
depends on FUNCTION_TRACER
depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS
+   depends on HAVE_PT_REGS_TO_FTRACE_REGS_CAST || 
!HAVE_DYNAMIC_FTRACE_WITH_ARGS
depends on HAVE_RETHOOK
select RETHOOK
default n
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 7e782a58ca6d..f72b421abe9b 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2871,10 +2871,14 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned 
long fentry_ip,

[PATCH v12 06/19] fprobe: Use ftrace_regs in fprobe entry handler

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

This allows fprobes to be available with CONFIG_DYNAMIC_FTRACE_WITH_ARGS
instead of CONFIG_DYNAMIC_FTRACE_WITH_REGS, then we can enable fprobe
on arm64.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Florent Revest 
---
 Changes in v6:
  - Keep using SAVE_REGS flag to avoid breaking bpf kprobe-multi test.
---
 include/linux/fprobe.h  |2 +-
 kernel/trace/Kconfig|3 ++-
 kernel/trace/bpf_trace.c|   10 +++---
 kernel/trace/fprobe.c   |3 ++-
 kernel/trace/trace_fprobe.c |6 +-
 lib/test_fprobe.c   |4 ++--
 samples/fprobe/fprobe_example.c |2 +-
 7 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index f39869588117..ca64ee5e45d2 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -10,7 +10,7 @@
 struct fprobe;
 
 typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned long entry_ip,
-  unsigned long ret_ip, struct pt_regs *regs,
+  unsigned long ret_ip, struct ftrace_regs *regs,
   void *entry_data);
 
 typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 33fcfb36eca5..78b0da6fda1a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -287,7 +287,7 @@ config DYNAMIC_FTRACE_WITH_ARGS
 config FPROBE
bool "Kernel Function Probe (fprobe)"
depends on FUNCTION_TRACER
-   depends on DYNAMIC_FTRACE_WITH_REGS
+   depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS
depends on HAVE_RETHOOK
select RETHOOK
default n
@@ -672,6 +672,7 @@ config FPROBE_EVENTS
select TRACING
select PROBE_EVENTS
select DYNAMIC_EVENTS
+   depends on DYNAMIC_FTRACE_WITH_REGS
default y
help
  This allows user to add tracing events on the function entry and
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 6249dac61701..7e782a58ca6d 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2602,7 +2602,7 @@ struct bpf_session_run_ctx {
void *data;
 };
 
-#ifdef CONFIG_FPROBE
+#if defined(CONFIG_FPROBE) && defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
 struct bpf_kprobe_multi_link {
struct bpf_link link;
struct fprobe fp;
@@ -2854,12 +2854,16 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link 
*link,
 
 static int
 kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
  void *data)
 {
+   struct pt_regs *regs = ftrace_get_regs(fregs);
struct bpf_kprobe_multi_link *link;
int err;
 
+   if (!regs)
+   return 0;
+
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, 
false, data);
return is_kprobe_session(link->link.prog) ? err : 0;
@@ -3134,7 +3138,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr 
*attr, struct bpf_prog *pr
kvfree(cookies);
return err;
 }
-#else /* !CONFIG_FPROBE */
+#else /* !CONFIG_FPROBE || !CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog 
*prog)
 {
return -EOPNOTSUPP;
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 9ff018245840..3d3789283873 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -46,7 +46,7 @@ static inline void __fprobe_handler(unsigned long ip, 
unsigned long parent_ip,
}
 
if (fp->entry_handler)
-   ret = fp->entry_handler(fp, ip, parent_ip, 
ftrace_get_regs(fregs), entry_data);
+   ret = fp->entry_handler(fp, ip, parent_ip, fregs, entry_data);
 
/* If entry_handler returns !0, nmissed is not counted. */
if (rh) {
@@ -182,6 +182,7 @@ static void fprobe_init(struct fprobe *fp)
fp->ops.func = fprobe_kprobe_handler;
else
fp->ops.func = fprobe_handler;
+
fp->ops.flags |= FTRACE_OPS_FL_SAVE_REGS;
 }
 
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 62e6a8f4aae9..b2c20d4fdfd7 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -338,12 +338,16 @@ NOKPROBE_SYMBOL(fexit_perf_func);
 #endif /* CONFIG_PERF_EVENTS */
 
 static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
-unsigned long ret_ip, struct pt_regs *regs,
+unsigned long ret_ip, struct ftrace_regs *fregs,
 void *entry_data)
 {
struct trace_fprobe *tf = container_of(fp, s

[PATCH v12 05/19] function_graph: Pass ftrace_regs to retfunc

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Pass ftrace_regs to the fgraph_ops::retfunc(). If ftrace_regs is not
available, it passes a NULL instead. User callback function can access
some registers (including return address) via this ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v8:
  - Pass ftrace_regs to retfunc, instead of adding retregfunc.
 Changes in v6:
  - update to use ftrace_regs_get_return_value() because of reordering
patches.
 Changes in v3:
  - Update for new multiple fgraph.
  - Save the return address to instruction pointer in ftrace_regs.
---
 include/linux/ftrace.h   |3 ++-
 kernel/trace/fgraph.c|   16 +++-
 kernel/trace/ftrace.c|3 ++-
 kernel/trace/trace.h |3 ++-
 kernel/trace/trace_functions_graph.c |7 ---
 kernel/trace/trace_irqsoff.c |3 ++-
 kernel/trace/trace_sched_wakeup.c|3 ++-
 kernel/trace/trace_selftest.c|3 ++-
 8 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9230af20c92e..85394b9fb630 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1069,7 +1069,8 @@ struct fgraph_ops;
 
 /* Type of the callback handlers for tracing function graph*/
 typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *,
-  struct fgraph_ops *); /* return */
+  struct fgraph_ops *,
+  struct ftrace_regs *); /* return */
 typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *,
  struct fgraph_ops *,
  struct ftrace_regs *); /* entry */
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 709f920da939..d735a8c872bb 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -297,7 +297,8 @@ static int entry_run(struct ftrace_graph_ent *trace, struct 
fgraph_ops *ops,
 }
 
 /* ftrace_graph_return set to this to tell some archs to run function graph */
-static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops)
+static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops,
+  struct ftrace_regs *fregs)
 {
 }
 
@@ -491,7 +492,8 @@ int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace,
 }
 
 static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
 {
 }
 
@@ -787,6 +789,9 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, 
unsigned long frame_pointe
}
 
trace.rettime = trace_clock_local();
+   if (fregs)
+   ftrace_regs_set_instruction_pointer(fregs, ret);
+
 #ifdef CONFIG_FUNCTION_GRAPH_RETVAL
trace.retval = ftrace_regs_get_return_value(fregs);
 #endif
@@ -796,7 +801,7 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, 
unsigned long frame_pointe
 #ifdef CONFIG_HAVE_STATIC_CALL
if (static_branch_likely(&fgraph_do_direct)) {
if (test_bit(fgraph_direct_gops->idx, &bitmap))
-   static_call(fgraph_retfunc)(&trace, fgraph_direct_gops);
+   static_call(fgraph_retfunc)(&trace, fgraph_direct_gops, 
fregs);
} else
 #endif
{
@@ -806,7 +811,7 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, 
unsigned long frame_pointe
if (gops == &fgraph_stub)
continue;
 
-   gops->retfunc(&trace, gops);
+   gops->retfunc(&trace, gops, fregs);
}
}
 
@@ -956,7 +961,8 @@ void ftrace_graph_sleep_time_control(bool enable)
  * Simply points to ftrace_stub, but with the proper protocol.
  * Defined by the linker script in linux/vmlinux.lds.h
  */
-void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops 
*gops);
+void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops,
+  struct ftrace_regs *fregs);
 
 /* The callbacks that hook a function */
 trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 64d15428cffc..725a95b161a1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -840,7 +840,8 @@ static int profile_graph_entry(struct ftrace_graph_ent 
*trace,
 }
 
 static void profile_graph_return(struct ftrace_graph_ret *trace,
-struct fgraph_ops *gops)
+struct fgraph_ops *gops,
+struct ftrace_regs *fregs)
 {
struct ftrace_ret_stack *ret_stack;
struct ftrace_profile_stat *stat;
diff --git a/kernel/trace/trace.h b/ke

[PATCH v12 04/19] function_graph: Replace fgraph_ret_regs with ftrace_regs

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Use ftrace_regs instead of fgraph_ret_regs for tracing return value
on function_graph tracer because of simplifying the callback interface.

The CONFIG_HAVE_FUNCTION_GRAPH_RETVAL is also replaced by
CONFIG_HAVE_FUNCTION_GRAPH_FREGS.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v8:
  - Newly added.
---
 arch/arm64/Kconfig  |2 +-
 arch/arm64/include/asm/ftrace.h |   23 ++-
 arch/arm64/kernel/asm-offsets.c |   12 
 arch/arm64/kernel/entry-ftrace.S|   32 ++--
 arch/loongarch/Kconfig  |2 +-
 arch/loongarch/include/asm/ftrace.h |   24 ++--
 arch/loongarch/kernel/asm-offsets.c |   12 
 arch/loongarch/kernel/mcount.S  |   17 ++---
 arch/loongarch/kernel/mcount_dyn.S  |   14 +++---
 arch/riscv/Kconfig  |2 +-
 arch/riscv/include/asm/ftrace.h |   26 +-
 arch/riscv/kernel/mcount.S  |   24 +---
 arch/s390/Kconfig   |2 +-
 arch/s390/include/asm/ftrace.h  |   26 +-
 arch/s390/kernel/asm-offsets.c  |6 --
 arch/s390/kernel/mcount.S   |9 +
 arch/x86/Kconfig|2 +-
 arch/x86/include/asm/ftrace.h   |   22 ++
 arch/x86/kernel/ftrace_32.S |   15 +--
 arch/x86/kernel/ftrace_64.S |   17 +
 include/linux/ftrace.h  |   14 +++---
 kernel/trace/Kconfig|4 ++--
 kernel/trace/fgraph.c   |   21 +
 23 files changed, 122 insertions(+), 206 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5d91259ee7b5..8691683d782e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -210,7 +210,7 @@ config ARM64
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
-   select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
+   select HAVE_FUNCTION_GRAPH_FREGS
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_GCC_PLUGINS
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index dc9cf0bd2a4c..dffaab3dd1f1 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -126,6 +126,12 @@ ftrace_override_function_with_return(struct ftrace_regs 
*fregs)
fregs->pc = fregs->lr;
 }
 
+static __always_inline unsigned long
+ftrace_regs_get_frame_pointer(const struct ftrace_regs *fregs)
+{
+   return fregs->fp;
+}
+
 int ftrace_regs_query_register_offset(const char *name);
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
@@ -183,23 +189,6 @@ static inline bool arch_syscall_match_sym_name(const char 
*sym,
 
 #ifndef __ASSEMBLY__
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-struct fgraph_ret_regs {
-   /* x0 - x7 */
-   unsigned long regs[8];
-
-   unsigned long fp;
-   unsigned long __unused;
-};
-
-static inline unsigned long fgraph_ret_regs_return_value(struct 
fgraph_ret_regs *ret_regs)
-{
-   return ret_regs->regs[0];
-}
-
-static inline unsigned long fgraph_ret_regs_frame_pointer(struct 
fgraph_ret_regs *ret_regs)
-{
-   return ret_regs->fp;
-}
 
 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
   unsigned long frame_pointer);
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 81496083c041..81bb6704ff5a 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -200,18 +200,6 @@ int main(void)
   DEFINE(FTRACE_OPS_FUNC,  offsetof(struct ftrace_ops, func));
 #endif
   BLANK();
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-  DEFINE(FGRET_REGS_X0,offsetof(struct 
fgraph_ret_regs, regs[0]));
-  DEFINE(FGRET_REGS_X1,offsetof(struct 
fgraph_ret_regs, regs[1]));
-  DEFINE(FGRET_REGS_X2,offsetof(struct 
fgraph_ret_regs, regs[2]));
-  DEFINE(FGRET_REGS_X3,offsetof(struct 
fgraph_ret_regs, regs[3]));
-  DEFINE(FGRET_REGS_X4,offsetof(struct 
fgraph_ret_regs, regs[4]));
-  DEFINE(FGRET_REGS_X5,offsetof(struct 
fgraph_ret_regs, regs[5]));
-  DEFINE(FGRET_REGS_X6,offsetof(struct 
fgraph_ret_regs, regs[6]));
-  DEFINE(FGRET_REGS_X7,offsetof(struct 
fgraph_ret_regs, regs[7]));
-  DEFINE(FGRET_REGS_FP,offsetof(struct 
fgraph_ret_regs, fp));
-  DEFINE(FGRET_REGS_SIZE,  sizeof(struct fgraph_ret_regs));
-#endif
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
   DEFINE(FTRACE_OPS_DIRECT_CALL,   offsetof(struct f

[PATCH v12 03/19] function_graph: Pass ftrace_regs to entryfunc

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Pass ftrace_regs to the fgraph_ops::entryfunc(). If ftrace_regs is not
available, it passes a NULL instead. User callback function can access
some registers (including return address) via this ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v11:
  - Update for the latest for-next branch.
 Changes in v8:
  - Just pass ftrace_regs to the handler instead of adding a new
entryregfunc.
  - Update riscv ftrace_graph_func().
 Changes in v3:
  - Update for new multiple fgraph.
---
 arch/arm64/kernel/ftrace.c   |   20 +++-
 arch/loongarch/kernel/ftrace_dyn.c   |   10 +-
 arch/powerpc/kernel/trace/ftrace.c   |2 +
 arch/powerpc/kernel/trace/ftrace_64_pg.c |   10 --
 arch/riscv/kernel/ftrace.c   |   17 ++
 arch/x86/kernel/ftrace.c |   50 +-
 include/linux/ftrace.h   |   18 ---
 kernel/trace/fgraph.c|   23 --
 kernel/trace/ftrace.c|3 +-
 kernel/trace/trace.h |3 +-
 kernel/trace/trace_functions_graph.c |3 +-
 kernel/trace/trace_irqsoff.c |3 +-
 kernel/trace/trace_sched_wakeup.c|3 +-
 kernel/trace/trace_selftest.c|8 +++--
 14 files changed, 128 insertions(+), 45 deletions(-)

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index a650f5e11fc5..bc647b725e6a 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -481,7 +481,25 @@ void prepare_ftrace_return(unsigned long self_addr, 
unsigned long *parent,
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
   struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
-   prepare_ftrace_return(ip, &fregs->lr, fregs->fp);
+   unsigned long return_hooker = (unsigned long)&return_to_handler;
+   unsigned long frame_pointer = fregs->fp;
+   unsigned long *parent = &fregs->lr;
+   unsigned long old;
+
+   if (unlikely(atomic_read(¤t->tracing_graph_pause)))
+   return;
+
+   /*
+* Note:
+* No protection against faulting at *parent, which may be seen
+* on other archs. It's unlikely on AArch64.
+*/
+   old = *parent;
+
+   if (!function_graph_enter_regs(old, ip, frame_pointer,
+  (void *)frame_pointer, fregs)) {
+   *parent = return_hooker;
+   }
 }
 #else
 /*
diff --git a/arch/loongarch/kernel/ftrace_dyn.c 
b/arch/loongarch/kernel/ftrace_dyn.c
index bff058317062..966e0f7f7aca 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -243,8 +243,16 @@ void ftrace_graph_func(unsigned long ip, unsigned long 
parent_ip,
 {
struct pt_regs *regs = &fregs->regs;
unsigned long *parent = (unsigned long *)®s->regs[1];
+   unsigned long return_hooker = (unsigned long)&return_to_handler;
+   unsigned long old;
+
+   if (unlikely(atomic_read(¤t->tracing_graph_pause)))
+   return;
+
+   old = *parent;
 
-   prepare_ftrace_return(ip, (unsigned long *)parent);
+   if (!function_graph_enter_regs(old, ip, 0, parent, fregs))
+   *parent = return_hooker;
 }
 #else
 static int ftrace_modify_graph_caller(bool enable)
diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index d8d6b4fd9a14..a1a0e0b57662 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -434,7 +434,7 @@ void ftrace_graph_func(unsigned long ip, unsigned long 
parent_ip,
if (bit < 0)
goto out;
 
-   if (!function_graph_enter(parent_ip, ip, 0, (unsigned long *)sp))
+   if (!function_graph_enter_regs(parent_ip, ip, 0, (unsigned long *)sp, 
fregs))
parent_ip = ppc_function_entry(return_to_handler);
 
ftrace_test_recursion_unlock(bit);
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c 
b/arch/powerpc/kernel/trace/ftrace_64_pg.c
index 12fab1803bcf..4ae9eeb1c8f1 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.c
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -800,7 +800,8 @@ int ftrace_disable_ftrace_graph_caller(void)
  * in current thread info. Return the address we want to divert to.
  */
 static unsigned long
-__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long 
sp)
+__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long 
sp,
+   struct ftrace_regs *fregs)
 {
unsigned long return_hooker;
int bit;
@@ -817,7 +818,7 @@ __prepare_ftrace_return(unsigned long parent, unsigned long 
ip, unsigned long sp
 
return_hooker = ppc_function_entry(return_to_handler);
 
-   if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
+   if (!function

[PATCH v12 02/19] tracing: Rename ftrace_regs_return_value to ftrace_regs_get_return_value

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Rename ftrace_regs_return_value to ftrace_regs_get_return_value as same as
other ftrace_regs_get/set_* APIs.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Mark Rutland 
---
 Changes in v6:
  - Moved to top of the series.
 Changes in v3:
  - Newly added.
---
 arch/loongarch/include/asm/ftrace.h |2 +-
 arch/powerpc/include/asm/ftrace.h   |2 +-
 arch/s390/include/asm/ftrace.h  |2 +-
 arch/x86/include/asm/ftrace.h   |2 +-
 include/linux/ftrace.h  |2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/loongarch/include/asm/ftrace.h 
b/arch/loongarch/include/asm/ftrace.h
index c0a682808e07..6f8517d59954 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -69,7 +69,7 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs 
*fregs, unsigned long ip)
regs_get_kernel_argument(&(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(&(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(&(fregs)->regs, ret)
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index 559560286e6d..23d26f3afae4 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -59,7 +59,7 @@ ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs)
regs_get_kernel_argument(&(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(&(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(&(fregs)->regs, ret)
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index fbadca645af7..de76c21eb4a3 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -83,7 +83,7 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
regs_get_kernel_argument(&(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(&(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(&(fregs)->regs, ret)
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 0152a81d9b4a..78f6a200e15b 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -56,7 +56,7 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
regs_get_kernel_argument(&(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(&(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(&(fregs)->regs, ret)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 3c8a19ea8f45..bf04b29f9da1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -183,7 +183,7 @@ static __always_inline bool ftrace_regs_has_args(struct 
ftrace_regs *fregs)
regs_get_kernel_argument(ftrace_get_regs(fregs), n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(ftrace_get_regs(fregs))
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(ftrace_get_regs(fregs))
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(ftrace_get_regs(fregs), ret)




[PATCH v12 01/19] tracing: Add a comment about ftrace_regs definition

2024-07-03 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

To clarify what will be expected on ftrace_regs, add a comment to the
architecture independent definition of the ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Mark Rutland 
---
 Changes in v8:
  - Update that the saved registers depends on the context.
 Changes in v3:
  - Add instruction pointer
 Changes in v2:
  - newly added.
---
 include/linux/ftrace.h |   26 ++
 1 file changed, 26 insertions(+)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 845c2ab0bc1c..3c8a19ea8f45 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -117,6 +117,32 @@ extern int ftrace_enabled;
 
 #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
 
+/**
+ * ftrace_regs - ftrace partial/optimal register set
+ *
+ * ftrace_regs represents a group of registers which is used at the
+ * function entry and exit. There are three types of registers.
+ *
+ * - Registers for passing the parameters to callee, including the stack
+ *   pointer. (e.g. rcx, rdx, rdi, rsi, r8, r9 and rsp on x86_64)
+ * - Registers for passing the return values to caller.
+ *   (e.g. rax and rdx on x86_64)
+ * - Registers for hooking the function call and return including the
+ *   frame pointer (the frame pointer is architecture/config dependent)
+ *   (e.g. rip, rbp and rsp for x86_64)
+ *
+ * Also, architecture dependent fields can be used for internal process.
+ * (e.g. orig_ax on x86_64)
+ *
+ * On the function entry, those registers will be restored except for
+ * the stack pointer, so that user can change the function parameters
+ * and instruction pointer (e.g. live patching.)
+ * On the function exit, only registers which is used for return values
+ * are restored.
+ *
+ * NOTE: user *must not* access regs directly, only do it via APIs, because
+ * the member can be changed according to the architecture.
+ */
 struct ftrace_regs {
struct pt_regs  regs;
 };




[PATCH v12 00/19] tracing: fprobe: function_graph: Multi-function graph and fprobe on fgraph

2024-07-03 Thread Masami Hiramatsu (Google)
Hi,

Here is the 12th version of the series to re-implement the fprobe on
function-graph tracer. The previous version is;

https://lore.kernel.org/all/171858878797.288820.237119113242007537.stgit@devnote2/

In this version, I added a new performance improvement patch[19/19] and
add some performance numbers to [18/19]. Here is the final result of this
change.

NOTE: I used tools/testing/selftests/bpf/benchs/run_bench_trigger.sh, 
 so kprobe-multi is fprobe entry_handler, kretprobe-multi is fprobe
 exit_handler.

Without this series:
kprobe-multi   :6.507 ± 0.065M/s 
kretprobe-multi:3.518 ± 0.002M/s 

With this series:
kprobe-multi   :6.377 ± 0.054M/s-2.00%
kretprobe-multi:4.815 ± 0.007M/s*36.87%

So the fprobe entry_handler performance is 2% down, but exit_handler
performance is 37% better.


Overview

This series rewrites the fprobe on this function-graph.
The purposes of this change are;

 1) Remove dependency of the rethook from fprobe so that we can reduce
   the return hook code and shadow stack.

 2) Make 'ftrace_regs' the common trace interface for the function
   boundary.

1) Currently we have 2(or 3) different function return hook codes,
 the function-graph tracer and rethook (and legacy kretprobe).
 But since this  is redundant and needs double maintenance cost,
 I would like to unify those. From the user's viewpoint, function-
 graph tracer is very useful to grasp the execution path. For this
 purpose, it is hard to use the rethook in the function-graph
 tracer, but the opposite is possible. (Strictly speaking, kretprobe
 can not use it because it requires 'pt_regs' for historical reasons.)

2) Now the fprobe provides the 'pt_regs' for its handler, but that is
 wrong for the function entry and exit. Moreover, depending on the
 architecture, there is no way to accurately reproduce 'pt_regs'
 outside of interrupt or exception handlers. This means fprobe should
 not use 'pt_regs' because it does not use such exceptions.
 (Conversely, kprobe should use 'pt_regs' because it is an abstract
  interface of the software breakpoint exception.)

This series changes fprobe to use function-graph tracer for tracing
function entry and exit, instead of mixture of ftrace and rethook.
Unlike the rethook which is a per-task list of system-wide allocated
nodes, the function graph's ret_stack is a per-task shadow stack.
Thus it does not need to set 'nr_maxactive' (which is the number of
pre-allocated nodes).
Also the handlers will get the 'ftrace_regs' instead of 'pt_regs'.
Since eBPF multi_kprobe/multi_kretprobe events still use 'pt_regs' as
their register interface, this changes it to convert 'ftrace_regs' to
'pt_regs'. Of course this conversion makes an incomplete 'pt_regs',
so users must access only registers for function parameters or
return value. 

Design
--
Instead of using ftrace's function entry hook directly, the new fprobe
is built on top of the function-graph's entry and return callbacks
with 'ftrace_regs'.

Since the fprobe requires access to 'ftrace_regs', the architecture
must support CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS and
CONFIG_HAVE_FTRACE_GRAPH_FUNC, which enables to call function-graph
entry callback with 'ftrace_regs', and also
CONFIG_HAVE_FUNCTION_GRAPH_FREGS, which passes the ftrace_regs to
return_to_handler.

All fprobes share a single function-graph ops (means shares a common
ftrace filter) similar to the kprobe-on-ftrace. This needs another
layer to find corresponding fprobe in the common function-graph
callbacks, but has much better scalability, since the number of
registered function-graph ops is limited.

In the entry callback, the fprobe runs its entry_handler and saves the
address of 'fprobe' on the function-graph's shadow stack as data. The
return callback decodes the data to get the 'fprobe' address, and runs
the exit_handler.

The fprobe introduces two hash-tables, one is for entry callback which
searches fprobes related to the given function address passed by entry
callback. The other is for a return callback which checks if the given
'fprobe' data structure pointer is still valid. Note that it is
possible to unregister fprobe before the return callback runs. Thus
the address validation must be done before using it in the return
callback.

Download

This series can be applied against the ftrace/for-next branch in
linux-trace tree.

This series can also be found below branch.

https://git.kernel.org/pub/scm/linux/kernel/git/mhiramat/linux.git/log/?h=topic/fprobe-on-fgraph

Thank you,

---

Masami Hiramatsu (Google) (19):
  tracing: Add a comment about ftrace_regs definition
  tracing: Rename ftrace_regs_return_value to ftrace_regs_get_return_value
  function_graph: Pass ftrace_regs to entryfunc
  function_gra

[PATCH v11 18/18] fgraph: Skip recording calltime/rettime if it is not needed

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Skip recording calltime and rettime if the fgraph_ops does not need it.
This is a kind of performance optimization for fprobe. Since the fprobe
user does not use these entries, recording timestamp in fgraph is just
an overhead (e.g. eBPF, ftrace). So introduce the skip_timestamp flag;
if all fgraph_ops set this flag, skip recording calltime and rettime.

Suggested-by: Jiri Olsa 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v11:
  - Simplify it to be symmetric on push and pop. (Thus the timestamp
getting place is a bit shifted.)
 Changes in v10:
  - Add likely() to skipping timestamp.
 Changes in v9:
  - Newly added.
---
 include/linux/ftrace.h |2 ++
 kernel/trace/fgraph.c  |   36 +---
 kernel/trace/fprobe.c  |1 +
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index d8a58b940d81..fabf1a0979d4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1160,6 +1160,8 @@ struct fgraph_ops {
void*private;
trace_func_graph_ent_t  saved_func;
int idx;
+   /* If skip_timestamp is true, this does not record timestamps. */
+   boolskip_timestamp;
 };
 
 void *fgraph_reserve_data(int idx, int size_bytes);
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index d735a8c872bb..cf3ae59a436e 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -174,6 +174,7 @@ int ftrace_graph_active;
 
 static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE];
 static unsigned long fgraph_array_bitmask;
+static bool fgraph_skip_timestamp;
 
 /* LRU index table for fgraph_array */
 static int fgraph_lru_table[FGRAPH_ARRAY_SIZE];
@@ -557,7 +558,11 @@ ftrace_push_return_trace(unsigned long ret, unsigned long 
func,
return -EBUSY;
}
 
-   calltime = trace_clock_local();
+   /* This is not really 'likely' but for keeping the least path to be 
faster. */
+   if (likely(fgraph_skip_timestamp))
+   calltime = 0LL;
+   else
+   calltime = trace_clock_local();
 
offset = READ_ONCE(current->curr_ret_stack);
ret_stack = RET_STACK(current, offset);
@@ -728,6 +733,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, 
unsigned long *ret,
*ret = ret_stack->ret;
trace->func = ret_stack->func;
trace->calltime = ret_stack->calltime;
+   /* This is not really 'likely' but for keeping the least path to be 
faster. */
+   if (likely(!trace->calltime))
+   trace->rettime = 0LL;
+   else
+   trace->rettime = trace_clock_local();
+
trace->overrun = atomic_read(¤t->trace_overrun);
trace->depth = current->curr_ret_depth;
/*
@@ -788,7 +799,6 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, 
unsigned long frame_pointe
return (unsigned long)panic;
}
 
-   trace.rettime = trace_clock_local();
if (fregs)
ftrace_regs_set_instruction_pointer(fregs, ret);
 
@@ -1242,6 +1252,24 @@ static void ftrace_graph_disable_direct(bool 
disable_branch)
fgraph_direct_gops = &fgraph_stub;
 }
 
+static void update_fgraph_skip_timestamp(void)
+{
+   int i;
+
+   for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) {
+   struct fgraph_ops *gops = fgraph_array[i];
+
+   if (gops == &fgraph_stub)
+   continue;
+
+   if (!gops->skip_timestamp) {
+   fgraph_skip_timestamp = false;
+   return;
+   }
+   }
+   fgraph_skip_timestamp = true;
+}
+
 int register_ftrace_graph(struct fgraph_ops *gops)
 {
int command = 0;
@@ -1267,6 +1295,7 @@ int register_ftrace_graph(struct fgraph_ops *gops)
gops->idx = i;
 
ftrace_graph_active++;
+   update_fgraph_skip_timestamp();
 
if (ftrace_graph_active == 2)
ftrace_graph_disable_direct(true);
@@ -1298,6 +1327,7 @@ int register_ftrace_graph(struct fgraph_ops *gops)
ftrace_graph_active--;
gops->saved_func = NULL;
fgraph_lru_release_index(i);
+   update_fgraph_skip_timestamp();
}
 out:
mutex_unlock(&ftrace_lock);
@@ -1321,8 +1351,8 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
goto out;
 
fgraph_array[gops->idx] = &fgraph_stub;
-
ftrace_graph_active--;
+   update_fgraph_skip_timestamp();
 
if (!ftrace_graph_active)
command = FTRACE_STOP_FUNC_RET;
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index afa52d9816cf..24bb8edec8a3 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -

[PATCH v11 17/18] Documentation: probes: Update fprobe on function-graph tracer

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Update fprobe documentation for the new fprobe on function-graph
tracer. This includes some behavior changes and the pt_regs to
ftrace_regs interface change.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Update @fregs parameter explanation.
---
 Documentation/trace/fprobe.rst |   42 ++--
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/Documentation/trace/fprobe.rst b/Documentation/trace/fprobe.rst
index 196f52386aaa..f58bdc64504f 100644
--- a/Documentation/trace/fprobe.rst
+++ b/Documentation/trace/fprobe.rst
@@ -9,9 +9,10 @@ Fprobe - Function entry/exit probe
 Introduction
 
 
-Fprobe is a function entry/exit probe mechanism based on ftrace.
-Instead of using ftrace full feature, if you only want to attach callbacks
-on function entry and exit, similar to the kprobes and kretprobes, you can
+Fprobe is a function entry/exit probe mechanism based on the function-graph
+tracer.
+Instead of tracing all functions, if you want to attach callbacks on specific
+function entry and exit, similar to the kprobes and kretprobes, you can
 use fprobe. Compared with kprobes and kretprobes, fprobe gives faster
 instrumentation for multiple functions with single handler. This document
 describes how to use fprobe.
@@ -91,12 +92,14 @@ The prototype of the entry/exit callback function are as 
follows:
 
 .. code-block:: c
 
- int entry_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct pt_regs *regs, void *entry_data);
+ int entry_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct ftrace_regs *fregs, void *entry_data);
 
- void exit_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct pt_regs *regs, void *entry_data);
+ void exit_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct ftrace_regs *fregs, void *entry_data);
 
-Note that the @entry_ip is saved at function entry and passed to exit handler.
-If the entry callback function returns !0, the corresponding exit callback 
will be cancelled.
+Note that the @entry_ip is saved at function entry and passed to exit
+handler.
+If the entry callback function returns !0, the corresponding exit callback
+will be cancelled.
 
 @fp
 This is the address of `fprobe` data structure related to this handler.
@@ -112,12 +115,10 @@ If the entry callback function returns !0, the 
corresponding exit callback will
 This is the return address that the traced function will return to,
 somewhere in the caller. This can be used at both entry and exit.
 
-@regs
-This is the `pt_regs` data structure at the entry and exit. Note that
-the instruction pointer of @regs may be different from the @entry_ip
-in the entry_handler. If you need traced instruction pointer, you need
-to use @entry_ip. On the other hand, in the exit_handler, the 
instruction
-pointer of @regs is set to the current return address.
+@fregs
+This is the `ftrace_regs` data structure at the entry and exit. This
+includes the function parameters, or the return values. So user can
+access thos values via appropriate `ftrace_regs_*` APIs.
 
 @entry_data
 This is a local storage to share the data between entry and exit 
handlers.
@@ -125,6 +126,17 @@ If the entry callback function returns !0, the 
corresponding exit callback will
 and `entry_data_size` field when registering the fprobe, the storage is
 allocated and passed to both `entry_handler` and `exit_handler`.
 
+Entry data size and exit handlers on the same function
+==
+
+Since the entry data is passed via per-task stack and it is has limited size,
+the entry data size per probe is limited to `15 * sizeof(long)`. You also need
+to take care that the different fprobes are probing on the same function, this
+limit becomes smaller. The entry data size is aligned to `sizeof(long)` and
+each fprobe which has exit handler uses a `sizeof(long)` space on the stack,
+you should keep the number of fprobes on the same function as small as
+possible.
+
 Share the callbacks with kprobes
 
 
@@ -165,8 +177,8 @@ This counter counts up when;
  - fprobe fails to take ftrace_recursion lock. This usually means that a 
function
which is traced by other ftrace users is called from the entry_handler.
 
- - fprobe fails to setup the function exit because of the shortage of rethook
-   (the shadow stack for hooking the function return.)
+ - fprobe fails to setup the function exit because of failing to allocate the
+   data buffer from the per-task shadow stack.
 
 The `fprobe::nmissed` field counts up in both cases. Therefore, the former
 skips both of entry and exit callback and the latter skips the exit




[PATCH v11 16/18] selftests/ftrace: Add a test case for repeating register/unregister fprobe

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

This test case repeatedly defines and undefines the fprobe dynamic event
to ensure that fprobe does not cause any issues with such operations.

Signed-off-by: Masami Hiramatsu (Google) 
---
 .../test.d/dynevent/add_remove_fprobe_repeat.tc|   19 +++
 1 file changed, 19 insertions(+)
 create mode 100644 
tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc

diff --git 
a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc 
b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
new file mode 100644
index ..b4ad09237e2a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - Repeating add/remove fprobe events
+# requires: dynamic_events "f[:[/][]] [%return] 
[]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+PLACE=$FUNCTION_FORK
+REPEAT_TIMES=64
+
+for i in `seq 1 $REPEAT_TIMES`; do
+  echo "f:myevent $PLACE" >> dynamic_events
+  grep -q myevent dynamic_events
+  test -d events/fprobes/myevent
+  echo > dynamic_events
+done
+
+clear_trace




[PATCH v11 15/18] selftests: ftrace: Remove obsolete maxactive syntax check

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Since the fprobe event does not support maxactive anymore, stop
testing the maxactive syntax error checking.

Signed-off-by: Masami Hiramatsu (Google) 
---
 .../ftrace/test.d/dynevent/fprobe_syntax_errors.tc |4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git 
a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc 
b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
index 61877d166451..c9425a34fae3 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -16,9 +16,7 @@ aarch64)
   REG=%r0 ;;
 esac
 
-check_error 'f^100 vfs_read'   # MAXACT_NO_KPROBE
-check_error 'f^1a111 vfs_read' # BAD_MAXACT
-check_error 'f^10 vfs_read'# MAXACT_TOO_BIG
+check_error 'f^100 vfs_read'   # BAD_MAXACT
 
 check_error 'f ^non_exist_func'# BAD_PROBE_ADDR (enoent)
 check_error 'f ^vfs_read+10'   # BAD_PROBE_ADDR




[PATCH v11 14/18] tracing/fprobe: Remove nr_maxactive from fprobe

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Remove the deprecated fprobe::nr_maxactive. This makes fprobe events
reject the maxactive number.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Newly added.
---
 include/linux/fprobe.h  |2 --
 kernel/trace/trace_fprobe.c |   44 ++-
 2 files changed, 6 insertions(+), 40 deletions(-)

diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index 2d06bbd99601..a86b3e4df2a0 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -54,7 +54,6 @@ struct fprobe_hlist {
  * @nmissed: The counter for missing events.
  * @flags: The status flag.
  * @entry_data_size: The private data storage size.
- * @nr_maxactive: The max number of active functions. (*deprecated)
  * @entry_handler: The callback function for function entry.
  * @exit_handler: The callback function for function exit.
  * @hlist_array: The fprobe_hlist for fprobe search from IP hash table.
@@ -63,7 +62,6 @@ struct fprobe {
unsigned long   nmissed;
unsigned intflags;
size_t  entry_data_size;
-   int nr_maxactive;
 
fprobe_entry_cb entry_handler;
fprobe_exit_cb  exit_handler;
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 86cd6a8c806a..20ef5cd5d419 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -422,7 +422,6 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
   const char *event,
   const char *symbol,
   struct tracepoint *tpoint,
-  int maxactive,
   int nargs, bool is_return)
 {
struct trace_fprobe *tf;
@@ -442,7 +441,6 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
tf->fp.entry_handler = fentry_dispatcher;
 
tf->tpoint = tpoint;
-   tf->fp.nr_maxactive = maxactive;
 
ret = trace_probe_init(&tf->tp, event, group, false, nargs);
if (ret < 0)
@@ -1021,12 +1019,11 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
 */
struct trace_fprobe *tf = NULL;
-   int i, len, new_argc = 0, ret = 0;
+   int i, new_argc = 0, ret = 0;
bool is_return = false;
char *symbol = NULL;
const char *event = NULL, *group = FPROBE_EVENT_SYSTEM;
const char **new_argv = NULL;
-   int maxactive = 0;
char buf[MAX_EVENT_NAME_LEN];
char gbuf[MAX_EVENT_NAME_LEN];
char sbuf[KSYM_NAME_LEN];
@@ -1048,33 +1045,13 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
 
trace_probe_log_init("trace_fprobe", argc, argv);
 
-   event = strchr(&argv[0][1], ':');
-   if (event)
-   event++;
-
-   if (isdigit(argv[0][1])) {
-   if (event)
-   len = event - &argv[0][1] - 1;
-   else
-   len = strlen(&argv[0][1]);
-   if (len > MAX_EVENT_NAME_LEN - 1) {
-   trace_probe_log_err(1, BAD_MAXACT);
-   goto parse_error;
-   }
-   memcpy(buf, &argv[0][1], len);
-   buf[len] = '\0';
-   ret = kstrtouint(buf, 0, &maxactive);
-   if (ret || !maxactive) {
+   if (argv[0][1] != '\0') {
+   if (argv[0][1] != ':') {
+   trace_probe_log_set_index(0);
trace_probe_log_err(1, BAD_MAXACT);
goto parse_error;
}
-   /* fprobe rethook instances are iterated over via a list. The
-* maximum should stay reasonable.
-*/
-   if (maxactive > RETHOOK_MAXACTIVE_MAX) {
-   trace_probe_log_err(1, MAXACT_TOO_BIG);
-   goto parse_error;
-   }
+   event = &argv[0][2];
}
 
trace_probe_log_set_index(1);
@@ -1084,12 +1061,6 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
if (ret < 0)
goto parse_error;
 
-   if (!is_return && maxactive) {
-   trace_probe_log_set_index(0);
-   trace_probe_log_err(1, BAD_MAXACT_TYPE);
-   goto parse_error;
-   }
-
trace_probe_log_set_index(0);
if (event) {
ret = traceprobe_parse_event_name(&event, &group, gbuf,
@@ -1147,8 +1118,7 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
goto out;
 
/* setup a probe */
-   tf = al

[PATCH v11 13/18] fprobe: Rewrite fprobe on function-graph tracer

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Rewrite fprobe implementation on function-graph tracer.
Major API changes are:
 -  'nr_maxactive' field is deprecated.
 -  This depends on CONFIG_DYNAMIC_FTRACE_WITH_ARGS or
!CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS, and
CONFIG_HAVE_FUNCTION_GRAPH_FREGS. So currently works only
on x86_64.
 -  Currently the entry data size is limited to 15 * sizeof(long).
 -  If there are too many fprobe exit handlers set on the same
    function, it will fail to probe.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v9:
  - Remove unneeded prototype of ftrace_regs_get_return_address().
  - Fix entry data address calculation.
  - Remove DIV_ROUND_UP() from hotpath.
 Changes in v8:
  - Use trace_func_graph_ret/ent_t for fgraph_ops.
  - Update CONFIG_FPROBE dependencies.
  - Add ftrace_regs_get_return_address() for each arch.
 Changes in v3:
  - Update for new reserve_data/retrieve_data API.
  - Fix internal push/pop on fgraph data logic so that it can
correctly save/restore the returning fprobes.
 Changes in v2:
  - Add more lockdep_assert_held(fprobe_mutex)
  - Use READ_ONCE() and WRITE_ONCE() for fprobe_hlist_node::fp.
  - Add NOKPROBE_SYMBOL() for the functions which is called from
entry/exit callback.
---
 arch/arm64/include/asm/ftrace.h |6 
 arch/loongarch/include/asm/ftrace.h |6 
 arch/powerpc/include/asm/ftrace.h   |6 
 arch/s390/include/asm/ftrace.h  |6 
 arch/x86/include/asm/ftrace.h   |6 
 include/linux/fprobe.h  |   53 ++-
 kernel/trace/Kconfig|8 
 kernel/trace/fprobe.c   |  638 +--
 lib/test_fprobe.c   |   45 --
 9 files changed, 529 insertions(+), 245 deletions(-)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 14ecb9a418d9..27e32f323048 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -132,6 +132,12 @@ ftrace_regs_get_frame_pointer(const struct ftrace_regs 
*fregs)
return fregs->fp;
 }
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
+{
+   return fregs->lr;
+}
+
 static __always_inline struct pt_regs *
 ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
 {
diff --git a/arch/loongarch/include/asm/ftrace.h 
b/arch/loongarch/include/asm/ftrace.h
index 1a73f35ea9af..c021aa3194f3 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -80,6 +80,12 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs 
*fregs, unsigned long ip)
 #define ftrace_regs_get_frame_pointer(fregs) \
((fregs)->regs.regs[22])
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+   return *(unsigned long *)(fregs->regs.regs[1]);
+}
+
 #define ftrace_graph_func ftrace_graph_func
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
   struct ftrace_ops *op, struct ftrace_regs *fregs);
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index e6ff6834bf7e..2a2d070dd23c 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -75,6 +75,12 @@ ftrace_regs_get_instruction_pointer(struct ftrace_regs 
*fregs)
 #define ftrace_regs_query_register_offset(name) \
regs_query_register_offset(name)
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+   return fregs->regs.link;
+}
+
 struct ftrace_ops;
 
 #define ftrace_graph_func ftrace_graph_func
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 0d9f6df21f81..7b80ff4d3386 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -84,6 +84,12 @@ ftrace_regs_get_frame_pointer(struct ftrace_regs *fregs)
return sp[0];   /* return backchain */
 }
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
+{
+   return fregs->regs.gprs[14];
+}
+
 #define arch_ftrace_fill_perf_regs(fregs, _regs)do {   \
(_regs)->psw.addr = (fregs)->regs.psw.addr; \
(_regs)->gprs[15] = (fregs)->regs.gprs[15]; \
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 1f4d1f7b19ed..8472ba394091 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -74,6 +74,12 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
 #define ftrace_regs_get_frame_pointer(fregs) \
frame_pointer(&(fregs)->regs)
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+   return *(unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+}
+
 struct ftrace_ops;
 #define ftrace_graph_func ftrace_graph_func
 void ftrace_graph

[PATCH v11 12/18] ftrace: Add CONFIG_HAVE_FTRACE_GRAPH_FUNC

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add the CONFIG_HAVE_FTRACE_GRAPH_FUNC kconfig option in addition to the
ftrace_graph_func macro check. This allows other features (e.g. FPROBE)
which require access to ftrace_regs from fgraph_ops::entryfunc() to avoid
being compiled if the fgraph cannot pass valid ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v8:
  - Newly added.
---
 arch/arm64/Kconfig |1 +
 arch/loongarch/Kconfig |1 +
 arch/powerpc/Kconfig   |1 +
 arch/riscv/Kconfig |1 +
 arch/x86/Kconfig   |1 +
 kernel/trace/Kconfig   |5 +
 6 files changed, 10 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8691683d782e..e99a3fd53efd 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -207,6 +207,7 @@ config ARM64
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_GUP_FAST
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 0f1b2057507b..f1439c42c46a 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -126,6 +126,7 @@ config LOONGARCH
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
select HAVE_EXIT_THREAD
select HAVE_GUP_FAST
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c88c6d46a5bc..910118faedaa 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -239,6 +239,7 @@ config PPC
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_GUP_FAST
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_DESCRIPTORSif PPC64_ELF_ABI_V1
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 1904393bc399..83e8c8c64b99 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -130,6 +130,7 @@ config RISCV
select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && 
(CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_FREGS
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d4655b72e6d7..7213e27b5b2b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -228,6 +228,7 @@ config X86
select HAVE_EXIT_THREAD
select HAVE_GUP_FAST
select HAVE_FENTRY  if X86_64 || DYNAMIC_FTRACE
+   select HAVE_FTRACE_GRAPH_FUNC   if HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_FREGSif HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_TRACER   if X86_32 || (X86_64 && 
DYNAMIC_FTRACE)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4a3dd81f749b..a1fa9cba0ef3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -34,6 +34,11 @@ config HAVE_FUNCTION_GRAPH_TRACER
 config HAVE_FUNCTION_GRAPH_FREGS
bool
 
+config HAVE_FTRACE_GRAPH_FUNC
+   bool
+   help
+ True if ftrace_graph_func() is defined.
+
 config HAVE_DYNAMIC_FTRACE
bool
help




[PATCH v11 11/18] bpf: Enable kprobe_multi feature if CONFIG_FPROBE is enabled

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Enable kprobe_multi feature if CONFIG_FPROBE is enabled. The pt_regs is
converted from ftrace_regs by ftrace_partial_regs(), thus some registers
may always return 0. But it should be enough for function entry (accessing
arguments) and exit (accessing the return value).

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Florent Revest 
---
 Changes in v9:
  - Avoid wasting memory for bpf_kprobe_multi_pt_regs when
CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y
---
 kernel/trace/bpf_trace.c |   27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f72b421abe9b..77fd63027286 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2602,7 +2602,7 @@ struct bpf_session_run_ctx {
void *data;
 };
 
-#if defined(CONFIG_FPROBE) && defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
+#ifdef CONFIG_FPROBE
 struct bpf_kprobe_multi_link {
struct bpf_link link;
struct fprobe fp;
@@ -2625,6 +2625,13 @@ struct user_syms {
char *buf;
 };
 
+#ifndef CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST
+static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs);
+#define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs)
+#else
+#define bpf_kprobe_multi_pt_regs_ptr() (NULL)
+#endif
+
 static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, 
u32 cnt)
 {
unsigned long __user usymbol;
@@ -2819,7 +2826,7 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx 
*ctx)
 
 static int
 kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
-  unsigned long entry_ip, struct pt_regs *regs,
+  unsigned long entry_ip, struct ftrace_regs *fregs,
   bool is_return, void *data)
 {
struct bpf_kprobe_multi_run_ctx run_ctx = {
@@ -2831,6 +2838,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link 
*link,
.entry_ip = entry_ip,
};
struct bpf_run_ctx *old_run_ctx;
+   struct pt_regs *regs;
int err;
 
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
@@ -2841,6 +2849,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link 
*link,
 
migrate_disable();
rcu_read_lock();
+   regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
@@ -2857,15 +2866,11 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned 
long fentry_ip,
  unsigned long ret_ip, struct ftrace_regs *fregs,
  void *data)
 {
-   struct pt_regs *regs = ftrace_get_regs(fregs);
struct bpf_kprobe_multi_link *link;
int err;
 
-   if (!regs)
-   return 0;
-
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
-   err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, 
false, data);
+   err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), fregs, 
false, data);
return is_kprobe_session(link->link.prog) ? err : 0;
 }
 
@@ -2875,13 +2880,9 @@ kprobe_multi_link_exit_handler(struct fprobe *fp, 
unsigned long fentry_ip,
   void *data)
 {
struct bpf_kprobe_multi_link *link;
-   struct pt_regs *regs = ftrace_get_regs(fregs);
-
-   if (!regs)
-   return;
 
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
-   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, true, 
data);
+   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), fregs, true, 
data);
 }
 
 static int symbols_cmp_r(const void *a, const void *b, const void *priv)
@@ -3142,7 +3143,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr 
*attr, struct bpf_prog *pr
kvfree(cookies);
return err;
 }
-#else /* !CONFIG_FPROBE || !CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+#else /* !CONFIG_FPROBE */
 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog 
*prog)
 {
return -EOPNOTSUPP;




[PATCH v11 10/18] tracing/fprobe: Enable fprobe events with CONFIG_DYNAMIC_FTRACE_WITH_ARGS

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Allow fprobe events to be enabled with CONFIG_DYNAMIC_FTRACE_WITH_ARGS.
With this change, fprobe events mostly use ftrace_regs instead of pt_regs.
Note that if the arch doesn't enable HAVE_PT_REGS_COMPAT_FTRACE_REGS,
fprobe events cannot be used from perf.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v9:
  - Copy store_trace_entry_data() as store_fprobe_entry_data() for
fprobe.
 Changes in v3:
  - Use ftrace_regs_get_return_value().
 Changes in v2:
  - Define ftrace_regs_get_kernel_stack_nth() for
!CONFIG_HAVE_REGS_AND_STACK_ACCESS_API.
 Changes from previous series: Update against the new series.
---
 include/linux/ftrace.h  |   17 ++
 kernel/trace/Kconfig|1 
 kernel/trace/trace_fprobe.c |  107 +--
 kernel/trace/trace_probe_tmpl.h |2 -
 4 files changed, 86 insertions(+), 41 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index d9a3723f987d..d8a58b940d81 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -255,6 +255,23 @@ static __always_inline bool ftrace_regs_has_args(struct 
ftrace_regs *fregs)
frame_pointer(&(fregs)->regs)
 #endif
 
+#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API
+static __always_inline unsigned long
+ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth)
+{
+   unsigned long *stackp;
+
+   stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+   if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) ==
+   ((unsigned long)stackp & ~(THREAD_SIZE - 1)))
+   return *(stackp + nth);
+
+   return 0;
+}
+#else /* !CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+#define ftrace_regs_get_kernel_stack_nth(fregs, nth)   (0L)
+#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  struct ftrace_ops *op, struct ftrace_regs *fregs);
 
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 15e340a865f5..4a3dd81f749b 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -680,7 +680,6 @@ config FPROBE_EVENTS
select TRACING
select PROBE_EVENTS
select DYNAMIC_EVENTS
-   depends on DYNAMIC_FTRACE_WITH_REGS
default y
help
  This allows user to add tracing events on the function entry and
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 273cdf3cf70c..86cd6a8c806a 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -133,7 +133,7 @@ static int
 process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
   void *dest, void *base)
 {
-   struct pt_regs *regs = rec;
+   struct ftrace_regs *fregs = rec;
unsigned long val;
int ret;
 
@@ -141,17 +141,17 @@ process_fetch_insn(struct fetch_insn *code, void *rec, 
void *edata,
/* 1st stage: get value from context */
switch (code->op) {
case FETCH_OP_STACK:
-   val = regs_get_kernel_stack_nth(regs, code->param);
+   val = ftrace_regs_get_kernel_stack_nth(fregs, code->param);
break;
case FETCH_OP_STACKP:
-   val = kernel_stack_pointer(regs);
+   val = ftrace_regs_get_stack_pointer(fregs);
break;
case FETCH_OP_RETVAL:
-   val = regs_return_value(regs);
+   val = ftrace_regs_get_return_value(fregs);
break;
 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
case FETCH_OP_ARG:
-   val = regs_get_kernel_argument(regs, code->param);
+   val = ftrace_regs_get_argument(fregs, code->param);
break;
case FETCH_OP_EDATA:
val = *(unsigned long *)((unsigned long)edata + code->offset);
@@ -174,7 +174,7 @@ NOKPROBE_SYMBOL(process_fetch_insn)
 /* function entry handler */
 static nokprobe_inline void
 __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
-   struct pt_regs *regs,
+   struct ftrace_regs *fregs,
struct trace_event_file *trace_file)
 {
struct fentry_trace_entry_head *entry;
@@ -188,41 +188,71 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned 
long entry_ip,
if (trace_trigger_soft_disabled(trace_file))
return;
 
-   dsize = __get_data_size(&tf->tp, regs, NULL);
+   dsize = __get_data_size(&tf->tp, fregs, NULL);
 
entry = trace_event_buffer_reserve(&fbuffer, trace_file,
   sizeof(*entry) + tf->tp.size + 
dsize);
if (!entry)
return;
 
-   fbuffer.regs = regs;
+   fbuffer.regs = ftrace_get_regs(fregs);
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
en

[PATCH v11 09/18] tracing: Add ftrace_fill_perf_regs() for perf event

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add ftrace_fill_perf_regs() which should be compatible with the
perf_fetch_caller_regs(). In other words, the pt_regs returned from the
ftrace_fill_perf_regs() must satisfy 'user_mode(regs) == false' and can be
used for stack tracing.

Signed-off-by: Masami Hiramatsu (Google) 
---
  Changes from previous series: NOTHING, just forward ported.
---
 arch/arm64/include/asm/ftrace.h   |7 +++
 arch/powerpc/include/asm/ftrace.h |7 +++
 arch/s390/include/asm/ftrace.h|5 +
 arch/x86/include/asm/ftrace.h |7 +++
 include/linux/ftrace.h|   31 +++
 5 files changed, 57 insertions(+)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 5cd587afab6d..14ecb9a418d9 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -143,6 +143,13 @@ ftrace_partial_regs(const struct ftrace_regs *fregs, 
struct pt_regs *regs)
return regs;
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {  \
+   (_regs)->pc = (fregs)->pc;  \
+   (_regs)->regs[29] = (fregs)->fp;\
+   (_regs)->sp = (fregs)->sp;  \
+   (_regs)->pstate = PSR_MODE_EL1h;\
+   } while (0)
+
 int ftrace_regs_query_register_offset(const char *name);
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index 23d26f3afae4..e6ff6834bf7e 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -42,6 +42,13 @@ static __always_inline struct pt_regs 
*arch_ftrace_get_regs(struct ftrace_regs *
return fregs->regs.msr ? &fregs->regs : NULL;
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {  \
+   (_regs)->result = 0;\
+   (_regs)->nip = (fregs)->regs.nip;   \
+   (_regs)->gpr[1] = (fregs)->regs.gpr[1]; \
+   asm volatile("mfmsr %0" : "=r" ((_regs)->msr)); \
+   } while (0)
+
 static __always_inline void
 ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
unsigned long ip)
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 9cdd48a46bf7..0d9f6df21f81 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -84,6 +84,11 @@ ftrace_regs_get_frame_pointer(struct ftrace_regs *fregs)
return sp[0];   /* return backchain */
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs)do {   \
+   (_regs)->psw.addr = (fregs)->regs.psw.addr; \
+   (_regs)->gprs[15] = (fregs)->regs.gprs[15]; \
+   } while (0)
+
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 /*
  * When an ftrace registered caller is tracing a function that is
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 669771ef3b5b..1f4d1f7b19ed 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -46,6 +46,13 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
return &fregs->regs;
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {  \
+   (_regs)->ip = (fregs)->regs.ip; \
+   (_regs)->sp = (fregs)->regs.sp; \
+   (_regs)->cs = __KERNEL_CS;  \
+   (_regs)->flags = 0; \
+   } while (0)
+
 #define ftrace_regs_set_instruction_pointer(fregs, _ip)\
do { (fregs)->regs.ip = (_ip); } while (0)
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8e5da4dfb669..d9a3723f987d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -193,6 +193,37 @@ ftrace_partial_regs(struct ftrace_regs *fregs, struct 
pt_regs *regs)
 
 #endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || 
CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
 
+#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+
+/*
+ * Please define arch dependent pt_regs which compatible to the
+ * perf_arch_fetch_caller_regs() but based on ftrace_regs.
+ * This requires
+ *   - user_mode(_regs) returns false (always kernel mode).
+ *   - able to use the _regs for stack trace.
+ */
+#ifndef arch_ftrace_fill_perf_regs
+/* As same as perf_arch_fetch_caller_regs(), do nothing by default */
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {} while (0)
+#endif
+
+static __always_inline struct pt_regs *
+ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+   arch_ftrace_fill_perf_regs(fregs, regs);
+   return regs;
+}
+
+#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
+
+static __always_inline struct pt_regs

[PATCH v11 08/18] tracing: Add ftrace_partial_regs() for converting ftrace_regs to pt_regs

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add ftrace_partial_regs() which converts the ftrace_regs to pt_regs.
This is for the eBPF which needs this to keep the same pt_regs interface
to access registers.
Thus when replacing the pt_regs with ftrace_regs in fprobes (which is
used by kprobe_multi eBPF event), this will be used.

If the architecture defines its own ftrace_regs, this copies partial
registers to pt_regs and returns it. If not, ftrace_regs is the same as
pt_regs and ftrace_partial_regs() will return ftrace_regs::regs.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Florent Revest 
---
 Changes in v8:
  - Add the reason why this required in changelog.
 Changes from previous series: NOTHING, just forward ported.
---
 arch/arm64/include/asm/ftrace.h |   11 +++
 include/linux/ftrace.h  |   17 +
 2 files changed, 28 insertions(+)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index dffaab3dd1f1..5cd587afab6d 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -132,6 +132,17 @@ ftrace_regs_get_frame_pointer(const struct ftrace_regs 
*fregs)
return fregs->fp;
 }
 
+static __always_inline struct pt_regs *
+ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+   memcpy(regs->regs, fregs->regs, sizeof(u64) * 9);
+   regs->sp = fregs->sp;
+   regs->pc = fregs->pc;
+   regs->regs[29] = fregs->fp;
+   regs->regs[30] = fregs->lr;
+   return regs;
+}
+
 int ftrace_regs_query_register_offset(const char *name);
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fa578748f7d2..8e5da4dfb669 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -176,6 +176,23 @@ static __always_inline struct pt_regs 
*ftrace_get_regs(struct ftrace_regs *fregs
return arch_ftrace_get_regs(fregs);
 }
 
+#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \
+   defined(CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST)
+
+static __always_inline struct pt_regs *
+ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+   /*
+* If CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y, ftrace_regs memory
+* layout is the same as pt_regs. So always returns that address.
+* Since arch_ftrace_get_regs() will check some members and may return
+* NULL, we can not use it.
+*/
+   return &fregs->regs;
+}
+
+#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || 
CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
+
 /*
  * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs.
  * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs.




[PATCH v11 07/18] fprobe: Use ftrace_regs in fprobe exit handler

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Change the fprobe exit handler to use ftrace_regs structure instead of
pt_regs. This also introduce HAVE_PT_REGS_TO_FTRACE_REGS_CAST which means
the ftrace_regs's memory layout is equal to the pt_regs so that those are
able to cast. Fprobe introduces a new dependency with that.

Signed-off-by: Masami Hiramatsu (Google) 
---
  Changes in v3:
   - Use ftrace_regs_get_return_value()
  Changes from previous series: NOTHING, just forward ported.
---
 arch/loongarch/Kconfig  |1 +
 arch/s390/Kconfig   |1 +
 arch/x86/Kconfig|1 +
 include/linux/fprobe.h  |2 +-
 include/linux/ftrace.h  |6 ++
 kernel/trace/Kconfig|8 
 kernel/trace/bpf_trace.c|6 +-
 kernel/trace/fprobe.c   |3 ++-
 kernel/trace/trace_fprobe.c |6 +-
 lib/test_fprobe.c   |6 +++---
 samples/fprobe/fprobe_example.c |2 +-
 11 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 23014d5f0047..0f1b2057507b 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -119,6 +119,7 @@ config LOONGARCH
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+   select HAVE_PT_REGS_TO_FTRACE_REGS_CAST
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 33688d43fd14..adc8f6620525 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -173,6 +173,7 @@ config S390
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+   select HAVE_PT_REGS_TO_FTRACE_REGS_CAST
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5fc3a2997977..d4655b72e6d7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -218,6 +218,7 @@ config X86
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_DYNAMIC_FTRACE_WITH_ARGSif X86_64
+   select HAVE_PT_REGS_TO_FTRACE_REGS_CAST if X86_64
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_SAMPLE_FTRACE_DIRECTif X86_64
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI  if X86_64
diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index ca64ee5e45d2..ef609bcca0f9 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -14,7 +14,7 @@ typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned 
long entry_ip,
   void *entry_data);
 
 typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip,
-  unsigned long ret_ip, struct pt_regs *regs,
+  unsigned long ret_ip, struct ftrace_regs *regs,
   void *entry_data);
 
 /**
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 85394b9fb630..fa578748f7d2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -162,6 +162,12 @@ struct ftrace_regs {
 #define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0)
 #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
 
+#ifdef CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST
+
+static_assert(sizeof(struct pt_regs) == sizeof(struct ftrace_regs));
+
+#endif /* CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
+
 static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs 
*fregs)
 {
if (!fregs)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 78b0da6fda1a..15e340a865f5 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -57,6 +57,13 @@ config HAVE_DYNAMIC_FTRACE_WITH_ARGS
 This allows for use of ftrace_regs_get_argument() and
 ftrace_regs_get_stack_pointer().
 
+config HAVE_PT_REGS_TO_FTRACE_REGS_CAST
+   bool
+   help
+If this is set, the memory layout of the ftrace_regs data structure
+is the same as the pt_regs. So the pt_regs is possible to be casted
+to ftrace_regs.
+
 config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
bool
help
@@ -288,6 +295,7 @@ config FPROBE
bool "Kernel Function Probe (fprobe)"
depends on FUNCTION_TRACER
depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS
+   depends on HAVE_PT_REGS_TO_FTRACE_REGS_CAST || 
!HAVE_DYNAMIC_FTRACE_WITH_ARGS
depends on HAVE_RETHOOK
select RETHOOK
default n
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 7e782a58ca6d..f72b421abe9b 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2871,10 +2871,14 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned 
long fentry_ip,

[PATCH v11 06/18] fprobe: Use ftrace_regs in fprobe entry handler

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

This allows fprobes to be available with CONFIG_DYNAMIC_FTRACE_WITH_ARGS
instead of CONFIG_DYNAMIC_FTRACE_WITH_REGS, then we can enable fprobe
on arm64.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Florent Revest 
---
 Changes in v6:
  - Keep using SAVE_REGS flag to avoid breaking bpf kprobe-multi test.
---
 include/linux/fprobe.h  |2 +-
 kernel/trace/Kconfig|3 ++-
 kernel/trace/bpf_trace.c|   10 +++---
 kernel/trace/fprobe.c   |3 ++-
 kernel/trace/trace_fprobe.c |6 +-
 lib/test_fprobe.c   |4 ++--
 samples/fprobe/fprobe_example.c |2 +-
 7 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index f39869588117..ca64ee5e45d2 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -10,7 +10,7 @@
 struct fprobe;
 
 typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned long entry_ip,
-  unsigned long ret_ip, struct pt_regs *regs,
+  unsigned long ret_ip, struct ftrace_regs *regs,
   void *entry_data);
 
 typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 33fcfb36eca5..78b0da6fda1a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -287,7 +287,7 @@ config DYNAMIC_FTRACE_WITH_ARGS
 config FPROBE
bool "Kernel Function Probe (fprobe)"
depends on FUNCTION_TRACER
-   depends on DYNAMIC_FTRACE_WITH_REGS
+   depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS
depends on HAVE_RETHOOK
select RETHOOK
default n
@@ -672,6 +672,7 @@ config FPROBE_EVENTS
select TRACING
select PROBE_EVENTS
select DYNAMIC_EVENTS
+   depends on DYNAMIC_FTRACE_WITH_REGS
default y
help
  This allows user to add tracing events on the function entry and
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 6249dac61701..7e782a58ca6d 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2602,7 +2602,7 @@ struct bpf_session_run_ctx {
void *data;
 };
 
-#ifdef CONFIG_FPROBE
+#if defined(CONFIG_FPROBE) && defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
 struct bpf_kprobe_multi_link {
struct bpf_link link;
struct fprobe fp;
@@ -2854,12 +2854,16 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link 
*link,
 
 static int
 kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
  void *data)
 {
+   struct pt_regs *regs = ftrace_get_regs(fregs);
struct bpf_kprobe_multi_link *link;
int err;
 
+   if (!regs)
+   return 0;
+
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, 
false, data);
return is_kprobe_session(link->link.prog) ? err : 0;
@@ -3134,7 +3138,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr 
*attr, struct bpf_prog *pr
kvfree(cookies);
return err;
 }
-#else /* !CONFIG_FPROBE */
+#else /* !CONFIG_FPROBE || !CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog 
*prog)
 {
return -EOPNOTSUPP;
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 9ff018245840..3d3789283873 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -46,7 +46,7 @@ static inline void __fprobe_handler(unsigned long ip, 
unsigned long parent_ip,
}
 
if (fp->entry_handler)
-   ret = fp->entry_handler(fp, ip, parent_ip, 
ftrace_get_regs(fregs), entry_data);
+   ret = fp->entry_handler(fp, ip, parent_ip, fregs, entry_data);
 
/* If entry_handler returns !0, nmissed is not counted. */
if (rh) {
@@ -182,6 +182,7 @@ static void fprobe_init(struct fprobe *fp)
fp->ops.func = fprobe_kprobe_handler;
else
fp->ops.func = fprobe_handler;
+
fp->ops.flags |= FTRACE_OPS_FL_SAVE_REGS;
 }
 
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 62e6a8f4aae9..b2c20d4fdfd7 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -338,12 +338,16 @@ NOKPROBE_SYMBOL(fexit_perf_func);
 #endif /* CONFIG_PERF_EVENTS */
 
 static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
-unsigned long ret_ip, struct pt_regs *regs,
+unsigned long ret_ip, struct ftrace_regs *fregs,
 void *entry_data)
 {
struct trace_fprobe *tf = container_of(fp, s

[PATCH v11 05/18] function_graph: Pass ftrace_regs to retfunc

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Pass ftrace_regs to the fgraph_ops::retfunc(). If ftrace_regs is not
available, it passes a NULL instead. User callback function can access
some registers (including return address) via this ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v8:
  - Pass ftrace_regs to retfunc, instead of adding retregfunc.
 Changes in v6:
  - update to use ftrace_regs_get_return_value() because of reordering
patches.
 Changes in v3:
  - Update for new multiple fgraph.
  - Save the return address to instruction pointer in ftrace_regs.
---
 include/linux/ftrace.h   |3 ++-
 kernel/trace/fgraph.c|   16 +++-
 kernel/trace/ftrace.c|3 ++-
 kernel/trace/trace.h |3 ++-
 kernel/trace/trace_functions_graph.c |7 ---
 kernel/trace/trace_irqsoff.c |3 ++-
 kernel/trace/trace_sched_wakeup.c|3 ++-
 kernel/trace/trace_selftest.c|3 ++-
 8 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9230af20c92e..85394b9fb630 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1069,7 +1069,8 @@ struct fgraph_ops;
 
 /* Type of the callback handlers for tracing function graph*/
 typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *,
-  struct fgraph_ops *); /* return */
+  struct fgraph_ops *,
+  struct ftrace_regs *); /* return */
 typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *,
  struct fgraph_ops *,
  struct ftrace_regs *); /* entry */
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 709f920da939..d735a8c872bb 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -297,7 +297,8 @@ static int entry_run(struct ftrace_graph_ent *trace, struct 
fgraph_ops *ops,
 }
 
 /* ftrace_graph_return set to this to tell some archs to run function graph */
-static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops)
+static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops,
+  struct ftrace_regs *fregs)
 {
 }
 
@@ -491,7 +492,8 @@ int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace,
 }
 
 static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
 {
 }
 
@@ -787,6 +789,9 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, 
unsigned long frame_pointe
}
 
trace.rettime = trace_clock_local();
+   if (fregs)
+   ftrace_regs_set_instruction_pointer(fregs, ret);
+
 #ifdef CONFIG_FUNCTION_GRAPH_RETVAL
trace.retval = ftrace_regs_get_return_value(fregs);
 #endif
@@ -796,7 +801,7 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, 
unsigned long frame_pointe
 #ifdef CONFIG_HAVE_STATIC_CALL
if (static_branch_likely(&fgraph_do_direct)) {
if (test_bit(fgraph_direct_gops->idx, &bitmap))
-   static_call(fgraph_retfunc)(&trace, fgraph_direct_gops);
+   static_call(fgraph_retfunc)(&trace, fgraph_direct_gops, 
fregs);
} else
 #endif
{
@@ -806,7 +811,7 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, 
unsigned long frame_pointe
if (gops == &fgraph_stub)
continue;
 
-   gops->retfunc(&trace, gops);
+   gops->retfunc(&trace, gops, fregs);
}
}
 
@@ -956,7 +961,8 @@ void ftrace_graph_sleep_time_control(bool enable)
  * Simply points to ftrace_stub, but with the proper protocol.
  * Defined by the linker script in linux/vmlinux.lds.h
  */
-void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops 
*gops);
+void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops,
+  struct ftrace_regs *fregs);
 
 /* The callbacks that hook a function */
 trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 64d15428cffc..725a95b161a1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -840,7 +840,8 @@ static int profile_graph_entry(struct ftrace_graph_ent 
*trace,
 }
 
 static void profile_graph_return(struct ftrace_graph_ret *trace,
-struct fgraph_ops *gops)
+struct fgraph_ops *gops,
+struct ftrace_regs *fregs)
 {
struct ftrace_ret_stack *ret_stack;
struct ftrace_profile_stat *stat;
diff --git a/kernel/trace/trace.h b/ke

[PATCH v11 04/18] function_graph: Replace fgraph_ret_regs with ftrace_regs

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Use ftrace_regs instead of fgraph_ret_regs for tracing return value
on function_graph tracer because of simplifying the callback interface.

The CONFIG_HAVE_FUNCTION_GRAPH_RETVAL is also replaced by
CONFIG_HAVE_FUNCTION_GRAPH_FREGS.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v8:
  - Newly added.
---
 arch/arm64/Kconfig  |2 +-
 arch/arm64/include/asm/ftrace.h |   23 ++-
 arch/arm64/kernel/asm-offsets.c |   12 
 arch/arm64/kernel/entry-ftrace.S|   32 ++--
 arch/loongarch/Kconfig  |2 +-
 arch/loongarch/include/asm/ftrace.h |   24 ++--
 arch/loongarch/kernel/asm-offsets.c |   12 
 arch/loongarch/kernel/mcount.S  |   17 ++---
 arch/loongarch/kernel/mcount_dyn.S  |   14 +++---
 arch/riscv/Kconfig  |2 +-
 arch/riscv/include/asm/ftrace.h |   26 +-
 arch/riscv/kernel/mcount.S  |   24 +---
 arch/s390/Kconfig   |2 +-
 arch/s390/include/asm/ftrace.h  |   26 +-
 arch/s390/kernel/asm-offsets.c  |6 --
 arch/s390/kernel/mcount.S   |9 +
 arch/x86/Kconfig|2 +-
 arch/x86/include/asm/ftrace.h   |   22 ++
 arch/x86/kernel/ftrace_32.S |   15 +--
 arch/x86/kernel/ftrace_64.S |   17 +
 include/linux/ftrace.h  |   14 +++---
 kernel/trace/Kconfig|4 ++--
 kernel/trace/fgraph.c   |   21 +
 23 files changed, 122 insertions(+), 206 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5d91259ee7b5..8691683d782e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -210,7 +210,7 @@ config ARM64
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
-   select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
+   select HAVE_FUNCTION_GRAPH_FREGS
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_GCC_PLUGINS
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index dc9cf0bd2a4c..dffaab3dd1f1 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -126,6 +126,12 @@ ftrace_override_function_with_return(struct ftrace_regs 
*fregs)
fregs->pc = fregs->lr;
 }
 
+static __always_inline unsigned long
+ftrace_regs_get_frame_pointer(const struct ftrace_regs *fregs)
+{
+   return fregs->fp;
+}
+
 int ftrace_regs_query_register_offset(const char *name);
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
@@ -183,23 +189,6 @@ static inline bool arch_syscall_match_sym_name(const char 
*sym,
 
 #ifndef __ASSEMBLY__
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-struct fgraph_ret_regs {
-   /* x0 - x7 */
-   unsigned long regs[8];
-
-   unsigned long fp;
-   unsigned long __unused;
-};
-
-static inline unsigned long fgraph_ret_regs_return_value(struct 
fgraph_ret_regs *ret_regs)
-{
-   return ret_regs->regs[0];
-}
-
-static inline unsigned long fgraph_ret_regs_frame_pointer(struct 
fgraph_ret_regs *ret_regs)
-{
-   return ret_regs->fp;
-}
 
 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
   unsigned long frame_pointer);
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 81496083c041..81bb6704ff5a 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -200,18 +200,6 @@ int main(void)
   DEFINE(FTRACE_OPS_FUNC,  offsetof(struct ftrace_ops, func));
 #endif
   BLANK();
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-  DEFINE(FGRET_REGS_X0,offsetof(struct 
fgraph_ret_regs, regs[0]));
-  DEFINE(FGRET_REGS_X1,offsetof(struct 
fgraph_ret_regs, regs[1]));
-  DEFINE(FGRET_REGS_X2,offsetof(struct 
fgraph_ret_regs, regs[2]));
-  DEFINE(FGRET_REGS_X3,offsetof(struct 
fgraph_ret_regs, regs[3]));
-  DEFINE(FGRET_REGS_X4,offsetof(struct 
fgraph_ret_regs, regs[4]));
-  DEFINE(FGRET_REGS_X5,offsetof(struct 
fgraph_ret_regs, regs[5]));
-  DEFINE(FGRET_REGS_X6,offsetof(struct 
fgraph_ret_regs, regs[6]));
-  DEFINE(FGRET_REGS_X7,offsetof(struct 
fgraph_ret_regs, regs[7]));
-  DEFINE(FGRET_REGS_FP,offsetof(struct 
fgraph_ret_regs, fp));
-  DEFINE(FGRET_REGS_SIZE,  sizeof(struct fgraph_ret_regs));
-#endif
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
   DEFINE(FTRACE_OPS_DIRECT_CALL,   offsetof(struct f

[PATCH v11 03/18] function_graph: Pass ftrace_regs to entryfunc

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Pass ftrace_regs to the fgraph_ops::entryfunc(). If ftrace_regs is not
available, it passes a NULL instead. User callback function can access
some registers (including return address) via this ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v11:
  - Update for the latest for-next branch.
 Changes in v8:
  - Just pass ftrace_regs to the handler instead of adding a new
entryregfunc.
  - Update riscv ftrace_graph_func().
 Changes in v3:
  - Update for new multiple fgraph.
---
 arch/arm64/kernel/ftrace.c   |   20 +++-
 arch/loongarch/kernel/ftrace_dyn.c   |   10 +-
 arch/powerpc/kernel/trace/ftrace.c   |2 +
 arch/powerpc/kernel/trace/ftrace_64_pg.c |   10 --
 arch/riscv/kernel/ftrace.c   |   17 ++
 arch/x86/kernel/ftrace.c |   50 +-
 include/linux/ftrace.h   |   18 ---
 kernel/trace/fgraph.c|   23 --
 kernel/trace/ftrace.c|3 +-
 kernel/trace/trace.h |3 +-
 kernel/trace/trace_functions_graph.c |3 +-
 kernel/trace/trace_irqsoff.c |3 +-
 kernel/trace/trace_sched_wakeup.c|3 +-
 kernel/trace/trace_selftest.c|8 +++--
 14 files changed, 128 insertions(+), 45 deletions(-)

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index a650f5e11fc5..bc647b725e6a 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -481,7 +481,25 @@ void prepare_ftrace_return(unsigned long self_addr, 
unsigned long *parent,
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
   struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
-   prepare_ftrace_return(ip, &fregs->lr, fregs->fp);
+   unsigned long return_hooker = (unsigned long)&return_to_handler;
+   unsigned long frame_pointer = fregs->fp;
+   unsigned long *parent = &fregs->lr;
+   unsigned long old;
+
+   if (unlikely(atomic_read(&current->tracing_graph_pause)))
+   return;
+
+   /*
+* Note:
+* No protection against faulting at *parent, which may be seen
+* on other archs. It's unlikely on AArch64.
+*/
+   old = *parent;
+
+   if (!function_graph_enter_regs(old, ip, frame_pointer,
+  (void *)frame_pointer, fregs)) {
+   *parent = return_hooker;
+   }
 }
 #else
 /*
diff --git a/arch/loongarch/kernel/ftrace_dyn.c 
b/arch/loongarch/kernel/ftrace_dyn.c
index bff058317062..966e0f7f7aca 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -243,8 +243,16 @@ void ftrace_graph_func(unsigned long ip, unsigned long 
parent_ip,
 {
struct pt_regs *regs = &fregs->regs;
	unsigned long *parent = (unsigned long *)&regs->regs[1];
+   unsigned long return_hooker = (unsigned long)&return_to_handler;
+   unsigned long old;
+
+   if (unlikely(atomic_read(&current->tracing_graph_pause)))
+   return;
+
+   old = *parent;
 
-   prepare_ftrace_return(ip, (unsigned long *)parent);
+   if (!function_graph_enter_regs(old, ip, 0, parent, fregs))
+   *parent = return_hooker;
 }
 #else
 static int ftrace_modify_graph_caller(bool enable)
diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index d8d6b4fd9a14..a1a0e0b57662 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -434,7 +434,7 @@ void ftrace_graph_func(unsigned long ip, unsigned long 
parent_ip,
if (bit < 0)
goto out;
 
-   if (!function_graph_enter(parent_ip, ip, 0, (unsigned long *)sp))
+   if (!function_graph_enter_regs(parent_ip, ip, 0, (unsigned long *)sp, 
fregs))
parent_ip = ppc_function_entry(return_to_handler);
 
ftrace_test_recursion_unlock(bit);
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c 
b/arch/powerpc/kernel/trace/ftrace_64_pg.c
index 12fab1803bcf..4ae9eeb1c8f1 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.c
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -800,7 +800,8 @@ int ftrace_disable_ftrace_graph_caller(void)
  * in current thread info. Return the address we want to divert to.
  */
 static unsigned long
-__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long 
sp)
+__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long 
sp,
+   struct ftrace_regs *fregs)
 {
unsigned long return_hooker;
int bit;
@@ -817,7 +818,7 @@ __prepare_ftrace_return(unsigned long parent, unsigned long 
ip, unsigned long sp
 
return_hooker = ppc_function_entry(return_to_handler);
 
-   if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
+   if (!function

[PATCH v11 02/18] tracing: Rename ftrace_regs_return_value to ftrace_regs_get_return_value

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Rename ftrace_regs_return_value to ftrace_regs_get_return_value as same as
other ftrace_regs_get/set_* APIs.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Mark Rutland 
---
 Changes in v6:
  - Moved to top of the series.
 Changes in v3:
  - Newly added.
---
 arch/loongarch/include/asm/ftrace.h |2 +-
 arch/powerpc/include/asm/ftrace.h   |2 +-
 arch/s390/include/asm/ftrace.h  |2 +-
 arch/x86/include/asm/ftrace.h   |2 +-
 include/linux/ftrace.h  |2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/loongarch/include/asm/ftrace.h 
b/arch/loongarch/include/asm/ftrace.h
index c0a682808e07..6f8517d59954 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -69,7 +69,7 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs 
*fregs, unsigned long ip)
regs_get_kernel_argument(&(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(&(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(&(fregs)->regs, ret)
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index 559560286e6d..23d26f3afae4 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -59,7 +59,7 @@ ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs)
regs_get_kernel_argument(&(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(&(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(&(fregs)->regs, ret)
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index fbadca645af7..de76c21eb4a3 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -83,7 +83,7 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
regs_get_kernel_argument(&(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(&(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(&(fregs)->regs, ret)
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 0152a81d9b4a..78f6a200e15b 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -56,7 +56,7 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
regs_get_kernel_argument(&(fregs)->regs, n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(&(fregs)->regs)
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(&(fregs)->regs, ret)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 3c8a19ea8f45..bf04b29f9da1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -183,7 +183,7 @@ static __always_inline bool ftrace_regs_has_args(struct 
ftrace_regs *fregs)
regs_get_kernel_argument(ftrace_get_regs(fregs), n)
 #define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(ftrace_get_regs(fregs))
-#define ftrace_regs_return_value(fregs) \
+#define ftrace_regs_get_return_value(fregs) \
regs_return_value(ftrace_get_regs(fregs))
 #define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(ftrace_get_regs(fregs), ret)




[PATCH v11 01/18] tracing: Add a comment about ftrace_regs definition

2024-06-16 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

To clarify what will be expected on ftrace_regs, add a comment to the
architecture independent definition of the ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Mark Rutland 
---
 Changes in v8:
  - Update that the saved registers depends on the context.
 Changes in v3:
  - Add instruction pointer
 Changes in v2:
  - newly added.
---
 include/linux/ftrace.h |   26 ++
 1 file changed, 26 insertions(+)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 845c2ab0bc1c..3c8a19ea8f45 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -117,6 +117,32 @@ extern int ftrace_enabled;
 
 #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
 
+/**
+ * ftrace_regs - ftrace partial/optimal register set
+ *
+ * ftrace_regs represents a group of registers which is used at the
+ * function entry and exit. There are three types of registers.
+ *
+ * - Registers for passing the parameters to callee, including the stack
+ *   pointer. (e.g. rcx, rdx, rdi, rsi, r8, r9 and rsp on x86_64)
+ * - Registers for passing the return values to caller.
+ *   (e.g. rax and rdx on x86_64)
+ * - Registers for hooking the function call and return including the
+ *   frame pointer (the frame pointer is architecture/config dependent)
+ *   (e.g. rip, rbp and rsp for x86_64)
+ *
+ * Also, architecture dependent fields can be used for internal process.
+ * (e.g. orig_ax on x86_64)
+ *
+ * On the function entry, those registers will be restored except for
+ * the stack pointer, so that user can change the function parameters
+ * and instruction pointer (e.g. live patching.)
+ * On the function exit, only registers which is used for return values
+ * are restored.
+ *
+ * NOTE: user *must not* access regs directly, only do it via APIs, because
+ * the member can be changed according to the architecture.
+ */
 struct ftrace_regs {
struct pt_regs  regs;
 };




[PATCH v11 00/18] tracing: fprobe: function_graph: Multi-function graph and fprobe on fgraph

2024-06-16 Thread Masami Hiramatsu (Google)
Hi,

Here is the 11th version of the series to re-implement the fprobe on
function-graph tracer. The previous version is;

https://lore.kernel.org/all/171509088006.162236.7227326999861366050.stgit@devnote2/

Most of the patches in the previous version (for multiple function graph
trace instance) are already merged via tracing/for-next. This version
is the remaining part, fprobe implement on fgraph. Basically just moves
on the updated fgraph implementation, and no major changes.

Overview

This series rewrites the fprobe on this function-graph.
The purposes of this change are;

 1) Remove dependency of the rethook from fprobe so that we can reduce
   the return hook code and shadow stack.

 2) Make 'ftrace_regs' the common trace interface for the function
   boundary.

1) Currently we have 2(or 3) different function return hook codes,
 the function-graph tracer and rethook (and legacy kretprobe).
 But since this  is redundant and needs double maintenance cost,
 I would like to unify those. From the user's viewpoint, function-
 graph tracer is very useful to grasp the execution path. For this
 purpose, it is hard to use the rethook in the function-graph
 tracer, but the opposite is possible. (Strictly speaking, kretprobe
 can not use it because it requires 'pt_regs' for historical reasons.)

2) Now the fprobe provides the 'pt_regs' for its handler, but that is
 wrong for the function entry and exit. Moreover, depending on the
 architecture, there is no way to accurately reproduce 'pt_regs'
 outside of interrupt or exception handlers. This means fprobe should
 not use 'pt_regs' because it does not use such exceptions.
 (Conversely, kprobe should use 'pt_regs' because it is an abstract
  interface of the software breakpoint exception.)

This series changes fprobe to use function-graph tracer for tracing
function entry and exit, instead of mixture of ftrace and rethook.
Unlike the rethook which is a per-task list of system-wide allocated
nodes, the function graph's ret_stack is a per-task shadow stack.
Thus it does not need to set 'nr_maxactive' (which is the number of
pre-allocated nodes).
Also the handlers will get the 'ftrace_regs' instead of 'pt_regs'.
Since eBPF mulit_kprobe/multi_kretprobe events still use 'pt_regs' as
their register interface, this changes it to convert 'ftrace_regs' to
'pt_regs'. Of course this conversion makes an incomplete 'pt_regs',
so users must access only registers for function parameters or
return value. 

Design
--
Instead of using ftrace's function entry hook directly, the new fprobe
is built on top of the function-graph's entry and return callbacks
with 'ftrace_regs'.

Since the fprobe requires access to 'ftrace_regs', the architecture
must support CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS and
CONFIG_HAVE_FTRACE_GRAPH_FUNC, which enables to call function-graph
entry callback with 'ftrace_regs', and also
CONFIG_HAVE_FUNCTION_GRAPH_FREGS, which passes the ftrace_regs to
return_to_handler.

All fprobes share a single function-graph ops (means shares a common
ftrace filter) similar to the kprobe-on-ftrace. This needs another
layer to find corresponding fprobe in the common function-graph
callbacks, but has much better scalability, since the number of
registered function-graph ops is limited.

In the entry callback, the fprobe runs its entry_handler and saves the
address of 'fprobe' on the function-graph's shadow stack as data. The
return callback decodes the data to get the 'fprobe' address, and runs
the exit_handler.

The fprobe introduces two hash-tables, one is for entry callback which
searches fprobes related to the given function address passed by entry
callback. The other is for a return callback which checks if the given
'fprobe' data structure pointer is still valid. Note that it is
possible to unregister fprobe before the return callback runs. Thus
the address validation must be done before using it in the return
callback.

Download

This series can be applied against the ftrace/for-next branch in
linux-trace tree.

This series can also be found below branch.

https://git.kernel.org/pub/scm/linux/kernel/git/mhiramat/linux.git/log/?h=topic/fprobe-on-fgraph

Thank you,

---

Masami Hiramatsu (Google) (18):
  tracing: Add a comment about ftrace_regs definition
  tracing: Rename ftrace_regs_return_value to ftrace_regs_get_return_value
  function_graph: Pass ftrace_regs to entryfunc
  function_graph: Replace fgraph_ret_regs with ftrace_regs
  function_graph: Pass ftrace_regs to retfunc
  fprobe: Use ftrace_regs in fprobe entry handler
  fprobe: Use ftrace_regs in fprobe exit handler
  tracing: Add ftrace_partial_regs() for converting ftrace_regs to pt_regs
  tracing: Add ftrace_fill_perf_regs() for perf event
 

[PATCH v3 3/3] tracing/kprobe: Remove cleanup code unrelated to selftest

2024-06-11 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

This cleanup of all kprobe events is not related to the selftest
itself, and it can fail for a reason unrelated to this test.
If the test is successful, the generated events are cleaned up.
And if not, we cannot guarantee that the kprobe events will work
correctly. So, anyway, there is no need to clean it up.

Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/trace_kprobe.c |4 
 1 file changed, 4 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8c5816c04bd2..7fd0f8576e4c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -2114,10 +2114,6 @@ static __init int kprobe_trace_self_tests_init(void)
 
 
 end:
-   ret = dyn_events_release_all(&trace_kprobe_ops);
-   if (WARN_ONCE(ret, "error on cleaning up probes."))
-   warn++;
-
/*
 * Wait for the optimizer work to finish. Otherwise it might fiddle
 * with probes in already freed __init text.




[PATCH v3 2/3] tracing/kprobe: Integrate test warnings into WARN_ONCE

2024-06-11 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Cleanup the redundant WARN_ON_ONCE(cond) + pr_warn(msg) into
WARN_ONCE(cond, msg). Also add some WARN_ONCE() for the hitcount check.
These WARN_ONCE() errors make it easy to handle errors from ktest.

Suggested-by: Steven Rostedt 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v3:
  - integrate WARN_ON_ONCE() and pr_warn() instead of remove
WARN_ONCE().
---
 kernel/trace/trace_kprobe.c |   54 +++
 1 file changed, 19 insertions(+), 35 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 16383247bdbf..8c5816c04bd2 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -2023,19 +2023,16 @@ static __init int kprobe_trace_self_tests_init(void)
pr_info("Testing kprobe tracing: ");
 
ret = create_or_delete_trace_kprobe("p:testprobe 
kprobe_trace_selftest_target $stack $stack0 +0($stack)");
-   if (WARN_ON_ONCE(ret)) {
-   pr_warn("error on probing function entry.\n");
+   if (WARN_ONCE(ret, "error on probing function entry.")) {
warn++;
} else {
/* Enable trace point */
tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
-   if (WARN_ON_ONCE(tk == NULL)) {
-   pr_warn("error on getting new probe.\n");
+   if (WARN_ONCE(tk == NULL, "error on probing function entry.")) {
warn++;
} else {
file = find_trace_probe_file(tk, top_trace_array());
-   if (WARN_ON_ONCE(file == NULL)) {
-   pr_warn("error on getting probe file.\n");
+   if (WARN_ONCE(file == NULL, "error on getting probe 
file.")) {
warn++;
} else
enable_trace_kprobe(
@@ -2044,19 +2041,16 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
ret = create_or_delete_trace_kprobe("r:testprobe2 
kprobe_trace_selftest_target $retval");
-   if (WARN_ON_ONCE(ret)) {
-   pr_warn("error on probing function return.\n");
+   if (WARN_ONCE(ret, "error on probing function return.")) {
warn++;
} else {
/* Enable trace point */
tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
-   if (WARN_ON_ONCE(tk == NULL)) {
-   pr_warn("error on getting 2nd new probe.\n");
+   if (WARN_ONCE(tk == NULL, "error on getting 2nd new probe.")) {
warn++;
} else {
file = find_trace_probe_file(tk, top_trace_array());
-   if (WARN_ON_ONCE(file == NULL)) {
-   pr_warn("error on getting probe file.\n");
+   if (WARN_ONCE(file == NULL, "error on getting probe 
file.")) {
warn++;
} else
enable_trace_kprobe(
@@ -2079,18 +2073,15 @@ static __init int kprobe_trace_self_tests_init(void)
 
/* Disable trace points before removing it */
tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
-   if (WARN_ON_ONCE(tk == NULL)) {
-   pr_warn("error on getting test probe.\n");
+   if (WARN_ONCE(tk == NULL, "error on getting test probe.")) {
warn++;
} else {
-   if (trace_kprobe_nhit(tk) != 1) {
-   pr_warn("incorrect number of testprobe hits\n");
+   if (WARN_ONCE(trace_kprobe_nhit(tk) != 1,
+"incorrect number of testprobe hits."))
warn++;
-   }
 
file = find_trace_probe_file(tk, top_trace_array());
-   if (WARN_ON_ONCE(file == NULL)) {
-   pr_warn("error on getting probe file.\n");
+   if (WARN_ONCE(file == NULL, "error on getting probe file.")) {
warn++;
} else
disable_trace_kprobe(
@@ -2098,18 +2089,15 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
-   if (WARN_ON_ONCE(tk == NULL)) {
-   pr_warn("error on getting 2nd test probe.\n");
+   if (WARN_ONCE(tk == NULL, "error on getting 2nd test probe.")) {
warn++;
} else {
-   if (trace_kprobe_nhit(tk) != 1) {
-   pr_warn("incorrect number of testprobe2 hits\n");
+

[PATCH v3 1/3] tracing: Build event generation tests only as modules

2024-06-11 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

The kprobes and synth event generation test modules add events and lock
(get a reference) those event file reference in module init function,
and unlock and delete it in module exit function. This is because those
are designed for playing as modules.

If we make those modules as built-in, those events are left locked in the
kernel, and never be removed. This causes kprobe event self-test failure
as below.

[   97.349708] [ cut here ]
[   97.353453] WARNING: CPU: 3 PID: 1 at kernel/trace/trace_kprobe.c:2133 
kprobe_trace_self_tests_init+0x3f1/0x480
[   97.357106] Modules linked in:
[   97.358488] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 
6.9.0-g699646734ab5-dirty #14
[   97.361556] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.15.0-1 04/01/2014
[   97.363880] RIP: 0010:kprobe_trace_self_tests_init+0x3f1/0x480
[   97.365538] Code: a8 24 08 82 e9 ae fd ff ff 90 0f 0b 90 48 c7 c7 e5 aa 0b 
82 e9 ee fc ff ff 90 0f 0b 90 48 c7 c7 2d 61 06 82 e9 8e fd ff ff 90 <0f> 0b 90 
48 c7 c7 33 0b 0c 82 89 c6 e8 6e 03 1f ff 41 ff c7 e9 90
[   97.370429] RSP: :c9013b50 EFLAGS: 00010286
[   97.371852] RAX: fff0 RBX: 888005919c00 RCX: 
[   97.373829] RDX: 888003f4 RSI: 8236a598 RDI: 888003f40a68
[   97.375715] RBP:  R08: 0001 R09: 
[   97.377675] R10: 811c9ae5 R11: 8120c4e0 R12: 
[   97.379591] R13: 0001 R14: 0015 R15: 
[   97.381536] FS:  () GS:88807dcc() 
knlGS:
[   97.383813] CS:  0010 DS:  ES:  CR0: 80050033
[   97.385449] CR2:  CR3: 02244000 CR4: 06b0
[   97.387347] DR0:  DR1:  DR2: 
[   97.389277] DR3:  DR6: fffe0ff0 DR7: 0400
[   97.391196] Call Trace:
[   97.391967]  
[   97.392647]  ? __warn+0xcc/0x180
[   97.393640]  ? kprobe_trace_self_tests_init+0x3f1/0x480
[   97.395181]  ? report_bug+0xbd/0x150
[   97.396234]  ? handle_bug+0x3e/0x60
[   97.397311]  ? exc_invalid_op+0x1a/0x50
[   97.398434]  ? asm_exc_invalid_op+0x1a/0x20
[   97.399652]  ? trace_kprobe_is_busy+0x20/0x20
[   97.400904]  ? tracing_reset_all_online_cpus+0x15/0x90
[   97.402304]  ? kprobe_trace_self_tests_init+0x3f1/0x480
[   97.403773]  ? init_kprobe_trace+0x50/0x50
[   97.404972]  do_one_initcall+0x112/0x240
[   97.406113]  do_initcall_level+0x95/0xb0
[   97.407286]  ? kernel_init+0x1a/0x1a0
[   97.408401]  do_initcalls+0x3f/0x70
[   97.409452]  kernel_init_freeable+0x16f/0x1e0
[   97.410662]  ? rest_init+0x1f0/0x1f0
[   97.411738]  kernel_init+0x1a/0x1a0
[   97.412788]  ret_from_fork+0x39/0x50
[   97.413817]  ? rest_init+0x1f0/0x1f0
[   97.414844]  ret_from_fork_asm+0x11/0x20
[   97.416285]  
[   97.417134] irq event stamp: 13437323
[   97.418376] hardirqs last  enabled at (13437337): [] 
console_unlock+0x11c/0x150
[   97.421285] hardirqs last disabled at (13437370): [] 
console_unlock+0x101/0x150
[   97.423838] softirqs last  enabled at (13437366): [] 
handle_softirqs+0x23f/0x2a0
[   97.426450] softirqs last disabled at (13437393): [] 
__irq_exit_rcu+0x66/0xd0
[   97.428850] ---[ end trace  ]---

Also, since we cannot clean up the dynamic_event file, ftracetest
fails too.

To avoid these issues, build these tests only as modules.

Fixes: 9fe41efaca08 ("tracing: Add synth event generation test module")
Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module")
Signed-off-by: Masami Hiramatsu (Google) 
Reviewed-by: Steven Rostedt (Google) 
---
 kernel/trace/Kconfig |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 166ad5444eea..721c3b221048 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1136,7 +1136,7 @@ config PREEMPTIRQ_DELAY_TEST
 
 config SYNTH_EVENT_GEN_TEST
tristate "Test module for in-kernel synthetic event generation"
-   depends on SYNTH_EVENTS
+   depends on SYNTH_EVENTS && m
help
   This option creates a test module to check the base
   functionality of in-kernel synthetic event definition and
@@ -1149,7 +1149,7 @@ config SYNTH_EVENT_GEN_TEST
 
 config KPROBE_EVENT_GEN_TEST
tristate "Test module for in-kernel kprobe event generation"
-   depends on KPROBE_EVENTS
+   depends on KPROBE_EVENTS && m
help
   This option creates a test module to check the base
   functionality of in-kernel kprobe event definition.




[PATCH v3 0/3] tracing: Fix some selftest issues

2024-06-11 Thread Masami Hiramatsu (Google)
Hi,

Here is v3 of a series of some fixes/cleanups for the test modules and
boot time selftest of kprobe events. The previous version is here;

https://lore.kernel.org/all/171805478534.52471.6269290579314514778.stgit@devnote2/

In this version, I updated the 2nd patch to integrate WARN_ON_ONCE() and
pr_warn() instead of removing WARN_ONCE(), because these warning messages
are needed by ktest to handle errors.

Thank you,

---

Masami Hiramatsu (Google) (3):
  tracing: Build event generation tests only as modules
  tracing/kprobe: Integrate test warnings into WARN_ONCE
  tracing/kprobe: Remove cleanup code unrelated to selftest


 kernel/trace/Kconfig|4 ++-
 kernel/trace/trace_kprobe.c |   54 ++-
 2 files changed, 19 insertions(+), 39 deletions(-)

--
Masami Hiramatsu (Google) 



[PATCH 3/3] tracing/kprobe: Remove cleanup code unrelated to selftest

2024-06-10 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

This cleanup of all kprobe events is not related to the selftest
itself, and it can fail for a reason unrelated to this test.
If the test is successful, the generated events are cleaned up.
And if not, we cannot guarantee that the kprobe events will work
correctly. So, anyway, there is no need to clean it up.

Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/trace_kprobe.c |5 -
 1 file changed, 5 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 4abed36544d0..f94628c15c14 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -2129,11 +2129,6 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
 end:
-   ret = dyn_events_release_all(&trace_kprobe_ops);
-   if (ret) {
-   pr_warn("error on cleaning up probes.\n");
-   warn++;
-   }
/*
 * Wait for the optimizer work to finish. Otherwise it might fiddle
 * with probes in already freed __init text.




[PATCH 2/3] tracing/kprobe: Remove unneeded WARN_ON_ONCE() in selftests

2024-06-10 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Since the kprobe-events selftest shows OK or NG with the reason, the
WARN_ON_ONCE()s for each place are redundant. Let's remove it.

Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/trace_kprobe.c |   26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 16383247bdbf..4abed36544d0 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -2023,18 +2023,18 @@ static __init int kprobe_trace_self_tests_init(void)
pr_info("Testing kprobe tracing: ");
 
ret = create_or_delete_trace_kprobe("p:testprobe 
kprobe_trace_selftest_target $stack $stack0 +0($stack)");
-   if (WARN_ON_ONCE(ret)) {
+   if (ret) {
pr_warn("error on probing function entry.\n");
warn++;
} else {
/* Enable trace point */
tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
-   if (WARN_ON_ONCE(tk == NULL)) {
+   if (tk == NULL) {
pr_warn("error on getting new probe.\n");
warn++;
} else {
file = find_trace_probe_file(tk, top_trace_array());
-   if (WARN_ON_ONCE(file == NULL)) {
+   if (file == NULL) {
pr_warn("error on getting probe file.\n");
warn++;
} else
@@ -2044,18 +2044,18 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
ret = create_or_delete_trace_kprobe("r:testprobe2 
kprobe_trace_selftest_target $retval");
-   if (WARN_ON_ONCE(ret)) {
+   if (ret) {
pr_warn("error on probing function return.\n");
warn++;
} else {
/* Enable trace point */
tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
-   if (WARN_ON_ONCE(tk == NULL)) {
+   if (tk == NULL) {
pr_warn("error on getting 2nd new probe.\n");
warn++;
} else {
file = find_trace_probe_file(tk, top_trace_array());
-   if (WARN_ON_ONCE(file == NULL)) {
+   if (file == NULL) {
pr_warn("error on getting probe file.\n");
warn++;
} else
@@ -2079,7 +2079,7 @@ static __init int kprobe_trace_self_tests_init(void)
 
/* Disable trace points before removing it */
tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
-   if (WARN_ON_ONCE(tk == NULL)) {
+   if (tk == NULL) {
pr_warn("error on getting test probe.\n");
warn++;
} else {
@@ -2089,7 +2089,7 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
file = find_trace_probe_file(tk, top_trace_array());
-   if (WARN_ON_ONCE(file == NULL)) {
+   if (file == NULL) {
pr_warn("error on getting probe file.\n");
warn++;
} else
@@ -2098,7 +2098,7 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
-   if (WARN_ON_ONCE(tk == NULL)) {
+   if (tk == NULL) {
pr_warn("error on getting 2nd test probe.\n");
warn++;
} else {
@@ -2108,7 +2108,7 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
file = find_trace_probe_file(tk, top_trace_array());
-   if (WARN_ON_ONCE(file == NULL)) {
+   if (file == NULL) {
pr_warn("error on getting probe file.\n");
warn++;
} else
@@ -2117,20 +2117,20 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
ret = create_or_delete_trace_kprobe("-:testprobe");
-   if (WARN_ON_ONCE(ret)) {
+   if (ret) {
pr_warn("error on deleting a probe.\n");
warn++;
}
 
ret = create_or_delete_trace_kprobe("-:testprobe2");
-   if (WARN_ON_ONCE(ret)) {
+   if (ret) {
pr_warn("error on deleting a probe.\n");
warn++;
}
 
 end:
ret = dyn_events_release_all(&trace_kprobe_ops);
-   if (WARN_ON_ONCE(ret)) {
+   if (ret) {
pr_warn("error on cleaning up probes.\n");
warn++;
}




[PATCH 1/3] tracing: Build event generation tests only as modules

2024-06-10 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

The kprobes and synth event generation test modules add events and lock
(get a reference) those event file reference in module init function,
and unlock and delete it in module exit function. This is because those
are designed for playing as modules.

If we make those modules as built-in, those events are left locked in the
kernel, and never be removed. This causes kprobe event self-test failure
as below.

[   97.349708] [ cut here ]
[   97.353453] WARNING: CPU: 3 PID: 1 at kernel/trace/trace_kprobe.c:2133 
kprobe_trace_self_tests_init+0x3f1/0x480
[   97.357106] Modules linked in:
[   97.358488] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 
6.9.0-g699646734ab5-dirty #14
[   97.361556] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.15.0-1 04/01/2014
[   97.363880] RIP: 0010:kprobe_trace_self_tests_init+0x3f1/0x480
[   97.365538] Code: a8 24 08 82 e9 ae fd ff ff 90 0f 0b 90 48 c7 c7 e5 aa 0b 
82 e9 ee fc ff ff 90 0f 0b 90 48 c7 c7 2d 61 06 82 e9 8e fd ff ff 90 <0f> 0b 90 
48 c7 c7 33 0b 0c 82 89 c6 e8 6e 03 1f ff 41 ff c7 e9 90
[   97.370429] RSP: :c9013b50 EFLAGS: 00010286
[   97.371852] RAX: fff0 RBX: 888005919c00 RCX: 
[   97.373829] RDX: 888003f4 RSI: 8236a598 RDI: 888003f40a68
[   97.375715] RBP:  R08: 0001 R09: 
[   97.377675] R10: 811c9ae5 R11: 8120c4e0 R12: 
[   97.379591] R13: 0001 R14: 0015 R15: 
[   97.381536] FS:  () GS:88807dcc() 
knlGS:
[   97.383813] CS:  0010 DS:  ES:  CR0: 80050033
[   97.385449] CR2:  CR3: 02244000 CR4: 06b0
[   97.387347] DR0:  DR1:  DR2: 
[   97.389277] DR3:  DR6: fffe0ff0 DR7: 0400
[   97.391196] Call Trace:
[   97.391967]  
[   97.392647]  ? __warn+0xcc/0x180
[   97.393640]  ? kprobe_trace_self_tests_init+0x3f1/0x480
[   97.395181]  ? report_bug+0xbd/0x150
[   97.396234]  ? handle_bug+0x3e/0x60
[   97.397311]  ? exc_invalid_op+0x1a/0x50
[   97.398434]  ? asm_exc_invalid_op+0x1a/0x20
[   97.399652]  ? trace_kprobe_is_busy+0x20/0x20
[   97.400904]  ? tracing_reset_all_online_cpus+0x15/0x90
[   97.402304]  ? kprobe_trace_self_tests_init+0x3f1/0x480
[   97.403773]  ? init_kprobe_trace+0x50/0x50
[   97.404972]  do_one_initcall+0x112/0x240
[   97.406113]  do_initcall_level+0x95/0xb0
[   97.407286]  ? kernel_init+0x1a/0x1a0
[   97.408401]  do_initcalls+0x3f/0x70
[   97.409452]  kernel_init_freeable+0x16f/0x1e0
[   97.410662]  ? rest_init+0x1f0/0x1f0
[   97.411738]  kernel_init+0x1a/0x1a0
[   97.412788]  ret_from_fork+0x39/0x50
[   97.413817]  ? rest_init+0x1f0/0x1f0
[   97.414844]  ret_from_fork_asm+0x11/0x20
[   97.416285]  
[   97.417134] irq event stamp: 13437323
[   97.418376] hardirqs last  enabled at (13437337): [] 
console_unlock+0x11c/0x150
[   97.421285] hardirqs last disabled at (13437370): [] 
console_unlock+0x101/0x150
[   97.423838] softirqs last  enabled at (13437366): [] 
handle_softirqs+0x23f/0x2a0
[   97.426450] softirqs last disabled at (13437393): [] 
__irq_exit_rcu+0x66/0xd0
[   97.428850] ---[ end trace  ]---

Also, since we cannot clean up the dynamic_event file, ftracetest
fails too.

To avoid these issues, build these tests only as modules.

Fixes: 9fe41efaca08 ("tracing: Add synth event generation test module")
Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module")
Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/Kconfig |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 166ad5444eea..721c3b221048 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1136,7 +1136,7 @@ config PREEMPTIRQ_DELAY_TEST
 
 config SYNTH_EVENT_GEN_TEST
tristate "Test module for in-kernel synthetic event generation"
-   depends on SYNTH_EVENTS
+   depends on SYNTH_EVENTS && m
help
   This option creates a test module to check the base
   functionality of in-kernel synthetic event definition and
@@ -1149,7 +1149,7 @@ config SYNTH_EVENT_GEN_TEST
 
 config KPROBE_EVENT_GEN_TEST
tristate "Test module for in-kernel kprobe event generation"
-   depends on KPROBE_EVENTS
+   depends on KPROBE_EVENTS && m
help
   This option creates a test module to check the base
   functionality of in-kernel kprobe event definition.




[PATCH 0/3] tracing: Fix some selftest issues

2024-06-10 Thread Masami Hiramatsu (Google)
Hi,

Here is v2 of a series of some fixes/cleanups for the test modules and
boot time selftest of kprobe events. The previous version is here;

https://lore.kernel.org/all/171671825710.39694.6859036369216249956.stgit@devnote2/

In this version, I just update the description of the first patch to add
what bad things happen when the modules are built in.

I found a WARNING message with some boot time selftest configuration, which
came from the combination of embedded kprobe generate API tests module and
ftrace boot-time selftest. Since kprobe and synthetic event generation API
test modules add new events and lock them, the dynamic event remove-all
operation fails. This also causes all ftracetest cases to fail, because it
tries to clean up all dynamic events before running test cases.

The main problem is that these modules should not be built-in. But I also
think this WARNING message is useless (because there are warning messages
already) and the cleanup code is redundant. This series fixes those issues.

Thank you,

---

Masami Hiramatsu (Google) (3):
  tracing: Build event generation tests only as modules
  tracing/kprobe: Remove unneeded WARN_ON_ONCE() in selftests
  tracing/kprobe: Remove cleanup code unrelated to selftest


 kernel/trace/Kconfig|4 ++--
 kernel/trace/trace_kprobe.c |   29 -
 2 files changed, 14 insertions(+), 19 deletions(-)

--
Masami Hiramatsu (Google) 



[PATCH v2 3/3] sefltests/tracing: Add a test for tracepoint events on modules

2024-06-01 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add a test case for tracepoint events on modules. This checks if it can add
and remove the events correctly.

Signed-off-by: Masami Hiramatsu (Google) 
---
 tools/testing/selftests/ftrace/config  |1 +
 .../test.d/dynevent/add_remove_tprobe_module.tc|   34 
 2 files changed, 35 insertions(+)
 create mode 100644 
tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc

diff --git a/tools/testing/selftests/ftrace/config 
b/tools/testing/selftests/ftrace/config
index 048a312abf40..544de0db5f58 100644
--- a/tools/testing/selftests/ftrace/config
+++ b/tools/testing/selftests/ftrace/config
@@ -20,6 +20,7 @@ CONFIG_PREEMPT_TRACER=y
 CONFIG_PROBE_EVENTS_BTF_ARGS=y
 CONFIG_SAMPLES=y
 CONFIG_SAMPLE_FTRACE_DIRECT=m
+CONFIG_SAMPLE_TRACE_EVENTS=m
 CONFIG_SAMPLE_TRACE_PRINTK=m
 CONFIG_SCHED_TRACER=y
 CONFIG_STACK_TRACER=y
diff --git 
a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc 
b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc
new file mode 100644
index ..2caed9454caa
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc
@@ -0,0 +1,34 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove tracepoint probe events on 
module
+# requires: dynamic_events "t[:[/][]]  
[]":README
+
+rmmod trace-events-sample ||:
+if ! modprobe trace-events-sample ; then
+  echo "No trace-events sample module - please make 
CONFIG_SAMPLE_TRACE_EVENTS=m"
+  exit_unresolved;
+fi
+trap "rmmod trace-events-sample" EXIT
+
+echo 0 > events/enable
+echo > dynamic_events
+
+TRACEPOINT1=foo_bar
+TRACEPOINT2=foo_bar_with_cond
+
+echo "t:myevent1 $TRACEPOINT1" >> dynamic_events
+echo "t:myevent2 $TRACEPOINT2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+grep -q myevent2 dynamic_events
+test -d events/tracepoints/myevent1
+test -d events/tracepoints/myevent2
+
+echo "-:myevent2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+! grep -q myevent2 dynamic_events
+
+echo > dynamic_events
+
+clear_trace




[PATCH v2 2/3] tracing/fprobe: Support raw tracepoint events on modules

2024-06-01 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Support raw tracepoint event on module by fprobe events.
Since it only uses for_each_kernel_tracepoint() to find a tracepoint,
the tracepoints on modules are not handled. Thus if a user specifies a
tracepoint on a module, it shows an error.
This adds new for_each_module_tracepoint() API to tracepoint subsystem,
and uses it to find tracepoints on modules.

Reported-by: don 
Closes: 
https://lore.kernel.org/all/20240530215718.aeec973a1d0bf058d39cb...@kernel.org/
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Fix build errors with CONFIG_MODULES=y.
---
 kernel/trace/trace_fprobe.c |   46 ---
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 62e6a8f4aae9..1d8a983e1edc 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -385,6 +385,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
   const char *event,
   const char *symbol,
   struct tracepoint *tpoint,
+  struct module *mod,
   int maxactive,
   int nargs, bool is_return)
 {
@@ -405,6 +406,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
tf->fp.entry_handler = fentry_dispatcher;
 
tf->tpoint = tpoint;
+   tf->mod = mod;
tf->fp.nr_maxactive = maxactive;
 
ret = trace_probe_init(&tf->tp, event, group, false, nargs);
@@ -895,8 +897,23 @@ static struct notifier_block tracepoint_module_nb = {
 struct __find_tracepoint_cb_data {
const char *tp_name;
struct tracepoint *tpoint;
+   struct module *mod;
 };
 
+static void __find_tracepoint_module_cb(struct tracepoint *tp, void *priv)
+{
+   struct __find_tracepoint_cb_data *data = priv;
+
+   if (!data->tpoint && !strcmp(data->tp_name, tp->name)) {
+   data->tpoint = tp;
+   data->mod = __module_text_address((unsigned long)tp->probestub);
+   if (!try_module_get(data->mod)) {
+   data->tpoint = NULL;
+   data->mod = NULL;
+   }
+   }
+}
+
 static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
 {
struct __find_tracepoint_cb_data *data = priv;
@@ -905,14 +922,28 @@ static void __find_tracepoint_cb(struct tracepoint *tp, 
void *priv)
data->tpoint = tp;
 }
 
-static struct tracepoint *find_tracepoint(const char *tp_name)
+/*
+ * Find a tracepoint from kernel and module. If the tracepoint is in a module,
+ * this increments the module refcount to prevent unloading until the
+ * trace_fprobe is registered to the list. After registering the trace_fprobe
+ * on the trace_fprobe list, the module refcount is decremented because
+ * tracepoint_probe_module_cb will handle it.
+ */
+static struct tracepoint *find_tracepoint(const char *tp_name,
+ struct module **tp_mod)
 {
struct __find_tracepoint_cb_data data = {
.tp_name = tp_name,
+   .mod = NULL,
};
 
for_each_kernel_tracepoint(__find_tracepoint_cb, &data);
 
+   if (!data.tpoint && IS_ENABLED(CONFIG_MODULES)) {
+   for_each_module_tracepoint(__find_tracepoint_module_cb, &data);
+   *tp_mod = data.mod;
+   }
+
return data.tpoint;
 }
 
@@ -996,6 +1027,7 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
char abuf[MAX_BTF_ARGS_LEN];
char *dbuf = NULL;
bool is_tracepoint = false;
+   struct module *tp_mod = NULL;
struct tracepoint *tpoint = NULL;
struct traceprobe_parse_context ctx = {
.flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
@@ -1080,7 +1112,7 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
 
if (is_tracepoint) {
ctx.flags |= TPARG_FL_TPOINT;
-   tpoint = find_tracepoint(symbol);
+   tpoint = find_tracepoint(symbol, &tp_mod);
if (!tpoint) {
trace_probe_log_set_index(1);
trace_probe_log_err(0, NO_TRACEPOINT);
@@ -1110,8 +1142,8 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
goto out;
 
/* setup a probe */
-   tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive,
-   argc, is_return);
+   tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod,
+   maxactive, argc, is_return);
if (IS_ERR(tf)) {
ret = PTR_ERR(tf);
  

[PATCH v2 1/3] tracepoint: Support iterating over tracepoints on modules

2024-06-01 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add for_each_module_tracepoint() for iterating over tracepoints
on modules. This is similar to the for_each_kernel_tracepoint()
but only for the tracepoints on modules (not including kernel
built-in tracepoints).

Signed-off-by: Masami Hiramatsu (Google) 
---
 include/linux/tracepoint.h |7 +++
 kernel/tracepoint.c|   16 
 2 files changed, 23 insertions(+)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 689b6d71590e..46e6a5e759fd 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -65,6 +65,8 @@ struct tp_module {
 bool trace_module_has_bad_taint(struct module *mod);
 extern int register_tracepoint_module_notifier(struct notifier_block *nb);
 extern int unregister_tracepoint_module_notifier(struct notifier_block *nb);
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *),
+   void *priv);
 #else
 static inline bool trace_module_has_bad_taint(struct module *mod)
 {
@@ -80,6 +82,11 @@ int unregister_tracepoint_module_notifier(struct 
notifier_block *nb)
 {
return 0;
 }
+static inline
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *),
+   void *priv)
+{
+}
 #endif /* CONFIG_MODULES */
 
 /*
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 8d1507dd0724..b9b90dc46ab1 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -735,6 +735,22 @@ static __init int init_tracepoints(void)
return ret;
 }
 __initcall(init_tracepoints);
+
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
+   void *priv)
+{
+   struct tp_module *tp_mod;
+   struct module *mod;
+
+   mutex_lock(&tracepoint_module_list_mutex);
+   list_for_each_entry(tp_mod, &tracepoint_module_list, list) {
+   mod = tp_mod->mod;
+   for_each_tracepoint_range(mod->tracepoints_ptrs,
+   mod->tracepoints_ptrs + mod->num_tracepoints,
+   fct, priv);
+   }
+   mutex_unlock(&tracepoint_module_list_mutex);
+}
 #endif /* CONFIG_MODULES */
 
 /**




[PATCH v2 0/3] tracing/probes: Support tracepoint events on modules

2024-06-01 Thread Masami Hiramatsu (Google)
Hi,

This series implements the tracepoint events on modules.
This version separates a patch for tracepoint subsystem from
fprobe-event patch, and adds a selftests for tracepoint
events on modules.

Thank you,

---

Masami Hiramatsu (Google) (3):
  tracepoint: Support iterating over tracepoints on modules
  tracing/fprobe: Support raw tracepoint events on modules
  selftests/tracing: Add a test for tracepoint events on modules


 include/linux/tracepoint.h |7 +++
 kernel/trace/trace_fprobe.c|   46 +---
 kernel/tracepoint.c|   16 +++
 tools/testing/selftests/ftrace/config  |1 
 .../test.d/dynevent/add_remove_tprobe_module.tc|   34 +++
 5 files changed, 96 insertions(+), 8 deletions(-)
 create mode 100644 
tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc

--
Masami Hiramatsu (Google) 



[PATCH] tracing/fprobe: Support raw tracepoint events on modules

2024-05-31 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Support raw tracepoint event on module by fprobe events.
Since it only uses for_each_kernel_tracepoint() to find a tracepoint,
the tracepoints on modules are not handled. Thus if user specified a
tracepoint on a module, it shows an error.
This adds new for_each_module_tracepoint() API to tracepoint subsystem,
and uses it to find tracepoints on modules.

Reported-by: don 
Closes: 
https://lore.kernel.org/all/20240530215718.aeec973a1d0bf058d39cb...@kernel.org/
Signed-off-by: Masami Hiramatsu (Google) 
---
 include/linux/tracepoint.h  |7 +++
 kernel/trace/trace_fprobe.c |   46 ---
 kernel/tracepoint.c |   19 ++
 3 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 689b6d71590e..46e6a5e759fd 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -65,6 +65,8 @@ struct tp_module {
 bool trace_module_has_bad_taint(struct module *mod);
 extern int register_tracepoint_module_notifier(struct notifier_block *nb);
 extern int unregister_tracepoint_module_notifier(struct notifier_block *nb);
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *),
+   void *priv);
 #else
 static inline bool trace_module_has_bad_taint(struct module *mod)
 {
@@ -80,6 +82,11 @@ int unregister_tracepoint_module_notifier(struct 
notifier_block *nb)
 {
return 0;
 }
+static inline
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *),
+   void *priv)
+{
+}
 #endif /* CONFIG_MODULES */
 
 /*
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 62e6a8f4aae9..1d8a983e1edc 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -385,6 +385,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
   const char *event,
   const char *symbol,
   struct tracepoint *tpoint,
+  struct module *mod,
   int maxactive,
   int nargs, bool is_return)
 {
@@ -405,6 +406,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
tf->fp.entry_handler = fentry_dispatcher;
 
tf->tpoint = tpoint;
+   tf->mod = mod;
tf->fp.nr_maxactive = maxactive;
 
ret = trace_probe_init(&tf->tp, event, group, false, nargs);
@@ -895,8 +897,23 @@ static struct notifier_block tracepoint_module_nb = {
 struct __find_tracepoint_cb_data {
const char *tp_name;
struct tracepoint *tpoint;
+   struct module *mod;
 };
 
+static void __find_tracepoint_module_cb(struct tracepoint *tp, void *priv)
+{
+   struct __find_tracepoint_cb_data *data = priv;
+
+   if (!data->tpoint && !strcmp(data->tp_name, tp->name)) {
+   data->tpoint = tp;
+   data->mod = __module_text_address((unsigned long)tp->probestub);
+   if (!try_module_get(data->mod)) {
+   data->tpoint = NULL;
+   data->mod = NULL;
+   }
+   }
+}
+
 static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
 {
struct __find_tracepoint_cb_data *data = priv;
@@ -905,14 +922,28 @@ static void __find_tracepoint_cb(struct tracepoint *tp, 
void *priv)
data->tpoint = tp;
 }
 
-static struct tracepoint *find_tracepoint(const char *tp_name)
+/*
+ * Find a tracepoint from kernel and module. If the tracepoint is in a module,
+ * this increments the module refcount to prevent unloading until the
+ * trace_fprobe is registered to the list. After registering the trace_fprobe
+ * on the trace_fprobe list, the module refcount is decremented because
+ * tracepoint_probe_module_cb will handle it.
+ */
+static struct tracepoint *find_tracepoint(const char *tp_name,
+ struct module **tp_mod)
 {
struct __find_tracepoint_cb_data data = {
.tp_name = tp_name,
+   .mod = NULL,
};
 
for_each_kernel_tracepoint(__find_tracepoint_cb, &data);
 
+   if (!data.tpoint && IS_ENABLED(CONFIG_MODULES)) {
+   for_each_module_tracepoint(__find_tracepoint_module_cb, &data);
+   *tp_mod = data.mod;
+   }
+
return data.tpoint;
 }
 
@@ -996,6 +1027,7 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
char abuf[MAX_BTF_ARGS_LEN];
char *dbuf = NULL;
bool is_tracepoint = false;
+   struct module *tp_mod = NULL;
struct tracepoint *tpoint = NULL;
struct traceprobe_parse_context ctx = {
.fl

[PATCH 3/3] tracing/kprobe: Remove cleanup code unrelated to selftest

2024-05-26 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

This cleanup of all kprobe events is not related to the selftest
itself, and it can fail for reasons unrelated to this test.
If the test is successful, the generated events are cleaned up.
And if not, we cannot guarantee that the kprobe events will work
correctly. So, either way, there is no need to clean them up.

Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/trace_kprobe.c |5 -
 1 file changed, 5 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 4abed36544d0..f94628c15c14 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -2129,11 +2129,6 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
 end:
-   ret = dyn_events_release_all(&trace_kprobe_ops);
-   if (ret) {
-   pr_warn("error on cleaning up probes.\n");
-   warn++;
-   }
/*
 * Wait for the optimizer work to finish. Otherwise it might fiddle
 * with probes in already freed __init text.




[PATCH 2/3] tracing/kprobe: Remove unneeded WARN_ON_ONCE() in selftests

2024-05-26 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Since the kprobe-events selftest shows OK or NG with the reason, the
WARN_ON_ONCE()s for each place are redundant. Let's remove it.

Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/trace_kprobe.c |   26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 16383247bdbf..4abed36544d0 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -2023,18 +2023,18 @@ static __init int kprobe_trace_self_tests_init(void)
pr_info("Testing kprobe tracing: ");
 
ret = create_or_delete_trace_kprobe("p:testprobe 
kprobe_trace_selftest_target $stack $stack0 +0($stack)");
-   if (WARN_ON_ONCE(ret)) {
+   if (ret) {
pr_warn("error on probing function entry.\n");
warn++;
} else {
/* Enable trace point */
tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
-   if (WARN_ON_ONCE(tk == NULL)) {
+   if (tk == NULL) {
pr_warn("error on getting new probe.\n");
warn++;
} else {
file = find_trace_probe_file(tk, top_trace_array());
-   if (WARN_ON_ONCE(file == NULL)) {
+   if (file == NULL) {
pr_warn("error on getting probe file.\n");
warn++;
} else
@@ -2044,18 +2044,18 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
ret = create_or_delete_trace_kprobe("r:testprobe2 
kprobe_trace_selftest_target $retval");
-   if (WARN_ON_ONCE(ret)) {
+   if (ret) {
pr_warn("error on probing function return.\n");
warn++;
} else {
/* Enable trace point */
tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
-   if (WARN_ON_ONCE(tk == NULL)) {
+   if (tk == NULL) {
pr_warn("error on getting 2nd new probe.\n");
warn++;
} else {
file = find_trace_probe_file(tk, top_trace_array());
-   if (WARN_ON_ONCE(file == NULL)) {
+   if (file == NULL) {
pr_warn("error on getting probe file.\n");
warn++;
} else
@@ -2079,7 +2079,7 @@ static __init int kprobe_trace_self_tests_init(void)
 
/* Disable trace points before removing it */
tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
-   if (WARN_ON_ONCE(tk == NULL)) {
+   if (tk == NULL) {
pr_warn("error on getting test probe.\n");
warn++;
} else {
@@ -2089,7 +2089,7 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
file = find_trace_probe_file(tk, top_trace_array());
-   if (WARN_ON_ONCE(file == NULL)) {
+   if (file == NULL) {
pr_warn("error on getting probe file.\n");
warn++;
} else
@@ -2098,7 +2098,7 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
-   if (WARN_ON_ONCE(tk == NULL)) {
+   if (tk == NULL) {
pr_warn("error on getting 2nd test probe.\n");
warn++;
} else {
@@ -2108,7 +2108,7 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
file = find_trace_probe_file(tk, top_trace_array());
-   if (WARN_ON_ONCE(file == NULL)) {
+   if (file == NULL) {
pr_warn("error on getting probe file.\n");
warn++;
} else
@@ -2117,20 +2117,20 @@ static __init int kprobe_trace_self_tests_init(void)
}
 
ret = create_or_delete_trace_kprobe("-:testprobe");
-   if (WARN_ON_ONCE(ret)) {
+   if (ret) {
pr_warn("error on deleting a probe.\n");
warn++;
}
 
ret = create_or_delete_trace_kprobe("-:testprobe2");
-   if (WARN_ON_ONCE(ret)) {
+   if (ret) {
pr_warn("error on deleting a probe.\n");
warn++;
}
 
 end:
ret = dyn_events_release_all(&trace_kprobe_ops);
-   if (WARN_ON_ONCE(ret)) {
+   if (ret) {
pr_warn("error on cleaning up probes.\n");
warn++;
}




[PATCH 1/3] tracing: Build event generation tests only as modules

2024-05-26 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Since the kprobes and synth event generation tests add and enable
generated events in init_module() and delete them in exit_module(),
if we make them built-in, those events are left in the kernel and cause
kprobe event self-test failure.

[   97.349708] [ cut here ]
[   97.353453] WARNING: CPU: 3 PID: 1 at kernel/trace/trace_kprobe.c:2133 
kprobe_trace_self_tests_init+0x3f1/0x480
[   97.357106] Modules linked in:
[   97.358488] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 
6.9.0-g699646734ab5-dirty #14
[   97.361556] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.15.0-1 04/01/2014
[   97.363880] RIP: 0010:kprobe_trace_self_tests_init+0x3f1/0x480
[   97.365538] Code: a8 24 08 82 e9 ae fd ff ff 90 0f 0b 90 48 c7 c7 e5 aa 0b 
82 e9 ee fc ff ff 90 0f 0b 90 48 c7 c7 2d 61 06 82 e9 8e fd ff ff 90 <0f> 0b 90 
48 c7 c7 33 0b 0c 82 89 c6 e8 6e 03 1f ff 41 ff c7 e9 90
[   97.370429] RSP: :c9013b50 EFLAGS: 00010286
[   97.371852] RAX: fff0 RBX: 888005919c00 RCX: 
[   97.373829] RDX: 888003f4 RSI: 8236a598 RDI: 888003f40a68
[   97.375715] RBP:  R08: 0001 R09: 
[   97.377675] R10: 811c9ae5 R11: 8120c4e0 R12: 
[   97.379591] R13: 0001 R14: 0015 R15: 
[   97.381536] FS:  () GS:88807dcc() 
knlGS:
[   97.383813] CS:  0010 DS:  ES:  CR0: 80050033
[   97.385449] CR2:  CR3: 02244000 CR4: 06b0
[   97.387347] DR0:  DR1:  DR2: 
[   97.389277] DR3:  DR6: fffe0ff0 DR7: 0400
[   97.391196] Call Trace:
[   97.391967]  
[   97.392647]  ? __warn+0xcc/0x180
[   97.393640]  ? kprobe_trace_self_tests_init+0x3f1/0x480
[   97.395181]  ? report_bug+0xbd/0x150
[   97.396234]  ? handle_bug+0x3e/0x60
[   97.397311]  ? exc_invalid_op+0x1a/0x50
[   97.398434]  ? asm_exc_invalid_op+0x1a/0x20
[   97.399652]  ? trace_kprobe_is_busy+0x20/0x20
[   97.400904]  ? tracing_reset_all_online_cpus+0x15/0x90
[   97.402304]  ? kprobe_trace_self_tests_init+0x3f1/0x480
[   97.403773]  ? init_kprobe_trace+0x50/0x50
[   97.404972]  do_one_initcall+0x112/0x240
[   97.406113]  do_initcall_level+0x95/0xb0
[   97.407286]  ? kernel_init+0x1a/0x1a0
[   97.408401]  do_initcalls+0x3f/0x70
[   97.409452]  kernel_init_freeable+0x16f/0x1e0
[   97.410662]  ? rest_init+0x1f0/0x1f0
[   97.411738]  kernel_init+0x1a/0x1a0
[   97.412788]  ret_from_fork+0x39/0x50
[   97.413817]  ? rest_init+0x1f0/0x1f0
[   97.414844]  ret_from_fork_asm+0x11/0x20
[   97.416285]  
[   97.417134] irq event stamp: 13437323
[   97.418376] hardirqs last  enabled at (13437337): [] 
console_unlock+0x11c/0x150
[   97.421285] hardirqs last disabled at (13437370): [] 
console_unlock+0x101/0x150
[   97.423838] softirqs last  enabled at (13437366): [] 
handle_softirqs+0x23f/0x2a0
[   97.426450] softirqs last disabled at (13437393): [] 
__irq_exit_rcu+0x66/0xd0
[   97.428850] ---[ end trace  ]---

To avoid this issue, build these tests only as modules.

Fixes: 9fe41efaca08 ("tracing: Add synth event generation test module")
Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module")
Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/Kconfig |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 166ad5444eea..721c3b221048 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1136,7 +1136,7 @@ config PREEMPTIRQ_DELAY_TEST
 
 config SYNTH_EVENT_GEN_TEST
tristate "Test module for in-kernel synthetic event generation"
-   depends on SYNTH_EVENTS
+   depends on SYNTH_EVENTS && m
help
   This option creates a test module to check the base
   functionality of in-kernel synthetic event definition and
@@ -1149,7 +1149,7 @@ config SYNTH_EVENT_GEN_TEST
 
 config KPROBE_EVENT_GEN_TEST
tristate "Test module for in-kernel kprobe event generation"
-   depends on KPROBE_EVENTS
+   depends on KPROBE_EVENTS && m
help
   This option creates a test module to check the base
   functionality of in-kernel kprobe event definition.




[PATCH 0/3] tracing: Fix some selftest issues

2024-05-26 Thread Masami Hiramatsu (Google)
Hi,

Here is a series of some fixes/improvements for the test modules and boot
time selftest of kprobe events. I found a WARNING message with some boot 
time selftest configuration, which came from the combination of embedded
kprobe generate API tests module and ftrace boot-time selftest. So the main
problem is that the test module should not be built-in. But I also think
this WARNING message is useless (because there are warning messages already)
and the cleanup code is redundant. This series fixes those issues.

Thank you,

---

Masami Hiramatsu (Google) (3):
  tracing: Build event generation tests only as modules
  tracing/kprobe: Remove unneeded WARN_ON_ONCE() in selftests
  tracing/kprobe: Remove cleanup code unrelated to selftest


 kernel/trace/Kconfig|4 ++--
 kernel/trace/trace_kprobe.c |   29 -
 2 files changed, 14 insertions(+), 19 deletions(-)

--
Masami Hiramatsu (Google) 



[PATCH v10 36/36] fgraph: Skip recording calltime/rettime if it is not needed

2024-05-07 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Skip recording calltime and rettime if the fgraph_ops does not need it.
This is a kind of performance optimization for fprobe. Since the fprobe
user does not use these entries, recording timestamps in fgraph is just
overhead (e.g. eBPF, ftrace). So introduce the skip_timestamp flag,
and if all fgraph_ops set this flag, skip recording calltime and rettime.

Suggested-by: Jiri Olsa 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v10:
  - Add likely() to skipping timestamp.
 Changes in v9:
  - Newly added.
---
 include/linux/ftrace.h |2 ++
 kernel/trace/fgraph.c  |   51 +---
 kernel/trace/fprobe.c  |1 +
 3 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 64ca91d1527f..eb9de9d70829 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1156,6 +1156,8 @@ struct fgraph_ops {
struct ftrace_ops   ops; /* for the hash lists */
void*private;
int idx;
+   /* If skip_timestamp is true, this does not record timestamps. */
+   boolskip_timestamp;
 };
 
 void *fgraph_reserve_data(int idx, int size_bytes);
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 40f47fcbc6c3..13b41485ce49 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -138,6 +138,7 @@ DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph);
 int ftrace_graph_active;
 
 static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE];
+static bool fgraph_skip_timestamp;
 
 /* LRU index table for fgraph_array */
 static int fgraph_lru_table[FGRAPH_ARRAY_SIZE];
@@ -483,7 +484,7 @@ void ftrace_graph_stop(void)
 static int
 ftrace_push_return_trace(unsigned long ret, unsigned long func,
 unsigned long frame_pointer, unsigned long *retp,
-int fgraph_idx)
+int fgraph_idx, bool skip_ts)
 {
struct ftrace_ret_stack *ret_stack;
unsigned long long calltime;
@@ -506,8 +507,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long 
func,
ret_stack = get_ret_stack(current, current->curr_ret_stack, &offset);
if (ret_stack && ret_stack->func == func &&
get_fgraph_type(current, offset + FGRAPH_FRAME_OFFSET) == 
FGRAPH_TYPE_BITMAP &&
-   !is_fgraph_index_set(current, offset + FGRAPH_FRAME_OFFSET, 
fgraph_idx))
+   !is_fgraph_index_set(current, offset + FGRAPH_FRAME_OFFSET, 
fgraph_idx)) {
+   /* If previous one skips calltime, update it. */
+   if (!skip_ts && !ret_stack->calltime)
+   ret_stack->calltime = trace_clock_local();
return offset + FGRAPH_FRAME_OFFSET;
+   }
 
val = (FGRAPH_TYPE_RESERVED << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET;
 
@@ -525,7 +530,11 @@ ftrace_push_return_trace(unsigned long ret, unsigned long 
func,
return -EBUSY;
}
 
-   calltime = trace_clock_local();
+   /* This is not really 'likely' but for keeping the least path to be 
faster. */
+   if (likely(skip_ts))
+   calltime = 0LL;
+   else
+   calltime = trace_clock_local();
 
offset = READ_ONCE(current->curr_ret_stack);
ret_stack = RET_STACK(current, offset);
@@ -609,7 +618,8 @@ int function_graph_enter_regs(unsigned long ret, unsigned 
long func,
trace.func = func;
trace.depth = ++current->curr_ret_depth;
 
-   offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0);
+   offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0,
+ fgraph_skip_timestamp);
if (offset < 0)
goto out;
 
@@ -662,7 +672,8 @@ int function_graph_enter_ops(unsigned long ret, unsigned 
long func,
return -ENODEV;
 
/* Use start for the distance to ret_stack (skipping over reserve) */
-   offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 
gops->idx);
+   offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 
gops->idx,
+ gops->skip_timestamp);
if (offset < 0)
return offset;
type = get_fgraph_type(current, offset);
@@ -740,6 +751,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, 
unsigned long *ret,
*ret = ret_stack->ret;
trace->func = ret_stack->func;
trace->calltime = ret_stack->calltime;
+   trace->rettime = 0;
trace->overrun = atomic_read(¤t->trace_overrun);
trace->depth = current->curr_ret_depth;
/*
@@ -800,7 +812,6 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, 
unsigned long frame_pointe

[PATCH v10 35/36] Documentation: probes: Update fprobe on function-graph tracer

2024-05-07 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Update fprobe documentation for the new fprobe on function-graph
tracer. This includes some behavior changes and the pt_regs to
ftrace_regs interface change.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Update @fregs parameter explanation.
---
 Documentation/trace/fprobe.rst |   42 ++--
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/Documentation/trace/fprobe.rst b/Documentation/trace/fprobe.rst
index 196f52386aaa..f58bdc64504f 100644
--- a/Documentation/trace/fprobe.rst
+++ b/Documentation/trace/fprobe.rst
@@ -9,9 +9,10 @@ Fprobe - Function entry/exit probe
 Introduction
 
 
-Fprobe is a function entry/exit probe mechanism based on ftrace.
-Instead of using ftrace full feature, if you only want to attach callbacks
-on function entry and exit, similar to the kprobes and kretprobes, you can
+Fprobe is a function entry/exit probe mechanism based on the function-graph
+tracer.
+Instead of tracing all functions, if you want to attach callbacks on specific
+function entry and exit, similar to the kprobes and kretprobes, you can
 use fprobe. Compared with kprobes and kretprobes, fprobe gives faster
 instrumentation for multiple functions with single handler. This document
 describes how to use fprobe.
@@ -91,12 +92,14 @@ The prototype of the entry/exit callback function are as 
follows:
 
 .. code-block:: c
 
- int entry_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct pt_regs *regs, void *entry_data);
+ int entry_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct ftrace_regs *fregs, void *entry_data);
 
- void exit_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct pt_regs *regs, void *entry_data);
+ void exit_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct ftrace_regs *fregs, void *entry_data);
 
-Note that the @entry_ip is saved at function entry and passed to exit handler.
-If the entry callback function returns !0, the corresponding exit callback 
will be cancelled.
+Note that the @entry_ip is saved at function entry and passed to exit
+handler.
+If the entry callback function returns !0, the corresponding exit callback
+will be cancelled.
 
 @fp
 This is the address of `fprobe` data structure related to this handler.
@@ -112,12 +115,10 @@ If the entry callback function returns !0, the 
corresponding exit callback will
 This is the return address that the traced function will return to,
 somewhere in the caller. This can be used at both entry and exit.
 
-@regs
-This is the `pt_regs` data structure at the entry and exit. Note that
-the instruction pointer of @regs may be different from the @entry_ip
-in the entry_handler. If you need traced instruction pointer, you need
-to use @entry_ip. On the other hand, in the exit_handler, the 
instruction
-pointer of @regs is set to the current return address.
+@fregs
+This is the `ftrace_regs` data structure at the entry and exit. This
+includes the function parameters, or the return values. So user can
+access thos values via appropriate `ftrace_regs_*` APIs.
 
 @entry_data
 This is a local storage to share the data between entry and exit 
handlers.
@@ -125,6 +126,17 @@ If the entry callback function returns !0, the 
corresponding exit callback will
 and `entry_data_size` field when registering the fprobe, the storage is
 allocated and passed to both `entry_handler` and `exit_handler`.
 
+Entry data size and exit handlers on the same function
+==
+
+Since the entry data is passed via per-task stack and it is has limited size,
+the entry data size per probe is limited to `15 * sizeof(long)`. You also need
+to take care that the different fprobes are probing on the same function, this
+limit becomes smaller. The entry data size is aligned to `sizeof(long)` and
+each fprobe which has exit handler uses a `sizeof(long)` space on the stack,
+you should keep the number of fprobes on the same function as small as
+possible.
+
 Share the callbacks with kprobes
 
 
@@ -165,8 +177,8 @@ This counter counts up when;
  - fprobe fails to take ftrace_recursion lock. This usually means that a 
function
which is traced by other ftrace users is called from the entry_handler.
 
- - fprobe fails to setup the function exit because of the shortage of rethook
-   (the shadow stack for hooking the function return.)
+ - fprobe fails to setup the function exit because of failing to allocate the
+   data buffer from the per-task shadow stack.
 
 The `fprobe::nmissed` field counts up in both cases. Therefore, the former
 skips both of entry and exit callback and the latter skips the exit




[PATCH v10 17/36] function_graph: Move graph notrace bit to shadow stack global var

2024-05-07 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

The use of the task->trace_recursion for the logic used for the function
graph no-trace was a bit of an abuse of that variable. Now that there
exists global vars that are per stack for registered graph traces, use
that instead.

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Make description lines shorter than 76 chars.
---
 include/linux/trace_recursion.h  |7 ---
 kernel/trace/trace.h |9 +
 kernel/trace/trace_functions_graph.c |   10 ++
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index fdfb6f66718a..ae04054a1be3 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -44,13 +44,6 @@ enum {
  */
TRACE_IRQ_BIT,
 
-   /*
-* To implement set_graph_notrace, if this bit is set, we ignore
-* function graph tracing of called functions, until the return
-* function is called to clear it.
-*/
-   TRACE_GRAPH_NOTRACE_BIT,
-
/* Used to prevent recursion recording from recursing. */
TRACE_RECORD_RECURSION_BIT,
 };
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 7ab731b9ebc8..f23b6fbd547d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -918,8 +918,17 @@ enum {
 
TRACE_GRAPH_DEPTH_START_BIT,
TRACE_GRAPH_DEPTH_END_BIT,
+
+   /*
+* To implement set_graph_notrace, if this bit is set, we ignore
+* function graph tracing of called functions, until the return
+* function is called to clear it.
+*/
+   TRACE_GRAPH_NOTRACE_BIT,
 };
 
+#define TRACE_GRAPH_NOTRACE(1 << TRACE_GRAPH_NOTRACE_BIT)
+
 static inline unsigned long ftrace_graph_depth(unsigned long *task_var)
 {
return (*task_var >> TRACE_GRAPH_DEPTH_START_BIT) & 3;
diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c
index 66cce73e94f8..13d0387ac6a6 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -130,6 +130,7 @@ static inline int ftrace_graph_ignore_irqs(void)
 int trace_graph_entry(struct ftrace_graph_ent *trace,
  struct fgraph_ops *gops)
 {
+   unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
unsigned long flags;
@@ -138,7 +139,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
int ret;
int cpu;
 
-   if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT))
+   if (*task_var & TRACE_GRAPH_NOTRACE)
return 0;
 
/*
@@ -149,7 +150,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
 * returning from the function.
 */
if (ftrace_graph_notrace_addr(trace->func)) {
-   trace_recursion_set(TRACE_GRAPH_NOTRACE_BIT);
+   *task_var |= TRACE_GRAPH_NOTRACE_BIT;
/*
 * Need to return 1 to have the return called
 * that will clear the NOTRACE bit.
@@ -240,6 +241,7 @@ void __trace_graph_return(struct trace_array *tr,
 void trace_graph_return(struct ftrace_graph_ret *trace,
struct fgraph_ops *gops)
 {
+   unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
unsigned long flags;
@@ -249,8 +251,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
 
ftrace_graph_addr_finish(gops, trace);
 
-   if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
-   trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT);
+   if (*task_var & TRACE_GRAPH_NOTRACE) {
+   *task_var &= ~TRACE_GRAPH_NOTRACE;
return;
}
 




[PATCH v10 34/36] selftests/ftrace: Add a test case for repeating register/unregister fprobe

2024-05-07 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

This test case repeatedly defines and undefines the fprobe dynamic event to
ensure that the fprobe does not cause any issue with such operations.

Signed-off-by: Masami Hiramatsu (Google) 
---
 .../test.d/dynevent/add_remove_fprobe_repeat.tc|   19 +++
 1 file changed, 19 insertions(+)
 create mode 100644 
tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc

diff --git 
a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc 
b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
new file mode 100644
index ..b4ad09237e2a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - Repeating add/remove fprobe events
+# requires: dynamic_events "f[:[/][]] [%return] 
[]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+PLACE=$FUNCTION_FORK
+REPEAT_TIMES=64
+
+for i in `seq 1 $REPEAT_TIMES`; do
+  echo "f:myevent $PLACE" >> dynamic_events
+  grep -q myevent dynamic_events
+  test -d events/fprobes/myevent
+  echo > dynamic_events
+done
+
+clear_trace




[PATCH v10 33/36] selftests: ftrace: Remove obsolete maxactive syntax check

2024-05-07 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Since the fprobe event does not support maxactive anymore, stop
testing the maxactive syntax error checking.

Signed-off-by: Masami Hiramatsu (Google) 
---
 .../ftrace/test.d/dynevent/fprobe_syntax_errors.tc |4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git 
a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc 
b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
index 61877d166451..c9425a34fae3 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -16,9 +16,7 @@ aarch64)
   REG=%r0 ;;
 esac
 
-check_error 'f^100 vfs_read'   # MAXACT_NO_KPROBE
-check_error 'f^1a111 vfs_read' # BAD_MAXACT
-check_error 'f^10 vfs_read'# MAXACT_TOO_BIG
+check_error 'f^100 vfs_read'   # BAD_MAXACT
 
 check_error 'f ^non_exist_func'# BAD_PROBE_ADDR (enoent)
 check_error 'f ^vfs_read+10'   # BAD_PROBE_ADDR




[PATCH v10 32/36] tracing/fprobe: Remove nr_maxactive from fprobe

2024-05-07 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Remove the deprecated fprobe::nr_maxactive. This makes fprobe events
reject the maxactive number.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Newly added.
---
 include/linux/fprobe.h  |2 --
 kernel/trace/trace_fprobe.c |   44 ++-
 2 files changed, 6 insertions(+), 40 deletions(-)

diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index 2d06bbd99601..a86b3e4df2a0 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -54,7 +54,6 @@ struct fprobe_hlist {
  * @nmissed: The counter for missing events.
  * @flags: The status flag.
  * @entry_data_size: The private data storage size.
- * @nr_maxactive: The max number of active functions. (*deprecated)
  * @entry_handler: The callback function for function entry.
  * @exit_handler: The callback function for function exit.
  * @hlist_array: The fprobe_hlist for fprobe search from IP hash table.
@@ -63,7 +62,6 @@ struct fprobe {
unsigned long   nmissed;
unsigned intflags;
size_t  entry_data_size;
-   int nr_maxactive;
 
fprobe_entry_cb entry_handler;
fprobe_exit_cb  exit_handler;
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 86cd6a8c806a..20ef5cd5d419 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -422,7 +422,6 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
   const char *event,
   const char *symbol,
   struct tracepoint *tpoint,
-  int maxactive,
   int nargs, bool is_return)
 {
struct trace_fprobe *tf;
@@ -442,7 +441,6 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
tf->fp.entry_handler = fentry_dispatcher;
 
tf->tpoint = tpoint;
-   tf->fp.nr_maxactive = maxactive;
 
ret = trace_probe_init(&tf->tp, event, group, false, nargs);
if (ret < 0)
@@ -1021,12 +1019,11 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
 */
struct trace_fprobe *tf = NULL;
-   int i, len, new_argc = 0, ret = 0;
+   int i, new_argc = 0, ret = 0;
bool is_return = false;
char *symbol = NULL;
const char *event = NULL, *group = FPROBE_EVENT_SYSTEM;
const char **new_argv = NULL;
-   int maxactive = 0;
char buf[MAX_EVENT_NAME_LEN];
char gbuf[MAX_EVENT_NAME_LEN];
char sbuf[KSYM_NAME_LEN];
@@ -1048,33 +1045,13 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
 
trace_probe_log_init("trace_fprobe", argc, argv);
 
-   event = strchr(&argv[0][1], ':');
-   if (event)
-   event++;
-
-   if (isdigit(argv[0][1])) {
-   if (event)
-   len = event - &argv[0][1] - 1;
-   else
-   len = strlen(&argv[0][1]);
-   if (len > MAX_EVENT_NAME_LEN - 1) {
-   trace_probe_log_err(1, BAD_MAXACT);
-   goto parse_error;
-   }
-   memcpy(buf, &argv[0][1], len);
-   buf[len] = '\0';
-   ret = kstrtouint(buf, 0, &maxactive);
-   if (ret || !maxactive) {
+   if (argv[0][1] != '\0') {
+   if (argv[0][1] != ':') {
+   trace_probe_log_set_index(0);
trace_probe_log_err(1, BAD_MAXACT);
goto parse_error;
}
-   /* fprobe rethook instances are iterated over via a list. The
-* maximum should stay reasonable.
-*/
-   if (maxactive > RETHOOK_MAXACTIVE_MAX) {
-   trace_probe_log_err(1, MAXACT_TOO_BIG);
-   goto parse_error;
-   }
+   event = &argv[0][2];
}
 
trace_probe_log_set_index(1);
@@ -1084,12 +1061,6 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
if (ret < 0)
goto parse_error;
 
-   if (!is_return && maxactive) {
-   trace_probe_log_set_index(0);
-   trace_probe_log_err(1, BAD_MAXACT_TYPE);
-   goto parse_error;
-   }
-
trace_probe_log_set_index(0);
if (event) {
ret = traceprobe_parse_event_name(&event, &group, gbuf,
@@ -1147,8 +1118,7 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
goto out;
 
/* setup a probe */
-   tf = al

[PATCH v10 31/36] fprobe: Rewrite fprobe on function-graph tracer

2024-05-07 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Rewrite fprobe implementation on function-graph tracer.
Major API changes are:
 -  'nr_maxactive' field is deprecated.
 -  This depends on CONFIG_DYNAMIC_FTRACE_WITH_ARGS or
!CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS, and
CONFIG_HAVE_FUNCTION_GRAPH_FREGS. So currently works only
on x86_64.
 -  Currently the entry size is limited to 15 * sizeof(long).
 -  If there are too many fprobe exit handlers set on the same
    function, it will fail to probe.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v9:
  - Remove unneeded prototype of ftrace_regs_get_return_address().
  - Fix entry data address calculation.
  - Remove DIV_ROUND_UP() from hotpath.
 Changes in v8:
  - Use trace_func_graph_ret/ent_t for fgraph_ops.
  - Update CONFIG_FPROBE dependencies.
  - Add ftrace_regs_get_return_address() for each arch.
 Changes in v3:
  - Update for new reserve_data/retrieve_data API.
  - Fix internal push/pop on fgraph data logic so that it can
correctly save/restore the returning fprobes.
 Changes in v2:
  - Add more lockdep_assert_held(fprobe_mutex)
  - Use READ_ONCE() and WRITE_ONCE() for fprobe_hlist_node::fp.
  - Add NOKPROBE_SYMBOL() for the functions which is called from
entry/exit callback.
---
 arch/arm64/include/asm/ftrace.h |6 
 arch/loongarch/include/asm/ftrace.h |6 
 arch/powerpc/include/asm/ftrace.h   |6 
 arch/s390/include/asm/ftrace.h  |6 
 arch/x86/include/asm/ftrace.h   |6 
 include/linux/fprobe.h  |   53 ++-
 kernel/trace/Kconfig|8 
 kernel/trace/fprobe.c   |  638 +--
 lib/test_fprobe.c   |   45 --
 9 files changed, 529 insertions(+), 245 deletions(-)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 95a8f349f871..800c75f46a13 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -143,6 +143,12 @@ ftrace_regs_get_frame_pointer(const struct ftrace_regs 
*fregs)
return fregs->fp;
 }
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
+{
+   return fregs->lr;
+}
+
 static __always_inline struct pt_regs *
 ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
 {
diff --git a/arch/loongarch/include/asm/ftrace.h 
b/arch/loongarch/include/asm/ftrace.h
index 14a1576bf948..b8432b7cc9d4 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -81,6 +81,12 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs 
*fregs, unsigned long ip)
 #define ftrace_regs_get_frame_pointer(fregs) \
((fregs)->regs.regs[22])
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+   return *(unsigned long *)(fregs->regs.regs[1]);
+}
+
 #define ftrace_graph_func ftrace_graph_func
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
   struct ftrace_ops *op, struct ftrace_regs *fregs);
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index 51245fd6b45b..d8a74a6570f8 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -77,6 +77,12 @@ ftrace_regs_get_instruction_pointer(struct ftrace_regs 
*fregs)
 #define ftrace_regs_query_register_offset(name) \
regs_query_register_offset(name)
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+   return fregs->regs.link;
+}
+
 struct ftrace_ops;
 
 #define ftrace_graph_func ftrace_graph_func
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index cb8d60a5fe1d..d8ca1776c554 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -89,6 +89,12 @@ ftrace_regs_get_frame_pointer(struct ftrace_regs *fregs)
return sp[0];   /* return backchain */
 }
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
+{
+   return fregs->regs.gprs[14];
+}
+
 #define arch_ftrace_fill_perf_regs(fregs, _regs)do {   \
(_regs)->psw.addr = (fregs)->regs.psw.addr; \
(_regs)->gprs[15] = (fregs)->regs.gprs[15]; \
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 7625887fc49b..979d3458a328 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -82,6 +82,12 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
 #define ftrace_regs_get_frame_pointer(fregs) \
frame_pointer(&(fregs)->regs)
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+   return *(unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+}
+
 struct ftrace_ops;
 #define ftrace_graph_func ftrace_graph_func
 void ftrace_graph

[PATCH v10 30/36] ftrace: Add CONFIG_HAVE_FTRACE_GRAPH_FUNC

2024-05-07 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add CONFIG_HAVE_FTRACE_GRAPH_FUNC kconfig in addition to the ftrace_graph_func
macro check. This allows other features (e.g. FPROBE) which require access to
ftrace_regs from fgraph_ops::entryfunc() to avoid being compiled when the
fgraph cannot pass valid ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v8:
  - Newly added.
---
 arch/arm64/Kconfig |1 +
 arch/loongarch/Kconfig |1 +
 arch/powerpc/Kconfig   |1 +
 arch/riscv/Kconfig |1 +
 arch/x86/Kconfig   |1 +
 kernel/trace/Kconfig   |5 +
 6 files changed, 10 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8d5047bc13bc..e0a5c69eeda2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -206,6 +206,7 @@ config ARM64
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FAST_GUP
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 21dc39ae6bc2..0276a6825e6d 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -121,6 +121,7 @@ config LOONGARCH
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
select HAVE_EXIT_THREAD
select HAVE_FAST_GUP
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1c4be3373686..b79d16c5846a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -237,6 +237,7 @@ config PPC
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FAST_GUP
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_DESCRIPTORSif PPC64_ELF_ABI_V1
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index b58b8e81b510..6fd2a166904b 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -127,6 +127,7 @@ config RISCV
select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && 
(CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
+   select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_FREGS
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4da23dc0b07c..bd86e598e31d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -225,6 +225,7 @@ config X86
select HAVE_EXIT_THREAD
select HAVE_FAST_GUP
select HAVE_FENTRY  if X86_64 || DYNAMIC_FTRACE
+   select HAVE_FTRACE_GRAPH_FUNC   if HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_FREGSif HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_TRACER   if X86_32 || (X86_64 && 
DYNAMIC_FTRACE)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 0b6ce0a38967..0e4c33f1ab43 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -34,6 +34,11 @@ config HAVE_FUNCTION_GRAPH_TRACER
 config HAVE_FUNCTION_GRAPH_FREGS
bool
 
+config HAVE_FTRACE_GRAPH_FUNC
+   bool
+   help
+ True if ftrace_graph_func() is defined.
+
 config HAVE_DYNAMIC_FTRACE
bool
help




[PATCH v10 29/36] bpf: Enable kprobe_multi feature if CONFIG_FPROBE is enabled

2024-05-07 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Enable the kprobe_multi feature if CONFIG_FPROBE is enabled. The pt_regs is
converted from ftrace_regs by ftrace_partial_regs(), thus some registers
may always return 0. But it should be enough for function entry (accessing
arguments) and exit (accessing the return value).

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Florent Revest 
---
 Changes in v9:
  - Avoid wasting memory for bpf_kprobe_multi_pt_regs when
CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y
---
 kernel/trace/bpf_trace.c |   27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e51a6ef87167..b779f4a83361 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2577,7 +2577,7 @@ static int __init bpf_event_init(void)
 fs_initcall(bpf_event_init);
 #endif /* CONFIG_MODULES */
 
-#if defined(CONFIG_FPROBE) && defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
+#ifdef CONFIG_FPROBE
 struct bpf_kprobe_multi_link {
struct bpf_link link;
struct fprobe fp;
@@ -2600,6 +2600,13 @@ struct user_syms {
char *buf;
 };
 
+#ifndef CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST
+static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs);
+#define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs)
+#else
+#define bpf_kprobe_multi_pt_regs_ptr() (NULL)
+#endif
+
 static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, 
u32 cnt)
 {
unsigned long __user usymbol;
@@ -2792,13 +2799,14 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx 
*ctx)
 
 static int
 kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
-  unsigned long entry_ip, struct pt_regs *regs)
+  unsigned long entry_ip, struct ftrace_regs *fregs)
 {
struct bpf_kprobe_multi_run_ctx run_ctx = {
.link = link,
.entry_ip = entry_ip,
};
struct bpf_run_ctx *old_run_ctx;
+   struct pt_regs *regs;
int err;
 
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
@@ -2809,6 +2817,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link 
*link,
 
migrate_disable();
rcu_read_lock();
+   regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
@@ -2826,13 +2835,9 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned 
long fentry_ip,
  void *data)
 {
struct bpf_kprobe_multi_link *link;
-   struct pt_regs *regs = ftrace_get_regs(fregs);
-
-   if (!regs)
-   return 0;
 
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
-   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs);
+   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), fregs);
return 0;
 }
 
@@ -2842,13 +2847,9 @@ kprobe_multi_link_exit_handler(struct fprobe *fp, 
unsigned long fentry_ip,
   void *data)
 {
struct bpf_kprobe_multi_link *link;
-   struct pt_regs *regs = ftrace_get_regs(fregs);
-
-   if (!regs)
-   return;
 
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
-   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs);
+   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), fregs);
 }
 
 static int symbols_cmp_r(const void *a, const void *b, const void *priv)
@@ -3107,7 +3108,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr 
*attr, struct bpf_prog *pr
kvfree(cookies);
return err;
 }
-#else /* !CONFIG_FPROBE || !CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+#else /* !CONFIG_FPROBE */
 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog 
*prog)
 {
return -EOPNOTSUPP;




[PATCH v10 28/36] tracing/fprobe: Enable fprobe events with CONFIG_DYNAMIC_FTRACE_WITH_ARGS

2024-05-07 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Allow fprobe events to be enabled with CONFIG_DYNAMIC_FTRACE_WITH_ARGS.
With this change, fprobe events mostly use ftrace_regs instead of pt_regs.
Note that if the arch doesn't enable HAVE_PT_REGS_COMPAT_FTRACE_REGS,
fprobe events will not be able to be used from perf.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v9:
  - Copy store_trace_entry_data() as store_fprobe_entry_data() for
fprobe.
 Changes in v3:
  - Use ftrace_regs_get_return_value().
 Changes in v2:
  - Define ftrace_regs_get_kernel_stack_nth() for
!CONFIG_HAVE_REGS_AND_STACK_ACCESS_API.
 Changes from previous series: Update against the new series.
---
 include/linux/ftrace.h  |   17 ++
 kernel/trace/Kconfig|1 
 kernel/trace/trace_fprobe.c |  107 +--
 kernel/trace/trace_probe_tmpl.h |2 -
 4 files changed, 86 insertions(+), 41 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 3871823c1429..64ca91d1527f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -256,6 +256,23 @@ static __always_inline bool ftrace_regs_has_args(struct 
ftrace_regs *fregs)
frame_pointer(&(fregs)->regs)
 #endif
 
+#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API
+static __always_inline unsigned long
+ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth)
+{
+   unsigned long *stackp;
+
+   stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+   if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) ==
+   ((unsigned long)stackp & ~(THREAD_SIZE - 1)))
+   return *(stackp + nth);
+
+   return 0;
+}
+#else /* !CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+#define ftrace_regs_get_kernel_stack_nth(fregs, nth)   (0L)
+#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  struct ftrace_ops *op, struct ftrace_regs *fregs);
 
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 7df7b1fb305c..0b6ce0a38967 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -680,7 +680,6 @@ config FPROBE_EVENTS
select TRACING
select PROBE_EVENTS
select DYNAMIC_EVENTS
-   depends on DYNAMIC_FTRACE_WITH_REGS
default y
help
  This allows user to add tracing events on the function entry and
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 273cdf3cf70c..86cd6a8c806a 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -133,7 +133,7 @@ static int
 process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
   void *dest, void *base)
 {
-   struct pt_regs *regs = rec;
+   struct ftrace_regs *fregs = rec;
unsigned long val;
int ret;
 
@@ -141,17 +141,17 @@ process_fetch_insn(struct fetch_insn *code, void *rec, 
void *edata,
/* 1st stage: get value from context */
switch (code->op) {
case FETCH_OP_STACK:
-   val = regs_get_kernel_stack_nth(regs, code->param);
+   val = ftrace_regs_get_kernel_stack_nth(fregs, code->param);
break;
case FETCH_OP_STACKP:
-   val = kernel_stack_pointer(regs);
+   val = ftrace_regs_get_stack_pointer(fregs);
break;
case FETCH_OP_RETVAL:
-   val = regs_return_value(regs);
+   val = ftrace_regs_get_return_value(fregs);
break;
 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
case FETCH_OP_ARG:
-   val = regs_get_kernel_argument(regs, code->param);
+   val = ftrace_regs_get_argument(fregs, code->param);
break;
case FETCH_OP_EDATA:
val = *(unsigned long *)((unsigned long)edata + code->offset);
@@ -174,7 +174,7 @@ NOKPROBE_SYMBOL(process_fetch_insn)
 /* function entry handler */
 static nokprobe_inline void
 __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
-   struct pt_regs *regs,
+   struct ftrace_regs *fregs,
struct trace_event_file *trace_file)
 {
struct fentry_trace_entry_head *entry;
@@ -188,41 +188,71 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned 
long entry_ip,
if (trace_trigger_soft_disabled(trace_file))
return;
 
-   dsize = __get_data_size(&tf->tp, regs, NULL);
+   dsize = __get_data_size(&tf->tp, fregs, NULL);
 
entry = trace_event_buffer_reserve(&fbuffer, trace_file,
   sizeof(*entry) + tf->tp.size + 
dsize);
if (!entry)
return;
 
-   fbuffer.regs = regs;
+   fbuffer.regs = ftrace_get_regs(fregs);
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
en

  1   2   3   4   5   6   7   8   9   10   >