From: "Steven Rostedt (VMware)" <rost...@goodmis.org>

Allow instances to have their own ftrace_ops as part of the fgraph_ops,
which makes the function_graph tracer filter on the set_ftrace_filter file
of the instance and not the top instance.

Note that this also requires updating ftrace_graph_func() to call the new
function_graph_enter_ops() instead of function_graph_enter() so that
it avoids pushing onto the shadow stack multiple times for the same function.

Co-developed with Masami Hiramatsu:
Link: https://lore.kernel.org/linux-trace-kernel/171509102088.162236.15758883237657317789.stgit@devnote2

Signed-off-by: Steven Rostedt (VMware) <rost...@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhira...@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rost...@goodmis.org>
---
 arch/arm64/kernel/ftrace.c           |  21 ++++-
 arch/loongarch/kernel/ftrace_dyn.c   |  15 ++-
 arch/powerpc/kernel/trace/ftrace.c   |   3 +-
 arch/riscv/kernel/ftrace.c           |  15 ++-
 arch/x86/kernel/ftrace.c             |  19 +++-
 include/linux/ftrace.h               |   6 ++
 kernel/trace/fgraph.c                | 131 ++++++++++++++++++++-------
 kernel/trace/ftrace.c                |   4 +-
 kernel/trace/trace.h                 |  16 ++--
 kernel/trace/trace_functions.c       |   2 +-
 kernel/trace/trace_functions_graph.c |   8 +-
 11 files changed, 190 insertions(+), 50 deletions(-)
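
Usage sketch (reviewer note, not part of the patch): with the ftrace_ops now
embedded in fgraph_ops, a module could in principle scope a function_graph
tracer to chosen functions by filtering on &gops->ops before registering.
The names my_entry/my_return/my_gops below are hypothetical, and calling
ftrace_set_filter() on the embedded ops prior to register_ftrace_graph() is
an assumption about how this interface composes, not something this patch
itself demonstrates:

	#include <linux/ftrace.h>
	#include <linux/module.h>

	static int my_entry(struct ftrace_graph_ent *trace,
			    struct fgraph_ops *gops)
	{
		return 1;	/* nonzero: trace this function */
	}

	static void my_return(struct ftrace_graph_ret *trace,
			      struct fgraph_ops *gops)
	{
		/* called on function exit; no-op for this sketch */
	}

	static struct fgraph_ops my_gops = {
		.entryfunc	= my_entry,
		.retfunc	= my_return,
	};

	static int __init my_init(void)
	{
		/*
		 * Filter via the fgraph_ops' own ftrace_ops (assumed
		 * to be valid before registration), then register.
		 */
		ftrace_set_filter(&my_gops.ops, "schedule",
				  strlen("schedule"), 1);
		return register_ftrace_graph(&my_gops);
	}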

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index a650f5e11fc5..b96740829798 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -481,7 +481,26 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
                       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
-       prepare_ftrace_return(ip, &fregs->lr, fregs->fp);
+       struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
+       unsigned long frame_pointer = fregs->fp;
+       unsigned long *parent = &fregs->lr;
+       int bit;
+
+       if (unlikely(ftrace_graph_is_dead()))
+               return;
+
+       if (unlikely(atomic_read(&current->tracing_graph_pause)))
+               return;
+
+       bit = ftrace_test_recursion_trylock(ip, *parent);
+       if (bit < 0)
+               return;
+
+       if (!function_graph_enter_ops(*parent, ip, frame_pointer,
+                                     (void *)frame_pointer, gops))
+               *parent = (unsigned long)&return_to_handler;
+
+       ftrace_test_recursion_unlock(bit);
 }
 #else
 /*
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
index bff058317062..77acb69ad153 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -241,10 +241,21 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent)
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
                       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
+       struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
+       unsigned long return_hooker = (unsigned long)&return_to_handler;
        struct pt_regs *regs = &fregs->regs;
-       unsigned long *parent = (unsigned long *)&regs->regs[1];
+       unsigned long *parent;
+       unsigned long old;
+
+       parent = (unsigned long *)&regs->regs[1];
 
-       prepare_ftrace_return(ip, (unsigned long *)parent);
+       if (unlikely(atomic_read(&current->tracing_graph_pause)))
+               return;
+
+       old = *parent;
+
+       if (!function_graph_enter_ops(old, ip, 0, parent, gops))
+               *parent = return_hooker;
 }
 #else
 static int ftrace_modify_graph_caller(bool enable)
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index d8d6b4fd9a14..4a9294821c0d 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -421,6 +421,7 @@ int __init ftrace_dyn_arch_init(void)
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
                       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
+       struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
        unsigned long sp = fregs->regs.gpr[1];
        int bit;
 
@@ -434,7 +435,7 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
        if (bit < 0)
                goto out;
 
-       if (!function_graph_enter(parent_ip, ip, 0, (unsigned long *)sp))
+       if (!function_graph_enter_ops(parent_ip, ip, 0, (unsigned long *)sp, gops))
                parent_ip = ppc_function_entry(return_to_handler);
 
        ftrace_test_recursion_unlock(bit);
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 4f4987a6d83d..7bcb5f321523 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -218,10 +218,23 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
                       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
+       struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
+       unsigned long return_hooker = (unsigned long)&return_to_handler;
        struct pt_regs *regs = arch_ftrace_get_regs(fregs);
        unsigned long *parent = (unsigned long *)&regs->ra;
+       unsigned long old;
+
+       if (unlikely(atomic_read(&current->tracing_graph_pause)))
+               return;
+
+       /*
+        * We don't suffer access faults, so no extra fault-recovery assembly
+        * is needed here.
+        */
+       old = *parent;
 
-       prepare_ftrace_return(parent, ip, frame_pointer(regs));
+       if (!function_graph_enter_ops(old, ip, frame_pointer(regs), parent, gops))
+               *parent = return_hooker;
 }
 #else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 extern void ftrace_graph_call(void);
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 8da0e66ca22d..998558cb8f15 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -648,9 +648,24 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
                       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
        struct pt_regs *regs = &fregs->regs;
-       unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs);
+       unsigned long *parent = (unsigned long *)kernel_stack_pointer(regs);
+       struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
+       int bit;
+
+       if (unlikely(ftrace_graph_is_dead()))
+               return;
+
+       if (unlikely(atomic_read(&current->tracing_graph_pause)))
+               return;
 
-       prepare_ftrace_return(ip, (unsigned long *)stack, 0);
+       bit = ftrace_test_recursion_trylock(ip, *parent);
+       if (bit < 0)
+               return;
+
+       if (!function_graph_enter_ops(*parent, ip, 0, parent, gops))
+               *parent = (unsigned long)&return_to_handler;
+
+       ftrace_test_recursion_unlock(bit);
 }
 #endif
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 942a1f767280..e5b41683ffb9 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1041,6 +1041,7 @@ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph
 struct fgraph_ops {
        trace_func_graph_ent_t          entryfunc;
        trace_func_graph_ret_t          retfunc;
+       struct ftrace_ops               ops; /* for the hash lists */
        void                            *private;
        int                             idx;
 };
@@ -1076,6 +1077,11 @@ extern int
 function_graph_enter(unsigned long ret, unsigned long func,
                     unsigned long frame_pointer, unsigned long *retp);
 
+extern int
+function_graph_enter_ops(unsigned long ret, unsigned long func,
+                        unsigned long frame_pointer, unsigned long *retp,
+                        struct fgraph_ops *gops);
+
 struct ftrace_ret_stack *
 ftrace_graph_get_ret_stack(struct task_struct *task, int skip);
 
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 54ed2ed2036b..67fa7fbf6aac 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -18,13 +18,6 @@
 #include "ftrace_internal.h"
 #include "trace.h"
 
-#ifdef CONFIG_DYNAMIC_FTRACE
-#define ASSIGN_OPS_HASH(opsname, val) \
-       .func_hash              = val, \
-       .local_hash.regex_lock  = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
-#else
-#define ASSIGN_OPS_HASH(opsname, val)
-#endif
 
 /*
  * FGRAPH_FRAME_SIZE:  Size in bytes of the meta data on the shadow stack
@@ -155,6 +148,13 @@ get_bitmap_bits(struct task_struct *t, int offset)
        return (t->ret_stack[offset] >> FGRAPH_INDEX_SHIFT) & FGRAPH_INDEX_MASK;
 }
 
+/* For BITMAP type: set the bits in the bitmap bitmask at @offset on ret_stack */
+static inline void
+set_bitmap_bits(struct task_struct *t, int offset, unsigned long bitmap)
+{
+       t->ret_stack[offset] |= (bitmap << FGRAPH_INDEX_SHIFT);
+}
+
 /* Write the bitmap to the ret_stack at @offset (does index, offset and bitmask) */
 static inline void
 set_bitmap(struct task_struct *t, int offset, unsigned long bitmap)
@@ -381,7 +381,8 @@ int function_graph_enter(unsigned long ret, unsigned long func,
                if (gops == &fgraph_stub)
                        continue;
 
-               if (gops->entryfunc(&trace, gops))
+               if (ftrace_ops_test(&gops->ops, func, NULL) &&
+                   gops->entryfunc(&trace, gops))
                        bitmap |= BIT(i);
        }
 
@@ -402,6 +403,46 @@ int function_graph_enter(unsigned long ret, unsigned long func,
        return -EBUSY;
 }
 
+/* This is called from ftrace_graph_func() via ftrace */
+int function_graph_enter_ops(unsigned long ret, unsigned long func,
+                            unsigned long frame_pointer, unsigned long *retp,
+                            struct fgraph_ops *gops)
+{
+       struct ftrace_graph_ent trace;
+       int offset;
+       int type;
+
+       /* Check whether the fgraph_ops is unregistered. */
+       if (unlikely(fgraph_array[gops->idx] == &fgraph_stub))
+               return -ENODEV;
+
+       /* Use start for the distance to ret_stack (skipping over reserve) */
+       offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, gops->idx);
+       if (offset < 0)
+               return offset;
+       type = get_fgraph_type(current, offset);
+
+       /* This is the first ret_stack for this fentry */
+       if (type == FGRAPH_TYPE_RESERVED)
+               ++current->curr_ret_depth;
+
+       trace.func = func;
+       trace.depth = current->curr_ret_depth;
+       if (gops->entryfunc(&trace, gops)) {
+               if (type == FGRAPH_TYPE_RESERVED)
+                       set_bitmap(current, offset, BIT(gops->idx));
+               else
+                       set_bitmap_bits(current, offset, BIT(gops->idx));
+               return 0;
+       }
+
+       if (type == FGRAPH_TYPE_RESERVED) {
+               current->curr_ret_stack -= FGRAPH_FRAME_OFFSET + 1;
+               current->curr_ret_depth--;
+       }
+       return -EBUSY;
+}
+
 /* Retrieve a function return address to the trace stack on thread info.*/
 static struct ftrace_ret_stack *
 ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
@@ -662,17 +703,25 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
 }
 #endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
 
-static struct ftrace_ops graph_ops = {
-       .func                   = ftrace_graph_func,
-       .flags                  = FTRACE_OPS_FL_INITIALIZED |
-                                  FTRACE_OPS_FL_PID |
-                                  FTRACE_OPS_GRAPH_STUB,
+void fgraph_init_ops(struct ftrace_ops *dst_ops,
+                    struct ftrace_ops *src_ops)
+{
+       dst_ops->func = ftrace_graph_func;
+       dst_ops->flags = FTRACE_OPS_FL_PID | FTRACE_OPS_GRAPH_STUB;
+
 #ifdef FTRACE_GRAPH_TRAMP_ADDR
-       .trampoline             = FTRACE_GRAPH_TRAMP_ADDR,
+       dst_ops->trampoline = FTRACE_GRAPH_TRAMP_ADDR;
        /* trampoline_size is only needed for dynamically allocated tramps */
 #endif
-       ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
-};
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+       if (src_ops) {
+               dst_ops->func_hash = &src_ops->local_hash;
+               mutex_init(&dst_ops->local_hash.regex_lock);
+               dst_ops->flags |= FTRACE_OPS_FL_INITIALIZED;
+       }
+#endif
+}
 
 void ftrace_graph_sleep_time_control(bool enable)
 {
@@ -876,11 +925,20 @@ static int start_graph_tracing(void)
 
 int register_ftrace_graph(struct fgraph_ops *gops)
 {
+       int command = 0;
        int ret = 0;
        int i;
 
        mutex_lock(&ftrace_lock);
 
+       if (!gops->ops.func) {
+               gops->ops.flags |= FTRACE_OPS_GRAPH_STUB;
+               gops->ops.func = ftrace_graph_func;
+#ifdef FTRACE_GRAPH_TRAMP_ADDR
+               gops->ops.trampoline = FTRACE_GRAPH_TRAMP_ADDR;
+#endif
+       }
+
        if (!fgraph_array[0]) {
                /* The array must always have real data on it */
                for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
@@ -893,7 +951,7 @@ int register_ftrace_graph(struct fgraph_ops *gops)
                        break;
        }
        if (i >= FGRAPH_ARRAY_SIZE) {
-               ret = -EBUSY;
+               ret = -ENOSPC;
                goto out;
        }
 
@@ -907,18 +965,22 @@ int register_ftrace_graph(struct fgraph_ops *gops)
        if (ftrace_graph_active == 1) {
                register_pm_notifier(&ftrace_suspend_notifier);
                ret = start_graph_tracing();
-               if (ret) {
-                       ftrace_graph_active--;
-                       goto out;
-               }
+               if (ret)
+                       goto error;
                /*
                 * Some archs just test to see if these are not
                 * the default function
                 */
                ftrace_graph_return = return_run;
                ftrace_graph_entry = entry_run;
+               command = FTRACE_START_FUNC_RET;
+       }
 
-               ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
+       ret = ftrace_startup(&gops->ops, command);
+error:
+       if (ret) {
+               fgraph_array[i] = &fgraph_stub;
+               ftrace_graph_active--;
        }
 out:
        mutex_unlock(&ftrace_lock);
@@ -927,6 +989,7 @@ int register_ftrace_graph(struct fgraph_ops *gops)
 
 void unregister_ftrace_graph(struct fgraph_ops *gops)
 {
+       int command = 0;
        int i;
 
        mutex_lock(&ftrace_lock);
@@ -934,25 +997,29 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
        if (unlikely(!ftrace_graph_active))
                goto out;
 
-       for (i = 0; i < fgraph_array_cnt; i++)
-               if (gops == fgraph_array[i])
-                       break;
-       if (i >= fgraph_array_cnt)
+       if (unlikely(gops->idx < 0 || gops->idx >= fgraph_array_cnt))
                goto out;
 
-       fgraph_array[i] = &fgraph_stub;
-       if (i + 1 == fgraph_array_cnt) {
-               for (; i >= 0; i--)
-                       if (fgraph_array[i] != &fgraph_stub)
-                               break;
+       WARN_ON_ONCE(fgraph_array[gops->idx] != gops);
+
+       fgraph_array[gops->idx] = &fgraph_stub;
+       if (gops->idx + 1 == fgraph_array_cnt) {
+               i = gops->idx;
+               while (i >= 0 && fgraph_array[i] == &fgraph_stub)
+                       i--;
                fgraph_array_cnt = i + 1;
        }
 
        ftrace_graph_active--;
+
+       if (!ftrace_graph_active)
+               command = FTRACE_STOP_FUNC_RET;
+
+       ftrace_shutdown(&gops->ops, command);
+
        if (!ftrace_graph_active) {
                ftrace_graph_return = ftrace_stub_graph;
                ftrace_graph_entry = ftrace_graph_entry_stub;
-               ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
                unregister_pm_notifier(&ftrace_suspend_notifier);
                unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
        }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b85f00b0ffe7..fa6a686726db 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3016,6 +3016,8 @@ int ftrace_startup(struct ftrace_ops *ops, int command)
        if (unlikely(ftrace_disabled))
                return -ENODEV;
 
+       ftrace_ops_init(ops);
+
        ret = __register_ftrace_function(ops);
        if (ret)
                return ret;
@@ -7327,7 +7329,7 @@ __init void ftrace_init_global_array_ops(struct trace_array *tr)
        tr->ops = &global_ops;
        tr->ops->private = tr;
        ftrace_init_trace_array(tr);
-       init_array_fgraph_ops(tr);
+       init_array_fgraph_ops(tr, tr->ops);
 }
 
 void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index a5070f9b977b..ced61eacf40f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -894,8 +894,8 @@ extern int __trace_graph_entry(struct trace_array *tr,
 extern void __trace_graph_return(struct trace_array *tr,
                                 struct ftrace_graph_ret *trace,
                                 unsigned int trace_ctx);
-extern void init_array_fgraph_ops(struct trace_array *tr);
-extern int allocate_fgraph_ops(struct trace_array *tr);
+extern void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
+extern int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
 extern void free_fgraph_ops(struct trace_array *tr);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -978,6 +978,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
        preempt_enable_notrace();
        return ret;
 }
+
 #else
 static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
 {
@@ -1003,18 +1004,19 @@ static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace)
                (fgraph_max_depth && trace->depth >= fgraph_max_depth);
 }
 
+void fgraph_init_ops(struct ftrace_ops *dst_ops,
+                    struct ftrace_ops *src_ops);
+
 #else /* CONFIG_FUNCTION_GRAPH_TRACER */
 static inline enum print_line_t
 print_graph_function_flags(struct trace_iterator *iter, u32 flags)
 {
        return TRACE_TYPE_UNHANDLED;
 }
-static inline void init_array_fgraph_ops(struct trace_array *tr) { }
-static inline int allocate_fgraph_ops(struct trace_array *tr)
-{
-       return 0;
-}
 static inline void free_fgraph_ops(struct trace_array *tr) { }
+/* ftrace_ops may not be defined */
+#define init_array_fgraph_ops(tr, ops) do { } while (0)
+#define allocate_fgraph_ops(tr, ops) ({ 0; })
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 extern struct list_head ftrace_pids;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8e8da0d0ee52..13bf2415245d 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -91,7 +91,7 @@ int ftrace_create_function_files(struct trace_array *tr,
        if (!tr->ops)
                return -EINVAL;
 
-       ret = allocate_fgraph_ops(tr);
+       ret = allocate_fgraph_ops(tr, tr->ops);
        if (ret) {
                kfree(tr->ops);
                return ret;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 9ccc904a7703..7f30652f0e97 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -288,7 +288,7 @@ static struct fgraph_ops funcgraph_ops = {
        .retfunc = &trace_graph_return,
 };
 
-int allocate_fgraph_ops(struct trace_array *tr)
+int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops)
 {
        struct fgraph_ops *gops;
 
@@ -301,6 +301,9 @@ int allocate_fgraph_ops(struct trace_array *tr)
 
        tr->gops = gops;
        gops->private = tr;
+
+       fgraph_init_ops(&gops->ops, ops);
+
        return 0;
 }
 
@@ -309,10 +312,11 @@ void free_fgraph_ops(struct trace_array *tr)
        kfree(tr->gops);
 }
 
-__init void init_array_fgraph_ops(struct trace_array *tr)
+__init void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops)
 {
        tr->gops = &funcgraph_ops;
        funcgraph_ops.private = tr;
+       fgraph_init_ops(&tr->gops->ops, ops);
 }
 
 static int graph_trace_init(struct trace_array *tr)
-- 
2.43.0
