From: Pengfei Li <[email protected]>

Add TRACE_STACK_ID event type and integrate ftrace_stackmap into
__ftrace_trace_stack(). When the 'stackmap' trace option is enabled,
the stack recording path stores a 4-byte stack_id in the ring buffer
instead of the full stack trace.

Changes:
- Add TRACE_STACK_ID to the trace_type enum
- Add stack_id_entry to trace_entries.h (a single 'int stack_id' field)
- Add the TRACE_ITER_STACKMAP trace option flag
- Make __ftrace_trace_stack() call ftrace_stackmap_get_id() when the
  stackmap option is active
- Add a stack_id print handler to trace_output.c
- Add a stackmap field to struct trace_array (per-instance support)

The stack_id event is committed unconditionally (no filter check)
since it is a synthetic side-event tied to the parent event, which
has already been subject to filtering.

Fallback behavior: if ftrace_stackmap_get_id() returns a negative value
(pool exhausted or resetting), the full stack trace is recorded as before.

Usage:
  echo 1 > /sys/kernel/debug/tracing/options/stackmap
  echo 1 > /sys/kernel/debug/tracing/options/stacktrace

Signed-off-by: Pengfei Li <[email protected]>
---
 kernel/trace/trace.c         | 46 ++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.h         | 16 +++++++++++++
 kernel/trace/trace_entries.h | 15 ++++++++++++
 kernel/trace/trace_output.c  | 23 ++++++++++++++++++
 4 files changed, 100 insertions(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6eb4d3097a4d..c72cb8491217 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -57,6 +57,7 @@
 
 #include "trace.h"
 #include "trace_output.h"
+#include "trace_stackmap.h"
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
 /*
@@ -2184,6 +2185,37 @@ void __ftrace_trace_stack(struct trace_array *tr,
        }
 #endif
 
+#ifdef CONFIG_FTRACE_STACKMAP
+       /*
+        * If stackmap dedup is enabled, try to store only the stack_id
+        * in the ring buffer instead of the full stack trace.
+        */
+       if (tr->trace_flags & TRACE_ITER_STACKMAP) {
+               struct stack_id_entry *sid_entry;
+               int sid;
+
+               sid = ftrace_stackmap_get_id(tr->stackmap, fstack->calls, nr_entries);
+               if (sid >= 0) {
+                       event = __trace_buffer_lock_reserve(buffer,
+                                       TRACE_STACK_ID,
+                                       sizeof(*sid_entry), trace_ctx);
+                       if (!event)
+                               goto out;
+                       sid_entry = ring_buffer_event_data(event);
+                       sid_entry->stack_id = sid;
+                       /*
+                        * stack_id is a synthetic side-event attached to a
+                        * primary trace event that was already subject to
+                        * filtering. No per-event filter is defined for
+                        * TRACE_STACK_ID, so commit unconditionally.
+                        */
+                       __buffer_unlock_commit(buffer, event);
+                       goto out;
+               }
+               /* Fall through to full stack on stackmap failure */
+       }
+#endif
+
        event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
                                    struct_size(entry, caller, nr_entries),
                                    trace_ctx);
@@ -9222,6 +9254,20 @@ static __init void tracer_init_tracefs_work_func(struct work_struct *work)
                        NULL, &tracing_dyn_info_fops);
 #endif
 
+#ifdef CONFIG_FTRACE_STACKMAP
+       global_trace.stackmap = ftrace_stackmap_create();
+       if (!IS_ERR(global_trace.stackmap)) {
+               trace_create_file("stack_map", TRACE_MODE_WRITE, NULL,
+                               global_trace.stackmap, &ftrace_stackmap_fops);
+               trace_create_file("stack_map_stat", TRACE_MODE_READ, NULL,
+                               global_trace.stackmap, &ftrace_stackmap_stat_fops);
+               trace_create_file("stack_map_bin", TRACE_MODE_READ, NULL,
+                               global_trace.stackmap, &ftrace_stackmap_bin_fops);
+       } else {
+               pr_warn("ftrace stackmap init failed, dedup disabled\n");
+               global_trace.stackmap = NULL;
+       }
+#endif
        create_trace_instances(NULL);
 
        update_tracer_options();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 80fe152af1dd..74f421a89347 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -57,6 +57,7 @@ enum trace_type {
        TRACE_TIMERLAT,
        TRACE_RAW_DATA,
        TRACE_FUNC_REPEATS,
+       TRACE_STACK_ID,
 
        __TRACE_LAST_TYPE,
 };
@@ -453,6 +454,9 @@ struct trace_array {
        struct cond_snapshot    *cond_snapshot;
 #endif
        struct trace_func_repeats       __percpu *last_func_repeats;
+#ifdef CONFIG_FTRACE_STACKMAP
+       struct ftrace_stackmap          *stackmap;
+#endif
        /*
         * On boot up, the ring buffer is set to the minimum size, so that
         * we do not waste memory on systems that are not using tracing.
@@ -579,6 +583,8 @@ extern void __ftrace_bad_type(void);
                          TRACE_GRAPH_RET);             \
                IF_ASSIGN(var, ent, struct func_repeats_entry,          \
                          TRACE_FUNC_REPEATS);                          \
+               IF_ASSIGN(var, ent, struct stack_id_entry,              \
+                         TRACE_STACK_ID);                              \
                __ftrace_bad_type();                                    \
        } while (0)
 
@@ -1449,7 +1455,16 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
 # define STACK_FLAGS
 #endif
 
+#ifdef CONFIG_FTRACE_STACKMAP
+# define STACKMAP_FLAGS                                \
+                       C(STACKMAP,             "stackmap"),
+#else
+# define STACKMAP_FLAGS
+# define TRACE_ITER_STACKMAP           0UL
+#endif
+
 #ifdef CONFIG_FUNCTION_PROFILER
+
 # define PROFILER_FLAGS                                        \
                C(PROF_TEXT_OFFSET,     "prof-text-offset"),
 # ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -1506,6 +1521,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
                FUNCTION_FLAGS                                  \
                FGRAPH_FLAGS                                    \
                STACK_FLAGS                                     \
+               STACKMAP_FLAGS                                  \
                BRANCH_FLAGS                                    \
                PROFILER_FLAGS                                  \
                FPROFILE_FLAGS
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 54417468fdeb..89ed14b7e5fd 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -250,6 +250,21 @@ FTRACE_ENTRY(user_stack, userstack_entry,
                 (void *)__entry->caller[6], (void *)__entry->caller[7])
 );
 
+/*
+ * Stack ID entry - stores only a stack_id referencing the stackmap.
+ * Used when CONFIG_FTRACE_STACKMAP is enabled to deduplicate stacks.
+ */
+FTRACE_ENTRY(stack_id, stack_id_entry,
+
+       TRACE_STACK_ID,
+
+       F_STRUCT(
+               __field(        int,            stack_id        )
+       ),
+
+       F_printk("<stack_id %d>", __entry->stack_id)
+);
+
 /*
  * trace_printk entry:
  */
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index a5ad76175d10..68678ea88159 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1517,6 +1517,28 @@ static struct trace_event trace_user_stack_event = {
        .funcs          = &trace_user_stack_funcs,
 };
 
+/* TRACE_STACK_ID */
+static enum print_line_t trace_stack_id_print(struct trace_iterator *iter,
+                                             int flags, struct trace_event *event)
+{
+       struct stack_id_entry *field;
+       struct trace_seq *s = &iter->seq;
+
+       trace_assign_type(field, iter->ent);
+       trace_seq_printf(s, "<stack_id %d>\n", field->stack_id);
+
+       return trace_handle_return(s);
+}
+
+static struct trace_event_functions trace_stack_id_funcs = {
+       .trace          = trace_stack_id_print,
+};
+
+static struct trace_event trace_stack_id_event = {
+       .type           = TRACE_STACK_ID,
+       .funcs          = &trace_stack_id_funcs,
+};
+
 /* TRACE_HWLAT */
 static enum print_line_t
 trace_hwlat_print(struct trace_iterator *iter, int flags,
@@ -1908,6 +1930,7 @@ static struct trace_event *events[] __initdata = {
        &trace_wake_event,
        &trace_stack_event,
        &trace_user_stack_event,
+       &trace_stack_id_event,
        &trace_bputs_event,
        &trace_bprint_event,
        &trace_print_event,
-- 
2.34.1


Reply via email to