When multiple syscall events are specified in the kernel command line
(e.g., trace_event=syscalls:sys_enter_openat,syscalls:sys_enter_close),
they are often not captured after boot, even though they appear enabled
in the tracing/set_event file.

The issue stems from how syscall events are initialized. Syscall
tracepoints require the global reference count (sys_tracepoint_refcount)
to transition from 0 to 1 to trigger the registration of the syscall
work (TIF_SYSCALL_TRACEPOINT) for tasks, including the init process (pid 1).

The current implementation of early_enable_events() with disable_first=true
uses an interleaved sequence of "Disable A -> Enable A -> Disable B ->
Enable B". If multiple syscalls are enabled, the refcount never drops to
zero, preventing the 0->1 transition that triggers actual registration.

Fix this by splitting early_enable_events() into two distinct phases:
1. Disable all events specified in the buffer.
2. Enable all events specified in the buffer.

This ensures the refcount hits zero before re-enabling, allowing syscall
events to be properly activated during early boot.

The code is also refactored to use a helper function to avoid logic
duplication between the disable and enable phases.

Fixes: ce1039bd3a89 ("tracing: Fix enabling of syscall events on the command line")
Signed-off-by: Huiwen He <[email protected]>
---
 kernel/trace/trace_events.c | 52 ++++++++++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 15 deletions(-)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 137b4d9bb116..879ed8b0cc78 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -4514,26 +4514,22 @@ static __init int event_trace_memsetup(void)
        return 0;
 }
 
-__init void
-early_enable_events(struct trace_array *tr, char *buf, bool disable_first)
+/*
+ * Helper function to enable or disable a comma-separated list of events
+ * from the bootup buffer.
+ */
+static __init void __early_set_events(struct trace_array *tr, char *buf, bool enable)
 {
        char *token;
-       int ret;
-
-       while (true) {
-               token = strsep(&buf, ",");
-
-               if (!token)
-                       break;
 
+       while ((token = strsep(&buf, ","))) {
                if (*token) {
-                       /* Restarting syscalls requires that we stop them first */
-                       if (disable_first)
+                       if (enable) {
+                               if (ftrace_set_clr_event(tr, token, 1))
+                                       pr_warn("Failed to enable trace event: %s\n", token);
+                       } else {
                                ftrace_set_clr_event(tr, token, 0);
-
-                       ret = ftrace_set_clr_event(tr, token, 1);
-                       if (ret)
-                               pr_warn("Failed to enable trace event: %s\n", token);
+                       }
                }
 
                /* Put back the comma to allow this to be called again */
@@ -4542,6 +4538,32 @@ early_enable_events(struct trace_array *tr, char *buf, bool disable_first)
        }
 }
 
+/**
+ * early_enable_events - enable events from the bootup buffer
+ * @tr: The trace array to enable the events in
+ * @buf: The buffer containing the comma separated list of events
+ * @disable_first: If true, disable all events in @buf before enabling them
+ *
+ * This function enables events from the bootup buffer. If @disable_first
+ * is true, it will first disable all events in the buffer before enabling
+ * them.
+ *
+ * For syscall events, which rely on a global refcount to register the
+ * SYSCALL_WORK_SYSCALL_TRACEPOINT flag (especially for pid 1), we must
+ * ensure the refcount hits zero before re-enabling them. A simple
+ * "disable then enable" per-event is not enough if multiple syscalls are
+ * used, as the refcount will stay above zero. Thus, we need a two-phase
+ * approach: disable all, then enable all.
+ */
+__init void
+early_enable_events(struct trace_array *tr, char *buf, bool disable_first)
+{
+       if (disable_first)
+               __early_set_events(tr, buf, false);
+
+       __early_set_events(tr, buf, true);
+}
+
 static __init int event_trace_enable(void)
 {
        struct trace_array *tr = top_trace_array();
-- 
2.43.0


Reply via email to