Allow unprivileged users to trace their own processes' syscalls using perf trace, similar to strace but without the intrusive overhead of ptrace().
Currently, perf trace requires CAP_PERFMON or paranoid level ≤ 1 even though the kernel has existing infrastructure (TRACE_EVENT_FL_CAP_ANY) specifically designed to mark syscall tracepoints as safe for unprivileged access. To fix this: 1. Loosen the condition in perf_event_open() which requires privileges for all events with exclude_kernel=0. This allows perf_event_open() to bypass the paranoid check for task-attached tracepoint events. 2. Make the format and id tracefs files world-readable only for tracepoints with TRACE_EVENT_FL_CAP_ANY, allowing unprivileged users to see syscall tracepoint ids without exposing sensitive information. Example usage after this change: $ perf trace ls # works as unprivileged user $ perf trace # system-wide, still requires privileges $ perf trace -p 1234 # requires ptrace permission on pid 1234 Assisted-by: Claude:claude-sonnet-4.5 Signed-off-by: Anubhav Shelat <[email protected]> --- kernel/events/core.c | 2 +- kernel/trace/trace_events.c | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 89b40e439717..71d99ea4bea4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -13833,7 +13833,7 @@ SYSCALL_DEFINE5(perf_event_open, if (err) return err; - if (!attr.exclude_kernel) { + if (!attr.exclude_kernel && !(attr.type == PERF_TYPE_TRACEPOINT && pid != -1)) { err = perf_allow_kernel(); if (err) return err; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 249d1cba72c0..6250b2529376 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3051,7 +3051,9 @@ static int event_callback(const char *name, umode_t *mode, void **data, struct trace_event_call *call = file->event_call; if (strcmp(name, "format") == 0) { - *mode = TRACE_MODE_READ; + *mode = (call->flags & TRACE_EVENT_FL_CAP_ANY) ? 
+ (TRACE_MODE_READ | 0004) : + TRACE_MODE_READ; *fops = &ftrace_event_format_fops; return 1; } @@ -3087,7 +3089,9 @@ static int event_callback(const char *name, umode_t *mode, void **data, #ifdef CONFIG_PERF_EVENTS if (call->event.type && call->class->reg && strcmp(name, "id") == 0) { - *mode = TRACE_MODE_READ; + *mode = (call->flags & TRACE_EVENT_FL_CAP_ANY) ? + (TRACE_MODE_READ | 0004) : + TRACE_MODE_READ; *data = (void *)(long)call->event.type; *fops = &ftrace_event_id_fops; return 1; -- 2.53.0
