Add a new BPF_PROG_TYPE_TRACE_EVENT prog type that allows the eBPF program to access the fields defined for a trace event. Trace event fields are defined and available on both static and kprobes-based trace events, and so TRACE_EVENT eBPF progs can be used on both types.
The reason a new prog type is needed is that BPF_PROG_TYPE_KPROBE progs expect a pt_regs * ctx, while a TRACE_EVENT prog needs a trace rec * ctx. It would have been nice to have a probe that could do both, but existing KPROBE progs expect pt_regs * ctx. We can't change that to some more self-descriptive ctx without breaking existing eBPF programs. In any case, mixing the two different types of access in a given program probably isn't that common a thing to want to do - if you're grabbing probe params and chasing pointers in your probe, you're probably not typically interested in accessing event fields too, and vice versa. Signed-off-by: Tom Zanussi <tom.zanu...@linux.intel.com> --- include/linux/trace_events.h | 7 +++++ include/uapi/linux/bpf.h | 1 + kernel/events/core.c | 14 +++++++++----- kernel/trace/bpf_trace.c | 62 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 5 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 70f8fc4..f7f12f3 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -15,6 +15,13 @@ struct tracer; struct dentry; struct bpf_prog; +struct trace_event_context { + struct trace_event_call *call; + void *record; +}; + +#define TRACE_EVENT_CTX_HDR_SIZE offsetof(struct trace_event_context, record) + struct trace_print_flags { unsigned long mask; const char *name; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9ea2d22..df6a7ff 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -89,6 +89,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_KPROBE, BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_ACT, + BPF_PROG_TYPE_TRACE_EVENT, }; #define BPF_PSEUDO_MAP_FD 1 diff --git a/kernel/events/core.c b/kernel/events/core.c index cfc227c..c366e6e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7081,15 +7081,19 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) if (event->tp_event->prog) return 
-EEXIST; - if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE)) - /* bpf programs can only be attached to u/kprobes */ - return -EINVAL; - prog = bpf_prog_get(prog_fd); if (IS_ERR(prog)) return PTR_ERR(prog); - if (prog->type != BPF_PROG_TYPE_KPROBE) { + if ((prog->type == BPF_PROG_TYPE_KPROBE) && + !(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE)) { + /* KPROBE bpf programs can only be attached to u/kprobes; + * we already hold a reference from bpf_prog_get(), so drop + * it before returning. + */ + bpf_prog_put(prog); + return -EINVAL; + } + + if (prog->type != BPF_PROG_TYPE_KPROBE && + prog->type != BPF_PROG_TYPE_TRACE_EVENT) { /* valid fd, but invalid bpf program type */ bpf_prog_put(prog); return -EINVAL; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 4228fd3..78dbac0 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -326,9 +326,71 @@ static struct bpf_prog_type_list kprobe_tl = { .type = BPF_PROG_TYPE_KPROBE, }; +static const struct bpf_func_proto * +trace_event_prog_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; + case BPF_FUNC_probe_read: + return &bpf_probe_read_proto; + case BPF_FUNC_ktime_get_ns: + return &bpf_ktime_get_ns_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; + case BPF_FUNC_get_current_pid_tgid: + return &bpf_get_current_pid_tgid_proto; + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_get_current_comm: + return &bpf_get_current_comm_proto; + case BPF_FUNC_trace_printk: + return bpf_get_trace_printk_proto(); + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_perf_event_read: + return &bpf_perf_event_read_proto; + case BPF_FUNC_perf_event_output: + return &bpf_perf_event_output_proto; + default: + return NULL; + } +} + +/* trace_event programs can access fields of trace event in rec 
*/ +static bool trace_event_prog_is_valid_access(int off, int size, + enum bpf_access_type type) +{ + /* check bounds */ + if (off < 0 || off >= TRACE_EVENT_CTX_HDR_SIZE + BUF_MAX_DATA_SIZE) + return false; + + /* only read is allowed */ + if (type != BPF_READ) + return false; + + return true; +} + +static const struct bpf_verifier_ops trace_event_prog_ops = { + .get_func_proto = trace_event_prog_func_proto, + .is_valid_access = trace_event_prog_is_valid_access, +}; + +static struct bpf_prog_type_list trace_event_tl = { + .ops = &trace_event_prog_ops, + .type = BPF_PROG_TYPE_TRACE_EVENT, +}; + static int __init register_kprobe_prog_ops(void) { bpf_register_prog_type(&kprobe_tl); + bpf_register_prog_type(&trace_event_tl); + return 0; } late_initcall(register_kprobe_prog_ops); -- 1.9.3