Add a new BPF_PROG_TYPE_TRACE_EVENT prog type that allows the eBPF
program to access the fields defined for a trace event.  Trace event
fields are defined and available on both static and kprobes-based
trace events, and so TRACE_EVENT eBPF progs can be used on both types.

The reason a new prog type is needed is that BPF_PROG_TYPE_KPROBE
progs expect a pt_regs * ctx, while a TRACE_EVENT prog needs a trace
record * ctx.  It would have been nice to have a single prog type that
could do both, but existing KPROBE progs expect a pt_regs * ctx, and we
can't change that to some more self-descriptive ctx without breaking
existing eBPF programs.

In any case, mixing the two different types of access in a given
program probably isn't that common a thing to want to do - if you're
grabbing probe params and chasing pointers in your probe, you're
probably not typically interested in accessing event fields too, and
vice versa.

Signed-off-by: Tom Zanussi <tom.zanu...@linux.intel.com>
---
 include/linux/trace_events.h |  7 +++++
 include/uapi/linux/bpf.h     |  1 +
 kernel/events/core.c         | 12 +++++----
 kernel/trace/bpf_trace.c     | 62 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 77 insertions(+), 5 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 70f8fc4..f7f12f3 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -15,6 +15,13 @@ struct tracer;
 struct dentry;
 struct bpf_prog;
 
+struct trace_event_context {    /* presumably the ctx of TRACE_EVENT bpf progs - confirm */
+       struct trace_event_call *call;  /* trace event call this context refers to */
+       void *record;                   /* presumably the event's trace record - confirm */
+};
+/* Size of the context header, i.e. the offset of 'record' in the struct */
+#define TRACE_EVENT_CTX_HDR_SIZE offsetof(struct trace_event_context, record)
+
 struct trace_print_flags {
        unsigned long           mask;
        const char              *name;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9ea2d22..df6a7ff 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -89,6 +89,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_KPROBE,
        BPF_PROG_TYPE_SCHED_CLS,
        BPF_PROG_TYPE_SCHED_ACT,
+       BPF_PROG_TYPE_TRACE_EVENT,
 };
 
 #define BPF_PSEUDO_MAP_FD      1
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cfc227c..c366e6e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7081,15 +7081,17 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
        if (event->tp_event->prog)
                return -EEXIST;
 
-       if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE))
-               /* bpf programs can only be attached to u/kprobes */
-               return -EINVAL;
-
        prog = bpf_prog_get(prog_fd);
        if (IS_ERR(prog))
                return PTR_ERR(prog);
 
-       if (prog->type != BPF_PROG_TYPE_KPROBE) {
+       /*
+        * KPROBE bpf programs can only be attached to u/kprobes; fold that
+        * check in here so every rejection drops the prog reference below.
+        */
+       if ((prog->type != BPF_PROG_TYPE_KPROBE ||
+            !(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE)) &&
+           prog->type != BPF_PROG_TYPE_TRACE_EVENT) {
                /* valid fd, but invalid bpf program type */
                bpf_prog_put(prog);
                return -EINVAL;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 4228fd3..78dbac0 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -326,9 +326,71 @@ static struct bpf_prog_type_list kprobe_tl = {
        .type   = BPF_PROG_TYPE_KPROBE,
 };
 
+static const struct bpf_func_proto *
+trace_event_prog_func_proto(enum bpf_func_id func_id)
+{
+       switch (func_id) {      /* helper id -> prototype, NULL if unlisted */
+       case BPF_FUNC_map_lookup_elem:
+               return &bpf_map_lookup_elem_proto;
+       case BPF_FUNC_map_update_elem:
+               return &bpf_map_update_elem_proto;
+       case BPF_FUNC_map_delete_elem:
+               return &bpf_map_delete_elem_proto;
+       case BPF_FUNC_probe_read:
+               return &bpf_probe_read_proto;
+       case BPF_FUNC_ktime_get_ns:
+               return &bpf_ktime_get_ns_proto;
+       case BPF_FUNC_tail_call:
+               return &bpf_tail_call_proto;
+       case BPF_FUNC_get_current_pid_tgid:
+               return &bpf_get_current_pid_tgid_proto;
+       case BPF_FUNC_get_current_uid_gid:
+               return &bpf_get_current_uid_gid_proto;
+       case BPF_FUNC_get_current_comm:
+               return &bpf_get_current_comm_proto;
+       case BPF_FUNC_trace_printk:
+               return bpf_get_trace_printk_proto();    /* from a getter call */
+       case BPF_FUNC_get_smp_processor_id:
+               return &bpf_get_smp_processor_id_proto;
+       case BPF_FUNC_perf_event_read:
+               return &bpf_perf_event_read_proto;
+       case BPF_FUNC_perf_event_output:
+               return &bpf_perf_event_output_proto;
+       default:
+               return NULL;    /* helper id not handled for this prog type */
+       }
+}
+
+/* trace_event programs may only read, and only within the ctx bounds */
+static bool trace_event_prog_is_valid_access(int off, int size,
+                                            enum bpf_access_type type)
+{
+       const size_t limit = TRACE_EVENT_CTX_HDR_SIZE + BUF_MAX_DATA_SIZE;
+
+       /* only read is allowed */
+       if (type != BPF_READ)
+               return false;
+
+       /* all of [off, off + size) must fit; ordered to avoid overflow */
+       return off >= 0 && size > 0 &&
+              size <= limit && off <= limit - size;
+}
+
+static const struct bpf_verifier_ops trace_event_prog_ops = {
+       .get_func_proto  = trace_event_prog_func_proto, /* helper id lookup */
+       .is_valid_access = trace_event_prog_is_valid_access, /* ctx access check */
+};
+
+static struct bpf_prog_type_list trace_event_tl = {
+       .ops    = &trace_event_prog_ops,        /* callbacks used for this type */
+       .type   = BPF_PROG_TYPE_TRACE_EVENT,    /* prog type being registered */
+};
+
 static int __init register_kprobe_prog_ops(void)
 {
        bpf_register_prog_type(&kprobe_tl);
+       bpf_register_prog_type(&trace_event_tl); /* TRACE_EVENT type as well */
+
        return 0;
 }
 late_initcall(register_kprobe_prog_ops);
-- 
1.9.3

Reply via email to