Re: [PATCH v4 3/4] bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter

Daniel Borkmann Wed, 29 Jul 2015 16:53:05 -0700

On 07/28/2015 01:17 PM, Kaixu Xia wrote:

According to the perf_event_map_fd and index, the function
bpf_perf_event_read() can convert the corresponding map
value to the pointer to struct perf_event and return the
Hardware PMU counter value.


Signed-off-by: Kaixu Xia <[email protected]>
---
  include/linux/bpf.h        |  1 +
  include/linux/perf_event.h |  3 ++-
  include/uapi/linux/bpf.h   |  1 +
  kernel/bpf/helpers.c       | 36 ++++++++++++++++++++++++++++++++++++
  kernel/events/core.c       |  4 ++--
  kernel/trace/bpf_trace.c   |  2 ++
  6 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3c9c0eb..516992c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -190,6 +190,7 @@ extern const struct bpf_func_proto 
bpf_map_lookup_elem_proto;
  extern const struct bpf_func_proto bpf_map_update_elem_proto;
  extern const struct bpf_func_proto bpf_map_delete_elem_proto;

+extern const struct bpf_func_proto bpf_perf_event_read_proto;
  extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
  extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
  extern const struct bpf_func_proto bpf_tail_call_proto;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2ea4067..899abcb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -662,7 +662,8 @@ extern void perf_pmu_migrate_context(struct pmu *pmu,
                                int src_cpu, int dst_cpu);
  extern u64 perf_event_read_value(struct perf_event *event,
                                 u64 *enabled, u64 *running);
-
+extern void __perf_event_read(void *info);
+extern u64 perf_event_count(struct perf_event *event);

  struct perf_sample_data {
        /*
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 69a1f6b..b9b13ce 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -250,6 +250,7 @@ enum bpf_func_id {
         * Return: 0 on success
         */
        BPF_FUNC_get_current_comm,
+       BPF_FUNC_perf_event_read,       /* u64 bpf_perf_event_read(&map, index) 
*/
        __BPF_FUNC_MAX_ID,
  };

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1447ec0..c40c5ea 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -182,3 +182,39 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
        .arg1_type      = ARG_PTR_TO_STACK,
        .arg2_type      = ARG_CONST_STACK_SIZE,
  };
+
+static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
+{
+       struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
+       struct bpf_array *array = container_of(map, struct bpf_array, map);
+       struct perf_event *event;
+
+       if (index >= array->map.max_entries)
+               return -E2BIG;


Maybe unlikely(...), likewise below.

+       event = (struct perf_event *)array->ptrs[index];
+       if (!event)
+               return -ENOENT;
+
+       if (event->state != PERF_EVENT_STATE_ACTIVE)
+               return -EINVAL;
+
+       if (event->oncpu != raw_smp_processor_id() &&
+           event->ctx->task != current)
+               return -EINVAL;
+
+       if (event->attr.inherit)
+               return -EINVAL;
+
+       __perf_event_read(event);
+
+       return perf_event_count(event);


I believe this helper should rather go somewhere such as bpf_trace.c
(or under kernel/events/ ?), wouldn't we otherwise get a build error
when perf events are compiled out?

Anyway, I let perf folks comment on that (and the helper in general).

+}
+
+const struct bpf_func_proto bpf_perf_event_read_proto = {
+       .func           = bpf_perf_event_read,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_CONST_MAP_PTR,
+       .arg2_type      = ARG_ANYTHING,
+};
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 58f0d47..c926c6d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3177,7 +3177,7 @@ void perf_event_exec(void)
  /*
   * Cross CPU call to read the hardware event
   */
-static void __perf_event_read(void *info)
+void __perf_event_read(void *info)


Does this need to be declared in a header file, no?

  {
        struct perf_event *event = info;
        struct perf_event_context *ctx = event->ctx;
@@ -3204,7 +3204,7 @@ static void __perf_event_read(void *info)
        raw_spin_unlock(&ctx->lock);
  }

-static inline u64 perf_event_count(struct perf_event *event)
+u64 perf_event_count(struct perf_event *event)


Likewise? Should the inlining be preserved?

  {
        if (event->pmu->count)
                return event->pmu->count(event);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 88a041a..9cf094f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -183,6 +183,8 @@ static const struct bpf_func_proto 
*kprobe_prog_func_proto(enum bpf_func_id func


Btw, I'm wondering if the perf event map portions should actually better go
here into bpf_trace.c ...

                return bpf_get_trace_printk_proto();
        case BPF_FUNC_get_smp_processor_id:
                return &bpf_get_smp_processor_id_proto;
+       case BPF_FUNC_perf_event_read:
+               return &bpf_perf_event_read_proto;
        default:
                return NULL;
        }


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v4 3/4] bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter

Reply via email to