This enables reading event group's counter values together with a
PERF_EVENT_IOC_READ_CGROUP command like we do in the regular read().
Users should give a correct size of buffer to be read which includes
the total buffer size and the cgroup id.

Acked-by: Song Liu <songliubrav...@fb.com>
Signed-off-by: Namhyung Kim <namhy...@kernel.org>
---
 kernel/events/core.c | 120 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 117 insertions(+), 3 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index bcf51c0b7855..7440857d680e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2232,13 +2232,24 @@ static void perf_add_cgrp_node_list(struct perf_event 
*event,
 {
        struct list_head *cgrp_ctx_list = this_cpu_ptr(&cgroup_ctx_list);
        struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
+       struct perf_event *sibling;
        bool is_first;
 
        lockdep_assert_irqs_disabled();
        lockdep_assert_held(&ctx->lock);
 
+       /* only group leader can be added directly */
+       if (event->group_leader != event)
+               return;
+
+       if (!event_has_cgroup_node(event))
+               return;
+
        is_first = list_empty(&ctx->cgrp_node_list);
+
        list_add_tail(&event->cgrp_node_entry, &ctx->cgrp_node_list);
+       for_each_sibling_event(sibling, event)
+               list_add_tail(&sibling->cgrp_node_entry, &ctx->cgrp_node_list);
 
        if (is_first)
                list_add_tail(&ctx->cgrp_ctx_entry, cgrp_ctx_list);
@@ -2250,15 +2261,25 @@ static void perf_del_cgrp_node_list(struct perf_event 
*event,
                                    struct perf_event_context *ctx)
 {
        struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
+       struct perf_event *sibling;
 
        lockdep_assert_irqs_disabled();
        lockdep_assert_held(&ctx->lock);
 
+       /* only group leader can be deleted directly */
+       if (event->group_leader != event)
+               return;
+
+       if (!event_has_cgroup_node(event))
+               return;
+
        update_cgroup_node(event, cgrp->css.cgroup);
        /* to refresh delta when it's enabled */
        event->cgrp_node_count = 0;
 
        list_del(&event->cgrp_node_entry);
+       for_each_sibling_event(sibling, event)
+               list_del(&sibling->cgrp_node_entry);
 
        if (list_empty(&ctx->cgrp_node_list))
                list_del(&ctx->cgrp_ctx_entry);
@@ -2333,7 +2354,7 @@ static int perf_event_attach_cgroup_node(struct 
perf_event *event, u64 nr_cgrps,
 
        raw_spin_unlock_irqrestore(&ctx->lock, flags);
 
-       if (is_first && enabled)
+       if (is_first && enabled && event->group_leader == event)
                event_function_call(event, perf_attach_cgroup_node, NULL);
 
        return 0;
@@ -2370,8 +2391,8 @@ static void __perf_read_cgroup_node(struct perf_event 
*event)
        }
 }
 
-static int perf_event_read_cgroup_node(struct perf_event *event, u64 read_size,
-                                      u64 cgrp_id, char __user *buf)
+static int perf_event_read_cgrp_node_one(struct perf_event *event, u64 cgrp_id,
+                                        char __user *buf)
 {
        struct perf_cgroup_node *cgrp;
        struct perf_event_context *ctx = event->ctx;
@@ -2406,6 +2427,92 @@ static int perf_event_read_cgroup_node(struct perf_event 
*event, u64 read_size,
 
        return n * sizeof(u64);
 }
+
+static int perf_event_read_cgrp_node_sibling(struct perf_event *event,
+                                            u64 read_format, u64 cgrp_id,
+                                            u64 *values)
+{
+       struct perf_cgroup_node *cgrp;
+       int n = 0;
+
+       cgrp = find_cgroup_node(event, cgrp_id);
+       if (cgrp == NULL)
+               return (read_format & PERF_FORMAT_ID) ? 2 : 1;
+
+       values[n++] = cgrp->count;
+       if (read_format & PERF_FORMAT_ID)
+               values[n++] = primary_event_id(event);
+       return n;
+}
+
+static int perf_event_read_cgrp_node_group(struct perf_event *event, u64 
cgrp_id,
+                                          char __user *buf)
+{
+       struct perf_cgroup_node *cgrp;
+       struct perf_event_context *ctx = event->ctx;
+       struct perf_event *sibling;
+       u64 read_format = event->attr.read_format;
+       unsigned long flags;
+       u64 *values;
+       int n = 1;
+       int ret;
+
+       values = kzalloc(event->read_size, GFP_KERNEL);
+       if (!values)
+               return -ENOMEM;
+
+       values[0] = 1 + event->nr_siblings;
+
+       /* update event count and times (possibly run on other cpu) */
+       (void)perf_event_read(event, true);
+
+       raw_spin_lock_irqsave(&ctx->lock, flags);
+
+       cgrp = find_cgroup_node(event, cgrp_id);
+       if (cgrp == NULL) {
+               raw_spin_unlock_irqrestore(&ctx->lock, flags);
+               kfree(values);
+               return -ENOENT;
+       }
+
+       if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+               values[n++] = cgrp->time_enabled;
+       if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+               values[n++] = cgrp->time_running;
+
+       values[n++] = cgrp->count;
+       if (read_format & PERF_FORMAT_ID)
+               values[n++] = primary_event_id(event);
+
+       for_each_sibling_event(sibling, event) {
+               n += perf_event_read_cgrp_node_sibling(sibling, read_format,
+                                                      cgrp_id, &values[n]);
+       }
+
+       raw_spin_unlock_irqrestore(&ctx->lock, flags);
+
+       ret = copy_to_user(buf, values, n * sizeof(u64));
+       kfree(values);
+       if (ret)
+               return -EFAULT;
+
+       return n * sizeof(u64);
+}
+
+static int perf_event_read_cgroup_node(struct perf_event *event, u64 read_size,
+                                      u64 cgrp_id, char __user *buf)
+{
+       u64 read_format = event->attr.read_format;
+
+       /* buf = bufsize + cgroup_id + read_buffer */
+       if (read_size < 2 * sizeof(u64) + event->read_size)
+               return -EINVAL;
+
+       if (read_format & PERF_FORMAT_GROUP)
+               return perf_event_read_cgrp_node_group(event, cgrp_id, buf);
+
+       return perf_event_read_cgrp_node_one(event, cgrp_id, buf);
+}
 #else  /* !CONFIG_CGROUP_PERF */
 static inline bool event_can_attach_cgroup(struct perf_event *event)
 {
@@ -2512,6 +2619,7 @@ static void perf_group_detach(struct perf_event *event)
                        if (sibling->state == PERF_EVENT_STATE_ACTIVE)
                                list_add_tail(&sibling->active_list, 
get_event_list(sibling));
                }
+               perf_add_cgrp_node_list(sibling, event->ctx);
 
                WARN_ON_ONCE(sibling->ctx != event->ctx);
        }
@@ -2655,6 +2763,9 @@ __perf_remove_from_context(struct perf_event *event,
                perf_group_detach(event);
        list_del_event(event, ctx);
 
+       if (event->state > PERF_EVENT_STATE_OFF)
+               perf_del_cgrp_node_list(event, ctx);
+
        if (!ctx->nr_events && ctx->is_active) {
                ctx->is_active = 0;
                ctx->rotate_necessary = 0;
@@ -3113,6 +3224,9 @@ static int  __perf_install_in_context(void *info)
                reprogram = cgroup_is_descendant(cgrp->css.cgroup,
                                        event->cgrp->css.cgroup);
        }
+
+       if (event->state > PERF_EVENT_STATE_OFF)
+               perf_add_cgrp_node_list(event, ctx);
 #endif
 
        if (reprogram) {
-- 
2.31.1.295.g9ea45b61b8-goog

Reply via email to