> On Mar 23, 2021, at 9:21 AM, Namhyung Kim <namhy...@kernel.org> wrote:
>
> This enables reading event group's counter values together with a
> PERF_EVENT_IOC_READ_CGROUP command like we do in the regular read().
> Users should give a correct size of buffer to be read.
>
> Signed-off-by: Namhyung Kim <namhy...@kernel.org>
> ---
> kernel/events/core.c | 119 +++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 116 insertions(+), 3 deletions(-)
>
[...]
> +}
> +
> +static int perf_event_read_cgrp_node_group(struct perf_event *event, u64 cgrp_id,
> + char __user *buf)
> +{
> + struct perf_cgroup_node *cgrp;
> + struct perf_event_context *ctx = event->ctx;
> + struct perf_event *sibling;
> + u64 read_format = event->attr.read_format;
> + unsigned long flags;
> + u64 *values;
> + int n = 1;
> + int ret;
> +
> + values = kzalloc(event->read_size, GFP_KERNEL);
> + if (!values)
> + return -ENOMEM;
> +
> + values[0] = 1 + event->nr_siblings;
> +
> + /* update event count and times (possibly run on other cpu) */
> + (void)perf_event_read(event, true);
> +
> + raw_spin_lock_irqsave(&ctx->lock, flags);
> +
> + cgrp = find_cgroup_node(event, cgrp_id);
> + if (cgrp == NULL) {
> + raw_spin_unlock_irqrestore(&ctx->lock, flags);
> + kfree(values);
> + return -ENOENT;
> + }
> +
> + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
> + values[n++] = cgrp->time_enabled;
> + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
> + values[n++] = cgrp->time_running;
> +
> + values[n++] = cgrp->count;
> + if (read_format & PERF_FORMAT_ID)
> + values[n++] = primary_event_id(event);
> +
> + for_each_sibling_event(sibling, event) {
> + n += perf_event_read_cgrp_node_sibling(sibling, read_format,
> + cgrp_id, &values[n]);
> + }
> +
> + raw_spin_unlock_irqrestore(&ctx->lock, flags);
> +
> + ret = copy_to_user(buf, values, n * sizeof(u64));
> + kfree(values);
> + if (ret)
> + return -EFAULT;
> +
> + return n * sizeof(u64);
> +}
> +
> +static int perf_event_read_cgroup_node(struct perf_event *event, u64 read_size,
> + u64 cgrp_id, char __user *buf)
> +{
> + u64 read_format = event->attr.read_format;
> +
> + if (read_size < event->read_size + 2 * sizeof(u64))
Why does this check need event->read_size plus two extra u64 values (2 * sizeof(u64)) here?
Thanks,
Song
[...]