Alexey Budankov <alexey.budan...@linux.intel.com> writes:

> This patch implements mux switch that triggers skipping to the 
> current CPU's events list at multiplexing hrtimer interrupt 
> handler as well as adoption of the switch in the existing 
> implementation.
>
> perf_event_groups_iterate_cpu() API is introduced to implement 
> iteration thru the certain CPU groups list skipping groups 

"through"

> allocated for the other CPUs.
>
> Signed-off-by: Alexey Budankov <alexey.budan...@linux.intel.com>
> ---
>  kernel/events/core.c | 193 ++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 137 insertions(+), 56 deletions(-)
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 08ccfb2..aeb0f81 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -556,11 +556,11 @@ void perf_sample_event_took(u64 sample_len_ns)
>  static atomic64_t perf_event_id;
>  
>  static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
> -                           enum event_type_t event_type);
> +                           enum event_type_t event_type, int mux);
>  
>  static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
>                            enum event_type_t event_type,
> -                          struct task_struct *task);
> +                          struct task_struct *task, int mux);
>  
>  static void update_context_time(struct perf_event_context *ctx);
>  static u64 perf_event_time(struct perf_event *event);
> @@ -702,6 +702,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
>       struct perf_cpu_context *cpuctx;
>       struct list_head *list;
>       unsigned long flags;
> +     int mux = 0;
>  
>       /*
>        * Disable interrupts and preemption to avoid this CPU's
> @@ -717,7 +718,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
>               perf_pmu_disable(cpuctx->ctx.pmu);
>  
>               if (mode & PERF_CGROUP_SWOUT) {
> -                     cpu_ctx_sched_out(cpuctx, EVENT_ALL);
> +                     cpu_ctx_sched_out(cpuctx, EVENT_ALL, mux);
>                       /*
>                        * must not be done before ctxswout due
>                        * to event_filter_match() in event_sched_out()
> @@ -736,7 +737,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
>                        */
>                       cpuctx->cgrp = perf_cgroup_from_task(task,
>                                                            &cpuctx->ctx);
> -                     cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
> +                     cpu_ctx_sched_in(cpuctx, EVENT_ALL, task, mux);

'mux' is always zero in this function, isn't it?

>               }
>               perf_pmu_enable(cpuctx->ctx.pmu);
>               perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> @@ -1613,8 +1614,16 @@ perf_event_groups_rotate(struct rb_root *groups, int cpu)
>   */
>  #define perf_event_groups_for_each(event, iter, tree, node, list, link) \
>          for (iter = rb_first(tree); iter; iter = rb_next(iter))      \
> -             list_for_each_entry(event, &(rb_entry(iter,             \
> -                     typeof(*event), node)->list), link)
> +             list_for_each_entry(event, &(rb_entry(iter,             \
> +                             typeof(*event), node)->list), link)

Is this an indentation change? What is it doing here?

> +
> +/*
> + * Iterate event groups related to specific cpu.
> + */
> +#define perf_event_groups_for_each_cpu(event, cpu, tree, list, link) \
> +             list = perf_event_groups_get_list(tree, cpu);           \
> +             if (list)                                               \
> +                     list_for_each_entry(event, list, link)

...or not, if there's no list.
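
Also, as written this expands to two statements (an assignment plus an
if), so it only works when the caller wraps the whole thing in braces;
a bare if/else around it would misparse. Something like the below
(untested sketch) keeps it a single statement and still does nothing
when there is no list for that cpu:

#define perf_event_groups_for_each_cpu(event, cpu, tree, list, link)	\
	for (list = perf_event_groups_get_list(tree, cpu);		\
	     list; list = NULL)						\
		list_for_each_entry(event, list, link)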

>  
>  /*
>   * Add a event from the lists for its context.
> @@ -2397,36 +2406,38 @@ static void add_event_to_ctx(struct perf_event *event,
>  
>  static void ctx_sched_out(struct perf_event_context *ctx,
>                         struct perf_cpu_context *cpuctx,
> -                       enum event_type_t event_type);
> +                       enum event_type_t event_type, int mux);
>  static void
>  ctx_sched_in(struct perf_event_context *ctx,
>            struct perf_cpu_context *cpuctx,
>            enum event_type_t event_type,
> -          struct task_struct *task);
> +          struct task_struct *task, int mux);
>  
>  static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
>                              struct perf_event_context *ctx,
>                              enum event_type_t event_type)
>  {
> +     int mux = 0;
> +
>       if (!cpuctx->task_ctx)
>               return;
>  
>       if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
>               return;
>  
> -     ctx_sched_out(ctx, cpuctx, event_type);
> +     ctx_sched_out(ctx, cpuctx, event_type, mux);

Just use 0.

>  }
>  
>  static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
>                               struct perf_event_context *ctx,
> -                             struct task_struct *task)
> +                             struct task_struct *task, int mux)
>  {
> -     cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task);
> +     cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task, mux);
>       if (ctx)
> -             ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
> -     cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
> +             ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task, mux);
> +     cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task, mux);
>       if (ctx)
> -             ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
> +             ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task, mux);
>  }
>  
>  /*
> @@ -2450,6 +2461,7 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
>  {
>       enum event_type_t ctx_event_type = event_type & EVENT_ALL;
>       bool cpu_event = !!(event_type & EVENT_CPU);
> +     int mux = 0;
>  
>       /*
>        * If pinned groups are involved, flexible groups also need to be
> @@ -2470,11 +2482,11 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
>        *  - otherwise, do nothing more.
>        */
>       if (cpu_event)
> -             cpu_ctx_sched_out(cpuctx, ctx_event_type);
> +             cpu_ctx_sched_out(cpuctx, ctx_event_type, mux);
>       else if (ctx_event_type & EVENT_PINNED)
> -             cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
> +             cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE, mux);
>  
> -     perf_event_sched_in(cpuctx, task_ctx, current);
> +     perf_event_sched_in(cpuctx, task_ctx, current, mux);

Also, mux == 0 in all cases in this function.
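
I.e. (sketch only) the local can go away here and at the other
always-zero sites above, and the callers just pass the literal:

	if (cpu_event)
		cpu_ctx_sched_out(cpuctx, ctx_event_type, 0);
	else if (ctx_event_type & EVENT_PINNED)
		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE, 0);

	perf_event_sched_in(cpuctx, task_ctx, current, 0);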

>       perf_pmu_enable(cpuctx->ctx.pmu);
>  }
>  
> @@ -2491,7 +2503,7 @@ static int  __perf_install_in_context(void *info)
>       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
>       struct perf_event_context *task_ctx = cpuctx->task_ctx;
>       bool reprogram = true;
> -     int ret = 0;
> +     int ret = 0, mux =0;
>  
>       raw_spin_lock(&cpuctx->ctx.lock);
>       if (ctx->task) {
> @@ -2518,7 +2530,7 @@ static int  __perf_install_in_context(void *info)
>       }
>  
>       if (reprogram) {
> -             ctx_sched_out(ctx, cpuctx, EVENT_TIME);
> +             ctx_sched_out(ctx, cpuctx, EVENT_TIME, mux);
>               add_event_to_ctx(event, ctx);
>               ctx_resched(cpuctx, task_ctx, get_event_type(event));
>       } else {
> @@ -2655,13 +2667,14 @@ static void __perf_event_enable(struct perf_event *event,
>  {
>       struct perf_event *leader = event->group_leader;
>       struct perf_event_context *task_ctx;
> +     int mux = 0;
>  
>       if (event->state >= PERF_EVENT_STATE_INACTIVE ||
>           event->state <= PERF_EVENT_STATE_ERROR)
>               return;
>  
>       if (ctx->is_active)
> -             ctx_sched_out(ctx, cpuctx, EVENT_TIME);
> +             ctx_sched_out(ctx, cpuctx, EVENT_TIME, mux);
>  
>       __perf_event_mark_enabled(event);
>  
> @@ -2671,7 +2684,7 @@ static void __perf_event_enable(struct perf_event *event,
>       if (!event_filter_match(event)) {
>               if (is_cgroup_event(event))
>                       perf_cgroup_defer_enabled(event);
> -             ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
> +             ctx_sched_in(ctx, cpuctx, EVENT_TIME, current, mux);
>               return;
>       }
>  
> @@ -2680,7 +2693,7 @@ static void __perf_event_enable(struct perf_event *event,
>        * then don't put it on unless the group is on.
>        */
>       if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
> -             ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
> +             ctx_sched_in(ctx, cpuctx, EVENT_TIME, current, mux);

And here.

>               return;
>       }
>  
> @@ -2876,11 +2889,13 @@ EXPORT_SYMBOL_GPL(perf_event_refresh);
>  
>  static void ctx_sched_out(struct perf_event_context *ctx,
>                         struct perf_cpu_context *cpuctx,
> -                       enum event_type_t event_type)
> +                       enum event_type_t event_type, int mux)
>  {
>       int is_active = ctx->is_active;
> +     struct list_head *group_list;
>       struct perf_event *event;
>       struct rb_node *node;
> +     int sw = -1, cpu = smp_processor_id();

Same thing seems to be happening with 'sw'.

>       lockdep_assert_held(&ctx->lock);
>  
>       if (likely(!ctx->nr_events)) {
> @@ -2926,17 +2941,47 @@ static void ctx_sched_out(struct perf_event_context *ctx,
>  
>       perf_pmu_disable(ctx->pmu);
>  
> -     if (is_active & EVENT_PINNED)
> -             perf_event_groups_for_each(event, node,
> -                             &ctx->pinned_groups, group_node,
> -                             group_list, group_entry)
> -                     group_sched_out(event, cpuctx, ctx);
> +     if (is_active & EVENT_PINNED) {
> +             if (mux) {

So it's 'rotate', really.
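
If the flag has to stay, naming it for what it means would read
better; untested sketch of what I have in mind (a bool, with the
rotation path as the only caller passing true):

static void ctx_sched_out(struct perf_event_context *ctx,
			  struct perf_cpu_context *cpuctx,
			  enum event_type_t event_type, bool rotate);

static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
			      enum event_type_t event_type, bool rotate)
{
	ctx_sched_out(&cpuctx->ctx, cpuctx, event_type, rotate);
}

/* perf_rotate_context() is then the only caller passing true: */
	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE, true);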

> +                     perf_event_groups_for_each_cpu(event, cpu,
> +                                     &ctx->pinned_groups,
> +                                     group_list, group_entry) {
> +                                     group_sched_out(event, cpuctx, ctx);
> +                     }
> +                     perf_event_groups_for_each_cpu(event, sw,
> +                                     &ctx->pinned_groups,
> +                                     group_list, group_entry) {
> +                                     group_sched_out(event, cpuctx, ctx);
> +                     }
> +             } else {
> +                     perf_event_groups_for_each(event, node,
> +                                     &ctx->pinned_groups, group_node,
> +                                     group_list, group_entry) {
> +                                     group_sched_out(event, cpuctx, ctx);
> +                     }
> +             }
> +     }
>  
> -     if (is_active & EVENT_FLEXIBLE)
> -             perf_event_groups_for_each(event, node,
> -                             &ctx->flexible_groups, group_node,
> -                             group_list, group_entry)
> -                     group_sched_out(event, cpuctx, ctx);
> +     if (is_active & EVENT_FLEXIBLE) {
> +             if (mux) {
> +                     perf_event_groups_for_each_cpu(event, cpu,
> +                                     &ctx->flexible_groups,
> +                                     group_list, group_entry) {
> +                                     group_sched_out(event, cpuctx, ctx);
> +                     }
> +                     perf_event_groups_for_each_cpu(event, sw,
> +                                     &ctx->flexible_groups,
> +                                     group_list, group_entry) {
> +                                     group_sched_out(event, cpuctx, ctx);
> +                     }
> +             } else {
> +                     perf_event_groups_for_each(event, node,
> +                                     &ctx->flexible_groups, group_node,
> +                                     group_list, group_entry) {
> +                                     group_sched_out(event, cpuctx, ctx);
> +                     }
> +             }
> +     }
>  
>       perf_pmu_enable(ctx->pmu);
>  }
> @@ -3225,9 +3270,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
>   * Called with IRQs disabled
>   */
>  static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
> -                           enum event_type_t event_type)
> +                           enum event_type_t event_type, int mux)
>  {
> -     ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
> +     ctx_sched_out(&cpuctx->ctx, cpuctx, event_type, mux);
>  }
>  
>  static void
> @@ -3287,11 +3332,13 @@ static void
>  ctx_sched_in(struct perf_event_context *ctx,
>            struct perf_cpu_context *cpuctx,
>            enum event_type_t event_type,
> -          struct task_struct *task)
> +          struct task_struct *task, int mux)
>  {
>       int is_active = ctx->is_active;
> +     struct list_head *group_list;
>       struct perf_event *event;
>       struct rb_node *node;
> +     int sw = -1, cpu = smp_processor_id();
>  
>       lockdep_assert_held(&ctx->lock);
>  
> @@ -3319,35 +3366,69 @@ ctx_sched_in(struct perf_event_context *ctx,
>        * First go through the list and put on any pinned groups
>        * in order to give them the best chance of going on.
>        */
> -     if (is_active & EVENT_PINNED)
> -             perf_event_groups_for_each(event, node,
> -                             &ctx->pinned_groups, group_node,
> -                             group_list, group_entry)
> -                     ctx_pinned_sched_in(event, cpuctx, ctx);
> +     if (is_active & EVENT_PINNED) {
> +             if (mux) {
> +                     perf_event_groups_for_each_cpu(event, sw,
> +                                     &ctx->pinned_groups,
> +                                     group_list, group_entry) {
> +                                     ctx_pinned_sched_in(event, cpuctx, ctx);
> +                     }
> +                     perf_event_groups_for_each_cpu(event, cpu,
> +                                     &ctx->pinned_groups,
> +                                     group_list, group_entry) {
> +                                     ctx_pinned_sched_in(event, cpuctx, ctx);
> +                     }
> +             } else {
> +                     perf_event_groups_for_each(event, node,
> +                                     &ctx->pinned_groups, group_node,
> +                                     group_list, group_entry) {
> +                                     ctx_pinned_sched_in(event, cpuctx, ctx);
> +                     }
> +             }
> +     }
>  
>       /* Then walk through the lower prio flexible groups */
>       if (is_active & EVENT_FLEXIBLE) {
>               int can_add_hw = 1;
> -             perf_event_groups_for_each(event, node,
> -                             &ctx->flexible_groups, group_node,
> -                             group_list, group_entry)
> -                     ctx_flexible_sched_in(event, cpuctx, ctx, &can_add_hw);
> +             if (mux) {
> +                     perf_event_groups_for_each_cpu(event, sw,
> +                                     &ctx->flexible_groups,
> +                                     group_list, group_entry) {
> +                                     ctx_flexible_sched_in(event, cpuctx,
> +                                                     ctx, &can_add_hw);
> +                     }
> +                     can_add_hw = 1;
> +                     perf_event_groups_for_each_cpu(event, cpu,
> +                                     &ctx->flexible_groups,
> +                                     group_list, group_entry) {
> +                                     ctx_flexible_sched_in(event, cpuctx,
> +                                                     ctx, &can_add_hw);
> +                     }
> +             } else {
> +                     perf_event_groups_for_each(event, node,
> +                                     &ctx->flexible_groups, group_node,
> +                                     group_list, group_entry) {
> +                                     ctx_flexible_sched_in(event, cpuctx,
> +                                                     ctx, &can_add_hw);
> +                     }
> +             }
>       }
>  }
>  
>  static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
>                            enum event_type_t event_type,
> -                          struct task_struct *task)
> +                          struct task_struct *task, int mux)
>  {
>       struct perf_event_context *ctx = &cpuctx->ctx;
>  
> -     ctx_sched_in(ctx, cpuctx, event_type, task);
> +     ctx_sched_in(ctx, cpuctx, event_type, task, mux);
>  }
>  
>  static void perf_event_context_sched_in(struct perf_event_context *ctx,
>                                       struct task_struct *task)
>  {
>       struct perf_cpu_context *cpuctx;
> +     int mux = 0;
>  
>       cpuctx = __get_cpu_context(ctx);
>       if (cpuctx->task_ctx == ctx)
> @@ -3371,8 +3452,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
>        * events, no need to flip the cpuctx's events around.
>        */
>       if (!RB_EMPTY_ROOT(&ctx->pinned_groups))
> -             cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
> -     perf_event_sched_in(cpuctx, ctx, task);
> +             cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE, mux);
> +     perf_event_sched_in(cpuctx, ctx, task, mux);
>       perf_pmu_enable(ctx->pmu);
>  
>  unlock:
> @@ -3618,7 +3699,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
>  static int perf_rotate_context(struct perf_cpu_context *cpuctx)
>  {
>       struct perf_event_context *ctx = NULL;
> -     int rotate = 0;
> +     int rotate = 0, mux = 1;
>  
>       if (cpuctx->ctx.nr_events) {
>               if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
> @@ -3637,15 +3718,15 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
>       perf_ctx_lock(cpuctx, cpuctx->task_ctx);
>       perf_pmu_disable(cpuctx->ctx.pmu);
>  
> -     cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
> +     cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE, mux);

It's '1'.
