On Tue, Jan 27, 2015 at 08:03:47PM +0200, Alexander Shishkin wrote:
> +static int account_per_task_counter(struct perf_event *event)
> +{
> +	struct pmu *pmu = event->pmu;
> +
> +	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
> +		return 0;
> +
> +	/*
> +	 * Prevent co-existence of per-task and cpu-wide events on the
> +	 * same exclusive pmu.
> +	 *
> +	 * Negative pmu::nr_per_task_events means there are cpu-wide
> +	 * events on this "exclusive" pmu, positive means there are
> +	 * per-task events.
> +	 */
> +	if (event->cpu == -1 &&
> +	    !atomic_inc_unless_negative(&pmu->nr_per_task_events))
> +		return -EBUSY;
> +	else if (!(event->attach_state & PERF_ATTACH_TASK) &&
> +		 !atomic_dec_unless_positive(&pmu->nr_per_task_events))
> +		return -EBUSY;
> +
> +	return 0;
> +}
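Just to spell out the case I mean below: user space can ask for exactly the
mixed case, a per-task event that is also bound to one CPU, by passing both
pid >= 0 and cpu >= 0 to perf_event_open().  Illustrative snippet only -- the
helper name is made up and a generic hardware event stands in for an
exclusive (AUX) PMU:

#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* hypothetical helper, not from the patch: a per-task event bound to @cpu */
static int open_task_event_on_cpu(pid_t pid, int cpu)
{
	struct perf_event_attr attr = {
		.size	= sizeof(attr),
		.type	= PERF_TYPE_HARDWARE,	/* stand-in for an exclusive PMU */
		.config	= PERF_COUNT_HW_CPU_CYCLES,
	};

	/* counts @pid only while it runs on @cpu */
	return syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
}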
I think that logic fails for per-task events that have a cpu set: for those
event->cpu != -1 skips the first branch and PERF_ATTACH_TASK skips the
second, so the counter is never touched at all.

> +static bool exclusive_event_ok(struct perf_event *event,
> +			       struct perf_event_context *ctx)
> +{
> +	struct pmu *pmu = event->pmu;
> +	bool ret = true;
> +
> +	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
> +		return true;
> +
> +	if (!__exclusive_event_ok(event, ctx))
> +		return false;
> +
> +	if (ctx->task) {
> +		if (event->cpu != -1) {
> +			struct perf_event_context *cpuctx;
> +
> +			cpuctx = &per_cpu_ptr(pmu->pmu_cpu_context,
> +					      event->cpu)->ctx;
> +
> +			mutex_lock(&cpuctx->mutex);
> +			ret = __exclusive_event_ok(event, cpuctx);
> +			mutex_unlock(&cpuctx->mutex);

We're already holding ctx->mutex here, and cpuctx->mutex is the same lock
class, so this should have made lockdep scream.

> +		}
> +	}
> +
> +	return ret;
> +}

Would something like this work?

---
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -168,6 +168,7 @@ struct perf_event;
 #define PERF_PMU_CAP_NO_INTERRUPT		0x01
 #define PERF_PMU_CAP_AUX_NO_SG			0x02
 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF		0x04
+#define PERF_PMU_CAP_EXCLUSIVE			0x08
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -188,6 +189,7 @@ struct pmu {
 
 	int * __percpu			pmu_disable_count;
 	struct perf_cpu_context * __percpu pmu_cpu_context;
+	atomic_t			exclusive_cnt; /* <0: cpu, >0: tsk */
 	int				task_ctx_nr;
 	int				hrtimer_interval_ms;
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3487,6 +3487,9 @@ static void __free_event(struct perf_eve
 	if (event->destroy)
 		event->destroy(event);
 
+	if (event->pmu && event->ctx)
+		exclusive_event_release(event);
+
 	if (event->ctx)
 		put_ctx(event->ctx);
 
@@ -7092,6 +7095,7 @@ int perf_pmu_register(struct pmu *pmu, c
 		pmu->event_idx = perf_event_idx_default;
 
 	list_add_rcu(&pmu->entry, &pmus);
+	atomic_set(&pmu->exclusive_cnt, 0);
 	ret = 0;
 unlock:
 	mutex_unlock(&pmus_lock);
@@ -7537,6 +7541,78 @@ perf_event_set_output(struct perf_event
 	return ret;
 }
 
+static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
+{
+	if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) &&
+	    (e1->cpu == e2->cpu ||
+	     e1->cpu == -1 ||
+	     e2->cpu == -1))
+		return true;
+	return false;
+}
+
+static bool __exclusive_event_ok(struct perf_event *event,
+				 struct perf_event_context *ctx)
+{
+	struct perf_event *iter_event;
+
+	list_for_each_entry(iter_event, &ctx->event_list, event_entry) {
+		if (exclusive_event_match(iter_event, event))
+			return false;
+	}
+
+	return true;
+}
+
+static bool exclusive_event_ok(struct perf_event *event,
+			       struct perf_event_context *ctx)
+{
+	struct pmu *pmu = event->pmu;
+	bool ret;
+
+	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+		return true;
+
+	/*
+	 * exclusive_cnt <0: cpu
+	 *               >0: tsk
+	 */
+	if (ctx->task) {
+		if (!atomic_inc_unless_negative(&pmu->exclusive_cnt))
+			return false;
+	} else {
+		if (!atomic_dec_unless_positive(&pmu->exclusive_cnt))
+			return false;
+	}
+
+	mutex_lock(&ctx->lock);
+	ret = __exclusive_event_ok(event, ctx);
+	mutex_unlock(&ctx->lock);
+
+	if (!ret) {
+		if (ctx->task)
+			atomic_dec(&pmu->exclusive_cnt);
+		else
+			atomic_inc(&pmu->exclusive_cnt);
+	}
+
+	return ret;
+}
+
+static void exclusive_event_release(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct pmu *pmu = event->pmu;
+
+	if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
+		return;
+
+	if (ctx->task)
+		atomic_dec(&pmu->exclusive_cnt);
+	else
+		atomic_inc(&pmu->exclusive_cnt);
+}
+
 static void mutex_lock_double(struct mutex *a, struct mutex *b)
 {
 	if (b < a)
@@ -7702,6 +7778,15 @@ SYSCALL_DEFINE5(perf_event_open,
 		task = NULL;
 	}
 
+	if (pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) {
+		err = -EBUSY;
+		if (group_leader)
+			goto err_context;
+
+		if (!exclusive_event_ok(event, ctx))
+			goto err_context;
+	}
+
 	/*
 	 * Look up the group leader (we will attach this event to it):
 	 */
@@ -7903,6 +7988,11 @@ perf_event_create_kernel_counter(struct
 		goto err_free;
 	}
 
+	if (!exclusive_event_ok(event, ctx)) {
+		err = -EBUSY;
+		goto err_context;
+	}
+
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, event, cpu);
@@ -7911,6 +8001,9 @@ perf_event_create_kernel_counter(struct
 
 	return event;
 
+err_context:
+	perf_unpin_context(ctx);
+	put_ctx(ctx);
 err_free:
	free_event(event);
 err:
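Not part of the patch, but to make the intended exclusive_cnt semantics
concrete, here is a minimal user-space model (my names; plain C11 atomics
standing in for atomic_inc_unless_negative()/atomic_dec_unless_positive()):
per-task acquisition pushes the counter positive, cpu-wide acquisition pushes
it negative, and anything that would mix the two is refused.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int exclusive_cnt;	/* <0: cpu, >0: tsk */

/* user-space stand-in for atomic_inc_unless_negative() */
static bool inc_unless_negative(atomic_int *v)
{
	int old = atomic_load(v);

	do {
		if (old < 0)
			return false;
	} while (!atomic_compare_exchange_weak(v, &old, old + 1));

	return true;
}

/* user-space stand-in for atomic_dec_unless_positive() */
static bool dec_unless_positive(atomic_int *v)
{
	int old = atomic_load(v);

	do {
		if (old > 0)
			return false;
	} while (!atomic_compare_exchange_weak(v, &old, old - 1));

	return true;
}

int main(void)
{
	/* two per-task events on the same exclusive pmu: both succeed */
	printf("task #1: %d\n", inc_unless_negative(&exclusive_cnt));	/* 1 */
	printf("task #2: %d\n", inc_unless_negative(&exclusive_cnt));	/* 1 */

	/* a cpu-wide event while per-task events exist: refused */
	printf("cpu  #1: %d\n", dec_unless_positive(&exclusive_cnt));	/* 0 */
	return 0;
}

With that, exclusive_event_ok() only has to undo its inc/dec when the list
walk in __exclusive_event_ok() still finds a conflicting event in the
context.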