On Fri, Jan 3, 2014 at 6:47 AM, Yan, Zheng <zheng.z....@intel.com> wrote: > The callback is invoked when process is scheduled in or out. It > provides mechanism for later patches to save/store the LBR stack. > It can also replace the flush branch stack callback. > I think you need to say this callback may be invoked on context switches with per-thread events attached. As far as I understand, the callback cannot be invoked for system-wide events.
> To avoid unnecessary overhead, the callback is enabled dynamically > > Signed-off-by: Yan, Zheng <zheng.z....@intel.com> > --- > arch/x86/kernel/cpu/perf_event.c | 7 +++++ > arch/x86/kernel/cpu/perf_event.h | 4 +++ > include/linux/perf_event.h | 8 ++++++ > kernel/events/core.c | 60 > +++++++++++++++++++++++++++++++++++++++- > 4 files changed, 78 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/kernel/cpu/perf_event.c > b/arch/x86/kernel/cpu/perf_event.c > index 8e13293..6703d17 100644 > --- a/arch/x86/kernel/cpu/perf_event.c > +++ b/arch/x86/kernel/cpu/perf_event.c > @@ -1846,6 +1846,12 @@ static const struct attribute_group > *x86_pmu_attr_groups[] = { > NULL, > }; > > +static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) > +{ > + if (x86_pmu.sched_task) > + x86_pmu.sched_task(ctx, sched_in); > +} > + > static void x86_pmu_flush_branch_stack(void) > { > if (x86_pmu.flush_branch_stack) > @@ -1879,6 +1885,7 @@ static struct pmu pmu = { > > .event_idx = x86_pmu_event_idx, > .flush_branch_stack = x86_pmu_flush_branch_stack, > + .sched_task = x86_pmu_sched_task, > }; > > void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) > diff --git a/arch/x86/kernel/cpu/perf_event.h > b/arch/x86/kernel/cpu/perf_event.h > index 745f6fb..3fdb751 100644 > --- a/arch/x86/kernel/cpu/perf_event.h > +++ b/arch/x86/kernel/cpu/perf_event.h > @@ -417,6 +417,8 @@ struct x86_pmu { > > void (*check_microcode)(void); > void (*flush_branch_stack)(void); > + void (*sched_task)(struct perf_event_context *ctx, > + bool sched_in); > > /* > * Intel Arch Perfmon v2+ > @@ -675,6 +677,8 @@ void intel_pmu_pebs_disable_all(void); > > void intel_ds_init(void); > > +void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); > + There is no mention of this function anywhere else. Should not be here. 
> void intel_pmu_lbr_reset(void); > > void intel_pmu_lbr_enable(struct perf_event *event); > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h > index 8f4a70f..6a3e603 100644 > --- a/include/linux/perf_event.h > +++ b/include/linux/perf_event.h > @@ -251,6 +251,12 @@ struct pmu { > * flush branch stack on context-switches (needed in cpu-wide mode) > */ > void (*flush_branch_stack) (void); > + > + /* > + * PMU callback for context-switches. optional > + */ > + void (*sched_task) (struct perf_event_context *ctx, > + bool sched_in); > }; > > /** > @@ -546,6 +552,8 @@ extern void perf_event_delayed_put(struct task_struct > *task); > extern void perf_event_print_debug(void); > extern void perf_pmu_disable(struct pmu *pmu); > extern void perf_pmu_enable(struct pmu *pmu); > +extern void perf_sched_cb_disable(struct pmu *pmu); > +extern void perf_sched_cb_enable(struct pmu *pmu); > extern int perf_event_task_disable(void); > extern int perf_event_task_enable(void); > extern int perf_event_refresh(struct perf_event *event, int refresh); > diff --git a/kernel/events/core.c b/kernel/events/core.c > index 89d34f9..d110a23 100644 > --- a/kernel/events/core.c > +++ b/kernel/events/core.c > @@ -141,6 +141,7 @@ enum event_type_t { > struct static_key_deferred perf_sched_events __read_mostly; > static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); > static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events); > +static DEFINE_PER_CPU(int, perf_sched_cb_usages); > > static atomic_t nr_mmap_events __read_mostly; > static atomic_t nr_comm_events __read_mostly; > @@ -150,6 +151,7 @@ static atomic_t nr_freq_events __read_mostly; > static LIST_HEAD(pmus); > static DEFINE_MUTEX(pmus_lock); > static struct srcu_struct pmus_srcu; > +static struct idr pmu_idr; > > /* > * perf event paranoia level: > @@ -2327,6 +2329,57 @@ unlock: > } > } > > +void perf_sched_cb_disable(struct pmu *pmu) > +{ > + __get_cpu_var(perf_sched_cb_usages)--; > +} > + > +void 
perf_sched_cb_enable(struct pmu *pmu) > +{ > + __get_cpu_var(perf_sched_cb_usages)++; > +} > + I think you want to use jump_labels instead of this to make the callback optional. This is already used all over the place in the generic code. > +/* > + * This function provides the context switch callback to the lower code > + * layer. It is invoked ONLY when the context switch callback is enabled. > + */ > +static void perf_pmu_sched_task(struct task_struct *prev, > + struct task_struct *next, > + bool sched_in) > +{ > + struct perf_cpu_context *cpuctx; > + struct pmu *pmu; > + unsigned long flags; > + > + if (prev == next) > + return; > + > + local_irq_save(flags); > + > + rcu_read_lock(); > + > + pmu = idr_find(&pmu_idr, PERF_TYPE_RAW); > + > + if (pmu && pmu->sched_task) { > + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); > + pmu = cpuctx->ctx.pmu; > + > + perf_ctx_lock(cpuctx, cpuctx->task_ctx); > + > + perf_pmu_disable(pmu); > + > + pmu->sched_task(cpuctx->task_ctx, sched_in); > + > + perf_pmu_enable(pmu); > + > + perf_ctx_unlock(cpuctx, cpuctx->task_ctx); > + } > + > + rcu_read_unlock(); > + > + local_irq_restore(flags); > +} > + > #define for_each_task_context_nr(ctxn) \ > for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) > > @@ -2346,6 +2399,9 @@ void __perf_event_task_sched_out(struct task_struct > *task, > { > int ctxn; > > + if (__get_cpu_var(perf_sched_cb_usages)) > + perf_pmu_sched_task(task, next, false); > + > for_each_task_context_nr(ctxn) > perf_event_context_sched_out(task, ctxn, next); > > @@ -2605,6 +2661,9 @@ void __perf_event_task_sched_in(struct task_struct > *prev, > /* check for system-wide branch_stack events */ > if (atomic_read(&__get_cpu_var(perf_branch_stack_events))) > perf_branch_stack_sched_in(prev, task); > + > + if (__get_cpu_var(perf_sched_cb_usages)) > + perf_pmu_sched_task(prev, task, true); > } > > static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 > count) > @@ -6326,7 +6385,6 @@ static void 
free_pmu_context(struct pmu *pmu) > out: > mutex_unlock(&pmus_lock); > } > -static struct idr pmu_idr; > > static ssize_t > type_show(struct device *dev, struct device_attribute *attr, char *page) > -- > 1.8.4.2 > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/