The callback is invoked when a process is scheduled in or out. It
provides a mechanism for later patches to save/restore the LBR stack.
It can also replace the flush branch stack callback.

To avoid unnecessary overhead, the callback is enabled dynamically.

Signed-off-by: Yan, Zheng <zheng.z....@intel.com>
---
 arch/x86/kernel/cpu/perf_event.c |  7 +++++
 arch/x86/kernel/cpu/perf_event.h |  4 +++
 include/linux/perf_event.h       |  8 ++++++
 kernel/events/core.c             | 60 +++++++++++++++++++++++++++++++++++++++-
 4 files changed, 78 insertions(+), 1 deletion(-)
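
Note (illustration only, not part of this patch): a follow-up patch is
expected to hook x86_pmu.sched_task so the LBR stack can be saved and
restored across context switches. A minimal sketch of how such a hook
might look is below; the lbr_users field already exists in struct
cpu_hw_events, but the __intel_pmu_lbr_save()/__intel_pmu_lbr_restore()
helpers are assumed placeholders for the later patches, not code from
this series.

	void intel_pmu_lbr_sched_task(struct perf_event_context *ctx,
				      bool sched_in)
	{
		struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

		/* nothing to do if no event on this cpu uses the LBR */
		if (!cpuc->lbr_users)
			return;

		if (sched_in)
			/* reload the LBR stack saved at sched out */
			__intel_pmu_lbr_restore(ctx);
		else
			/* save the LBR stack before another task runs */
			__intel_pmu_lbr_save(ctx);
	}

Such a hook would pair with perf_sched_cb_enable()/perf_sched_cb_disable()
calls from the LBR event add/del paths, so the per-cpu
perf_sched_cb_usages counter only triggers perf_pmu_sched_task() while
an LBR user is active on that cpu.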

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8e13293..6703d17 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1846,6 +1846,12 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
        NULL,
 };
 
+static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+       if (x86_pmu.sched_task)
+               x86_pmu.sched_task(ctx, sched_in);
+}
+
 static void x86_pmu_flush_branch_stack(void)
 {
        if (x86_pmu.flush_branch_stack)
@@ -1879,6 +1885,7 @@ static struct pmu pmu = {
 
        .event_idx              = x86_pmu_event_idx,
        .flush_branch_stack     = x86_pmu_flush_branch_stack,
+       .sched_task             = x86_pmu_sched_task,
 };
 
 void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 745f6fb..3fdb751 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -417,6 +417,8 @@ struct x86_pmu {
 
        void            (*check_microcode)(void);
        void            (*flush_branch_stack)(void);
+       void            (*sched_task)(struct perf_event_context *ctx,
+                                     bool sched_in);
 
        /*
         * Intel Arch Perfmon v2+
@@ -675,6 +677,8 @@ void intel_pmu_pebs_disable_all(void);
 
 void intel_ds_init(void);
 
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+
 void intel_pmu_lbr_reset(void);
 
 void intel_pmu_lbr_enable(struct perf_event *event);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8f4a70f..6a3e603 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -251,6 +251,12 @@ struct pmu {
         * flush branch stack on context-switches (needed in cpu-wide mode)
         */
        void (*flush_branch_stack)      (void);
+
+       /*
+        * PMU callback for context switches. Optional.
+        */
+       void (*sched_task)              (struct perf_event_context *ctx,
+                                        bool sched_in);
 };
 
 /**
@@ -546,6 +552,8 @@ extern void perf_event_delayed_put(struct task_struct *task);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
 extern void perf_pmu_enable(struct pmu *pmu);
+extern void perf_sched_cb_disable(struct pmu *pmu);
+extern void perf_sched_cb_enable(struct pmu *pmu);
 extern int perf_event_task_disable(void);
 extern int perf_event_task_enable(void);
 extern int perf_event_refresh(struct perf_event *event, int refresh);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 89d34f9..d110a23 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -141,6 +141,7 @@ enum event_type_t {
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
+static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -150,6 +151,7 @@ static atomic_t nr_freq_events __read_mostly;
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
 static struct srcu_struct pmus_srcu;
+static struct idr pmu_idr;
 
 /*
  * perf event paranoia level:
@@ -2327,6 +2329,57 @@ unlock:
        }
 }
 
+void perf_sched_cb_disable(struct pmu *pmu)
+{
+       __get_cpu_var(perf_sched_cb_usages)--;
+}
+
+void perf_sched_cb_enable(struct pmu *pmu)
+{
+       __get_cpu_var(perf_sched_cb_usages)++;
+}
+
+/*
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when the context switch callback is enabled.
+ */
+static void perf_pmu_sched_task(struct task_struct *prev,
+                               struct task_struct *next,
+                               bool sched_in)
+{
+       struct perf_cpu_context *cpuctx;
+       struct pmu *pmu;
+       unsigned long flags;
+
+       if (prev == next)
+               return;
+
+       local_irq_save(flags);
+
+       rcu_read_lock();
+
+       pmu = idr_find(&pmu_idr, PERF_TYPE_RAW);
+
+       if (pmu && pmu->sched_task) {
+               cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+               pmu = cpuctx->ctx.pmu;
+
+               perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+               perf_pmu_disable(pmu);
+
+               pmu->sched_task(cpuctx->task_ctx, sched_in);
+
+               perf_pmu_enable(pmu);
+
+               perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+       }
+
+       rcu_read_unlock();
+
+       local_irq_restore(flags);
+}
+
 #define for_each_task_context_nr(ctxn)                                 \
        for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
 
@@ -2346,6 +2399,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
        int ctxn;
 
+       if (__get_cpu_var(perf_sched_cb_usages))
+               perf_pmu_sched_task(task, next, false);
+
        for_each_task_context_nr(ctxn)
                perf_event_context_sched_out(task, ctxn, next);
 
@@ -2605,6 +2661,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
        /* check for system-wide branch_stack events */
        if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
                perf_branch_stack_sched_in(prev, task);
+
+       if (__get_cpu_var(perf_sched_cb_usages))
+               perf_pmu_sched_task(prev, task, true);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -6326,7 +6385,6 @@ static void free_pmu_context(struct pmu *pmu)
 out:
        mutex_unlock(&pmus_lock);
 }
-static struct idr pmu_idr;
 
 static ssize_t
 type_show(struct device *dev, struct device_attribute *attr, char *page)
-- 
1.8.4.2
