Re: [RFC][PATCH 1/7] perf/x86/intel: Rework the large PEBS setup code

2016-07-10 Thread Jiri Olsa
On Sat, Jul 09, 2016 at 12:25:09AM +0200, Peter Zijlstra wrote:
> On Sat, Jul 09, 2016 at 12:00:47AM +0200, Peter Zijlstra wrote:
> > Yes, you're right. Let me try and see if I can make that better.
> 
> Something like so?

yep, seems good ;-)

jirka

> 
> ---
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -831,6 +831,18 @@ static inline void pebs_update_threshold
>   ds->pebs_interrupt_threshold = threshold;
>  }
>  
> +static void pebs_update_state(bool needs_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
> +{
> + if (needs_cb != pebs_needs_sched_cb(cpuc)) {
> + if (!needs_cb)
> + perf_sched_cb_inc(pmu);
> + else
> + perf_sched_cb_dec(pmu);
> +
> + pebs_update_threshold(cpuc);
> + }
> +}
> +
>  static void intel_pmu_pebs_add(struct perf_event *event)
>  {
>   struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> @@ -841,10 +853,7 @@ static void intel_pmu_pebs_add(struct pe
>   if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
>   cpuc->n_large_pebs++;
>  
> - if (!needs_cb && pebs_needs_sched_cb(cpuc))
> - perf_sched_cb_inc(event->ctx->pmu);
> -
> - pebs_update_threshold(cpuc);
> + pebs_update_state(needs_cb, cpuc, event->ctx->pmu);
>  }
>  
>  void intel_pmu_pebs_enable(struct perf_event *event)
> @@ -884,11 +893,7 @@ static void intel_pmu_pebs_del(struct pe
>   if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
>   cpuc->n_large_pebs--;
>  
> - if (needs_cb && !pebs_needs_sched_cb(cpuc))
> - perf_sched_cb_dec(event->ctx->pmu);
> -
> - if (cpuc->n_pebs)
> - pebs_update_threshold(cpuc);
> + pebs_update_state(needs_cb, cpuc, event->ctx->pmu);
>  }
>  
>  void intel_pmu_pebs_disable(struct perf_event *event)


Re: [RFC][PATCH 1/7] perf/x86/intel: Rework the large PEBS setup code

2016-07-08 Thread Peter Zijlstra
On Sat, Jul 09, 2016 at 12:00:47AM +0200, Peter Zijlstra wrote:
> Yes, you're right. Let me try and see if I can make that better.

Something like so?

---
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -831,6 +831,18 @@ static inline void pebs_update_threshold
ds->pebs_interrupt_threshold = threshold;
 }
 
+static void pebs_update_state(bool needs_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
+{
+   if (needs_cb != pebs_needs_sched_cb(cpuc)) {
+   if (!needs_cb)
+   perf_sched_cb_inc(pmu);
+   else
+   perf_sched_cb_dec(pmu);
+
+   pebs_update_threshold(cpuc);
+   }
+}
+
 static void intel_pmu_pebs_add(struct perf_event *event)
 {
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -841,10 +853,7 @@ static void intel_pmu_pebs_add(struct pe
if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
cpuc->n_large_pebs++;
 
-   if (!needs_cb && pebs_needs_sched_cb(cpuc))
-   perf_sched_cb_inc(event->ctx->pmu);
-
-   pebs_update_threshold(cpuc);
+   pebs_update_state(needs_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_enable(struct perf_event *event)
@@ -884,11 +893,7 @@ static void intel_pmu_pebs_del(struct pe
if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
cpuc->n_large_pebs--;
 
-   if (needs_cb && !pebs_needs_sched_cb(cpuc))
-   perf_sched_cb_dec(event->ctx->pmu);
-
-   if (cpuc->n_pebs)
-   pebs_update_threshold(cpuc);
+   pebs_update_state(needs_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
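
For context, the threshold that pebs_update_state() ends up recomputing via
pebs_update_threshold() (shown in full in the original posting further down
the thread) is one of two values. The standalone sketch below only
illustrates that arithmetic; the record size, counter count, and buffer
addresses are made-up example values, not values taken from the patch.

#include <stdio.h>
#include <stdint.h>

/* Example values only; real sizes depend on the CPU's PEBS record format. */
#define PEBS_RECORD_SIZE	192ULL	/* assumed bytes per PEBS record */
#define MAX_PEBS_EVENTS		4ULL	/* assumed PEBS-capable counters */

int main(void)
{
	uint64_t pebs_buffer_base      = 0x1000;		/* assumed DS buffer start */
	uint64_t pebs_absolute_maximum = 0x1000 + 64 * 1024;	/* assumed buffer end */
	int all_large_pebs = 1;	/* models cpuc->n_pebs == cpuc->n_large_pebs */
	uint64_t threshold;

	if (all_large_pebs) {
		/* Large threshold: interrupt only when the buffer is nearly
		 * full, keeping one record of headroom per PEBS counter. */
		threshold = pebs_absolute_maximum -
			    MAX_PEBS_EVENTS * PEBS_RECORD_SIZE;
	} else {
		/* Mixed case: fall back to an interrupt after every record. */
		threshold = pebs_buffer_base + PEBS_RECORD_SIZE;
	}

	printf("pebs_interrupt_threshold = %#llx\n",
	       (unsigned long long)threshold);
	return 0;
}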


Re: [RFC][PATCH 1/7] perf/x86/intel: Rework the large PEBS setup code

2016-07-08 Thread Peter Zijlstra
On Fri, Jul 08, 2016 at 06:36:16PM +0200, Jiri Olsa wrote:
> On Fri, Jul 08, 2016 at 03:31:00PM +0200, Peter Zijlstra wrote:
> 
> SNIP
> 
> > /*
> > -* When the event is constrained enough we can use a larger
> > -* threshold and run the event with less frequent PMI.
> > +* Use auto-reload if possible to save a MSR write in the PMI.
> > +* This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
> >  */
> > -   if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
> > -   threshold = ds->pebs_absolute_maximum -
> > -   x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
> > -
> > -   if (first_pebs)
> > -   perf_sched_cb_inc(event->ctx->pmu);
> > -   } else {
> > -   threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
> > -
> > -   /*
> > -* If not all events can use larger buffer,
> > -* roll back to threshold = 1
> > -*/
> > -   if (!first_pebs &&
> > -   (ds->pebs_interrupt_threshold > threshold))
> > -   perf_sched_cb_dec(event->ctx->pmu);
> > -   }
> 
> hum, the original code switched back the perf_sched_cb,
> in case a !freerunning event was detected.. I don't see it
> in the new code.. just the threshold update

> +static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
>  {
> + return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
> +}

> +static void intel_pmu_pebs_add(struct perf_event *event)
> +{
> + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> + struct hw_perf_event *hwc = &event->hw;
> + bool needs_cb = pebs_needs_sched_cb(cpuc);
> +
> + cpuc->n_pebs++;
> + if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
> + cpuc->n_large_pebs++;
> +
> + if (!needs_cb && pebs_needs_sched_cb(cpuc))
> + perf_sched_cb_inc(event->ctx->pmu);

Ah, you're saying this,

> + pebs_update_threshold(cpuc);
>  }

> +static void intel_pmu_pebs_del(struct perf_event *event)
> +{
> + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> + struct hw_perf_event *hwc = &event->hw;
> + bool needs_cb = pebs_needs_sched_cb(cpuc);
> +
> + cpuc->n_pebs--;
> + if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
> + cpuc->n_large_pebs--;
> +
> + if (needs_cb && !pebs_needs_sched_cb(cpuc))
> + perf_sched_cb_dec(event->ctx->pmu);

and this, should also have something like

if (!needs_cb && pebs_needs_sched_cb(cpuc))
perf_sched_cb_inc(event->ctx->pmu)

Because the event we just removed was the one inhibiting FREERUNNING and
we can now let it rip again.

Yes, you're right. Let me try and see if I can make that better.

Thanks!

> +
> + if (cpuc->n_pebs)
> + pebs_update_threshold(cpuc);
>  }
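
To make the add/del transitions concrete, here is a minimal user-space model
of the two counters and the sched_cb reference count. The struct, helper
names and main() harness are purely illustrative assumptions, not kernel
code (threshold handling is omitted); it only demonstrates that the removal
path must be able to flip the callback back on.

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for the relevant cpu_hw_events state (illustrative only). */
struct toy_cpuc { int n_pebs, n_large_pebs, sched_cb_refs; };

static bool needs_sched_cb(struct toy_cpuc *c)
{
	return c->n_pebs && (c->n_pebs == c->n_large_pebs);
}

/* Mirrors the predicate-change logic of the proposed pebs_update_state():
 * toggle the callback refcount only when the answer changes. */
static void update_state(struct toy_cpuc *c, bool needed_cb)
{
	if (needed_cb != needs_sched_cb(c))
		c->sched_cb_refs += needs_sched_cb(c) ? 1 : -1;
}

static void add(struct toy_cpuc *c, bool large)
{
	bool needed_cb = needs_sched_cb(c);
	c->n_pebs++;
	if (large)
		c->n_large_pebs++;
	update_state(c, needed_cb);
}

static void del(struct toy_cpuc *c, bool large)
{
	bool needed_cb = needs_sched_cb(c);
	c->n_pebs--;
	if (large)
		c->n_large_pebs--;
	update_state(c, needed_cb);
}

int main(void)
{
	struct toy_cpuc c = { 0, 0, 0 };

	add(&c, true);   /* large PEBS event: refs 0 -> 1 */
	add(&c, false);  /* non-freerunning event inhibits: refs 1 -> 0 */
	del(&c, false);  /* inhibitor removed: refs 0 -> 1, "let it rip" again */

	printf("n_pebs=%d n_large_pebs=%d sched_cb_refs=%d\n",
	       c.n_pebs, c.n_large_pebs, c.sched_cb_refs);
	return 0;
}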


Re: [RFC][PATCH 1/7] perf/x86/intel: Rework the large PEBS setup code

2016-07-08 Thread Jiri Olsa
On Fri, Jul 08, 2016 at 03:31:00PM +0200, Peter Zijlstra wrote:

SNIP

>   /*
> -  * When the event is constrained enough we can use a larger
> -  * threshold and run the event with less frequent PMI.
> +  * Use auto-reload if possible to save a MSR write in the PMI.
> +  * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
>*/
> - if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
> - threshold = ds->pebs_absolute_maximum -
> - x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
> -
> - if (first_pebs)
> - perf_sched_cb_inc(event->ctx->pmu);
> - } else {
> - threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
> -
> - /*
> -  * If not all events can use larger buffer,
> -  * roll back to threshold = 1
> -  */
> - if (!first_pebs &&
> - (ds->pebs_interrupt_threshold > threshold))
> - perf_sched_cb_dec(event->ctx->pmu);
> - }

hum, the original code switched back the perf_sched_cb,
in case a !freerunning event was detected.. I don't see it
in the new code.. just the threshold update

jirka


[RFC][PATCH 1/7] perf/x86/intel: Rework the large PEBS setup code

2016-07-08 Thread Peter Zijlstra
In order to allow optimizing perf_pmu_sched_task() we must ensure
perf_sched_cb_{inc,dec} are no longer called from NMI context; this
means that pmu::{start,stop}() can no longer use them.

Prepare for this by reworking the whole large PEBS setup code.

The current code relied on the cpuc->pebs_enabled state; however, since
that reflects the current active state as per pmu::{start,stop}(), we
can no longer rely on this.

Introduce two counters: cpuc->n_pebs and cpuc->n_large_pebs, which
count the total number of PEBS events and the number of PEBS events
that have FREERUNNING set, respectively. With this we can tell if the current
setup requires a single record interrupt threshold or can use a larger
buffer.

This also improves the code in that it re-enables the large threshold
once the PEBS event that required single record gets removed.

Signed-off-by: Peter Zijlstra (Intel) 
---
 arch/x86/events/intel/ds.c   |   96 +++
 arch/x86/events/perf_event.h |2 
 kernel/events/core.c |4 +
 3 files changed, 67 insertions(+), 35 deletions(-)

--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -806,9 +806,45 @@ struct event_constraint *intel_pebs_cons
return &emptyconstraint;
 }
 
-static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+/*
+ * We need the sched_task callback even for per-cpu events when we use
+ * the large interrupt threshold, such that we can provide PID and TID
+ * to PEBS samples.
+ */
+static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 {
-   return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+   return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
+}
+
+static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
+{
+   struct debug_store *ds = cpuc->ds;
+   u64 threshold;
+
+   if (cpuc->n_pebs == cpuc->n_large_pebs) {
+   threshold = ds->pebs_absolute_maximum -
+   x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+   } else {
+   threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+   }
+
+   ds->pebs_interrupt_threshold = threshold;
+}
+
+static void intel_pmu_pebs_add(struct perf_event *event)
+{
+   struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+   struct hw_perf_event *hwc = &event->hw;
+   bool needs_cb = pebs_needs_sched_cb(cpuc);
+
+   cpuc->n_pebs++;
+   if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+   cpuc->n_large_pebs++;
+
+   if (!needs_cb && pebs_needs_sched_cb(cpuc))
+   perf_sched_cb_inc(event->ctx->pmu);
+
+   pebs_update_threshold(cpuc);
 }
 
 void intel_pmu_pebs_enable(struct perf_event *event)
@@ -816,12 +852,11 @@ void intel_pmu_pebs_enable(struct perf_e
struct cpu_hw_events *cpuc = this_cpu_ptr(_hw_events);
struct hw_perf_event *hwc = >hw;
struct debug_store *ds = cpuc->ds;
-   bool first_pebs;
-   u64 threshold;
+
+   intel_pmu_pebs_add(event);
 
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
-   first_pebs = !pebs_is_enabled(cpuc);
cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
@@ -830,46 +865,38 @@ void intel_pmu_pebs_enable(struct perf_e
cpuc->pebs_enabled |= 1ULL << 63;
 
/*
-* When the event is constrained enough we can use a larger
-* threshold and run the event with less frequent PMI.
+* Use auto-reload if possible to save a MSR write in the PMI.
+* This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
 */
-   if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
-   threshold = ds->pebs_absolute_maximum -
-   x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
-
-   if (first_pebs)
-   perf_sched_cb_inc(event->ctx->pmu);
-   } else {
-   threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
-   /*
-* If not all events can use larger buffer,
-* roll back to threshold = 1
-*/
-   if (!first_pebs &&
-   (ds->pebs_interrupt_threshold > threshold))
-   perf_sched_cb_dec(event->ctx->pmu);
-   }
-
-   /* Use auto-reload if possible to save a MSR write in the PMI */
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
ds->pebs_event_reset[hwc->idx] =
(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
}
+}
 
-   if (first_pebs || ds->pebs_interrupt_threshold > threshold)
-   ds->pebs_interrupt_threshold = threshold;
+static void intel_pmu_pebs_del(struct perf_event *event)
+{
+   struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+   struct hw_perf_event *hwc = &event->hw;
+   bool needs_cb = pebs_needs_sched_cb(cpuc);
+
+   cpuc->n_pebs--;
+   if 
