Re: [RFC][PATCH 1/7] perf/x86/intel: Rework the large PEBS setup code
On Sat, Jul 09, 2016 at 12:25:09AM +0200, Peter Zijlstra wrote:
> On Sat, Jul 09, 2016 at 12:00:47AM +0200, Peter Zijlstra wrote:
> > Yes, you're right. Let me try and see if I can make that better.
>
> Something like so?

yep, seems good ;-)

jirka

>
> ---
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -831,6 +831,18 @@ static inline void pebs_update_threshold
>  	ds->pebs_interrupt_threshold = threshold;
>  }
>
> +static void pebs_update_state(bool needs_cb, struct cpu_hw_events *cpuc,
> +			      struct pmu *pmu)
> +{
> +	if (needs_cb != pebs_needs_sched_cb(cpuc)) {
> +		if (!needs_cb)
> +			perf_sched_cb_inc(pmu);
> +		else
> +			perf_sched_cb_dec(pmu);
> +
> +		pebs_update_threshold(cpuc);
> +	}
> +}
> +
>  static void intel_pmu_pebs_add(struct perf_event *event)
>  {
>  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> @@ -841,10 +853,7 @@ static void intel_pmu_pebs_add(struct pe
>  	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
>  		cpuc->n_large_pebs++;
>
> -	if (!needs_cb && pebs_needs_sched_cb(cpuc))
> -		perf_sched_cb_inc(event->ctx->pmu);
> -
> -	pebs_update_threshold(cpuc);
> +	pebs_update_state(needs_cb, cpuc, event->ctx->pmu);
>  }
>
>  void intel_pmu_pebs_enable(struct perf_event *event)
> @@ -884,11 +893,7 @@ static void intel_pmu_pebs_del(struct pe
>  	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
>  		cpuc->n_large_pebs--;
>
> -	if (needs_cb && !pebs_needs_sched_cb(cpuc))
> -		perf_sched_cb_dec(event->ctx->pmu);
> -
> -	if (cpuc->n_pebs)
> -		pebs_update_threshold(cpuc);
> +	pebs_update_state(needs_cb, cpuc, event->ctx->pmu);
>  }
>
>  void intel_pmu_pebs_disable(struct perf_event *event)
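The helper quoted above collapses to a small state machine: needs_cb is
sampled before the counters are touched and compared afterwards, which
gives exactly four cases (this is just a summary of the quoted code,
nothing more):

    needs_cb (before)   pebs_needs_sched_cb (after)   action
    -----------------   ---------------------------   -----------------------------
    false               false                         nothing
    false               true                          perf_sched_cb_inc(pmu), then
                                                      pebs_update_threshold(cpuc)
    true                false                         perf_sched_cb_dec(pmu), then
                                                      pebs_update_threshold(cpuc)
    true                true                          nothing

The threshold only needs refreshing on the two transition rows, since
the large threshold does not depend on how many events are active, only
on whether all of them are large-PEBS capable.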
Re: [RFC][PATCH 1/7] perf/x86/intel: Rework the large PEBS setup code
On Sat, Jul 09, 2016 at 12:00:47AM +0200, Peter Zijlstra wrote:
> Yes, you're right. Let me try and see if I can make that better.

Something like so?

---
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -831,6 +831,18 @@ static inline void pebs_update_threshold
 	ds->pebs_interrupt_threshold = threshold;
 }
 
+static void pebs_update_state(bool needs_cb, struct cpu_hw_events *cpuc,
+			      struct pmu *pmu)
+{
+	if (needs_cb != pebs_needs_sched_cb(cpuc)) {
+		if (!needs_cb)
+			perf_sched_cb_inc(pmu);
+		else
+			perf_sched_cb_dec(pmu);
+
+		pebs_update_threshold(cpuc);
+	}
+}
+
 static void intel_pmu_pebs_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -841,10 +853,7 @@ static void intel_pmu_pebs_add(struct pe
 	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
 		cpuc->n_large_pebs++;
 
-	if (!needs_cb && pebs_needs_sched_cb(cpuc))
-		perf_sched_cb_inc(event->ctx->pmu);
-
-	pebs_update_threshold(cpuc);
+	pebs_update_state(needs_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_enable(struct perf_event *event)
@@ -884,11 +893,7 @@ static void intel_pmu_pebs_del(struct pe
 	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
 		cpuc->n_large_pebs--;
 
-	if (needs_cb && !pebs_needs_sched_cb(cpuc))
-		perf_sched_cb_dec(event->ctx->pmu);
-
-	if (cpuc->n_pebs)
-		pebs_update_threshold(cpuc);
+	pebs_update_state(needs_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
Re: [RFC][PATCH 1/7] perf/x86/intel: Rework the large PEBS setup code
On Fri, Jul 08, 2016 at 06:36:16PM +0200, Jiri Olsa wrote:
> On Fri, Jul 08, 2016 at 03:31:00PM +0200, Peter Zijlstra wrote:
>
> SNIP
>
> >  	/*
> > -	 * When the event is constrained enough we can use a larger
> > -	 * threshold and run the event with less frequent PMI.
> > +	 * Use auto-reload if possible to save a MSR write in the PMI.
> > +	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
> >  	 */
> > -	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
> > -		threshold = ds->pebs_absolute_maximum -
> > -			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
> > -
> > -		if (first_pebs)
> > -			perf_sched_cb_inc(event->ctx->pmu);
> > -	} else {
> > -		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
> > -
> > -		/*
> > -		 * If not all events can use larger buffer,
> > -		 * roll back to threshold = 1
> > -		 */
> > -		if (!first_pebs &&
> > -		    (ds->pebs_interrupt_threshold > threshold))
> > -			perf_sched_cb_dec(event->ctx->pmu);
> > -	}
>
> hum, the original code switched back the perf_sched_cb,
> in case !freerunning event was detected.. I don't see it
> in the new code.. just the threshold update

> +static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
> +{
> +	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
> +}

> +static void intel_pmu_pebs_add(struct perf_event *event)
> +{
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +	struct hw_perf_event *hwc = &event->hw;
> +	bool needs_cb = pebs_needs_sched_cb(cpuc);
> +
> +	cpuc->n_pebs++;
> +	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
> +		cpuc->n_large_pebs++;
> +
> +	if (!needs_cb && pebs_needs_sched_cb(cpuc))
> +		perf_sched_cb_inc(event->ctx->pmu);

Ah, you're saying this,

> +	pebs_update_threshold(cpuc);
> +}

> +static void intel_pmu_pebs_del(struct perf_event *event)
> +{
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +	struct hw_perf_event *hwc = &event->hw;
> +	bool needs_cb = pebs_needs_sched_cb(cpuc);
> +
> +	cpuc->n_pebs--;
> +	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
> +		cpuc->n_large_pebs--;
> +
> +	if (needs_cb && !pebs_needs_sched_cb(cpuc))
> +		perf_sched_cb_dec(event->ctx->pmu);

and this, should also have something like:

	if (!needs_cb && pebs_needs_sched_cb(cpuc))
		perf_sched_cb_inc(event->ctx->pmu);

Because the event we just removed was the one inhibiting FREERUNNING and
we can now let it rip again.

Yes, you're right. Let me try and see if I can make that better.

Thanks!

> +
> +	if (cpuc->n_pebs)
> +		pebs_update_threshold(cpuc);
> +}
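To see the add/del accounting above concretely, here is a minimal
user-space sketch -- an illustration only, not kernel code; the names
merely mirror the kernel's. It models n_pebs/n_large_pebs plus a
sched_cb refcount and exercises exactly the case under discussion:
removing the one !freerunning event must re-arm the callback (and thus
large PEBS) for the events that remain.

/*
 * User-space model of the PEBS sched_cb accounting (illustrative,
 * not kernel code).
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

static int n_pebs, n_large_pebs, sched_cb_ref;

/* mirrors pebs_needs_sched_cb(): all active PEBS events are large */
static bool needs_sched_cb(void)
{
	return n_pebs && (n_pebs == n_large_pebs);
}

/* mirrors pebs_update_state(): act only on 0<->1 transitions */
static void update_state(bool was_needed)
{
	if (was_needed != needs_sched_cb()) {
		if (!was_needed)
			sched_cb_ref++;		/* perf_sched_cb_inc() */
		else
			sched_cb_ref--;		/* perf_sched_cb_dec() */
	}
}

static void add(bool freerunning)
{
	bool was = needs_sched_cb();

	n_pebs++;
	if (freerunning)
		n_large_pebs++;
	update_state(was);
}

static void del(bool freerunning)
{
	bool was = needs_sched_cb();

	n_pebs--;
	if (freerunning)
		n_large_pebs--;
	update_state(was);
}

int main(void)
{
	add(true);		/* one large-PEBS event: callback armed */
	assert(sched_cb_ref == 1);
	add(false);		/* !freerunning event inhibits large PEBS */
	assert(sched_cb_ref == 0);
	del(false);		/* inhibitor removed: callback re-armed */
	assert(sched_cb_ref == 1);
	del(true);		/* last event gone: callback dropped */
	assert(sched_cb_ref == 0);
	printf("all transitions behave as expected\n");
	return 0;
}

The third step is the one the original add-only code got wrong: del()
must also be able to increment the refcount, which is what folding both
paths into a single pebs_update_state() helper fixes.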
Re: [RFC][PATCH 1/7] perf/x86/intel: Rework the large PEBS setup code
On Fri, Jul 08, 2016 at 03:31:00PM +0200, Peter Zijlstra wrote:

SNIP

>  	/*
> -	 * When the event is constrained enough we can use a larger
> -	 * threshold and run the event with less frequent PMI.
> +	 * Use auto-reload if possible to save a MSR write in the PMI.
> +	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
>  	 */
> -	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
> -		threshold = ds->pebs_absolute_maximum -
> -			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
> -
> -		if (first_pebs)
> -			perf_sched_cb_inc(event->ctx->pmu);
> -	} else {
> -		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
> -
> -		/*
> -		 * If not all events can use larger buffer,
> -		 * roll back to threshold = 1
> -		 */
> -		if (!first_pebs &&
> -		    (ds->pebs_interrupt_threshold > threshold))
> -			perf_sched_cb_dec(event->ctx->pmu);
> -	}

hum, the original code switched back the perf_sched_cb,
in case !freerunning event was detected.. I don't see it
in the new code.. just the threshold update

jirka
[RFC][PATCH 1/7] perf/x86/intel: Rework the large PEBS setup code
In order to allow optimizing perf_pmu_sched_task() we must ensure
perf_sched_cb_{inc,dec} are no longer called from NMI context; this
means that pmu::{start,stop}() can no longer use them.

Prepare for this by reworking the whole large PEBS setup code.

The current code relied on the cpuc->pebs_enabled state, however since
that reflects the current active state as per pmu::{start,stop}() we
can no longer rely on this.

Introduce two counters: cpuc->n_pebs and cpuc->n_large_pebs, which
count the total number of PEBS events and the number of PEBS events
that have FREERUNNING set, respectively. With this we can tell if the
current setup requires a single record interrupt threshold or can use
a larger buffer.

This also improves the code in that it re-enables the large threshold
once the PEBS event that required single record gets removed.

Signed-off-by: Peter Zijlstra (Intel)
---
 arch/x86/events/intel/ds.c   | 96 +++
 arch/x86/events/perf_event.h |  2
 kernel/events/core.c         |  4 +
 3 files changed, 67 insertions(+), 35 deletions(-)

--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -806,9 +806,45 @@ struct event_constraint *intel_pebs_cons
 	return &emptyconstraint;
 }
 
-static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+/*
+ * We need the sched_task callback even for per-cpu events when we use
+ * the large interrupt threshold, such that we can provide PID and TID
+ * to PEBS samples.
+ */
+static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 {
-	return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
+}
+
+static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
+{
+	struct debug_store *ds = cpuc->ds;
+	u64 threshold;
+
+	if (cpuc->n_pebs == cpuc->n_large_pebs) {
+		threshold = ds->pebs_absolute_maximum -
+			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+	} else {
+		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+	}
+
+	ds->pebs_interrupt_threshold = threshold;
+}
+
+static void intel_pmu_pebs_add(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needs_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs++;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs++;
+
+	if (!needs_cb && pebs_needs_sched_cb(cpuc))
+		perf_sched_cb_inc(event->ctx->pmu);
+
+	pebs_update_threshold(cpuc);
 }
 
 void intel_pmu_pebs_enable(struct perf_event *event)
@@ -816,12 +852,11 @@ void intel_pmu_pebs_enable(struct perf_e
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
-	bool first_pebs;
-	u64 threshold;
+
+	intel_pmu_pebs_add(event);
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
-	first_pebs = !pebs_is_enabled(cpuc);
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
@@ -830,46 +865,38 @@ void intel_pmu_pebs_enable(struct perf_e
 		cpuc->pebs_enabled |= 1ULL << 63;
 
 	/*
-	 * When the event is constrained enough we can use a larger
-	 * threshold and run the event with less frequent PMI.
+	 * Use auto-reload if possible to save a MSR write in the PMI.
+	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
 	 */
-	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
-		threshold = ds->pebs_absolute_maximum -
-			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
-
-		if (first_pebs)
-			perf_sched_cb_inc(event->ctx->pmu);
-	} else {
-		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
-		/*
-		 * If not all events can use larger buffer,
-		 * roll back to threshold = 1
-		 */
-		if (!first_pebs &&
-		    (ds->pebs_interrupt_threshold > threshold))
-			perf_sched_cb_dec(event->ctx->pmu);
-	}
-
-	/* Use auto-reload if possible to save a MSR write in the PMI */
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 		ds->pebs_event_reset[hwc->idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
 	}
+}
 
-	if (first_pebs || ds->pebs_interrupt_threshold > threshold)
-		ds->pebs_interrupt_threshold = threshold;
+static void intel_pmu_pebs_del(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needs_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs--;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs--;
+
+	if (needs_cb && !pebs_needs_sched_cb(cpuc))
+		perf_sched_cb_dec(event->ctx->pmu);
+
+	if (cpuc->n_pebs)
+		pebs_update_threshold(cpuc);
+}
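To make the two thresholds that pebs_update_threshold() picks between
concrete, here is a small stand-alone computation. All constants below
are assumptions for the sake of the example -- the real buffer size,
record size and max_pebs_events depend on the microarchitecture and
kernel configuration:

/*
 * Illustration of pebs_update_threshold()'s arithmetic; every constant
 * here is an assumed, made-up value.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t buffer_base     = 0;         /* offsets, not real addresses */
	const uint64_t buffer_size     = 64 * 1024; /* assumed PEBS buffer size */
	const uint64_t record_size     = 192;       /* assumed pebs_record_size */
	const uint64_t max_pebs_events = 8;         /* assumed */

	const uint64_t absolute_maximum = buffer_base + buffer_size;

	/* n_pebs == n_large_pebs: interrupt only when the buffer is nearly full */
	uint64_t large  = absolute_maximum - max_pebs_events * record_size;
	/* otherwise: interrupt after every single record */
	uint64_t single = buffer_base + record_size;

	printf("large threshold : byte %llu (~%llu records per PMI)\n",
	       (unsigned long long)large,
	       (unsigned long long)(large / record_size));
	printf("single threshold: byte %llu (1 record per PMI)\n",
	       (unsigned long long)single);
	return 0;
}

With these made-up numbers the large setup takes one PMI per roughly
333 records instead of one per record, which is the behaviour the
rework tries to keep whenever every active PEBS event can tolerate it.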