On Thu, Jul 04, 2019 at 07:00:19PM +0300, Alexander Shishkin wrote:

> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index f0e4804515d8..a11924e20df3 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -869,6 +869,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int 
> n, int *assign)
>       unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
>       struct perf_event *e;
>       int n0, i, wmin, wmax, unsched = 0;
> +     int n_pebs_ds, n_pebs_pt;
>       struct hw_perf_event *hwc;
>  
>       bitmap_zero(used_mask, X86_PMC_IDX_MAX);
> @@ -884,6 +885,37 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int 
> n, int *assign)
>       if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
>               n0 -= cpuc->n_txn;
>  
> +     /*
> +      * Check for PEBS->DS and PEBS->PT events.
> +      * 1) They can't be scheduled simultaneously;
> +      * 2) PEBS->PT events depend on a corresponding PT event
> +      */
> +     for (i = 0, n_pebs_ds = 0, n_pebs_pt = 0; i < n; i++) {
> +             e = cpuc->event_list[i];
> +
> +             if (e->attr.precise_ip) {
> +                     if (e->hw.flags & PERF_X86_EVENT_PEBS_VIA_PT) {
> +                             /*
> +                              * New PEBS->PT event, check ->aux_event; if
> +                              * it's NULL, the group has been broken down
> +                              * and this event can't schedule any more.
> +                              */
> +                             if (!cpuc->is_fake && i >= n0 && !e->aux_event)
> +                                     return -EINVAL;

How can this happen? Is this an artifact of creating a group, and then
destroying the group leader (the PT event), and then being left with a bunch
of unschedulable events as remains?

> +                             n_pebs_pt++;
> +                     } else {
> +                             n_pebs_ds++;
> +                     }
> +             }
> +     }

This makes for the 3rd i..n iteration in a row: the first is over
cpuc->event_constraint[], this is the second, and the third isn't
guaranteed to terminate but is over both cpuc->event_list[] and
->event_constraint[].

It just feels like we can do better.

> +
> +     /*
> +      * Fail to add conflicting PEBS events. If this happens, rotation
> +      * takes care that all events get to run.
> +      */
> +     if (n_pebs_ds && n_pebs_pt)
> +             return -EINVAL;

This basically means we can rewrite the above like:

        u8 pebs_pt = 0;

        if (e->attr.precise_ip) {
                bool pt = is_pebs_pt(e);

                if (pebs_pt & (1 << !pt))
                        return -EINVAL;

                pebs_pt |= 1 << pt;
        }

There's no need to finish the loop or to actually count how many there
are; all we need to know is that there's only one type.

Then again, if you put these counters in cpuc, you can make
collect_events() reject the event before we ever get to scheduling and
avoid the whole iteration.

> +
>       if (x86_pmu.start_scheduling)
>               x86_pmu.start_scheduling(cpuc);
>  

> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index bda450ff51ee..6955d4f7e7aa 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c

> @@ -3814,6 +3821,17 @@ static int intel_pmu_check_period(struct perf_event 
> *event, u64 value)
>       return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
>  }
>  
> +static int intel_pmu_aux_source_match(struct perf_event *event)
> +{
> +     if (!x86_pmu.intel_cap.pebs_output_pt_available)
> +             return 0;
> +
> +     if (event->pmu->name && !strcmp(event->pmu->name, "intel_pt"))

Yuck, surely we can do something like:

        if (is_pt_event(event))

which is implemented in intel/pt.c and does something like:

        return event->pmu == &pt_pmu.pmu;

> +             return 1;
> +
> +     return 0;
> +}
> +
>  PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
>  
>  PMU_FORMAT_ATTR(ldlat, "config1:0-15");

> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index 7acc526b4ad2..9c59462f38a3 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c

> +static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
> +{
> +     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +     struct hw_perf_event *hwc = &event->hw;
> +     struct debug_store *ds = cpuc->ds;
> +
> +     if (!(event->hw.flags & PERF_X86_EVENT_PEBS_VIA_PT))
> +             return;
> +
> +     /*
> +      * In case there's a mix of PEBS->PT and PEBS->DS, fall back
> +      * to DS.
> +      */

I thought we disallowed that from happening !?

> +     if (cpuc->n_pebs != cpuc->n_pebs_via_pt) {
> +             /* PEBS-to-DS events present, fall back to DS */
> +             intel_pmu_pebs_via_pt_disable(event);
> +             return;
> +     }
> +
> +     if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
> +             cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
> +
> +     cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
> +
> +     wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]);
> +}
> +

Reply via email to