The previous version of Intel's CQM introduced pmu::count as a replacement
for pmu::read when reading CQM events. This was done to avoid using an IPI
to read the CQM occupancy event when reading events attached to a thread.
Using pmu::count in place of pmu::read is inconsistent with how other
PMUs use these callbacks and introduces several problems, such as:
  1) pmu::read for thread events returns bogus values when called from
  contexts with interrupts disabled.
  2) perf_event_count()'s behavior depends on whether interrupts are
  enabled or not.
  3) perf_event_count() will always read a fresh value from the PMU,
  which is inconsistent with the behavior of other events (see the
  sketch below).
  4) perf_event_count() will perform slow MSR reads and writes, and IPIs.
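
For reference, the generic count path in kernel/events/core.c looks roughly
like the sketch below (paraphrased, not part of this patch); with pmu::count
set, every perf_event_count() call goes back to the hardware instead of the
cached value:

	static u64 __perf_event_count(struct perf_event *event)
	{
		return local64_read(&event->count) +
		       atomic64_read(&event->child_count);
	}

	static u64 perf_event_count(struct perf_event *event)
	{
		/* With pmu::count set (as CQM does), the cached count is bypassed. */
		if (event->pmu->count)
			return event->pmu->count(event);

		return __perf_event_count(event);
	}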

This patch removes pmu::count from CQM and makes pmu::read always
read from the local socket (package). Future patches will add a mechanism
to add in the event counts from other packages.
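
The per-socket primitive that pmu::read keeps relying on is the existing
__rmid_read() helper in cqm.c, condensed below for reference (comments mine):

	static u64 __rmid_read(u32 rmid)
	{
		u64 val;

		/* Select the L3 occupancy event for @rmid ... */
		wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
		/* ... and read back the occupancy, in units of cqm_l3_scale. */
		rdmsrl(MSR_IA32_QM_CTR, val);

		/* RMID_VAL_ERROR / RMID_VAL_UNAVAIL are reported in the top bits. */
		return val;
	}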

This patch also removes the unused field rmid_usecnt from intel_pqr_state.

Reviewed-by: Stephane Eranian <eran...@google.com>
Signed-off-by: David Carrillo-Cisneros <davi...@google.com>
---
 arch/x86/events/intel/cqm.c | 125 ++++++--------------------------------------
 1 file changed, 16 insertions(+), 109 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 3c1e247..afd60dd 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -20,7 +20,6 @@ static unsigned int cqm_l3_scale; /* supposedly cacheline size */
  * struct intel_pqr_state - State cache for the PQR MSR
  * @rmid:              The cached Resource Monitoring ID
  * @closid:            The cached Class Of Service ID
- * @rmid_usecnt:       The usage counter for rmid
  *
  * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
  * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
@@ -32,7 +31,6 @@ static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 struct intel_pqr_state {
        u32                     rmid;
        u32                     closid;
-       int                     rmid_usecnt;
 };
 
 /*
@@ -44,6 +42,19 @@ struct intel_pqr_state {
 static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
 
 /*
+ * Update the caller CPU's cached RMID and, if it changed, the PQR MSR.
+ */
+static inline void __update_pqr_rmid(u32 rmid)
+{
+       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+
+       if (state->rmid == rmid)
+               return;
+       state->rmid = rmid;
+       wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
+}
+
+/*
  * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
  * Also protects event->hw.cqm_rmid
  *
@@ -309,7 +320,7 @@ struct rmid_read {
        atomic64_t value;
 };
 
-static void __intel_cqm_event_count(void *info);
+static void intel_cqm_event_read(struct perf_event *event);
 
 /*
  * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
@@ -376,12 +387,6 @@ static void intel_cqm_event_read(struct perf_event *event)
        u32 rmid;
        u64 val;
 
-       /*
-        * Task events are handled by intel_cqm_event_count().
-        */
-       if (event->cpu == -1)
-               return;
-
        raw_spin_lock_irqsave(&cache_lock, flags);
        rmid = event->hw.cqm_rmid;
 
@@ -401,123 +406,28 @@ out:
        raw_spin_unlock_irqrestore(&cache_lock, flags);
 }
 
-static void __intel_cqm_event_count(void *info)
-{
-       struct rmid_read *rr = info;
-       u64 val;
-
-       val = __rmid_read(rr->rmid);
-
-       if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
-               return;
-
-       atomic64_add(val, &rr->value);
-}
-
 static inline bool cqm_group_leader(struct perf_event *event)
 {
        return !list_empty(&event->hw.cqm_groups_entry);
 }
 
-static u64 intel_cqm_event_count(struct perf_event *event)
-{
-       unsigned long flags;
-       struct rmid_read rr = {
-               .value = ATOMIC64_INIT(0),
-       };
-
-       /*
-        * We only need to worry about task events. System-wide events
-        * are handled like usual, i.e. entirely with
-        * intel_cqm_event_read().
-        */
-       if (event->cpu != -1)
-               return __perf_event_count(event);
-
-       /*
-        * Only the group leader gets to report values. This stops us
-        * reporting duplicate values to userspace, and gives us a clear
-        * rule for which task gets to report the values.
-        *
-        * Note that it is impossible to attribute these values to
-        * specific packages - we forfeit that ability when we create
-        * task events.
-        */
-       if (!cqm_group_leader(event))
-               return 0;
-
-       /*
-        * Getting up-to-date values requires an SMP IPI which is not
-        * possible if we're being called in interrupt context. Return
-        * the cached values instead.
-        */
-       if (unlikely(in_interrupt()))
-               goto out;
-
-       /*
-        * Notice that we don't perform the reading of an RMID
-        * atomically, because we can't hold a spin lock across the
-        * IPIs.
-        *
-        * Speculatively perform the read, since @event might be
-        * assigned a different (possibly invalid) RMID while we're
-        * busying performing the IPI calls. It's therefore necessary to
-        * check @event's RMID afterwards, and if it has changed,
-        * discard the result of the read.
-        */
-       rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
-
-       if (!__rmid_valid(rr.rmid))
-               goto out;
-
-       on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
-
-       raw_spin_lock_irqsave(&cache_lock, flags);
-       if (event->hw.cqm_rmid == rr.rmid)
-               local64_set(&event->count, atomic64_read(&rr.value));
-       raw_spin_unlock_irqrestore(&cache_lock, flags);
-out:
-       return __perf_event_count(event);
-}
-
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
-       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-       u32 rmid = event->hw.cqm_rmid;
-
        if (!(event->hw.cqm_state & PERF_HES_STOPPED))
                return;
 
        event->hw.cqm_state &= ~PERF_HES_STOPPED;
-
-       if (state->rmid_usecnt++) {
-               if (!WARN_ON_ONCE(state->rmid != rmid))
-                       return;
-       } else {
-               WARN_ON_ONCE(state->rmid);
-       }
-
-       state->rmid = rmid;
-       wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
+       __update_pqr_rmid(event->hw.cqm_rmid);
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
-       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-
        if (event->hw.cqm_state & PERF_HES_STOPPED)
                return;
 
        event->hw.cqm_state |= PERF_HES_STOPPED;
-
        intel_cqm_event_read(event);
-
-       if (!--state->rmid_usecnt) {
-               state->rmid = 0;
-               wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
-       } else {
-               WARN_ON_ONCE(!state->rmid);
-       }
+       __update_pqr_rmid(0);
 }
 
 static int intel_cqm_event_add(struct perf_event *event, int mode)
@@ -534,7 +444,6 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
                intel_cqm_event_start(event, mode);
 
        raw_spin_unlock_irqrestore(&cache_lock, flags);
-
        return 0;
 }
 
@@ -720,7 +629,6 @@ static struct pmu intel_cqm_pmu = {
        .start               = intel_cqm_event_start,
        .stop                = intel_cqm_event_stop,
        .read                = intel_cqm_event_read,
-       .count               = intel_cqm_event_count,
 };
 
 static inline void cqm_pick_event_reader(int cpu)
@@ -743,7 +651,6 @@ static void intel_cqm_cpu_starting(unsigned int cpu)
 
        state->rmid = 0;
        state->closid = 0;
-       state->rmid_usecnt = 0;
 
        WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
        WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
-- 
2.8.0.rc3.226.g39d4020
