To avoid IPIs from IRQ disabled contexts, the occupancy for a RMID in a
remote package (a package other than the one the current cpu belongs to) is
obtained from a cache that is periodically updated.
This removes the need for an IPI when reading occupancy for a task event,
that was the reason to add the problematic pmu::count and dummy
perf_event_read() in the previous CQM version.

The occupancy of all active prmids is updated every
__rmid_timed_update_period ms.

To avoid holding raw_spin_locks on the prmid hierarchy for too long, the
raw rmids to be read are copied to a temporary array list. The array list
is consumed to perform the wrmsrl and rdmsrl on each RMID required to
read its llc_occupancy.

This decoupling of traversing the RMID hierarchy and read occupancy is
especially useful due to the high latency of the wrmsrl and rdmsrl for the
llc_occupancy event (thousands of cycles in my test machine).

To avoid unnecessary memory allocations, the objects used to temporarily
store RMIDs are pooled in a per-package list and allocated on demand.

The infrastructure introduced in this patch will be used in future patches
in this series to perform reads on subtrees of a prmid hierarchy.

Reviewed-by: Stephane Eranian <eran...@google.com>
Signed-off-by: David Carrillo-Cisneros <davi...@google.com>
---
 arch/x86/events/intel/cqm.c | 251 +++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/events/intel/cqm.h |  36 +++++++
 2 files changed, 286 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 31f0fd6..904f2d3 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -189,6 +189,8 @@ static inline bool __valid_pkg_id(u16 pkg_id)
        return pkg_id < PQR_MAX_NR_PKGS;
 }
 
+static int anode_pool__alloc_one(u16 pkg_id);
+
 /* Init cqm pkg_data for @cpu 's package. */
 static int pkg_data_init_cpu(int cpu)
 {
@@ -241,11 +243,19 @@ static int pkg_data_init_cpu(int cpu)
        mutex_init(&pkg_data->pkg_data_mutex);
        raw_spin_lock_init(&pkg_data->pkg_data_lock);
 
+       INIT_LIST_HEAD(&pkg_data->anode_pool_head);
+       raw_spin_lock_init(&pkg_data->anode_pool_lock);
+
        INIT_DELAYED_WORK(
                &pkg_data->rotation_work, intel_cqm_rmid_rotation_work);
        /* XXX: Chose randomly*/
        pkg_data->rotation_cpu = cpu;
 
+       INIT_DELAYED_WORK(
+               &pkg_data->timed_update_work, intel_cqm_timed_update_work);
+       /* XXX: Chosen randomly */
+       pkg_data->timed_update_cpu = cpu;
+
        cqm_pkgs_data[pkg_id] = pkg_data;
        return 0;
 }
@@ -744,6 +754,189 @@ static void monr_dealloc(struct monr *monr)
 }
 
 /*
+ * Logic for reading sets of rmids into per-package lists.
+ * These per-package lists can be used to update occupancies without
+ * holding locks in the hierarchies of pmonrs.
+ * @pool: free pool.
+ */
+struct astack {
+       struct list_head        pool;
+       struct list_head        items;
+       int                     top_idx;
+       int                     max_idx;
+       u16                     pkg_id;
+};
+
+static void astack__init(struct astack *astack, int max_idx, u16 pkg_id)
+{
+       INIT_LIST_HEAD(&astack->items);
+       INIT_LIST_HEAD(&astack->pool);
+       astack->top_idx = -1;
+       astack->max_idx = max_idx;
+       astack->pkg_id = pkg_id;
+}
+
+/* Try to enlarge astack->pool with an anode from this pkg's pool. */
+static int astack__try_add_pool(struct astack *astack)
+{
+       unsigned long flags;
+       int ret = -1;
+       struct pkg_data *pkg_data = cqm_pkgs_data[astack->pkg_id];
+
+       raw_spin_lock_irqsave(&pkg_data->anode_pool_lock, flags);
+
+       if (!list_empty(&pkg_data->anode_pool_head)) {
+               list_move_tail(pkg_data->anode_pool_head.prev, &astack->pool);
+               ret = 0;
+       }
+
+       raw_spin_unlock_irqrestore(&pkg_data->anode_pool_lock, flags);
+       return ret;
+}
+
+static int astack__push(struct astack *astack)
+{
+       if (!list_empty(&astack->items) && astack->top_idx < astack->max_idx) {
+               astack->top_idx++;
+               return 0;
+       }
+
+       if (list_empty(&astack->pool) && astack__try_add_pool(astack))
+               return -1;
+       list_move_tail(astack->pool.prev, &astack->items);
+       astack->top_idx = 0;
+       return 0;
+}
+
+/* Must be non-empty */
+# define __astack__top(astack_, member_) \
+       list_last_entry(&(astack_)->items, \
+       struct anode, entry)->member_[(astack_)->top_idx]
+
+static void astack__clear(struct astack *astack)
+{
+       list_splice_tail_init(&astack->items, &astack->pool);
+       astack->top_idx = -1;
+}
+
+/* Put back into pkg_data's pool. */
+static void astack__release(struct astack *astack)
+{
+       unsigned long flags;
+       struct pkg_data *pkg_data = cqm_pkgs_data[astack->pkg_id];
+
+       astack__clear(astack);
+       raw_spin_lock_irqsave(&pkg_data->anode_pool_lock, flags);
+       list_splice_tail_init(&astack->pool, &pkg_data->anode_pool_head);
+       raw_spin_unlock_irqrestore(&pkg_data->anode_pool_lock, flags);
+}
+
+static int anode_pool__alloc_one(u16 pkg_id)
+{
+       unsigned long flags;
+       struct anode *anode;
+       struct pkg_data *pkg_data = cqm_pkgs_data[pkg_id];
+
+       anode = kmalloc_node(sizeof(struct anode), GFP_KERNEL,
+                            cpu_to_node(pkg_data->rotation_cpu));
+       if (!anode)
+               return -ENOMEM;
+       raw_spin_lock_irqsave(&pkg_data->anode_pool_lock, flags);
+       list_add_tail(&anode->entry, &pkg_data->anode_pool_head);
+       raw_spin_unlock_irqrestore(&pkg_data->anode_pool_lock, flags);
+       return 0;
+}
+
+static int astack__end(struct astack *astack, struct anode *anode, int idx)
+{
+       return list_is_last(&anode->entry, &astack->items) &&
+              idx > astack->top_idx;
+}
+
+static int __rmid_fn__cqm_prmid_update(struct prmid *prmid, u64 *val)
+{
+       int ret = cqm_prmid_update(prmid);
+
+       if (ret >= 0)
+               *val = atomic64_read(&prmid->last_read_value);
+       return ret;
+}
+
+/* Apply function to all elements in all nodes.
+ * On error returns first error in read, zero otherwise.
+ */
+static int astack__rmids_sum_apply(
+       struct astack *astack,
+       u16 pkg_id, int (*fn)(struct prmid *, u64 *), u64 *total)
+{
+       struct prmid *prmid;
+       struct anode *anode;
+       u32 rmid;
+       int i, ret, first_error = 0;
+       u64 count;
+       *total = 0;
+
+       list_for_each_entry(anode, &astack->items, entry) {
+               for (i = 0; i <= astack->max_idx; i++) {
+                       /* The tail node only holds entries up to astack->top_idx. */
+                       if (astack__end(astack, anode, i))
+                               break;
+                       rmid = anode->rmids[i];
+                       prmid = cqm_pkgs_data[pkg_id]->prmids_by_rmid[rmid];
+                       WARN_ON_ONCE(!prmid);
+                       ret = fn(prmid, &count);
+                       if (ret < 0) {
+                               if (!first_error)
+                                       first_error = ret;
+                               continue;
+                       }
+                       *total += count;
+               }
+       }
+       return first_error;
+}
+
+/* Does not need mutex since protected by locks when traversing
+ * astate_pmonrs_lru and updating atomic prmids.
+ */
+static int update_rmids_in_astate_pmonrs_lru(u16 pkg_id)
+{
+       struct astack astack;
+       struct pkg_data *pkg_data;
+       struct pmonr *pmonr;
+       int ret = 0;
+       unsigned long flags;
+       u64 count;
+
+       astack__init(&astack, NR_RMIDS_PER_NODE - 1, pkg_id);
+       pkg_data = cqm_pkgs_data[pkg_id];
+
+retry:
+       if (ret) {
+               anode_pool__alloc_one(pkg_id);
+               ret = 0;
+       }
+       raw_spin_lock_irqsave_nested(&pkg_data->pkg_data_lock, flags, pkg_id);
+       list_for_each_entry(pmonr,
+                           &pkg_data->astate_pmonrs_lru, rotation_entry) {
+               ret = astack__push(&astack);
+               if (ret)
+                       break;
+               __astack__top(&astack, rmids) = pmonr->prmid->rmid;
+       }
+       raw_spin_unlock_irqrestore(&pkg_data->pkg_data_lock, flags);
+       if (ret) {
+               astack__clear(&astack);
+               goto retry;
+       }
+       /* count is not used. */
+       ret = astack__rmids_sum_apply(&astack, pkg_id,
+                                     &__rmid_fn__cqm_prmid_update, &count);
+       astack__release(&astack);
+       return ret;
+}
+
+/*
  * Wrappers for monr manipulation in events.
  *
  */
@@ -1532,6 +1725,17 @@ exit:
        mutex_unlock(&pkg_data->pkg_data_mutex);
 }
 
+static void
+__intel_cqm_timed_update(u16 pkg_id)
+{
+       int ret;
+
+       mutex_lock_nested(&cqm_pkgs_data[pkg_id]->pkg_data_mutex, pkg_id);
+       ret = update_rmids_in_astate_pmonrs_lru(pkg_id);
+       mutex_unlock(&cqm_pkgs_data[pkg_id]->pkg_data_mutex);
+       WARN_ON_ONCE(ret);
+}
+
 static struct pmu intel_cqm_pmu;
 
 /* Rotation only needs to be run when there is any pmonr in (I)state. */
@@ -1554,6 +1758,22 @@ static bool intel_cqm_need_rotation(u16 pkg_id)
        return need_rot;
 }
 
+static bool intel_cqm_need_timed_update(u16 pkg_id)
+{
+
+       struct pkg_data *pkg_data;
+       bool need_update;
+
+       pkg_data = cqm_pkgs_data[pkg_id];
+
+       mutex_lock_nested(&pkg_data->pkg_data_mutex, pkg_id);
+       /* Update is needed if there is any active prmid. */
+       need_update = !list_empty(&pkg_data->active_prmids_pool);
+       mutex_unlock(&pkg_data->pkg_data_mutex);
+
+       return need_update;
+}
+
 /*
  * Schedule rotation in one package.
  */
@@ -1568,6 +1788,19 @@ static void __intel_cqm_schedule_rotation_for_pkg(u16 
pkg_id)
                pkg_data->rotation_cpu, &pkg_data->rotation_work, delay);
 }
 
+static void __intel_cqm_schedule_timed_update_for_pkg(u16 pkg_id)
+{
+       struct pkg_data *pkg_data;
+       unsigned long delay;
+
+       delay = msecs_to_jiffies(__rmid_timed_update_period);
+       pkg_data = cqm_pkgs_data[pkg_id];
+       schedule_delayed_work_on(
+               pkg_data->timed_update_cpu,
+               &pkg_data->timed_update_work, delay);
+}
+
+
 /*
  * Schedule rotation and rmid's timed update in all packages.
  * Reescheduling will stop when no longer needed.
@@ -1576,8 +1809,10 @@ static void intel_cqm_schedule_work_all_pkgs(void)
 {
        int pkg_id;
 
-       cqm_pkg_id_for_each_online(pkg_id)
+       cqm_pkg_id_for_each_online(pkg_id) {
                __intel_cqm_schedule_rotation_for_pkg(pkg_id);
+               __intel_cqm_schedule_timed_update_for_pkg(pkg_id);
+       }
 }
 
 static void intel_cqm_rmid_rotation_work(struct work_struct *work)
@@ -1598,6 +1833,20 @@ static void intel_cqm_rmid_rotation_work(struct 
work_struct *work)
                __intel_cqm_schedule_rotation_for_pkg(pkg_id);
 }
 
+static void intel_cqm_timed_update_work(struct work_struct *work)
+{
+       struct pkg_data *pkg_data = container_of(
+               to_delayed_work(work), struct pkg_data, timed_update_work);
+       u16 pkg_id = topology_physical_package_id(pkg_data->timed_update_cpu);
+
+       WARN_ON_ONCE(pkg_data != cqm_pkgs_data[pkg_id]);
+
+       __intel_cqm_timed_update(pkg_id);
+
+       if (intel_cqm_need_timed_update(pkg_id))
+               __intel_cqm_schedule_timed_update_for_pkg(pkg_id);
+}
+
 /*
  * Find a group and setup RMID.
  *
diff --git a/arch/x86/events/intel/cqm.h b/arch/x86/events/intel/cqm.h
index b0e1698..25646a2 100644
--- a/arch/x86/events/intel/cqm.h
+++ b/arch/x86/events/intel/cqm.h
@@ -45,6 +45,10 @@ static unsigned int __rmid_min_update_time = 
RMID_DEFAULT_MIN_UPDATE_TIME;
 
 static inline int cqm_prmid_update(struct prmid *prmid);
 
+#define RMID_DEFAULT_TIMED_UPDATE_PERIOD 100 /* ms */
+static unsigned int __rmid_timed_update_period =
+       RMID_DEFAULT_TIMED_UPDATE_PERIOD;
+
 /*
  * union prmid_summary: Machine-size summary of a pmonr's prmid state.
  * @value:             One word accesor.
@@ -211,6 +215,21 @@ struct pmonr {
        atomic64_t                              prmid_summary_atomic;
 };
 
+/* Store all RMIDs that can fit in an anode while keeping sizeof(struct anode)
+ * within one cache line (for performance).
+ */
+#define NR_TYPE_PER_NODE(__type) ((SMP_CACHE_BYTES - (int)sizeof(struct 
list_head)) / \
+       (int)sizeof(__type))
+
+#define NR_RMIDS_PER_NODE NR_TYPE_PER_NODE(u32)
+
+/* struct anode: Node of an array list used to temporarily store RMIDs. */
+struct anode {
+       /* Last valid RMID is RMID_INVALID */
+       u32                     rmids[NR_RMIDS_PER_NODE];
+       struct list_head        entry;
+};
+
 /*
  * struct pkg_data: Per-package CQM data.
  * @max_rmid:                  Max rmid valid for cpus in this package.
@@ -239,6 +258,14 @@ struct pmonr {
  * @rotation_cpu:               CPU to run @rotation_work on, it must be in the
  *                              package associated to this instance of 
pkg_data.
  * @rotation_work:             Task that performs rotation of prmids.
+ * @timed_update_work:         Task that performs periodic updates of values
+ *                             for active rmids. These values are used when
+ *                             inter-package event read is not available due to
+ *                             irqs disabled contexts.
+ * @timed_update_cpu:          CPU to run @timed_update_work on, it must be a
+ *                             cpu in this package.
+ * @anode_pool_head:           Pool of unused anodes.
+ * @anode_pool_lock:           Protect @anode_pool_head.
  */
 struct pkg_data {
        u32                     max_rmid;
@@ -268,6 +295,13 @@ struct pkg_data {
 
        struct delayed_work     rotation_work;
        int                     rotation_cpu;
+
+       struct delayed_work     timed_update_work;
+       int                     timed_update_cpu;
+
+       /* Pool of unused rmid_list_nodes and its lock */
+       struct list_head        anode_pool_head;
+       raw_spinlock_t          anode_pool_lock;
 };
 
 /*
@@ -438,6 +472,8 @@ static inline int monr_hrchy_count_held_raw_spin_locks(void)
  */
 static void intel_cqm_rmid_rotation_work(struct work_struct *work);
 
+static void intel_cqm_timed_update_work(struct work_struct *work);
+
 /*
  * Service Level Objectives (SLO) for the rotation logic.
  *
-- 
2.8.0.rc3.226.g39d4020

Reply via email to