To make rmid rotation more dependable, this patch series introduces
rotation Service Level Objectives (SLOs), which are described in the
code's documentation.

This patch introduces the cmt_{pre,min}_mon_slice SLOs, which protect
against bogus values when a rmid has not been available since the beginning
of monitoring. It also introduces the auxiliary variables necessary for the
SLOs to work, and the checks in intel_cmt_event_read that enforce the SLOs
when reading the llc_occupancy event.

Signed-off-by: David Carrillo-Cisneros <davi...@google.com>
---
 arch/x86/events/intel/cmt.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/events/intel/cmt.h | 28 +++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index 3ade923..649eb5f 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -51,6 +51,25 @@ static size_t pkg_uflags_size;
 static struct pkg_data **cmt_pkgs_data;
 
 /*
+ * Rotation Service Level Objectives (SLO) for monrs with llc_occupancy
+ * monitoring. Note that these are monr level SLOs, therefore all pmonrs in
+ * the monr meet or exceed them.
+ * (A "monitored" monr is a monr with no pmonr in a Dependent state).
+ *
+ * SLOs:
+ *
+ * @__cmt_pre_mon_slice: Min time a monr is monitored before being readable.
+ * @__cmt_min_mon_slice: Min time a monr stays monitored after becoming
+ *                       readable.
+ */
+#define CMT_DEFAULT_PRE_MON_SLICE 2000         /* ms */
+static u64 __cmt_pre_mon_slice;
+
+#define CMT_DEFAULT_MIN_MON_SLICE 5000         /* ms */
+static u64 __cmt_min_mon_slice;
+
+
+/*
  * If @pkgd == NULL, return first online, pkg_data in cmt_pkgs_data.
  * Otherwise next online pkg_data or NULL if no more.
  */
@@ -300,6 +319,7 @@ static void pmonr_to_unused(struct pmonr *pmonr)
                        pmonr_move_all_dependants(pmonr, lender);
                }
                __set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+               pkgd->nr_dirty_rmids++;
 
        } else if (pmonr->state == PMONR_DEP_IDLE ||
                   pmonr->state == PMONR_DEP_DIRTY) {
@@ -312,6 +332,11 @@ static void pmonr_to_unused(struct pmonr *pmonr)
                        __set_bit(rmids.read_rmid, pkgd->dirty_rmids);
                else
                        pkgd->nr_dep_pmonrs--;
+
+
+               if (!atomic_dec_and_test(&pmonr->monr->nr_dep_pmonrs))
+                       atomic64_set(&pmonr->monr->last_rmid_recoup,
+                                    get_jiffies_64());
        } else {
                WARN_ON_ONCE(true);
                return;
@@ -372,6 +397,7 @@ static inline void __pmonr_to_dep_helper(
 
        lender_rmids.value = atomic64_read(&lender->atomic_rmids);
        pmonr_set_rmids(pmonr, lender_rmids.sched_rmid, read_rmid);
+       atomic_inc(&pmonr->monr->nr_dep_pmonrs);
 }
 
 static inline void pmonr_unused_to_dep_idle(struct pmonr *pmonr)
@@ -390,6 +416,7 @@ static void pmonr_unused_to_off(struct pmonr *pmonr)
 
 static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
 {
+       struct pkg_data *pkgd = pmonr->pkgd;
        struct pmonr *lender;
        union pmonr_rmids rmids;
 
@@ -398,6 +425,7 @@ static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
 
        rmids.value = atomic64_read(&pmonr->atomic_rmids);
        __pmonr_to_dep_helper(pmonr, lender, rmids.read_rmid);
+       pkgd->nr_dirty_rmids++;
 }
 
 static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
@@ -408,6 +436,9 @@ static void __pmonr_dep_to_active_helper(struct pmonr 
*pmonr, u32 rmid)
        pmonr_move_dependants(pmonr->lender, pmonr);
        pmonr->lender = NULL;
        __pmonr_to_active_helper(pmonr, rmid);
+
+       if (!atomic_dec_and_test(&pmonr->monr->nr_dep_pmonrs))
+               atomic64_set(&pmonr->monr->last_rmid_recoup, get_jiffies_64());
 }
 
 static void pmonr_dep_idle_to_active(struct pmonr *pmonr, u32 rmid)
@@ -422,6 +453,7 @@ static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
        union pmonr_rmids rmids;
 
        rmids.value = atomic64_read(&pmonr->atomic_rmids);
+       pmonr->pkgd->nr_dirty_rmids--;
        __pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
 }
 
@@ -1599,7 +1631,7 @@ static int read_all_pkgs(struct monr *monr, int 
wait_time_ms, u64 *count)
 static int intel_cmt_event_read(struct perf_event *event)
 {
        struct monr *monr = monr_from_event(event);
-       u64 count;
+       u64 count, recoup, wait_end;
        u16 pkgid = topology_logical_package_id(smp_processor_id());
        int err;
 
@@ -1614,6 +1646,15 @@ static int intel_cmt_event_read(struct perf_event *event)
                return -ENXIO;
 
        /*
+        * If a rmid has been stolen, only read if enough time has elapsed
+        * since the rmids were recovered.
+        */
+       recoup = atomic64_read(&monr->last_rmid_recoup);
+       wait_end = recoup + __cmt_pre_mon_slice;
+       if (recoup && time_before64(get_jiffies_64(), wait_end))
+               return -EAGAIN;
+
+       /*
         * Only event parent can return a value, everyone else share its
         * rmid and therefore doesn't track occupancy independently.
         */
@@ -2267,6 +2308,9 @@ static int __init intel_cmt_init(void)
        struct pkg_data *pkgd = NULL;
        int err = 0;
 
+       __cmt_pre_mon_slice = msecs_to_jiffies(CMT_DEFAULT_PRE_MON_SLICE);
+       __cmt_min_mon_slice = msecs_to_jiffies(CMT_DEFAULT_MIN_MON_SLICE);
+
        if (!x86_match_cpu(intel_cmt_match)) {
                err = -ENODEV;
                goto err_exit;
diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
index 8bb43bd..8756666 100644
--- a/arch/x86/events/intel/cmt.h
+++ b/arch/x86/events/intel/cmt.h
@@ -52,6 +52,24 @@
  * schedule and read.
  *
  *
+ * Rotation
+ *
+ * The number of rmids in hw is relatively small with respect to the number
+ * of potential monitored resources. rmids are rotated among pmonrs that
+ * need one to give a fair-ish usage of this resource.
+ *
+ * A hw constraint is that occupancy for a rmid cannot be restarted, therefore
+ * a rmid with llc_occupancy needs some time unscheduled until all cache lines
+ * tagged to it are evicted from cache (if this ever happens).
+ *
+ * When a rmid is "rotated", it is stolen from a pmonr and must wait until its
+ * llc_occupancy has decreased enough to be considered "clean". Meanwhile, that
+ * rmid is considered "dirty".
+ *
+ * Rotation logic periodically reads occupancy of these "dirty" rmids and, when
+ * clean, the rmid is either reused or placed in a free pool.
+ *
+ *
  * Locking
  *
  * One global cmt_mutex. One mutex and spin_lock per package.
@@ -62,6 +80,7 @@
  *  cgroup start/stop.
  *  - Hold pkg->mutex and pkg->lock in _all_ active packages to traverse or
  *  change the monr hierarchy.
+ *  - pkgd->mutex: Hold in current package for rotation in that pkgd.
  *  - pkgd->lock: Hold in current package to access that pkgd's members. Hold
  *  a pmonr's package pkgd->lock for non-atomic access to pmonr.
  */
@@ -225,6 +244,7 @@ struct cmt_csd {
  * @dep_dirty_pmonrs:          LRU of Dep_Dirty pmonrs.
  * @dep_pmonrs:                        LRU of Dep_Idle and Dep_Dirty pmonrs.
  * @nr_dep_pmonrs:             nr Dep_Idle + nr Dep_Dirty pmonrs.
+ * @nr_dirty_rmids:            "dirty" rmids, both with and without a pmonr.
  * @mutex:                     Hold when modifying this pkg_data.
  * @mutex_key:                 lockdep class for pkg_data's mutex.
  * @lock:                      Hold to protect pmonrs in this pkg_data.
@@ -243,6 +263,7 @@ struct pkg_data {
        struct list_head        dep_dirty_pmonrs;
        struct list_head        dep_pmonrs;
        int                     nr_dep_pmonrs;
+       int                     nr_dirty_rmids;
 
        struct mutex            mutex;
        raw_spinlock_t          lock;
@@ -280,6 +301,10 @@ enum cmt_user_flags {
  * @parent:            Parent in monr hierarchy.
  * @children:          List of children in monr hierarchy.
  * @parent_entry:      Entry in parent's children list.
+ * @last_rmid_recoup:  Last time that nr_dep_pmonrs decreased to zero. It's
+ *                     zero if a rmid has never been stolen from this monr.
+ * @nr_dep_pmonrs:     nr of Dep_* pmonrs in this monr. A zero implies that
+ *                     monr is monitoring in all required packages.
  * @flags:             monr_flags.
  * @nr_has_user:       nr of CMT_UF_HAS_USER set in events in mon_events.
  * @nr_nolazy_user:    nr of CMT_UF_NOLAZY_RMID set in events in mon_events.
@@ -303,6 +328,9 @@ struct monr {
        struct list_head                children;
        struct list_head                parent_entry;
 
+       atomic64_t                      last_rmid_recoup;
+       atomic_t                        nr_dep_pmonrs;
+
        enum monr_flags                 flags;
        int                             nr_has_user;
        int                             nr_nolazy_rmid;
-- 
2.8.0.rc3.226.g39d4020

Reply via email to