Try to activate monrs at a __cmt_min_progress_rate rate.

Signed-off-by: David Carrillo-Cisneros <davi...@google.com>
---
 arch/x86/events/intel/cmt.c | 274 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 273 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index 8bf6aa5..ba82f95 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -79,6 +79,14 @@ static u64 __cmt_min_mon_slice;
 static unsigned int __cmt_max_threshold;       /* bytes */
 
 /*
+ * Rotation SLO of all monrs events (including those without llc_occupancy):
+ * @__cmt_min_progress_rate: Min number of pmonrs that must go to Active
+ * state per second; otherwise, the recycling occupancy error is increased.
+ */
+#define CMT_DEFAULT_MIN_PROGRESS_RATE 2                /* pmonrs per sec */
+static unsigned int __cmt_min_progress_rate = CMT_DEFAULT_MIN_PROGRESS_RATE;
+
+/*
  * If @pkgd == NULL, return first online, pkg_data in cmt_pkgs_data.
  * Otherwise next online pkg_data or NULL if no more.
  */
@@ -466,6 +474,21 @@ static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
        __pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
 }
 
+/* Dep_Dirty to Dep_Idle; rmid must be clean enough to go to free_rmids. */
+static void pmonr_dep_dirty_to_dep_idle_helper(struct pmonr *pmonr,
+                                              union pmonr_rmids rmids)
+{
+       struct pkg_data *pkg = pmonr->pkgd;
+
+       pkg->nr_dirty_rmids--;
+       pkg->nr_dep_pmonrs++;
+       __set_bit(rmids.read_rmid, pkg->free_rmids);
+       list_move_tail(&pmonr->rot_entry, &pkg->dep_idle_pmonrs);
+       /* Keep sched_rmid; INVALID_RMID presumably clears the read rmid. */
+       pmonr->state = PMONR_DEP_IDLE;
+       pmonr_set_rmids(pmonr, rmids.sched_rmid, INVALID_RMID);
+}
+
 static void monr_dealloc(struct monr *monr)
 {
        u16 p, nr_pkgs = topology_max_packages();
@@ -1311,6 +1334,242 @@ static void smp_call_rmid_read(void *data)
        atomic_set(&ccsd->on_read, 0);
 }
 
+/* Reactivate the pmonrs heading pkgd->dep_pmonrs; returns how many. */
+static int __try_activate_dep_dirty_pmonrs(struct pkg_data *pkgd)
+{
+       int reused = 0;
+       struct pmonr *pmonr;
+       struct list_head *lhead = &pkgd->dep_pmonrs;
+
+       lockdep_assert_held(&pkgd->lock);
+
+       /* An empty list yields NULL, which already terminates the loop. */
+       while ((pmonr = list_first_entry_or_null(
+                               lhead, struct pmonr, pkgd_deps_entry))) {
+               /* Only non-Dep_Idle pmonrs still own a dirty rmid to reuse. */
+               if (pmonr->state == PMONR_DEP_IDLE)
+                       break;
+               pmonr_dep_dirty_to_active(pmonr);
+               reused++;
+       }
+
+       return reused;
+}
+
+static int try_activate_dep_dirty_pmonrs(struct pkg_data *pkgd)
+{
+       unsigned long irq_flags;
+       int count;
+
+       /* Locked wrapper around __try_activate_dep_dirty_pmonrs(). */
+       raw_spin_lock_irqsave(&pkgd->lock, irq_flags);
+       count = __try_activate_dep_dirty_pmonrs(pkgd);
+       raw_spin_unlock_irqrestore(&pkgd->lock, irq_flags);
+       return count;
+}
+
+/* Activate the first Dep_Idle pmonr with @rmid; returns nr activated. */
+static inline int __try_use_free_rmid(struct pkg_data *pkgd, u32 rmid)
+{
+       struct pmonr *idle;
+
+       lockdep_assert_held(&pkgd->lock);
+
+       idle = list_first_entry_or_null(&pkgd->dep_idle_pmonrs,
+                                       struct pmonr, rot_entry);
+       if (idle) {
+               pmonr_dep_idle_to_active(idle, rmid);
+               return 1 + __try_activate_dep_dirty_pmonrs(pkgd);
+       }
+       return 0;
+}
+
+/* Use free rmids to activate waiting pmonrs; returns the nr activated. */
+static int __try_use_free_rmids(struct pkg_data *pkgd)
+{
+       int total = 0, got, bit;
+
+       for_each_set_bit(bit, pkgd->free_rmids, CMT_MAX_NR_RMIDS) {
+               /* Zero means no Dep_Idle pmonr is left to take a rmid. */
+               got = __try_use_free_rmid(pkgd, bit);
+               if (got == 0)
+                       return total;
+               total += got;
+       }
+       return total;
+}
+
+/* True if @rmid occupancy exceeds @dirty_thld, or if it cannot be known. */
+static bool is_rmid_dirty(struct pkg_data *pkgd, u32 rmid, bool do_read,
+                         unsigned int dirty_thld, unsigned int *min_dirty)
+{
+       u64 val;
+
+       /* No fresh read (or a failed one) leaves no value to test: dirty. */
+       if (!do_read || WARN_ON_ONCE(cmt_rmid_read(rmid, &val)))
+               return true;
+       if (val <= dirty_thld)
+               return false;
+       if (val < *min_dirty)   /* track the smallest dirty occupancy seen */
+               *min_dirty = val;
+       return true;
+}
+
+/*
+ * Demote Dep_Dirty pmonrs whose read rmid is now clean to Dep_Idle and try to
+ * reuse that rmid at once. Returns the nr of pmonrs activated.
+ * NOTE(review): confirm those activations cannot unlink the saved next entry.
+ */
+static int try_free_dep_dirty_pmonrs(struct pkg_data *pkgd, bool do_read,
+                                    unsigned int dirty_thld,
+                                    unsigned int *min_dirty)
+{
+       struct pmonr *cur, *next;
+       union pmonr_rmids ids;
+       unsigned long irq_flags;
+       int total = 0;
+
+       /* No pkg lock for the walk: rotation logic is this list's only user. */
+       list_for_each_entry_safe(cur, next,
+                                &pkgd->dep_dirty_pmonrs, rot_entry) {
+               ids.value = atomic64_read(&cur->atomic_rmids);
+               if (!is_rmid_dirty(pkgd, ids.read_rmid,
+                                  do_read, dirty_thld, min_dirty)) {
+                       raw_spin_lock_irqsave(&pkgd->lock, irq_flags);
+                       pmonr_dep_dirty_to_dep_idle_helper(cur, ids);
+                       total += __try_use_free_rmid(pkgd, ids.read_rmid);
+                       raw_spin_unlock_irqrestore(&pkgd->lock, irq_flags);
+               }
+       }
+
+       return total;
+}
+
+/*
+ * Recycle dirty rmids found clean: mark them free and try to activate pmonrs
+ * with them. @rmids_bm is scratch space. Returns the nr of pmonrs activated.
+ */
+static int try_free_dirty_rmids(struct pkg_data *pkgd,
+                               bool do_read,
+                               unsigned int dirty_thld,
+                               unsigned int *min_dirty,
+                               unsigned long *rmids_bm)
+{
+       unsigned long irq_flags;
+       int total = 0, bit;
+
+       /*
+        * Reading rmids in hw is slow, so snapshot the dirty bitmap under
+        * pkgd->lock once and perform the reads with the lock dropped.
+        */
+       raw_spin_lock_irqsave(&pkgd->lock, irq_flags);
+       memcpy(rmids_bm, pkgd->dirty_rmids, CMT_MAX_NR_RMIDS_BYTES);
+       raw_spin_unlock_irqrestore(&pkgd->lock, irq_flags);
+
+       for_each_set_bit(bit, rmids_bm, CMT_MAX_NR_RMIDS) {
+               if (!is_rmid_dirty(pkgd, bit, do_read, dirty_thld, min_dirty)) {
+                       raw_spin_lock_irqsave(&pkgd->lock, irq_flags);
+                       pkgd->nr_dirty_rmids--;
+                       __clear_bit(bit, pkgd->dirty_rmids);
+                       __set_bit(bit, pkgd->free_rmids);
+                       total += __try_use_free_rmid(pkgd, bit);
+                       raw_spin_unlock_irqrestore(&pkgd->lock, irq_flags);
+               }
+       }
+
+       return total;
+}
+
+/**
+ * __intel_cmt_rmid_rotate - Rotate rmids among pmonrs and handle dirty rmids.
+ * @pkgd:              The package data to rotate rmids on.
+ * @active_goal:       Target min nr of pmonrs to put in Active state.
+ * @max_dirty_thld:    Upper bound for dirty_thld, in CMT cache units.
+ *
+ * The goals for each iteration of rotation logic are:
+ *   1) to activate @active_goal pmonrs.
+ *
+ * In order to activate Dep_{Dirty,Idle} pmonrs, rotation logic:
+ *   1) activate eligible Dep_Dirty pmonrs: These pmonrs can reuse their former
+ *   rmid, even if it is not clean, without increasing the error.
+ *   2) take clean rmids from Dep_Dirty pmonrs and reuse them for other pmonrs
+ *   or add them to pool of free rmids.
+ *   3) use free rmids to activate Dep_Idle pmonrs.
+ *
+ * Rotation logic also checks the occupancy of dirty rmids and, if now clean,
+ * uses them or adds them to free rmids.
+ * When a Dep_Idle pmonr is activated, any Dep_Dirty pmonr that is immediately
+ * after it in the pkg->dep_pmonrs list can be activated reusing its dirty
+ * rmid.
+ *
+ * Must be called with pkgd->mutex held; takes pkgd->lock internally.
+ *
+ * Return: 0 once rotation ran, -ENOMEM if the scratch bitmap allocation fails.
+ */
+static int __intel_cmt_rmid_rotate(struct pkg_data *pkgd,
+               unsigned int active_goal, unsigned int max_dirty_thld)
+{
+       unsigned int dirty_thld = 0, min_dirty, nr_activated;
+       unsigned int nr_dep_pmonrs;
+       unsigned long flags, *rmids_bm;
+       /* Occupancy is re-read on every pass: no cached value is kept. */
+       const bool read_dirty = true;
+
+       lockdep_assert_held(&pkgd->mutex);
+
+       rmids_bm = kzalloc(CMT_MAX_NR_RMIDS_BYTES, GFP_KERNEL);
+       if (!rmids_bm)
+               return -ENOMEM;
+
+       nr_activated = try_activate_dep_dirty_pmonrs(pkgd);
+
+       for (;;) {
+               min_dirty = UINT_MAX;
+
+               /* retry every iteration since dirty_thld may have changed. */
+               nr_activated += try_free_dirty_rmids(pkgd, read_dirty,
+                                       dirty_thld, &min_dirty, rmids_bm);
+
+               raw_spin_lock_irqsave(&pkgd->lock, flags);
+               nr_activated += __try_use_free_rmids(pkgd);
+               raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+
+               nr_activated += try_free_dep_dirty_pmonrs(pkgd, read_dirty,
+                                               dirty_thld, &min_dirty);
+
+               raw_spin_lock_irqsave(&pkgd->lock, flags);
+               nr_activated += __try_use_free_rmids(pkgd);
+               nr_dep_pmonrs = pkgd->nr_dep_pmonrs;
+               raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+
+               /*
+                * Since Dep_Dirty pmonrs have their own dirty rmid, only
+                * Dep_Idle pmonrs are waiting for a rmid to be available.
+                * Stop if no pmonr waits for a rmid or the goal was reached.
+                */
+               if (!nr_dep_pmonrs || nr_activated >= active_goal)
+                       break;
+
+               /*
+                * If there is no room to increase dirty_thld, then no more
+                * dirty rmids could be reused and must give up active goal.
+                */
+               if (dirty_thld >= max_dirty_thld)
+                       break;
+
+               /*
+                * Try to activate more pmonrs by increasing the dirty
+                * threshold. Using the minimum observed occupancy in dirty
+                * rmids guarantees to recover at least one rmid per iteration.
+                */
+               dirty_thld = min(min_dirty, max_dirty_thld);
+       }
+
+       kfree(rmids_bm);
+
+       return 0;
+}
+
 static struct pmu intel_cmt_pmu;
 
 /* Schedule rotation in one package. */
@@ -1360,10 +1619,20 @@ static bool intel_cmt_need_rmid_rotation(struct pkg_data *pkgd)
 
 /*
  * Rotation function, runs per-package.
+ * If rmids are needed in a package it will steal rmids from pmonr that have
+ * been active longer than __cmt_pre_mon_slice + __cmt_min_mon_slice.
+ * The hardware doesn't provide a way to free occupancy for a rmid that will
+ * be reused. Therefore, before reusing a rmid, it should stay unscheduled for
+ * a while, hoping that the cache lines counted towards this rmid will
+ * eventually be replaced and the rmid occupancy will decrease below
+ * __cmt_max_threshold.
  */
 static void intel_cmt_rmid_rotation_work(struct work_struct *work)
 {
        struct pkg_data *pkgd;
+       /* not precise elapsed time, but good enough for rotation purposes. */
+       unsigned int elapsed_ms = intel_cmt_pmu.hrtimer_interval_ms;
+       unsigned int active_goal, max_dirty_threshold;
 
        pkgd = container_of(to_delayed_work(work),
                            struct pkg_data, rotation_work);
@@ -1377,7 +1646,10 @@ static void intel_cmt_rmid_rotation_work(struct work_struct *work)
        if (!intel_cmt_need_rmid_rotation(pkgd))
                goto exit;
 
-       /* To add call to rotation function in next patch */
+       active_goal = max(1u, (elapsed_ms * __cmt_min_progress_rate) / 1000);
+       max_dirty_threshold = READ_ONCE(__cmt_max_threshold) / cmt_l3_scale;
+
+       __intel_cmt_rmid_rotate(pkgd, active_goal, max_dirty_threshold);
 
        if (intel_cmt_need_rmid_rotation(pkgd))
                __intel_cmt_schedule_rotation_for_pkg(pkgd);
-- 
2.8.0.rc3.226.g39d4020

Reply via email to