Add rmid rotation code to steal an rmid whenever not enough
pmonrs are being reactivated.

More details in code's comments.

Signed-off-by: David Carrillo-Cisneros <[email protected]>
---
 arch/x86/events/intel/cmt.c | 149 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 144 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index ba82f95..e677511 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -1368,6 +1368,106 @@ static int try_activate_dep_dirty_pmonrs(struct pkg_data *pkgd)
        return nr_reused;
 }
 
+/**
+ * can_steal_rmid() - Tell if this pmonr's rmid can be stolen.
+ * @pmonr: The pmonr whose rmid is a candidate for stealing.
+ *
+ * A pmonr's "rmid cycle" starts when its rmid is stolen while Active and
+ * completes when it receives a rmid again. A monr "rmid recoup" occurs when
+ * all its non Off/Unused pmonrs (i.e. all that need a rmid) have one.
+ *
+ * A pmonr's rmid can be stolen if any of the following holds:
+ *   1) No other pmonr in pmonr's monr has been stolen before, or
+ *   2) Some pmonrs have had rmids stolen but rmids for all pmonrs have been
+ *   recovered (rmid recoup) and kept for at least
+ *     __cmt_pre_mon_slice + __cmt_min_mon_slice time, or
+ *   3) At least one of the pmonrs with pkgid smaller than @pmonr's has not
+ *   completed its first "rmid cycle". Once this is false, the pmonr will
+ *   have completed its last "rmid cycle" and stealing is no longer allowed.
+ *   This guarantees that the last "rmid cycle" of a pmonr occurs in pkgid
+ *   order, preventing rmid deadlocks, and that all pmonrs will eventually
+ *   have a last "rmid cycle", recovering all required rmids.
+ *
+ * Return: true if @pmonr's rmid may be stolen now, false otherwise.
+ */
+static bool can_steal_rmid(struct pmonr *pmonr)
+{
+       union pmonr_rmids rmids;
+       struct monr *monr = pmonr->monr;
+       struct pkg_data *pkgd = NULL;
+       struct pmonr *pos_pmonr;
+       bool need_rmid_state;
+       u64 last_all_active, next_steal_time, last_pmonr_active;
+
+       last_all_active = atomic64_read(&monr->last_rmid_recoup);
+       /*
+        * Can steal if no pmonr has been stolen or all not Unused have been
+        * in Active state for long enough.
+        */
+       if (!atomic_read(&monr->nr_dep_pmonrs)) {
+               /* Check steal condition 1. */
+               if (!last_all_active)
+                       return true;
+               next_steal_time = last_all_active +
+                               __cmt_pre_mon_slice + __cmt_min_mon_slice;
+               /* Check steal condition 2: kept long enough since recoup. */
+               if (time_after_eq64(get_jiffies_64(), next_steal_time))
+                       return true;
+
+               return false;
+       }
+
+       rcu_read_lock();
+
+       /* Check for steal condition 3 without locking. */
+       while ((pkgd = cmt_pkgs_data_next_rcu(pkgd))) {
+               /* To avoid deadlocks, wait for pmonr in pkgid order. */
+               if (pkgd->pkgid >= pmonr->pkgd->pkgid)
+                       break;
+               pos_pmonr = pkgd_pmonr(pkgd, monr);
+               rmids.value = atomic64_read(&pos_pmonr->atomic_rmids);
+               last_pmonr_active = atomic64_read(
+                               &pos_pmonr->last_enter_active);
+
+               /* pmonrs in Dep_{Idle,Dirty} states are waiting for a rmid. */
+               need_rmid_state = rmids.sched_rmid != INVALID_RMID &&
+                                 rmids.sched_rmid != rmids.read_rmid;
+
+               /* test if pos_pmonr has finished its first rmid cycle. */
+               if (need_rmid_state && last_all_active <= last_pmonr_active) {
+                       rcu_read_unlock();
+
+                       return true;
+               }
+       }
+       rcu_read_unlock();
+
+       return false;
+}
+
+/* Steal up to @max_to_steal rmids from Active pmonrs; return nr stolen. */
+static int try_steal_active_pmonrs(struct pkg_data *pkgd,
+                                  unsigned int max_to_steal)
+{
+       struct pmonr *pmonr, *tmp;
+       unsigned long flags;
+       unsigned int nr_stolen = 0;
+
+       raw_spin_lock_irqsave(&pkgd->lock, flags);
+       list_for_each_entry_safe(pmonr, tmp, &pkgd->active_pmonrs, rot_entry) {
+               /* Checked first so that a zero budget steals nothing. */
+               if (nr_stolen >= max_to_steal)
+                       break;
+               if (!can_steal_rmid(pmonr))
+                       continue;
+               pmonr_active_to_dep_dirty(pmonr);
+               nr_stolen++;
+       }
+       raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+
+       return nr_stolen;
+}
+
 static inline int __try_use_free_rmid(struct pkg_data *pkgd, u32 rmid)
 {
        struct pmonr *pmonr;
@@ -1485,9 +1585,17 @@ static int try_free_dirty_rmids(struct pkg_data *pkgd,
  * @pkgd:              The package data to rotate rmids on.
  * @active_goal:       Target min nr of pmonrs to put in Active state.
  * @max_dirty_thld:    Upper bound for dirty_thld, in CMT cache units.
+ * @max_dirty_goal:    Max nr of rmids to leave dirty, waiting to drop
+ *                     occupancy.
+ * @dirty_cushion:     nr of rmids to try to leave in dirty on top of the
+ *                     nr of pmonrs that need rmid (Dep_Idle), in case
+ *                     some dirty rmids do not drop occupancy fast enough.
  *
  * The goals for each iteration of rotation logic are:
  *   1) to activate @active_goal pmonrs.
+ *   2) if any pmonr is waiting for rmid (Dep_Idle), to steal enough rmids to
+ *   meet its dirty_goal. The dirty_goal is an estimate of the number of dirty
+ *   rmids required so that next call reaches its @active_goal.
  *
  * In order to activate Dep_{Dirty,Idle} pmonrs, rotation logic:
  *   1) activate eligible Dep_Dirty pmonrs: These pmonrs can reuse their former
@@ -1503,12 +1611,14 @@ static int try_free_dirty_rmids(struct pkg_data *pkgd,
  * rmid.
  */
 static int __intel_cmt_rmid_rotate(struct pkg_data *pkgd,
-               unsigned int active_goal, unsigned int max_dirty_thld)
+               unsigned int active_goal, unsigned int max_dirty_thld,
+               unsigned int max_dirty_goal, unsigned int dirty_cushion)
 {
        unsigned int dirty_thld = 0, min_dirty, nr_activated;
-       unsigned int nr_dep_pmonrs;
+       unsigned int nr_to_steal, nr_stolen;
+       unsigned int nr_dirty, dirty_goal, nr_dep_pmonrs;
        unsigned long flags, *rmids_bm = NULL;
-       bool do_active_goal, read_dirty = true, dirty_is_max;
+       bool do_active_goal, do_dirty_goal, read_dirty = true, dirty_is_max;
 
        lockdep_assert_held(&pkgd->mutex);
 
@@ -1534,6 +1644,7 @@ static int __intel_cmt_rmid_rotate(struct pkg_data *pkgd,
 
        raw_spin_lock_irqsave(&pkgd->lock, flags);
        nr_activated += __try_use_free_rmids(pkgd);
+       nr_dirty = pkgd->nr_dirty_rmids;
        nr_dep_pmonrs = pkgd->nr_dep_pmonrs;
        raw_spin_unlock_irqrestore(&pkgd->lock, flags);
 
@@ -1544,14 +1655,27 @@ static int __intel_cmt_rmid_rotate(struct pkg_data *pkgd,
        dirty_is_max = dirty_thld >= max_dirty_thld;
        do_active_goal = nr_activated < active_goal && !dirty_is_max;
 
+       dirty_goal = min(max_dirty_goal, nr_dep_pmonrs + dirty_cushion);
+       do_dirty_goal = nr_dirty < dirty_goal;
+
        /*
         * Since Dep_Dirty pmonrs have their own dirty rmid, only Dep_Idle
         * pmonrs are waiting for a rmid to be available. Stop if no pmonr
         * wait for rmid or no goals to pursue.
         */
-       if (!nr_dep_pmonrs || !do_active_goal)
+       if (!nr_dep_pmonrs || (!do_dirty_goal && !do_active_goal))
                goto exit;
 
+       if (do_dirty_goal) {
+               nr_to_steal = dirty_goal - nr_dirty;
+               nr_stolen = try_steal_active_pmonrs(pkgd, nr_to_steal);
+               /*
+                * It tried to steal from all Active pmonrs, makes no sense
+                * to reattempt.
+                */
+               max_dirty_goal = 0;
+       }
+
        /*
         * Try to activate more pmonrs by increasing the dirty threshold.
         * Using the minimum observed occupancy in dirty rmids guarantees to
@@ -1633,6 +1757,7 @@ static void intel_cmt_rmid_rotation_work(struct work_struct *work)
        /* not precise elapsed time, but good enough for rotation purposes. */
        unsigned int elapsed_ms = intel_cmt_pmu.hrtimer_interval_ms;
        unsigned int active_goal, max_dirty_threshold;
+       unsigned int dirty_cushion, max_dirty_goal;
 
        pkgd = container_of(to_delayed_work(work),
                            struct pkg_data, rotation_work);
@@ -1649,7 +1774,21 @@ static void intel_cmt_rmid_rotation_work(struct work_struct *work)
        active_goal = max(1u, (elapsed_ms * __cmt_min_progress_rate) / 1000);
        max_dirty_threshold = READ_ONCE(__cmt_max_threshold) / cmt_l3_scale;
 
-       __intel_cmt_rmid_rotate(pkgd, active_goal, max_dirty_threshold);
+       /*
+        * Upper bound for the nr of rmids to be dirty in order to have a good
+        * chance of finding enough rmids in next iteration of rotation logic.
+        */
+       max_dirty_goal = min(active_goal + 1, (pkgd->max_rmid + 1) / 4);
+
+       /*
+        * Nr of extra rmids to put in dirty in case some don't drop occupancy.
+        * To be calculated in a sensible manner once statistics about rmid
+        * recycling rate are in place.
+        */
+       dirty_cushion = 2;
+
+       __intel_cmt_rmid_rotate(pkgd, active_goal, max_dirty_threshold,
+                               max_dirty_goal, dirty_cushion);
 
        if (intel_cmt_need_rmid_rotation(pkgd))
                __intel_cmt_schedule_rotation_for_pkg(pkgd);
-- 
2.8.0.rc3.226.g39d4020

Reply via email to