Add remaining states for pmonr's state machine:
  - Active: A pmonr that is actively used.
  - Dep_Idle: A pmonr that failed to obtain a rmid. It "borrows" its rmid
    from its lowest monitored (Active in same pkgd) ancestor in the
    monr hierarchy.
  - Dep_Dirty: A pmonr that was Active but has lost its rmid (due to rmid
    rotation, introduced later in this patch series). It is similar to
    Dep_Idle but keeps track of its former rmid in case there is a reuse
    opportunity in the future.

This patch adds states, states transition functions for pmonrs.
It also adds infrastructure and usage statistics to struct pkg_data that
will be used later in this series.

The transitions Unused -> Active and Unused -> Dep_Idle are inline because
  they will be called during task context switches the first time a monr
runs in a package (later in this series).

More details in code's comments.

Signed-off-by: David Carrillo-Cisneros <davi...@google.com>
---
 arch/x86/events/intel/cmt.c | 237 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/events/intel/cmt.h |  95 +++++++++++++++++-
 2 files changed, 329 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index fb6877f..86c3013 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -142,6 +142,10 @@ static struct pmonr *pmonr_alloc(struct pkg_data *pkgd)
        if (!pmonr)
                return ERR_PTR(-ENOMEM);
 
+       /* pmonr_deps_{head, entry} share a union; initializing one initializes both. */
+       INIT_LIST_HEAD(&pmonr->pmonr_deps_head);
+       INIT_LIST_HEAD(&pmonr->pkgd_deps_entry);
+       INIT_LIST_HEAD(&pmonr->rot_entry);
        pmonr_set_rmids(pmonr, INVALID_RMID, INVALID_RMID);
        pmonr->pkgd = pkgd;
 
@@ -153,9 +157,108 @@ static inline bool monr_is_root(struct monr *monr)
        return monr_hrchy_root == monr;
 }
 
+/*
+ * monr_hrchy_is_ancestor() - Test monr ancestry.
+ * Return: true if @a is an ancestor of @b or equal to it.
+ */
+static inline bool monr_hrchy_is_ancestor(struct monr *a, struct monr *b)
+{
+       if (monr_hrchy_root == a || a == b)
+               return true;
+       if (monr_hrchy_root == b)
+               return false;
+
+       b = b->parent;
+       /* Break at the root */
+       while (b != monr_hrchy_root) {
+               if (a == b)
+                       return true;
+               b = b->parent;
+       }
+
+       return false;
+}
+
+/**
+ * pmonr_find_lma() - Find Lowest Monitored Ancestor (lma) of a pmonr.
+ * @pmonr:             The pmonr to start the search on.
+ *
+ * Always succeeds since pmonrs in monr_hrchy_root are always in Active state.
+ * Return: lma of @pmonr.
+ */
+static struct pmonr *pmonr_find_lma(struct pmonr *pmonr)
+{
+       struct monr *monr = pmonr->monr;
+       struct pkg_data *pkgd = pmonr->pkgd;
+
+       lockdep_assert_held(&pkgd->lock);
+
+       while ((monr = monr->parent)) {
+               /* protected by pkgd lock. */
+               pmonr = pkgd_pmonr(pkgd, monr);
+               if (pmonr->state == PMONR_ACTIVE)
+                       return pmonr;
+       }
+       /* Should have hit monr_hrchy_root. */
+       WARN_ON_ONCE(true);
+
+       return pkgd_pmonr(pkgd, monr_hrchy_root);
+}
+
+/**
+ * pmonr_move_all_dependants() - Move all dependants from @old lender to @new.
+ * @old: Old lender.
+ * @new: New lender.
+ *
+ * @new->monr must be ancestor of @old->monr and they must be distinct.
+ */
+static void pmonr_move_all_dependants(struct pmonr *old, struct pmonr *new)
+{
+       struct pmonr *dep;
+       union pmonr_rmids dep_rmids, new_rmids;
+
+       new_rmids.value = atomic64_read(&new->atomic_rmids);
+       /* Update this pmonr's dependants to depend on new lender. */
+       list_for_each_entry(dep, &old->pmonr_deps_head, pmonr_deps_entry) {
+               dep->lender = new;
+               dep_rmids.value = atomic64_read(&dep->atomic_rmids);
+               pmonr_set_rmids(dep, new_rmids.sched_rmid, dep_rmids.read_rmid);
+       }
+       list_splice_tail_init(&old->pmonr_deps_head, &new->pmonr_deps_head);
+}
+
+/**
+ * pmonr_move_dependants() -  Move some dependants from @old lender to @new.
+ *
+ * Move @old's dependants that are @new->monr descendants to be @new's
+ * dependants. As opposed to pmonr_move_all_dependants, @new->monr does not
+ * need to be an ancestor of @old->monr.
+ */
+static inline void pmonr_move_dependants(struct pmonr *old, struct pmonr *new)
+{
+       struct pmonr *dep, *tmp;
+       union pmonr_rmids dep_rmids, new_rmids;
+
+       new_rmids.value = atomic64_read(&new->atomic_rmids);
+
+       list_for_each_entry_safe(dep, tmp, &old->pmonr_deps_head,
+                                pmonr_deps_entry) {
+               if (!monr_hrchy_is_ancestor(new->monr, dep->monr))
+                       continue;
+               list_move_tail(&dep->pmonr_deps_entry, &new->pmonr_deps_head);
+               dep->lender = new;
+               dep_rmids.value = atomic64_read(&dep->atomic_rmids);
+               pmonr_set_rmids(dep, new_rmids.sched_rmid, dep_rmids.read_rmid);
+       }
+}
+
 /* pkg_data lock is not required for transition from Off state. */
 static void pmonr_to_unused(struct pmonr *pmonr)
 {
+       struct pkg_data *pkgd = pmonr->pkgd;
+       struct pmonr *lender;
+       union pmonr_rmids rmids;
+
        /*
         * Do not warn on re-entering Unused state to simplify cleanup
         * of initialized pmonrs that were not scheduled.
@@ -168,6 +271,98 @@ static void pmonr_to_unused(struct pmonr *pmonr)
                pmonr_set_rmids(pmonr, INVALID_RMID, 0);
                return;
        }
+
+       lockdep_assert_held(&pkgd->lock);
+       rmids.value = atomic64_read(&pmonr->atomic_rmids);
+
+       if (pmonr->state == PMONR_ACTIVE) {
+               if (monr_is_root(pmonr->monr)) {
+                       WARN_ON_ONCE(!list_empty(&pmonr->pmonr_deps_head));
+               } else {
+                       lender = pmonr_find_lma(pmonr);
+                       pmonr_move_all_dependants(pmonr, lender);
+               }
+               __set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+
+       } else if (pmonr->state == PMONR_DEP_IDLE ||
+                  pmonr->state == PMONR_DEP_DIRTY) {
+
+               pmonr->lender = NULL;
+               list_del_init(&pmonr->pmonr_deps_entry);
+               list_del_init(&pmonr->pkgd_deps_entry);
+
+               if (pmonr->state == PMONR_DEP_DIRTY)
+                       __set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+               else
+                       pkgd->nr_dep_pmonrs--;
+       } else {
+               WARN_ON_ONCE(true);
+               return;
+       }
+
+       list_del_init(&pmonr->rot_entry);
+       pmonr->state = PMONR_UNUSED;
+       pmonr_set_rmids(pmonr, INVALID_RMID, INVALID_RMID);
+}
+
+static inline void __pmonr_to_active_helper(struct pmonr *pmonr, u32 rmid)
+{
+       struct pkg_data *pkgd = pmonr->pkgd;
+
+       list_move_tail(&pmonr->rot_entry, &pkgd->active_pmonrs);
+       pmonr->state = PMONR_ACTIVE;
+       pmonr_set_rmids(pmonr, rmid, rmid);
+       atomic64_set(&pmonr->last_enter_active, get_jiffies_64());
+}
+
+static inline void pmonr_unused_to_active(struct pmonr *pmonr, u32 rmid)
+{
+       struct pmonr *lender;
+
+       __clear_bit(rmid, pmonr->pkgd->free_rmids);
+       __pmonr_to_active_helper(pmonr, rmid);
+       /*
+        * If monr is root, no ancestor exists to move pmonr to. If monr is
+        * root's child, no dependants of its parent (root) could be moved.
+        * Check both cases separately to avoid unnecessary calls to
+        * pmonr_move_dependants.
+        */
+       if (!monr_is_root(pmonr->monr) && !monr_is_root(pmonr->monr->parent)) {
+               lender = pmonr_find_lma(pmonr);
+               pmonr_move_dependants(lender, pmonr);
+       }
+}
+
+/* helper function for transitions to Dep_{Idle,Dirty} states. */
+static inline void __pmonr_to_dep_helper(
+       struct pmonr *pmonr, struct pmonr *lender, u32 read_rmid)
+{
+       struct pkg_data *pkgd = pmonr->pkgd;
+       union pmonr_rmids lender_rmids;
+
+       pmonr->lender = lender;
+       list_move_tail(&pmonr->pmonr_deps_entry, &lender->pmonr_deps_head);
+       list_move_tail(&pmonr->pkgd_deps_entry, &pkgd->dep_pmonrs);
+
+       if (read_rmid == INVALID_RMID) {
+               list_move_tail(&pmonr->rot_entry, &pkgd->dep_idle_pmonrs);
+               pkgd->nr_dep_pmonrs++;
+               pmonr->state = PMONR_DEP_IDLE;
+       } else {
+               list_move_tail(&pmonr->rot_entry, &pkgd->dep_dirty_pmonrs);
+               pmonr->state = PMONR_DEP_DIRTY;
+       }
+
+       lender_rmids.value = atomic64_read(&lender->atomic_rmids);
+       pmonr_set_rmids(pmonr, lender_rmids.sched_rmid, read_rmid);
+}
+
+static inline void pmonr_unused_to_dep_idle(struct pmonr *pmonr)
+{
+       struct pmonr *lender;
+
+       lender = pmonr_find_lma(pmonr);
+       __pmonr_to_dep_helper(pmonr, lender, INVALID_RMID);
 }
 
 static void pmonr_unused_to_off(struct pmonr *pmonr)
@@ -176,6 +371,43 @@ static void pmonr_unused_to_off(struct pmonr *pmonr)
        pmonr_set_rmids(pmonr, INVALID_RMID, 0);
 }
 
+static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
+{
+       struct pmonr *lender;
+       union pmonr_rmids rmids;
+
+       lender = pmonr_find_lma(pmonr);
+       pmonr_move_all_dependants(pmonr, lender);
+
+       rmids.value = atomic64_read(&pmonr->atomic_rmids);
+       __pmonr_to_dep_helper(pmonr, lender, rmids.read_rmid);
+}
+
+static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
+{
+       list_del_init(&pmonr->pkgd_deps_entry);
+       /* pmonr will no longer be dependent on pmonr_lender. */
+       list_del_init(&pmonr->pmonr_deps_entry);
+       pmonr_move_dependants(pmonr->lender, pmonr);
+       pmonr->lender = NULL;
+       __pmonr_to_active_helper(pmonr, rmid);
+}
+
+static void pmonr_dep_idle_to_active(struct pmonr *pmonr, u32 rmid)
+{
+       __clear_bit(rmid, pmonr->pkgd->free_rmids);
+       pmonr->pkgd->nr_dep_pmonrs--;
+       __pmonr_dep_to_active_helper(pmonr, rmid);
+}
+
+static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
+{
+       union pmonr_rmids rmids;
+
+       rmids.value = atomic64_read(&pmonr->atomic_rmids);
+       __pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
+}
+
 static void monr_dealloc(struct monr *monr)
 {
        u16 p, nr_pkgs = topology_max_packages();
@@ -780,6 +1012,11 @@ static struct pkg_data *alloc_pkg_data(int cpu)
                pkgd->max_rmid = CMT_MAX_NR_RMIDS - 1;
        }
 
+       INIT_LIST_HEAD(&pkgd->active_pmonrs);
+       INIT_LIST_HEAD(&pkgd->dep_idle_pmonrs);
+       INIT_LIST_HEAD(&pkgd->dep_dirty_pmonrs);
+       INIT_LIST_HEAD(&pkgd->dep_pmonrs);
+
        mutex_init(&pkgd->mutex);
        raw_spin_lock_init(&pkgd->lock);
 
diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
index 05325c8..bf90c26 100644
--- a/arch/x86/events/intel/cmt.h
+++ b/arch/x86/events/intel/cmt.h
@@ -36,6 +36,21 @@
  * online cpu. The pmonr handles the CMT and MBM monitoring within its package
  * by managing the rmid to write into each CPU that runs a monitored thread.
  *
+ * The lma of a pmonr is its closest ancestor pmonr that is in Active state.
+ *
+ * A pmonr allocates a rmid when needed, depending on its state (see
+ * enum pmonr_state comments). If a pmonr fails to obtain a free rmid, it
+ * "borrows" the one used by its Lowest Monitored Ancestor (lma).
+ *
+ * The "borrowed" rmid is used when threads are scheduled in so that the
+ * occupancy and memory bandwidth for those threads is accounted for in the
+ * monr hierarchy. Yet, that pmonr cannot use a "borrowed" rmid to read,
+ * since that rmid is not counting the "borrower"'s monr cache events.
+ * Therefore, a pmonr uses rmids in two ways:
+ *   (1) to schedule, and (2) to read.
+ * When a pmonr owns a rmid (Active state), that rmid is used for both
+ * schedule and read.
+ *
  *
  * Locking
  *
@@ -56,6 +71,16 @@
  *   - Off:      pmonr is unavailable for monitoring. It's the starting state.
  *   - Unused:   pmonr is available for monitoring but no thread associated to
  *               this pmonr's monr has been scheduled in this pmonr's package.
+ *   - Active:   pmonr is actively used. It successfully obtained a free rmid
+ *               to sched in/out and uses it to read pmonr's llc_occupancy.
+ *   - Dep_Idle:  pmonr failed to obtain its own free rmid and is borrowing the
+ *               rmid from its lowest Active ancestor monr (its lma monr).
+ *   - Dep_Dirty: pmonr was Active but its rmid was stolen. This state differs
+ *               from Dep_Idle in that the pmonr keeps a reference to its
+ *               former Active rmid. If the pmonr becomes eligible to recoup
+ *               its rmid in the near future, this previously used rmid can
+ *               be reused even if "dirty" without introducing additional
+ *               counting error.
  *
  * The valid state transitions are:
  *
@@ -64,11 +89,37 @@
  *  Off                |  Unused       monitoring is enabled for a pmonr.
  *-----------------------------------------------------------------------------
  *  Unused     |  Off          monitoring is disabled for a pmonr.
+ *             |--------------------------------------------------------------
+ *             |  Active       First thread associated to pmonr is scheduled
+ *             |               in package and a free rmid is available.
+ *             |--------------------------------------------------------------
+ *             |  Dep_Idle     Could not find a free rmid available.
+ *-----------------------------------------------------------------------------
+ *  Active     |  Dep_Dirty    rmid is stolen, keep reference to old rmid
+ *             |               in read_rmid, but it is not used to read.
+ *             |--------------------------------------------------------------
+ *             |  Unused       pmonr releases the rmid, released rmid can be
+ *             |               "dirty" and therefore goes to dirty_rmids.
+ *-----------------------------------------------------------------------------
+ *  Dep_Idle   |  Active       pmonr receives a "clean" rmid.
+ *             |--------------------------------------------------------------
+ *             |  Unused       pmonr is no longer waiting for rmid.
+ *-----------------------------------------------------------------------------
+ *  Dep_Dirty  |  Active       dirty rmid is reissued to pmonr that had it
+ *             |               before the transition.
+ *             |--------------------------------------------------------------
+ *             |  Dep_Idle     dirty rmid has become "clean" and is reissued
+ *             |               to a distinct pmonr (or goes to free_rmids).
+ *             |--------------------------------------------------------------
+ *             |  Unused       pmonr is no longer waiting for rmid.
  *-----------------------------------------------------------------------------
  */
 enum pmonr_state {
        PMONR_OFF = 0,
        PMONR_UNUSED,
+       PMONR_ACTIVE,
+       PMONR_DEP_IDLE,
+       PMONR_DEP_DIRTY,
 };
 
 /**
@@ -81,11 +132,11 @@ enum pmonr_state {
  * Its values can also used to atomically read the state (preventing
  * unnecessary locks of pkgd->lock) in the following way:
  *                                     pmonr state
- *           |      Off         Unused
+ *           |      Off         Unused       Active      Dep_Idle     Dep_Dirty
  * ============================================================================
- * sched_rmid |        INVALID_RMID  INVALID_RMID
+ * sched_rmid |        INVALID_RMID  INVALID_RMID    valid       lender's     lender's
  * ----------------------------------------------------------------------------
- *  read_rmid |        INVALID_RMID        0
+ *  read_rmid |        INVALID_RMID        0         (same)    INVALID_RMID   old rmid
  *
  */
 union pmonr_rmids {
@@ -98,16 +149,42 @@ union pmonr_rmids {
 
 /**
  * struct pmonr - per-package componet of MONitored Resources (monr).
+ * @lender:            if in Dep_Idle or Dep_Dirty state, it's the pmonr that
+ *                     lends its rmid to this pmonr. NULL otherwise.
+ * @pmonr_deps_head:   List of pmonrs in Dep_Idle or Dep_Dirty state that
+ *                     borrow their sched_rmid from this pmonr.
+ * @pmonr_deps_entry:  Entry into lender's @pmonr_deps_head when in Dep_Idle
+ *                     or Dep_Dirty state.
+ * @pkgd_deps_entry:   Entry into pkgd's dep_pmonrs when in Dep_Idle or Dep_Dirty state.
  * @monr:              The monr that contains this pmonr.
  * @pkgd:              The package data associated with this pmonr.
+ * @rot_entry:         List entry to attach to pmonr rotation lists in
+ *                     pkg_data.
+ *
+ * @last_enter_active: Time last enter Active state.
  * @atomic_rmids:      Atomic accesor for this pmonr_rmids.
  * @state:             The state for this pmonr, note that this can also
  *                     be inferred from the combination of sched_rmid and
  *                     read_rmid in @atomic_rmids.
  */
 struct pmonr {
+       struct pmonr                            *lender;
+       /* save space with union since pmonr is in only one state at a time. */
+       union {
+               struct { /* variables for Active state. */
+                       struct list_head        pmonr_deps_head;
+               };
+               struct { /* variables for Dep_Idle and Dep_Dirty states. */
+                       struct list_head        pmonr_deps_entry;
+                       struct list_head        pkgd_deps_entry;
+               };
+       };
+
        struct monr                             *monr;
        struct pkg_data                         *pkgd;
+       struct list_head                        rot_entry;
+
+       atomic64_t                              last_enter_active;
 
        /* all writers are sync'ed by package's lock. */
        atomic64_t                              atomic_rmids;
@@ -130,7 +207,13 @@ struct pmonr {
  * @free_rmids:                        Pool of free rmids.
  * @dirty_rmids:               Pool of "dirty" rmids that are not referenced
  *                             by a pmonr.
+ * @active_pmonrs:             LRU of Active pmonrs.
+ * @dep_idle_pmonrs:           LRU of Dep_Idle pmonrs.
+ * @dep_dirty_pmonrs:          LRU of Dep_Dirty pmonrs.
+ * @dep_pmonrs:                        LRU of Dep_Idle and Dep_Dirty pmonrs.
+ * @nr_dep_pmonrs:             nr of Dep_Idle pmonrs (Dep_Dirty are not counted).
  * @mutex:                     Hold when modifying this pkg_data.
+ * @mutex_key:                 lockdep class for pkg_data's mutex.
  * @lock:                      Hold to protect pmonrs in this pkg_data.
  * @work_cpu:                  CPU to run rotation and other batch jobs.
  *                             It must be in the package associated to its
@@ -142,6 +225,12 @@ struct pkg_data {
        unsigned long           free_rmids[CMT_MAX_NR_RMIDS_LONGS];
        unsigned long           dirty_rmids[CMT_MAX_NR_RMIDS_LONGS];
 
+       struct list_head        active_pmonrs;
+       struct list_head        dep_idle_pmonrs;
+       struct list_head        dep_dirty_pmonrs;
+       struct list_head        dep_pmonrs;
+       int                     nr_dep_pmonrs;
+
        struct mutex            mutex;
        raw_spinlock_t          lock;
 
-- 
2.8.0.rc3.226.g39d4020

Reply via email to