Add remaining states for pmonr's state machine:

 - Active: A pmonr that is actively used.

 - Dep_Idle: A pmonr that failed to obtain a rmid. It "borrows" its rmid
   from its lowest monitored (Active in the same pkgd) ancestor in the
   monr hierarchy.

 - Dep_Dirty: A pmonr that was Active but has lost its rmid (due to rmid
   rotation, introduced later in this patch series). It is similar to
   Dep_Idle but keeps track of its former rmid in case there is a reuse
   opportunity in the future.
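For illustration, the valid transitions (listed in full in cmt.h below) can
be summarized as a validity check. This is a sketch only;
pmonr_transition_is_valid() is hypothetical and not part of this patch:

/* Sketch: encode the valid transitions of the pmonr state machine. */
static bool pmonr_transition_is_valid(enum pmonr_state from,
				      enum pmonr_state to)
{
	switch (from) {
	case PMONR_OFF:
		return to == PMONR_UNUSED;
	case PMONR_UNUSED:
		return to == PMONR_OFF || to == PMONR_ACTIVE ||
		       to == PMONR_DEP_IDLE;
	case PMONR_ACTIVE:
		return to == PMONR_UNUSED || to == PMONR_DEP_DIRTY;
	case PMONR_DEP_IDLE:
		return to == PMONR_UNUSED || to == PMONR_ACTIVE;
	case PMONR_DEP_DIRTY:
		return to == PMONR_UNUSED || to == PMONR_ACTIVE ||
		       to == PMONR_DEP_IDLE;
	}
	return false;
}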
This patch adds the states and the state transition functions for pmonrs.
It also adds infrastructure and usage statistics to struct pkg_data that
will be used later in this series.

The transitions Unused -> Active and Unused -> Dep_Idle are inline because
they will be called during task context switches the first time a monr runs
in a package (later in this series).

More details in code comments.

Signed-off-by: David Carrillo-Cisneros <davi...@google.com>
---
 arch/x86/events/intel/cmt.c | 237 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/events/intel/cmt.h |  95 +++++++++++++++++-
 2 files changed, 329 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index fb6877f..86c3013 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -142,6 +142,10 @@ static struct pmonr *pmonr_alloc(struct pkg_data *pkgd)
 	if (!pmonr)
 		return ERR_PTR(-ENOMEM);
 
+	/* pmonr_deps_{head, entry} are in a union; initialize one of them. */
+	INIT_LIST_HEAD(&pmonr->pmonr_deps_head);
+	INIT_LIST_HEAD(&pmonr->pkgd_deps_entry);
+	INIT_LIST_HEAD(&pmonr->rot_entry);
 	pmonr_set_rmids(pmonr, INVALID_RMID, INVALID_RMID);
 
 	pmonr->pkgd = pkgd;
@@ -153,9 +157,108 @@ static inline bool monr_is_root(struct monr *monr)
 	return monr_hrchy_root == monr;
 }
 
+/*
+ * Return true if @a is an ancestor of @b or equal to it.
+ */
+static inline bool monr_hrchy_is_ancestor(struct monr *a, struct monr *b)
+{
+	if (monr_hrchy_root == a || a == b)
+		return true;
+	if (monr_hrchy_root == b)
+		return false;
+
+	b = b->parent;
+	/* Break at the root. */
+	while (b != monr_hrchy_root) {
+		if (a == b)
+			return true;
+		b = b->parent;
+	}
+
+	return false;
+}
+
+/**
+ * pmonr_find_lma() - Find the Lowest Monitored Ancestor (lma) of a pmonr.
+ * @pmonr: The pmonr to start the search on.
+ *
+ * Always succeeds since pmonrs in monr_hrchy_root are always in Active state.
+ * Return: the lma of @pmonr.
+ */
+static struct pmonr *pmonr_find_lma(struct pmonr *pmonr)
+{
+	struct monr *monr = pmonr->monr;
+	struct pkg_data *pkgd = pmonr->pkgd;
+
+	lockdep_assert_held(&pkgd->lock);
+
+	while ((monr = monr->parent)) {
+		/* Protected by pkgd->lock. */
+		pmonr = pkgd_pmonr(pkgd, monr);
+		if (pmonr->state == PMONR_ACTIVE)
+			return pmonr;
+	}
+	/* Should have hit monr_hrchy_root. */
+	WARN_ON_ONCE(true);
+
+	return pkgd_pmonr(pkgd, monr_hrchy_root);
+}
+
+/**
+ * pmonr_move_all_dependants() - Move all dependants from @old lender to @new.
+ * @old: Old lender.
+ * @new: New lender.
+ *
+ * @new->monr must be an ancestor of @old->monr and they must be distinct.
+ */
+static void pmonr_move_all_dependants(struct pmonr *old, struct pmonr *new)
+{
+	struct pmonr *dep;
+	union pmonr_rmids dep_rmids, new_rmids;
+
+	new_rmids.value = atomic64_read(&new->atomic_rmids);
+	/* Update @old's dependants to depend on the new lender. */
+	list_for_each_entry(dep, &old->pmonr_deps_head, pmonr_deps_entry) {
+		dep->lender = new;
+		dep_rmids.value = atomic64_read(&dep->atomic_rmids);
+		pmonr_set_rmids(dep, new_rmids.sched_rmid, dep_rmids.read_rmid);
+	}
+	list_splice_tail_init(&old->pmonr_deps_head, &new->pmonr_deps_head);
+}
+
+/**
+ * pmonr_move_dependants() - Move some dependants from @old lender to @new.
+ * @old: Old lender.
+ * @new: New lender.
+ *
+ * Move @old's dependants that are descendants of @new->monr to be @new's
+ * dependants. As opposed to pmonr_move_all_dependants(), @new->monr does not
+ * need to be an ancestor of @old->monr.
+ */
+static inline void pmonr_move_dependants(struct pmonr *old, struct pmonr *new)
+{
+	struct pmonr *dep, *tmp;
+	union pmonr_rmids dep_rmids, new_rmids;
+
+	new_rmids.value = atomic64_read(&new->atomic_rmids);
+
+	list_for_each_entry_safe(dep, tmp, &old->pmonr_deps_head,
+				 pmonr_deps_entry) {
+		if (!monr_hrchy_is_ancestor(new->monr, dep->monr))
+			continue;
+		list_move_tail(&dep->pmonr_deps_entry, &new->pmonr_deps_head);
+		dep->lender = new;
+		dep_rmids.value = atomic64_read(&dep->atomic_rmids);
+		pmonr_set_rmids(dep, new_rmids.sched_rmid, dep_rmids.read_rmid);
+	}
+}
+
 /* pkg_data lock is not required for transition from Off state. */
 static void pmonr_to_unused(struct pmonr *pmonr)
 {
+	struct pkg_data *pkgd = pmonr->pkgd;
+	struct pmonr *lender;
+	union pmonr_rmids rmids;
+
 	/*
 	 * Do not warn on re-entering Unused state to simplify cleanup
 	 * of initialized pmonrs that were not scheduled.
@@ -168,6 +271,98 @@ static void pmonr_to_unused(struct pmonr *pmonr)
 		pmonr_set_rmids(pmonr, INVALID_RMID, 0);
 		return;
 	}
+
+	lockdep_assert_held(&pkgd->lock);
+	rmids.value = atomic64_read(&pmonr->atomic_rmids);
+
+	if (pmonr->state == PMONR_ACTIVE) {
+		if (monr_is_root(pmonr->monr)) {
+			WARN_ON_ONCE(!list_empty(&pmonr->pmonr_deps_head));
+		} else {
+			lender = pmonr_find_lma(pmonr);
+			pmonr_move_all_dependants(pmonr, lender);
+		}
+		__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+
+	} else if (pmonr->state == PMONR_DEP_IDLE ||
+		   pmonr->state == PMONR_DEP_DIRTY) {
+
+		pmonr->lender = NULL;
+		list_del_init(&pmonr->pmonr_deps_entry);
+		list_del_init(&pmonr->pkgd_deps_entry);
+
+		if (pmonr->state == PMONR_DEP_DIRTY)
+			__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+		else
+			pkgd->nr_dep_pmonrs--;
+	} else {
+		WARN_ON_ONCE(true);
+		return;
+	}
+
+	list_del_init(&pmonr->rot_entry);
+	pmonr->state = PMONR_UNUSED;
+	pmonr_set_rmids(pmonr, INVALID_RMID, INVALID_RMID);
+}
+
+static inline void __pmonr_to_active_helper(struct pmonr *pmonr, u32 rmid)
+{
+	struct pkg_data *pkgd = pmonr->pkgd;
+
+	list_move_tail(&pmonr->rot_entry, &pkgd->active_pmonrs);
+	pmonr->state = PMONR_ACTIVE;
+	pmonr_set_rmids(pmonr, rmid, rmid);
+	atomic64_set(&pmonr->last_enter_active, get_jiffies_64());
+}
+
+static inline void pmonr_unused_to_active(struct pmonr *pmonr, u32 rmid)
+{
+	struct pmonr *lender;
+
+	__clear_bit(rmid, pmonr->pkgd->free_rmids);
+	__pmonr_to_active_helper(pmonr, rmid);
+	/*
+	 * If monr is root, no ancestor exists to move pmonr to. If monr is
+	 * root's child, no dependants of its parent (root) could be moved.
+	 * Check both cases separately to avoid unnecessary calls to
+	 * pmonr_move_dependants().
+	 */
+	if (!monr_is_root(pmonr->monr) && !monr_is_root(pmonr->monr->parent)) {
+		lender = pmonr_find_lma(pmonr);
+		pmonr_move_dependants(lender, pmonr);
+	}
+}
+
+/* Helper function for transitions to the Dep_{Idle,Dirty} states.
+ */
+static inline void __pmonr_to_dep_helper(
+	struct pmonr *pmonr, struct pmonr *lender, u32 read_rmid)
+{
+	struct pkg_data *pkgd = pmonr->pkgd;
+	union pmonr_rmids lender_rmids;
+
+	pmonr->lender = lender;
+	list_move_tail(&pmonr->pmonr_deps_entry, &lender->pmonr_deps_head);
+	list_move_tail(&pmonr->pkgd_deps_entry, &pkgd->dep_pmonrs);
+
+	if (read_rmid == INVALID_RMID) {
+		list_move_tail(&pmonr->rot_entry, &pkgd->dep_idle_pmonrs);
+		pkgd->nr_dep_pmonrs++;
+		pmonr->state = PMONR_DEP_IDLE;
+	} else {
+		list_move_tail(&pmonr->rot_entry, &pkgd->dep_dirty_pmonrs);
+		pmonr->state = PMONR_DEP_DIRTY;
+	}
+
+	lender_rmids.value = atomic64_read(&lender->atomic_rmids);
+	pmonr_set_rmids(pmonr, lender_rmids.sched_rmid, read_rmid);
+}
+
+static inline void pmonr_unused_to_dep_idle(struct pmonr *pmonr)
+{
+	struct pmonr *lender;
+
+	lender = pmonr_find_lma(pmonr);
+	__pmonr_to_dep_helper(pmonr, lender, INVALID_RMID);
 }
 
 static void pmonr_unused_to_off(struct pmonr *pmonr)
@@ -176,6 +371,43 @@ static void pmonr_unused_to_off(struct pmonr *pmonr)
 	pmonr_set_rmids(pmonr, INVALID_RMID, 0);
 }
 
+static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
+{
+	struct pmonr *lender;
+	union pmonr_rmids rmids;
+
+	lender = pmonr_find_lma(pmonr);
+	pmonr_move_all_dependants(pmonr, lender);
+
+	rmids.value = atomic64_read(&pmonr->atomic_rmids);
+	__pmonr_to_dep_helper(pmonr, lender, rmids.read_rmid);
+}
+
+static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
+{
+	list_del_init(&pmonr->pkgd_deps_entry);
+	/* pmonr will no longer be dependent on its lender. */
+	list_del_init(&pmonr->pmonr_deps_entry);
+	pmonr_move_dependants(pmonr->lender, pmonr);
+	pmonr->lender = NULL;
+	__pmonr_to_active_helper(pmonr, rmid);
+}
+
+static void pmonr_dep_idle_to_active(struct pmonr *pmonr, u32 rmid)
+{
+	__clear_bit(rmid, pmonr->pkgd->free_rmids);
+	pmonr->pkgd->nr_dep_pmonrs--;
+	__pmonr_dep_to_active_helper(pmonr, rmid);
+}
+
+static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
+{
+	union pmonr_rmids rmids;
+
+	rmids.value = atomic64_read(&pmonr->atomic_rmids);
+	__pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
+}
+
 static void monr_dealloc(struct monr *monr)
 {
 	u16 p, nr_pkgs = topology_max_packages();
@@ -780,6 +1012,11 @@ static struct pkg_data *alloc_pkg_data(int cpu)
 		pkgd->max_rmid = CMT_MAX_NR_RMIDS - 1;
 	}
 
+	INIT_LIST_HEAD(&pkgd->active_pmonrs);
+	INIT_LIST_HEAD(&pkgd->dep_idle_pmonrs);
+	INIT_LIST_HEAD(&pkgd->dep_dirty_pmonrs);
+	INIT_LIST_HEAD(&pkgd->dep_pmonrs);
+
 	mutex_init(&pkgd->mutex);
 	raw_spin_lock_init(&pkgd->lock);
 
diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
index 05325c8..bf90c26 100644
--- a/arch/x86/events/intel/cmt.h
+++ b/arch/x86/events/intel/cmt.h
@@ -36,6 +36,21 @@
 * online cpu. The pmonr handles the CMT and MBM monitoring within its package
 * by managing the rmid to write into each CPU that runs a monitored thread.
 *
+ * The Lowest Monitored Ancestor (lma) of a pmonr is its closest ancestor
+ * pmonr in Active state.
+ *
+ * A pmonr allocates a rmid when needed, depending on its state (see
+ * enum pmonr_state comments). If a pmonr fails to obtain a free rmid, it
+ * "borrows" the one used by its lma.
+ *
+ * The "borrowed" rmid is used when threads are scheduled in, so that the
+ * occupancy and memory bandwidth of those threads is accounted for in the
+ * monr hierarchy. Yet, a pmonr cannot use a "borrowed" rmid to read,
+ * since that rmid is not counting the "borrower"'s monr cache events.
+ * Therefore, a pmonr uses rmids in two ways: (1) to schedule, and
+ * (2) to read. When a pmonr owns a rmid (Active state), that rmid is used
+ * both to schedule and to read.
+ *
 *
 * Locking
 *
@@ -56,6 +71,16 @@
 * - Off: pmonr is unavailable for monitoring. It's the starting state.
 * - Unused: pmonr is available for monitoring but no thread associated to
 *   this pmonr's monr has been scheduled in this pmonr's package.
+ * - Active: pmonr is actively used. It successfully obtained a free rmid
+ *   to sched in/out and uses it to read the pmonr's llc_occupancy.
+ * - Dep_Idle: pmonr failed to obtain its own free rmid and is borrowing the
+ *   rmid from its lowest Active ancestor monr (its lma monr).
+ * - Dep_Dirty: pmonr was Active but its rmid was stolen. This state differs
+ *   from Dep_Idle in that the pmonr keeps a reference to its former Active
+ *   rmid. If the pmonr becomes eligible to recoup its rmid in the near
+ *   future, this previously used rmid can be reused even if "dirty",
+ *   without introducing additional counting error.
 *
 * The valid state transitions are:
 *
 *  Off       | Unused      monitoring is enabled for a pmonr.
 *-----------------------------------------------------------------------------
 *  Unused    | Off         monitoring is disabled for a pmonr.
+ *            |----------------------------------------------------------------
+ *            | Active      First thread associated to pmonr is scheduled in
+ *            |             package and a free rmid is available.
+ *            |----------------------------------------------------------------
+ *            | Dep_Idle    Could not find a free rmid available.
+ *-----------------------------------------------------------------------------
+ *  Active    | Dep_Dirty   rmid is stolen; keep a reference to the old rmid
+ *            |             in read_rmid, but do not use it to read.
+ *            |----------------------------------------------------------------
+ *            | Unused      pmonr releases its rmid; the released rmid can be
+ *            |             "dirty" and therefore goes to dirty_rmids.
+ *-----------------------------------------------------------------------------
+ *  Dep_Idle  | Active      pmonr receives a "clean" rmid.
+ *            |----------------------------------------------------------------
+ *            | Unused      pmonr is no longer waiting for a rmid.
+ *-----------------------------------------------------------------------------
+ *  Dep_Dirty | Active      the dirty rmid is reissued to the pmonr that had
+ *            |             it before the transition.
+ *            |----------------------------------------------------------------
+ *            | Dep_Idle    the dirty rmid has become "clean" and is reissued
+ *            |             to a distinct pmonr (or goes to free_rmids).
+ *            |----------------------------------------------------------------
+ *            | Unused      pmonr is no longer waiting for a rmid.
 *-----------------------------------------------------------------------------
 */
 
 enum pmonr_state {
 	PMONR_OFF = 0,
 	PMONR_UNUSED,
+	PMONR_ACTIVE,
+	PMONR_DEP_IDLE,
+	PMONR_DEP_DIRTY,
 };
 
 /**
@@ -81,11 +132,11 @@ enum pmonr_state {
 * Its values can also used to atomically read the state (preventing
 * unnecessary locks of pkgd->lock) in the following way:
 *                               pmonr state
- *             |     Off           Unused
+ *             |     Off           Unused       Active    Dep_Idle   Dep_Dirty
 * ============================================================================
- * sched_rmid  |  INVALID_RMID   INVALID_RMID
+ * sched_rmid  |  INVALID_RMID   INVALID_RMID    valid    lender's   lender's
 * ----------------------------------------------------------------------------
- * read_rmid   |  INVALID_RMID        0
+ * read_rmid   |  INVALID_RMID        0         (same)  INVALID_RMID  old rmid
 *
 */
 union pmonr_rmids {
@@ -98,16 +149,42 @@
 /**
 * struct pmonr - per-package componet of MONitored Resources (monr).
+ * @lender:		If in Dep_Idle or Dep_Dirty state, the pmonr that
+ *			lends its rmid to this pmonr. NULL otherwise.
+ * @pmonr_deps_head:	List of pmonrs in Dep_Idle or Dep_Dirty state that
+ *			borrow their sched_rmid from this pmonr.
+ * @pmonr_deps_entry:	Entry into lender's @pmonr_deps_head when in Dep_Idle
+ *			or Dep_Dirty state.
+ * @pkgd_deps_entry:	When in Dep_Idle or Dep_Dirty state, the list entry
+ *			for @dep_pmonrs.
 * @monr:		The monr that contains this pmonr.
 * @pkgd:		The package data associated with this pmonr.
+ * @rot_entry:		List entry to attach to pmonr rotation lists in
+ *			pkg_data.
+ * @last_enter_active:	Time of last transition into Active state.
 * @atomic_rmids:	Atomic accesor for this pmonr_rmids.
 * @state:		The state for this pmonr, note that this can also
 *			be inferred from the combination of sched_rmid and
 *			read_rmid in @atomic_rmids.
 */
 struct pmonr {
+	struct pmonr			*lender;
+	/* Save space with a union; a pmonr is in only one state at a time. */
+	union {
+		struct { /* variables for Active state. */
+			struct list_head	pmonr_deps_head;
+		};
+		struct { /* variables for Dep_Idle and Dep_Dirty states. */
+			struct list_head	pmonr_deps_entry;
+			struct list_head	pkgd_deps_entry;
+		};
+	};
+
 	struct monr			*monr;
 	struct pkg_data			*pkgd;
+	struct list_head		rot_entry;
+
+	atomic64_t			last_enter_active;
 
 	/* all writers are sync'ed by package's lock. */
 	atomic64_t			atomic_rmids;
@@ -130,7 +207,13 @@ struct pmonr {
 * @free_rmids:		Pool of free rmids.
 * @dirty_rmids:	Pool of "dirty" rmids that are not referenced
 *			by a pmonr.
+ * @active_pmonrs:	LRU of Active pmonrs.
+ * @dep_idle_pmonrs:	LRU of Dep_Idle pmonrs.
+ * @dep_dirty_pmonrs:	LRU of Dep_Dirty pmonrs.
+ * @dep_pmonrs:		LRU of Dep_Idle and Dep_Dirty pmonrs.
+ * @nr_dep_pmonrs:	nr Dep_Idle + nr Dep_Dirty pmonrs.
 * @mutex:		Hold when modifying this pkg_data.
+ * @mutex_key:		lockdep class for pkg_data's mutex.
 * @lock:		Hold to protect pmonrs in this pkg_data.
 * @work_cpu:		CPU to run rotation and other batch jobs.
 *			It must be in the package associated to its
@@ -142,6 +225,12 @@ struct pkg_data {
 	unsigned long		free_rmids[CMT_MAX_NR_RMIDS_LONGS];
 	unsigned long		dirty_rmids[CMT_MAX_NR_RMIDS_LONGS];
 
+	struct list_head	active_pmonrs;
+	struct list_head	dep_idle_pmonrs;
+	struct list_head	dep_dirty_pmonrs;
+	struct list_head	dep_pmonrs;
+	int			nr_dep_pmonrs;
+
 	struct mutex		mutex;
 	raw_spinlock_t		lock;
-- 
2.8.0.rc3.226.g39d4020
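
As an aside, the rmid encoding in the union pmonr_rmids table above allows
the state to be inferred from a single atomic64 read. A sketch only;
pmonr_state_from_rmids() is hypothetical and not part of this patch:

static enum pmonr_state pmonr_state_from_rmids(union pmonr_rmids rmids)
{
	/* Off and Unused both have an invalid sched_rmid. */
	if (rmids.sched_rmid == INVALID_RMID)
		return rmids.read_rmid == 0 ? PMONR_UNUSED : PMONR_OFF;
	/* Dep_Idle borrows a sched_rmid but has no rmid to read. */
	if (rmids.read_rmid == INVALID_RMID)
		return PMONR_DEP_IDLE;
	/* Active reads its own rmid; Dep_Dirty keeps its old one. */
	return rmids.read_rmid == rmids.sched_rmid ?
	       PMONR_ACTIVE : PMONR_DEP_DIRTY;
}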