To make rmid rotation more dependable, this patch series introduces rotation Service Level Objectives (SLOs), which are described in the code's documentation.
This patch introduces the cmt_{pre,min}_mon_slice SLOs, which protect against bogus values when a rmid has not been available since the beginning of monitoring. It also introduces the auxiliary variables necessary for the SLOs to work, and the checks in intel_cmt_event_read that enforce the SLOs when reading the llc_occupancy event.

Signed-off-by: David Carrillo-Cisneros <davi...@google.com>
---
 arch/x86/events/intel/cmt.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/events/intel/cmt.h | 28 +++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index 3ade923..649eb5f 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -51,6 +51,25 @@ static size_t pkg_uflags_size;
 static struct pkg_data **cmt_pkgs_data;
 
 /*
+ * Rotation Service Level Objectives (SLO) for monrs with llc_occupancy
+ * monitoring. Note that these are monr level SLOs, therefore all pmonrs in
+ * the monr meet or exceed them.
+ * (A "monitored" monr is a monr with no pmonr in a Dependent state).
+ *
+ * SLOs:
+ *
+ * @__cmt_pre_mon_slice: Min time a monr is monitored before being readable.
+ * @__cmt_min_mon_slice: Min time a monr stays monitored after becoming
+ *			 readable.
+ */
+#define CMT_DEFAULT_PRE_MON_SLICE 2000	/* ms */
+static u64 __cmt_pre_mon_slice;
+
+#define CMT_DEFAULT_MIN_MON_SLICE 5000	/* ms */
+static u64 __cmt_min_mon_slice;
+
+
+/*
  * If @pkgd == NULL, return first online, pkg_data in cmt_pkgs_data.
  * Otherwise next online pkg_data or NULL if no more.
  */
@@ -300,6 +319,7 @@ static void pmonr_to_unused(struct pmonr *pmonr)
 			pmonr_move_all_dependants(pmonr, lender);
 		}
 		__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
+		pkgd->nr_dirty_rmids++;
 
 	} else if (pmonr->state == PMONR_DEP_IDLE ||
 		   pmonr->state == PMONR_DEP_DIRTY) {
@@ -312,6 +332,11 @@ static void pmonr_to_unused(struct pmonr *pmonr)
 			__set_bit(rmids.read_rmid, pkgd->dirty_rmids);
 		else
 			pkgd->nr_dep_pmonrs--;
+
+
+		if (!atomic_dec_and_test(&pmonr->monr->nr_dep_pmonrs))
+			atomic64_set(&pmonr->monr->last_rmid_recoup,
+				     get_jiffies_64());
 	} else {
 		WARN_ON_ONCE(true);
 		return;
@@ -372,6 +397,7 @@ static inline void __pmonr_to_dep_helper(
 
 	lender_rmids.value = atomic64_read(&lender->atomic_rmids);
 	pmonr_set_rmids(pmonr, lender_rmids.sched_rmid, read_rmid);
+	atomic_inc(&pmonr->monr->nr_dep_pmonrs);
 }
 
 static inline void pmonr_unused_to_dep_idle(struct pmonr *pmonr)
@@ -390,6 +416,7 @@ static void pmonr_unused_to_off(struct pmonr *pmonr)
 
 static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
 {
+	struct pkg_data *pkgd = pmonr->pkgd;
 	struct pmonr *lender;
 	union pmonr_rmids rmids;
 
@@ -398,6 +425,7 @@ static void pmonr_active_to_dep_dirty(struct pmonr *pmonr)
 
 	rmids.value = atomic64_read(&pmonr->atomic_rmids);
 	__pmonr_to_dep_helper(pmonr, lender, rmids.read_rmid);
+	pkgd->nr_dirty_rmids++;
 }
 
 static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
@@ -408,6 +436,9 @@ static void __pmonr_dep_to_active_helper(struct pmonr *pmonr, u32 rmid)
 	pmonr_move_dependants(pmonr->lender, pmonr);
 	pmonr->lender = NULL;
 	__pmonr_to_active_helper(pmonr, rmid);
+
+	if (!atomic_dec_and_test(&pmonr->monr->nr_dep_pmonrs))
+		atomic64_set(&pmonr->monr->last_rmid_recoup, get_jiffies_64());
 }
 
 static void pmonr_dep_idle_to_active(struct pmonr *pmonr, u32 rmid)
@@ -422,6 +453,7 @@ static void pmonr_dep_dirty_to_active(struct pmonr *pmonr)
 	union pmonr_rmids rmids;
 
 	rmids.value = atomic64_read(&pmonr->atomic_rmids);
+	pmonr->pkgd->nr_dirty_rmids--;
 	__pmonr_dep_to_active_helper(pmonr, rmids.read_rmid);
 }
 
@@ -1599,7 +1631,7 @@ static int read_all_pkgs(struct monr *monr, int wait_time_ms, u64 *count)
 static int intel_cmt_event_read(struct perf_event *event)
 {
 	struct monr *monr = monr_from_event(event);
-	u64 count;
+	u64 count, recoup, wait_end;
 	u16 pkgid = topology_logical_package_id(smp_processor_id());
 	int err;
 
@@ -1614,6 +1646,15 @@ static int intel_cmt_event_read(struct perf_event *event)
 		return -ENXIO;
 
 	/*
+	 * If a rmid has been stolen, only read if enough time has elapsed
+	 * since the rmids were recovered.
+	 */
+	recoup = atomic64_read(&monr->last_rmid_recoup);
+	wait_end = recoup + __cmt_pre_mon_slice;
+	if (recoup && time_before64(get_jiffies_64(), wait_end))
+		return -EAGAIN;
+
+	/*
 	 * Only event parent can return a value, everyone else share its
 	 * rmid and therefore doesn't track occupancy independently.
 	 */
@@ -2267,6 +2308,9 @@ static int __init intel_cmt_init(void)
 	struct pkg_data *pkgd = NULL;
 	int err = 0;
 
+	__cmt_pre_mon_slice = msecs_to_jiffies(CMT_DEFAULT_PRE_MON_SLICE);
+	__cmt_min_mon_slice = msecs_to_jiffies(CMT_DEFAULT_MIN_MON_SLICE);
+
 	if (!x86_match_cpu(intel_cmt_match)) {
 		err = -ENODEV;
 		goto err_exit;
diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
index 8bb43bd..8756666 100644
--- a/arch/x86/events/intel/cmt.h
+++ b/arch/x86/events/intel/cmt.h
@@ -52,6 +52,24 @@
  * schedule and read.
  *
  *
+ * Rotation
+ *
+ * The number of rmids in hw is relatively small with respect to the number
+ * of potentially monitored resources. rmids are rotated among pmonrs that
+ * need one to give a fair-ish usage of this resource.
+ *
+ * A hw constraint is that occupancy for a rmid cannot be restarted, therefore
+ * a rmid with llc_occupancy needs some time unscheduled until all cache lines
+ * tagged to it are evicted from cache (if this ever happens).
+ *
+ * When a rmid is "rotated", it is stolen from a pmonr and must wait until its
+ * llc_occupancy has decreased enough to be considered "clean". Meanwhile, that
+ * rmid is considered "dirty".
+ *
+ * Rotation logic periodically reads occupancy of these "dirty" rmids and, when
+ * clean, the rmid is either reused or placed in a free pool.
+ *
+ *
  * Locking
  *
  * One global cmt_mutex. One mutex and spin_lock per package.
@@ -62,6 +80,7 @@
  *   cgroup start/stop.
  * - Hold pkg->mutex and pkg->lock in _all_ active packages to traverse or
  *   change the monr hierarchy.
+ * - pkgd->mutex: Hold in current package for rotation in that pkgd.
 * - pkgd->lock: Hold in current package to access that pkgd's members. Hold
 *   a pmonr's package pkgd->lock for non-atomic access to pmonr.
 */
@@ -225,6 +244,7 @@ struct cmt_csd {
  * @dep_dirty_pmonrs: LRU of Dep_Dirty pmonrs.
  * @dep_pmonrs: LRU of Dep_Idle and Dep_Dirty pmonrs.
  * @nr_dep_pmonrs: nr Dep_Idle + nr Dep_Dirty pmonrs.
+ * @nr_dirty_rmids: nr of "dirty" rmids, both with and without a pmonr.
  * @mutex: Hold when modifying this pkg_data.
  * @mutex_key: lockdep class for pkg_data's mutex.
  * @lock: Hold to protect pmonrs in this pkg_data.
@@ -243,6 +263,7 @@ struct pkg_data {
 	struct list_head dep_dirty_pmonrs;
 	struct list_head dep_pmonrs;
 	int nr_dep_pmonrs;
+	int nr_dirty_rmids;
 
 	struct mutex mutex;
 	raw_spinlock_t lock;
@@ -280,6 +301,10 @@ enum cmt_user_flags {
  * @parent: Parent in monr hierarchy.
  * @children: List of children in monr hierarchy.
  * @parent_entry: Entry in parent's children list.
+ * @last_rmid_recoup: Last time that nr_dep_pmonrs decreased to zero. It's
+ *	zero if a rmid has never been stolen from this monr.
+ * @nr_dep_pmonrs: nr of Dep_* pmonrs in this monr. A zero value implies
+ *	that the monr is monitored in all required packages.
  * @flags: monr_flags.
  * @nr_has_user: nr of CMT_UF_HAS_USER set in events in mon_events.
  * @nr_nolazy_user: nr of CMT_UF_NOLAZY_RMID set in events in mon_events.
@@ -303,6 +328,9 @@ struct monr {
 	struct list_head children;
 	struct list_head parent_entry;
 
+	atomic64_t last_rmid_recoup;
+	atomic_t nr_dep_pmonrs;
+
 	enum monr_flags flags;
 	int nr_has_user;
 	int nr_nolazy_rmid;
-- 
2.8.0.rc3.226.g39d4020
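
A note for reviewers, not part of the patch: the read-side gate added to intel_cmt_event_read() boils down to a single monotonic-time comparison against __cmt_pre_mon_slice. The standalone C sketch below models only that decision; the names (fake_monr, may_read_occupancy) and the plain millisecond counters used in place of jiffies and time_before64() are illustrative assumptions, not kernel API.

/*
 * Userspace model of the pre-monitoring SLO gate added to
 * intel_cmt_event_read(). Milliseconds stand in for jiffies; all names
 * here are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define PRE_MON_SLICE_MS 2000	/* mirrors CMT_DEFAULT_PRE_MON_SLICE */

struct fake_monr {
	/* 0 means no rmid was ever stolen from this monr */
	uint64_t last_rmid_recoup_ms;
};

/* Return 0 if a read may proceed, -1 (think -EAGAIN) if it must wait. */
static int may_read_occupancy(const struct fake_monr *monr, uint64_t now_ms)
{
	uint64_t recoup = monr->last_rmid_recoup_ms;
	uint64_t wait_end = recoup + PRE_MON_SLICE_MS;

	/* Same shape as the patch: gate only if rmids were ever recovered. */
	if (recoup && now_ms < wait_end)
		return -1;
	return 0;
}

int main(void)
{
	struct fake_monr m = { .last_rmid_recoup_ms = 10000 };

	printf("%d\n", may_read_occupancy(&m, 11000));	/* -1: still settling */
	printf("%d\n", may_read_occupancy(&m, 12500));	/*  0: SLO met */
	return 0;
}

Built as an ordinary userspace program, it prints -1 for a read attempted 1000 ms after the rmids were recovered and 0 once the 2000 ms pre-monitoring slice has passed.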
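
The new cmt.h comment also describes the life cycle of a stolen rmid: it stays "dirty" until its llc_occupancy drains, then it is reused or returned to a free pool. The rotation worker that implements this is not part of this patch, so the toy model below only illustrates the cycle as the comment describes it; every name (steal_rmid, rotation_pass, DIRTY_THRESHOLD) and the fixed occupancy threshold are assumptions.

/*
 * Toy model of the rmid recycling cycle described in the cmt.h comment:
 * a stolen rmid stays "dirty" until its residual occupancy drops below
 * a threshold, then it may be handed out again. This is not the
 * kernel's rotation logic.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_RMIDS	8
#define DIRTY_THRESHOLD	1024	/* residual occupancy (bytes) tolerated */

static uint64_t occupancy[NR_RMIDS];	/* stand-in for reading the event MSR */
static int dirty[NR_RMIDS];		/* 1 if stolen and not yet clean */
static int free_pool[NR_RMIDS];		/* 1 if available for reuse */

/* Steal an rmid from its user: it becomes dirty, not immediately reusable. */
static void steal_rmid(int rmid)
{
	dirty[rmid] = 1;
	free_pool[rmid] = 0;
}

/* Periodic pass: dirty rmids whose occupancy drained move to the free pool. */
static void rotation_pass(void)
{
	for (int r = 0; r < NR_RMIDS; r++) {
		if (dirty[r] && occupancy[r] < DIRTY_THRESHOLD) {
			dirty[r] = 0;
			free_pool[r] = 1;
		}
	}
}

int main(void)
{
	steal_rmid(3);
	occupancy[3] = 4096;	/* cache lines still tagged to rmid 3 */
	rotation_pass();
	printf("rmid 3 reusable: %d\n", free_pool[3]);	/* 0: still dirty */

	occupancy[3] = 512;	/* occupancy drained over time */
	rotation_pass();
	printf("rmid 3 reusable: %d\n", free_pool[3]);	/* 1: clean, reusable */
	return 0;
}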