Make rotation run as a delayed_work that makes a best-effort attempt to rotate __cqm_min_progress_rate pmonrs per second in every package.
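The per-iteration targets the worker passes to __intel_cqm_rmid_rotate() follow from the rotation period and this progress-rate SLO. A rough user-space sketch of that sizing arithmetic (the defaults mirror the ones added in cqm.h; max_rmid is a made-up example value and max_uint() stands in for the kernel's max() macro):

#include <stdio.h>

/* Defaults as added in cqm.h by this patch. */
#define CQM_DEFAULT_ROTATION_PERIOD	1200	/* ms between rotation runs */
#define CQM_DEFAULT_MIN_PROGRESS_RATE	1	/* pmonrs activated per second */

static unsigned int max_uint(unsigned int a, unsigned int b)
{
	return a > b ? a : b;
}

int main(void)
{
	/* Example only: a package exposing 64 RMIDs (max_rmid == 63). */
	unsigned int max_rmid = 63;
	unsigned int interval_ms = CQM_DEFAULT_ROTATION_PERIOD;

	/* Allow at most 25% of the package's RMIDs to sit in limbo. */
	unsigned int max_limbo_rmids = max_uint(1u, (max_rmid + 1) / 4);

	/* Minimum pmonrs that must reach (A)state in one iteration. */
	unsigned int min_activated = max_uint(1u,
		interval_ms * CQM_DEFAULT_MIN_PROGRESS_RATE / 1000);

	printf("max_limbo_rmids=%u min_activated=%u\n",
	       max_limbo_rmids, min_activated);
	return 0;
}

With the 1200 ms default period the worker thus aims to activate at least one pmonr per run while keeping no more than a quarter of the package's RMIDs in limbo.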
Reviewed-by: Stephane Eranian <eran...@google.com>
Signed-off-by: David Carrillo-Cisneros <davi...@google.com>
---
 arch/x86/events/intel/cqm.c | 70 ++++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/events/intel/cqm.h | 21 ++++++++++++++
 2 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 203fc66..a61dd70 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -222,6 +222,8 @@ static int pkg_data_init_cpu(int cpu)
 	mutex_init(&pkg_data->pkg_data_mutex);
 	raw_spin_lock_init(&pkg_data->pkg_data_lock);
 
+	INIT_DELAYED_WORK(
+		&pkg_data->rotation_work, intel_cqm_rmid_rotation_work);
 	/* XXX: Chose randomly*/
 	pkg_data->rotation_cpu = cpu;
 
@@ -1384,7 +1386,7 @@ read_nr_instate_pmonrs(struct pkg_data *pkg_data, u16 pkg_id) {
  * this point we can start reading values for the new RMID and treat the
  * old RMID as the free RMID for the next rotation.
  */
-void
+static void
 __intel_cqm_rmid_rotate(struct pkg_data *pkg_data,
 			unsigned int nr_max_limbo,
 			unsigned int nr_min_activated)
@@ -1519,6 +1521,70 @@ exit:
 
 static struct pmu intel_cqm_pmu;
 
+/* Rotation only needs to be run when there is any pmonr in (I)state. */
+static bool intel_cqm_need_rotation(u16 pkg_id)
+{
+
+	struct pkg_data *pkg_data;
+	bool need_rot;
+
+	pkg_data = cqm_pkgs_data[pkg_id];
+
+	mutex_lock_nested(&pkg_data->pkg_data_mutex, pkg_id);
+	/* Rotation is needed if prmids in limbo need to be recycled or if
+	 * there are pmonrs in (I)state.
+	 */
+	need_rot = !list_empty(&pkg_data->nopmonr_limbo_prmids_pool) ||
+		   !list_empty(&pkg_data->istate_pmonrs_lru);
+
+	mutex_unlock(&pkg_data->pkg_data_mutex);
+	return need_rot;
+}
+
+/*
+ * Schedule rotation in one package.
+ */
+static void __intel_cqm_schedule_rotation_for_pkg(u16 pkg_id)
+{
+	struct pkg_data *pkg_data;
+	unsigned long delay;
+
+	delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
+	pkg_data = cqm_pkgs_data[pkg_id];
+	schedule_delayed_work_on(
+		pkg_data->rotation_cpu, &pkg_data->rotation_work, delay);
+}
+
+/*
+ * Schedule rotation and rmid's timed update in all packages.
+ * Rescheduling will stop when no longer needed.
+ */
+static void intel_cqm_schedule_work_all_pkgs(void)
+{
+	int pkg_id;
+
+	cqm_pkg_id_for_each_online(pkg_id)
+		__intel_cqm_schedule_rotation_for_pkg(pkg_id);
+}
+
+static void intel_cqm_rmid_rotation_work(struct work_struct *work)
+{
+	struct pkg_data *pkg_data = container_of(
+		to_delayed_work(work), struct pkg_data, rotation_work);
+	/* Allow max 25% of RMIDs to be in limbo. */
+	unsigned int max_limbo_rmids = max(1u, (pkg_data->max_rmid + 1) / 4);
+	unsigned int min_activated = max(1u, (intel_cqm_pmu.hrtimer_interval_ms
+		* __cqm_min_progress_rate) / 1000);
+	u16 pkg_id = topology_physical_package_id(pkg_data->rotation_cpu);
+
+	WARN_ON_ONCE(pkg_data != cqm_pkgs_data[pkg_id]);
+
+	__intel_cqm_rmid_rotate(pkg_data, max_limbo_rmids, min_activated);
+
+	if (intel_cqm_need_rotation(pkg_id))
+		__intel_cqm_schedule_rotation_for_pkg(pkg_id);
+}
+
 /*
  * Find a group and setup RMID.
  *
@@ -1744,6 +1810,8 @@ static int intel_cqm_event_init(struct perf_event *event)
 
 	mutex_unlock(&cqm_mutex);
 
+	intel_cqm_schedule_work_all_pkgs();
+
 	return 0;
 }
 
diff --git a/arch/x86/events/intel/cqm.h b/arch/x86/events/intel/cqm.h
index 12f4156..7e4e37a 100644
--- a/arch/x86/events/intel/cqm.h
+++ b/arch/x86/events/intel/cqm.h
@@ -239,6 +239,7 @@ struct pmonr {
  *			during process scheduling. The locks for all
  *			packages must be held when modifying the monr
  *			hierarchy.
+ * @rotation_work:	Task that performs rotation of prmids.
  * @rotation_cpu:	CPU to run @rotation_work on, it must be in the
  *			package associated to this instance of pkg_data.
  */
@@ -268,6 +269,7 @@ struct pkg_data {
 	struct mutex		pkg_data_mutex;
 	raw_spinlock_t		pkg_data_lock;
 
+	struct delayed_work	rotation_work;
 	int			rotation_cpu;
 };
 
@@ -428,6 +430,18 @@ static inline int monr_hrchy_count_held_raw_spin_locks(void)
 #define CQM_DEFAULT_ROTATION_PERIOD	1200	/* ms */
 
 /*
+ * Rotation function.
+ * Rotation logic runs per-package. In each package, if free rmids are needed,
+ * it will steal prmids from the pmonr that has been the longest time in
+ * (A)state.
+ * The hardware provides no way to signal that a rmid will be reused; therefore,
+ * before reusing a rmid that has been stolen, the rmid should stay for some
+ * time in a "limbo" state where it is not associated to any thread, hoping
+ * that the cache lines allocated for this rmid will eventually be replaced.
+ */
+static void intel_cqm_rmid_rotation_work(struct work_struct *work);
+
+/*
  * Service Level Objectives (SLO) for the rotation logic.
  *
  * @__cqm_min_duration_mon_slice:	Minimum duration of a monitored slice.
@@ -444,6 +458,13 @@ static unsigned int __cqm_min_mon_slice = CQM_DEFAULT_MIN_MON_SLICE;
 static unsigned int __cqm_max_wait_mon = CQM_DEFAULT_MAX_WAIT_MON;
 
 /*
+ * Minimum number of pmonrs that must go to Active state per second in order
+ * to consider rotation to be effective.
+ */
+#define CQM_DEFAULT_MIN_PROGRESS_RATE 1
+static unsigned int __cqm_min_progress_rate = CQM_DEFAULT_MIN_PROGRESS_RATE;
+
+/*
  * If we fail to assign any RMID for intel_cqm_rotation because cachelines are
  * still tagged with RMIDs in limbo even after having stolen enough rmids (a
  * maximum number of rmids in limbo at any time), then we increment the dirty
-- 
2.8.0.rc3.226.g39d4020