Move code around, delete unnecessary code and do some renaming in order to increase readability of the next patches. Create cqm.h file.
Reviewed-by: Stephane Eranian <eran...@google.com> Signed-off-by: David Carrillo-Cisneros <davi...@google.com> --- arch/x86/events/intel/cqm.c | 170 +++++++++++++++----------------------------- arch/x86/events/intel/cqm.h | 42 +++++++++++ include/linux/perf_event.h | 8 +-- 3 files changed, 103 insertions(+), 117 deletions(-) create mode 100644 arch/x86/events/intel/cqm.h diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index d5eac8f..f678014 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -4,10 +4,9 @@ * Based very, very heavily on work by Peter Zijlstra. */ -#include <linux/perf_event.h> #include <linux/slab.h> #include <asm/cpu_device_id.h> -#include <asm/pqr_common.h> +#include "cqm.h" #include "../perf_event.h" #define MSR_IA32_QM_CTR 0x0c8e @@ -16,13 +15,26 @@ static u32 cqm_max_rmid = -1; static unsigned int cqm_l3_scale; /* supposedly cacheline size */ +#define RMID_VAL_ERROR (1ULL << 63) +#define RMID_VAL_UNAVAIL (1ULL << 62) + +#define QOS_L3_OCCUP_EVENT_ID (1 << 0) + +#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID + +#define CQM_EVENT_ATTR_STR(_name, v, str) \ +static struct perf_pmu_events_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ + .id = 0, \ + .event_str = str, \ +} + /* * Updates caller cpu's cache. */ static inline void __update_pqr_rmid(u32 rmid) { struct intel_pqr_state *state = this_cpu_ptr(&pqr_state); - if (state->rmid == rmid) return; state->rmid = rmid; @@ -30,37 +42,18 @@ static inline void __update_pqr_rmid(u32 rmid) } /* - * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru. - * Also protects event->hw.cqm_rmid - * - * Hold either for stability, both for modification of ->hw.cqm_rmid. - */ -static DEFINE_MUTEX(cache_mutex); -static DEFINE_RAW_SPINLOCK(cache_lock); - -#define CQM_EVENT_ATTR_STR(_name, v, str) \ -static struct perf_pmu_events_attr event_attr_##v = { \ - .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ - .id = 0, \ - .event_str = str, \ -} - -/* * Groups of events that have the same target(s), one RMID per group. + * Protected by cqm_mutex. */ static LIST_HEAD(cache_groups); +static DEFINE_MUTEX(cqm_mutex); +static DEFINE_RAW_SPINLOCK(cache_lock); /* * Mask of CPUs for reading CQM values. We only need one per-socket. */ static cpumask_t cqm_cpumask; -#define RMID_VAL_ERROR (1ULL << 63) -#define RMID_VAL_UNAVAIL (1ULL << 62) - -#define QOS_L3_OCCUP_EVENT_ID (1 << 0) - -#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID /* * This is central to the rotation algorithm in __intel_cqm_rmid_rotate(). @@ -71,8 +64,6 @@ static cpumask_t cqm_cpumask; */ static u32 intel_cqm_rotation_rmid; -#define INVALID_RMID (-1) - /* * Is @rmid valid for programming the hardware? * @@ -140,7 +131,7 @@ struct cqm_rmid_entry { * rotation worker moves RMIDs from the limbo list to the free list once * the occupancy value drops below __intel_cqm_threshold. * - * Both lists are protected by cache_mutex. + * Both lists are protected by cqm_mutex. */ static LIST_HEAD(cqm_rmid_free_lru); static LIST_HEAD(cqm_rmid_limbo_lru); @@ -172,13 +163,13 @@ static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid) /* * Returns < 0 on fail. * - * We expect to be called with cache_mutex held. + * We expect to be called with cqm_mutex held. 
*/ static u32 __get_rmid(void) { struct cqm_rmid_entry *entry; - lockdep_assert_held(&cache_mutex); + lockdep_assert_held(&cqm_mutex); if (list_empty(&cqm_rmid_free_lru)) return INVALID_RMID; @@ -193,7 +184,7 @@ static void __put_rmid(u32 rmid) { struct cqm_rmid_entry *entry; - lockdep_assert_held(&cache_mutex); + lockdep_assert_held(&cqm_mutex); WARN_ON(!__rmid_valid(rmid)); entry = __rmid_entry(rmid); @@ -237,9 +228,9 @@ static int intel_cqm_setup_rmid_cache(void) entry = __rmid_entry(0); list_del(&entry->list); - mutex_lock(&cache_mutex); + mutex_lock(&cqm_mutex); intel_cqm_rotation_rmid = __get_rmid(); - mutex_unlock(&cache_mutex); + mutex_unlock(&cqm_mutex); return 0; fail: @@ -250,6 +241,7 @@ fail: return -ENOMEM; } + /* * Determine if @a and @b measure the same set of tasks. * @@ -287,49 +279,11 @@ static bool __match_event(struct perf_event *a, struct perf_event *b) return false; } -#ifdef CONFIG_CGROUP_PERF -static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event) -{ - if (event->attach_state & PERF_ATTACH_TASK) - return perf_cgroup_from_task(event->hw.target, event->ctx); - - return event->cgrp; -} -#endif - struct rmid_read { u32 rmid; atomic64_t value; }; -static void intel_cqm_event_read(struct perf_event *event); - -/* - * If we fail to assign a new RMID for intel_cqm_rotation_rmid because - * cachelines are still tagged with RMIDs in limbo, we progressively - * increment the threshold until we find an RMID in limbo with <= - * __intel_cqm_threshold lines tagged. This is designed to mitigate the - * problem where cachelines tagged with an RMID are not steadily being - * evicted. - * - * On successful rotations we decrease the threshold back towards zero. - * - * __intel_cqm_max_threshold provides an upper bound on the threshold, - * and is measured in bytes because it's exposed to userland. - */ -static unsigned int __intel_cqm_threshold; -static unsigned int __intel_cqm_max_threshold; - -/* - * Initially use this constant for both the limbo queue time and the - * rotation timer interval, pmu::hrtimer_interval_ms. - * - * They don't need to be the same, but the two are related since if you - * rotate faster than you recycle RMIDs, you may run out of available - * RMIDs. 
- */ -#define RMID_DEFAULT_QUEUE_TIME 250 /* ms */ - static struct pmu intel_cqm_pmu; /* @@ -344,7 +298,7 @@ static void intel_cqm_setup_event(struct perf_event *event, bool conflict = false; u32 rmid; - list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) { + list_for_each_entry(iter, &cache_groups, hw.cqm_event_groups_entry) { rmid = iter->hw.cqm_rmid; if (__match_event(iter, event)) { @@ -390,24 +344,24 @@ out: static inline bool cqm_group_leader(struct perf_event *event) { - return !list_empty(&event->hw.cqm_groups_entry); + return !list_empty(&event->hw.cqm_event_groups_entry); } static void intel_cqm_event_start(struct perf_event *event, int mode) { - if (!(event->hw.cqm_state & PERF_HES_STOPPED)) + if (!(event->hw.state & PERF_HES_STOPPED)) return; - event->hw.cqm_state &= ~PERF_HES_STOPPED; + event->hw.state &= ~PERF_HES_STOPPED; __update_pqr_rmid(event->hw.cqm_rmid); } static void intel_cqm_event_stop(struct perf_event *event, int mode) { - if (event->hw.cqm_state & PERF_HES_STOPPED) + if (event->hw.state & PERF_HES_STOPPED) return; - event->hw.cqm_state |= PERF_HES_STOPPED; + event->hw.state |= PERF_HES_STOPPED; intel_cqm_event_read(event); __update_pqr_rmid(0); } @@ -419,7 +373,7 @@ static int intel_cqm_event_add(struct perf_event *event, int mode) raw_spin_lock_irqsave(&cache_lock, flags); - event->hw.cqm_state = PERF_HES_STOPPED; + event->hw.state = PERF_HES_STOPPED; rmid = event->hw.cqm_rmid; if (__rmid_valid(rmid) && (mode & PERF_EF_START)) @@ -433,16 +387,16 @@ static void intel_cqm_event_destroy(struct perf_event *event) { struct perf_event *group_other = NULL; - mutex_lock(&cache_mutex); + mutex_lock(&cqm_mutex); /* * If there's another event in this group... */ - if (!list_empty(&event->hw.cqm_group_entry)) { - group_other = list_first_entry(&event->hw.cqm_group_entry, + if (!list_empty(&event->hw.cqm_event_group_entry)) { + group_other = list_first_entry(&event->hw.cqm_event_group_entry, struct perf_event, - hw.cqm_group_entry); - list_del(&event->hw.cqm_group_entry); + hw.cqm_event_group_entry); + list_del(&event->hw.cqm_event_group_entry); } /* @@ -454,18 +408,18 @@ static void intel_cqm_event_destroy(struct perf_event *event) * destroy the group and return the RMID. 
*/ if (group_other) { - list_replace(&event->hw.cqm_groups_entry, - &group_other->hw.cqm_groups_entry); + list_replace(&event->hw.cqm_event_groups_entry, + &group_other->hw.cqm_event_groups_entry); } else { u32 rmid = event->hw.cqm_rmid; if (__rmid_valid(rmid)) __put_rmid(rmid); - list_del(&event->hw.cqm_groups_entry); + list_del(&event->hw.cqm_event_groups_entry); } } - mutex_unlock(&cache_mutex); + mutex_unlock(&cqm_mutex); } static int intel_cqm_event_init(struct perf_event *event) @@ -488,25 +442,26 @@ static int intel_cqm_event_init(struct perf_event *event) event->attr.sample_period) /* no sampling */ return -EINVAL; - INIT_LIST_HEAD(&event->hw.cqm_group_entry); - INIT_LIST_HEAD(&event->hw.cqm_groups_entry); + INIT_LIST_HEAD(&event->hw.cqm_event_groups_entry); + INIT_LIST_HEAD(&event->hw.cqm_event_group_entry); event->destroy = intel_cqm_event_destroy; - mutex_lock(&cache_mutex); + mutex_lock(&cqm_mutex); + /* Will also set rmid */ intel_cqm_setup_event(event, &group); if (group) { - list_add_tail(&event->hw.cqm_group_entry, - &group->hw.cqm_group_entry); + list_add_tail(&event->hw.cqm_event_group_entry, + &group->hw.cqm_event_group_entry); } else { - list_add_tail(&event->hw.cqm_groups_entry, - &cache_groups); + list_add_tail(&event->hw.cqm_event_groups_entry, + &cache_groups); } - mutex_unlock(&cache_mutex); + mutex_unlock(&cqm_mutex); return 0; } @@ -543,14 +498,14 @@ static struct attribute_group intel_cqm_format_group = { }; static ssize_t -max_recycle_threshold_show(struct device *dev, struct device_attribute *attr, - char *page) +max_recycle_threshold_show( + struct device *dev, struct device_attribute *attr, char *page) { ssize_t rv; - mutex_lock(&cache_mutex); + mutex_lock(&cqm_mutex); rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold); - mutex_unlock(&cache_mutex); + mutex_unlock(&cqm_mutex); return rv; } @@ -560,25 +515,16 @@ max_recycle_threshold_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - unsigned int bytes, cachelines; + unsigned int bytes; int ret; ret = kstrtouint(buf, 0, &bytes); if (ret) return ret; - mutex_lock(&cache_mutex); - + mutex_lock(&cqm_mutex); __intel_cqm_max_threshold = bytes; - cachelines = bytes / cqm_l3_scale; - - /* - * The new maximum takes effect immediately. - */ - if (__intel_cqm_threshold > cachelines) - __intel_cqm_threshold = cachelines; - - mutex_unlock(&cache_mutex); + mutex_unlock(&cqm_mutex); return count; } @@ -602,7 +548,7 @@ static const struct attribute_group *intel_cqm_attr_groups[] = { }; static struct pmu intel_cqm_pmu = { - .hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME, + .hrtimer_interval_ms = CQM_DEFAULT_ROTATION_PERIOD, .attr_groups = intel_cqm_attr_groups, .task_ctx_nr = perf_sw_context, .event_init = intel_cqm_event_init, diff --git a/arch/x86/events/intel/cqm.h b/arch/x86/events/intel/cqm.h new file mode 100644 index 0000000..e25d0a1 --- /dev/null +++ b/arch/x86/events/intel/cqm.h @@ -0,0 +1,42 @@ +/* + * Intel Cache Quality-of-Service Monitoring (CQM) support. + * + * A Resource Manager ID (RMID) is a u32 value that, when programmed in a + * logical CPU, will allow the LLC cache to associate the changes in occupancy + * generated by that cpu (cache lines allocations - deallocations) to the RMID. + * If an rmid has been assigned to a thread T long enough for all cache lines + * used by T to be allocated, then the occupancy reported by the hardware is + * equal to the total cache occupancy for T. 
+ *
 + * Groups of threads that are to be monitored together (such as cgroups + * or processes) can share an RMID. + * + * This driver implements a tree hierarchy of Monitored Resources (monr). Each + * monr is a cgroup, a process or a thread that needs one single RMID. + */ + +#include <linux/perf_event.h> +#include <asm/pqr_common.h> + +/* + * Minimum time elapsed between reads of the occupancy value for an RMID when + * traversing the monr hierarchy. + */ +#define RMID_DEFAULT_MIN_UPDATE_TIME 20 /* ms */ + +#define INVALID_RMID (-1) + +/* + * Time between executions of the rotation logic. The frequency of execution does + * not affect the rate at which RMIDs are recycled, except by delaying the + * update of the prmids and their pools. + * The rotation period is stored in pmu->hrtimer_interval_ms. + */ +#define CQM_DEFAULT_ROTATION_PERIOD 1200 /* ms */ + +/* + * __intel_cqm_max_threshold provides an upper bound on the threshold, + * and is measured in bytes because it's exposed to userland. + * Its value must be scaled by cqm_l3_scale to obtain the equivalent number of + * cache lines. + */ +static unsigned int __intel_cqm_max_threshold; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3a847bf..5eb7dea 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -120,11 +120,9 @@ struct hw_perf_event { }; #ifdef CONFIG_INTEL_RDT struct { /* intel_cqm */ - int cqm_state; - u32 cqm_rmid; - struct list_head cqm_events_entry; - struct list_head cqm_groups_entry; - struct list_head cqm_group_entry; + u32 cqm_rmid; + struct list_head cqm_event_group_entry; + struct list_head cqm_event_groups_entry; }; #endif struct { /* itrace */ -- 2.8.0.rc3.226.g39d4020
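
For readers unfamiliar with the MSR interface behind the constants kept in cqm.c (MSR_IA32_QM_CTR, RMID_VAL_ERROR, RMID_VAL_UNAVAIL, QOS_L3_OCCUP_EVENT_ID), here is a minimal sketch of how an occupancy value is read for one RMID. The event-select MSR is not touched by the hunks above, so its name and 0x0c8d value are assumptions for illustration only; this is not code added by this patch.

#define MSR_IA32_QM_EVTSEL	0x0c8d	/* assumed companion of MSR_IA32_QM_CTR */

static u64 __rmid_read_sketch(u32 rmid)
{
	u64 val;

	/* Select the L3 occupancy event for @rmid, then read the counter. */
	wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
	rdmsrl(MSR_IA32_QM_CTR, val);

	/*
	 * Callers must check RMID_VAL_ERROR (invalid RMID/event pair) and
	 * RMID_VAL_UNAVAIL (no data available yet) before scaling the value
	 * by cqm_l3_scale to obtain bytes.
	 */
	return val;
}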
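
The renaming of the hw_perf_event list fields (cqm_group_entry -> cqm_event_group_entry, cqm_groups_entry -> cqm_event_groups_entry) keeps the existing two-level grouping: group leaders sit on the global cache_groups list, the other events of a group hang off their leader, and all of them share the leader's RMID. A hypothetical walker, shown only to illustrate how the renamed fields relate (not part of this patch; assumes cqm_mutex is held):

static void cqm_for_each_event_sketch(void (*fn)(struct perf_event *ev, u32 rmid))
{
	struct perf_event *leader, *member;

	lockdep_assert_held(&cqm_mutex);

	/* Leaders are linked on cache_groups via cqm_event_groups_entry. */
	list_for_each_entry(leader, &cache_groups, hw.cqm_event_groups_entry) {
		fn(leader, leader->hw.cqm_rmid);

		/* Group members share the leader's RMID. */
		list_for_each_entry(member, &leader->hw.cqm_event_group_entry,
				    hw.cqm_event_group_entry)
			fn(member, leader->hw.cqm_rmid);
	}
}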