Read the llc_occupancy of a cgroup by adding up the occupancy of all pmonrs that have a read_rmid in its subtree of the pmonr hierarchy, for the event's package.
The RMID to read for a monr is the same as its RMID to schedule in hw if
the monr is in (A)state. If in (IL)state, the RMID to read is that of its
limbo_prmid. This reduces the error introduced by (IL)states since the
llc_occupancy of limbo_prmid is a lower bound of its real llc_occupancy.
monrs in (U)state can be safely ignored since they do not have any
occupancy.

Reviewed-by: Stephane Eranian <eran...@google.com>
Signed-off-by: David Carrillo-Cisneros <davi...@google.com>
---
 arch/x86/events/intel/cqm.c | 227 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 220 insertions(+), 7 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 6e85021..c14f1c7 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -2305,18 +2305,231 @@ intel_cqm_setup_event(struct perf_event *event, struct perf_event **group)
 	return monr_hrchy_attach_event(event);
 }
 
+/* Return the child of @parent that follows @pos, or the first child when
+ * @pos is NULL. Returns NULL when there are no more children.
+ */
+static struct monr *
+monr_next_child(struct monr *pos, struct monr *parent)
+{
+#ifdef CONFIG_LOCKDEP
+	WARN_ON(!monr_hrchy_count_held_raw_spin_locks());
+#endif
+	if (!pos)
+		return list_first_entry_or_null(
+			&parent->children, struct monr, parent_entry);
+	if (list_is_last(&pos->parent_entry, &parent->children))
+		return NULL;
+	return list_next_entry(pos, parent_entry);
+}
+
+/* Pre-order iterator over the subtree rooted at @root. Pass @pos == NULL to
+ * start (yields @root), then pass the previous result; returns NULL once the
+ * whole subtree has been visited.
+ */
+static struct monr *
+monr_next_descendant_pre(struct monr *pos, struct monr *root)
+{
+	struct monr *next;
+
+#ifdef CONFIG_LOCKDEP
+	WARN_ON(!monr_hrchy_count_held_raw_spin_locks());
+#endif
+	if (!pos)
+		return root;
+	next = monr_next_child(NULL, pos);
+	if (next)
+		return next;
+	while (pos != root) {
+		next = monr_next_child(pos, pos->parent);
+		if (next)
+			return next;
+		pos = pos->parent;
+	}
+	return NULL;
+}
+
+/* Read pmonr's summary, safe to call without pkg's prmids lock.
+ * The possible scenarios are:
+ *  - summary's occupancy cannot be read, return -1.
+ *  - summary has no RMID but could be read as zero occupancy, return 0 and set
+ *    rmid = INVALID_RMID.
+ *  - summary has valid read RMID, set rmid to it.
+ */
+static inline int
+pmonr__get_read_rmid(struct pmonr *pmonr, u32 *rmid, bool fail_on_inherited)
+{
+	union prmid_summary summary;
+
+	*rmid = INVALID_RMID;
+
+	summary.value = atomic64_read(&pmonr->prmid_summary_atomic);
+	/* A pmonr in (I)state that doesn't fail can report its limbo_prmid
+	 * or NULL.
+	 */
+	if (prmid_summary__is_istate(summary) && fail_on_inherited)
+		return -1;
+	/* A pmonr with inactive monitoring can be safely ignored. */
+	if (!prmid_summary__is_mon_active(summary))
+		return 0;
+
+	/* A pmonr that hasn't run in a pkg is safe to ignore since it
+	 * cannot have occupancy there.
+	 */
+	if (prmid_summary__is_ustate(summary))
+		return 0;
+	/* At this point the pmonr is either in (A)state or (I)state
+	 * with fail_on_inherited=false. In the latter case,
+	 * read_rmid is INVALID_RMID and is a successful read_rmid.
+	 */
+	*rmid = summary.read_rmid;
+	return 0;
+}
+
+/* Read occupancy for all pmonrs in the subtree rooted at monr
+ * for the current package.
+ * Best effort two-stage read. First, obtain all RMIDs in subtree
+ * with locks held. The rmids are added to stack. If stack is full
+ * proceed to update and read in place. After storing the RMIDs,
+ * update and read occupancy for rmids in stack.
+ * Returns 0 on success and non-zero on failure, in which case *total may
+ * hold a partial sum.
+ */
+static int pmonr__read_subtree(struct monr *monr, u16 pkg_id,
+			       u64 *total, bool fail_on_inh_descendant)
+{
+	struct monr *pos = NULL;
+	struct astack astack;
+	int ret;
+	unsigned long flags;
+	u64 count;
+	struct pkg_data *pkg_data = cqm_pkgs_data[pkg_id];
+
+	*total = 0;
+	/* Must run in a CPU in the package to read. */
+	if (WARN_ON_ONCE(pkg_id !=
+			 topology_physical_package_id(smp_processor_id())))
+		return -1;
+
+	astack__init(&astack, NR_RMIDS_PER_NODE - 1, pkg_id);
+
+	/* Lock to protect against changes in pmonr hierarchy. */
+	raw_spin_lock_irqsave_nested(&pkg_data->pkg_data_lock, flags, pkg_id);
+
+	while ((pos = monr_next_descendant_pre(pos, monr))) {
+		struct prmid *prmid;
+		u32 rmid;
+		/* the pmonr of the monr to read cannot be inherited,
+		 * descendants may, depending on flag.
+		 */
+		bool fail_on_inh = pos == monr || fail_on_inh_descendant;
+
+		ret = pmonr__get_read_rmid(pos->pmonrs[pkg_id],
+					   &rmid, fail_on_inh);
+		if (ret)
+			goto exit_error;
+
+		if (rmid == INVALID_RMID)
+			continue;
+
+		ret = astack__push(&astack);
+		if (!ret) {
+			__astack__top(&astack, rmids) = rmid;
+			continue;
+		}
+		/* If no space in stack, update and read here (slower). */
+		prmid = __prmid_from_rmid(pkg_id, rmid);
+		if (WARN_ON_ONCE(!prmid))
+			goto exit_error;
+
+		ret = cqm_prmid_update(prmid);
+		if (ret < 0)
+			goto exit_error;
+
+		*total += atomic64_read(&prmid->last_read_value);
+	}
+	raw_spin_unlock_irqrestore(&pkg_data->pkg_data_lock, flags);
+
+	ret = astack__rmids_sum_apply(&astack, pkg_id,
+				      &__rmid_fn__cqm_prmid_update, &count);
+	/* Release on every exit path, as exit_error does. */
+	astack__release(&astack);
+	if (ret < 0)
+		return ret;
+
+	*total += count;
+	return 0;
+
+exit_error:
+	raw_spin_unlock_irqrestore(&pkg_data->pkg_data_lock, flags);
+	astack__release(&astack);
+	return ret;
+}
+
+/* Read current package immediately and remote pkg (if any) from cache. */
+static void __read_task_event(struct perf_event *event)
+{
+	int i, ret;
+	u64 count = 0;
+	u16 pkg_id = topology_physical_package_id(smp_processor_id());
+	struct monr *monr = monr_from_event(event);
+
+	/* Read either local or polled occupancy from all packages. */
+	cqm_pkg_id_for_each_online(i) {
+		struct prmid *prmid;
+		u32 rmid;
+		struct pmonr *pmonr = monr->pmonrs[i];
+
+		ret = pmonr__get_read_rmid(pmonr, &rmid, true);
+		if (ret)
+			return;
+		if (rmid == INVALID_RMID)
+			continue;
+		prmid = __prmid_from_rmid(i, rmid);
+		if (WARN_ON_ONCE(!prmid))
+			return;
+
+		/* update and read local for this cpu's package. */
+		if (i == pkg_id)
+			cqm_prmid_update(prmid);
+		count += atomic64_read(&prmid->last_read_value);
+	}
+	local64_set(&event->count, count);
+}
+
 /* Read current package immediately and remote pkg (if any) from cache. */
 static void intel_cqm_event_read(struct perf_event *event)
 {
-	union prmid_summary summary;
-	struct prmid *prmid;
+	struct monr *monr;
+	u64 count;
 	u16 pkg_id = topology_physical_package_id(smp_processor_id());
-	struct pmonr *pmonr = monr_from_event(event)->pmonrs[pkg_id];
 
-	summary.value = atomic64_read(&pmonr->prmid_summary_atomic);
-	prmid = __prmid_from_rmid(pkg_id, summary.read_rmid);
-	cqm_prmid_update(prmid);
-	local64_set(&event->count, atomic64_read(&prmid->last_read_value));
+	monr = monr_from_event(event);
+
+	WARN_ON_ONCE(event->cpu != -1 &&
+		     topology_physical_package_id(event->cpu) != pkg_id);
+
+	/* Only perf_event leader can return a value, everybody else share
+	 * the same RMID.
+	 */
+	if (event->parent) {
+		local64_set(&event->count, 0);
+		return;
+	}
+
+	if (event->attach_state & PERF_ATTACH_TASK) {
+		__read_task_event(event);
+		return;
+	}
+
+	/* It's either a cgroup or a cpu event. */
+	if (WARN_ON_ONCE(event->cpu < 0))
+		return;
+
+	/* XXX: expose fail_on_inh_descendant as a configuration parameter? */
+	pmonr__read_subtree(monr, pkg_id, &count, false);
+
+	local64_set(&event->count, count);
+	return;
 }
 
 static inline bool cqm_group_leader(struct perf_event *event)
-- 
2.8.0.rc3.226.g39d4020