An llc_occupancy read for a cgroup event must aggregate llc_occupancy
over all monrs at or below the event's monr.

The pmonr of the cgroup's monr must have a valid rmid for the read to
be meaningful. Descendant pmonrs that do not have a valid read_rmid
are skipped, since their occupancy is already accounted in the
occupancy of their Lowest Monitored Ancestor (lma) pmonr.

Signed-off-by: David Carrillo-Cisneros <davi...@google.com>
---
 arch/x86/events/intel/cmt.c | 113 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 111 insertions(+), 2 deletions(-)
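
As an illustration of the aggregation rule described above, below is a
minimal userspace toy model. It is a sketch only: struct node,
rmid_occupancy() and subtree_occupancy() are made-up names, and the real
kernel-side walk is pmonr_read_subtree()/read_subtree_rmids() in the diff
that follows. A node without a valid read_rmid contributes nothing of its
own because its cache lines are tagged with the rmid of its Lowest
Monitored Ancestor, which is counted at that ancestor.

/*
 * Toy model (hypothetical, userspace) of the subtree read: sum the
 * occupancy of every node that owns a valid read_rmid; nodes in a
 * Dep_* state are skipped because their occupancy is tagged with
 * their Lowest Monitored Ancestor's rmid, counted at that ancestor.
 */
#include <stdio.h>

#define INVALID_RMID	(-1)

struct node {
	int read_rmid;			/* -1 when in a Dep_* state */
	int nr_children;
	const struct node *children;
};

/* Stand-in for the per-rmid hardware counter read. */
static unsigned long long rmid_occupancy(int rmid)
{
	static const unsigned long long fake_counts[] = { 4096, 8192, 0, 16384 };

	return fake_counts[rmid];
}

static unsigned long long subtree_occupancy(const struct node *n)
{
	unsigned long long total = 0;
	int i;

	if (n->read_rmid != INVALID_RMID)
		total += rmid_occupancy(n->read_rmid);
	for (i = 0; i < n->nr_children; i++)
		total += subtree_occupancy(&n->children[i]);
	return total;
}

int main(void)
{
	const struct node leaves[] = {
		{ .read_rmid = INVALID_RMID },	/* Dep_Idle: skipped */
		{ .read_rmid = 3 },		/* Active: own rmid */
	};
	const struct node root = {
		.read_rmid = 0, .nr_children = 2, .children = leaves,
	};

	/* Prints 20480: root (4096) + active leaf (16384). */
	printf("%llu\n", subtree_occupancy(&root));
	return 0;
}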

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index f9195ec..275d128 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -1359,6 +1359,110 @@ static struct monr *monr_next_descendant_post(struct monr *pos,
        return pos->parent;
 }
 
+static int read_subtree_rmids(u32 root_r, unsigned long *rmids_bm, u64 *total)
+{
+       u64 val;
+       int r, err;
+
+       /* First iteration reads the root's rmid; later ones walk rmids_bm. */
+       r = root_r;
+       do {
+               if (r != INVALID_RMID) {
+                       err = cmt_rmid_read(r, &val);
+                       if (WARN_ON_ONCE(err))
+                               return err;
+                       (*total) += val;
+               }
+               if (!rmids_bm)
+                       break;
+               if (root_r != INVALID_RMID) {
+                       root_r = INVALID_RMID;
+                       r = find_first_bit(rmids_bm, CMT_MAX_NR_RMIDS);
+               } else {
+                       r = find_next_bit(rmids_bm, CMT_MAX_NR_RMIDS, r + 1);
+               }
+       } while (r < CMT_MAX_NR_RMIDS);
+
+       return 0;
+}
+
+/**
+ * pmonr_read_subtree() - Read occupancy for a pmonr subtree.
+ *
+ * Read and add occupancy for all pmonrs in the subtree rooted at
+ * @root_pmonr->monr, within the package of @root_pmonr->pkgd.
+ * Fast path for the common case of a leaf pmonr; otherwise, a best-effort,
+ * two-stage read:
+ *   1) read all rmids in the subtree with pkgd->lock held, and
+ *   2) read and add occupancy for those rmids, without locks held.
+ */
+static int pmonr_read_subtree(struct pmonr *root_pmonr, u64 *total)
+{
+       struct monr *pos;
+       struct pkg_data *pkgd = root_pmonr->pkgd;
+       struct pmonr *pmonr;
+       union pmonr_rmids rmids;
+       int err = 0, root_r;
+       unsigned long flags, *rmids_bm = NULL;
+
+       *total = 0;
+       rmids.value = atomic64_read(&root_pmonr->atomic_rmids);
+       /*
+        * The root of the subtree must be in Unused or Active state for the
+        * read to be meaningful (Unused pmonrs have zero occupancy), yet its
+        * descendants may be in Dep_{Idle,Dirty} state, since those states
+        * use their Lowest Monitored Ancestor's rmid.
+        */
+       if (rmids.sched_rmid == INVALID_RMID) {
+               if (rmids.read_rmid == 0)
+                       /* Unused state: zero occupancy. */
+                       root_r = INVALID_RMID;
+               else
+                       /* Off state: nothing to read. */
+                       return -ENODATA;
+       } else {
+               /* Dep_{Idle,Dirty} states: sched and read rmids differ. */
+               if (rmids.sched_rmid != rmids.read_rmid)
+                       return -ENODATA;
+               /* Active state. */
+               root_r = rmids.read_rmid;
+       }
+       /*
+        * Lockless fast path for the common case of a childless monr. No need
+        * to take the lock for list_empty() since either outcome leads to a
+        * read that is correct at some time close to when the check happens.
+        */
+       if (list_empty(&root_pmonr->monr->children))
+               goto read_rmids;
+
+       rmids_bm = kzalloc(CMT_MAX_NR_RMIDS_BYTES, GFP_ATOMIC);
+       if (!rmids_bm)
+               return -ENOMEM;
+
+       /* Lock to protect against changes in the pmonr hierarchy. */
+       raw_spin_lock_irqsave(&pkgd->lock, flags);
+
+       /* Walk starts at the first child; root_r already covers the root. */
+       pos = root_pmonr->monr;
+       while ((pos = monr_next_descendant_pre(pos, root_pmonr->monr))) {
+               /* Protected by pkgd->lock. */
+               pmonr = pkgd_pmonr(pkgd, pos);
+               rmids.value = atomic64_read(&pmonr->atomic_rmids);
+               /* Exclude all pmonrs not in Active or Dep_Dirty states. */
+               if (rmids.sched_rmid == INVALID_RMID ||
+                   rmids.read_rmid == INVALID_RMID)
+                       continue;
+               __set_bit(rmids.read_rmid, rmids_bm);
+       }
+
+       raw_spin_unlock_irqrestore(&pkgd->lock, flags);
+read_rmids:
+       err = read_subtree_rmids(root_r, rmids_bm, total);
+       kfree(rmids_bm);
+
+       return err;
+}
+
 /* Issue reads to CPUs in remote packages. */
 static int issue_read_remote_pkgs(struct monr *monr,
                                  struct cmt_csd **issued_ccsds,
@@ -1522,8 +1626,13 @@ static int intel_cmt_event_read(struct perf_event *event)
                /* It's a task event. */
                err = read_all_pkgs(monr, CMT_IPI_WAIT_TIME, &count);
        } else {
-               /* To add support in next patches in series */
-               return -ENOTSUPP;
+               struct pmonr *pmonr;
+
+               /* It's either a cgroup or a cpu event. */
+               rcu_read_lock();
+               pmonr = rcu_dereference(monr->pmonrs[pkgid]);
+               err = pmonr_read_subtree(pmonr, &count);
+               rcu_read_unlock();
        }
        if (err)
                return err;
-- 
2.8.0.rc3.226.g39d4020
