For MBM, since we report total bytes for the duration that perf counts,
we need to keep the total bytes counted every time we lose an RMID.
Introduce a per-event rc_count (recycle count) to keep this history
count (all bytes counted with the RMIDs held before the current RMID).

If we do not keep this count separately, we may end up reporting a
count that is less than the previously reported count when using the
perf stat -I (interval) option, which leads to negative numbers being
reported by perf. This happens, say, when we counted a larger amount
with RMID1 and then a smaller amount with RMID2, and the user reads the
counts in interval mode once after RMID1 and again after RMID2.
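
As an illustration with made-up numbers: suppose RMID1 had counted 1000
bytes when it was recycled and the new RMID2 has counted 200 bytes so
far. Without rc_count an interval read returns 200, which is less than
the 1000 already reported, so perf prints a negative delta. With
rc_count saving the 1000 bytes at recycle time the read returns
1000 + 200 = 1200 bytes and the interval deltas stay non-negative
(e.g. when running something like
'perf stat -I 1000 -e intel_cqm/total_bytes/ -p <pid>').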

Signed-off-by: Vikas Shivappa <vikas.shiva...@linux.intel.com>
---
 arch/x86/events/intel/cqm.c | 49 ++++++++++++++++++++++++++++++++++++++++-----
 include/linux/perf_event.h  |  1 +
 2 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 5f2104a..320af26 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -479,6 +479,16 @@ static void cqm_mask_call(struct rmid_read *rr)
                on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, rr, 1);
 }
 
+static inline void
+mbm_set_rccount(struct perf_event *event, struct rmid_read *rr)
+{
+       u64 tmpval;
+
+       tmpval = local64_read(&event->hw.rc_count) + atomic64_read(&rr->value);
+       local64_set(&event->hw.rc_count, tmpval);
+       local64_set(&event->count, tmpval);
+}
+
 /*
  * Exchange the RMID of a group of events.
  */
@@ -493,12 +503,19 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
 
        /*
         * If our RMID is being deallocated, perform a read now.
+        * For MBM, we need to store the bytes that were counted so far
+        * separately.
         */
        if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
 
                rr = __init_rr(old_rmid, group->attr.config, 0);
                cqm_mask_call(&rr);
-               local64_set(&group->count, atomic64_read(&rr.value));
+
+               if (is_mbm_event(group->attr.config))
+                       mbm_set_rccount(group, &rr);
+               else
+                       local64_set(&group->count, atomic64_read(&rr.value));
+
                list_for_each_entry(event, head, hw.cqm_group_entry) {
                        if (event->hw.is_group_event) {
 
@@ -506,8 +523,11 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
                                rr = __init_rr(old_rmid, evttype, 0);
 
                                cqm_mask_call(&rr);
-                               local64_set(&event->count,
-                                           atomic64_read(&rr.value));
+                               if (is_mbm_event(event->attr.config))
+                                       mbm_set_rccount(event, &rr);
+                               else
+                                       local64_set(&event->count,
+                                                   atomic64_read(&rr.value));
                        }
                }
        }
@@ -1194,6 +1214,7 @@ static u64 intel_cqm_event_count(struct perf_event *event)
 {
        unsigned long flags;
        struct rmid_read rr = __init_rr(-1, event->attr.config, 0);
+       u64 tmpval;
 
        /*
         * We only need to worry about task events. System-wide events
@@ -1235,6 +1256,16 @@ static u64 intel_cqm_event_count(struct perf_event *event)
         * busying performing the IPI calls. It's therefore necessary to
         * check @event's RMID afterwards, and if it has changed,
         * discard the result of the read.
+        *
+        * For MBM events, we are reading the total bytes and not
+        * a snapshot. Hence if RMIDs were recycled for the event, we need
+        * to add the counts of all RMIDs associated with the event together.
+        * Suppose RMID(1).. RMID(k) represent the total_bytes of the
+        * different RMIDs the event was associated with,
+        * count = RMID(1) + RMID(2) +...+ RMID(k-1) + RMID(k)
+        *       = rc_count + RMID(k), where
+        * RMID(k) is the count we read now via IPI and
+        * rc_count = RMID(1) + RMID(2) +...+ RMID(k-1).
         */
        rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
 
@@ -1244,8 +1275,16 @@ static u64 intel_cqm_event_count(struct perf_event *event)
        cqm_mask_call(&rr);
 
        raw_spin_lock_irqsave(&cache_lock, flags);
-       if (event->hw.cqm_rmid == rr.rmid)
-               local64_set(&event->count, atomic64_read(&rr.value));
+       if (event->hw.cqm_rmid == rr.rmid) {
+               if (is_mbm_event(event->attr.config)) {
+                       tmpval = atomic64_read(&rr.value) +
+                               local64_read(&event->hw.rc_count);
+
+                       local64_set(&event->count, tmpval);
+               } else {
+                       local64_set(&event->count, atomic64_read(&rr.value));
+               }
+       }
        raw_spin_unlock_irqrestore(&cache_lock, flags);
 out:
        return __perf_event_count(event);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f291275..ec7772a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -122,6 +122,7 @@ struct hw_perf_event {
                        int                     cqm_state;
                        u32                     cqm_rmid;
                        int                     is_group_event;
+                       local64_t               rc_count;
                        struct list_head        cqm_events_entry;
                        struct list_head        cqm_groups_entry;
                        struct list_head        cqm_group_entry;
-- 
1.9.1
