From: Kan Liang <kan.li...@linux.intel.com>

With Icelake CPUs, the TopDown metrics are directly available as fixed
counters and do not require generic counters, which makes it possible to
measure TopDown per thread/process instead of only per core.

The metrics and slots values have to be saved/restored during context
switching.
The saved values are also used as previous values to calculate the
delta.

The PERF_METRICS MSR value will be returned for an RDPMC read of a
metrics event.

Signed-off-by: Kan Liang <kan.li...@linux.intel.com>
---

New for V2

 arch/x86/events/core.c       |   5 ++-
 arch/x86/events/intel/core.c | 101 +++++++++++++++++++++++++++++++++++--------
 include/linux/perf_event.h   |   3 ++
 3 files changed, 90 insertions(+), 19 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 6169af6bf723..fde44fdba256 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2150,7 +2150,10 @@ static int x86_pmu_event_idx(struct perf_event *event)
        if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
                return 0;
 
-       if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
+       /* Return PERF_METRICS MSR value for metrics event */
+       if (is_metric_idx(idx))
+               idx = 1 << 29;
+       else if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
                idx -= INTEL_PMC_IDX_FIXED;
                idx |= 1 << 30;
        }
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 753a345a1db0..1c0dd95fd0d2 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2254,6 +2254,11 @@ static int icl_set_topdown_event_period(struct perf_event *event)
                local64_set(&hwc->period_left, 0);
        }
 
+       if ((hwc->saved_slots) && is_first_topdown_event_in_group(event)) {
+               wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
+               wrmsrl(MSR_PERF_METRICS, hwc->saved_metric);
+       }
+
        perf_event_update_userpage(event);
 
        return 0;
@@ -2272,7 +2277,7 @@ static u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
        return  mul_u64_u32_div(slots, val, 0xff);
 }
 
-static void __icl_update_topdown_event(struct perf_event *event,
+static u64 icl_get_topdown_value(struct perf_event *event,
                                       u64 slots, u64 metrics)
 {
        int idx = event->hw.idx;
@@ -2283,7 +2288,50 @@ static void __icl_update_topdown_event(struct perf_event *event,
        else
                delta = slots;
 
-       local64_add(delta, &event->count);
+       return delta;
+}
+
+static void __icl_update_topdown_event(struct perf_event *event,
+                                      u64 slots, u64 metrics,
+                                      u64 last_slots, u64 last_metrics)
+{
+       u64 delta, last = 0;
+
+       delta = icl_get_topdown_value(event, slots, metrics);
+       if (last_slots)
+               last = icl_get_topdown_value(event, last_slots, last_metrics);
+
+       /*
+        * The 8bit integer percentage of metric may be not accurate,
+        * especially when the change is very small.
+        * For example, if only a few bad_spec happens, the percentage
+        * may be reduced from 1% to 0%. If so, the bad_spec event value
+        * will be 0 which is definitely less than the last value.
+        * Avoid update event->count for this case.
+        */
+       if (delta > last) {
+               delta -= last;
+               local64_add(delta, &event->count);
+       }
+}
+
+static void update_saved_topdown_regs(struct perf_event *event,
+                                     u64 slots, u64 metrics)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct perf_event *other;
+       int idx;
+
+       event->hw.saved_slots = slots;
+       event->hw.saved_metric = metrics;
+
+       for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
+               if (!is_topdown_idx(idx))
+                       continue;
+               other = cpuc->events[idx];
+               other->hw.saved_slots = slots;
+               other->hw.saved_metric = metrics;
+       }
 }
 
 /*
@@ -2295,6 +2343,7 @@ static u64 icl_update_topdown_event(struct perf_event *event)
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct perf_event *other;
        u64 slots, metrics;
+       bool reset = true;
        int idx;
 
        /*
@@ -2316,26 +2365,45 @@ static u64 icl_update_topdown_event(struct perf_event *event)
                if (!is_topdown_idx(idx))
                        continue;
                other = cpuc->events[idx];
-               __icl_update_topdown_event(other, slots, metrics);
+               __icl_update_topdown_event(other, slots, metrics,
+                                          event ? event->hw.saved_slots : 0,
+                                          event ? event->hw.saved_metric : 0);
        }
 
        /*
         * Check and update this event, which may have been cleared
         * in active_mask e.g. x86_pmu_stop()
         */
-       if (event && !test_bit(event->hw.idx, cpuc->active_mask))
-               __icl_update_topdown_event(event, slots, metrics);
+       if (event && !test_bit(event->hw.idx, cpuc->active_mask)) {
+               __icl_update_topdown_event(event, slots, metrics,
+                                          event->hw.saved_slots,
+                                          event->hw.saved_metric);
 
-       /*
-        * To avoid the known issues as below, the PERF_METRICS and
-        * Fixed counter 3 are reset for each read.
-        * - The 8bit metrics ratio values lose precision when the
-        *   measurement period gets longer.
-        * - The PERF_METRICS may report wrong value if its delta was
-        *   less than 1/255 of Fixed counter 3.
-        */
-       wrmsrl(MSR_PERF_METRICS, 0);
-       wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
+               /*
+                * In x86_pmu_stop(), the event is cleared in active_mask first,
+                * then drain the delta, which indicates context switch for
+                * counting.
+                * Save metric and slots for context switch.
+                * Don't need to reset the PERF_METRICS and Fixed counter 3.
+                * Because the values will be restored in next schedule in.
+                */
+               update_saved_topdown_regs(event, slots, metrics);
+               reset = false;
+       }
+
+       if (reset) {
+               /*
+                * To avoid the known issues as below, the PERF_METRICS and
+                * Fixed counter 3 are reset for each read.
+                * - The 8bit metrics ratio values lose precision when the
+                *   measurement period gets longer.
+                * - The PERF_METRICS may report wrong value if its delta was
+                *   less than 1/255 of Fixed counter 3.
+                */
+               wrmsrl(MSR_PERF_METRICS, 0);
+               wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
+               update_saved_topdown_regs(event, 0, 0);
+       }
 
        return slots;
 }
@@ -3517,9 +3585,6 @@ static int intel_pmu_hw_config(struct perf_event *event)
                        event->attr.config1 = event->hw.config &
                                              X86_ALL_EVENT_FLAGS;
                        event->hw.flags |= PERF_X86_EVENT_TOPDOWN;
-
-                       if (is_metric_event(event))
-                       event->hw.flags &= ~PERF_X86_EVENT_RDPMC_ALLOWED;
                }
        }
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3dc01cf98e16..afd53e46d5e6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -133,6 +133,9 @@ struct hw_perf_event {
 
                        struct hw_perf_event_extra extra_reg;
                        struct hw_perf_event_extra branch_reg;
+
+                       u64             saved_slots;
+                       u64             saved_metric;
                };
                struct { /* software */
                        struct hrtimer  hrtimer;
-- 
2.14.5

Reply via email to