This patch should probably be squashed with Tvrtko's PMU enabling
patch...

Making perf-stat workable with i915 PMU. The major point is that
current implementation of i915 PMU exposes global counter rather
thatn per-task counters. Thus, required changes are:
* Register PMU with .task_ctx_nr=perf_invalid_context
* Expose cpumask for the PMU with the single CPU in the mask
* Properly support pmu->stop(): it should call pmu->read()
* Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE)

Examples:
perf stat -e i915/rcs0-busy/ workload.sh
   This should error out with "failed to read counter" because the
requested counter can't be queried in per-task mode (which is the case).

perf stat -e i915/rcs0-busy/ -a workload.sh
perf stat -e i915/rcs0-busy/ -a -C0 workload.sh
   These 2 commands should give the same result with the correct counter
values. Counter value will be queried once in the end of the wrokload.
The example output would be:

 Performance counter stats for 'system wide':
       649,547,987      i915/rcs0-busy/

       1.895530680 seconds time elapsed

perf stat -e i915/rcs0-busy/ -a -I 100 workload.sh
   This will query counter perdiodically (each 100ms) and dump output:

     0.100108369          4,137,438      i915/rcs0-busy/
i915/rcs0-busy/: 37037414 100149071 100149071
     0.200249024         37,037,414      i915/rcs0-busy/
i915/rcs0-busy/: 36935429 100145077 100145077
     0.300391916         36,935,429      i915/rcs0-busy/
i915/rcs0-busy/: 34262017 100126136 100126136
     0.400518037         34,262,017      i915/rcs0-busy/
i915/rcs0-busy/: 34539960 100126217 100126217

v1: Make pmu.busy_stats a refcounter to avoid busy stats going away
    with some deleted event.

v2: Expose cpumask for i915 PMU to avoid multiple events creation of
    the same type followed by counter aggregation by perf-stat.

Change-Id: I7d1abe747a4399196e72253f7b66441a6528dbee
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozh...@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Cc: Peter Zijlstra <pet...@infradead.org>
---
 drivers/gpu/drm/i915/i915_pmu.c         | 71 +++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 2 files changed, 55 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index bcdf2bc..c551d64 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -15,6 +15,8 @@
 
 #define ENGINE_SAMPLE_BITS (16)
 
+static cpumask_t i915_pmu_cpumask = CPU_MASK_CPU0;
+
 static u8 engine_config_sample(u64 config)
 {
        return config & I915_PMU_SAMPLE_MASK;
@@ -285,16 +287,24 @@ static int i915_pmu_event_init(struct perf_event *event)
 {
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
+       int cpu;
        int ret;
 
-       /* XXX ideally only want pid == -1 && cpu == -1 */
-
        if (event->attr.type != event->pmu->type)
                return -ENOENT;
 
        if (has_branch_stack(event))
                return -EOPNOTSUPP;
 
+       if (event->cpu < 0)
+               return -EINVAL;
+
+       cpu = cpumask_any_and(&i915_pmu_cpumask,
+               topology_sibling_cpumask(event->cpu));
+
+       if (cpu >= nr_cpu_ids)
+               return -ENODEV;
+
        ret = 0;
        if (is_engine_event(event)) {
                ret = engine_event_init(event);
@@ -314,6 +324,7 @@ static int i915_pmu_event_init(struct perf_event *event)
        if (ret)
                return ret;
 
+       event->cpu = cpu;
        if (!event->parent)
                event->destroy = i915_pmu_event_destroy;
 
@@ -469,11 +480,16 @@ static void i915_pmu_enable(struct perf_event *event)
                                       ns_to_ktime(PERIOD), 0,
                                       HRTIMER_MODE_REL_PINNED);
                i915->pmu.timer_enabled = true;
-       } else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
-                  !engine->pmu.busy_stats) {
-               engine->pmu.busy_stats = true;
-               if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
-                       queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+       } else if (is_engine_event(event) && engine_needs_busy_stats(engine)) {
+               /* Enable busy stats for the first event demanding it, next
+                * events just reference counter. So, if some events will go
+                * away, we will still have busy stats enabled till remaining
+                * events still use them.
+                */
+               if (engine->pmu.busy_stats++ == 0) {
+                       if 
(!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+                               queue_work(i915->wq, 
&engine->pmu.enable_busy_stats);
+               }
        }
 
        spin_unlock_irqrestore(&i915->pmu.lock, flags);
@@ -495,16 +511,16 @@ static void i915_pmu_disable(struct perf_event *event)
                enum intel_engine_id id;
 
                engine = intel_engine_lookup_user(i915,
-                                                 engine_event_class(event),
-                                                 engine_event_instance(event));
+                                               engine_event_class(event),
+                                               engine_event_instance(event));
                GEM_BUG_ON(!engine);
                engine->pmu.enable &= ~BIT(engine_event_sample(event));
-               if (engine->pmu.busy_stats &&
-                   !engine_needs_busy_stats(engine)) {
-                       engine->pmu.busy_stats = false;
-                       queue_delayed_work(i915->wq,
-                                          &engine->pmu.disable_busy_stats,
-                                          round_jiffies_up_relative(2 * HZ));
+               if (!engine_needs_busy_stats(engine)) {
+                       if (engine->pmu.busy_stats && --engine->pmu.busy_stats 
== 0) {
+                               queue_delayed_work(i915->wq,
+                                               &engine->pmu.disable_busy_stats,
+                                               round_jiffies_up_relative(2 * 
HZ));
+                       }
                }
                mask = 0;
                for_each_engine(engine, i915, id)
@@ -529,6 +545,8 @@ static void i915_pmu_event_start(struct perf_event *event, 
int flags)
 
 static void i915_pmu_event_stop(struct perf_event *event, int flags)
 {
+       if (flags & PERF_EF_UPDATE)
+               i915_pmu_event_read(event);
        i915_pmu_disable(event);
 }
 
@@ -546,7 +564,7 @@ static int i915_pmu_event_add(struct perf_event *event, int 
flags)
 
 static void i915_pmu_event_del(struct perf_event *event, int flags)
 {
-       i915_pmu_disable(event);
+       i915_pmu_event_stop(event, PERF_EF_UPDATE);
 }
 
 static int i915_pmu_event_event_idx(struct perf_event *event)
@@ -656,9 +674,28 @@ static ssize_t i915_pmu_event_show(struct device *dev,
         .attrs = i915_pmu_events_attrs,
 };
 
+static ssize_t i915_pmu_get_attr_cpumask(struct device *dev,
+                                      struct device_attribute *attr,
+                                      char *buf)
+{
+       return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, i915_pmu_get_attr_cpumask, NULL);
+
+static struct attribute *i915_cpumask_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL,
+};
+
+static struct attribute_group i915_pmu_cpumask_attr_group = {
+       .attrs = i915_cpumask_attrs,
+};
+
 static const struct attribute_group *i915_pmu_attr_groups[] = {
         &i915_pmu_format_attr_group,
         &i915_pmu_events_attr_group,
+        &i915_pmu_cpumask_attr_group,
         NULL
 };
 
@@ -687,7 +724,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
                return;
 
        i915->pmu.base.attr_groups      = i915_pmu_attr_groups;
-       i915->pmu.base.task_ctx_nr      = perf_sw_context;
+       i915->pmu.base.task_ctx_nr      = perf_invalid_context;
        i915->pmu.base.event_init       = i915_pmu_event_init;
        i915->pmu.base.add              = i915_pmu_event_add;
        i915->pmu.base.del              = i915_pmu_event_del;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index fd5d838..0530e4e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -248,7 +248,7 @@ struct intel_engine_cs {
        struct {
                u32 enable;
                u64 sample[4];
-               bool busy_stats;
+               unsigned int busy_stats;
                struct work_struct enable_busy_stats;
                struct delayed_work disable_busy_stats;
        } pmu;
-- 
1.8.3.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to