From: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

We can use engine busy stats instead of the MMIO sampling timer
for better efficiency.

At a minimum this saves period * num_engines / sec MMIO reads,
and in the better case, when only engine busy samplers are active,
it enables us to not kick off the sampling timer at all.

v2: Rebase.
v3:
 * Rebase, comments.
 * Leave engine busyness controls out of workers.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c         | 36 ++++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  4 ++++
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 26e735f27282..f8a6195c17f1 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -90,6 +90,11 @@ static unsigned int event_enabled_bit(struct perf_event *event)
        return config_enabled_bit(event->attr.config);
 }
 
+static bool supports_busy_stats(void)
+{
+       return i915.enable_execlists;
+}
+
 static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
 {
        u64 enable = i915->pmu.enable;
@@ -100,6 +105,8 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
 
        if (!gpu_active)
                enable &= ~ENGINE_SAMPLE_MASK;
+       else if (supports_busy_stats())
+               enable &= ~BIT(I915_SAMPLE_BUSY);
 
        return enable;
 }
@@ -163,7 +170,8 @@ static void engines_sample(struct drm_i915_private *dev_priv)
                if (enable & BIT(I915_SAMPLE_QUEUED))
                        engine->pmu.sample[I915_SAMPLE_QUEUED] += PERIOD;
 
-               if (enable & BIT(I915_SAMPLE_BUSY)) {
+               if ((enable & BIT(I915_SAMPLE_BUSY)) &&
+                   !engine->pmu.busy_stats) {
                        u32 val;
 
                        fw = grab_forcewake(dev_priv, fw);
@@ -342,6 +350,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 
                if (WARN_ON_ONCE(!engine)) {
                        /* Do nothing */
+               } else if (sample == I915_SAMPLE_BUSY &&
+                          engine->pmu.busy_stats) {
+                       val = ktime_to_ns(intel_engine_get_busy_time(engine));
                } else {
                        val = engine->pmu.sample[sample];
                }
@@ -385,6 +396,12 @@ static void i915_pmu_event_read(struct perf_event *event)
                    local64_read(&event->hw.prev_count));
 }
 
+static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
+{
+       return supports_busy_stats() &&
+              (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
+}
+
 static void i915_pmu_enable(struct perf_event *event)
 {
        struct drm_i915_private *i915 =
@@ -429,7 +446,14 @@ static void i915_pmu_enable(struct perf_event *event)
 
                GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
                GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
-               engine->pmu.enable_count[sample]++;
+               if (engine->pmu.enable_count[sample]++ == 0) {
+                       if (engine_needs_busy_stats(engine) &&
+                           !engine->pmu.busy_stats) {
+                               engine->pmu.busy_stats =
+                                       intel_enable_engine_stats(engine) == 0;
+                               WARN_ON_ONCE(!engine->pmu.busy_stats);
+                       }
+               }
        }
 
        /*
@@ -465,8 +489,14 @@ static void i915_pmu_disable(struct perf_event *event)
                 * Decrement the reference count and clear the enabled
                 * bitmask when the last listener on an event goes away.
                 */
-               if (--engine->pmu.enable_count[sample] == 0)
+               if (--engine->pmu.enable_count[sample] == 0) {
                        engine->pmu.enable &= ~BIT(sample);
+                       if (!engine_needs_busy_stats(engine) &&
+                           engine->pmu.busy_stats) {
+                               engine->pmu.busy_stats = false;
+                               intel_disable_engine_stats(engine);
+                       }
+               }
        }
 
        GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f618c5f98edf..fe554fc76867 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -265,6 +265,10 @@ struct intel_engine_cs {
                 * Our internal timer stores the current counter in this field.
                 */
                u64 sample[I915_ENGINE_SAMPLE_MAX];
+               /**
+                * @busy_stats: Has enablement of engine stats tracking been requested.
+                */
+               bool busy_stats;
        } pmu;
 
        /*
-- 
2.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to