Nice data, thank you!

-----Original Message-----
From: Tvrtko Ursulin [mailto:tvrtko.ursu...@linux.intel.com] 
Sent: Wednesday, October 4, 2017 10:35 AM
To: Tvrtko Ursulin <tursu...@ursulin.net>; Intel-gfx@lists.freedesktop.org
Cc: Chris Wilson <ch...@chris-wilson.co.uk>; Rogozhkin, Dmitry V 
<dmitry.v.rogozh...@intel.com>
Subject: Re: [Intel-gfx] [PATCH 06/10] drm/i915/pmu: Wire up engine busy stats 
to PMU


On 29/09/2017 13:34, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
> 
> We can use engine busy stats instead of the sampling timer for better 
> accuracy.
> 
> By doing this we replace the stohastic sampling with busyness metric 
> derived directly from engine activity. This is context switch 
> interrupt driven, so as accurate as we can get from software tracking.

To quantify the precision benefit with a graph:

https://people.freedesktop.org/~tursulin/sampling-vs-busy-stats.png

This represents the render engine busyness on neverball menu screen looping a 
few times. PMU sampling interval is 100ms. It was generated from a special 
build with both PMU counters running in parallel so the data is absolutely 
aligned.

Anomaly of busyness going over 100% is caused by me not bothering to use the 
actual timestamps got from PMU, but using fixed interval (perf stat -I 100). 
But I don't think it matters for the rough comparison and looking at the noise 
and error in the sampling version.

> As a secondary benefit, we can also not run the sampling timer in 
> cases only busyness metric is enabled.

For this one data is not showing anything significant. I've seen the 
i915_sample function use <0.1% of CPU time but that's it. Probably with the 
original version which used MMIO it was much worse.

Regards,

Tvrtko

> v2: Rebase.
> v3:
>   * Rebase, comments.
>   * Leave engine busyness controls out of workers.
> v4: Checkpatch cleanup.
> v5: Added comment to pmu_needs_timer change.
> v6:
>   * Rebase.
>   * Fix style of some comments. (Chris Wilson)
> v7: Rebase and commit message update. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
> Reviewed-by: Chris Wilson <ch...@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_pmu.c         | 37 
> +++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  5 +++++
>   2 files changed, 40 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c 
> b/drivers/gpu/drm/i915/i915_pmu.c index f341c904c159..93c0e7ec7d75 
> 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -90,6 +90,11 @@ static unsigned int event_enabled_bit(struct perf_event 
> *event)
>       return config_enabled_bit(event->attr.config);
>   }
>   
> +static bool supports_busy_stats(void) {
> +     return i915_modparams.enable_execlists; }
> +
>   static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
>   {
>       u64 enable;
> @@ -115,6 +120,12 @@ static bool pmu_needs_timer(struct drm_i915_private 
> *i915, bool gpu_active)
>        */
>       if (!gpu_active)
>               enable &= ~ENGINE_SAMPLE_MASK;
> +     /*
> +      * Also there is software busyness tracking available we do not
> +      * need the timer for I915_SAMPLE_BUSY counter.
> +      */
> +     else if (supports_busy_stats())
> +             enable &= ~BIT(I915_SAMPLE_BUSY);
>   
>       /*
>        * If some bits remain it means we need the sampling timer running.
> @@ -362,6 +373,9 @@ static u64 __i915_pmu_event_read(struct perf_event 
> *event)
>   
>               if (WARN_ON_ONCE(!engine)) {
>                       /* Do nothing */
> +             } else if (sample == I915_SAMPLE_BUSY &&
> +                        engine->pmu.busy_stats) {
> +                     val = ktime_to_ns(intel_engine_get_busy_time(engine));
>               } else {
>                       val = engine->pmu.sample[sample].cur;
>               }
> @@ -398,6 +412,12 @@ static void i915_pmu_event_read(struct perf_event *event)
>       local64_add(new - prev, &event->count);
>   }
>   
> +static bool engine_needs_busy_stats(struct intel_engine_cs *engine) {
> +     return supports_busy_stats() &&
> +            (engine->pmu.enable & BIT(I915_SAMPLE_BUSY)); }
> +
>   static void i915_pmu_enable(struct perf_event *event)
>   {
>       struct drm_i915_private *i915 =
> @@ -437,7 +457,14 @@ static void i915_pmu_enable(struct perf_event 
> *event)
>   
>               GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
>               GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
> -             engine->pmu.enable_count[sample]++;
> +             if (engine->pmu.enable_count[sample]++ == 0) {
> +                     if (engine_needs_busy_stats(engine) &&
> +                         !engine->pmu.busy_stats) {
> +                             engine->pmu.busy_stats =
> +                                     intel_enable_engine_stats(engine) == 0;
> +                             WARN_ON_ONCE(!engine->pmu.busy_stats);
> +                     }
> +             }
>       }
>   
>       /*
> @@ -473,8 +500,14 @@ static void i915_pmu_disable(struct perf_event *event)
>                * Decrement the reference count and clear the enabled
>                * bitmask when the last listener on an event goes away.
>                */
> -             if (--engine->pmu.enable_count[sample] == 0)
> +             if (--engine->pmu.enable_count[sample] == 0) {
>                       engine->pmu.enable &= ~BIT(sample);
> +                     if (!engine_needs_busy_stats(engine) &&
> +                         engine->pmu.busy_stats) {
> +                             engine->pmu.busy_stats = false;
> +                             intel_disable_engine_stats(engine);
> +                     }
> +             }
>       }
>   
>       GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); diff --git 
> a/drivers/gpu/drm/i915/intel_ringbuffer.h 
> b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index ee738c7694e5..0f8ccb243407 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -351,6 +351,11 @@ struct intel_engine_cs {
>                * Our internal timer stores the current counters in this field.
>                */
>               struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
> +             /**
> +              * @busy_stats: Has enablement of engine stats tracking been
> +              *              requested.
> +              */
> +             bool busy_stats;
>       } pmu;
>   
>       /*
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to