Re: [Intel-gfx] [RFC v3 11/11] drm/i915: Gate engine stats collection with a static key

2017-09-15 Thread Tvrtko Ursulin


On 14/09/2017 21:22, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2017-09-13 13:18:19)

From: Tvrtko Ursulin 

This reduces the cost of the software engine busyness tracking
to a single no-op instruction when there are no listeners.

v2: Rebase and some comments.
v3: Rebase.

Signed-off-by: Tvrtko Ursulin 
---
  drivers/gpu/drm/i915/i915_pmu.c | 54 +++--
  drivers/gpu/drm/i915/intel_engine_cs.c  | 17 
  drivers/gpu/drm/i915/intel_ringbuffer.h | 70 ++---
  3 files changed, 113 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 3c0c5d0549b3..d734879e67ee 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -460,11 +460,17 @@ static void i915_pmu_enable(struct perf_event *event)
 GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
 GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
 if (engine->pmu.enable_count[sample]++ == 0) {
+   /*
+* Enable engine busy stats tracking if needed or
+* alternatively cancel the scheduled disabling of the
+* same.
+*/
 if (engine_needs_busy_stats(engine) &&
 !engine->pmu.busy_stats) {
-   engine->pmu.busy_stats =
-   intel_enable_engine_stats(engine) == 0;
-   WARN_ON_ONCE(!engine->pmu.busy_stats);
+   engine->pmu.busy_stats = true;
+   if 
(!cancel_delayed_work(>pmu.disable_busy_stats))
+   queue_work(i915->wq,
+   >pmu.enable_busy_stats);


The only users of i915->wq are supposed to be dependent on struct_mutex,
it is limited to a single job under the presumption that each worker
would be serialised by that mutex.


Good point, so I need a dedicated wq. Might still want to make it 
ordered since they themselves will be serialised by the global static 
key mutex.


Regards,

Tvrtko
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC v3 11/11] drm/i915: Gate engine stats collection with a static key

2017-09-14 Thread Chris Wilson
Quoting Tvrtko Ursulin (2017-09-13 13:18:19)
> From: Tvrtko Ursulin 
> 
> This reduces the cost of the software engine busyness tracking
> to a single no-op instruction when there are no listeners.
> 
> v2: Rebase and some comments.
> v3: Rebase.
> 
> Signed-off-by: Tvrtko Ursulin 
> ---
>  drivers/gpu/drm/i915/i915_pmu.c | 54 +++--
>  drivers/gpu/drm/i915/intel_engine_cs.c  | 17 
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 70 
> ++---
>  3 files changed, 113 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 3c0c5d0549b3..d734879e67ee 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -460,11 +460,17 @@ static void i915_pmu_enable(struct perf_event *event)
> GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
> GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
> if (engine->pmu.enable_count[sample]++ == 0) {
> +   /*
> +* Enable engine busy stats tracking if needed or
> +* alternatively cancel the scheduled disabling of the
> +* same.
> +*/
> if (engine_needs_busy_stats(engine) &&
> !engine->pmu.busy_stats) {
> -   engine->pmu.busy_stats =
> -   intel_enable_engine_stats(engine) == 
> 0;
> -   WARN_ON_ONCE(!engine->pmu.busy_stats);
> +   engine->pmu.busy_stats = true;
> +   if 
> (!cancel_delayed_work(>pmu.disable_busy_stats))
> +   queue_work(i915->wq,
> +   
> >pmu.enable_busy_stats);

The only users of i915->wq are supposed to be dependent on struct_mutex,
it is limited to a single job under the presumption that each worker
would be serialised by that mutex.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC v3 11/11] drm/i915: Gate engine stats collection with a static key

2017-09-13 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

This reduces the cost of the software engine busyness tracking
to a single no-op instruction when there are no listeners.

v2: Rebase and some comments.
v3: Rebase.

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_pmu.c | 54 +++--
 drivers/gpu/drm/i915/intel_engine_cs.c  | 17 
 drivers/gpu/drm/i915/intel_ringbuffer.h | 70 ++---
 3 files changed, 113 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 3c0c5d0549b3..d734879e67ee 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -460,11 +460,17 @@ static void i915_pmu_enable(struct perf_event *event)
GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
if (engine->pmu.enable_count[sample]++ == 0) {
+   /*
+* Enable engine busy stats tracking if needed or
+* alternatively cancel the scheduled disabling of the
+* same.
+*/
if (engine_needs_busy_stats(engine) &&
!engine->pmu.busy_stats) {
-   engine->pmu.busy_stats =
-   intel_enable_engine_stats(engine) == 0;
-   WARN_ON_ONCE(!engine->pmu.busy_stats);
+   engine->pmu.busy_stats = true;
+   if 
(!cancel_delayed_work(>pmu.disable_busy_stats))
+   queue_work(i915->wq,
+   >pmu.enable_busy_stats);
}
}
}
@@ -507,7 +513,15 @@ static void i915_pmu_disable(struct perf_event *event)
if (!engine_needs_busy_stats(engine) &&
engine->pmu.busy_stats) {
engine->pmu.busy_stats = false;
-   intel_disable_engine_stats(engine);
+   /*
+* We request a delayed disable to handle the
+* rapid on/off cycles on events which can
+* happen when tools like perf stat start in a
+* nicer way.
+*/
+   queue_delayed_work(i915->wq,
+  
>pmu.disable_busy_stats,
+  
round_jiffies_up_relative(HZ));
}
}
}
@@ -715,9 +729,27 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct 
hlist_node *node)
return 0;
 }
 
+static void __enable_busy_stats(struct work_struct *work)
+{
+   struct intel_engine_cs *engine =
+   container_of(work, typeof(*engine), pmu.enable_busy_stats);
+
+   WARN_ON_ONCE(intel_enable_engine_stats(engine));
+}
+
+static void __disable_busy_stats(struct work_struct *work)
+{
+   struct intel_engine_cs *engine =
+  container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
+
+   intel_disable_engine_stats(engine);
+}
+
 void i915_pmu_register(struct drm_i915_private *i915)
 {
int ret;
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
 
if (INTEL_GEN(i915) <= 2) {
ret = -ENOTSUPP;
@@ -751,6 +783,12 @@ void i915_pmu_register(struct drm_i915_private *i915)
i915->pmu.timer.function = i915_sample;
i915->pmu.enable = 0;
 
+   for_each_engine(engine, i915, id) {
+   INIT_WORK(>pmu.enable_busy_stats, __enable_busy_stats);
+   INIT_DELAYED_WORK(>pmu.disable_busy_stats,
+ __disable_busy_stats);
+   }
+
if (perf_pmu_register(>pmu.base, "i915", -1))
i915->pmu.base.event_init = NULL;
 
@@ -761,6 +799,9 @@ void i915_pmu_register(struct drm_i915_private *i915)
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+
if (!i915->pmu.base.event_init)
return;
 
@@ -772,6 +813,11 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 
hrtimer_cancel(>pmu.timer);
 
+   for_each_engine(engine, i915, id) {
+   flush_work(>pmu.enable_busy_stats);
+   flush_delayed_work(>pmu.disable_busy_stats);
+   }
+
perf_pmu_unregister(>pmu.base);
i915->pmu.base.event_init = NULL;
 }
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c
index e2152dd21b4a..e4a8eb83a018 100644
---