Re: [Intel-gfx] [RFC v3 11/11] drm/i915: Gate engine stats collection with a static key
On 14/09/2017 21:22, Chris Wilson wrote: Quoting Tvrtko Ursulin (2017-09-13 13:18:19) From: Tvrtko UrsulinThis reduces the cost of the software engine busyness tracking to a single no-op instruction when there are no listeners. v2: Rebase and some comments. v3: Rebase. Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_pmu.c | 54 +++-- drivers/gpu/drm/i915/intel_engine_cs.c | 17 drivers/gpu/drm/i915/intel_ringbuffer.h | 70 ++--- 3 files changed, 113 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 3c0c5d0549b3..d734879e67ee 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -460,11 +460,17 @@ static void i915_pmu_enable(struct perf_event *event) GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); if (engine->pmu.enable_count[sample]++ == 0) { + /* +* Enable engine busy stats tracking if needed or +* alternatively cancel the scheduled disabling of the +* same. +*/ if (engine_needs_busy_stats(engine) && !engine->pmu.busy_stats) { - engine->pmu.busy_stats = - intel_enable_engine_stats(engine) == 0; - WARN_ON_ONCE(!engine->pmu.busy_stats); + engine->pmu.busy_stats = true; + if (!cancel_delayed_work(>pmu.disable_busy_stats)) + queue_work(i915->wq, + >pmu.enable_busy_stats); The only users of i915->wq are supposed to be dependent on struct_mutex, it is limited to a single job under the presumption that each worker would be serialised by that mutex. Good point, so I need a dedicated wq. Might still want to make it ordered since they themselves will be serialised by the global static key mutex. Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC v3 11/11] drm/i915: Gate engine stats collection with a static key
Quoting Tvrtko Ursulin (2017-09-13 13:18:19) > From: Tvrtko Ursulin> > This reduces the cost of the software engine busyness tracking > to a single no-op instruction when there are no listeners. > > v2: Rebase and some comments. > v3: Rebase. > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/i915/i915_pmu.c | 54 +++-- > drivers/gpu/drm/i915/intel_engine_cs.c | 17 > drivers/gpu/drm/i915/intel_ringbuffer.h | 70 > ++--- > 3 files changed, 113 insertions(+), 28 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c > index 3c0c5d0549b3..d734879e67ee 100644 > --- a/drivers/gpu/drm/i915/i915_pmu.c > +++ b/drivers/gpu/drm/i915/i915_pmu.c > @@ -460,11 +460,17 @@ static void i915_pmu_enable(struct perf_event *event) > GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); > GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); > if (engine->pmu.enable_count[sample]++ == 0) { > + /* > +* Enable engine busy stats tracking if needed or > +* alternatively cancel the scheduled disabling of the > +* same. > +*/ > if (engine_needs_busy_stats(engine) && > !engine->pmu.busy_stats) { > - engine->pmu.busy_stats = > - intel_enable_engine_stats(engine) == > 0; > - WARN_ON_ONCE(!engine->pmu.busy_stats); > + engine->pmu.busy_stats = true; > + if > (!cancel_delayed_work(>pmu.disable_busy_stats)) > + queue_work(i915->wq, > + > >pmu.enable_busy_stats); The only users of i915->wq are supposed to be dependent on struct_mutex, it is limited to a single job under the presumption that each worker would be serialised by that mutex. -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC v3 11/11] drm/i915: Gate engine stats collection with a static key
From: Tvrtko UrsulinThis reduces the cost of the software engine busyness tracking to a single no-op instruction when there are no listeners. v2: Rebase and some comments. v3: Rebase. Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_pmu.c | 54 +++-- drivers/gpu/drm/i915/intel_engine_cs.c | 17 drivers/gpu/drm/i915/intel_ringbuffer.h | 70 ++--- 3 files changed, 113 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 3c0c5d0549b3..d734879e67ee 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -460,11 +460,17 @@ static void i915_pmu_enable(struct perf_event *event) GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); if (engine->pmu.enable_count[sample]++ == 0) { + /* +* Enable engine busy stats tracking if needed or +* alternatively cancel the scheduled disabling of the +* same. +*/ if (engine_needs_busy_stats(engine) && !engine->pmu.busy_stats) { - engine->pmu.busy_stats = - intel_enable_engine_stats(engine) == 0; - WARN_ON_ONCE(!engine->pmu.busy_stats); + engine->pmu.busy_stats = true; + if (!cancel_delayed_work(>pmu.disable_busy_stats)) + queue_work(i915->wq, + >pmu.enable_busy_stats); } } } @@ -507,7 +513,15 @@ static void i915_pmu_disable(struct perf_event *event) if (!engine_needs_busy_stats(engine) && engine->pmu.busy_stats) { engine->pmu.busy_stats = false; - intel_disable_engine_stats(engine); + /* +* We request a delayed disable to handle the +* rapid on/off cycles on events which can +* happen when tools like perf stat start in a +* nicer way. +*/ + queue_delayed_work(i915->wq, + >pmu.disable_busy_stats, + round_jiffies_up_relative(HZ)); } } } @@ -715,9 +729,27 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) return 0; } +static void __enable_busy_stats(struct work_struct *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), pmu.enable_busy_stats); + + WARN_ON_ONCE(intel_enable_engine_stats(engine)); +} + +static void __disable_busy_stats(struct work_struct *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), pmu.disable_busy_stats.work); + + intel_disable_engine_stats(engine); +} + void i915_pmu_register(struct drm_i915_private *i915) { int ret; + struct intel_engine_cs *engine; + enum intel_engine_id id; if (INTEL_GEN(i915) <= 2) { ret = -ENOTSUPP; @@ -751,6 +783,12 @@ void i915_pmu_register(struct drm_i915_private *i915) i915->pmu.timer.function = i915_sample; i915->pmu.enable = 0; + for_each_engine(engine, i915, id) { + INIT_WORK(>pmu.enable_busy_stats, __enable_busy_stats); + INIT_DELAYED_WORK(>pmu.disable_busy_stats, + __disable_busy_stats); + } + if (perf_pmu_register(>pmu.base, "i915", -1)) i915->pmu.base.event_init = NULL; @@ -761,6 +799,9 @@ void i915_pmu_register(struct drm_i915_private *i915) void i915_pmu_unregister(struct drm_i915_private *i915) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + if (!i915->pmu.base.event_init) return; @@ -772,6 +813,11 @@ void i915_pmu_unregister(struct drm_i915_private *i915) hrtimer_cancel(>pmu.timer); + for_each_engine(engine, i915, id) { + flush_work(>pmu.enable_busy_stats); + flush_delayed_work(>pmu.disable_busy_stats); + } + perf_pmu_unregister(>pmu.base); i915->pmu.base.event_init = NULL; } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e2152dd21b4a..e4a8eb83a018 100644 ---