[PATCH] drm/i915/pmu: Fix sleep under atomic in RC6 readout
From: Tvrtko Ursulin We are not allowed to call intel_runtime_pm_get from the PMU counter read callback since the former can sleep, and the latter is running under IRQ context. To workaround this, we start our timer when we detect that we have failed to obtain a runtime PM reference during read, and approximate the growing RC6 counter from the timer. Once the timer manages to obtain the runtime PM reference, we stop the timer and go back to the above described behaviour. We have to be careful not to overshoot the RC6 estimate, so once resumed after a period of approximation, we only update the counter once it catches up. With the observation that RC6 is increasing while the device is suspended, this should not pose a problem and can only cause slight inaccuracies due clock base differences. Signed-off-by: Tvrtko Ursulin Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104943 Fixes: 6060b6aec03c ("drm/i915/pmu: Add RC6 residency metrics") Testcase: igt/perf_pmu/rc6-runtime-pm Cc: Tvrtko Ursulin Cc: Chris Wilson Cc: Imre Deak Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: David Airlie Cc: intel-...@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org --- drivers/gpu/drm/i915/i915_pmu.c | 149 ++-- drivers/gpu/drm/i915/i915_pmu.h | 1 + 2 files changed, 114 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 1c440460255d..dca41c072a7c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -90,23 +90,16 @@ static unsigned int event_enabled_bit(struct perf_event *event) return config_enabled_bit(event->attr.config); } -static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) +static bool +__pmu_needs_timer(struct drm_i915_private *i915, u64 enable, bool gpu_active) { - u64 enable; - - /* -* Only some counters need the sampling timer. -* -* We start with a bitmask of all currently enabled events. -*/ - enable = i915->pmu.enable; - /* -* Mask out all the ones which do not need the timer, or in +* Mask out all events which do not need the timer, or in * other words keep all the ones that could need the timer. */ enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) | + config_enabled_mask(I915_PMU_RC6_RESIDENCY) | ENGINE_SAMPLE_MASK; /* @@ -130,6 +123,11 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) return enable; } +static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) +{ + return __pmu_needs_timer(i915, i915->pmu.enable, gpu_active); +} + void i915_pmu_gt_parked(struct drm_i915_private *i915) { if (!i915->pmu.base.event_init) @@ -181,20 +179,20 @@ update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val) sample->cur += mul_u32_u32(val, unit); } -static void engines_sample(struct drm_i915_private *dev_priv) +static bool engines_sample(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; bool fw = false; if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0) - return; + return false; if (!dev_priv->gt.awake) - return; + return false; if (!intel_runtime_pm_get_if_in_use(dev_priv)) - return; + return false; for_each_engine(engine, dev_priv, id) { u32 current_seqno = intel_engine_get_seqno(engine); @@ -225,10 +223,51 @@ static void engines_sample(struct drm_i915_private *dev_priv) if (fw) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put(dev_priv); + return true; +} + +static u64 read_rc6_residency(struct drm_i915_private *i915) +{ + u64 val; + + val = intel_rc6_residency_ns(i915, IS_VALLEYVIEW(i915) ? + VLV_GT_RENDER_RC6 : GEN6_GT_GFX_RC6); + if (HAS_RC6p(i915)) + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); + if (HAS_RC6pp(i915)) + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); + + return val; +} + +static void +update_rc6_sample(struct drm_i915_private *i915, u64 val, bool locked) +{ + unsigned long flags; + + if (!locked) + spin_lock_irqsave(&i915->pmu.lock, flags); + + /* +* Update stored RC6 counter only if it is greater than the current +* value. This deals with periods of runtime suspend during which we are +* estimating the RC6 residency, so do not want to overshoot the real +* value read once the device is woken up. +*/ + if (val > i915->p
Re: [Intel-gfx] [PATCH] drm/i915/pmu: Fix sleep under atomic in RC6 readout
On 06/02/2018 16:10, Imre Deak wrote: On Tue, Feb 06, 2018 at 04:04:10PM +, Chris Wilson wrote: Quoting Tvrtko Ursulin (2018-02-06 14:31:07) +static u64 read_rc6_residency(struct drm_i915_private *i915) +{ + u64 val; + + val = intel_rc6_residency_ns(i915, IS_VALLEYVIEW(i915) ? + VLV_GT_RENDER_RC6 : GEN6_GT_GFX_RC6); + if (HAS_RC6p(i915)) + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); + if (HAS_RC6pp(i915)) + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); We really should mention that these may produce interesting results every 53 minutes. Switching to a timer will allow us to notice the wraparound in each counter. + + return val; +} + +static void +update_rc6_sample(struct drm_i915_private *i915, u64 val, bool locked) +{ + unsigned long flags; + + if (!locked) + spin_lock_irqsave(&i915->pmu.lock, flags); + + /* +* Update stored RC6 counter only if it is greater than the current +* value. This deals with periods of runtime suspend during which we are +* estimating the RC6 residency, so do not want to overshoot the real +* value read once the device is woken up. +*/ + if (val > i915->pmu.sample[__I915_SAMPLE_RC6].cur) + i915->pmu.sample[__I915_SAMPLE_RC6].cur = val; 64b wraparound? Maybe not today, maybe not tomorrow... ;) But in 585 years we're in trouble? :)) + + /* We don't need to sample RC6 from the timer any more. */ + i915->pmu.timer_enabled = + __pmu_needs_timer(i915, + i915->pmu.enable & ~config_enabled_mask(I915_PMU_RC6_RESIDENCY), + READ_ONCE(i915->gt.awake)); But we do... :) One thing I had in mind was to hook into runtime suspend/resume to read the counters there and compensate, but the more I think about it, we may as well solve the lack of resolution in the rc6 counters whilst we are here. https://bugs.freedesktop.org/show_bug.cgi?id=94852. How about using dev->power.suspended_jiffies? This sounds promising and I hope it ends up with a much nicer fix so I'll give it a go. RC6 wraparound issue I hope we can leave for a different patch. I hope it will be possible without a timer, as long as someone is not taking samples every 54 minutes only. (Don't mention VLV.) :) Regards, Tvrtko ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm/i915/pmu: Fix sleep under atomic in RC6 readout
On Tue, Feb 06, 2018 at 04:04:10PM +, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2018-02-06 14:31:07) > > +static u64 read_rc6_residency(struct drm_i915_private *i915) > > +{ > > + u64 val; > > + > > + val = intel_rc6_residency_ns(i915, IS_VALLEYVIEW(i915) ? > > + VLV_GT_RENDER_RC6 : > > GEN6_GT_GFX_RC6); > > + if (HAS_RC6p(i915)) > > + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); > > + if (HAS_RC6pp(i915)) > > + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); > > We really should mention that these may produce interesting results > every 53 minutes. Switching to a timer will allow us to notice the > wraparound in each counter. > > > + > > + return val; > > +} > > + > > +static void > > +update_rc6_sample(struct drm_i915_private *i915, u64 val, bool locked) > > +{ > > + unsigned long flags; > > + > > + if (!locked) > > + spin_lock_irqsave(&i915->pmu.lock, flags); > > + > > + /* > > +* Update stored RC6 counter only if it is greater than the current > > +* value. This deals with periods of runtime suspend during which > > we are > > +* estimating the RC6 residency, so do not want to overshoot the > > real > > +* value read once the device is woken up. > > +*/ > > + if (val > i915->pmu.sample[__I915_SAMPLE_RC6].cur) > > + i915->pmu.sample[__I915_SAMPLE_RC6].cur = val; > > 64b wraparound? Maybe not today, maybe not tomorrow... ;) > > > + > > + /* We don't need to sample RC6 from the timer any more. */ > > + i915->pmu.timer_enabled = > > + __pmu_needs_timer(i915, > > + i915->pmu.enable & > > ~config_enabled_mask(I915_PMU_RC6_RESIDENCY), > > + READ_ONCE(i915->gt.awake)); > > But we do... :) > One thing I had in mind was to hook into runtime suspend/resume to read > the counters there and compensate, but the more I think about it, we may > as well solve the lack of resolution in the rc6 counters whilst we are > here. https://bugs.freedesktop.org/show_bug.cgi?id=94852. How about using dev->power.suspended_jiffies? --Imre ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm/i915/pmu: Fix sleep under atomic in RC6 readout
Quoting Tvrtko Ursulin (2018-02-06 14:31:07) > +static u64 read_rc6_residency(struct drm_i915_private *i915) > +{ > + u64 val; > + > + val = intel_rc6_residency_ns(i915, IS_VALLEYVIEW(i915) ? > + VLV_GT_RENDER_RC6 : > GEN6_GT_GFX_RC6); > + if (HAS_RC6p(i915)) > + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); > + if (HAS_RC6pp(i915)) > + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); We really should mention that these may produce interesting results every 53 minutes. Switching to a timer will allow us to notice the wraparound in each counter. > + > + return val; > +} > + > +static void > +update_rc6_sample(struct drm_i915_private *i915, u64 val, bool locked) > +{ > + unsigned long flags; > + > + if (!locked) > + spin_lock_irqsave(&i915->pmu.lock, flags); > + > + /* > +* Update stored RC6 counter only if it is greater than the current > +* value. This deals with periods of runtime suspend during which we > are > +* estimating the RC6 residency, so do not want to overshoot the real > +* value read once the device is woken up. > +*/ > + if (val > i915->pmu.sample[__I915_SAMPLE_RC6].cur) > + i915->pmu.sample[__I915_SAMPLE_RC6].cur = val; 64b wraparound? Maybe not today, maybe not tomorrow... ;) > + > + /* We don't need to sample RC6 from the timer any more. */ > + i915->pmu.timer_enabled = > + __pmu_needs_timer(i915, > + i915->pmu.enable & > ~config_enabled_mask(I915_PMU_RC6_RESIDENCY), > + READ_ONCE(i915->gt.awake)); But we do... :) One thing I had in mind was to hook into runtime suspend/resume to read the counters there and compensate, but the more I think about it, we may as well solve the lack of resolution in the rc6 counters whilst we are here. https://bugs.freedesktop.org/show_bug.cgi?id=94852. -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel