The RPS worker exists to do the bidding of the GT power management, so move it from i915_irq to intel_gt_pm.c where it can be hidden from the rest of the world. The goal being that the RPS worker is the one true way through which all RPS updates are coordinated.
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> Reviewed-by: Sagar Arun Kamble <sagar.a.kam...@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_irq.c | 141 --------------------- drivers/gpu/drm/i915/i915_sysfs.c | 38 ++---- drivers/gpu/drm/i915/intel_gt_pm.c | 189 +++++++++++++++++++++++------ drivers/gpu/drm/i915/intel_gt_pm.h | 1 - 5 files changed, 163 insertions(+), 207 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e11b8c7dbf4f..af0ac85615d0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3365,7 +3365,6 @@ extern void i915_redisable_vga(struct drm_i915_private *dev_priv); extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv); extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val); extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv); -extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val); extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 697800f607c8..24f10c1e9889 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1193,145 +1193,6 @@ static void notify_ring(struct intel_engine_cs *engine) trace_intel_engine_notify(engine, wait); } -static void vlv_c0_read(struct drm_i915_private *dev_priv, - struct intel_rps_ei *ei) -{ - ei->ktime = ktime_get_raw(); - ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT); - ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); -} - -void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) -{ - memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei)); -} - -static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - const struct intel_rps_ei *prev = &rps->ei; - struct intel_rps_ei now; - u32 events = 0; - - if ((pm_iir & 
GEN6_PM_RP_UP_EI_EXPIRED) == 0) - return 0; - - vlv_c0_read(dev_priv, &now); - - if (prev->ktime) { - u64 time, c0; - u32 render, media; - - time = ktime_us_delta(now.ktime, prev->ktime); - - time *= dev_priv->czclk_freq; - - /* Workload can be split between render + media, - * e.g. SwapBuffers being blitted in X after being rendered in - * mesa. To account for this we need to combine both engines - * into our activity counter. - */ - render = now.render_c0 - prev->render_c0; - media = now.media_c0 - prev->media_c0; - c0 = max(render, media); - c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ - - if (c0 > time * rps->up_threshold) - events = GEN6_PM_RP_UP_THRESHOLD; - else if (c0 < time * rps->down_threshold) - events = GEN6_PM_RP_DOWN_THRESHOLD; - } - - rps->ei = now; - return events; -} - -static void gen6_pm_rps_work(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, gt_pm.rps.work); - struct intel_rps *rps = &dev_priv->gt_pm.rps; - bool client_boost = false; - int new_delay, adj, min, max; - u32 pm_iir = 0; - - spin_lock_irq(&dev_priv->irq_lock); - if (rps->interrupts_enabled) { - pm_iir = fetch_and_zero(&rps->pm_iir); - client_boost = atomic_read(&rps->num_waiters); - } - spin_unlock_irq(&dev_priv->irq_lock); - - /* Make sure we didn't queue anything we're not going to process. */ - WARN_ON(pm_iir & ~dev_priv->pm_rps_events); - if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) - goto out; - - mutex_lock(&rps->lock); - - pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); - - adj = rps->last_adj; - new_delay = rps->cur_freq; - min = rps->min_freq_softlimit; - max = rps->max_freq_softlimit; - if (client_boost) - max = rps->max_freq; - if (client_boost && new_delay < rps->boost_freq) { - new_delay = rps->boost_freq; - adj = 0; - } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { - if (adj > 0) - adj *= 2; - else /* CHV needs even encode values */ - adj = IS_CHERRYVIEW(dev_priv) ? 
2 : 1; - - if (new_delay >= rps->max_freq_softlimit) - adj = 0; - } else if (client_boost) { - adj = 0; - } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { - if (rps->cur_freq > rps->efficient_freq) - new_delay = rps->efficient_freq; - else if (rps->cur_freq > rps->min_freq_softlimit) - new_delay = rps->min_freq_softlimit; - adj = 0; - } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { - if (adj < 0) - adj *= 2; - else /* CHV needs even encode values */ - adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1; - - if (new_delay <= rps->min_freq_softlimit) - adj = 0; - } else { /* unknown event */ - adj = 0; - } - - rps->last_adj = adj; - - /* sysfs frequency interfaces may have snuck in while servicing the - * interrupt - */ - new_delay += adj; - new_delay = clamp_t(int, new_delay, min, max); - - if (intel_set_rps(dev_priv, new_delay)) { - DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); - rps->last_adj = 0; - } - - mutex_unlock(&rps->lock); - -out: - /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ - spin_lock_irq(&dev_priv->irq_lock); - if (rps->interrupts_enabled) - gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events); - spin_unlock_irq(&dev_priv->irq_lock); -} - - /** * ivybridge_parity_work - Workqueue called when a parity error interrupt * occurred. 
@@ -4363,8 +4224,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv) intel_hpd_init_work(dev_priv); - INIT_WORK(&rps->work, gen6_pm_rps_work); - INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); for (i = 0; i < MAX_L3_SLICES; ++i) dev_priv->l3_parity.remap_info[i] = NULL; diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index fde5f0139ca1..a72aab28399f 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -355,17 +355,16 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 val; ssize_t ret; + u32 val; ret = kstrtou32(buf, 0, &val); if (ret) return ret; - intel_runtime_pm_get(dev_priv); - mutex_lock(&rps->lock); - val = intel_freq_opcode(dev_priv, val); + + mutex_lock(&rps->lock); if (val < rps->min_freq || val > rps->max_freq || val < rps->min_freq_softlimit) { @@ -378,19 +377,11 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, intel_gpu_freq(dev_priv, val)); rps->max_freq_softlimit = val; - - val = clamp_t(int, rps->cur_freq, - rps->min_freq_softlimit, - rps->max_freq_softlimit); - - /* We still need *_set_rps to process the new max_delay and - * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. 
*/ - ret = intel_set_rps(dev_priv, val); + schedule_work(&rps->work); unlock: mutex_unlock(&rps->lock); - intel_runtime_pm_put(dev_priv); + flush_work(&rps->work); return ret ?: count; } @@ -410,17 +401,16 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 val; ssize_t ret; + u32 val; ret = kstrtou32(buf, 0, &val); if (ret) return ret; - intel_runtime_pm_get(dev_priv); - mutex_lock(&rps->lock); - val = intel_freq_opcode(dev_priv, val); + + mutex_lock(&rps->lock); if (val < rps->min_freq || val > rps->max_freq || val > rps->max_freq_softlimit) { @@ -429,19 +419,11 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, } rps->min_freq_softlimit = val; - - val = clamp_t(int, rps->cur_freq, - rps->min_freq_softlimit, - rps->max_freq_softlimit); - - /* We still need *_set_rps to process the new min_delay and - * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. */ - ret = intel_set_rps(dev_priv, val); + schedule_work(&rps->work); unlock: mutex_unlock(&rps->lock); - intel_runtime_pm_put(dev_priv); + flush_work(&rps->work); return ret ?: count; } diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c index 733d346601ca..c51b40c791f8 100644 --- a/drivers/gpu/drm/i915/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/intel_gt_pm.c @@ -329,13 +329,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { struct intel_rps *rps = &dev_priv->gt_pm.rps; - /* - * min/max delay may still have been modified so be sure to - * write the limits value. 
- */ if (val != rps->cur_freq) { - gen6_set_rps_thresholds(dev_priv, val); - if (INTEL_GEN(dev_priv) >= 9) I915_WRITE(GEN6_RPNSWREQ, GEN9_FREQUENCY(val)); @@ -349,6 +343,8 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) GEN6_AGGRESSIVE_TURBO); } + gen6_set_rps_thresholds(dev_priv, val); + /* * Make sure we continue to get interrupts * until we hit the minimum or maximum frequencies. @@ -370,18 +366,17 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) "Odd GPU freq value\n")) val &= ~1; - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - if (val != dev_priv->gt_pm.rps.cur_freq) { vlv_punit_get(dev_priv); err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); vlv_punit_put(dev_priv); if (err) return err; - - gen6_set_rps_thresholds(dev_priv, val); } + gen6_set_rps_thresholds(dev_priv, val); + I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); + dev_priv->gt_pm.rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); @@ -426,6 +421,151 @@ static void vlv_set_rps_idle(struct drm_i915_private *i915) DRM_ERROR("Failed to set RPS for idle\n"); } +static int intel_set_rps(struct drm_i915_private *i915, u8 val) +{ + struct intel_rps *rps = &i915->gt_pm.rps; + int err; + + lockdep_assert_held(&rps->lock); + GEM_BUG_ON(val > rps->max_freq); + GEM_BUG_ON(val < rps->min_freq); + + if (!rps->enabled) { + rps->cur_freq = val; + return 0; + } + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + err = valleyview_set_rps(i915, val); + else + err = gen6_set_rps(i915, val); + + return err; +} + +static void vlv_c0_read(struct drm_i915_private *dev_priv, + struct intel_rps_ei *ei) +{ + ei->ktime = ktime_get_raw(); + ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT); + ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); +} + +static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + const struct intel_rps_ei *prev = &rps->ei; + 
struct intel_rps_ei now; + u32 events = 0; + + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) + return 0; + + vlv_c0_read(dev_priv, &now); + + if (prev->ktime) { + u64 time, c0; + u32 render, media; + + time = ktime_us_delta(now.ktime, prev->ktime); + + time *= dev_priv->czclk_freq; + + /* Workload can be split between render + media, + * e.g. SwapBuffers being blitted in X after being rendered in + * mesa. To account for this we need to combine both engines + * into our activity counter. + */ + render = now.render_c0 - prev->render_c0; + media = now.media_c0 - prev->media_c0; + c0 = max(render, media); + c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ + + if (c0 > time * rps->up_threshold) + events = GEN6_PM_RP_UP_THRESHOLD; + else if (c0 < time * rps->down_threshold) + events = GEN6_PM_RP_DOWN_THRESHOLD; + } + + rps->ei = now; + return events; +} + +static void intel_rps_work(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, struct drm_i915_private, gt_pm.rps.work); + struct intel_rps *rps = &i915->gt_pm.rps; + int freq, adj, min, max; + bool client_boost; + u32 pm_iir; + + pm_iir = xchg(&rps->pm_iir, 0) & ~i915->pm_rps_events; + pm_iir |= vlv_wa_c0_ei(i915, pm_iir); + + client_boost = atomic_read(&rps->num_waiters); + + mutex_lock(&rps->lock); + + min = rps->min_freq_softlimit; + max = rps->max_freq_softlimit; + if (client_boost && max < rps->boost_freq) + max = rps->boost_freq; + + GEM_BUG_ON(min < rps->min_freq); + GEM_BUG_ON(max > rps->max_freq); + GEM_BUG_ON(max < min); + + adj = rps->last_adj; + freq = rps->cur_freq; + if (client_boost && freq < rps->boost_freq) { + freq = rps->boost_freq; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { + if (adj > 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(i915) ? 
2 : 1; + + if (freq >= max) + adj = 0; + } else if (client_boost) { + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { + if (freq > max_t(int, rps->efficient_freq, min)) + freq = max_t(int, rps->efficient_freq, min); + else if (freq > min_t(int, rps->efficient_freq, min)) + freq = min_t(int, rps->efficient_freq, min); + + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { + if (adj < 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(i915) ? -2 : -1; + + if (freq <= min) + adj = 0; + } else { /* unknown/external event */ + adj = 0; + } + + if (intel_set_rps(i915, clamp_t(int, freq + adj, min, max))) { + DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); + adj = 0; + } + + mutex_unlock(&rps->lock); + + if (pm_iir) { + spin_lock_irq(&i915->irq_lock); + if (rps->interrupts_enabled) + gen6_unmask_pm_irq(i915, i915->pm_rps_events); + spin_unlock_irq(&i915->irq_lock); + rps->last_adj = adj; + } +} + void gen6_rps_busy(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -434,19 +574,17 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) if (rps->enabled) { u8 freq; - if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) - gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, rps->cur_freq)); gen6_enable_rps_interrupts(dev_priv); + memset(&rps->ei, 0, sizeof(rps->ei)); /* * Use the user's desired frequency as a guide, but for better * performance, jump directly to RPe as our starting frequency. */ - freq = max(rps->cur_freq, - rps->efficient_freq); + freq = max(rps->cur_freq, rps->efficient_freq); if (intel_set_rps(dev_priv, clamp(freq, @@ -515,28 +653,6 @@ void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client) atomic_inc(client ? 
&client->boosts : &rps->boosts); } -int intel_set_rps(struct drm_i915_private *i915, u8 val) -{ - struct intel_rps *rps = &i915->gt_pm.rps; - int err; - - lockdep_assert_held(&rps->lock); - GEM_BUG_ON(val > rps->max_freq); - GEM_BUG_ON(val < rps->min_freq); - - if (!rps->enabled) { - rps->cur_freq = val; - return 0; - } - - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) - err = valleyview_set_rps(i915, val); - else - err = gen6_set_rps(i915, val); - - return err; -} - static void gen9_disable_rc6(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RC_CONTROL, 0); @@ -2124,6 +2240,7 @@ void intel_init_gt_powersave(struct drm_i915_private *i915) struct intel_rps *rps = &i915->gt_pm.rps; mutex_init(&rps->lock); + INIT_WORK(&rps->work, intel_rps_work); /* * RPM depends on RC6 to save restore the GT HW context, so make RC6 a diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h index 20e937d6c7e0..5c52ca208df1 100644 --- a/drivers/gpu/drm/i915/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/intel_gt_pm.h @@ -95,7 +95,6 @@ void intel_disable_gt_powersave(struct drm_i915_private *i915); void intel_suspend_gt_powersave(struct drm_i915_private *i915); void gen6_rps_busy(struct drm_i915_private *i915); -void gen6_rps_reset_ei(struct drm_i915_private *i915); void gen6_rps_idle(struct drm_i915_private *i915); void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps); -- 2.17.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx