Re: [Intel-gfx] [PATCH 4/6] drm/i915/gt: Switch to manual evaluation of RPS

2020-04-25 Thread Chris Wilson
Quoting Chris Wilson (2020-04-25 18:57:49)
> +static bool has_busy_stats(struct intel_rps *rps)
> +{
> +   struct intel_engine_cs *engine;
> +   enum intel_engine_id id;
> +
> +   for_each_engine(engine, rps_to_gt(rps), id) {
> +   if (!intel_engine_supports_stats(engine))

Bah. The engines are not set up by this point. For the moment, I can just
hardcode it :(
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 4/6] drm/i915/gt: Switch to manual evaluation of RPS

2020-04-25 Thread Chris Wilson
As with the realisation for soft-rc6, we respond to idling the engines
within microseconds, far faster than the response times for HW RC6 and
RPS. Furthermore, our fast parking upon idle, prevents HW RPS from
running for many desktop workloads, as the RPS evaluation intervals are
on the order of tens of milliseconds, but the typical workload is just a
couple of milliseconds, and yet we still need to determine the best
frequency for user latency versus power.

Recognising that the HW evaluation intervals are a poor fit, and that
they were deprecated [in bspec at least] from gen10, start to wean
ourselves off them and replace the EI with a timer and our accurate
busy-stats. The principal benefit of manually evaluating RPS intervals
is that we can be more responsive for better performance and powersaving
for both spiky workloads and steady-state.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1698
Fixes: 98479ada421a ("drm/i915/gt: Treat idling as a RPS downclock event")
Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  5 ++
 drivers/gpu/drm/i915/gt/intel_rps.c  | 80 +++-
 drivers/gpu/drm/i915/gt/intel_rps.h  | 10 +++
 drivers/gpu/drm/i915/gt/intel_rps_types.h|  5 ++
 4 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index d7250b2d4175..b2c3e8df3eba 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -551,6 +551,11 @@ struct intel_engine_cs {
 * Idle is defined as active == 0, active is active > 0.
 */
ktime_t start;
+
+   /**
+* @rps: Utilisation at last RPS sampling.
+*/
+   ktime_t rps;
} stats;
 
struct {
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 58a10b3d60ba..36e59b74689f 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -45,6 +45,62 @@ static inline void set(struct intel_uncore *uncore, 
i915_reg_t reg, u32 val)
intel_uncore_write_fw(uncore, reg, val);
 }
 
+static void rps_timer(struct timer_list *t)
+{
+   struct intel_rps *rps = from_timer(rps, t, timer);
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+   ktime_t dt, last;
+   s64 max_busy = 0;
+
+   for_each_engine(engine, rps_to_gt(rps), id) {
+   dt = intel_engine_get_busy_time(engine);
+   last = engine->stats.rps;
+   engine->stats.rps = dt;
+
+   max_busy = max(max_busy, ktime_to_ns(ktime_sub(dt, last)));
+   }
+
+   dt = ktime_get();
+   last = rps->pm_timestamp;
+   rps->pm_timestamp = dt;
+
+   if (intel_rps_is_active(rps)) {
+   dt = ktime_sub(dt, last);
+
+   if (10 * max_busy > 9 * dt && /* >90% busy */
+   rps->cur_freq < rps->max_freq_softlimit) {
+   rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
+   rps->pm_interval = 1;
+   schedule_work(&rps->work);
+   } else if (4 * max_busy < 3 * dt && /* <75% busy */
+  rps->cur_freq > rps->min_freq_softlimit) {
+   rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
+   rps->pm_interval = 1;
+   schedule_work(&rps->work);
+   } else {
+   rps->last_adj = 0;
+   }
+
+   mod_timer(&rps->timer,
+ jiffies + msecs_to_jiffies(rps->pm_interval));
+   rps->pm_interval = min(rps->pm_interval + 1, 15u);
+   }
+}
+
+static void rps_start_timer(struct intel_rps *rps)
+{
+   rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+   rps->pm_interval = 1;
+   mod_timer(&rps->timer, jiffies + 1);
+}
+
+static void rps_stop_timer(struct intel_rps *rps)
+{
+   del_timer_sync(&rps->timer);
+   rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+}
+
 static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
 {
u32 mask = 0;
@@ -742,8 +798,11 @@ void intel_rps_unpark(struct intel_rps *rps)
 
mutex_unlock(&rps->lock);
 
+   rps->pm_iir = 0;
if (intel_rps_has_interrupts(rps))
rps_enable_interrupts(rps);
+   if (intel_rps_uses_timer(rps))
+   rps_start_timer(rps);
 
if (IS_GEN(rps_to_i915(rps), 5))
gen5_rps_update(rps);
@@ -754,6 +813,8 @@ void intel_rps_park(struct intel_rps *rps)
if (!intel_rps_clear_active(rps))
return;
 
+   if (intel_rps_uses_timer(rps))
+   rps_stop_timer(rps);
if (intel_rps_has_interrupts(rps))
rps_disable_interrupts(rps);
 
@@ -1679,10 +17