Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-08-05 Thread Chris Wilson
Quoting Chris Wilson (2019-08-05 10:43:33)
> As we track when we put the GT device to sleep upon idling, we can use
> that callback to sample the current rc6 counters and record the
> timestamp for estimating samples after that point while asleep.
> 
> v2: Stick to using ktime_t
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
> Signed-off-by: Chris Wilson 
> Cc: Tvrtko Ursulin 
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c |  21 +++--
>  drivers/gpu/drm/i915/i915_pmu.c | 120 ++--
>  drivers/gpu/drm/i915/i915_pmu.h |   4 +-
>  3 files changed, 69 insertions(+), 76 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index 8953336f2ae5..bcb8081f564f 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -39,6 +39,7 @@
>  #include "display/intel_psr.h"
>  
>  #include "gem/i915_gem_context.h"
> +#include "gt/intel_gt_pm.h"
>  #include "gt/intel_reset.h"
>  #include "gt/uc/intel_guc_submission.h"
>  
> @@ -3996,13 +3997,11 @@ static int i915_sseu_status(struct seq_file *m, void 
> *unused)
>  static int i915_forcewake_open(struct inode *inode, struct file *file)
>  {
> struct drm_i915_private *i915 = inode->i_private;
> +   struct intel_gt *gt = &i915->gt;
>  
> -   if (INTEL_GEN(i915) < 6)
> -   return 0;
> -
> -   file->private_data =
> -   (void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
> -   intel_uncore_forcewake_user_get(&i915->uncore);
> +   intel_gt_pm_get(gt);
> +   if (INTEL_GEN(i915) >= 6)
> +   intel_uncore_forcewake_user_get(gt->uncore);

And this interacts badly with the plan to use wait_for_idle. :|
So be it.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 2/2] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-08-05 Thread Chris Wilson
As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

v2: Stick to using ktime_t

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  21 +++--
 drivers/gpu/drm/i915/i915_pmu.c | 120 ++--
 drivers/gpu/drm/i915/i915_pmu.h |   4 +-
 3 files changed, 69 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 8953336f2ae5..bcb8081f564f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -39,6 +39,7 @@
 #include "display/intel_psr.h"
 
 #include "gem/i915_gem_context.h"
+#include "gt/intel_gt_pm.h"
 #include "gt/intel_reset.h"
 #include "gt/uc/intel_guc_submission.h"
 
@@ -3996,13 +3997,11 @@ static int i915_sseu_status(struct seq_file *m, void 
*unused)
 static int i915_forcewake_open(struct inode *inode, struct file *file)
 {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = &i915->gt;
 
-   if (INTEL_GEN(i915) < 6)
-   return 0;
-
-   file->private_data =
-   (void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
-   intel_uncore_forcewake_user_get(&i915->uncore);
+   intel_gt_pm_get(gt);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_get(gt->uncore);
 
return 0;
 }
@@ -4010,13 +4009,11 @@ static int i915_forcewake_open(struct inode *inode, 
struct file *file)
 static int i915_forcewake_release(struct inode *inode, struct file *file)
 {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = &i915->gt;
 
-   if (INTEL_GEN(i915) < 6)
-   return 0;
-
-   intel_uncore_forcewake_user_put(&i915->uncore);
-   intel_runtime_pm_put(&i915->runtime_pm,
-(intel_wakeref_t)(uintptr_t)file->private_data);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_put(&i915->uncore);
+   intel_gt_pm_put(gt);
 
return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index c2e5f6d5c1e0..61d4fa99e413 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -115,19 +115,51 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool 
gpu_active)
return enable;
 }
 
+static u64 __get_rc6(struct intel_gt *gt)
+{
+   struct drm_i915_private *i915 = gt->i915;
+   u64 val;
+
+   val = intel_rc6_residency_ns(i915,
+IS_VALLEYVIEW(i915) ?
+VLV_GT_RENDER_RC6 :
+GEN6_GT_GFX_RC6);
+
+   if (HAS_RC6p(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+   if (HAS_RC6pp(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+   return val;
+}
+
 void i915_pmu_gt_parked(struct drm_i915_private *i915)
 {
struct i915_pmu *pmu = &i915->pmu;
+   u64 val;
 
if (!pmu->base.event_init)
return;
 
spin_lock_irq(&pmu->lock);
+
+   val = 0;
+   if (pmu->sample[__I915_SAMPLE_RC6].cur)
+   val = __get_rc6(&i915->gt);
+
+   if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+   pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+   pmu->sample[__I915_SAMPLE_RC6].cur = val;
+   }
+   pmu->sleep_last = ktime_get();
+
/*
 * Signal sampling timer to stop if only engine events are enabled and
 * GPU went idle.
 */
pmu->timer_enabled = pmu_needs_timer(pmu, false);
+
spin_unlock_irq(&pmu->lock);
 }
 
@@ -142,6 +174,11 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu 
*pmu)
}
 }
 
+static inline s64 ktime_since(const ktime_t kt)
+{
+   return ktime_to_ns(ktime_sub(ktime_get(), kt));
+}
+
 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 {
struct i915_pmu *pmu = &i915->pmu;
@@ -150,10 +187,22 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
return;
 
spin_lock_irq(&pmu->lock);
+
/*
 * Re-enable sampling timer when GPU goes active.
 */
__i915_pmu_maybe_start_timer(pmu);
+
+   /* Estimate how long we slept and accumulate that into rc6 counters */
+   if (pmu->sample[__I915_SAMPLE_RC6].cur) {
+   u64 val;
+
+   val = ktime_since(pmu->sleep_last);
+   val += pmu->sample[__I915_SAMPLE_RC6].cur;
+
+   pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+   }
+
spin_unlock_irq(&pmu->lock);
 }
 
@@ -425,39 +474,18 @@ static int i915_pmu_event_init(struct perf_event *event)
return 0;
 }
 
-static u64 __get_rc6(struct intel_gt *gt)
-{
-   struct 

Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-08-02 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-08-02 09:41:26)
> 
> On 01/08/2019 19:26, Chris Wilson wrote:
> > As we track when we put the GT device to sleep upon idling, we can use
> > that callback to sample the current rc6 counters and record the
> > timestamp for estimating samples after that point while asleep.
> > 
> > Signed-off-by: Chris Wilson 
> > Cc: Tvrtko Ursulin 
> > ---
> >   drivers/gpu/drm/i915/i915_debugfs.c |  21 ++---
> >   drivers/gpu/drm/i915/i915_pmu.c | 122 ++--
> >   drivers/gpu/drm/i915/i915_pmu.h |   4 +-
> >   3 files changed, 71 insertions(+), 76 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> > b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 24787bb48c9f..a96e630d3f86 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -39,6 +39,7 @@
> >   #include "display/intel_psr.h"
> >   
> >   #include "gem/i915_gem_context.h"
> > +#include "gt/intel_gt_pm.h"
> >   #include "gt/intel_reset.h"
> >   #include "gt/uc/intel_guc_submission.h"
> >   
> > @@ -4057,13 +4058,11 @@ static int i915_sseu_status(struct seq_file *m, 
> > void *unused)
> >   static int i915_forcewake_open(struct inode *inode, struct file *file)
> >   {
> >   struct drm_i915_private *i915 = inode->i_private;
> > + struct intel_gt *gt = &i915->gt;
> >   
> > - if (INTEL_GEN(i915) < 6)
> > - return 0;
> > -
> > - file->private_data =
> > - (void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
> > - intel_uncore_forcewake_user_get(&i915->uncore);
> > + intel_gt_pm_get(gt);
> > + if (INTEL_GEN(i915) >= 6)
> > + intel_uncore_forcewake_user_get(gt->uncore);
> >   
> >   return 0;
> >   }
> > @@ -4071,13 +4070,11 @@ static int i915_forcewake_open(struct inode *inode, 
> > struct file *file)
> >   static int i915_forcewake_release(struct inode *inode, struct file *file)
> >   {
> >   struct drm_i915_private *i915 = inode->i_private;
> > + struct intel_gt *gt = &i915->gt;
> >   
> > - if (INTEL_GEN(i915) < 6)
> > - return 0;
> > -
> > - intel_uncore_forcewake_user_put(&i915->uncore);
> > - intel_runtime_pm_put(&i915->runtime_pm,
> > -  (intel_wakeref_t)(uintptr_t)file->private_data);
> > + if (INTEL_GEN(i915) >= 6)
> > + intel_uncore_forcewake_user_put(&i915->uncore);
> > + intel_gt_pm_put(gt);
> >   
> >   return 0;
> >   }
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c 
> > b/drivers/gpu/drm/i915/i915_pmu.c
> > index 4d7cabeea687..680618bd385c 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -114,17 +114,50 @@ static bool pmu_needs_timer(struct drm_i915_private 
> > *i915, bool gpu_active)
> >   return enable;
> >   }
> >   
> > +static u64 __get_rc6(struct intel_gt *gt)
> > +{
> > + struct drm_i915_private *i915 = gt->i915;
> > + u64 val;
> > +
> > + val = intel_rc6_residency_ns(i915,
> > +  IS_VALLEYVIEW(i915) ?
> > +  VLV_GT_RENDER_RC6 :
> > +  GEN6_GT_GFX_RC6);
> > +
> > + if (HAS_RC6p(i915))
> > + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
> > +
> > + if (HAS_RC6pp(i915))
> > + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
> > +
> > + return val;
> > +}
> > +
> >   void i915_pmu_gt_parked(struct drm_i915_private *i915)
> >   {
> > + u64 val;
> > +
> >   if (!i915->pmu.base.event_init)
> >   return;
> >   
> > + val = 0;
> > + if (i915->pmu.sample[__I915_SAMPLE_RC6].cur)
> > + val = __get_rc6(&i915->gt);
> 
> The conditional could be racy outside the lock. If a parallel perf 
> reader updates .cur from zero to non-zero, the housekeeping below would 
> see val as zero. Perhaps you can store val = __get_rc6() outside the 
> lock, and then decide which val to use inside the lock?

I don't think it matters tbh if we regard the pmu as being off and it is
switched on as we park as it doesn't affect the estimation.

It's inside the lock on one branch, so no real excuse not to do so here.

> >   spin_lock_irq(&i915->pmu.lock);
> > +
> > + if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
> > + i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
> > + i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
> > + }
> > + i915->pmu.sleep_timestamp = jiffies;
> 
> ktime would be better, I think: more precision, and there is no reason 
> to use archaic jiffies.

It's only a coarse estimate, but your wish is my command :-)

> > -static u64 get_rc6(struct drm_i915_private *i915)
> > -{
> > -#if IS_ENABLED(CONFIG_PM)
> 
> We still end up with never getting into estimation mode, even when 
> parked, right? Hm.. why I added this.. never mind.

As we were using pm_runtime, we depended upon its structs.

> > - struct intel_runtime_pm *rpm = &i915->runtime_pm;
> > -

Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-08-02 Thread Tvrtko Ursulin


On 01/08/2019 19:26, Chris Wilson wrote:

As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
  drivers/gpu/drm/i915/i915_debugfs.c |  21 ++---
  drivers/gpu/drm/i915/i915_pmu.c | 122 ++--
  drivers/gpu/drm/i915/i915_pmu.h |   4 +-
  3 files changed, 71 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 24787bb48c9f..a96e630d3f86 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -39,6 +39,7 @@
  #include "display/intel_psr.h"
  
  #include "gem/i915_gem_context.h"

+#include "gt/intel_gt_pm.h"
  #include "gt/intel_reset.h"
  #include "gt/uc/intel_guc_submission.h"
  
@@ -4057,13 +4058,11 @@ static int i915_sseu_status(struct seq_file *m, void *unused)

  static int i915_forcewake_open(struct inode *inode, struct file *file)
  {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = &i915->gt;
  
-	if (INTEL_GEN(i915) < 6)

-   return 0;
-
-   file->private_data =
-   (void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
-   intel_uncore_forcewake_user_get(&i915->uncore);
+   intel_gt_pm_get(gt);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_get(gt->uncore);
  
  	return 0;

  }
@@ -4071,13 +4070,11 @@ static int i915_forcewake_open(struct inode *inode, 
struct file *file)
  static int i915_forcewake_release(struct inode *inode, struct file *file)
  {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = &i915->gt;
  
-	if (INTEL_GEN(i915) < 6)

-   return 0;
-
-   intel_uncore_forcewake_user_put(&i915->uncore);
-   intel_runtime_pm_put(&i915->runtime_pm,
-(intel_wakeref_t)(uintptr_t)file->private_data);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_put(&i915->uncore);
+   intel_gt_pm_put(gt);
  
  	return 0;

  }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 4d7cabeea687..680618bd385c 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -114,17 +114,50 @@ static bool pmu_needs_timer(struct drm_i915_private 
*i915, bool gpu_active)
return enable;
  }
  
+static u64 __get_rc6(struct intel_gt *gt)

+{
+   struct drm_i915_private *i915 = gt->i915;
+   u64 val;
+
+   val = intel_rc6_residency_ns(i915,
+IS_VALLEYVIEW(i915) ?
+VLV_GT_RENDER_RC6 :
+GEN6_GT_GFX_RC6);
+
+   if (HAS_RC6p(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+   if (HAS_RC6pp(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+   return val;
+}
+
  void i915_pmu_gt_parked(struct drm_i915_private *i915)
  {
+   u64 val;
+
if (!i915->pmu.base.event_init)
return;
  
+	val = 0;

+   if (i915->pmu.sample[__I915_SAMPLE_RC6].cur)
+   val = __get_rc6(&i915->gt);


The conditional could be racy outside the lock. If a parallel perf 
reader updates .cur from zero to non-zero, the housekeeping below would 
see val as zero. Perhaps you can store val = __get_rc6() outside the 
lock, and then decide which val to use inside the lock?



+
spin_lock_irq(&i915->pmu.lock);
+
+   if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+   i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
+   }
+   i915->pmu.sleep_timestamp = jiffies;


ktime would be better, I think: more precision, and there is no reason 
to use archaic jiffies.



+
/*
 * Signal sampling timer to stop if only engine events are enabled and
 * GPU went idle.
 */
i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
+
spin_unlock_irq(&i915->pmu.lock);
  }
  
@@ -145,10 +178,23 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)

return;
  
  	spin_lock_irq(&i915->pmu.lock);

+
/*
 * Re-enable sampling timer when GPU goes active.
 */
__i915_pmu_maybe_start_timer(i915);
+
+   /* Estimate how long we slept and accumulate that into rc6 counters */
+   if (i915->pmu.sample[__I915_SAMPLE_RC6].cur) {
+   u64 val;
+
+   val = jiffies - i915->pmu.sleep_timestamp;
+   val = jiffies_to_nsecs(val);
+   val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
+
+   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+   }
+
spin_unlock_irq(&i915->pmu.lock);
  }
  
@@ -417,36 +463,17 @@ static int i915_pmu_event_init(struct 

Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-08-01 Thread Chris Wilson
Quoting Chris Wilson (2019-08-01 19:26:57)
> As we track when we put the GT device to sleep upon idling, we can use
> that callback to sample the current rc6 counters and record the
> timestamp for estimating samples after that point while asleep.
> 

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
> Signed-off-by: Chris Wilson 
> Cc: Tvrtko Ursulin 
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 2/2] drm/i915/pmu: Use GT parked for estimating RC6 while asleep

2019-08-01 Thread Chris Wilson
As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  21 ++---
 drivers/gpu/drm/i915/i915_pmu.c | 122 ++--
 drivers/gpu/drm/i915/i915_pmu.h |   4 +-
 3 files changed, 71 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 24787bb48c9f..a96e630d3f86 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -39,6 +39,7 @@
 #include "display/intel_psr.h"
 
 #include "gem/i915_gem_context.h"
+#include "gt/intel_gt_pm.h"
 #include "gt/intel_reset.h"
 #include "gt/uc/intel_guc_submission.h"
 
@@ -4057,13 +4058,11 @@ static int i915_sseu_status(struct seq_file *m, void 
*unused)
 static int i915_forcewake_open(struct inode *inode, struct file *file)
 {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = &i915->gt;
 
-   if (INTEL_GEN(i915) < 6)
-   return 0;
-
-   file->private_data =
-   (void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
-   intel_uncore_forcewake_user_get(&i915->uncore);
+   intel_gt_pm_get(gt);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_get(gt->uncore);
 
return 0;
 }
@@ -4071,13 +4070,11 @@ static int i915_forcewake_open(struct inode *inode, 
struct file *file)
 static int i915_forcewake_release(struct inode *inode, struct file *file)
 {
struct drm_i915_private *i915 = inode->i_private;
+   struct intel_gt *gt = &i915->gt;
 
-   if (INTEL_GEN(i915) < 6)
-   return 0;
-
-   intel_uncore_forcewake_user_put(&i915->uncore);
-   intel_runtime_pm_put(&i915->runtime_pm,
-(intel_wakeref_t)(uintptr_t)file->private_data);
+   if (INTEL_GEN(i915) >= 6)
+   intel_uncore_forcewake_user_put(&i915->uncore);
+   intel_gt_pm_put(gt);
 
return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 4d7cabeea687..680618bd385c 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -114,17 +114,50 @@ static bool pmu_needs_timer(struct drm_i915_private 
*i915, bool gpu_active)
return enable;
 }
 
+static u64 __get_rc6(struct intel_gt *gt)
+{
+   struct drm_i915_private *i915 = gt->i915;
+   u64 val;
+
+   val = intel_rc6_residency_ns(i915,
+IS_VALLEYVIEW(i915) ?
+VLV_GT_RENDER_RC6 :
+GEN6_GT_GFX_RC6);
+
+   if (HAS_RC6p(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+   if (HAS_RC6pp(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+   return val;
+}
+
 void i915_pmu_gt_parked(struct drm_i915_private *i915)
 {
+   u64 val;
+
if (!i915->pmu.base.event_init)
return;
 
+   val = 0;
+   if (i915->pmu.sample[__I915_SAMPLE_RC6].cur)
+   val = __get_rc6(&i915->gt);
+
spin_lock_irq(&i915->pmu.lock);
+
+   if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+   i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
+   }
+   i915->pmu.sleep_timestamp = jiffies;
+
/*
 * Signal sampling timer to stop if only engine events are enabled and
 * GPU went idle.
 */
i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
+
spin_unlock_irq(&i915->pmu.lock);
 }
 
@@ -145,10 +178,23 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
return;
 
spin_lock_irq(&i915->pmu.lock);
+
/*
 * Re-enable sampling timer when GPU goes active.
 */
__i915_pmu_maybe_start_timer(i915);
+
+   /* Estimate how long we slept and accumulate that into rc6 counters */
+   if (i915->pmu.sample[__I915_SAMPLE_RC6].cur) {
+   u64 val;
+
+   val = jiffies - i915->pmu.sleep_timestamp;
+   val = jiffies_to_nsecs(val);
+   val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
+
+   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+   }
+
spin_unlock_irq(&i915->pmu.lock);
 }
 
@@ -417,36 +463,17 @@ static int i915_pmu_event_init(struct perf_event *event)
return 0;
 }
 
-static u64 __get_rc6(struct drm_i915_private *i915)
+static u64 get_rc6(struct intel_gt *gt)
 {
-   u64 val;
-
-   val = intel_rc6_residency_ns(i915,
-IS_VALLEYVIEW(i915) ?
-VLV_GT_RENDER_RC6 :
-GEN6_GT_GFX_RC6);
-
-   if (HAS_RC6p(i915))
-