Re: [Intel-gfx] [PATCH v2] drm/i915/pmu: Fix sleep under atomic in RC6 readout

2018-02-07 Thread Rafael J. Wysocki

On 2/6/2018 10:54 PM, Imre Deak wrote:

Hi Rafael,

On Tue, Feb 06, 2018 at 09:11:02PM +, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2018-02-06 18:33:11)

From: Tvrtko Ursulin 

We are not allowed to call intel_runtime_pm_get from the PMU counter read
callback since the former can sleep, and the latter is running under IRQ
context.

To workaround this, we record the last known RC6 and while runtime
suspended estimate its increase by querying the runtime PM core
timestamps.

Downside of this approach is that we can temporarily lose a chunk of RC6
time, from the last PMU read-out to runtime suspend entry, but that will
eventually catch up, once device comes back online and in the presence of
PMU queries.

Also, we have to be careful not to overshoot the RC6 estimate, so once
resumed after a period of approximation, we only update the counter once
it catches up. With the observation that RC6 is increasing while the
device is suspended, this should not pose a problem and can only cause
slight inaccuracies due clock base differences.

v2: Simplify by estimating on top of PM core counters. (Imre)

Signed-off-by: Tvrtko Ursulin 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104943
Fixes: 6060b6aec03c ("drm/i915/pmu: Add RC6 residency metrics")
Testcase: igt/perf_pmu/rc6-runtime-pm
Cc: Tvrtko Ursulin 
Cc: Chris Wilson 
Cc: Imre Deak 
Cc: Jani Nikula 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
Cc: David Airlie 
Cc: intel-gfx@lists.freedesktop.org
Cc: dri-de...@lists.freedesktop.org
---
  drivers/gpu/drm/i915/i915_pmu.c | 93 ++---
  drivers/gpu/drm/i915/i915_pmu.h |  6 +++
  2 files changed, 84 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 1c440460255d..bfc402d47609 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -415,7 +415,81 @@ static int i915_pmu_event_init(struct perf_event *event)
 return 0;
  }
  
-static u64 __i915_pmu_event_read(struct perf_event *event)

+static u64 get_rc6(struct drm_i915_private *i915, bool locked)
+{
+   unsigned long flags;
+   u64 val;
+
+   if (intel_runtime_pm_get_if_in_use(i915)) {
+   val = intel_rc6_residency_ns(i915, IS_VALLEYVIEW(i915) ?
+  VLV_GT_RENDER_RC6 :
+  GEN6_GT_GFX_RC6);
+
+   if (HAS_RC6p(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+   if (HAS_RC6pp(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+   intel_runtime_pm_put(i915);
+
+   /*
+* If we are coming back from being runtime suspended we must
+* be careful not to report a larger value than returned
+* previously.
+*/
+
+   if (!locked)
+   spin_lock_irqsave(&i915->pmu.lock, flags);
+
+   if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+   i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
+   } else {
+   val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
+   }
+
+   if (!locked)
+   spin_unlock_irqrestore(&i915->pmu.lock, flags);
+   } else {
+   struct pci_dev *pdev = i915->drm.pdev;
+   struct device *kdev = &pdev->dev;
+   unsigned long flags2;
+
+   /*
+* We are runtime suspended.
+*
+* Report the delta from when the device was suspended to now,
+* on top of the last known real value, as the approximated RC6
+* counter value.
+*/
+   if (!locked)
+   spin_lock_irqsave(&i915->pmu.lock, flags);
+
+   spin_lock_irqsave(&kdev->power.lock, flags2);
+
+   if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
+   i915->pmu.suspended_jiffies_last =
+   kdev->power.suspended_jiffies;
+
+   val = kdev->power.suspended_jiffies -
+ i915->pmu.suspended_jiffies_last;
+   val += jiffies - kdev->power.accounting_timestamp;
+
+   spin_unlock_irqrestore(&kdev->power.lock, flags2);
+
+   val = jiffies_to_nsecs(val);
+   val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
+   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+
+   if (!locked)
+   spin_unlock_irqrestore(&i915->pmu.lock, flags);
+   }
+
+   return val;
+}

I feel slightly dirty, but the dance checks out.

Would it be possible to add an RPM helper that provide

Re: [Intel-gfx] [PATCH v2] drm/i915/pmu: Fix sleep under atomic in RC6 readout

2018-02-07 Thread Chris Wilson
Quoting Tvrtko Ursulin (2018-02-07 09:20:27)
> 
> On 06/02/2018 21:54, Imre Deak wrote:
> > Hi Rafael,
> > 
> > On Tue, Feb 06, 2018 at 09:11:02PM +, Chris Wilson wrote:
> >> Quoting Tvrtko Ursulin (2018-02-06 18:33:11)
> >>> From: Tvrtko Ursulin 
> >>>
> >>> We are not allowed to call intel_runtime_pm_get from the PMU counter read
> >>> callback since the former can sleep, and the latter is running under IRQ
> >>> context.
> >>>
> >>> To workaround this, we record the last known RC6 and while runtime
> >>> suspended estimate its increase by querying the runtime PM core
> >>> timestamps.
> >>>
> >>> Downside of this approach is that we can temporarily lose a chunk of RC6
> >>> time, from the last PMU read-out to runtime suspend entry, but that will
> >>> eventually catch up, once device comes back online and in the presence of
> >>> PMU queries.
> >>>
> >>> Also, we have to be careful not to overshoot the RC6 estimate, so once
> >>> resumed after a period of approximation, we only update the counter once
> >>> it catches up. With the observation that RC6 is increasing while the
> >>> device is suspended, this should not pose a problem and can only cause
> >>> slight inaccuracies due clock base differences.
> >>>
> >>> v2: Simplify by estimating on top of PM core counters. (Imre)
> >>>
> >>> Signed-off-by: Tvrtko Ursulin 
> >>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104943
> >>> Fixes: 6060b6aec03c ("drm/i915/pmu: Add RC6 residency metrics")
> >>> Testcase: igt/perf_pmu/rc6-runtime-pm
> >>> Cc: Tvrtko Ursulin 
> >>> Cc: Chris Wilson 
> >>> Cc: Imre Deak 
> >>> Cc: Jani Nikula 
> >>> Cc: Joonas Lahtinen 
> >>> Cc: Rodrigo Vivi 
> >>> Cc: David Airlie 
> >>> Cc: intel-gfx@lists.freedesktop.org
> >>> Cc: dri-de...@lists.freedesktop.org
> >>> ---
> >>>   drivers/gpu/drm/i915/i915_pmu.c | 93 
> >>> ++---
> >>>   drivers/gpu/drm/i915/i915_pmu.h |  6 +++
> >>>   2 files changed, 84 insertions(+), 15 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c 
> >>> b/drivers/gpu/drm/i915/i915_pmu.c
> >>> index 1c440460255d..bfc402d47609 100644
> >>> --- a/drivers/gpu/drm/i915/i915_pmu.c
> >>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> >>> @@ -415,7 +415,81 @@ static int i915_pmu_event_init(struct perf_event 
> >>> *event)
> >>>  return 0;
> >>>   }
> >>>   
> >>> -static u64 __i915_pmu_event_read(struct perf_event *event)
> >>> +static u64 get_rc6(struct drm_i915_private *i915, bool locked)
> >>> +{
> >>> +   unsigned long flags;
> >>> +   u64 val;
> >>> +
> >>> +   if (intel_runtime_pm_get_if_in_use(i915)) {
> >>> +   val = intel_rc6_residency_ns(i915, IS_VALLEYVIEW(i915) ?
> >>> +  VLV_GT_RENDER_RC6 :
> >>> +  GEN6_GT_GFX_RC6);
> >>> +
> >>> +   if (HAS_RC6p(i915))
> >>> +   val += intel_rc6_residency_ns(i915, 
> >>> GEN6_GT_GFX_RC6p);
> >>> +
> >>> +   if (HAS_RC6pp(i915))
> >>> +   val += intel_rc6_residency_ns(i915, 
> >>> GEN6_GT_GFX_RC6pp);
> >>> +
> >>> +   intel_runtime_pm_put(i915);
> >>> +
> >>> +   /*
> >>> +* If we are coming back from being runtime suspended we 
> >>> must
> >>> +* be careful not to report a larger value than returned
> >>> +* previously.
> >>> +*/
> >>> +
> >>> +   if (!locked)
> >>> +   spin_lock_irqsave(&i915->pmu.lock, flags);
> >>> +
> >>> +   if (val >= 
> >>> i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
> >>> +   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur 
> >>> = 0;
> >>> +   i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
> >>> +   } else {
> >>> +   val = 
> >>> i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
> >>> +   }
> >>> +
> >>> +   if (!locked)
> >>> +   spin_unlock_irqrestore(&i915->pmu.lock, flags);
> >>> +   } else {
> >>> +   struct pci_dev *pdev = i915->drm.pdev;
> >>> +   struct device *kdev = &pdev->dev;
> >>> +   unsigned long flags2;
> >>> +
> >>> +   /*
> >>> +* We are runtime suspended.
> >>> +*
> >>> +* Report the delta from when the device was suspended to 
> >>> now,
> >>> +* on top of the last known real value, as the 
> >>> approximated RC6
> >>> +* counter value.
> >>> +*/
> >>> +   if (!locked)
> >>> +   spin_lock_irqsave(&i915->pmu.lock, flags);
> >>> +
> >>> +   spin_lock_irqsave(&kdev->power.lock, flags2);
> >>> +
> >>> +   if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
> >>> +   i915->pmu.suspended_jiffies_

Re: [Intel-gfx] [PATCH v2] drm/i915/pmu: Fix sleep under atomic in RC6 readout

2018-02-07 Thread Tvrtko Ursulin


On 06/02/2018 21:54, Imre Deak wrote:

Hi Rafael,

On Tue, Feb 06, 2018 at 09:11:02PM +, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2018-02-06 18:33:11)

From: Tvrtko Ursulin 

We are not allowed to call intel_runtime_pm_get from the PMU counter read
callback since the former can sleep, and the latter is running under IRQ
context.

To workaround this, we record the last known RC6 and while runtime
suspended estimate its increase by querying the runtime PM core
timestamps.

Downside of this approach is that we can temporarily lose a chunk of RC6
time, from the last PMU read-out to runtime suspend entry, but that will
eventually catch up, once device comes back online and in the presence of
PMU queries.

Also, we have to be careful not to overshoot the RC6 estimate, so once
resumed after a period of approximation, we only update the counter once
it catches up. With the observation that RC6 is increasing while the
device is suspended, this should not pose a problem and can only cause
slight inaccuracies due clock base differences.

v2: Simplify by estimating on top of PM core counters. (Imre)

Signed-off-by: Tvrtko Ursulin 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104943
Fixes: 6060b6aec03c ("drm/i915/pmu: Add RC6 residency metrics")
Testcase: igt/perf_pmu/rc6-runtime-pm
Cc: Tvrtko Ursulin 
Cc: Chris Wilson 
Cc: Imre Deak 
Cc: Jani Nikula 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
Cc: David Airlie 
Cc: intel-gfx@lists.freedesktop.org
Cc: dri-de...@lists.freedesktop.org
---
  drivers/gpu/drm/i915/i915_pmu.c | 93 ++---
  drivers/gpu/drm/i915/i915_pmu.h |  6 +++
  2 files changed, 84 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 1c440460255d..bfc402d47609 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -415,7 +415,81 @@ static int i915_pmu_event_init(struct perf_event *event)
 return 0;
  }
  
-static u64 __i915_pmu_event_read(struct perf_event *event)

+static u64 get_rc6(struct drm_i915_private *i915, bool locked)
+{
+   unsigned long flags;
+   u64 val;
+
+   if (intel_runtime_pm_get_if_in_use(i915)) {
+   val = intel_rc6_residency_ns(i915, IS_VALLEYVIEW(i915) ?
+  VLV_GT_RENDER_RC6 :
+  GEN6_GT_GFX_RC6);
+
+   if (HAS_RC6p(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+   if (HAS_RC6pp(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+   intel_runtime_pm_put(i915);
+
+   /*
+* If we are coming back from being runtime suspended we must
+* be careful not to report a larger value than returned
+* previously.
+*/
+
+   if (!locked)
+   spin_lock_irqsave(&i915->pmu.lock, flags);
+
+   if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+   i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
+   } else {
+   val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
+   }
+
+   if (!locked)
+   spin_unlock_irqrestore(&i915->pmu.lock, flags);
+   } else {
+   struct pci_dev *pdev = i915->drm.pdev;
+   struct device *kdev = &pdev->dev;
+   unsigned long flags2;
+
+   /*
+* We are runtime suspended.
+*
+* Report the delta from when the device was suspended to now,
+* on top of the last known real value, as the approximated RC6
+* counter value.
+*/
+   if (!locked)
+   spin_lock_irqsave(&i915->pmu.lock, flags);
+
+   spin_lock_irqsave(&kdev->power.lock, flags2);
+
+   if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
+   i915->pmu.suspended_jiffies_last =
+   kdev->power.suspended_jiffies;
+
+   val = kdev->power.suspended_jiffies -
+ i915->pmu.suspended_jiffies_last;
+   val += jiffies - kdev->power.accounting_timestamp;
+
+   spin_unlock_irqrestore(&kdev->power.lock, flags2);
+
+   val = jiffies_to_nsecs(val);
+   val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
+   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+
+   if (!locked)
+   spin_unlock_irqrestore(&i915->pmu.lock, flags);
+   }
+
+   return val;
+}


I feel slightly dirty, but the dance checks out.


Would it be possible to add an RPM helper that provi

Re: [Intel-gfx] [PATCH v2] drm/i915/pmu: Fix sleep under atomic in RC6 readout

2018-02-06 Thread Imre Deak
Hi Rafael,

On Tue, Feb 06, 2018 at 09:11:02PM +, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-02-06 18:33:11)
> > From: Tvrtko Ursulin 
> > 
> > We are not allowed to call intel_runtime_pm_get from the PMU counter read
> > callback since the former can sleep, and the latter is running under IRQ
> > context.
> > 
> > To workaround this, we record the last known RC6 and while runtime
> > suspended estimate its increase by querying the runtime PM core
> > timestamps.
> > 
> > Downside of this approach is that we can temporarily lose a chunk of RC6
> > time, from the last PMU read-out to runtime suspend entry, but that will
> > eventually catch up, once device comes back online and in the presence of
> > PMU queries.
> > 
> > Also, we have to be careful not to overshoot the RC6 estimate, so once
> > resumed after a period of approximation, we only update the counter once
> > it catches up. With the observation that RC6 is increasing while the
> > device is suspended, this should not pose a problem and can only cause
> > slight inaccuracies due clock base differences.
> > 
> > v2: Simplify by estimating on top of PM core counters. (Imre)
> > 
> > Signed-off-by: Tvrtko Ursulin 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104943
> > Fixes: 6060b6aec03c ("drm/i915/pmu: Add RC6 residency metrics")
> > Testcase: igt/perf_pmu/rc6-runtime-pm
> > Cc: Tvrtko Ursulin 
> > Cc: Chris Wilson 
> > Cc: Imre Deak 
> > Cc: Jani Nikula 
> > Cc: Joonas Lahtinen 
> > Cc: Rodrigo Vivi 
> > Cc: David Airlie 
> > Cc: intel-gfx@lists.freedesktop.org
> > Cc: dri-de...@lists.freedesktop.org
> > ---
> >  drivers/gpu/drm/i915/i915_pmu.c | 93 
> > ++---
> >  drivers/gpu/drm/i915/i915_pmu.h |  6 +++
> >  2 files changed, 84 insertions(+), 15 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c 
> > b/drivers/gpu/drm/i915/i915_pmu.c
> > index 1c440460255d..bfc402d47609 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -415,7 +415,81 @@ static int i915_pmu_event_init(struct perf_event 
> > *event)
> > return 0;
> >  }
> >  
> > -static u64 __i915_pmu_event_read(struct perf_event *event)
> > +static u64 get_rc6(struct drm_i915_private *i915, bool locked)
> > +{
> > +   unsigned long flags;
> > +   u64 val;
> > +
> > +   if (intel_runtime_pm_get_if_in_use(i915)) {
> > +   val = intel_rc6_residency_ns(i915, IS_VALLEYVIEW(i915) ?
> > +  VLV_GT_RENDER_RC6 :
> > +  GEN6_GT_GFX_RC6);
> > +
> > +   if (HAS_RC6p(i915))
> > +   val += intel_rc6_residency_ns(i915, 
> > GEN6_GT_GFX_RC6p);
> > +
> > +   if (HAS_RC6pp(i915))
> > +   val += intel_rc6_residency_ns(i915, 
> > GEN6_GT_GFX_RC6pp);
> > +
> > +   intel_runtime_pm_put(i915);
> > +
> > +   /*
> > +* If we are coming back from being runtime suspended we 
> > must
> > +* be careful not to report a larger value than returned
> > +* previously.
> > +*/
> > +
> > +   if (!locked)
> > +   spin_lock_irqsave(&i915->pmu.lock, flags);
> > +
> > +   if (val >= 
> > i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
> > +   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 
> > 0;
> > +   i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
> > +   } else {
> > +   val = 
> > i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
> > +   }
> > +
> > +   if (!locked)
> > +   spin_unlock_irqrestore(&i915->pmu.lock, flags);
> > +   } else {
> > +   struct pci_dev *pdev = i915->drm.pdev;
> > +   struct device *kdev = &pdev->dev;
> > +   unsigned long flags2;
> > +
> > +   /*
> > +* We are runtime suspended.
> > +*
> > +* Report the delta from when the device was suspended to 
> > now,
> > +* on top of the last known real value, as the approximated 
> > RC6
> > +* counter value.
> > +*/
> > +   if (!locked)
> > +   spin_lock_irqsave(&i915->pmu.lock, flags);
> > +
> > +   spin_lock_irqsave(&kdev->power.lock, flags2);
> > +
> > +   if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
> > +   i915->pmu.suspended_jiffies_last =
> > +   
> > kdev->power.suspended_jiffies;
> > +
> > +   val = kdev->power.suspended_jiffies -
> > + i915->pmu.suspended_jiffies_last;
> > +   val += jiffies - kdev->power.accounting_timestamp;
> > +
> > +   spin_unlock_irqres

Re: [Intel-gfx] [PATCH v2] drm/i915/pmu: Fix sleep under atomic in RC6 readout

2018-02-06 Thread Chris Wilson
Quoting Tvrtko Ursulin (2018-02-06 18:33:11)
> From: Tvrtko Ursulin 
> 
> We are not allowed to call intel_runtime_pm_get from the PMU counter read
> callback since the former can sleep, and the latter is running under IRQ
> context.
> 
> To workaround this, we record the last known RC6 and while runtime
> suspended estimate its increase by querying the runtime PM core
> timestamps.
> 
> Downside of this approach is that we can temporarily lose a chunk of RC6
> time, from the last PMU read-out to runtime suspend entry, but that will
> eventually catch up, once device comes back online and in the presence of
> PMU queries.
> 
> Also, we have to be careful not to overshoot the RC6 estimate, so once
> resumed after a period of approximation, we only update the counter once
> it catches up. With the observation that RC6 is increasing while the
> device is suspended, this should not pose a problem and can only cause
> slight inaccuracies due clock base differences.
> 
> v2: Simplify by estimating on top of PM core counters. (Imre)
> 
> Signed-off-by: Tvrtko Ursulin 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104943
> Fixes: 6060b6aec03c ("drm/i915/pmu: Add RC6 residency metrics")
> Testcase: igt/perf_pmu/rc6-runtime-pm
> Cc: Tvrtko Ursulin 
> Cc: Chris Wilson 
> Cc: Imre Deak 
> Cc: Jani Nikula 
> Cc: Joonas Lahtinen 
> Cc: Rodrigo Vivi 
> Cc: David Airlie 
> Cc: intel-gfx@lists.freedesktop.org
> Cc: dri-de...@lists.freedesktop.org
> ---
>  drivers/gpu/drm/i915/i915_pmu.c | 93 
> ++---
>  drivers/gpu/drm/i915/i915_pmu.h |  6 +++
>  2 files changed, 84 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 1c440460255d..bfc402d47609 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -415,7 +415,81 @@ static int i915_pmu_event_init(struct perf_event *event)
> return 0;
>  }
>  
> -static u64 __i915_pmu_event_read(struct perf_event *event)
> +static u64 get_rc6(struct drm_i915_private *i915, bool locked)
> +{
> +   unsigned long flags;
> +   u64 val;
> +
> +   if (intel_runtime_pm_get_if_in_use(i915)) {
> +   val = intel_rc6_residency_ns(i915, IS_VALLEYVIEW(i915) ?
> +  VLV_GT_RENDER_RC6 :
> +  GEN6_GT_GFX_RC6);
> +
> +   if (HAS_RC6p(i915))
> +   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
> +
> +   if (HAS_RC6pp(i915))
> +   val += intel_rc6_residency_ns(i915, 
> GEN6_GT_GFX_RC6pp);
> +
> +   intel_runtime_pm_put(i915);
> +
> +   /*
> +* If we are coming back from being runtime suspended we must
> +* be careful not to report a larger value than returned
> +* previously.
> +*/
> +
> +   if (!locked)
> +   spin_lock_irqsave(&i915->pmu.lock, flags);
> +
> +   if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) 
> {
> +   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
> +   i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
> +   } else {
> +   val = 
> i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
> +   }
> +
> +   if (!locked)
> +   spin_unlock_irqrestore(&i915->pmu.lock, flags);
> +   } else {
> +   struct pci_dev *pdev = i915->drm.pdev;
> +   struct device *kdev = &pdev->dev;
> +   unsigned long flags2;
> +
> +   /*
> +* We are runtime suspended.
> +*
> +* Report the delta from when the device was suspended to now,
> +* on top of the last known real value, as the approximated 
> RC6
> +* counter value.
> +*/
> +   if (!locked)
> +   spin_lock_irqsave(&i915->pmu.lock, flags);
> +
> +   spin_lock_irqsave(&kdev->power.lock, flags2);
> +
> +   if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
> +   i915->pmu.suspended_jiffies_last =
> +   kdev->power.suspended_jiffies;
> +
> +   val = kdev->power.suspended_jiffies -
> + i915->pmu.suspended_jiffies_last;
> +   val += jiffies - kdev->power.accounting_timestamp;
> +
> +   spin_unlock_irqrestore(&kdev->power.lock, flags2);
> +
> +   val = jiffies_to_nsecs(val);
> +   val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
> +   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
> +
> +   if (!locked)
> +   spin_unlock_irqrestore(&i915->pmu.lock, flags);
> +   }

[Intel-gfx] [PATCH v2] drm/i915/pmu: Fix sleep under atomic in RC6 readout

2018-02-06 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

We are not allowed to call intel_runtime_pm_get from the PMU counter read
callback since the former can sleep, and the latter is running under IRQ
context.

To workaround this, we record the last known RC6 and while runtime
suspended estimate its increase by querying the runtime PM core
timestamps.

Downside of this approach is that we can temporarily lose a chunk of RC6
time, from the last PMU read-out to runtime suspend entry, but that will
eventually catch up, once device comes back online and in the presence of
PMU queries.

Also, we have to be careful not to overshoot the RC6 estimate, so once
resumed after a period of approximation, we only update the counter once
it catches up. With the observation that RC6 is increasing while the
device is suspended, this should not pose a problem and can only cause
slight inaccuracies due clock base differences.

v2: Simplify by estimating on top of PM core counters. (Imre)

Signed-off-by: Tvrtko Ursulin 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104943
Fixes: 6060b6aec03c ("drm/i915/pmu: Add RC6 residency metrics")
Testcase: igt/perf_pmu/rc6-runtime-pm
Cc: Tvrtko Ursulin 
Cc: Chris Wilson 
Cc: Imre Deak 
Cc: Jani Nikula 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
Cc: David Airlie 
Cc: intel-gfx@lists.freedesktop.org
Cc: dri-de...@lists.freedesktop.org
---
 drivers/gpu/drm/i915/i915_pmu.c | 93 ++---
 drivers/gpu/drm/i915/i915_pmu.h |  6 +++
 2 files changed, 84 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 1c440460255d..bfc402d47609 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -415,7 +415,81 @@ static int i915_pmu_event_init(struct perf_event *event)
return 0;
 }
 
-static u64 __i915_pmu_event_read(struct perf_event *event)
+static u64 get_rc6(struct drm_i915_private *i915, bool locked)
+{
+   unsigned long flags;
+   u64 val;
+
+   if (intel_runtime_pm_get_if_in_use(i915)) {
+   val = intel_rc6_residency_ns(i915, IS_VALLEYVIEW(i915) ?
+  VLV_GT_RENDER_RC6 :
+  GEN6_GT_GFX_RC6);
+
+   if (HAS_RC6p(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+   if (HAS_RC6pp(i915))
+   val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+   intel_runtime_pm_put(i915);
+
+   /*
+* If we are coming back from being runtime suspended we must
+* be careful not to report a larger value than returned
+* previously.
+*/
+
+   if (!locked)
+   spin_lock_irqsave(&i915->pmu.lock, flags);
+
+   if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+   i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
+   } else {
+   val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
+   }
+
+   if (!locked)
+   spin_unlock_irqrestore(&i915->pmu.lock, flags);
+   } else {
+   struct pci_dev *pdev = i915->drm.pdev;
+   struct device *kdev = &pdev->dev;
+   unsigned long flags2;
+
+   /*
+* We are runtime suspended.
+*
+* Report the delta from when the device was suspended to now,
+* on top of the last known real value, as the approximated RC6
+* counter value.
+*/
+   if (!locked)
+   spin_lock_irqsave(&i915->pmu.lock, flags);
+
+   spin_lock_irqsave(&kdev->power.lock, flags2);
+
+   if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
+   i915->pmu.suspended_jiffies_last =
+   kdev->power.suspended_jiffies;
+
+   val = kdev->power.suspended_jiffies -
+ i915->pmu.suspended_jiffies_last;
+   val += jiffies - kdev->power.accounting_timestamp;
+
+   spin_unlock_irqrestore(&kdev->power.lock, flags2);
+
+   val = jiffies_to_nsecs(val);
+   val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
+   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+
+   if (!locked)
+   spin_unlock_irqrestore(&i915->pmu.lock, flags);
+   }
+
+   return val;
+}
+
+static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
 {
struct drm_i915_private *i915 =
container_of(event->pmu, typeof(*i915), pmu.base);
@@ -453,18 +527,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)