Re: [Intel-gfx] [PATCH v2] drm/i915: Modifying RC6 Promotion timer for Media workloads.

2015-03-12 Thread Deepak S



On Friday 06 March 2015 10:10 PM, Daniel Vetter wrote:

On Thu, Mar 05, 2015 at 09:27:59PM +0530, deepa...@linux.intel.com wrote:

From: Deepak S deepa...@linux.intel.com

In normal cases, RC6 promotion timer is 1700us/500us. This will
result in more time spent in C1 state. For more residency in
C6 in case of media workloads, this is changed to 250us.
Not doing this for 3D workloads as too many C6-C0
transition delays can result in performance impact.

v2: Extend GPU busy & idle detection framework for rc6 Promotion
timer changes (Chris)

Signed-off-by: Deepak S deepa...@linux.intel.com

I thought Chris' idea was to put this into the gen6_rps_boost/idle
functions? You could check from within them I think for whether the vcs is
still busy ... One more comment below.
-Daniel


Hi Daniel,

gen6_rps_boost/idle will be called only for RCS, right? Also, we get
gen6_rps_boost during __wait_request.
But we want to program the promotion timer when we add a request to VCS, to apply the
value immediately.

Thanks
Deepak


---
  drivers/gpu/drm/i915/i915_gem.c  | 10 +-
  drivers/gpu/drm/i915/intel_display.c |  3 ++-
  drivers/gpu/drm/i915/intel_drv.h |  2 ++
  drivers/gpu/drm/i915/intel_pm.c  | 27 +++
  4 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3831cc0..85f8aa6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2428,7 +2428,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
struct drm_i915_gem_request *request;
struct intel_ringbuffer *ringbuf;
u32 request_start;
-   int ret;
+   int ret, was_empty;
  
  	request = ring-outstanding_lazy_request;

if (WARN_ON(request == NULL))
@@ -2495,6 +2495,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
}
  
  	request-emitted_jiffies = jiffies;

+   was_empty = list_empty(ring-request_list);
list_add_tail(request-list, ring-request_list);
request-file_priv = NULL;
  
@@ -2519,6 +2520,10 @@ int __i915_add_request(struct intel_engine_cs *ring,

queue_delayed_work(dev_priv-wq,
   dev_priv-mm.retire_work,
   round_jiffies_up_relative(HZ));
+
+   if ((ring-id == VCS)  was_empty)
+   vlv_media_promotion_timer_busy(dev_priv);
+
intel_mark_busy(dev_priv-dev);
  
  	return 0;

@@ -2802,6 +2807,9 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
*ring)
}
  
  	WARN_ON(i915_verify_lists(ring-dev));

+
+   if (ring-id == VCS  list_empty(ring-request_list))
+   vlv_media_promotion_timer_idle(dev_priv);
  }
  
  bool

diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 597c10b..5d121b4 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9172,8 +9172,9 @@ void intel_mark_idle(struct drm_device *dev)
intel_decrease_pllclock(crtc);
}
  
-	if (INTEL_INFO(dev)-gen = 6)

+   if (INTEL_INFO(dev)-gen = 6) {
gen6_rps_idle(dev-dev_private);
+   }

Unnecessary hunk. And a bikeshed: I think generally if we name something
vlv_ we put the platform checks outside of the function. Or have some
other guarantee in place to make sure it's only called on the right
platforms. Otherwise we generally pick an intel_ prefix.


Thanks Daniel. I will switch to an intel_ prefix, since we might need to extend this to
future platforms.

  
  out:

intel_runtime_pm_put(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 2a6ec4b..f1a90b8 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1233,6 +1233,8 @@ void ironlake_teardown_rc6(struct drm_device *dev);
  void gen6_update_ring_freq(struct drm_device *dev);
  void gen6_rps_idle(struct drm_i915_private *dev_priv);
  void gen6_rps_boost(struct drm_i915_private *dev_priv);
+void vlv_media_promotion_timer_idle(struct drm_i915_private *dev_priv);
+void vlv_media_promotion_timer_busy(struct drm_i915_private *dev_priv);
  void ilk_wm_get_hw_state(struct drm_device *dev);
  void skl_wm_get_hw_state(struct drm_device *dev);
  void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index e710b43..d23b60a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3961,6 +3961,33 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv)
mutex_unlock(dev_priv-rps.hw_lock);
  }
  
+void vlv_media_promotion_timer_idle(struct drm_i915_private *dev_priv)

+{
+   struct drm_device *dev = dev_priv-dev;
+
+   if (!IS_VALLEYVIEW(dev))
+   return;
+
+   if (IS_CHERRYVIEW(dev_priv-dev)) {
+   /* TO threshold set to 500 us ( 0x186 * 1.28 us) */
+   

Re: [Intel-gfx] [PATCH v2] drm/i915: Modifying RC6 Promotion timer for Media workloads.

2015-03-11 Thread Chris Wilson
On Wed, Mar 11, 2015 at 07:07:12PM +0530, Deepak S wrote:
 
 
 On Friday 06 March 2015 10:10 PM, Daniel Vetter wrote:
 On Thu, Mar 05, 2015 at 09:27:59PM +0530, deepa...@linux.intel.com wrote:
 From: Deepak S deepa...@linux.intel.com
 
 In normal cases, RC6 promotion timer is 1700us/500us. This will
 result in more time spent in C1 state. For more residency in
 C6 in case of media workloads, this is changed to 250us.
 Not doing this for 3D workloads as too many C6-C0
 transition delays can result in performance impact.
 
 v2: Extend GPU busy & idle detection framework for rc6 Promotion
 timer changes (Chris)
 
 Signed-off-by: Deepak S deepa...@linux.intel.com
 I thought Chris' idea was to put this into the gen6_rps_boost/idle
 functions? You could check from within them I think for whether the vcs is
 still busy ... One more comment below.
 -Daniel
 
 Hi Daniel,
 
 gen6_rps_boost/idle will be called only for RCS, right? Also, we get
 gen6_rps_boost during __wait_request.
 But we want to program the promotion timer when we add a request to VCS, to apply
 the value immediately.

It's gen6_rps_busy/gen6_rps_idle. They are called from intel_mark_busy
and intel_mark_idle. It is intel_mark_busy/intel_mark_idle that we want
to extend to cover the VCS case as well. I think if you add a ring
parameter to the functions, we can start specialising per ring and
global state changes. You will then also be in a position to judge what
is the best idle timer (and consider making i915_gem_idle_work_handler
per ring). The goal is simply to evolve the current infrastructure for
idle/busyness handling to cover your use case as well (and hopefully in
the process improving the old/general cases).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2] drm/i915: Modifying RC6 Promotion timer for Media workloads.

2015-03-11 Thread Deepak S



On Wednesday 11 March 2015 07:26 PM, Chris Wilson wrote:

On Wed, Mar 11, 2015 at 07:07:12PM +0530, Deepak S wrote:


On Friday 06 March 2015 10:10 PM, Daniel Vetter wrote:

On Thu, Mar 05, 2015 at 09:27:59PM +0530, deepa...@linux.intel.com wrote:

From: Deepak S deepa...@linux.intel.com

In normal cases, RC6 promotion timer is 1700us/500us. This will
result in more time spent in C1 state. For more residency in
C6 in case of media workloads, this is changed to 250us.
Not doing this for 3D workloads as too many C6-C0
transition delays can result in performance impact.

v2: Extend GPU busy & idle detection framework for rc6 Promotion
timer changes (Chris)

Signed-off-by: Deepak S deepa...@linux.intel.com

I thought Chris' idea was to put this into the gen6_rps_boost/idle
functions? You could check from within them I think for whether the vcs is
still busy ... One more comment below.
-Daniel

Hi Daniel,

gen6_rps_boost/idle will be called only for RCS, right? Also, we get
gen6_rps_boost during __wait_request.
But we want to program the promotion timer when we add a request to VCS, to apply the
value immediately.

It's gen6_rps_busy/gen6_rps_idle. They are called from intel_mark_busy
and intel_mark_idle. It is intel_mark_busy/intel_mark_idle that we want
to extend to cover the VCS case as well. I think if you add a ring
parameter to the functions, we can start specialising per ring and
global state changes. You will then also be in a position to judge what
is the best idle timer (and consider making i915_gem_idle_work_handler
per ring). The goal is simply to evolve the current infrastructure for
idle/busyness handling to cover your use case as well (and hopefully in
the process improving the old/general cases).
-Chris


Thanks Chris. Extending intel_mark_busy/intel_mark_idle
makes sense. I will work on adding the change.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2] drm/i915: Modifying RC6 Promotion timer for Media workloads.

2015-03-06 Thread Daniel Vetter
On Thu, Mar 05, 2015 at 09:27:59PM +0530, deepa...@linux.intel.com wrote:
 From: Deepak S deepa...@linux.intel.com
 
 In normal cases, RC6 promotion timer is 1700us/500us. This will
 result in more time spent in C1 state. For more residency in
 C6 in case of media workloads, this is changed to 250us.
 Not doing this for 3D workloads as too many C6-C0
 transition delays can result in performance impact.
 
 v2: Extend GPU busy & idle detection framework for rc6 Promotion
 timer changes (Chris)
 
 Signed-off-by: Deepak S deepa...@linux.intel.com

I thought Chris' idea was to put this into the gen6_rps_boost/idle
functions? You could check from within them I think for whether the vcs is
still busy ... One more comment below.
-Daniel

 ---
  drivers/gpu/drm/i915/i915_gem.c  | 10 +-
  drivers/gpu/drm/i915/intel_display.c |  3 ++-
  drivers/gpu/drm/i915/intel_drv.h |  2 ++
  drivers/gpu/drm/i915/intel_pm.c  | 27 +++
  4 files changed, 40 insertions(+), 2 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
 index 3831cc0..85f8aa6 100644
 --- a/drivers/gpu/drm/i915/i915_gem.c
 +++ b/drivers/gpu/drm/i915/i915_gem.c
 @@ -2428,7 +2428,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
   struct drm_i915_gem_request *request;
   struct intel_ringbuffer *ringbuf;
   u32 request_start;
 - int ret;
 + int ret, was_empty;
  
   request = ring-outstanding_lazy_request;
   if (WARN_ON(request == NULL))
 @@ -2495,6 +2495,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
   }
  
   request-emitted_jiffies = jiffies;
 + was_empty = list_empty(ring-request_list);
   list_add_tail(request-list, ring-request_list);
   request-file_priv = NULL;
  
 @@ -2519,6 +2520,10 @@ int __i915_add_request(struct intel_engine_cs *ring,
   queue_delayed_work(dev_priv-wq,
  dev_priv-mm.retire_work,
  round_jiffies_up_relative(HZ));
 +
 + if ((ring-id == VCS)  was_empty)
 + vlv_media_promotion_timer_busy(dev_priv);
 +
   intel_mark_busy(dev_priv-dev);
  
   return 0;
 @@ -2802,6 +2807,9 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
 *ring)
   }
  
   WARN_ON(i915_verify_lists(ring-dev));
 +
 + if (ring-id == VCS  list_empty(ring-request_list))
 + vlv_media_promotion_timer_idle(dev_priv);
  }
  
  bool
 diff --git a/drivers/gpu/drm/i915/intel_display.c 
 b/drivers/gpu/drm/i915/intel_display.c
 index 597c10b..5d121b4 100644
 --- a/drivers/gpu/drm/i915/intel_display.c
 +++ b/drivers/gpu/drm/i915/intel_display.c
 @@ -9172,8 +9172,9 @@ void intel_mark_idle(struct drm_device *dev)
   intel_decrease_pllclock(crtc);
   }
  
 - if (INTEL_INFO(dev)-gen = 6)
 + if (INTEL_INFO(dev)-gen = 6) {
   gen6_rps_idle(dev-dev_private);
 + }

Unnecessary hunk. And a bikeshed: I think generally if we name something
vlv_ we put the platform checks outside of the function. Or have some
other guarantee in place to make sure it's only called on the right
platforms. Otherwise we generally pick an intel_ prefix.
  
  out:
   intel_runtime_pm_put(dev_priv);
 diff --git a/drivers/gpu/drm/i915/intel_drv.h 
 b/drivers/gpu/drm/i915/intel_drv.h
 index 2a6ec4b..f1a90b8 100644
 --- a/drivers/gpu/drm/i915/intel_drv.h
 +++ b/drivers/gpu/drm/i915/intel_drv.h
 @@ -1233,6 +1233,8 @@ void ironlake_teardown_rc6(struct drm_device *dev);
  void gen6_update_ring_freq(struct drm_device *dev);
  void gen6_rps_idle(struct drm_i915_private *dev_priv);
  void gen6_rps_boost(struct drm_i915_private *dev_priv);
 +void vlv_media_promotion_timer_idle(struct drm_i915_private *dev_priv);
 +void vlv_media_promotion_timer_busy(struct drm_i915_private *dev_priv);
  void ilk_wm_get_hw_state(struct drm_device *dev);
  void skl_wm_get_hw_state(struct drm_device *dev);
  void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
 index e710b43..d23b60a 100644
 --- a/drivers/gpu/drm/i915/intel_pm.c
 +++ b/drivers/gpu/drm/i915/intel_pm.c
 @@ -3961,6 +3961,33 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv)
   mutex_unlock(dev_priv-rps.hw_lock);
  }
  
 +void vlv_media_promotion_timer_idle(struct drm_i915_private *dev_priv)
 +{
 + struct drm_device *dev = dev_priv-dev;
 +
 + if (!IS_VALLEYVIEW(dev))
 + return;
 +
 + if (IS_CHERRYVIEW(dev_priv-dev)) {
 + /* TO threshold set to 500 us ( 0x186 * 1.28 us) */
 + I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
 + } else {
 + /* TO threshold set to 1750 us ( 0x557 * 1.28 us) */
 + I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
 + }
 +}
 +
 +void vlv_media_promotion_timer_busy(struct drm_i915_private *dev_priv)
 +{
 + struct drm_device *dev = dev_priv-dev;
 +
 + if (!IS_VALLEYVIEW(dev))
 + return;

[Intel-gfx] [PATCH v2] drm/i915: Modifying RC6 Promotion timer for Media workloads.

2015-03-05 Thread deepak . s
From: Deepak S deepa...@linux.intel.com

In normal cases, RC6 promotion timer is 1700us/500us. This will
result in more time spent in C1 state. For more residency in
C6 in case of media workloads, this is changed to 250us.
Not doing this for 3D workloads as too many C6-C0
transition delays can result in performance impact.

v2: Extend GPU busy & idle detection framework for rc6 Promotion
timer changes (Chris)

Signed-off-by: Deepak S deepa...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_gem.c  | 10 +-
 drivers/gpu/drm/i915/intel_display.c |  3 ++-
 drivers/gpu/drm/i915/intel_drv.h |  2 ++
 drivers/gpu/drm/i915/intel_pm.c  | 27 +++
 4 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3831cc0..85f8aa6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2428,7 +2428,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
struct drm_i915_gem_request *request;
struct intel_ringbuffer *ringbuf;
u32 request_start;
-   int ret;
+   int ret, was_empty;
 
request = ring-outstanding_lazy_request;
if (WARN_ON(request == NULL))
@@ -2495,6 +2495,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
}
 
request-emitted_jiffies = jiffies;
+   was_empty = list_empty(ring-request_list);
list_add_tail(request-list, ring-request_list);
request-file_priv = NULL;
 
@@ -2519,6 +2520,10 @@ int __i915_add_request(struct intel_engine_cs *ring,
queue_delayed_work(dev_priv-wq,
   dev_priv-mm.retire_work,
   round_jiffies_up_relative(HZ));
+
+   if ((ring-id == VCS)  was_empty)
+   vlv_media_promotion_timer_busy(dev_priv);
+
intel_mark_busy(dev_priv-dev);
 
return 0;
@@ -2802,6 +2807,9 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
*ring)
}
 
WARN_ON(i915_verify_lists(ring-dev));
+
+   if (ring-id == VCS  list_empty(ring-request_list))
+   vlv_media_promotion_timer_idle(dev_priv);
 }
 
 bool
diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 597c10b..5d121b4 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9172,8 +9172,9 @@ void intel_mark_idle(struct drm_device *dev)
intel_decrease_pllclock(crtc);
}
 
-   if (INTEL_INFO(dev)-gen = 6)
+   if (INTEL_INFO(dev)-gen = 6) {
gen6_rps_idle(dev-dev_private);
+   }
 
 out:
intel_runtime_pm_put(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 2a6ec4b..f1a90b8 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1233,6 +1233,8 @@ void ironlake_teardown_rc6(struct drm_device *dev);
 void gen6_update_ring_freq(struct drm_device *dev);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
 void gen6_rps_boost(struct drm_i915_private *dev_priv);
+void vlv_media_promotion_timer_idle(struct drm_i915_private *dev_priv);
+void vlv_media_promotion_timer_busy(struct drm_i915_private *dev_priv);
 void ilk_wm_get_hw_state(struct drm_device *dev);
 void skl_wm_get_hw_state(struct drm_device *dev);
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index e710b43..d23b60a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3961,6 +3961,33 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv)
mutex_unlock(dev_priv-rps.hw_lock);
 }
 
+void vlv_media_promotion_timer_idle(struct drm_i915_private *dev_priv)
+{
+   struct drm_device *dev = dev_priv-dev;
+
+   if (!IS_VALLEYVIEW(dev))
+   return;
+
+   if (IS_CHERRYVIEW(dev_priv-dev)) {
+   /* TO threshold set to 500 us ( 0x186 * 1.28 us) */
+   I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
+   } else {
+   /* TO threshold set to 1750 us ( 0x557 * 1.28 us) */
+   I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
+   }
+}
+
+void vlv_media_promotion_timer_busy(struct drm_i915_private *dev_priv)
+{
+   struct drm_device *dev = dev_priv-dev;
+
+   if (!IS_VALLEYVIEW(dev))
+   return;
+
+   /* TO threshold set to 250 us ( 0xC3 * 1.28 us) */
+   I915_WRITE(GEN6_RC6_THRESHOLD, 0xC3);
+}
+
 void intel_set_rps(struct drm_device *dev, u8 val)
 {
if (IS_VALLEYVIEW(dev))
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx