[Intel-gfx] [PATCH v9 6/7] drm/i915: Adjust CDCLK accordingly to our DBuf bw needs

2020-05-20 Thread Stanislav Lisovskiy
According to BSpec max BW per slice is calculated using formula
Max BW = CDCLK * 64. Currently when calculating min CDCLK we
account only per plane requirements, however in order to avoid
FIFO underruns we need to estimate accumulated BW consumed by
all planes(ddb entries basically) residing on that particular
DBuf slice. This will allow us to put CDCLK lower and save power
when we don't need that much bandwidth or gain additional
performance once plane consumption grows.

v2: - Fix long line warning
- Limited new DBuf bw checks to only gens >= 11

v3: - Let's track used DBuf bw per slice and per crtc in bw state
  (or maybe in DBuf state in the future). That way we don't need
  to have all crtcs in the state; we need them only if we detect
  that we are actually going to change cdclk, the same way as we
  do with other stuff, i.e. intel_atomic_serialize_global_state
  and co. Just as per Ville's paradigm.
- Made dbuf bw calculation procedure look nicer by introducing
  for_each_dbuf_slice_in_mask - we often will now need to iterate
  slices using mask.
- According to experimental results CDCLK * 64 accounts for
  overall bandwidth across all dbufs, not per dbuf.

v4: - Fixed missing const(Ville)
- Removed spurious whitespaces(Ville)
- Fixed local variable init(reduced scope where not needed)
- Added some comments about data rate for planar formats
- Changed struct intel_crtc_bw to intel_dbuf_bw
- Moved dbuf bw calculation to intel_compute_min_cdclk(Ville)

v5: - Removed unneeded macro

v6: - Prevent too frequent CDCLK switching back and forth:
  Always switch to a higher CDCLK when needed to prevent bandwidth
  issues, but don't switch to a lower CDCLK more often than once
  every 30 minutes, in order to prevent constant modeset blinking.
  We could of course not switch back at all, but that would be
  bad from a power consumption point of view.

v7: - Fixed to track cdclk using bw_state, modeset will be now
  triggered only when CDCLK change is really needed.

v8: - Lock global state if bw_state->min_cdclk is changed.
- Try getting bw_state only if there are crtcs in the commit
  (need to have read-locked global state)

v9: - Do not do Dbuf bw check for gens < 9 - triggers WARN
  as ddb_size is 0.

v10: - Lock global state for older gens as well.

v11: - Define new bw_calc_min_cdclk hook, instead of using
   a condition(Manasi Navare)

v12: - Fixed rebase conflict

v13: - Added spaces after declarations to make checkpatch happy.

Signed-off-by: Stanislav Lisovskiy 
Reviewed-by: Manasi Navare 
---
 drivers/gpu/drm/i915/display/intel_bw.c  | 121 ++-
 drivers/gpu/drm/i915/display/intel_bw.h  |  10 ++
 drivers/gpu/drm/i915/display/intel_cdclk.c   |  28 -
 drivers/gpu/drm/i915/display/intel_cdclk.h   |   1 -
 drivers/gpu/drm/i915/display/intel_display.c |  39 +-
 drivers/gpu/drm/i915/i915_drv.h  |   1 +
 drivers/gpu/drm/i915/intel_pm.c  |  31 -
 drivers/gpu/drm/i915/intel_pm.h  |   4 +
 8 files changed, 220 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_bw.c 
b/drivers/gpu/drm/i915/display/intel_bw.c
index fef04e2d954e..a539b1ed7723 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -6,11 +6,12 @@
 #include 
 
 #include "intel_bw.h"
+#include "intel_pm.h"
 #include "intel_display_types.h"
 #include "intel_sideband.h"
 #include "intel_atomic.h"
 #include "intel_pm.h"
-
+#include "intel_cdclk.h"
 
 /* Parameters for Qclk Geyserville (QGV) */
 struct intel_qgv_point {
@@ -343,7 +344,6 @@ static unsigned int intel_bw_crtc_data_rate(const struct 
intel_crtc_state *crtc_
 
return data_rate;
 }
-
 void intel_bw_crtc_update(struct intel_bw_state *bw_state,
  const struct intel_crtc_state *crtc_state)
 {
@@ -420,6 +420,123 @@ intel_atomic_get_bw_state(struct intel_atomic_state 
*state)
return to_intel_bw_state(bw_state);
 }
 
+int skl_bw_calc_min_cdclk(struct intel_atomic_state *state)
+{
+   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+   int i;
+   const struct intel_crtc_state *crtc_state;
+   struct intel_crtc *crtc;
+   int max_bw = 0;
+   int slice_id;
+   struct intel_bw_state *new_bw_state = NULL;
+   struct intel_bw_state *old_bw_state = NULL;
+
+   for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
+   enum plane_id plane_id;
+   struct intel_dbuf_bw *crtc_bw;
+
+   new_bw_state = intel_atomic_get_bw_state(state);
+   if (IS_ERR(new_bw_state))
+   return PTR_ERR(new_bw_state);
+
+   crtc_bw = &new_bw_state->dbuf_bw[crtc->pipe];
+
+   memset(&crtc_bw->used_bw, 0, sizeof(crtc_bw->used_bw));
+
+   for_each_plane_id_on_crtc(crtc, plane_id) {
+   const struct 

Re: [Intel-gfx] [PATCH v9 6/7] drm/i915: Adjust CDCLK accordingly to our DBuf bw needs

2020-05-19 Thread Manasi Navare
On Wed, May 20, 2020 at 12:25:25AM +0300, Stanislav Lisovskiy wrote:
> According to BSpec max BW per slice is calculated using formula
> Max BW = CDCLK * 64. Currently when calculating min CDCLK we
> account only per plane requirements, however in order to avoid
> FIFO underruns we need to estimate accumulated BW consumed by
> all planes(ddb entries basically) residing on that particular
> DBuf slice. This will allow us to put CDCLK lower and save power
> when we don't need that much bandwidth or gain additional
> performance once plane consumption grows.
> 
> v2: - Fix long line warning
> - Limited new DBuf bw checks to only gens >= 11
> 
> v3: - Lets track used Dbuf bw per slice and per crtc in bw state
>   (or may be in DBuf state in future), that way we don't need
>   to have all crtcs in state and those only if we detect if
>   are actually going to change cdclk, just same way as we
>   do with other stuff, i.e intel_atomic_serialize_global_state
>   and co. Just as per Ville's paradigm.
> - Made dbuf bw calculation procedure look nicer by introducing
>   for_each_dbuf_slice_in_mask - we often will now need to iterate
>   slices using mask.
> - According to experimental results CDCLK * 64 accounts for
>   overall bandwidth across all dbufs, not per dbuf.
> 
> v4: - Fixed missing const(Ville)
> - Removed spurious whitespaces(Ville)
> - Fixed local variable init(reduced scope where not needed)
> - Added some comments about data rate for planar formats
> - Changed struct intel_crtc_bw to intel_dbuf_bw
> - Moved dbuf bw calculation to intel_compute_min_cdclk(Ville)
> 
> v5: - Removed unneeded macro
> 
> v6: - Prevent too frequent CDCLK switching back and forth:
>   Always switch to higher CDCLK when needed to prevent bandwidth
>   issues, however don't switch to lower CDCLK earlier than once
>   in 30 minutes in order to prevent constant modeset blinking.
>   We could of course not switch back at all, however this is
>   bad from power consumption point of view.
> 
> v7: - Fixed to track cdclk using bw_state, modeset will be now
>   triggered only when CDCLK change is really needed.
> 
> v8: - Lock global state if bw_state->min_cdclk is changed.
> - Try getting bw_state only if there are crtcs in the commit
>   (need to have read-locked global state)
> 
> v9: - Do not do Dbuf bw check for gens < 9 - triggers WARN
>   as ddb_size is 0.
> 
> v10: - Lock global state for older gens as well.
> 
> v11: - Define new bw_calc_min_cdclk hook, instead of using
>a condition(Manasi Navare)
> 
> v12: - Fixed rebase conflict
> 
> Signed-off-by: Stanislav Lisovskiy 

Looks good now with the hooks

Reviewed-by: Manasi Navare 

Manasi

> ---
>  drivers/gpu/drm/i915/display/intel_bw.c  | 119 ++-
>  drivers/gpu/drm/i915/display/intel_bw.h  |  10 ++
>  drivers/gpu/drm/i915/display/intel_cdclk.c   |  28 -
>  drivers/gpu/drm/i915/display/intel_cdclk.h   |   1 -
>  drivers/gpu/drm/i915/display/intel_display.c |  39 +-
>  drivers/gpu/drm/i915/i915_drv.h  |   1 +
>  drivers/gpu/drm/i915/intel_pm.c  |  31 -
>  drivers/gpu/drm/i915/intel_pm.h  |   4 +
>  8 files changed, 218 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_bw.c 
> b/drivers/gpu/drm/i915/display/intel_bw.c
> index fef04e2d954e..cb614b624e20 100644
> --- a/drivers/gpu/drm/i915/display/intel_bw.c
> +++ b/drivers/gpu/drm/i915/display/intel_bw.c
> @@ -6,11 +6,12 @@
>  #include 
>  
>  #include "intel_bw.h"
> +#include "intel_pm.h"
>  #include "intel_display_types.h"
>  #include "intel_sideband.h"
>  #include "intel_atomic.h"
>  #include "intel_pm.h"
> -
> +#include "intel_cdclk.h"
>  
>  /* Parameters for Qclk Geyserville (QGV) */
>  struct intel_qgv_point {
> @@ -343,7 +344,6 @@ static unsigned int intel_bw_crtc_data_rate(const struct 
> intel_crtc_state *crtc_
>  
>   return data_rate;
>  }
> -
>  void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> const struct intel_crtc_state *crtc_state)
>  {
> @@ -420,6 +420,121 @@ intel_atomic_get_bw_state(struct intel_atomic_state 
> *state)
>   return to_intel_bw_state(bw_state);
>  }
>  
> +int skl_bw_calc_min_cdclk(struct intel_atomic_state *state)
> +{
> + struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> + int i;
> + const struct intel_crtc_state *crtc_state;
> + struct intel_crtc *crtc;
> + int max_bw = 0;
> + int slice_id;
> + struct intel_bw_state *new_bw_state = NULL;
> + struct intel_bw_state *old_bw_state = NULL;
> +
> + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
> + enum plane_id plane_id;
> + struct intel_dbuf_bw *crtc_bw;
> +
> + new_bw_state = intel_atomic_get_bw_state(state);
> + if (IS_ERR(new_bw_state))
> + return 

[Intel-gfx] [PATCH v9 6/7] drm/i915: Adjust CDCLK accordingly to our DBuf bw needs

2020-05-19 Thread Stanislav Lisovskiy
According to BSpec max BW per slice is calculated using formula
Max BW = CDCLK * 64. Currently when calculating min CDCLK we
account only per plane requirements, however in order to avoid
FIFO underruns we need to estimate accumulated BW consumed by
all planes(ddb entries basically) residing on that particular
DBuf slice. This will allow us to put CDCLK lower and save power
when we don't need that much bandwidth or gain additional
performance once plane consumption grows.

v2: - Fix long line warning
- Limited new DBuf bw checks to only gens >= 11

v3: - Let's track used DBuf bw per slice and per crtc in bw state
  (or maybe in DBuf state in the future). That way we don't need
  to have all crtcs in the state; we need them only if we detect
  that we are actually going to change cdclk, the same way as we
  do with other stuff, i.e. intel_atomic_serialize_global_state
  and co. Just as per Ville's paradigm.
- Made dbuf bw calculation procedure look nicer by introducing
  for_each_dbuf_slice_in_mask - we often will now need to iterate
  slices using mask.
- According to experimental results CDCLK * 64 accounts for
  overall bandwidth across all dbufs, not per dbuf.

v4: - Fixed missing const(Ville)
- Removed spurious whitespaces(Ville)
- Fixed local variable init(reduced scope where not needed)
- Added some comments about data rate for planar formats
- Changed struct intel_crtc_bw to intel_dbuf_bw
- Moved dbuf bw calculation to intel_compute_min_cdclk(Ville)

v5: - Removed unneeded macro

v6: - Prevent too frequent CDCLK switching back and forth:
  Always switch to a higher CDCLK when needed to prevent bandwidth
  issues, but don't switch to a lower CDCLK more often than once
  every 30 minutes, in order to prevent constant modeset blinking.
  We could of course not switch back at all, but that would be
  bad from a power consumption point of view.

v7: - Fixed to track cdclk using bw_state, modeset will be now
  triggered only when CDCLK change is really needed.

v8: - Lock global state if bw_state->min_cdclk is changed.
- Try getting bw_state only if there are crtcs in the commit
  (need to have read-locked global state)

v9: - Do not do Dbuf bw check for gens < 9 - triggers WARN
  as ddb_size is 0.

v10: - Lock global state for older gens as well.

v11: - Define new bw_calc_min_cdclk hook, instead of using
   a condition(Manasi Navare)

v12: - Fixed rebase conflict

Signed-off-by: Stanislav Lisovskiy 
---
 drivers/gpu/drm/i915/display/intel_bw.c  | 119 ++-
 drivers/gpu/drm/i915/display/intel_bw.h  |  10 ++
 drivers/gpu/drm/i915/display/intel_cdclk.c   |  28 -
 drivers/gpu/drm/i915/display/intel_cdclk.h   |   1 -
 drivers/gpu/drm/i915/display/intel_display.c |  39 +-
 drivers/gpu/drm/i915/i915_drv.h  |   1 +
 drivers/gpu/drm/i915/intel_pm.c  |  31 -
 drivers/gpu/drm/i915/intel_pm.h  |   4 +
 8 files changed, 218 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_bw.c 
b/drivers/gpu/drm/i915/display/intel_bw.c
index fef04e2d954e..cb614b624e20 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -6,11 +6,12 @@
 #include 
 
 #include "intel_bw.h"
+#include "intel_pm.h"
 #include "intel_display_types.h"
 #include "intel_sideband.h"
 #include "intel_atomic.h"
 #include "intel_pm.h"
-
+#include "intel_cdclk.h"
 
 /* Parameters for Qclk Geyserville (QGV) */
 struct intel_qgv_point {
@@ -343,7 +344,6 @@ static unsigned int intel_bw_crtc_data_rate(const struct 
intel_crtc_state *crtc_
 
return data_rate;
 }
-
 void intel_bw_crtc_update(struct intel_bw_state *bw_state,
  const struct intel_crtc_state *crtc_state)
 {
@@ -420,6 +420,121 @@ intel_atomic_get_bw_state(struct intel_atomic_state 
*state)
return to_intel_bw_state(bw_state);
 }
 
+int skl_bw_calc_min_cdclk(struct intel_atomic_state *state)
+{
+   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+   int i;
+   const struct intel_crtc_state *crtc_state;
+   struct intel_crtc *crtc;
+   int max_bw = 0;
+   int slice_id;
+   struct intel_bw_state *new_bw_state = NULL;
+   struct intel_bw_state *old_bw_state = NULL;
+
+   for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
+   enum plane_id plane_id;
+   struct intel_dbuf_bw *crtc_bw;
+
+   new_bw_state = intel_atomic_get_bw_state(state);
+   if (IS_ERR(new_bw_state))
+   return PTR_ERR(new_bw_state);
+
+   crtc_bw = &new_bw_state->dbuf_bw[crtc->pipe];
+
+   memset(&crtc_bw->used_bw, 0, sizeof(crtc_bw->used_bw));
+
+   for_each_plane_id_on_crtc(crtc, plane_id) {
+   const struct skl_ddb_entry *plane_alloc =
+   &crtc_state->wm.skl.plane_ddb_y[plane_id];
+   

[Intel-gfx] [PATCH v9 6/7] drm/i915: Adjust CDCLK accordingly to our DBuf bw needs

2020-05-19 Thread Stanislav Lisovskiy
According to BSpec max BW per slice is calculated using formula
Max BW = CDCLK * 64. Currently when calculating min CDCLK we
account only per plane requirements, however in order to avoid
FIFO underruns we need to estimate accumulated BW consumed by
all planes(ddb entries basically) residing on that particular
DBuf slice. This will allow us to put CDCLK lower and save power
when we don't need that much bandwidth or gain additional
performance once plane consumption grows.

v2: - Fix long line warning
- Limited new DBuf bw checks to only gens >= 11

v3: - Let's track used DBuf bw per slice and per crtc in bw state
  (or maybe in DBuf state in the future). That way we don't need
  to have all crtcs in the state; we need them only if we detect
  that we are actually going to change cdclk, the same way as we
  do with other stuff, i.e. intel_atomic_serialize_global_state
  and co. Just as per Ville's paradigm.
- Made dbuf bw calculation procedure look nicer by introducing
  for_each_dbuf_slice_in_mask - we often will now need to iterate
  slices using mask.
- According to experimental results CDCLK * 64 accounts for
  overall bandwidth across all dbufs, not per dbuf.

v4: - Fixed missing const(Ville)
- Removed spurious whitespaces(Ville)
- Fixed local variable init(reduced scope where not needed)
- Added some comments about data rate for planar formats
- Changed struct intel_crtc_bw to intel_dbuf_bw
- Moved dbuf bw calculation to intel_compute_min_cdclk(Ville)

v5: - Removed unneeded macro

v6: - Prevent too frequent CDCLK switching back and forth:
  Always switch to a higher CDCLK when needed to prevent bandwidth
  issues, but don't switch to a lower CDCLK more often than once
  every 30 minutes, in order to prevent constant modeset blinking.
  We could of course not switch back at all, but that would be
  bad from a power consumption point of view.

v7: - Fixed to track cdclk using bw_state, modeset will be now
  triggered only when CDCLK change is really needed.

v8: - Lock global state if bw_state->min_cdclk is changed.
- Try getting bw_state only if there are crtcs in the commit
  (need to have read-locked global state)

v9: - Do not do Dbuf bw check for gens < 9 - triggers WARN
  as ddb_size is 0.

v10: - Lock global state for older gens as well.

v11: - Define new bw_calc_min_cdclk hook, instead of using
   a condition(Manasi Navare)

Signed-off-by: Stanislav Lisovskiy 
---
 drivers/gpu/drm/i915/display/intel_bw.c  | 118 ++-
 drivers/gpu/drm/i915/display/intel_bw.h  |  10 ++
 drivers/gpu/drm/i915/display/intel_cdclk.c   |  28 -
 drivers/gpu/drm/i915/display/intel_cdclk.h   |   1 -
 drivers/gpu/drm/i915/display/intel_display.c |  39 +-
 drivers/gpu/drm/i915/i915_drv.h  |   1 +
 drivers/gpu/drm/i915/intel_pm.c  |  31 -
 drivers/gpu/drm/i915/intel_pm.h  |   3 +
 8 files changed, 217 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_bw.c 
b/drivers/gpu/drm/i915/display/intel_bw.c
index 6e7cc3a4f1aa..e46bc9e626b1 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -6,8 +6,10 @@
 #include 
 
 #include "intel_bw.h"
+#include "intel_pm.h"
 #include "intel_display_types.h"
 #include "intel_sideband.h"
+#include "intel_cdclk.h"
 
 /* Parameters for Qclk Geyserville (QGV) */
 struct intel_qgv_point {
@@ -333,7 +335,6 @@ static unsigned int intel_bw_crtc_data_rate(const struct 
intel_crtc_state *crtc_
 
return data_rate;
 }
-
 void intel_bw_crtc_update(struct intel_bw_state *bw_state,
  const struct intel_crtc_state *crtc_state)
 {
@@ -410,6 +411,121 @@ intel_atomic_get_bw_state(struct intel_atomic_state 
*state)
return to_intel_bw_state(bw_state);
 }
 
+int skl_bw_calc_min_cdclk(struct intel_atomic_state *state)
+{
+   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+   int i;
+   const struct intel_crtc_state *crtc_state;
+   struct intel_crtc *crtc;
+   int max_bw = 0;
+   int slice_id;
+   struct intel_bw_state *new_bw_state = NULL;
+   struct intel_bw_state *old_bw_state = NULL;
+
+   for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
+   enum plane_id plane_id;
+   struct intel_dbuf_bw *crtc_bw;
+
+   new_bw_state = intel_atomic_get_bw_state(state);
+   if (IS_ERR(new_bw_state))
+   return PTR_ERR(new_bw_state);
+
+   crtc_bw = &new_bw_state->dbuf_bw[crtc->pipe];
+
+   memset(&crtc_bw->used_bw, 0, sizeof(crtc_bw->used_bw));
+
+   for_each_plane_id_on_crtc(crtc, plane_id) {
+   const struct skl_ddb_entry *plane_alloc =
+   &crtc_state->wm.skl.plane_ddb_y[plane_id];
+   const struct skl_ddb_entry *uv_plane_alloc =
+