skl+: Optimize WM calculation

Mahesh Kumar Tue, 20 Jun 2017 23:11:01 -0700

Plane configuration parameters don't change between WM-level
calculations, yet we currently compute the same parameters 8 times,
once for each WM level.
This patch optimizes that by calculating these parameters once at the
beginning and reusing them for each WM-level calculation.


Signed-off-by: Mahesh Kumar <mahesh1.ku...@intel.com>
Acked-by: Maarten Lankhorst <maarten.lankho...@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  13 +++
 drivers/gpu/drm/i915/intel_pm.c | 179 ++++++++++++++++++++++------------------
 2 files changed, 111 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7014cad60794..3633d043ee7d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1781,6 +1781,19 @@ struct skl_wm_level {
        uint8_t plane_res_l;
 };
 
+/* Stores plane specific WM parameters */
+struct skl_wm_params{
+       bool x_tiled, y_tiled;
+       uint32_t width;
+       uint8_t cpp;
+       uint32_t plane_pixel_rate;
+       uint32_t y_min_scanlines;
+       uint32_t plane_bytes_per_line;
+       uint_fixed_16_16_t plane_blocks_per_line;
+       uint_fixed_16_16_t y_tile_minimum;
+       uint32_t linetime_us;
+};
+
 /*
  * This struct helps tracking the state needed for runtime PM, which puts the
  * device in PCI D3 state. Notice that when this happens, nothing on the
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index dbec28c3016e..ba82cd54f802 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4340,128 +4340,135 @@ skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
                                            downscale_amount);
 }
 
-static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
-                               struct intel_crtc_state *cstate,
-                               const struct intel_plane_state *intel_pstate,
-                               uint16_t ddb_allocation,
-                               int level,
-                               uint16_t *out_blocks, /* out */
-                               uint8_t *out_lines, /* out */
-                               bool *enabled /* out */)
+static int
+skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
+                           struct intel_crtc_state *cstate,
+                           const struct intel_plane_state *intel_pstate,
+                           struct skl_wm_params *wp)
 {
        struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
        const struct drm_plane_state *pstate = &intel_pstate->base;
        const struct drm_framebuffer *fb = pstate->fb;
-       uint32_t latency = dev_priv->wm.skl_latency[level];
-       uint_fixed_16_16_t method1, method2;
-       uint_fixed_16_16_t plane_blocks_per_line;
-       uint_fixed_16_16_t selected_result;
        uint32_t interm_pbpl;
-       uint32_t plane_bytes_per_line;
-       uint32_t res_blocks, res_lines;
-       uint8_t cpp;
-       uint32_t width = 0;
-       uint32_t plane_pixel_rate;
-       uint_fixed_16_16_t y_tile_minimum;
-       uint32_t y_min_scanlines;
        struct intel_atomic_state *state =
                to_intel_atomic_state(cstate->base.state);
        bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
-       bool y_tiled, x_tiled;
 
-       if (latency == 0 ||
-           !intel_wm_plane_visible(cstate, intel_pstate)) {
-               *enabled = false;
+       if (!intel_wm_plane_visible(cstate, intel_pstate))
                return 0;
-       }
 
-       y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
-                 fb->modifier == I915_FORMAT_MOD_Yf_TILED;
-       x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
-
-       /* Display WA #1141: kbl,cfl */
-       if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
-           dev_priv->ipc_enabled)
-               latency += 4;
-
-       if (apply_memory_bw_wa && x_tiled)
-               latency += 15;
+       wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
+                     fb->modifier == I915_FORMAT_MOD_Yf_TILED;
+       wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
 
        if (plane->id == PLANE_CURSOR) {
-               width = intel_pstate->base.crtc_w;
+               wp->width = intel_pstate->base.crtc_w;
        } else {
                /*
                 * Src coordinates are already rotated by 270 degrees for
                 * the 90/270 degree plane rotation cases (to match the
                 * GTT mapping), hence no need to account for rotation here.
                 */
-               width = drm_rect_width(&intel_pstate->base.src) >> 16;
+               wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
        }
 
-       cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
-                                                       fb->format->cpp[0];
-       plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
+       wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
+                                                           fb->format->cpp[0];
+       wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
+                                                            intel_pstate);
 
        if (drm_rotation_90_or_270(pstate->rotation)) {
-
-               switch (cpp) {
+               switch (wp->cpp) {
                case 1:
-                       y_min_scanlines = 16;
+                       wp->y_min_scanlines = 16;
                        break;
                case 2:
-                       y_min_scanlines = 8;
+                       wp->y_min_scanlines = 8;
                        break;
                case 4:
-                       y_min_scanlines = 4;
+                       wp->y_min_scanlines = 4;
                        break;
                default:
-                       MISSING_CASE(cpp);
+                       MISSING_CASE(wp->cpp);
                        return -EINVAL;
                }
        } else {
-               y_min_scanlines = 4;
+               wp->y_min_scanlines = 4;
        }
 
        if (apply_memory_bw_wa)
-               y_min_scanlines *= 2;
-
-       plane_bytes_per_line = width * cpp;
-       if (y_tiled) {
-               interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line *
-                                          y_min_scanlines, 512);
-               plane_blocks_per_line = div_fixed16(interm_pbpl,
-                                                       y_min_scanlines);
-       } else if (x_tiled) {
-               interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512);
-               plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
+               wp->y_min_scanlines *= 2;
+
+       wp->plane_bytes_per_line = wp->width * wp->cpp;
+       if (wp->y_tiled) {
+               interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
+                                          wp->y_min_scanlines, 512);
+               wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
+                                                       wp->y_min_scanlines);
+       } else if (wp->x_tiled) {
+               interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512);
+               wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
        } else {
-               interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
-               plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
+               interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1;
+               wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
        }
 
-       method1 = skl_wm_method1(plane_pixel_rate, cpp, latency);
-       method2 = skl_wm_method2(plane_pixel_rate,
+       wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
+                                            wp->plane_blocks_per_line);
+       wp->linetime_us = fixed16_to_u32_round_up(
+                                       intel_get_linetime_us(cstate));
+       return 0;
+}
+
+static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
+                               struct intel_crtc_state *cstate,
+                               const struct intel_plane_state *intel_pstate,
+                               uint16_t ddb_allocation,
+                               int level,
+                               const struct skl_wm_params *wp,
+                               uint16_t *out_blocks, /* out */
+                               uint8_t *out_lines, /* out */
+                               bool *enabled /* out */)
+{
+       const struct drm_plane_state *pstate = &intel_pstate->base;
+       uint32_t latency = dev_priv->wm.skl_latency[level];
+       uint_fixed_16_16_t method1, method2;
+       uint_fixed_16_16_t selected_result;
+       uint32_t res_blocks, res_lines;
+       struct intel_atomic_state *state =
+               to_intel_atomic_state(cstate->base.state);
+       bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
+
+       if (latency == 0 ||
+           !intel_wm_plane_visible(cstate, intel_pstate)) {
+               *enabled = false;
+               return 0;
+       }
+
+       /* Display WA #1141: kbl,cfl */
+       if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
+           dev_priv->ipc_enabled)
+               latency += 4;
+
+       if (apply_memory_bw_wa && wp->x_tiled)
+               latency += 15;
+
+       method1 = skl_wm_method1(wp->plane_pixel_rate, wp->cpp, latency);
+       method2 = skl_wm_method2(wp->plane_pixel_rate,
                                 cstate->base.adjusted_mode.crtc_htotal,
                                 latency,
-                                plane_blocks_per_line);
-
-       y_tile_minimum = mul_u32_fixed16(y_min_scanlines,
-                                        plane_blocks_per_line);
+                                wp->plane_blocks_per_line);
 
-       if (y_tiled) {
-               selected_result = max_fixed16(method2, y_tile_minimum);
+       if (wp->y_tiled) {
+               selected_result = max_fixed16(method2, wp->y_tile_minimum);
        } else {
-               uint32_t linetime_us;
-
-               linetime_us = fixed16_to_u32_round_up(
-                               intel_get_linetime_us(cstate));
-               if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) &&
-                   (plane_bytes_per_line / 512 < 1))
+               if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1)
+                   && (wp->plane_bytes_per_line / 512 < 1))
                        selected_result = method2;
                else if ((ddb_allocation && ddb_allocation /
-                       fixed16_to_u32_round_up(plane_blocks_per_line)) >= 1)
+                       fixed16_to_u32_round_up(wp->plane_blocks_per_line)) >= 
1)
                        selected_result = min_fixed16(method1, method2);
-               else if (latency >= linetime_us)
+               else if (latency >= wp->linetime_us)
                        selected_result = min_fixed16(method1, method2);
                else
                        selected_result = method1;
@@ -4469,12 +4476,13 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 
        res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
        res_lines = div_round_up_fixed16(selected_result,
-                                        plane_blocks_per_line);
+                                        wp->plane_blocks_per_line);
 
        if (level >= 1 && level <= 7) {
-               if (y_tiled) {
-                       res_blocks += fixed16_to_u32_round_up(y_tile_minimum);
-                       res_lines += y_min_scanlines;
+               if (wp->y_tiled) {
+                       res_blocks += fixed16_to_u32_round_up(
+                                                       wp->y_tile_minimum);
+                       res_lines += wp->y_min_scanlines;
                } else {
                        res_blocks++;
                }
@@ -4512,6 +4520,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
                      struct skl_ddb_allocation *ddb,
                      struct intel_crtc_state *cstate,
                      const struct intel_plane_state *intel_pstate,
+                     const struct skl_wm_params *wm_params,
                      struct skl_plane_wm *wm)
 {
        struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
@@ -4535,6 +4544,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
                                           intel_pstate,
                                           ddb_blocks,
                                           level,
+                                          wm_params,
                                           &result->plane_res_b,
                                           &result->plane_res_l,
                                           &result->plane_en);
@@ -4599,11 +4609,18 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
                const struct intel_plane_state *intel_pstate =
                                                to_intel_plane_state(pstate);
                enum plane_id plane_id = to_intel_plane(plane)->id;
+               struct skl_wm_params wm_params;
 
                wm = &pipe_wm->planes[plane_id];
+               memset(&wm_params, 0, sizeof(struct skl_wm_params));
+
+               ret = skl_compute_plane_wm_params(dev_priv, cstate,
+                                                 intel_pstate, &wm_params);
+               if (ret)
+                       return ret;
 
                ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
-                                           intel_pstate, wm);
+                                           intel_pstate, &wm_params, wm);
                if (ret)
                        return ret;
                skl_compute_transition_wm(cstate, &wm->trans_wm);
-- 
2.13.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 7/8] drm/i915/skl+: Optimize WM calculation

Reply via email to