gen9: WM memory bandwidth related workaround

Mahesh Kumar Wed, 09 Nov 2016 21:54:06 -0800

Hi,


On Friday 04 November 2016 10:39 PM, Paulo Zanoni wrote:

Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:

This patch implemnets Workariunds related to display arbitrated
memory
bandwidth. These WA are applicabe for all gen-9 based platforms.

Changes since v1:
  - Rebase on top of Paulo's patch series
Changes since v2:
  - Rebase/rework after addressing Paulo's comments in previous patch

A lot of this code has changed since then, so this will need a
significant rebase. In the meantime, I added skl_needs_memory_bw_wa()
and we're now applying the WA by default: we just won't apply the WA
when we're pretty sure we don't need to. This helps avoiding underruns
by default.

See more below.

make sense, Will change default WA to WA_T_TILED

Signed-off-by: "Kumar, Mahesh" <mahesh1.ku...@intel.com>
---
  drivers/gpu/drm/i915/i915_drv.h  |   9 +++
  drivers/gpu/drm/i915/intel_drv.h |  11 +++
  drivers/gpu/drm/i915/intel_pm.c  | 146
+++++++++++++++++++++++++++++++++++++++
  3 files changed, 166 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h
b/drivers/gpu/drm/i915/i915_drv.h
index adbd9aa..c169360 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1092,6 +1092,13 @@ enum intel_sbi_destination {
        SBI_MPHY,
  };

+/* SKL+ Watermark arbitrated display bandwidth Workarounds */

+enum watermark_memory_wa {
+       WATERMARK_WA_NONE,
+       WATERMARK_WA_X_TILED,
+       WATERMARK_WA_Y_TILED,
+};
+
  #define QUIRK_PIPEA_FORCE (1<<0)
  #define QUIRK_LVDS_SSC_DISABLE (1<<1)
  #define QUIRK_INVERT_BRIGHTNESS (1<<2)
@@ -1644,6 +1651,8 @@ struct skl_ddb_allocation {

struct skl_wm_values {

        unsigned dirty_pipes;
+       /* any WaterMark memory workaround Required */

We can remove this comment since it doesn't say anything the variable
name doesn't.

+       enum watermark_memory_wa mem_wa;

Now that we have a proper variable in the state struct, it probably
makes sense to just kill skl_needs_memory_bw_wa() and read this
variable when we need to.

        struct skl_ddb_allocation ddb;
        uint32_t wm_linetime[I915_MAX_PIPES];
        uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
diff --git a/drivers/gpu/drm/i915/intel_drv.h
b/drivers/gpu/drm/i915/intel_drv.h
index f48e79a..2c1897b 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1813,6 +1813,17 @@ intel_atomic_get_crtc_state(struct
drm_atomic_state *state,
        return to_intel_crtc_state(crtc_state);
  }

+static inline struct intel_crtc_state *

+intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
+                                     struct intel_crtc *crtc)
+{
+       struct drm_crtc_state *crtc_state;
+
+       crtc_state = drm_atomic_get_existing_crtc_state(state,
&crtc->base);
+
+       return to_intel_crtc_state(crtc_state);

I really don't like the idea of calling to_intel_crtc_state() on a
potentially NULL pointer so the caller of this function will also check
for NULL. Even though it works today, I still think it's unsafe
practice. Please check crtc_state for NULL directly and then return
NULL.

Also, I think this function should be extracted to its own commit, and
we'd probably be able to find some callers in the existing i915 code.

Will extract this function & include NULL check for crtc_state itself

+}
+
  static inline struct intel_plane_state *
  intel_atomic_get_existing_plane_state(struct drm_atomic_state
*state,
                                      struct intel_plane *plane)
diff --git a/drivers/gpu/drm/i915/intel_pm.c
b/drivers/gpu/drm/i915/intel_pm.c
index 84ec6b1..5b8f715 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3589,6 +3589,8 @@ static int skl_compute_plane_wm(const struct
drm_i915_private *dev_priv,
  {
        struct drm_plane_state *pstate = &intel_pstate->base;
        struct drm_framebuffer *fb = pstate->fb;
+       struct intel_atomic_state *intel_state =
+                       to_intel_atomic_state(cstate->base.state);
        uint32_t latency = dev_priv->wm.skl_latency[level];
        uint32_t method1, method2;
        uint32_t plane_bytes_per_line, plane_blocks_per_line;
@@ -3598,10 +3600,17 @@ static int skl_compute_plane_wm(const struct
drm_i915_private *dev_priv,
        uint32_t width = 0, height = 0;
        uint32_t plane_pixel_rate;
        uint32_t y_tile_minimum, y_min_scanlines;
+       enum watermark_memory_wa mem_wa;

if (latency == 0 || !cstate->base.active || !intel_pstate-

base.visible)

                return 0;

+ mem_wa = intel_state ? intel_state->wm_results.mem_wa :

WATERMARK_WA_NONE;
+       if (mem_wa != WATERMARK_WA_NONE) {
+               if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
+                       latency += 15;
+       }
+
        width = drm_rect_width(&intel_pstate->base.src) >> 16;
        height = drm_rect_height(&intel_pstate->base.src) >> 16;

@@ -3634,6 +3643,9 @@ static int skl_compute_plane_wm(const struct

drm_i915_private *dev_priv,
                y_min_scanlines = 4;
        }

+ if (mem_wa == WATERMARK_WA_Y_TILED)

+               y_min_scanlines *= 2;

The changes to this function will need to be rebased. But it's
interesting that my interpretation regarding this *= 2 was different.
After an email to the spec authors it seems your interpretation is the
right one...

+
        plane_bytes_per_line = width * cpp;
        if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
            fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
@@ -4075,6 +4087,15 @@ skl_include_affected_pipes(struct
drm_atomic_state *state)
                intel_state->wm_results.dirty_pipes = ~0;
        }

+ /*

+        * If Watermark workaround is changed we need to recalculate
+        * watermark values for all active pipes
+        */
+       if (intel_state->wm_results.mem_wa != dev_priv-

wm.skl_hw.mem_wa) {

+               realloc_pipes = ~0;
+               intel_state->wm_results.dirty_pipes = ~0;
+       }
+

Things have changed since this patch was written. I'd recommend moving
this to skl_set_memory_bandwidth_wa().

Working on re-basing the patch.

        for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
                struct intel_crtc_state *cstate;

@@ -4087,6 +4108,129 @@ skl_include_affected_pipes(struct

drm_atomic_state *state)
  }

static void

+skl_set_memory_bandwidth_wm_wa(struct drm_atomic_state *state)

We're not really setting anything here: we're computing. Rename to
skl_compute_wm_memory_bw_wa?

+{
+       struct drm_device *dev = state->dev;
+       struct drm_i915_private *dev_priv = to_i915(dev);
+       struct intel_crtc *intel_crtc;
+       struct intel_plane_state *intel_pstate;
+       struct intel_atomic_state *intel_state =
to_intel_atomic_state(state);
+       struct memdev_info *memdev_info = &dev_priv->memdev_info;
+       int num_active_plane, num_active_pipe;

I like to use plural in the number-of variables, since we're counting
the number of active planeS, not the number of the active plane.

+       uint32_t plane_bw, max_plane_bw, pipe_bw, max_pipe_bw;
+       uint32_t total_pipe_bw;
+       uint32_t system_bw = 0;
+       uint32_t rank;
+       int x_tile_per;
+       int display_bw_per;

I read this and kept thinking "x tiles per what?", took me a while to
realize it's percentage. It's probably better to just use a long self-
documenting name here than to use an ambiguous abbreviation.

Also, I'd just go with uint32_t in the percentages too to avoid any
possible problems with the uin32_t calculations.

And perhaps some declarations could be moved to smaller inner scopes
below.

+       bool y_tile_enabled = false;
+

if (!platforms_that_require_the_wa) {
        wa = WATERMARK_WA_NONE;
        return;
}

this function is not called by any GEN other than GEN9, will it be ok toadd "if (!GEN9)" check?

+       if (!memdev_info->valid)
+               goto exit;

Our default behavior should be to apply the WA then in doubt, not
to avoid it, so the return value here and in the other error cases
should be WATERAMARK_WA_Y_TILED.

Also, you can avoid the gotos by just setting mem_wa at the beginning
of the function, then you can just "return" later instead of goto.

+
+       system_bw = memdev_info->mem_speed_khz * memdev_info-

num_channels *

+                               memdev_info->channel_width_bytes;
+
+       if (!system_bw)
+               goto exit;

This shouldn't be possible. Otherwise, the previous patch needs to be
fixed in a way to not allow system_bw to end up as zero. Please either
remove the check or change to "if (WARN_ON(!system_bw))".

yes, ideally this should not be possible, but what if because of anyreason BIOS is not writing the register OR we don't have BIOS altogether?

If we add WARN_ON here, it'll flood the logs, we can add WARN_ON_ONCE,but we are anyway printing the warning in previous patch, if frequencyor any other variable calculated is ZERO, IMO will add more prints inprevious patch instead of adding here.

+
+       max_pipe_bw = 0;
+       for_each_intel_crtc(dev, intel_crtc) {
+               struct intel_crtc_state *cstate;
+               struct intel_plane *plane;
+
+               /*
+                * If CRTC is part of current atomic commit, get
crtc state from
+                * existing CRTC state. else take the cached CRTC
state
+                */
+               cstate = NULL;
+               if (state)

If state is NULL we'll segfault way before this point, so there's no
need for this check.

+                       cstate =
intel_atomic_get_existing_crtc_state(state,
+                                       intel_crtc);
+               if (!cstate)
+                       cstate = to_intel_crtc_state(intel_crtc-

base.state);

+
+               if (!cstate->base.active)
+                       continue;
+
+               num_active_plane = 0;
+               max_plane_bw = 0;
+               for_each_intel_plane_mask(dev, plane, cstate-

base.plane_mask) {

+                       struct drm_framebuffer *fb = NULL;
+
+                       intel_pstate = NULL;
+                       if (state)

Same here: no need to check for state.

+                               intel_pstate =
+                               intel_atomic_get_existing_plane_stat
e(state,
+                                                                       
plane);
+                       if (!intel_pstate)
+                               intel_pstate =
+                                       to_intel_plane_state(plane-

base.state);

+
+                       WARN_ON(!intel_pstate->base.fb);

if (WARN_ON(!intel_pstate->base.fb))
        return;

Then we can just forget about checking for fb again below.

+
+                       if (!intel_pstate->base.visible)
+                               continue;

Don't we also need to exclude the cursor here?

yes, cursor should be excluded, will add that. thanks,

+
+                       fb = intel_pstate->base.fb;
+                       if (fb && (fb->modifier[0] ==
I915_FORMAT_MOD_Y_TILED ||
+                               fb->modifier[0] ==
I915_FORMAT_MOD_Yf_TILED))
+                               y_tile_enabled = true;
+
+                       plane_bw =
skl_adjusted_plane_pixel_rate(cstate,
+                                                               inte
l_pstate);
+                       max_plane_bw = max(plane_bw, max_plane_bw);
+                       num_active_plane++;
+               }
+               pipe_bw = max_plane_bw * num_active_plane;
+               max_pipe_bw = max(pipe_bw, max_pipe_bw);
+       }
+
+       if (intel_state->active_pipe_changes)
+               num_active_pipe = hweight32(intel_state-

active_crtcs);

+       else
+               num_active_pipe = hweight32(dev_priv->active_crtcs);

Why don't we just trust intel_state->active_crtcs even when
active_pipe_changes is false?

I really didn't dig into that code & reused the code from differentfunction. will check into this

+
+       total_pipe_bw = max_pipe_bw * num_active_pipe;
+
+       display_bw_per = DIV_ROUND_UP_ULL(total_pipe_bw * 100,
system_bw * 1000);

Why ULL?

Also, if everything is KHz as it's supposed to be, the *100 and *1000
don't make sense.

In previous patch I converted the frequency to MHz... need to fix that,then *1000 not be required,

*100 is still required to get the percentage.

+
+       /*
+        * If there is any Ytile plane enabled and arbitrated
display
+        * bandwidth > 20% of raw system memory bandwidth
+        * Enale Y-tile related WA
+        *
+        * If memory is dual channel single rank, Xtile limit = 35%,
else Xtile
+        * limit = 60%
+        * If there is no Ytile plane enabled and
+        * arbitrated display bandwidth > Xtile limit
+        * Enable X-tile realated WA
+        */
+       if (y_tile_enabled && (display_bw_per > 20))
+               intel_state->wm_results.mem_wa =
WATERMARK_WA_Y_TILED;
+       else {
+
+               if (memdev_info->rank_valid)
+                       rank = memdev_info->rank;
+               else
+                       rank = DRAM_RANK_DUAL; /* Assume we are dual
rank */

When in doubt, apply the most restrictive workaround to avoid the
possibility of underruns. So here it's safer to assume
DRAM_RANK_SINGLE.

OK...

thanks,

Regards,
-Mahesh

+
+               if ((rank == DRAM_RANK_SINGLE) &&
+                                       (memdev_info->num_channels
== 2))
+                       x_tile_per = 35;
+               else
+                       x_tile_per = 60;
+
+               if (display_bw_per > x_tile_per)
+                       intel_state->wm_results.mem_wa =
WATERMARK_WA_X_TILED;
+       }
+       return;
+
+exit:
+       intel_state->wm_results.mem_wa = WATERMARK_WA_NONE;
+}
+
+static void
  skl_copy_wm_for_pipe(struct skl_wm_values *dst,
                     struct skl_wm_values *src,
                     enum pipe pipe)
@@ -4131,6 +4275,8 @@ skl_compute_wm(struct drm_atomic_state *state)
        /* Clear all dirty flags */
        results->dirty_pipes = 0;

+ skl_set_memory_bandwidth_wm_wa(state);

+
        ret = skl_include_affected_pipes(state);
        if (ret)
                return ret;


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v4 4/8] drm/i915/gen9: WM memory bandwidth related workaround

Reply via email to