On Mon, Sep 08, 2025 at 10:35:33AM +0300, Luca Coelho wrote:
> There are currently two booleans to define three tiling modes, which
> is bad practice because it allows representing an invalid mode. In
> order to simplify this, convert these two booleans into one
> enumeration with three possible tiling modes.
>
> Additionally, introduce the concept of Y "family" of tiling, which
> groups Y, Yf and 4 tiling, since they're effectively treated in the
> same way in the watermark calculations. Describe the grouping in the
> enumeration definition.
>
> Signed-off-by: Luca Coelho <[email protected]>
> ---
> drivers/gpu/drm/i915/display/skl_watermark.c | 35 ++++++++++++++------
> 1 file changed, 24 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c
> b/drivers/gpu/drm/i915/display/skl_watermark.c
> index 0ce3420a919e..dd4bed02c3c0 100644
> --- a/drivers/gpu/drm/i915/display/skl_watermark.c
> +++ b/drivers/gpu/drm/i915/display/skl_watermark.c
> @@ -53,9 +53,16 @@ struct intel_dbuf_state {
> #define intel_atomic_get_new_dbuf_state(state) \
> to_intel_dbuf_state(intel_atomic_get_new_global_obj_state(state,
> &to_intel_display(state)->dbuf.obj))
>
> +/* Tiling mode groups relevant to WM calculations */
> +enum wm_tiling_mode {
> + WM_TILING_LINEAR,
> + WM_TILING_X_TILED, /* mostly like linear */
The _TILED suffix seems redundant here.
> + WM_TILING_Y_FAMILY, /* includes Y, Yf and 4 tiling */
I don't really like the "y family" invention. Doesn't really
unconfuse anything for the reader without going back to have
a look at the comment.
I think it would be better to just spell out each tilimg mode.
So I guess something like "WM_TILING_Y_Yf_4"
> +};
> +
> /* Stores plane specific WM parameters */
> struct skl_wm_params {
> - bool x_tiled, y_tiled;
> + enum wm_tiling_mode tiling;
That'll now be 4 bytes.
> bool rc_surface;
> bool is_planar;
and we'll have a two byte hole here.
> u32 width;
u8 cpp;
And there's a 3 byte hole already here after the cpp.
Should group the u8 with the bools to avoid so many holes.
We could also shrink y_min_scanlines to a u8 and
stick it into the last 1 byte hole. That'd shrink the whole
struct by 4 bytes.
dbuf_block_size would also fit in a u16, but doesn't look
like we have any other holes where we could stick it. Hmm,
actually 'width' could probably also be shrunk to be a u16.
So could get rid of another 4 bytes here if we really
wanted to.
But I suppose all that repacking should be a separate patch...
> @@ -618,7 +625,8 @@ static unsigned int skl_wm_latency(struct intel_display
> *display, int level,
> display->platform.cometlake) && skl_watermark_ipc_enabled(display))
> latency += 4;
>
> - if (skl_needs_memory_bw_wa(display) && wp && wp->x_tiled)
> + if (skl_needs_memory_bw_wa(display) &&
> + wp && wp->tiling == WM_TILING_X_TILED)
> latency += 15;
>
> return latency;
> @@ -1674,9 +1682,14 @@ skl_compute_wm_params(const struct intel_crtc_state
> *crtc_state,
> return -EINVAL;
> }
>
> - wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED;
> - wp->y_tiled = modifier != I915_FORMAT_MOD_X_TILED &&
> - intel_fb_is_tiled_modifier(modifier);
> + if (modifier == I915_FORMAT_MOD_X_TILED)
> + wp->tiling = WM_TILING_X_TILED;
> + else if (modifier != I915_FORMAT_MOD_X_TILED &&
The modifier check here is redundant with the if-else construct.
> + intel_fb_is_tiled_modifier(modifier))
> + wp->tiling = WM_TILING_Y_FAMILY;
> + else
> + wp->tiling = WM_TILING_LINEAR;
In fact we can avoid the entire intel_fb_is_tiled_modifier()
call with something like:
if (mod == LINEAR)
tiling = LINEAR;
else if (mod == X)
tiling = X;
else
tiling = Y_Yf_4;
The wm code always pops up fairly high in cpu profiles, so
anything that makes it lighter is worth considering.
> +
> wp->rc_surface = intel_fb_is_ccs_modifier(modifier);
> wp->is_planar = intel_format_info_is_yuv_semiplanar(format, modifier);
>
> @@ -1716,7 +1729,7 @@ skl_compute_wm_params(const struct intel_crtc_state
> *crtc_state,
> wp->y_min_scanlines *= 2;
>
> wp->plane_bytes_per_line = wp->width * wp->cpp;
> - if (wp->y_tiled) {
> + if (wp->tiling == WM_TILING_Y_FAMILY) {
> interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
> wp->y_min_scanlines,
> wp->dbuf_block_size);
> @@ -1732,7 +1745,7 @@ skl_compute_wm_params(const struct intel_crtc_state
> *crtc_state,
> interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
> wp->dbuf_block_size);
>
> - if (!wp->x_tiled || DISPLAY_VER(display) >= 10)
> + if (wp->tiling != WM_TILING_X_TILED || DISPLAY_VER(display) >=
> 10)
> interm_pbpl++;
>
> wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
> @@ -1820,7 +1833,7 @@ static void skl_compute_plane_wm(const struct
> intel_crtc_state *crtc_state,
> latency,
> wp->plane_blocks_per_line);
>
> - if (wp->y_tiled) {
> + if (wp->tiling == WM_TILING_Y_FAMILY) {
> selected_result = max_fixed16(method2, wp->y_tile_minimum);
> } else {
> if ((wp->cpp * crtc_state->hw.pipe_mode.crtc_htotal /
> @@ -1870,7 +1883,7 @@ static void skl_compute_plane_wm(const struct
> intel_crtc_state *crtc_state,
>
> /* Display WA #1126: skl,bxt,kbl */
> if (level >= 1 && level <= 7) {
> - if (wp->y_tiled) {
> + if (wp->tiling == WM_TILING_Y_FAMILY) {
> blocks +=
> fixed16_to_u32_round_up(wp->y_tile_minimum);
> lines += wp->y_min_scanlines;
> } else {
> @@ -1889,7 +1902,7 @@ static void skl_compute_plane_wm(const struct
> intel_crtc_state *crtc_state,
> }
>
> if (DISPLAY_VER(display) >= 11) {
> - if (wp->y_tiled) {
> + if (wp->tiling == WM_TILING_Y_FAMILY) {
> int extra_lines;
>
> if (lines % wp->y_min_scanlines == 0)
> @@ -2015,7 +2028,7 @@ static void skl_compute_transition_wm(struct
> intel_display *display,
> */
> wm0_blocks = wm0->blocks - 1;
>
> - if (wp->y_tiled) {
> + if (wp->tiling == WM_TILING_Y_FAMILY) {
> trans_y_tile_min =
> (u16)mul_round_up_u32_fixed16(2, wp->y_tile_minimum);
> blocks = max(wm0_blocks, trans_y_tile_min) + trans_offset;
> --
> 2.50.1
--
Ville Syrjälä
Intel