[AMD Official Use Only - General]

Please remove the file 
"/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c.rej" if 
it's not necessary.

Thanks,
Wayne
________________________________
From: Kotarac, Pavle <pavle.kota...@amd.com>
Sent: Thursday, September 8, 2022 01:32
To: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Wentland, Harry <harry.wentl...@amd.com>; Li, Sun peng (Leo) 
<sunpeng...@amd.com>; Lakha, Bhawanpreet <bhawanpreet.la...@amd.com>; Siqueira, 
Rodrigo <rodrigo.sique...@amd.com>; Pillai, Aurabindo 
<aurabindo.pil...@amd.com>; Zhuo, Qingqing (Lillian) <qingqing.z...@amd.com>; 
Li, Roman <roman...@amd.com>; Lin, Wayne <wayne....@amd.com>; Wang, Chao-kai 
(Stylon) <stylon.w...@amd.com>; Chiu, Solomon <solomon.c...@amd.com>; Kotarac, 
Pavle <pavle.kota...@amd.com>; Gutierrez, Agustin <agustin.gutier...@amd.com>; 
Cyr, Aric <aric....@amd.com>; Stupar, Nevenko <nevenko.stu...@amd.com>; 
Kotarac, Pavle <pavle.kota...@amd.com>
Subject: [PATCH 02/27] drm/amd/display: Optimizations for DML math

From: Aric Cyr <aric....@amd.com>

[why]
Conditionals in the DML basic math functions significantly impact mode
enumeration.

[how]
Remove conditionals for floor/ceil operations which are used frequently
in DML and add an assertion for invalid callers using zero granularity.
Fix existing callers that rely on 0 granularity.

Reviewed-by: Nevenko Stupar <nevenko.stu...@amd.com>
Acked-by: Pavle Kotarac <pavle.kota...@amd.com>
Signed-off-by: Aric Cyr <aric....@amd.com>
---
 .../amd/display/dc/dml/calcs/dcn_calc_auto.c  | 22 ++++++-------
 .../amd/display/dc/dml/calcs/dcn_calc_math.c  | 16 +++++-----
 .../dc/dml/dcn20/display_mode_vba_20v2.c      | 10 +++---
 .../dc/dml/dcn21/display_mode_vba_21.c        |  6 ++--
 .../dc/dml/dcn30/display_mode_vba_30.c        |  8 ++---
 .../dc/dml/dcn31/display_mode_vba_31.c        |  6 ++--
 .../dc/dml/dcn314/display_mode_vba_314.c      |  6 ++--
 .../dc/dml/dcn32/display_mode_vba_util_32.c   | 31 +++++++++----------
 .../dml/dcn32/display_mode_vba_util_32.c.rej  | 12 +++++++
 .../drm/amd/display/dc/dml/dml_inline_defs.h  |  9 ++----
 10 files changed, 65 insertions(+), 61 deletions(-)
 create mode 100644 
drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c.rej

diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c 
b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c
index 41284e263325..288d22a16cf2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c
@@ -526,10 +526,10 @@ void mode_support_and_system_configuration(struct 
dcn_bw_internal_vars *v)
                                 }
                                 if (v->max_swath_height_c[k] > 0.0) {
                                         v->swath_width_granularity_c = 256.0 
/dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / v->max_swath_height_c[k];
-                               }
-                               v->rounded_up_max_swath_size_bytes_c = 
(dcn_bw_ceil2(v->swath_width_yper_state[i][j][k] / 2.0 - 1.0, 
v->swath_width_granularity_c) + v->swath_width_granularity_c) * 
v->byte_per_pixel_in_detc[k] * v->max_swath_height_c[k];
-                               if (v->source_pixel_format[k] == 
dcn_bw_yuv420_sub_10) {
-                                       v->rounded_up_max_swath_size_bytes_c 
=dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
+                                       v->rounded_up_max_swath_size_bytes_c = 
(dcn_bw_ceil2(v->swath_width_yper_state[i][j][k] / 2.0 - 1.0, 
v->swath_width_granularity_c) + v->swath_width_granularity_c) * 
v->byte_per_pixel_in_detc[k] * v->max_swath_height_c[k];
+                                       if (v->source_pixel_format[k] == 
dcn_bw_yuv420_sub_10) {
+                                               
v->rounded_up_max_swath_size_bytes_c = 
dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
+                                       }
                                 }
                                 if (v->rounded_up_max_swath_size_bytes_y + 
v->rounded_up_max_swath_size_bytes_c <= v->det_buffer_size_in_kbyte * 1024.0 / 
2.0) {
                                         v->swath_height_yper_state[i][j][k] = 
v->max_swath_height_y[k];
@@ -552,14 +552,14 @@ void mode_support_and_system_configuration(struct 
dcn_bw_internal_vars *v)
                                         v->lines_in_det_chroma = 
v->det_buffer_size_in_kbyte * 1024.0 / 3.0 / v->byte_per_pixel_in_dety[k] / 
(v->swath_width_yper_state[i][j][k] / 2.0);
                                 }
                                 
v->effective_lb_latency_hiding_source_lines_luma 
=dcn_bw_min2(v->max_line_buffer_lines,dcn_bw_floor2(v->line_buffer_size / 
v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] 
/dcn_bw_max2(v->h_ratio[k], 1.0)), 1.0)) - (v->vtaps[k] - 1.0);
-                               
v->effective_lb_latency_hiding_source_lines_chroma 
=dcn_bw_min2(v->max_line_buffer_lines,dcn_bw_floor2(v->line_buffer_size / 
v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] / 2.0 
/dcn_bw_max2(v->h_ratio[k] / 2.0, 1.0)), 1.0)) - (v->vta_pschroma[k] - 1.0);
                                 v->effective_detlb_lines_luma 
=dcn_bw_floor2(v->lines_in_det_luma +dcn_bw_min2(v->lines_in_det_luma * 
v->required_dispclk[i][j] * v->byte_per_pixel_in_dety[k] * v->pscl_factor[k] / 
v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_luma), 
v->swath_height_yper_state[i][j][k]);
-                               v->effective_detlb_lines_chroma 
=dcn_bw_floor2(v->lines_in_det_chroma +dcn_bw_min2(v->lines_in_det_chroma * 
v->required_dispclk[i][j] * v->byte_per_pixel_in_detc[k] * 
v->pscl_factor_chroma[k] / v->return_bw_per_state[i], 
v->effective_lb_latency_hiding_source_lines_chroma), 
v->swath_height_cper_state[i][j][k]);
                                 if (v->byte_per_pixel_in_detc[k] == 0.0) {
                                         
v->urgent_latency_support_us_per_state[i][j][k] = v->effective_detlb_lines_luma 
* (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - 
v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] 
*dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / (v->return_bw_per_state[i] / 
v->no_of_dpp[i][j][k]);
                                 }
                                 else {
-                                       
v->urgent_latency_support_us_per_state[i][j][k] 
=dcn_bw_min2(v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) 
/ v->v_ratio[k] - v->effective_detlb_lines_luma * 
v->swath_width_yper_state[i][j][k] *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 
1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]), 
v->effective_detlb_lines_chroma * (v->htotal[k] / v->pixel_clock[k]) / 
(v->v_ratio[k] / 2.0) - v->effective_detlb_lines_chroma * 
v->swath_width_yper_state[i][j][k] / 2.0 
*dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / (v->return_bw_per_state[i] / 
v->no_of_dpp[i][j][k]));
+                                       
v->effective_lb_latency_hiding_source_lines_chroma = 
dcn_bw_min2(v->max_line_buffer_lines, dcn_bw_floor2(v->line_buffer_size / 
v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] / 2.0 / 
dcn_bw_max2(v->h_ratio[k] / 2.0, 1.0)), 1.0)) - (v->vta_pschroma[k] - 1.0);
+                                       v->effective_detlb_lines_chroma = 
dcn_bw_floor2(v->lines_in_det_chroma + dcn_bw_min2(v->lines_in_det_chroma * 
v->required_dispclk[i][j] * v->byte_per_pixel_in_detc[k] * 
v->pscl_factor_chroma[k] / v->return_bw_per_state[i], 
v->effective_lb_latency_hiding_source_lines_chroma), 
v->swath_height_cper_state[i][j][k]);
+                                       
v->urgent_latency_support_us_per_state[i][j][k] = 
dcn_bw_min2(v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) 
/ v->v_ratio[k] - v->effective_detlb_lines_luma * 
v->swath_width_yper_state[i][j][k] * dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 
1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]), 
v->effective_detlb_lines_chroma * (v->htotal[k] / v->pixel_clock[k]) / 
(v->v_ratio[k] / 2.0) - v->effective_detlb_lines_chroma * 
v->swath_width_yper_state[i][j][k] / 2.0 * 
dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / (v->return_bw_per_state[i] / 
v->no_of_dpp[i][j][k]));
                                 }
                         }
                 }
@@ -1146,10 +1146,10 @@ void display_pipe_configuration(struct 
dcn_bw_internal_vars *v)
                 }
                 if (v->maximum_swath_height_c > 0.0) {
                         v->swath_width_granularity_c = 256.0 
/dcn_bw_ceil2(v->byte_per_pix_detc, 2.0) / v->maximum_swath_height_c;
-               }
-               v->rounded_up_max_swath_size_bytes_c = 
(dcn_bw_ceil2(v->swath_width / 2.0 - 1.0, v->swath_width_granularity_c) + 
v->swath_width_granularity_c) * v->byte_per_pix_detc * 
v->maximum_swath_height_c;
-               if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
-                       v->rounded_up_max_swath_size_bytes_c 
=dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
+                       v->rounded_up_max_swath_size_bytes_c = 
(dcn_bw_ceil2(v->swath_width / 2.0 - 1.0, v->swath_width_granularity_c) + 
v->swath_width_granularity_c) * v->byte_per_pix_detc * 
v->maximum_swath_height_c;
+                       if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
+                               v->rounded_up_max_swath_size_bytes_c = 
dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
+                       }
                 }
                 if (v->rounded_up_max_swath_size_bytes_y + 
v->rounded_up_max_swath_size_bytes_c <= v->det_buffer_size_in_kbyte * 1024.0 / 
2.0) {
                         v->swath_height_y[k] = v->maximum_swath_height_y;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c 
b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c
index 07d18e78de49..cac72413a097 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c
@@ -23,6 +23,7 @@
  *
  */

+#include "os_types.h"
 #include "dcn_calc_math.h"

 #define isNaN(number) ((number) != (number))
@@ -69,8 +70,8 @@ float dcn_bw_max2(const float arg1, const float arg2)

 float dcn_bw_floor2(const float arg, const float significance)
 {
-       if (significance == 0)
-               return 0;
+       ASSERT(significance != 0);
+
         return ((int) (arg / significance)) * significance;
 }
 float dcn_bw_floor(const float arg)
@@ -80,17 +81,14 @@ float dcn_bw_floor(const float arg)

 float dcn_bw_ceil(const float arg)
 {
-       float flr = dcn_bw_floor2(arg, 1);
-
-       return flr + 0.00001 >= arg ? arg : flr + 1;
+       return (int) (arg + 0.99999);
 }

 float dcn_bw_ceil2(const float arg, const float significance)
 {
-       float flr = dcn_bw_floor2(arg, significance);
-       if (significance == 0)
-               return 0;
-       return flr + 0.00001 >= arg ? arg : flr + significance;
+       ASSERT(significance != 0);
+
+       return ((int) (arg / significance + 0.99999)) * significance;
 }

 float dcn_bw_max3(float v1, float v2, float v3)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
index 63bbdf8b8678..edd098c7eb92 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
@@ -4478,17 +4478,17 @@ void 
dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
                                                 
locals->EffectiveLBLatencyHidingSourceLinesLuma),
                                                 
locals->SwathHeightYPerState[i][j][k]);

-                               locals->EffectiveDETLBLinesChroma = 
dml_floor(locals->LinesInDETChroma + dml_min(
-                                               locals->LinesInDETChroma * 
locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *
-                                               locals->PSCL_FACTOR_CHROMA[k] / 
locals->ReturnBWPerState[i][0],
-                                               
locals->EffectiveLBLatencyHidingSourceLinesChroma),
-                                               
locals->SwathHeightCPerState[i][j][k]);

                                 if (locals->BytePerPixelInDETC[k] == 0) {
                                         
locals->UrgentLatencySupportUsPerState[i][j][k] = 
locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
                                                         / locals->VRatio[k] - 
locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
                                                                 
dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / 
locals->NoOfDPP[i][j][k]);
                                 } else {
+                                       locals->EffectiveDETLBLinesChroma = 
dml_floor(locals->LinesInDETChroma + dml_min(
+                                                       
locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * 
locals->BytePerPixelInDETC[k] *
+                                                       
locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],
+                                                       
locals->EffectiveLBLatencyHidingSourceLinesChroma),
+                                                       
locals->SwathHeightCPerState[i][j][k]);
                                         
locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min(
                                                 
locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
                                                 / locals->VRatio[k] - 
locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
index 8a7485e21d53..d40d32e380f4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
@@ -806,10 +806,12 @@ static bool CalculatePrefetchSchedule(

         if (myPipe->SourceScan == dm_horz) {
                 *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, 
myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY;
-               *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, 
myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;
+               if (myPipe->BlockWidth256BytesC > 0)
+                       *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, 
myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;
         } else {
                 *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, 
myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY;
-               *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, 
myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;
+               if (myPipe->BlockWidth256BytesC > 0)
+                       *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, 
myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;
         }

         prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * 
dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * 
dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index b7fa003ffe06..c117a9724ae1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -6322,10 +6322,6 @@ static void CalculateSwathWidth(

         for (k = 0; k < NumberOfActivePlanes; ++k) {
                 enum odm_combine_mode MainPlaneODMCombine = 0;
-               surface_width_ub_l = dml_ceil(SurfaceWidthY[k], 
Read256BytesBlockWidthY[k]);
-               surface_height_ub_l = dml_ceil(SurfaceHeightY[k], 
Read256BytesBlockHeightY[k]);
-               surface_width_ub_c = dml_ceil(SurfaceWidthC[k], 
Read256BytesBlockWidthC[k]);
-               surface_height_ub_c = dml_ceil(SurfaceHeightC[k], 
Read256BytesBlockHeightC[k]);

                 if (SourceScan[k] != dm_vert) {
                         SwathWidthSingleDPPY[k] = ViewportWidth[k];
@@ -6365,8 +6361,6 @@ static void CalculateSwathWidth(

                 surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], 
Read256BytesBlockWidthY[k]);
                 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], 
Read256BytesBlockHeightY[k]);
-               surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], 
Read256BytesBlockWidthC[k]);
-               surface_height_ub_c = dml_ceil(SurfaceHeightC[k], 
Read256BytesBlockHeightC[k]);

                 if (SourceScan[k] != dm_vert) {
                         MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
@@ -6374,6 +6368,7 @@ static void CalculateSwathWidth(
                         swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 
(long) dml_ceil(SwathWidthY[k] - 1,
                                         Read256BytesBlockWidthY[k]) + 
Read256BytesBlockWidthY[k]);
                         if (BytePerPixC[k] > 0) {
+                               surface_width_ub_c  = 
dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
                                 swath_width_chroma_ub[k] = 
dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
                                                 Read256BytesBlockWidthC[k]) + 
Read256BytesBlockWidthC[k]);
                         } else {
@@ -6385,6 +6380,7 @@ static void CalculateSwathWidth(
                         swath_width_luma_ub[k] = dml_min(surface_height_ub_l, 
(long) dml_ceil(SwathWidthY[k] - 1,
                                         Read256BytesBlockHeightY[k]) + 
Read256BytesBlockHeightY[k]);
                         if (BytePerPixC[k] > 0) {
+                               surface_height_ub_c = 
dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
                                 swath_width_chroma_ub[k] = 
dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
                                                 Read256BytesBlockHeightC[k]) + 
Read256BytesBlockHeightC[k]);
                         } else {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index d63b4209b14c..8753f94bdd79 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -6933,8 +6933,6 @@ static void CalculateSwathWidth(
                 {
                 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], 
Read256BytesBlockWidthY[k]);
                 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], 
Read256BytesBlockHeightY[k]);
-               int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], 
Read256BytesBlockWidthC[k]);
-               int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], 
Read256BytesBlockHeightC[k]);

 #ifdef __DML_VBA_DEBUG__
                 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, 
k, surface_width_ub_l);
@@ -6945,6 +6943,8 @@ static void CalculateSwathWidth(
                         MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
                         swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 
(int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + 
Read256BytesBlockWidthY[k]);
                         if (BytePerPixC[k] > 0) {
+                               int surface_width_ub_c = 
dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+
                                 swath_width_chroma_ub[k] = dml_min(
                                                 surface_width_ub_c,
                                                 (int) dml_ceil(SwathWidthC[k] 
- 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
@@ -6956,6 +6956,8 @@ static void CalculateSwathWidth(
                         MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
                         swath_width_luma_ub[k] = dml_min(surface_height_ub_l, 
(int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + 
Read256BytesBlockHeightY[k]);
                         if (BytePerPixC[k] > 0) {
+                               int surface_height_ub_c = 
dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+
                                 swath_width_chroma_ub[k] = dml_min(
                                                 surface_height_ub_c,
                                                 (int) dml_ceil(SwathWidthC[k] 
- 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
index fc4d7474c111..503d9ede0ac1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -7049,8 +7049,6 @@ static void CalculateSwathWidth(
                 {
                 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], 
Read256BytesBlockWidthY[k]);
                 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], 
Read256BytesBlockHeightY[k]);
-               int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], 
Read256BytesBlockWidthC[k]);
-               int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], 
Read256BytesBlockHeightC[k]);

 #ifdef __DML_VBA_DEBUG__
                 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, 
k, surface_width_ub_l);
@@ -7061,6 +7059,8 @@ static void CalculateSwathWidth(
                         MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
                         swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 
(int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + 
Read256BytesBlockWidthY[k]);
                         if (BytePerPixC[k] > 0) {
+                               int surface_width_ub_c = 
dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+
                                 swath_width_chroma_ub[k] = dml_min(
                                                 surface_width_ub_c,
                                                 (int) dml_ceil(SwathWidthC[k] 
- 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
@@ -7072,6 +7072,8 @@ static void CalculateSwathWidth(
                         MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
                         swath_width_luma_ub[k] = dml_min(surface_height_ub_l, 
(int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + 
Read256BytesBlockHeightY[k]);
                         if (BytePerPixC[k] > 0) {
+                               int surface_height_ub_c = 
dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+
                                 swath_width_chroma_ub[k] = dml_min(
                                                 surface_height_ub_c,
                                                 (int) dml_ceil(SwathWidthC[k] 
- 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index dc501ee7d01a..c385c54832cb 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -719,8 +719,8 @@ void dml32_CalculateSwathWidth(

         unsigned int surface_width_ub_l;
         unsigned int surface_height_ub_l;
-       unsigned int surface_width_ub_c;
-       unsigned int surface_height_ub_c;
+       unsigned int surface_width_ub_c = 0;
+       unsigned int surface_height_ub_c = 0;

 #ifdef __DML_VBA_DEBUG__
         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
@@ -784,21 +784,6 @@ void dml32_CalculateSwathWidth(

                 surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], 
Read256BytesBlockWidthY[k]);
                 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], 
Read256BytesBlockHeightY[k]);
-               surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], 
Read256BytesBlockWidthC[k]);
-               surface_height_ub_c = dml_ceil(SurfaceHeightC[k], 
Read256BytesBlockHeightC[k]);
-
-#ifdef __DML_VBA_DEBUG__
-               dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, 
k, surface_width_ub_l);
-               dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, 
k, surface_height_ub_l);
-               dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, 
k, surface_width_ub_c);
-               dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, 
k, surface_height_ub_c);
-               dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", 
__func__, k, Read256BytesBlockWidthY[k]);
-               dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", 
__func__, k, Read256BytesBlockHeightY[k]);
-               dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", 
__func__, k, Read256BytesBlockWidthC[k]);
-               dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", 
__func__, k, Read256BytesBlockHeightC[k]);
-               dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, 
k, ViewportStationary[k]);
-               dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, 
DPPPerSurface[k]);
-#endif

                 if (!IsVertical(SourceRotation[k])) {
                         MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
@@ -818,6 +803,7 @@ void dml32_CalculateSwathWidth(
                                                                 
Read256BytesBlockWidthY[k]);
                         }
                         if (BytePerPixC[k] > 0) {
+                               surface_width_ub_c  = 
dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
                                 if (ViewportStationary[k] && DPPPerSurface[k] 
== 1) {
                                         swath_width_chroma_ub[k] = 
dml_min(surface_width_ub_c,
                                                         
dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
@@ -848,6 +834,7 @@ void dml32_CalculateSwathWidth(
                                                 Read256BytesBlockHeightY[k]) + 
Read256BytesBlockHeightY[k]);
                         }
                         if (BytePerPixC[k] > 0) {
+                               surface_height_ub_c = 
dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
                                 if (ViewportStationary[k] && DPPPerSurface[k] 
== 1) {
                                         swath_width_chroma_ub[k] = 
dml_min(surface_height_ub_c,
                                                         
dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
@@ -866,6 +853,16 @@ void dml32_CalculateSwathWidth(
                 }

 #ifdef __DML_VBA_DEBUG__
+               dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, 
k, surface_width_ub_l);
+               dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, 
k, surface_height_ub_l);
+               dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, 
k, surface_width_ub_c);
+               dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, 
k, surface_height_ub_c);
+               dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", 
__func__, k, Read256BytesBlockWidthY[k]);
+               dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", 
__func__, k, Read256BytesBlockHeightY[k]);
+               dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", 
__func__, k, Read256BytesBlockWidthC[k]);
+               dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", 
__func__, k, Read256BytesBlockHeightC[k]);
+               dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, 
k, ViewportStationary[k]);
+               dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, 
DPPPerSurface[k]);
                 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, 
k, swath_width_luma_ub[k]);
                 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", 
__func__, k, swath_width_chroma_ub[k]);
                 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, 
k, MaximumSwathHeightY[k]);
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c.rej 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c.rej
new file mode 100644
index 000000000000..dff2badbf820
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c.rej
@@ -0,0 +1,12 @@
+diff a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c       
(rejected hunks)
+@@ -727,8 +727,8 @@ void dml32_CalculateSwathWidth(
+        enum odm_combine_mode MainSurfaceODMMode;
+        unsigned int surface_width_ub_l;
+        unsigned int surface_height_ub_l;
+-      unsigned int surface_width_ub_c;
+-      unsigned int surface_height_ub_c;
++      unsigned int surface_width_ub_c = 0;
++      unsigned int surface_height_ub_c = 0;
+        unsigned int k, j;
+
+ #ifdef __DML_VBA_DEBUG__
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h 
b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
index 479d7d83220c..072bd0539605 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
@@ -76,14 +76,9 @@ static inline double dml_floor(double a, double granularity)

 static inline double dml_round(double a)
 {
-       double round_pt = 0.5;
-       double ceil = dml_ceil(a, 1);
-       double floor = dml_floor(a, 1);
+       const double round_pt = 0.5;

-       if (a - floor >= round_pt)
-               return ceil;
-       else
-               return floor;
+       return dml_floor(a + round_pt, 1);
 }

 /* float
--
2.34.1

Reply via email to