On 2023-08-10 12:02, Melissa Wen wrote:
> From: Harry Wentland <harry.wentl...@amd.com>
> 
> The region and segment calculation was incapable of dealing
> with regions of more than 16 segments. We first fix this.
> 
> Now that we can support regions up to 256 elements we can
> define a better segment distribution for near-linear LUTs
> for our maximum of 256 HW-supported points.
> 
> With these changes an "identity" LUT looks visually
> indistinguishable from bypass and allows us to use
> our 3DLUT.
> 

Have you had a chance to test whether this patch makes a
difference? I haven't had the time yet.

Harry

> Signed-off-by: Harry Wentland <harry.wentl...@amd.com>
> Signed-off-by: Melissa Wen <m...@igalia.com>
> ---
>  .../amd/display/dc/dcn10/dcn10_cm_common.c    | 93 +++++++++++++++----
>  1 file changed, 75 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c 
> b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
> index 3538973bd0c6..04b2e04b68f3 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
> @@ -349,20 +349,37 @@ bool cm_helper_translate_curve_to_hw_format(struct 
> dc_context *ctx,
>                * segment is from 2^-10 to 2^1
>                * There are less than 256 points, for optimization
>                */
> -             seg_distr[0] = 3;
> -             seg_distr[1] = 4;
> -             seg_distr[2] = 4;
> -             seg_distr[3] = 4;
> -             seg_distr[4] = 4;
> -             seg_distr[5] = 4;
> -             seg_distr[6] = 4;
> -             seg_distr[7] = 4;
> -             seg_distr[8] = 4;
> -             seg_distr[9] = 4;
> -             seg_distr[10] = 1;
> +             if (output_tf->tf == TRANSFER_FUNCTION_LINEAR) {
> +                     seg_distr[0] = 0; /* 2 */
> +                     seg_distr[1] = 1; /* 4 */
> +                     seg_distr[2] = 2; /* 4 */
> +                     seg_distr[3] = 3; /* 8 */
> +                     seg_distr[4] = 4; /* 16 */
> +                     seg_distr[5] = 5; /* 32 */
> +                     seg_distr[6] = 6; /* 64 */
> +                     seg_distr[7] = 7; /* 128 */
> +
> +                     region_start = -8;
> +                     region_end = 1;
> +             } else {
> +                     seg_distr[0] = 3; /* 8 */
> +                     seg_distr[1] = 4; /* 16 */
> +                     seg_distr[2] = 4;
> +                     seg_distr[3] = 4;
> +                     seg_distr[4] = 4;
> +                     seg_distr[5] = 4;
> +                     seg_distr[6] = 4;
> +                     seg_distr[7] = 4;
> +                     seg_distr[8] = 4;
> +                     seg_distr[9] = 4;
> +                     seg_distr[10] = 1; /* 2 */
> +                     /* total = 8*16 + 8 + 64 + 2 = */
> +
> +                     region_start = -10;
> +                     region_end = 1;
> +             }
> +
>  
> -             region_start = -10;
> -             region_end = 1;
>       }
>  
>       for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++)
> @@ -375,16 +392,56 @@ bool cm_helper_translate_curve_to_hw_format(struct 
> dc_context *ctx,
>  
>       j = 0;
>       for (k = 0; k < (region_end - region_start); k++) {
> -             increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]);
> +             /*
> +              * We're using an ugly-ish hack here. Our HW allows for
> +              * 256 segments per region but SW_SEGMENTS is 16.
> +              * SW_SEGMENTS has some undocumented relationship to
> +              * the number of points in the tf_pts struct, which
> +              * is 512, unlike what's suggested TRANSFER_FUNC_POINTS.
> +              *
> +              * In order to work past this dilemma we'll scale our
> +              * increment by (1 << 4) and then do the inverse (1 >> 4)
> +              * when accessing the elements in tf_pts.
> +              *
> +              * TODO: find a better way using SW_SEGMENTS and
> +              *       TRANSFER_FUNC_POINTS definitions
> +              */
> +             increment = (NUMBER_SW_SEGMENTS << 4) / (1 << seg_distr[k]);
>               start_index = (region_start + k + MAX_LOW_POINT) *
>                               NUMBER_SW_SEGMENTS;
> -             for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
> +             for (i = (start_index << 4); i < (start_index << 4) + 
> (NUMBER_SW_SEGMENTS << 4);
>                               i += increment) {
> +                     struct fixed31_32 in_plus_one, in;
> +                     struct fixed31_32 value, red_value, green_value, 
> blue_value;
> +                     uint32_t t = i & 0xf;
> +
>                       if (j == hw_points - 1)
>                               break;
> -                     rgb_resulted[j].red = output_tf->tf_pts.red[i];
> -                     rgb_resulted[j].green = output_tf->tf_pts.green[i];
> -                     rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
> +
> +                     in_plus_one = output_tf->tf_pts.red[(i >> 4) + 1];
> +                     in = output_tf->tf_pts.red[i >> 4];
> +                     value = dc_fixpt_sub(in_plus_one, in);
> +                     value = dc_fixpt_shr(dc_fixpt_mul_int(value, t),  4);
> +                     value = dc_fixpt_add(in, value);
> +                     red_value = value;
> +
> +                     in_plus_one = output_tf->tf_pts.green[(i >> 4) + 1];
> +                     in = output_tf->tf_pts.green[i >> 4];
> +                     value = dc_fixpt_sub(in_plus_one, in);
> +                     value = dc_fixpt_shr(dc_fixpt_mul_int(value, t),  4);
> +                     value = dc_fixpt_add(in, value);
> +                     green_value = value;
> +
> +                     in_plus_one = output_tf->tf_pts.blue[(i >> 4) + 1];
> +                     in = output_tf->tf_pts.blue[i >> 4];
> +                     value = dc_fixpt_sub(in_plus_one, in);
> +                     value = dc_fixpt_shr(dc_fixpt_mul_int(value, t),  4);
> +                     value = dc_fixpt_add(in, value);
> +                     blue_value = value;
> +
> +                     rgb_resulted[j].red = red_value;
> +                     rgb_resulted[j].green = green_value;
> +                     rgb_resulted[j].blue = blue_value;
>                       j++;
>               }
>       }

Reply via email to