RE: [PATCH] Revert "drm/amd/pm: resolve reboot exception for si oland"
[AMD Official Use Only - General] Reviewed-by: Yang Wang Best Regards, Kevin -Original Message- From: amd-gfx On Behalf Of Alex Deucher Sent: Tuesday, February 27, 2024 6:47 AM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander Subject: [PATCH] Revert "drm/amd/pm: resolve reboot exception for si oland" This reverts commit e490d60a2f76bff636c68ce4fe34c1b6c34bbd86. This causes hangs on SI when DC is enabled and errors on driver reboot and power off cycles. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3216 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2755 Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 29 ++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index e145d53a794ba..a586e0b7c47d1 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -6927,6 +6927,23 @@ static int si_dpm_enable(struct amdgpu_device *adev) return 0; } +static int si_set_temperature_range(struct amdgpu_device *adev) { + int ret; + + ret = si_thermal_enable_alert(adev, false); + if (ret) + return ret; + ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) + return ret; + ret = si_thermal_enable_alert(adev, true); + if (ret) + return ret; + + return ret; +} + static void si_dpm_disable(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); @@ -7610,6 +7627,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, static int si_dpm_late_init(void *handle) { + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->pm.dpm_enabled) + return 0; + + ret = si_set_temperature_range(adev); + if (ret) + return ret; +#if 0 //TODO ? + si_dpm_powergate_uvd(adev, true); +#endif return 0; } -- 2.43.2
[PATCH] Revert "drm/amd/pm: resolve reboot exception for si oland"
This reverts commit e490d60a2f76bff636c68ce4fe34c1b6c34bbd86. This causes hangs on SI when DC is enabled and errors on driver reboot and power off cycles. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3216 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2755 Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 29 ++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index e145d53a794ba..a586e0b7c47d1 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -6927,6 +6927,23 @@ static int si_dpm_enable(struct amdgpu_device *adev) return 0; } +static int si_set_temperature_range(struct amdgpu_device *adev) +{ + int ret; + + ret = si_thermal_enable_alert(adev, false); + if (ret) + return ret; + ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) + return ret; + ret = si_thermal_enable_alert(adev, true); + if (ret) + return ret; + + return ret; +} + static void si_dpm_disable(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); @@ -7610,6 +7627,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, static int si_dpm_late_init(void *handle) { + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->pm.dpm_enabled) + return 0; + + ret = si_set_temperature_range(adev); + if (ret) + return ret; +#if 0 //TODO ? + si_dpm_powergate_uvd(adev, true); +#endif return 0; } -- 2.43.2
[RFC PATCH v4 38/42] drm/colorop: Add 1D Curve Custom LUT type
From: Alex Hung We've previously introduced DRM_COLOROP_1D_CURVE for pre-defined 1D curves. But we also have HW that supports custom curves and userspace needs the ability to pass custom curves, aka LUTs. This patch introduces a new colorop type, called DRM_COLOROP_1D_LUT that provides a SIZE property which is used by a driver to advertise the supported SIZE of the LUT, as well as a DATA property which userspace uses to set the LUT. DATA and size function in the same way as current drm_crtc GAMMA and DEGAMMA LUTs. Signed-off-by: Alex Hung Signed-off-by: Harry Wentland Co-developed-by: Harry Wentland --- drivers/gpu/drm/drm_atomic.c | 4 drivers/gpu/drm/drm_atomic_uapi.c | 5 + drivers/gpu/drm/drm_colorop.c | 36 +-- include/drm/drm_colorop.h | 16 ++ include/uapi/drm/drm_mode.h | 1 + 5 files changed, 60 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 6e736ffa6d7c..f7d51839ca03 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -799,6 +799,10 @@ static void drm_atomic_colorop_print_state(struct drm_printer *p, drm_printf(p, "\tcurve_1d_type=%s\n", drm_get_colorop_curve_1d_type_name(state->curve_1d_type)); break; + case DRM_COLOROP_1D_LUT: + drm_printf(p, "\tsize=%d\n", state->size); + drm_printf(p, "\tdata blob id=%d\n", state->data ? state->data->base.id : 0); + break; case DRM_COLOROP_CTM_3X4: drm_printf(p, "\tdata blob id=%d\n", state->data ? state->data->base.id : 0); break; diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index c7c1c614b0d9..6bfe857720cd 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -700,6 +700,9 @@ static int drm_atomic_color_set_data_property(struct drm_colorop *colorop, bool replaced = false; switch (colorop->type) { + case DRM_COLOROP_1D_LUT: + size = state->size * sizeof(struct drm_color_lut); + break; case DRM_COLOROP_CTM_3X4: size = sizeof(struct drm_color_ctm_3x4); break; @@ -749,6 +752,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop, *val = state->bypass; } else if (property == colorop->curve_1d_type_property) { *val = state->curve_1d_type; + } else if (property == colorop->size_property) { + *val = state->size; } else if (property == colorop->data_property) { *val = (state->data) ? state->data->base.id : 0; } else { diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index b10cad5a7208..4452eaeeb242 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -34,6 +34,7 @@ static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = { { DRM_COLOROP_1D_CURVE, "1D Curve" }, + { DRM_COLOROP_1D_LUT, "1D Curve Custom LUT" }, { DRM_COLOROP_CTM_3X4, "3x4 Matrix"} }; @@ -175,11 +176,41 @@ static int drm_colorop_create_data_prop(struct drm_device *dev, struct drm_color colorop->data_property = prop; drm_object_attach_property(>base, - colorop->data_property, - 0); + colorop->data_property, + 0); return 0; } +int drm_colorop_curve_1d_lut_init(struct drm_device *dev, struct drm_colorop *colorop, + struct drm_plane *plane, uint32_t lut_size) +{ + struct drm_property *prop; + int ret; + + ret = drm_colorop_init(dev, colorop, plane, DRM_COLOROP_1D_LUT); + if (ret) + return ret; + + /* initialize 1D LUT only attribute */ + /* LUT size */ + prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "SIZE", lut_size, lut_size); + if (!prop) + return -ENOMEM; + + colorop->size_property = prop; + drm_object_attach_property(>base, colorop->size_property, 0); + + /* data */ + ret = drm_colorop_create_data_prop(dev, colorop); + if (ret) + return ret; + + drm_colorop_reset(colorop); + colorop->state->size = lut_size; + + return 0; +} +EXPORT_SYMBOL(drm_colorop_curve_1d_lut_init); int drm_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop *colorop, struct drm_plane *plane) @@ -301,6 +332,7 @@ EXPORT_SYMBOL(drm_colorop_reset); static const char * const colorop_type_name[] = { [DRM_COLOROP_1D_CURVE] = "1D Curve", + [DRM_COLOROP_1D_LUT] = "1D Curve Custom LUT", [DRM_COLOROP_CTM_3X4] = "3x4 Matrix" }; diff --git
[RFC PATCH v4 33/42] drm/amd/display: Add support for sRGB EOTF in BLND block
From: Alex Hung Expose a 3rd 1D curve colorop, with support for DRM_COLOROP_1D_CURVE_SRGB_EOTF and program the BLND block to perform the sRGB transform when the colorop is not in bypass With this change the following IGT test passes: kms_colorop --run plane-XR30-XR30-srgb_eotf-srgb_inv_eotf-srgb_eotf The color pipeline now consists of the following colorops: 1. 1D curve colorop w/ sRGB EOTF support 2. 1D curve colorop w/ sRGB Inverse EOTF support 3. 1D curve colorop w/ sRGB EOTF support Signed-off-by: Alex Hung Signed-off-by: Harry Wentland Co-developed-by: Harry Wentland --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 77 +++ .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 18 + .../amd/display/amdgpu_dm/amdgpu_dm_colorop.h | 1 + 3 files changed, 96 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 8788cfd26abd..3e3ae2b58b06 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1267,6 +1267,72 @@ __set_dm_plane_colorop_shaper(struct drm_plane_state *plane_state, return __set_colorop_in_shaper_1d_curve(dc_plane_state, colorop_state); } + +static int +__set_colorop_1d_curve_blend_tf_lut(struct dc_plane_state *dc_plane_state, + struct drm_colorop_state *colorop_state) +{ + + struct dc_transfer_func *tf = dc_plane_state->blend_tf; + struct drm_colorop *colorop = colorop_state->colorop; + struct drm_device *drm = colorop->dev; + const struct drm_color_lut *blend_lut; + uint32_t blend_size; + + if (colorop->type != DRM_COLOROP_1D_CURVE && + colorop_state->curve_1d_type != DRM_COLOROP_1D_CURVE_SRGB_EOTF) + return -EINVAL; + + if (colorop_state->bypass) { + tf->type = TF_TYPE_BYPASS; + tf->tf = TRANSFER_FUNCTION_LINEAR; + return 0; + } + + drm_dbg(drm, "Blend colorop with ID: %d\n", colorop->base.id); + + if (colorop->type == DRM_COLOROP_1D_CURVE) { + tf->type = TF_TYPE_DISTRIBUTED_POINTS; + tf->tf = amdgpu_colorop_tf_to_dc_tf(colorop_state->curve_1d_type); + tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; + return __set_input_tf(NULL, tf, blend_lut, blend_size); + } + + return -EINVAL; +} + +static int +__set_dm_plane_colorop_blend(struct drm_plane_state *plane_state, +struct dc_plane_state *dc_plane_state, +struct drm_colorop *colorop) +{ + struct drm_colorop *old_colorop; + struct drm_colorop_state *colorop_state = NULL, *new_colorop_state; + struct drm_atomic_state *state = plane_state->state; + int i = 0; + + old_colorop = colorop; + + /* 3nd op: 1d curve - blend */ + for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) { + if (new_colorop_state->colorop == old_colorop && + new_colorop_state->curve_1d_type == DRM_COLOROP_1D_CURVE_SRGB_EOTF) { + colorop_state = new_colorop_state; + break; + } + + if (new_colorop_state->colorop == old_colorop) { + colorop_state = new_colorop_state; + break; + } + } + + if (!colorop_state) + return -EINVAL; + + return __set_colorop_1d_curve_blend_tf_lut(dc_plane_state, colorop_state); +} + static int amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state) @@ -1345,6 +1411,17 @@ amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state, if (ret) return ret; + /* 1D Curve - BLND TF */ + colorop = colorop->next; + if (!colorop) { + drm_dbg(dev, "no Blend TF colorop found\n"); + return -EINVAL; + } + + ret = __set_dm_plane_colorop_blend(plane_state, dc_plane_state, colorop); + if (ret) + return ret; + return 0; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c index 0d1626abf577..449a2ad6a184 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c @@ -37,6 +37,9 @@ const u64 amdgpu_dm_supported_degam_tfs = const u64 amdgpu_dm_supported_shaper_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF); +const u64 amdgpu_dm_supported_blnd_tfs = + BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF); + int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_prop_enum_list *list) { struct drm_colorop *op, *prev_op; @@ -72,5
[RFC PATCH v4 08/42] drm/doc/rfc: Describe why prescriptive color pipeline is needed
v4: - Drop IOCTL docs since we dropped the IOCTLs (Pekka) - Clarify reading and setting of COLOR_PIPELINE prop (Pekka) - Add blurb about not requiring to reject a pipeline due to incompatible ops, as long as op can be bypassed (Pekka) - Dropped informational strings (such as input CSC) as they're not actually intended to be advertised (Pekka) v3: - Describe DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE (Sebastian) - Ask for clear documentation of colorop behavior (Sebastian) v2: - Update colorop visualizations to match reality (Sebastian, Alex Hung) - Updated wording (Pekka) - Change BYPASS wording to make it non-mandatory (Sebastian) - Drop cover-letter-like paragraph from COLOR_PIPELINE Plane Property section (Pekka) - Use PQ EOTF instead of its inverse in Pipeline Programming example (Melissa) - Add "Driver Implementer's Guide" section (Pekka) - Add "Driver Forward/Backward Compatibility" section (Sebastian, Pekka) Signed-off-by: Harry Wentland --- Documentation/gpu/rfc/color_pipeline.rst | 360 +++ 1 file changed, 360 insertions(+) create mode 100644 Documentation/gpu/rfc/color_pipeline.rst diff --git a/Documentation/gpu/rfc/color_pipeline.rst b/Documentation/gpu/rfc/color_pipeline.rst new file mode 100644 index ..6c653e17054a --- /dev/null +++ b/Documentation/gpu/rfc/color_pipeline.rst @@ -0,0 +1,360 @@ + +Linux Color Pipeline API + + +What problem are we solving? + + +We would like to support pre-, and post-blending complex color +transformations in display controller hardware in order to allow for +HW-supported HDR use-cases, as well as to provide support to +color-managed applications, such as video or image editors. + +It is possible to support an HDR output on HW supporting the Colorspace +and HDR Metadata drm_connector properties, but that requires the +compositor or application to render and compose the content into one +final buffer intended for display. Doing so is costly. + +Most modern display HW offers various 1D LUTs, 3D LUTs, matrices, and other +operations to support color transformations. These operations are often +implemented in fixed-function HW and therefore much more power efficient than +performing similar operations via shaders or CPU. + +We would like to make use of this HW functionality to support complex color +transformations with no, or minimal CPU or shader load. + + +How are other OSes solving this problem? + + +The most widely supported use-cases regard HDR content, whether video or +gaming. + +Most OSes will specify the source content format (color gamut, encoding transfer +function, and other metadata, such as max and average light levels) to a driver. +Drivers will then program their fixed-function HW accordingly to map from a +source content buffer's space to a display's space. + +When fixed-function HW is not available the compositor will assemble a shader to +ask the GPU to perform the transformation from the source content format to the +display's format. + +A compositor's mapping function and a driver's mapping function are usually +entirely separate concepts. On OSes where a HW vendor has no insight into +closed-source compositor code such a vendor will tune their color management +code to visually match the compositor's. On other OSes, where both mapping +functions are open to an implementer they will ensure both mappings match. + +This results in mapping algorithm lock-in, meaning that no-one alone can +experiment with or introduce new mapping algorithms and achieve +consistent results regardless of which implementation path is taken. + +Why is Linux different? +=== + +Unlike other OSes, where there is one compositor for one or more drivers, on +Linux we have a many-to-many relationship. Many compositors; many drivers. +In addition each compositor vendor or community has their own view of how +color management should be done. This is what makes Linux so beautiful. + +This means that a HW vendor can now no longer tune their driver to one +compositor, as tuning it to one could make it look fairly different from +another compositor's color mapping. + +We need a better solution. + + +Descriptive API +=== + +An API that describes the source and destination colorspaces is a descriptive +API. It describes the input and output color spaces but does not describe +how precisely they should be mapped. Such a mapping includes many minute +design decision that can greatly affect the look of the final result. + +It is not feasible to describe such mapping with enough detail to ensure the +same result from each implementation. In fact, these mappings are a very active +research area. + + +Prescriptive API + + +A prescriptive API describes not the source and destination colorspaces. It +instead prescribes a recipe for how to manipulate pixel values to arrive
[RFC PATCH v4 02/42] drm: Add helper for conversion from signed-magnitude
CTM values are defined as signed-magnitude values. Add a helper that converts from CTM signed-magnitude fixed point value to the twos-complement value used by drm_fixed. Signed-off-by: Harry Wentland --- include/drm/drm_fixed.h | 18 ++ 1 file changed, 18 insertions(+) diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index 0c9f917a4d4b..cb842ba80ddd 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -78,6 +78,24 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B) #define DRM_FIXED_EPSILON 1LL #define DRM_FIXED_ALMOST_ONE (DRM_FIXED_ONE - DRM_FIXED_EPSILON) +/** + * @drm_sm2fixp + * + * Convert a 1.31.32 signed-magnitude fixed point to 32.32 + * 2s-complement fixed point + * + * @return s64 2s-complement fixed point + */ +static inline s64 drm_sm2fixp(__u64 a) +{ + if ((a & (1LL << 63))) { + return -(a & 0x7fffll); + } else { + return a; + } + +} + static inline s64 drm_int2fixp(int a) { return ((s64)a) << DRM_FIXED_POINT; -- 2.44.0
[RFC PATCH v4 29/42] drm/amd/display: Add bypass COLOR PIPELINE
Add the default Bypass pipeline and ensure it passes the kms_colorop test plane-XR30-XR30-bypass. Signed-off-by: Harry Wentland --- .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 8a4c40b4c27e..c5c07b4cd6c9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1651,6 +1651,20 @@ dm_atomic_plane_get_property(struct drm_plane *plane, } #endif +#define MAX_COLOR_PIPELINES 5 + +static int +dm_plane_init_colorops(struct drm_plane *plane) +{ + struct drm_prop_enum_list pipelines[MAX_COLOR_PIPELINES]; + int len = 0; + + /* Create COLOR_PIPELINE property and attach */ + drm_plane_create_color_pipeline_property(plane, pipelines, len); + + return 0; +} + static const struct drm_plane_funcs dm_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -1744,7 +1758,12 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, #ifdef AMD_PRIVATE_COLOR dm_atomic_plane_attach_color_mgmt_properties(dm, plane); +#else + res = dm_plane_init_colorops(plane); + if (res) + return res; #endif + /* Create (reset) the plane state */ if (plane->funcs->reset) plane->funcs->reset(plane); -- 2.44.0
[RFC PATCH v4 27/42] drm/colorop: define a new macro for_each_new_colorop_in_state
From: Alex Hung Create a new macro for_each_new_colorop_in_state to access new drm_colorop_state updated from uapi. Signed-off-by: Alex Hung --- include/drm/drm_atomic.h | 20 1 file changed, 20 insertions(+) diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 61e6b5553eec..a4c5ff99a515 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -979,6 +979,26 @@ void drm_state_dump(struct drm_device *dev, struct drm_printer *p); (new_colorop_state) = (__state)->colorops[__i].new_state, 1)) +/** + * for_each_new_colorop_in_state - iterate over all colorops in an atomic update + * @__state: drm_atomic_state pointer + * @colorop: drm_colorop iteration cursor + * @new_colorop_state: drm_colorop_state iteration cursor for the new state + * @__i: int iteration cursor, for macro-internal use + * + * This iterates over all colorops in an atomic update, tracking new state. This is + * useful is useful in places where the state delta needs to be considered, for + * example in atomic check functions. + */ +#define for_each_new_colorop_in_state(__state, colorop, new_colorop_state, __i) \ + for ((__i) = 0; \ +(__i) < (__state)->dev->mode_config.num_colorop; \ +(__i)++) \ + for_each_if ((__state)->colorops[__i].ptr &&\ +((colorop) = (__state)->colorops[__i].ptr, \ + (void)(colorop) /* Only to avoid unused-but-set-variable warning */, \ + (new_colorop_state) = (__state)->colorops[__i].new_state, 1)) + /** * for_each_oldnew_plane_in_state - iterate over all planes in an atomic update * @__state: drm_atomic_state pointer -- 2.44.0
[RFC PATCH v4 30/42] drm/amd/display: Skip color pipeline initialization for cursor plane
From: Alex Hung Signed-off-by: Alex Hung --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index c5c07b4cd6c9..d3f64f586243 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1659,6 +1659,9 @@ dm_plane_init_colorops(struct drm_plane *plane) struct drm_prop_enum_list pipelines[MAX_COLOR_PIPELINES]; int len = 0; + if (plane->type == DRM_PLANE_TYPE_CURSOR) + return 0; + /* Create COLOR_PIPELINE property and attach */ drm_plane_create_color_pipeline_property(plane, pipelines, len); -- 2.44.0
[RFC PATCH v4 32/42] drm/amd/display: Add support for sRGB Inverse EOTF in SHAPER block
From: Alex Hung Expose a 2nd curve colorop with support for DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF and program HW to perform the sRGB Inverse EOTF on the shaper block when the colorop is not in bypass. With this change the follow IGT tests pass: kms_colorop --run plane-XR30-XR30-srgb_inv_eotf kms_colorop --run plane-XR30-XR30-srgb_eotf-srgb_inv_eotf The color pipeline now consists of the following colorops: 1. 1D curve colorop w/ sRGB EOTF support 2. 1D curve colorop w/ sRGB Inverse EOTF support Signed-off-by: Alex Hung Signed-off-by: Harry Wentland Co-developed-by: Harry Wentland --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 76 +++ .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 20 - .../amd/display/amdgpu_dm/amdgpu_dm_colorop.h | 1 + 3 files changed, 96 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 3ec759934669..8788cfd26abd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1203,6 +1203,70 @@ __set_dm_plane_colorop_degamma(struct drm_plane_state *plane_state, return __set_colorop_in_tf_1d_curve(dc_plane_state, colorop_state); } +static int +__set_colorop_in_shaper_1d_curve(struct dc_plane_state *dc_plane_state, + struct drm_colorop_state *colorop_state) +{ + struct dc_transfer_func *tf = dc_plane_state->in_shaper_func; + struct drm_colorop *colorop = colorop_state->colorop; + struct drm_device *drm = colorop->dev; + const struct drm_color_lut *shaper_lut; + uint32_t shaper_size; + + if (colorop->type != DRM_COLOROP_1D_CURVE && + colorop_state->curve_1d_type != DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) + return -EINVAL; + + if (colorop_state->bypass) { + tf->type = TF_TYPE_BYPASS; + tf->tf = TRANSFER_FUNCTION_LINEAR; + return 0; + } + + drm_dbg(drm, "Shaper colorop with ID: %d\n", colorop->base.id); + + if (colorop->type == DRM_COLOROP_1D_CURVE) { + tf->type = TF_TYPE_DISTRIBUTED_POINTS; + tf->tf = amdgpu_colorop_tf_to_dc_tf(colorop_state->curve_1d_type); + tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; + return __set_output_tf(tf, shaper_lut, shaper_size, false); + } + + return -EINVAL; +} + +static int +__set_dm_plane_colorop_shaper(struct drm_plane_state *plane_state, + struct dc_plane_state *dc_plane_state, + struct drm_colorop *colorop) +{ + struct drm_colorop *old_colorop; + struct drm_colorop_state *colorop_state = NULL, *new_colorop_state; + struct drm_atomic_state *state = plane_state->state; + int i = 0; + + old_colorop = colorop; + + /* 2nd op: 1d curve - shaper */ + for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) { + if (new_colorop_state->colorop == old_colorop && + new_colorop_state->curve_1d_type == DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) { + colorop_state = new_colorop_state; + break; + } + + if (new_colorop_state->colorop == old_colorop) { + colorop_state = new_colorop_state; + break; + } + } + + if (!colorop_state) + return -EINVAL; + + return __set_colorop_in_shaper_1d_curve(dc_plane_state, colorop_state); +} + static int amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state) @@ -1258,6 +1322,7 @@ amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state) { struct drm_colorop *colorop = plane_state->color_pipeline; + struct drm_device *dev = plane_state->plane->dev; int ret; /* 1D Curve - DEGAM TF */ @@ -1269,6 +1334,17 @@ amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state, if (ret) return ret; + /* 1D Curve - SHAPER TF */ + colorop = colorop->next; + if (!colorop) { + drm_dbg(dev, "no Shaper TF colorop found\n"); + return -EINVAL; + } + + ret = __set_dm_plane_colorop_shaper(plane_state, dc_plane_state, colorop); + if (ret) + return ret; + return 0; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c index e8b7fc8bb0f1..0d1626abf577 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c @@ -34,9 +34,12 @@ const u64
[RFC PATCH v4 40/42] drm/amd/display: add 3x4 matrix colorop
From: Alex Hung This adds support for a 3x4 color transformation matrix. With this change the following IGT tests pass: kms_colorop --run plane-XR30-XR30-ctm_3x4_50_desat kms_colorop --run plane-XR30-XR30-ctm_3x4_overdrive kms_colorop --run plane-XR30-XR30-ctm_3x4_oversaturate kms_colorop --run plane-XR30-XR30-ctm_3x4_bt709_enc kms_colorop --run plane-XR30-XR30-ctm_3x4_bt709_dec The color pipeline now consists of the following colorops: 1. 1D curve colorop 2. 3x4 CTM 3. 1D curve colorop 4. 1D LUT 5. 1D curve colorop 6. 1D LUT Signed-off-by: Alex Hung Signed-off-by: Harry Wentland --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 50 +++ .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 15 ++ 2 files changed, 65 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index e7b51b29cc04..ef50640b362b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1212,6 +1212,45 @@ __set_dm_plane_colorop_degamma(struct drm_plane_state *plane_state, return __set_colorop_in_tf_1d_curve(dc_plane_state, colorop_state); } +static int +__set_dm_plane_colorop_3x4_matrix(struct drm_plane_state *plane_state, + struct dc_plane_state *dc_plane_state, + struct drm_colorop *colorop) +{ + struct drm_colorop *old_colorop; + struct drm_colorop_state *colorop_state = NULL, *new_colorop_state; + struct drm_atomic_state *state = plane_state->state; + const struct drm_device *dev = colorop->dev; + const struct drm_property_blob *blob; + struct drm_color_ctm_3x4 *ctm = NULL; + int i = 0; + + /* 3x4 matrix */ + old_colorop = colorop; + for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) { + if (new_colorop_state->colorop == old_colorop && + new_colorop_state->colorop->type == DRM_COLOROP_CTM_3X4) { + colorop_state = new_colorop_state; + break; + } + } + + if (colorop_state && !colorop_state->bypass && colorop->type == DRM_COLOROP_CTM_3X4) { + drm_dbg(dev, "3x4 matrix colorop with ID: %d\n", colorop->base.id); + blob = colorop_state->data; + if (blob->length == sizeof(struct drm_color_ctm_3x4)) { + ctm = blob ? (struct drm_color_ctm_3x4 *) blob->data : NULL; + __drm_ctm_3x4_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix); + dc_plane_state->gamut_remap_matrix.enable_remap = true; + dc_plane_state->input_csc_color_matrix.enable_adjustment = false; + } else + drm_warn(dev, "blob->length (%ld) isn't equal to drm_color_ctm_3x4 (%ld)\n", +blob->length, sizeof(struct drm_color_ctm_3x4)); + } + + return 0; +} + static int __set_dm_plane_colorop_shaper(struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state, @@ -1411,6 +1450,17 @@ amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state, if (ret) return ret; + /* 3x4 matrix */ + colorop = colorop->next; + if (!colorop) { + drm_dbg(dev, "no 3x4 matrix colorop found\n"); + return -EINVAL; + } + + ret = __set_dm_plane_colorop_3x4_matrix(plane_state, dc_plane_state, colorop); + if (ret) + return ret; + /* 1D Curve & LUT - SHAPER TF & LUT */ colorop = colorop->next; if (!colorop) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c index 08480bf61dc5..ba42f1f6b620 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c @@ -69,6 +69,21 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr prev_op = op; + /* 3x4 matrix */ + op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL); + if (!op) { + DRM_ERROR("KMS: Failed to allocate colorop\n"); + return -ENOMEM; + } + + ret = drm_colorop_ctm_3x4_init(dev, op, plane); + if (ret) + return ret; + + drm_colorop_set_next_property(prev_op, op); + + prev_op = op; + /* 1D curve - SHAPER TF */ op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL); if (!op) { -- 2.44.0
[RFC PATCH v4 26/42] drm/colorop: pass plane_color_pipeline client cap to atomic check
Drivers will need to know whether an atomic check/commit originated from a client with DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE so they can ignore deprecated properties, like COLOR_ENCODING and COLOR_RANGE. Pass the plane_color_pipeline bit to drm_atomic_state. Signed-off-by: Harry Wentland --- drivers/gpu/drm/drm_atomic_uapi.c | 1 + include/drm/drm_atomic.h | 17 + 2 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 23b248987a7c..c7c1c614b0d9 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -1571,6 +1571,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, drm_modeset_acquire_init(, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); state->acquire_ctx = state->allow_modeset = !!(arg->flags & DRM_MODE_ATOMIC_ALLOW_MODESET); + state->plane_color_pipeline = file_priv->plane_color_pipeline; retry: copied_objs = 0; diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 2346f19eda9f..61e6b5553eec 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -404,6 +404,23 @@ struct drm_atomic_state { * states. */ bool duplicated : 1; + + /** +* @plane_color_pipeline : 1 +* +* Indicates whether this atomic state originated with a client that +* set the DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE. +* +* Drivers and helper functions should use this to ignore legacy +* properties that are incompatible with the drm_plane COLOR_PIPELINE +* behavior, such as +* - COLOR_RANGE +* - COLOR_ENCODING +* or any other driver-specific properties that might affect pixel +* values. +*/ + bool plane_color_pipeline : 1; + struct __drm_colorops_state *colorops; struct __drm_planes_state *planes; struct __drm_crtcs_state *crtcs; -- 2.44.0
[RFC PATCH v4 13/42] drm/colorop: Add NEXT property
We'll construct color pipelines out of drm_colorop by chaining them via the NEXT pointer. NEXT will point to the next drm_colorop in the pipeline, or by 0 if we're at the end of the pipeline. v4: - Allow setting of NEXT property to NULL (Chaitanya Kumar Borah) v3: - Add next pointer to colorop to be used by drivers and in DRM core Signed-off-by: Harry Wentland --- drivers/gpu/drm/drm_colorop.c | 29 + include/drm/drm_colorop.h | 20 2 files changed, 49 insertions(+) diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index 29979816a2d1..71c2286333a1 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -59,6 +59,7 @@ int drm_colorop_init(struct drm_device *dev, struct drm_colorop *colorop, colorop->dev = dev; colorop->type = type; colorop->plane = plane; + colorop->next = NULL; list_add_tail(>head, >colorop_list); colorop->index = config->num_colorop++; @@ -92,6 +93,15 @@ int drm_colorop_init(struct drm_device *dev, struct drm_colorop *colorop, colorop->bypass_property, 1); + prop = drm_property_create_object(dev, DRM_MODE_PROP_IMMUTABLE | DRM_MODE_PROP_ATOMIC, + "NEXT", DRM_MODE_OBJECT_COLOROP); + if (!prop) + return -ENOMEM; + colorop->next_property = prop; + drm_object_attach_property(>base, + colorop->next_property, + 0); + return ret; } EXPORT_SYMBOL(drm_colorop_init); @@ -279,3 +289,22 @@ const char *drm_get_colorop_curve_1d_type_name(enum drm_colorop_curve_1d_type ty return colorop_curve_1d_type_names[type]; } + +/** + * drm_colorop_set_next_property - sets the next pointer + * @colorop: drm colorop + * @next: next colorop + * + * Should be used when constructing the color pipeline + */ +void drm_colorop_set_next_property(struct drm_colorop *colorop, struct drm_colorop *next) +{ + if (!colorop->next_property) + return; + + drm_object_property_set_value(>base, + colorop->next_property, + next ? next->base.id : 0); + colorop->next = next; +} +EXPORT_SYMBOL(drm_colorop_set_next_property); diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index 28aa5c1c309e..8060988b5892 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -132,6 +132,14 @@ struct drm_colorop { */ enum drm_colorop_type type; + /** +* @next: +* +* Read-only +* Pointer to next drm_colorop in pipeline +*/ + struct drm_colorop *next; + /** * @type_property: * @@ -159,10 +167,20 @@ struct drm_colorop { */ struct drm_property *curve_1d_type_property; + /** +* @next_property +* +* Read-only property to next colorop in the pipeline +*/ + struct drm_property *next_property; + }; #define obj_to_colorop(x) container_of(x, struct drm_colorop, base) + + + /** * drm_crtc_find - look up a Colorop object from its ID * @dev: DRM device @@ -213,5 +231,7 @@ static inline unsigned int drm_colorop_index(const struct drm_colorop *colorop) #define drm_for_each_colorop(colorop, dev) \ list_for_each_entry(colorop, &(dev)->mode_config.colorop_list, head) +void drm_colorop_set_next_property(struct drm_colorop *colorop, struct drm_colorop *next); + #endif /* __DRM_COLOROP_H__ */ -- 2.44.0
[RFC PATCH v4 42/42] drm/amd/display: add multiplier colorop
From: Alex Hung This adds support for a multiplier. This multiplier is programmed via the HDR Multiplier in DCN. With this change the following IGT tests pass: kms_colorop --run plane-XR30-XR30-multiply_125 kms_colorop --run plane-XR30-XR30-multiply_inv_125 The color pipeline now consists of the following colorops: 1. 1D curve colorop 2. 3x4 CTM 3. Multiplier 4. 1D curve colorop 5. 1D LUT 6. 1D curve colorop 7. 1D LUT Signed-off-by: Alex Hung --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 40 +++ .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 15 +++ 2 files changed, 55 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index ef50640b362b..b05e4fea8a08 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1251,6 +1251,35 @@ __set_dm_plane_colorop_3x4_matrix(struct drm_plane_state *plane_state, return 0; } +static int +__set_dm_plane_colorop_multiplier(struct drm_plane_state *plane_state, + struct dc_plane_state *dc_plane_state, + struct drm_colorop *colorop) +{ + struct drm_colorop *old_colorop; + struct drm_colorop_state *colorop_state = NULL, *new_colorop_state; + struct drm_atomic_state *state = plane_state->state; + const struct drm_device *dev = colorop->dev; + int i = 0; + + /* Multiplier */ + old_colorop = colorop; + for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) { + if (new_colorop_state->colorop == old_colorop && + new_colorop_state->colorop->type == DRM_COLOROP_MULTIPLIER) { + colorop_state = new_colorop_state; + break; + } + } + + if (colorop_state && !colorop_state->bypass && colorop->type == DRM_COLOROP_MULTIPLIER) { + drm_dbg(dev, "Multiplier colorop with ID: %d\n", colorop->base.id); + dc_plane_state->hdr_mult = amdgpu_dm_fixpt_from_s3132(colorop_state->multiplier); + } + + return 0; +} + static int __set_dm_plane_colorop_shaper(struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state, @@ -1461,6 +1490,17 @@ amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state, if (ret) return ret; + /* Multiplier */ + colorop = colorop->next; + if (!colorop) { + drm_dbg(dev, "no multiplier colorop found\n"); + return -EINVAL; + } + + ret = __set_dm_plane_colorop_multiplier(plane_state, dc_plane_state, colorop); + if (ret) + return ret; + /* 1D Curve & LUT - SHAPER TF & LUT */ colorop = colorop->next; if (!colorop) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c index ba42f1f6b620..b739d6cb3e6b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c @@ -84,6 +84,21 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr prev_op = op; + /* Multiplier */ + op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL); + if (!op) { + DRM_ERROR("KMS: Failed to allocate colorop\n"); + return -ENOMEM; + } + + ret = drm_colorop_mult_init(dev, op, plane); + if (ret) + return ret; + + drm_colorop_set_next_property(prev_op, op); + + prev_op = op; + /* 1D curve - SHAPER TF */ op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL); if (!op) { -- 2.44.0
[RFC PATCH v4 31/42] drm/amd/display: Add support for sRGB EOTF in DEGAM block
From: Alex Hung Expose one 1D curve colorop with support for DRM_COLOROP_1D_CURVE_SRGB_EOTF and program HW to perform the sRGB transform when the colorop is not in bypass. With this change the following IGT test passes: kms_colorop --run plane-XR30-XR30-srgb_eotf The color pipeline now consists of a single colorop: 1. 1D curve colorop w/ sRGB EOTF Signed-off-by: Alex Hung Signed-off-by: Harry Wentland Co-developed-by: Harry Wentland --- .../gpu/drm/amd/display/amdgpu_dm/Makefile| 3 +- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 88 +++ .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 58 .../amd/display/amdgpu_dm/amdgpu_dm_colorop.h | 34 +++ .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 10 +++ 5 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.h diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index ab2a97e354da..46158d67ab12 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -38,7 +38,8 @@ AMDGPUDM = \ amdgpu_dm_pp_smu.o \ amdgpu_dm_psr.o \ amdgpu_dm_replay.o \ - amdgpu_dm_wb.o + amdgpu_dm_wb.o \ + amdgpu_dm_colorop.o ifdef CONFIG_DRM_AMD_DC_FP AMDGPUDM += dc_fpu.o diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 9b527bffe11a..3ec759934669 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -668,6 +668,19 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf) } } +static enum dc_transfer_func_predefined +amdgpu_colorop_tf_to_dc_tf(enum drm_colorop_curve_1d_type tf) +{ + switch (tf) + { + case DRM_COLOROP_1D_CURVE_SRGB_EOTF: + case DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF: + return TRANSFER_FUNCTION_SRGB; + default: + return TRANSFER_FUNCTION_LINEAR;; + } +} + static void __to_dc_lut3d_color(struct dc_rgb *rgb, const struct drm_color_lut lut, int bit_precision) @@ -1137,6 +1150,59 @@ __set_dm_plane_degamma(struct drm_plane_state *plane_state, return 0; } +static int +__set_colorop_in_tf_1d_curve(struct dc_plane_state *dc_plane_state, + struct drm_colorop_state *colorop_state) +{ + struct dc_transfer_func *tf = dc_plane_state->in_transfer_func; + struct drm_colorop *colorop = colorop_state->colorop; + struct drm_device *drm = colorop->dev; + + if (colorop->type != DRM_COLOROP_1D_CURVE && + colorop_state->curve_1d_type != DRM_COLOROP_1D_CURVE_SRGB_EOTF) + return -EINVAL; + + if (colorop_state->bypass) { + tf->type = TF_TYPE_BYPASS; + tf->tf = TRANSFER_FUNCTION_LINEAR; + return 0; + } + + drm_dbg(drm, "Degamma colorop with ID: %d\n", colorop->base.id); + + tf->type = TF_TYPE_PREDEFINED; + tf->tf = amdgpu_colorop_tf_to_dc_tf(colorop_state->curve_1d_type); + + return 0; +} + +static int +__set_dm_plane_colorop_degamma(struct drm_plane_state *plane_state, + struct dc_plane_state *dc_plane_state, + struct drm_colorop *colorop) +{ + struct drm_colorop *old_colorop; + struct drm_colorop_state *colorop_state = NULL, *new_colorop_state; + struct drm_atomic_state *state = plane_state->state; + int i = 0; + + old_colorop = colorop; + + /* 1st op: 1d curve - degamma */ + for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) { + if (new_colorop_state->colorop == old_colorop && + new_colorop_state->curve_1d_type == DRM_COLOROP_1D_CURVE_SRGB_EOTF) { + colorop_state = new_colorop_state; + break; + } + } + + if (!colorop_state) + return -EINVAL; + + return __set_colorop_in_tf_1d_curve(dc_plane_state, colorop_state); +} + static int amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state) @@ -1187,6 +1253,25 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, return 0; } +static int +amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state, + struct dc_plane_state *dc_plane_state) +{ + struct drm_colorop *colorop = plane_state->color_pipeline; + int ret; + + /* 1D Curve - DEGAM TF */ + if (!colorop) { + return -EINVAL; + } + + ret =
[RFC PATCH v4 37/42] drm/amd/display: Add support for BT.709 and BT.2020 TFs
This adds support for the BT.709/BT.2020 transfer functions on all current 1D curve plane colorops, i.e., on DEGAM, SHAPER, and BLND blocks. With this change the following IGT subtests pass: kms_colorop --run plane-XR30-XR30-bt2020_inv_oetf kms_colorop --run plane-XR30-XR30-bt2020_oetf Signed-off-by: Harry Wentland --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 11 --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 10 +++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 20b7eb47388c..d5d356cf9fc6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -677,6 +677,9 @@ amdgpu_colorop_tf_to_dc_tf(enum drm_colorop_curve_1d_type tf) case DRM_COLOROP_1D_CURVE_SRGB_EOTF: case DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF: return TRANSFER_FUNCTION_SRGB; + case DRM_COLOROP_1D_CURVE_BT2020_INV_OETF: + case DRM_COLOROP_1D_CURVE_BT2020_OETF: + return TRANSFER_FUNCTION_BT709; case DRM_COLOROP_1D_CURVE_PQ_125_EOTF: case DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF: return TRANSFER_FUNCTION_PQ; @@ -1287,8 +1290,10 @@ __set_colorop_1d_curve_blend_tf_lut(struct dc_plane_state *dc_plane_state, const struct drm_color_lut *blend_lut; uint32_t blend_size; - if (colorop->type != DRM_COLOROP_1D_CURVE && - colorop_state->curve_1d_type != DRM_COLOROP_1D_CURVE_SRGB_EOTF) + if (colorop->type != DRM_COLOROP_1D_CURVE) + return -EINVAL; + + if (!(BIT(colorop_state->curve_1d_type) & amdgpu_dm_supported_blnd_tfs)) return -EINVAL; if (colorop_state->bypass) { @@ -1324,7 +1329,7 @@ __set_dm_plane_colorop_blend(struct drm_plane_state *plane_state, /* 3nd op: 1d curve - blend */ for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) { if (new_colorop_state->colorop == old_colorop && - new_colorop_state->curve_1d_type == DRM_COLOROP_1D_CURVE_SRGB_EOTF) { + (BIT(new_colorop_state->curve_1d_type) & amdgpu_dm_supported_blnd_tfs)) { colorop_state = new_colorop_state; break; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c index f99d8e09d89b..bc66bd4f9fdd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c @@ -33,14 +33,18 @@ const u64 amdgpu_dm_supported_degam_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | - BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF); + BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) | + BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF); const u64 amdgpu_dm_supported_shaper_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) | - BIT(DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF); + BIT(DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF) | + BIT(DRM_COLOROP_1D_CURVE_BT2020_OETF); const u64 amdgpu_dm_supported_blnd_tfs = - BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF); + BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | + BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) | + BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF); int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_prop_enum_list *list) { -- 2.44.0
[RFC PATCH v4 20/42] drm/colorop: Add 3x4 CTM type
This type is used to support a 3x4 matrix in colorops. A 3x4 matrix uses the last column as a "bias" column. Some HW exposes support for 3x4. The calculation looks like: out matrixin |R| |0 1 2 3 | | R | |G| = |4 5 6 7 | x | G | |B| |8 9 10 11| | B | |1.0| This is also the first colorop where we need a blob property to program the property. For that we'll introduce a new DATA property that can be used by all colorop TYPEs requiring a blob. The way a DATA blob is read depends on the TYPE of the colorop. We only create the DATA property for property types that need it. v4: - Create helper function for creating 3x4 CTM colorop - Fix CTM indexes in comment (Pekka) Signed-off-by: Harry Wentland --- drivers/gpu/drm/drm_atomic.c | 14 ++- drivers/gpu/drm/drm_atomic_uapi.c | 29 ++ drivers/gpu/drm/drm_colorop.c | 40 +++ include/drm/drm_colorop.h | 19 +++ include/uapi/drm/drm_mode.h | 9 ++- 5 files changed, 109 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index d82858dabf06..6e736ffa6d7c 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -793,7 +793,19 @@ static void drm_atomic_colorop_print_state(struct drm_printer *p, drm_printf(p, "colorop[%u]:\n", colorop->base.id); drm_printf(p, "\ttype=%s\n", drm_get_colorop_type_name(colorop->type)); drm_printf(p, "\tbypass=%u\n", state->bypass); - drm_printf(p, "\tcurve_1d_type=%s\n", drm_get_colorop_curve_1d_type_name(state->curve_1d_type)); + + switch (colorop->type) { + case DRM_COLOROP_1D_CURVE: + drm_printf(p, "\tcurve_1d_type=%s\n", + drm_get_colorop_curve_1d_type_name(state->curve_1d_type)); + break; + case DRM_COLOROP_CTM_3X4: + drm_printf(p, "\tdata blob id=%d\n", state->data ? state->data->base.id : 0); + break; + default: + break; + } + drm_printf(p, "\tnext=%d\n", colorop->next ? colorop->next->base.id : 0); } diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index ff258b34544e..23b248987a7c 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -691,6 +691,30 @@ drm_atomic_plane_get_property(struct drm_plane *plane, return 0; } +static int drm_atomic_color_set_data_property(struct drm_colorop *colorop, + struct drm_colorop_state *state, + struct drm_property *property, uint64_t val) +{ + ssize_t elem_size = -1; + ssize_t size = -1; + bool replaced = false; + + switch (colorop->type) { + case DRM_COLOROP_CTM_3X4: + size = sizeof(struct drm_color_ctm_3x4); + break; + default: + /* should never get here */ + return -EINVAL; + } + + return drm_property_replace_blob_from_id(colorop->dev, + >data, + val, + size, + elem_size, + ); +} static int drm_atomic_colorop_set_property(struct drm_colorop *colorop, struct drm_colorop_state *state, struct drm_file *file_priv, @@ -700,6 +724,9 @@ static int drm_atomic_colorop_set_property(struct drm_colorop *colorop, state->bypass = val; } else if (property == colorop->curve_1d_type_property) { state->curve_1d_type = val; + } else if (property == colorop->data_property) { + return drm_atomic_color_set_data_property(colorop, + state, property, val); } else { drm_dbg_atomic(colorop->dev, "[COLOROP:%d:%d] unknown property [PROP:%d:%s]]\n", @@ -722,6 +749,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop, *val = state->bypass; } else if (property == colorop->curve_1d_type_property) { *val = state->curve_1d_type; + } else if (property == colorop->data_property) { + *val = (state->data) ? state->data->base.id : 0; } else { return -EINVAL; } diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index 71c2286333a1..7baa1eba 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -34,6 +34,7 @@ static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = { { DRM_COLOROP_1D_CURVE, "1D Curve" }, + { DRM_COLOROP_CTM_3X4, "3x4 Matrix"} }; static const char * const colorop_curve_1d_type_names[] = { @@ -93,6 +94,7 @@ int drm_colorop_init(struct drm_device *dev, struct drm_colorop
[RFC PATCH v4 39/42] drm/amd/display: add shaper and blend colorops for 1D Curve Custom LUT
From: Alex Hung This patch adds colorops for custom 1D LUTs in the SHAPER and BLND HW blocks. With this change the following IGT tests pass: kms_colorop --run plane-XR30-XR30-srgb_inv_eotf_lut kms_colorop --run plane-XR30-XR30-srgb_inv_eotf_lut-srgb_eotf_lut The color pipeline now consists of the following colorops: 1. 1D curve colorop 2. 1D curve colorop 3. 1D LUT 4. 1D curve colorop 5. 1D LUT The 1D curve colorops support sRGB, BT2020, and PQ scaled to 125.0. Signed-off-by: Alex Hung Signed-off-by: Harry Wentland --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 170 +- .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 30 2 files changed, 119 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index d5d356cf9fc6..e7b51b29cc04 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1212,40 +1212,6 @@ __set_dm_plane_colorop_degamma(struct drm_plane_state *plane_state, return __set_colorop_in_tf_1d_curve(dc_plane_state, colorop_state); } -static int -__set_colorop_in_shaper_1d_curve(struct dc_plane_state *dc_plane_state, - struct drm_colorop_state *colorop_state) -{ - struct dc_transfer_func *tf = dc_plane_state->in_shaper_func; - struct drm_colorop *colorop = colorop_state->colorop; - struct drm_device *drm = colorop->dev; - const struct drm_color_lut *shaper_lut; - uint32_t shaper_size; - - if (colorop->type != DRM_COLOROP_1D_CURVE) - return -EINVAL; - - if (!(BIT(colorop_state->curve_1d_type) & amdgpu_dm_supported_shaper_tfs)) - return -EINVAL; - - if (colorop_state->bypass) { - tf->type = TF_TYPE_BYPASS; - tf->tf = TRANSFER_FUNCTION_LINEAR; - return 0; - } - - drm_dbg(drm, "Shaper colorop with ID: %d\n", colorop->base.id); - - if (colorop->type == DRM_COLOROP_1D_CURVE) { - tf->type = TF_TYPE_DISTRIBUTED_POINTS; - tf->tf = amdgpu_colorop_tf_to_dc_tf(colorop_state->curve_1d_type); - tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; - return __set_output_tf(tf, shaper_lut, shaper_size, false); - } - - return -EINVAL; -} - static int __set_dm_plane_colorop_shaper(struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state, @@ -1254,64 +1220,61 @@ __set_dm_plane_colorop_shaper(struct drm_plane_state *plane_state, struct drm_colorop *old_colorop; struct drm_colorop_state *colorop_state = NULL, *new_colorop_state; struct drm_atomic_state *state = plane_state->state; + enum dc_transfer_func_predefined default_tf = TRANSFER_FUNCTION_LINEAR; + struct dc_transfer_func *tf = dc_plane_state->in_shaper_func; + const struct drm_color_lut *shaper_lut; + struct drm_device *dev = colorop->dev; + uint32_t shaper_size; int i = 0; + /* 1D Curve - SHAPER TF */ old_colorop = colorop; - - /* 2nd op: 1d curve - shaper */ for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) { if (new_colorop_state->colorop == old_colorop && (BIT(new_colorop_state->curve_1d_type) & amdgpu_dm_supported_shaper_tfs)) { colorop_state = new_colorop_state; break; } + } - if (new_colorop_state->colorop == old_colorop) { + if (colorop_state && !colorop_state->bypass && colorop->type == DRM_COLOROP_1D_CURVE) { + drm_dbg(dev, "Shaper TF colorop with ID: %d\n", colorop->base.id); + tf->type = TF_TYPE_DISTRIBUTED_POINTS; + tf->tf = default_tf = amdgpu_colorop_tf_to_dc_tf(colorop_state->curve_1d_type); + tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; + __set_output_tf(tf, shaper_lut, shaper_size, false); + } + + /* 1D LUT - SHAPER LUT */ + colorop = old_colorop->next; + if (!colorop) { + drm_dbg(dev, "no Shaper LUT colorop found\n"); + return -EINVAL; + } + + old_colorop = colorop; + for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) { + if (new_colorop_state->colorop == old_colorop && + new_colorop_state->colorop->type == DRM_COLOROP_1D_LUT) { colorop_state = new_colorop_state; break; } } - if (!colorop_state) - return -EINVAL; - - return __set_colorop_in_shaper_1d_curve(dc_plane_state, colorop_state); -} - - -static int -__set_colorop_1d_curve_blend_tf_lut(struct dc_plane_state *dc_plane_state, -
[RFC PATCH v4 41/42] drm/colorop: Add mutliplier type
From: Alex Hung This introduces a new drm_colorop_type: DRM_COLOROP_MULTIPLIER. It's a simple multiplier to all pixel values. The value is specified via a S31.32 fixed point provided via the "MULTIPLIER" property. Signed-off-by: Alex Hung --- drivers/gpu/drm/drm_atomic.c | 3 +++ drivers/gpu/drm/drm_atomic_uapi.c | 4 drivers/gpu/drm/drm_colorop.c | 29 +++-- include/drm/drm_colorop.h | 16 include/uapi/drm/drm_mode.h | 1 + 5 files changed, 51 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index f7d51839ca03..af0b6338a55c 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -806,6 +806,9 @@ static void drm_atomic_colorop_print_state(struct drm_printer *p, case DRM_COLOROP_CTM_3X4: drm_printf(p, "\tdata blob id=%d\n", state->data ? state->data->base.id : 0); break; + case DRM_COLOROP_MULTIPLIER: + drm_printf(p, "\tmultiplier=%u\n", state->multiplier); + break; default: break; } diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 6bfe857720cd..b4ecda563728 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -727,6 +727,8 @@ static int drm_atomic_colorop_set_property(struct drm_colorop *colorop, state->bypass = val; } else if (property == colorop->curve_1d_type_property) { state->curve_1d_type = val; + } else if (property == colorop->multiplier_property) { + state->multiplier = val; } else if (property == colorop->data_property) { return drm_atomic_color_set_data_property(colorop, state, property, val); @@ -752,6 +754,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop, *val = state->bypass; } else if (property == colorop->curve_1d_type_property) { *val = state->curve_1d_type; + } else if (property == colorop->multiplier_property) { + *val = state->multiplier; } else if (property == colorop->size_property) { *val = state->size; } else if (property == colorop->data_property) { diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index 4452eaeeb242..c6cdd743de51 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -35,7 +35,8 @@ static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = { { DRM_COLOROP_1D_CURVE, "1D Curve" }, { DRM_COLOROP_1D_LUT, "1D Curve Custom LUT" }, - { DRM_COLOROP_CTM_3X4, "3x4 Matrix"} + { DRM_COLOROP_CTM_3X4, "3x4 Matrix"}, + { DRM_COLOROP_MULTIPLIER, "Multiplier"}, }; static const char * const colorop_curve_1d_type_names[] = { @@ -231,6 +232,29 @@ int drm_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop *colorop } EXPORT_SYMBOL(drm_colorop_ctm_3x4_init); +int drm_colorop_mult_init(struct drm_device *dev, struct drm_colorop *colorop, + struct drm_plane *plane) +{ + struct drm_property *prop; + int ret; + + ret = drm_colorop_init(dev, colorop, plane, DRM_COLOROP_MULTIPLIER); + if (ret) + return ret; + + prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "MULTIPLIER", 0, U64_MAX); + if (!prop) + return -ENOMEM; + + colorop->multiplier_property = prop; + drm_object_attach_property(>base, colorop->multiplier_property, 0); + + drm_colorop_reset(colorop); + + return 0; +} +EXPORT_SYMBOL(drm_colorop_mult_init); + static void __drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colorop, struct drm_colorop_state *state) { @@ -333,7 +357,8 @@ EXPORT_SYMBOL(drm_colorop_reset); static const char * const colorop_type_name[] = { [DRM_COLOROP_1D_CURVE] = "1D Curve", [DRM_COLOROP_1D_LUT] = "1D Curve Custom LUT", - [DRM_COLOROP_CTM_3X4] = "3x4 Matrix" + [DRM_COLOROP_CTM_3X4] = "3x4 Matrix", + [DRM_COLOROP_MULTIPLIER] = "Multiplier", }; /** diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index 8adc7ece3bd1..f9f83644cc9f 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -64,6 +64,13 @@ struct drm_colorop_state { */ enum drm_colorop_curve_1d_type curve_1d_type; + /** +* @multiplier: +* +* Multiplier to 'gain' the plane. Format is S31.32 sign-magnitude. +*/ + uint64_t multiplier; + /** * @size: * @@ -186,6 +193,13 @@ struct drm_colorop { */ struct drm_property *curve_1d_type_property; + /** +* @multiplier_property: +*
[RFC PATCH v4 36/42] drm/colorop: add BT2020/BT709 OETF and Inverse OETF
The BT.709 and BT.2020 OETFs are the same, the only difference being that the BT.2020 variant is defined with more precision for 10 and 12-bit per color encodings. Both are used as encoding functions for video content, and are therefore defined as OETF (opto-electronic transfer function) instead of as EOTF (electro-optical transfer function). Signed-off-by: Harry Wentland --- drivers/gpu/drm/drm_colorop.c | 2 ++ include/drm/drm_colorop.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index 781947e42b02..b10cad5a7208 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -40,6 +40,8 @@ static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = { static const char * const colorop_curve_1d_type_names[] = { [DRM_COLOROP_1D_CURVE_SRGB_EOTF] = "sRGB EOTF", [DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF] = "sRGB Inverse EOTF", + [DRM_COLOROP_1D_CURVE_BT2020_INV_OETF] = "BT.2020 Inverse OETF", + [DRM_COLOROP_1D_CURVE_BT2020_OETF] = "BT.2020 OETF", [DRM_COLOROP_1D_CURVE_PQ_125_EOTF] = "PQ 125 EOTF", [DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF] = "PQ 125 Inverse EOTF", }; diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index e06d9ea28efd..28b3136dabad 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -33,6 +33,8 @@ enum drm_colorop_curve_1d_type { DRM_COLOROP_1D_CURVE_SRGB_EOTF, DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF, + DRM_COLOROP_1D_CURVE_BT2020_INV_OETF, + DRM_COLOROP_1D_CURVE_BT2020_OETF, DRM_COLOROP_1D_CURVE_PQ_125_EOTF, DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF, DRM_COLOROP_1D_CURVE_COUNT -- 2.44.0
[RFC PATCH v4 19/42] drm/colorop: Introduce DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE
With the introduction of the pre-blending color pipeline we can no longer have color operations that don't have a clear position in the color pipeline. We deprecate all existing plane properties. For upstream drivers those are: - COLOR_ENCODING - COLOR_RANGE Drivers are expected to ignore these properties when programming the HW. Setting of the COLOR_PIPELINE plane property or drm_colorop properties is only allowed for userspace that sets this client cap. v4: - Don't block setting of COLOR_RANGE and COLOR_ENCODING when client cap is set Signed-off-by: Harry Wentland --- drivers/gpu/drm/drm_atomic_uapi.c | 13 - drivers/gpu/drm/drm_ioctl.c | 7 +++ include/drm/drm_file.h| 7 +++ include/uapi/drm/drm.h| 16 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 86f77a9aa3a8..ff258b34544e 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -570,6 +570,12 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, } else if (property == plane->color_range_property) { state->color_range = val; } else if (property == plane->color_pipeline_property) { + if (!file_priv->plane_color_pipeline) { + drm_dbg_atomic(dev, + "Setting COLOR_PIPELINE plane property not permitted unless DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE is set\n"); + return -EINVAL; + } + /* find DRM colorop object */ struct drm_colorop *colorop = NULL; colorop = drm_colorop_find(dev, file_priv, val); @@ -1179,6 +1185,12 @@ int drm_atomic_set_property(struct drm_atomic_state *state, break; } case DRM_MODE_OBJECT_COLOROP: { + if (!file_priv->plane_color_pipeline) { + drm_dbg_atomic(prop->dev, + "[OBJECT:%d] is a colorop but DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE not set\n", + obj->id); + ret = -EINVAL; + } struct drm_colorop *colorop = obj_to_colorop(obj); struct drm_colorop_state *colorop_state; @@ -1191,7 +1203,6 @@ int drm_atomic_set_property(struct drm_atomic_state *state, ret = drm_atomic_colorop_set_property(colorop, colorop_state, file_priv, prop, prop_value); - break; } default: diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index e368fc084c77..da59e37ae228 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -373,6 +373,13 @@ drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv) return -EINVAL; file_priv->supports_virtualized_cursor_plane = req->value; break; + case DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE: + if (!file_priv->atomic) + return -EINVAL; + if (req->value > 1) + return -EINVAL; + file_priv->plane_color_pipeline = req->value; + break; default: return -EINVAL; } diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index ab230d3af138..63c1d29b8520 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -204,6 +204,13 @@ struct drm_file { */ bool writeback_connectors; + /** +* @plane_color_pipeline: +* +* True if client understands plane color pipelines +*/ + bool plane_color_pipeline; + /** * @was_master: * diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 16122819edfe..2d74c49274ee 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -875,6 +875,22 @@ struct drm_get_cap { */ #define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT6 +/** + * DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE + * + * If set to 1 + * - the DRM core will allow setting of plane the COLOR_PIPELINE + * property, as well as drm_colorop properties. + * - Drivers will ignore these properties + * - COLOR_ENCODING + * - COLOR_RANGE + * + * The client must enable _CLIENT_CAP_ATOMIC first. + * + * This capability is currently in development. + */ +#define DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE7 + /* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; -- 2.44.0
[RFC PATCH v4 34/42] drm/colorop: Add PQ 125 EOTF and its inverse
The PQ function defines a mapping of code values to nits (cd/m^2). The max code value maps to 10,000 nits. Windows DWM's canonical composition color space (CCCS) defaults to composing SDR contents to 80 nits and uses a float value of 1.0 to represent this. For this reason AMD HW hard-codes a PQ function that is scaled by 125, yielding 80 nit PQ values for 1.0 and 10,000 nits at 125.0. This patch introduces this scaled PQ EOTF and its inverse as 1D curve types. Signed-off-by: Harry Wentland --- drivers/gpu/drm/drm_colorop.c | 2 ++ include/drm/drm_colorop.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index 7baa1eba..781947e42b02 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -40,6 +40,8 @@ static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = { static const char * const colorop_curve_1d_type_names[] = { [DRM_COLOROP_1D_CURVE_SRGB_EOTF] = "sRGB EOTF", [DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF] = "sRGB Inverse EOTF", + [DRM_COLOROP_1D_CURVE_PQ_125_EOTF] = "PQ 125 EOTF", + [DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF] = "PQ 125 Inverse EOTF", }; diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index 8710e550790c..e06d9ea28efd 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -33,6 +33,8 @@ enum drm_colorop_curve_1d_type { DRM_COLOROP_1D_CURVE_SRGB_EOTF, DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF, + DRM_COLOROP_1D_CURVE_PQ_125_EOTF, + DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF, DRM_COLOROP_1D_CURVE_COUNT }; -- 2.44.0
[RFC PATCH v4 21/42] drm/vkms: Pull apply_colorop out of pre_blend_color_transform
The if/switch statement is bound to grow with more types and subtypes. Pull this out into its own funcion to make things more manageable and readable. Signed-off-by: Harry Wentland --- drivers/gpu/drm/vkms/vkms_composer.c | 48 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index 9493bdb1ba3f..25b786b49db0 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -164,6 +164,31 @@ static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buff } } +static void apply_colorop(struct pixel_argb_u16 *pixel, struct drm_colorop *colorop) +{ + /* TODO is this right? */ + struct drm_colorop_state *colorop_state = colorop->state; + + if (colorop->type == DRM_COLOROP_1D_CURVE) { + switch (colorop_state->curve_1d_type) { + case DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF: + pixel->r = apply_lut_to_channel_value(_inv_eotf, pixel->r, LUT_RED); + pixel->g = apply_lut_to_channel_value(_inv_eotf, pixel->g, LUT_GREEN); + pixel->b = apply_lut_to_channel_value(_inv_eotf, pixel->b, LUT_BLUE); + break; + case DRM_COLOROP_1D_CURVE_SRGB_EOTF: + pixel->r = apply_lut_to_channel_value(_eotf, pixel->r, LUT_RED); + pixel->g = apply_lut_to_channel_value(_eotf, pixel->g, LUT_GREEN); + pixel->b = apply_lut_to_channel_value(_eotf, pixel->b, LUT_BLUE); + break; + default: + DRM_DEBUG_DRIVER("unkown colorop 1D curve type %d\n", colorop_state->curve_1d_type); + break; + } + } + +} + static void pre_blend_color_transform(const struct vkms_plane_state *plane_state, struct line_buffer *output_buffer) { struct drm_colorop *colorop = plane_state->base.base.color_pipeline; @@ -180,26 +205,9 @@ static void pre_blend_color_transform(const struct vkms_plane_state *plane_state if (!colorop_state) return; - for (size_t x = 0; x < output_buffer->n_pixels; x++) { - struct pixel_argb_u16 *pixel = _buffer->pixels[x]; - - if (colorop->type == DRM_COLOROP_1D_CURVE && - colorop_state->bypass == false) { - switch (colorop_state->curve_1d_type) { - case DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF: - pixel->r = apply_lut_to_channel_value(_inv_eotf, pixel->r, LUT_RED); - pixel->g = apply_lut_to_channel_value(_inv_eotf, pixel->g, LUT_GREEN); - pixel->b = apply_lut_to_channel_value(_inv_eotf, pixel->b, LUT_BLUE); - break; - case DRM_COLOROP_1D_CURVE_SRGB_EOTF: - default: - pixel->r = apply_lut_to_channel_value(_eotf, pixel->r, LUT_RED); - pixel->g = apply_lut_to_channel_value(_eotf, pixel->g, LUT_GREEN); - pixel->b = apply_lut_to_channel_value(_eotf, pixel->b, LUT_BLUE); - break; - } - } - } + for (size_t x = 0; x < output_buffer->n_pixels; x++) + if (!colorop_state->bypass) + apply_colorop(_buffer->pixels[x], colorop); colorop = colorop->next; } -- 2.44.0
[RFC PATCH v4 35/42] drm/amd/display: Enable support for PQ 125 EOTF and Inverse
This patchset enables support for the PQ_125 EOTF and its inverse on all existing plane 1D curve colorops, i.e., on DEGAM, SHAPER, and BLND blocks. With this patchset the following IGT subtests are passing: kms_colorop --run plane-XR30-XR30-pq_125_eotf kms_colorop --run plane-XR30-XR30-pq_125_inv_eotf kms_colorop --run plane-XR30-XR30-pq_125_eotf-pq_125_inv_eotf kms_colorop --run plane-XR30-XR30-pq_125_eotf-pq_125_inv_eotf-pq_125_eotf Signed-off-by: Harry Wentland --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 20 +-- .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 8 +--- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 3e3ae2b58b06..20b7eb47388c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_mode.h" #include "amdgpu_dm.h" +#include "amdgpu_dm_colorop.h" #include "dc.h" #include "modules/color/color_gamma.h" #include "basics/conversion.h" @@ -676,6 +677,9 @@ amdgpu_colorop_tf_to_dc_tf(enum drm_colorop_curve_1d_type tf) case DRM_COLOROP_1D_CURVE_SRGB_EOTF: case DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF: return TRANSFER_FUNCTION_SRGB; + case DRM_COLOROP_1D_CURVE_PQ_125_EOTF: + case DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF: + return TRANSFER_FUNCTION_PQ; default: return TRANSFER_FUNCTION_LINEAR;; } @@ -1158,8 +1162,10 @@ __set_colorop_in_tf_1d_curve(struct dc_plane_state *dc_plane_state, struct drm_colorop *colorop = colorop_state->colorop; struct drm_device *drm = colorop->dev; - if (colorop->type != DRM_COLOROP_1D_CURVE && - colorop_state->curve_1d_type != DRM_COLOROP_1D_CURVE_SRGB_EOTF) + if (colorop->type != DRM_COLOROP_1D_CURVE) + return -EINVAL; + + if (!(BIT(colorop_state->curve_1d_type) & amdgpu_dm_supported_degam_tfs)) return -EINVAL; if (colorop_state->bypass) { @@ -1191,7 +1197,7 @@ __set_dm_plane_colorop_degamma(struct drm_plane_state *plane_state, /* 1st op: 1d curve - degamma */ for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) { if (new_colorop_state->colorop == old_colorop && - new_colorop_state->curve_1d_type == DRM_COLOROP_1D_CURVE_SRGB_EOTF) { + (BIT(new_colorop_state->curve_1d_type) & amdgpu_dm_supported_degam_tfs)) { colorop_state = new_colorop_state; break; } @@ -1213,8 +1219,10 @@ __set_colorop_in_shaper_1d_curve(struct dc_plane_state *dc_plane_state, const struct drm_color_lut *shaper_lut; uint32_t shaper_size; - if (colorop->type != DRM_COLOROP_1D_CURVE && - colorop_state->curve_1d_type != DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) + if (colorop->type != DRM_COLOROP_1D_CURVE) + return -EINVAL; + + if (!(BIT(colorop_state->curve_1d_type) & amdgpu_dm_supported_shaper_tfs)) return -EINVAL; if (colorop_state->bypass) { @@ -1250,7 +1258,7 @@ __set_dm_plane_colorop_shaper(struct drm_plane_state *plane_state, /* 2nd op: 1d curve - shaper */ for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) { if (new_colorop_state->colorop == old_colorop && - new_colorop_state->curve_1d_type == DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) { + (BIT(new_colorop_state->curve_1d_type) & amdgpu_dm_supported_shaper_tfs)) { colorop_state = new_colorop_state; break; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c index 449a2ad6a184..f99d8e09d89b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c @@ -32,10 +32,12 @@ #include "amdgpu_dm_colorop.h" const u64 amdgpu_dm_supported_degam_tfs = - BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF); + BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | + BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF); - const u64 amdgpu_dm_supported_shaper_tfs = - BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF); +const u64 amdgpu_dm_supported_shaper_tfs = + BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) | + BIT(DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF); const u64 amdgpu_dm_supported_blnd_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF); -- 2.44.0
[RFC PATCH v4 15/42] drm/plane: Add COLOR PIPELINE property
We're adding a new enum COLOR PIPELINE property. This property will have entries for each COLOR PIPELINE by referencing the DRM object ID of the first drm_colorop of the pipeline. 0 disables the entire COLOR PIPELINE. Userspace can use this to discover the available color pipelines, as well as set the desired one. The color pipelines are programmed via properties on the actual drm_colorop objects. v4: - Add pipeline property creation helper (Pekka) - Fix function comment for drm_atomic_set_colorop_for_plane (Pekka) - Always create Bypass pipeline (Pekka) - Add missing function declaration (Chaitanya Kumar Borah) Signed-off-by: Harry Wentland --- drivers/gpu/drm/drm_atomic.c | 46 drivers/gpu/drm/drm_atomic_state_helper.c | 5 +++ drivers/gpu/drm/drm_atomic_uapi.c | 42 ++ drivers/gpu/drm/drm_plane.c | 52 +++ include/drm/drm_atomic.h | 3 ++ include/drm/drm_atomic_uapi.h | 2 + include/drm/drm_plane.h | 11 + 7 files changed, 161 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 3645c36d63b3..27a8805c5fa1 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1458,6 +1458,52 @@ drm_atomic_add_affected_planes(struct drm_atomic_state *state, } EXPORT_SYMBOL(drm_atomic_add_affected_planes); +/** + * drm_atomic_add_affected_colorops - add colorops for plane + * @state: atomic state + * @plane: DRM plane + * + * This function walks the current configuration and adds all colorops + * currently used by @plane to the atomic configuration @state. This is useful + * when an atomic commit also needs to check all currently enabled colorop on + * @plane, e.g. when changing the mode. It's also useful when re-enabling a plane + * to avoid special code to force-enable all colorops. + * + * Since acquiring a colorop state will always also acquire the w/w mutex of the + * current plane for that colorop (if there is any) adding all the colorop states for + * a plane will not reduce parallelism of atomic updates. + * + * Returns: + * 0 on success or can fail with -EDEADLK or -ENOMEM. When the error is EDEADLK + * then the w/w mutex code has detected a deadlock and the entire atomic + * sequence must be restarted. All other errors are fatal. + */ +int +drm_atomic_add_affected_colorops(struct drm_atomic_state *state, +struct drm_plane *plane) +{ + struct drm_colorop *colorop; + struct drm_colorop_state *colorop_state; + + WARN_ON(!drm_atomic_get_new_plane_state(state, plane)); + + drm_dbg_atomic(plane->dev, + "Adding all current colorops for [plane:%d:%s] to %p\n", + plane->base.id, plane->name, state); + + drm_for_each_colorop(colorop, plane->dev) { + if (colorop->plane != plane) + continue; + + colorop_state = drm_atomic_get_colorop_state(state, colorop); + if (IS_ERR(colorop_state)) + return PTR_ERR(colorop_state); + } + + return 0; +} +EXPORT_SYMBOL(drm_atomic_add_affected_colorops); + /** * drm_atomic_check_only - check whether a given config would work * @state: atomic configuration to check diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index 519228eb1095..d1dd082b1286 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -267,6 +267,11 @@ void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state, plane_state->color_range = val; } + if (plane->color_pipeline_property) { + /* default is always NULL, i.e., bypass */ + plane_state->color_pipeline = NULL; + } + if (plane->zpos_property) { if (!drm_object_property_get_default_value(>base, plane->zpos_property, diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 87f00131be11..86f77a9aa3a8 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -256,6 +256,36 @@ drm_atomic_set_fb_for_plane(struct drm_plane_state *plane_state, } EXPORT_SYMBOL(drm_atomic_set_fb_for_plane); + +/** + * drm_atomic_set_colorop_for_plane - set colorop for plane + * @plane_state: atomic state object for the plane + * @colorop: colorop to use for the plane + * + * Helper function to select the color pipeline on a plane by setting + * it to the first drm_colorop element of the pipeline. + */ +void +drm_atomic_set_colorop_for_plane(struct drm_plane_state *plane_state, +struct drm_colorop *colorop) +{ + struct drm_plane *plane = plane_state->plane; + + if (colorop) +
[RFC PATCH v4 25/42] drm/vkms: Add tests for CTM handling
A whole slew of tests for CTM handling that greatly helped in debugging the CTM code. The extent of tests might seem a bit silly but they're fast and might someday help save someone else's day when debugging this. v4: - Comment on origin of bt709_enc matrix (Pekka) - Use full opaque alpha (Pekka) - Add additional check for Y < 0x (Pekka) - Remove unused code (Pekka) - Rename red, green, blue to Y, U, V where applicable Signed-off-by: Harry Wentland --- drivers/gpu/drm/vkms/tests/vkms_color_tests.c | 251 ++ drivers/gpu/drm/vkms/vkms_composer.c | 2 +- 2 files changed, 252 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/tests/vkms_color_tests.c b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c index e6ac01dee830..83d07f7bae37 100644 --- a/drivers/gpu/drm/vkms/tests/vkms_color_tests.c +++ b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c @@ -3,6 +3,7 @@ #include #include +#include #define TEST_LUT_SIZE 16 @@ -181,11 +182,261 @@ static void vkms_color_srgb_inv_srgb(struct kunit *test) } } +#define FIXPT_HALF(DRM_FIXED_ONE >> 1) +#define FIXPT_QUARTER (DRM_FIXED_ONE >> 2) + +const struct drm_color_ctm_3x4 test_matrix_3x4_50_desat = { { + FIXPT_HALF, FIXPT_QUARTER, FIXPT_QUARTER, 0, + FIXPT_QUARTER, FIXPT_HALF, FIXPT_QUARTER, 0, + FIXPT_QUARTER, FIXPT_QUARTER, FIXPT_HALF, 0 +} }; + +static void vkms_color_ctm_3x4_50_desat(struct kunit *test) +{ + struct pixel_argb_s32 ref, out; + + /* full white */ + ref.a = 0x; + ref.r = 0x; + ref.g = 0x; + ref.b = 0x; + + memcpy(, , sizeof(out)); + apply_3x4_matrix(, _matrix_3x4_50_desat); + + KUNIT_EXPECT_MEMEQ(test, , , sizeof(out)); + + /* full black */ + ref.a = 0x; + ref.r = 0x0; + ref.g = 0x0; + ref.b = 0x0; + + memcpy(, , sizeof(out)); + apply_3x4_matrix(, _matrix_3x4_50_desat); + + KUNIT_EXPECT_MEMEQ(test, , , sizeof(out)); + + /* 50% grey */ + ref.a = 0x; + ref.r = 0x8000; + ref.g = 0x8000; + ref.b = 0x8000; + + memcpy(, , sizeof(out)); + apply_3x4_matrix(, _matrix_3x4_50_desat); + + KUNIT_EXPECT_MEMEQ(test, , , sizeof(out)); + + /* full red to 50% desat */ + ref.a = 0x; + ref.r = 0x7fff; + ref.g = 0x3fff; + ref.b = 0x3fff; + + out.a = 0x; + out.r = 0x; + out.g = 0x0; + out.b = 0x0; + + apply_3x4_matrix(, _matrix_3x4_50_desat); + + KUNIT_EXPECT_MEMEQ(test, , , sizeof(out)); +} + +/* + * BT.709 encoding matrix + * + * Values printed from within IGT when converting + * igt_matrix_3x4_bt709_enc to the fixed-point format expected + * by DRM/KMS. + */ +const struct drm_color_ctm_3x4 test_matrix_3x4_bt709_enc = { { + 0x366cf400ull, 0xb7175900ull, 0x000127bb300ull, 0, + 0x80001993b3a0ull, 0x80005609fe80ull, 0x6f9db200ull, 0, + 0x9d70a400ull, 0x80008f011100ull, 0x8e6f9330ull, 0 +} }; + +static void vkms_color_ctm_3x4_bt709(struct kunit *test) +{ + struct pixel_argb_s32 out; + + /* full white to bt709 */ + out.a = 0x; + out.r = 0x; + out.g = 0x; + out.b = 0x; + + apply_3x4_matrix(, _matrix_3x4_bt709_enc); + + /* Y 255 */ + KUNIT_EXPECT_GT(test, out.r, 0xfe00); + KUNIT_EXPECT_LT(test, out.r, 0x1); + + /* U 0 */ + KUNIT_EXPECT_LT(test, out.g, 0x0100); + + /* V 0 */ + KUNIT_EXPECT_LT(test, out.b, 0x0100); + + /* full black to bt709 */ + out.a = 0x; + out.r = 0x0; + out.g = 0x0; + out.b = 0x0; + + apply_3x4_matrix(, _matrix_3x4_bt709_enc); + + /* Y 0 */ + KUNIT_EXPECT_LT(test, out.r, 0x100); + + /* U 0 */ + KUNIT_EXPECT_LT(test, out.g, 0x0100); + + /* V 0 */ + KUNIT_EXPECT_LT(test, out.b, 0x0100); + + /* gray to bt709 */ + out.a = 0x; + out.r = 0x7fff; + out.g = 0x7fff; + out.b = 0x7fff; + + apply_3x4_matrix(, _matrix_3x4_bt709_enc); + + /* Y 127 */ + KUNIT_EXPECT_GT(test, out.r, 0x7e00); + KUNIT_EXPECT_LT(test, out.r, 0x8000); + + /* U 0 */ + KUNIT_EXPECT_LT(test, out.g, 0x0100); + + /* V 0 */ + KUNIT_EXPECT_LT(test, out.b, 0x0100); + + /* == red 255 - bt709 enc == */ + out.a = 0x; + out.r = 0x; + out.g = 0x0; + out.b = 0x0; + + apply_3x4_matrix(, _matrix_3x4_bt709_enc); + + /* Y 54 */ + KUNIT_EXPECT_GT(test, out.r, 0x3500); + KUNIT_EXPECT_LT(test, out.r, 0x3700); + + /* U 0 */ + KUNIT_EXPECT_LT(test, out.g, 0x0100); + + /* V 157 */ + KUNIT_EXPECT_GT(test, out.b, 0x9C00); + KUNIT_EXPECT_LT(test, out.b, 0x9E00); + + + /* == green 255 - bt709 enc == */ + out.a = 0x; + out.r = 0x0; +
[RFC PATCH v4 24/42] drm/tests: Add a few tests around drm_fixed.h
While working on the CTM implementation of VKMS I had to ascertain myself of a few assumptions. One of those is whether drm_fixed.h treats its numbers using signed-magnitude or twos-complement. It is twos-complement. In order to make someone else's day easier I am adding the drm_test_int2fixp test that validates the above assumption. I am also adding a test for the new sm2fixp function that converts from a signed-magnitude fixed point to the twos-complement fixed point. Signed-off-by: Harry Wentland --- drivers/gpu/drm/tests/Makefile| 3 +- drivers/gpu/drm/tests/drm_fixp_test.c | 69 +++ 2 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/tests/drm_fixp_test.c diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile index d6183b3d7688..98468f7908dd 100644 --- a/drivers/gpu/drm/tests/Makefile +++ b/drivers/gpu/drm/tests/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \ drm_modes_test.o \ drm_plane_helper_test.o \ drm_probe_helper_test.o \ - drm_rect_test.o + drm_rect_test.o \ + drm_fixp_test.o CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN) diff --git a/drivers/gpu/drm/tests/drm_fixp_test.c b/drivers/gpu/drm/tests/drm_fixp_test.c new file mode 100644 index ..f420f173ff66 --- /dev/null +++ b/drivers/gpu/drm/tests/drm_fixp_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + */ + +#include +#include + +static void drm_test_sm2fixp(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, 0x7fffll, ((1LL << 63) - 1)); + + /* 1 */ + KUNIT_EXPECT_EQ(test, drm_int2fixp(1), drm_sm2fixp(1ull << DRM_FIXED_POINT)); + + /* -1 */ + KUNIT_EXPECT_EQ(test, drm_int2fixp(-1), drm_sm2fixp((1ull << 63) | (1ull << DRM_FIXED_POINT))); + + /* 0.5 */ + KUNIT_EXPECT_EQ(test, drm_fixp_from_fraction(1, 2), drm_sm2fixp(1ull << (DRM_FIXED_POINT - 1))); + + /* -0.5 */ + KUNIT_EXPECT_EQ(test, drm_fixp_from_fraction(-1, 2), drm_sm2fixp((1ull << 63) | (1ull << (DRM_FIXED_POINT - 1; + +} + +static void drm_test_int2fixp(struct kunit *test) +{ + /* 1 */ + KUNIT_EXPECT_EQ(test, 1ll << 32, drm_int2fixp(1)); + + /* -1 */ + KUNIT_EXPECT_EQ(test, -(1ll << 32), drm_int2fixp(-1)); + + /* 1 + (-1) = 0 */ + KUNIT_EXPECT_EQ(test, 0, drm_int2fixp(1) + drm_int2fixp(-1)); + + /* 1 / 2 */ + KUNIT_EXPECT_EQ(test, 1ll << 31, drm_fixp_from_fraction(1, 2)); + + /* -0.5 */ + KUNIT_EXPECT_EQ(test, -(1ll << 31), drm_fixp_from_fraction(-1, 2)); + + /* (1 / 2) + (-1) = 0.5 */ + KUNIT_EXPECT_EQ(test, 1ll << 31, drm_fixp_from_fraction(-1, 2) + drm_int2fixp(1)); + + /* (1 / 2) - 1) = 0.5 */ + KUNIT_EXPECT_EQ(test, -(1ll << 31), drm_fixp_from_fraction(1, 2) + drm_int2fixp(-1)); + + /* (1 / 2) - 1) = 0.5 */ + KUNIT_EXPECT_EQ(test, -(1ll << 31), drm_fixp_from_fraction(1, 2) - drm_int2fixp(1)); + +} + +static struct kunit_case drm_fixp_tests[] = { + KUNIT_CASE(drm_test_int2fixp), + KUNIT_CASE(drm_test_sm2fixp), + { } +}; + +static struct kunit_suite drm_rect_test_suite = { + .name = "drm_fixp", + .test_cases = drm_fixp_tests, +}; + +kunit_test_suite(drm_rect_test_suite); + +MODULE_AUTHOR("AMD"); +MODULE_LICENSE("GPL and additional rights"); \ No newline at end of file -- 2.44.0
[RFC PATCH v4 10/42] drm/colorop: Add TYPE property
Add a read-only TYPE property. The TYPE specifies the colorop type, such as enumerated curve, 1D LUT, CTM, 3D LUT, PWL LUT, etc. v4: - Use enum property for TYPE (Pekka) v3: - Make TYPE a range property - Move enum drm_colorop_type to uapi header - Fix drm_get_colorop_type_name description For now we're only introducing an enumerated 1D LUT type to illustrate the concept. Signed-off-by: Harry Wentland --- drivers/gpu/drm/drm_atomic.c | 4 +-- drivers/gpu/drm/drm_atomic_uapi.c | 8 +- drivers/gpu/drm/drm_colorop.c | 44 ++- include/drm/drm_colorop.h | 17 +++- include/uapi/drm/drm_mode.h | 4 +++ 5 files changed, 72 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 62e87e6a9653..b400e32c9d39 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -634,8 +634,8 @@ drm_atomic_get_colorop_state(struct drm_atomic_state *state, state->colorops[index].new_state = colorop_state; colorop_state->state = state; - drm_dbg_atomic(colorop->dev, "Added [COLOROP:%d] %p state to %p\n", - colorop->base.id, colorop_state, state); + drm_dbg_atomic(colorop->dev, "Added [COLOROP:%d:%d] %p state to %p\n", + colorop->base.id, colorop->type, colorop_state, state); return colorop_state; } diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 1f9b6dfa8ca7..e3067c095c72 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -660,7 +660,13 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop, const struct drm_colorop_state *state, struct drm_property *property, uint64_t *val) { - return -EINVAL; + if (property == colorop->type_property) { + *val = colorop->type; + } else { + return -EINVAL; + } + + return 0; } static int drm_atomic_set_writeback_fb_for_connector( diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index a295ab96aee1..3a07a8fe2842 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -32,12 +32,17 @@ /* TODO big colorop doc, including properties, etc. */ +static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = { + { DRM_COLOROP_1D_CURVE, "1D Curve" }, +}; + /* Init Helpers */ int drm_colorop_init(struct drm_device *dev, struct drm_colorop *colorop, -struct drm_plane *plane) +struct drm_plane *plane, enum drm_colorop_type type) { struct drm_mode_config *config = >mode_config; + struct drm_property *prop; int ret = 0; ret = drm_mode_object_add(dev, >base, DRM_MODE_OBJECT_COLOROP); @@ -46,12 +51,29 @@ int drm_colorop_init(struct drm_device *dev, struct drm_colorop *colorop, colorop->base.properties = >properties; colorop->dev = dev; + colorop->type = type; colorop->plane = plane; list_add_tail(>head, >colorop_list); colorop->index = config->num_colorop++; /* add properties */ + + /* type */ + prop = drm_property_create_enum(dev, + DRM_MODE_PROP_IMMUTABLE, + "TYPE", drm_colorop_type_enum_list, + ARRAY_SIZE(drm_colorop_type_enum_list)); + + if (!prop) + return -ENOMEM; + + colorop->type_property = prop; + + drm_object_attach_property(>base, + colorop->type_property, + colorop->type); + return ret; } EXPORT_SYMBOL(drm_colorop_init); @@ -150,3 +172,23 @@ void drm_colorop_reset(struct drm_colorop *colorop) __drm_colorop_reset(colorop, colorop->state); } EXPORT_SYMBOL(drm_colorop_reset); + + +static const char * const colorop_type_name[] = { + [DRM_COLOROP_1D_CURVE] = "1D Curve", +}; + +/** + * drm_get_colorop_type_name - return a string for colorop type + * @type: colorop type to compute name of + * + * In contrast to the other drm_get_*_name functions this one here returns a + * const pointer and hence is threadsafe. + */ +const char *drm_get_colorop_type_name(enum drm_colorop_type type) +{ + if (WARN_ON(type >= ARRAY_SIZE(colorop_type_name))) + return "unknown"; + + return colorop_type_name[type]; +} diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index e611f830f986..cb98c55f8387 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -103,6 +103,21 @@ struct drm_colorop { /** @properties: property tracking for this plane */ struct drm_object_properties properties; + /** +* @type: +* +* Read-only +* Type of color operation +
[RFC PATCH v4 14/42] drm/colorop: Add atomic state print for drm_colorop
Signed-off-by: Harry Wentland --- drivers/gpu/drm/drm_atomic.c | 25 - include/drm/drm_colorop.h| 5 + 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index b400e32c9d39..3645c36d63b3 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -783,6 +783,19 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state, return 0; } + + +static void drm_atomic_colorop_print_state(struct drm_printer *p, + const struct drm_colorop_state *state) +{ + struct drm_colorop *colorop = state->colorop; + + drm_printf(p, "colorop[%u]:\n", colorop->base.id); + drm_printf(p, "\ttype=%s\n", drm_get_colorop_type_name(colorop->type)); + drm_printf(p, "\tbypass=%u\n", state->bypass); + drm_printf(p, "\tcurve_1d_type=%s\n", drm_get_colorop_curve_1d_type_name(state->curve_1d_type)); +} + static void drm_atomic_plane_print_state(struct drm_printer *p, const struct drm_plane_state *state) { @@ -804,7 +817,8 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, drm_printf(p, "\tcolor-range=%s\n", drm_get_color_range_name(state->color_range)); drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed); - + drm_printf(p, "\tcolor-pipeline=%d\n", + state->color_pipeline ? state->color_pipeline->base.id : 0); if (plane->funcs->atomic_print_state) plane->funcs->atomic_print_state(p, state); } @@ -1840,6 +1854,7 @@ static void __drm_state_dump(struct drm_device *dev, struct drm_printer *p, bool take_locks) { struct drm_mode_config *config = >mode_config; + struct drm_colorop *colorop; struct drm_plane *plane; struct drm_crtc *crtc; struct drm_connector *connector; @@ -1849,6 +1864,14 @@ static void __drm_state_dump(struct drm_device *dev, struct drm_printer *p, if (!drm_drv_uses_atomic_modeset(dev)) return; + list_for_each_entry(colorop, >colorop_list, head) { + if (take_locks) + drm_modeset_lock(>plane->mutex, NULL); + drm_atomic_colorop_print_state(p, colorop->state); + if (take_locks) + drm_modeset_unlock(>plane->mutex); + } + list_for_each_entry(plane, >plane_list, head) { if (take_locks) drm_modeset_lock(>mutex, NULL); diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index 8060988b5892..fc9a28d138b8 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -231,6 +231,11 @@ static inline unsigned int drm_colorop_index(const struct drm_colorop *colorop) #define drm_for_each_colorop(colorop, dev) \ list_for_each_entry(colorop, &(dev)->mode_config.colorop_list, head) +const char *drm_get_color_pipeline_name(struct drm_colorop *colorop); + +const char *drm_get_colorop_type_name(enum drm_colorop_type type); +const char *drm_get_colorop_curve_1d_type_name(enum drm_colorop_curve_1d_type type); + void drm_colorop_set_next_property(struct drm_colorop *colorop, struct drm_colorop *next); -- 2.44.0
[RFC PATCH v4 28/42] drm/amd/display: Ignore deprecated props when plane_color_pipeline set
When the plane_color_pipeline bit is set we should ignore deprecated properties, such as COLOR_RANGE and COLOR_ENCODING. Signed-off-by: Harry Wentland --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f6575d7dee97..47c6fd33fd60 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4948,6 +4948,10 @@ fill_plane_color_attributes(const struct drm_plane_state *plane_state, *color_space = COLOR_SPACE_SRGB; + /* Ignore properties when DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE is set */ + if (plane_state->state && plane_state->state->plane_color_pipeline) + return 0; + /* DRM color properties only affect non-RGB formats. */ if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) return 0; -- 2.44.0
[RFC PATCH v4 09/42] drm/colorop: Introduce new drm_colorop mode object
This patches introduces a new drm_colorop mode object. This object represents color transformations and can be used to define color pipelines. We also introduce the drm_colorop_state here, as well as various helpers and state tracking bits. v4: - Drop IOCTL definitions (Pekka) - add missing declaration (Chaitanya Kumar Borah) v3: - Drop TODO for lock (it's handled in drm_modeset_drop_locks) (Melissa) - Don't get plane state when getting colorop state - Make some functions static (kernel test robot) Signed-off-by: Harry Wentland --- drivers/gpu/drm/Makefile| 1 + drivers/gpu/drm/drm_atomic.c| 70 drivers/gpu/drm/drm_atomic_helper.c | 12 +++ drivers/gpu/drm/drm_atomic_uapi.c | 48 + drivers/gpu/drm/drm_colorop.c | 152 ++ drivers/gpu/drm/drm_mode_config.c | 7 ++ include/drm/drm_atomic.h| 82 +++ include/drm/drm_atomic_uapi.h | 1 + include/drm/drm_colorop.h | 158 include/drm/drm_mode_config.h | 18 include/drm/drm_plane.h | 2 + include/uapi/drm/drm_mode.h | 1 + 12 files changed, 552 insertions(+) create mode 100644 drivers/gpu/drm/drm_colorop.c create mode 100644 include/drm/drm_colorop.h diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 104b42df2e95..4b14dcbb6117 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -16,6 +16,7 @@ drm-y := \ drm_client.o \ drm_client_modeset.o \ drm_color_mgmt.o \ + drm_colorop.o \ drm_connector.o \ drm_crtc.o \ drm_displayid.o \ diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index a91737adf8e7..62e87e6a9653 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "drm_crtc_internal.h" #include "drm_internal.h" @@ -108,6 +109,7 @@ void drm_atomic_state_default_release(struct drm_atomic_state *state) kfree(state->connectors); kfree(state->crtcs); kfree(state->planes); + kfree(state->colorops); kfree(state->private_objs); } EXPORT_SYMBOL(drm_atomic_state_default_release); @@ -139,6 +141,10 @@ drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state) sizeof(*state->planes), GFP_KERNEL); if (!state->planes) goto fail; + state->colorops = kcalloc(dev->mode_config.num_colorop, + sizeof(*state->colorops), GFP_KERNEL); + if (!state->colorops) + goto fail; /* * Because drm_atomic_state can be committed asynchronously we need our @@ -250,6 +256,20 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->planes[i].new_state = NULL; } + for (i = 0; i < config->num_colorop; i++) { + struct drm_colorop *colorop = state->colorops[i].ptr; + + if (!colorop) + continue; + + drm_colorop_atomic_destroy_state(colorop, +state->colorops[i].state); + state->colorops[i].ptr = NULL; + state->colorops[i].state = NULL; + state->colorops[i].old_state = NULL; + state->colorops[i].new_state = NULL; + } + for (i = 0; i < state->num_private_objs; i++) { struct drm_private_obj *obj = state->private_objs[i].ptr; @@ -571,6 +591,56 @@ drm_atomic_get_plane_state(struct drm_atomic_state *state, } EXPORT_SYMBOL(drm_atomic_get_plane_state); + +/** + * drm_atomic_get_colorop_state - get colorop state + * @state: global atomic state object + * @colorop: colorop to get state object for + * + * This function returns the colorop state for the given colorop, allocating it + * if needed. It will also grab the relevant plane lock to make sure that the + * state is consistent. + * + * Returns: + * + * Either the allocated state or the error code encoded into the pointer. When + * the error is EDEADLK then the w/w mutex code has detected a deadlock and the + * entire atomic sequence must be restarted. All other errors are fatal. + */ +struct drm_colorop_state * +drm_atomic_get_colorop_state(struct drm_atomic_state *state, +struct drm_colorop *colorop) +{ + int ret, index = drm_colorop_index(colorop); + struct drm_colorop_state *colorop_state; + + WARN_ON(!state->acquire_ctx); + + colorop_state = drm_atomic_get_existing_colorop_state(state, colorop); + if (colorop_state) + return colorop_state; + + ret = drm_modeset_lock(>plane->mutex, state->acquire_ctx); + if (ret) + return ERR_PTR(ret); + + colorop_state = drm_atomic_helper_colorop_duplicate_state(colorop);
[RFC PATCH v4 11/42] drm/colorop: Add 1D Curve subtype
v4: - Use drm_colorop_curve_1d_type_enum_list to get name (Pekka) - Create separate init function for 1D curve - Pass supported TFs into 1D curve init function Signed-off-by: Harry Wentland Signed-off-by: Alex Hung Co-developed-by: Alex Hung --- drivers/gpu/drm/drm_atomic_uapi.c | 18 +--- drivers/gpu/drm/drm_colorop.c | 71 +++ include/drm/drm_colorop.h | 24 +++ 3 files changed, 108 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index e3067c095c72..fdd540cfe24f 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -648,11 +648,17 @@ static int drm_atomic_colorop_set_property(struct drm_colorop *colorop, struct drm_colorop_state *state, struct drm_file *file_priv, struct drm_property *property, uint64_t val) { - drm_dbg_atomic(colorop->dev, - "[COLOROP:%d] unknown property [PROP:%d:%s]]\n", - colorop->base.id, - property->base.id, property->name); - return -EINVAL; + if (property == colorop->curve_1d_type_property) { + state->curve_1d_type = val; + } else { + drm_dbg_atomic(colorop->dev, + "[COLOROP:%d:%d] unknown property [PROP:%d:%s]]\n", + colorop->base.id, colorop->type, + property->base.id, property->name); + return -EINVAL; + } + + return 0; } static int @@ -662,6 +668,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop, { if (property == colorop->type_property) { *val = colorop->type; + } else if (property == colorop->curve_1d_type_property) { + *val = state->curve_1d_type; } else { return -EINVAL; } diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index 3a07a8fe2842..f4740b6115d1 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -36,6 +36,12 @@ static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = { { DRM_COLOROP_1D_CURVE, "1D Curve" }, }; +static const char * const colorop_curve_1d_type_names[] = { + [DRM_COLOROP_1D_CURVE_SRGB_EOTF] = "sRGB EOTF", + [DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF] = "sRGB Inverse EOTF", +}; + + /* Init Helpers */ int drm_colorop_init(struct drm_device *dev, struct drm_colorop *colorop, @@ -78,6 +84,56 @@ int drm_colorop_init(struct drm_device *dev, struct drm_colorop *colorop, } EXPORT_SYMBOL(drm_colorop_init); +int drm_colorop_curve_1d_init(struct drm_device *dev, struct drm_colorop *colorop, + struct drm_plane *plane, u64 supported_tfs) +{ + struct drm_prop_enum_list enum_list[DRM_COLOROP_1D_CURVE_COUNT]; + int i, len; + + struct drm_property *prop; + int ret; + + if (!supported_tfs) { + drm_err(dev, + "No supported TFs for new 1D curve colorop on [PLANE:%d:%s]\n", + plane->base.id, plane->name); + return -EINVAL; + } + + if ((supported_tfs & -BIT(DRM_COLOROP_1D_CURVE_COUNT)) != 0) { + drm_err(dev, "Unknown TF provided on [PLANE:%d:%s]\n", + plane->base.id, plane->name); + return -EINVAL; + } + + ret = drm_colorop_init(dev, colorop, plane, DRM_COLOROP_1D_CURVE); + if (ret) + return ret; + + len = 0; + for (i = 0; i < DRM_COLOROP_1D_CURVE_COUNT; i++) { + if ((supported_tfs & BIT(i)) == 0) + continue; + + enum_list[len].type = i; + enum_list[len].name = colorop_curve_1d_type_names[i]; + len++; + } + + /* initialize 1D curve only attribute */ + prop = drm_property_create_enum(dev, DRM_MODE_PROP_ATOMIC, "CURVE_1D_TYPE", + enum_list, len); + if (!prop) + return -ENOMEM; + + colorop->curve_1d_type_property = prop; + drm_object_attach_property(>base, colorop->curve_1d_type_property, 0); + drm_colorop_reset(colorop); + + return 0; +} +EXPORT_SYMBOL(drm_colorop_curve_1d_init); + static void __drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colorop, struct drm_colorop_state *state) { @@ -192,3 +248,18 @@ const char *drm_get_colorop_type_name(enum drm_colorop_type type) return colorop_type_name[type]; } + +/** + * drm_get_colorop_curve_1d_type_name - return a string for 1D curve type + * @range: 1d curve type to compute name of + * + * In contrast to the other drm_get_*_name functions this one here returns a + * const pointer and hence is threadsafe. + */ +const char
[RFC PATCH v4 23/42] drm/vkms: add 3x4 matrix in color pipeline
We add two 3x4 matrices into the VKMS color pipeline. The reason we're adding matrices is so that we can test that application of a matrix and its inverse yields an output equal to the input image. One complication with the matrix implementation has to do with the fact that the matrix entries are in signed-magnitude fixed point, whereas the drm_fixed.h implementation uses 2s-complement. The latter one is the one that we want for easy addition and subtraction, so we convert all entries to 2s-complement. Signed-off-by: Harry Wentland --- drivers/gpu/drm/vkms/vkms_colorop.c | 32 +++- drivers/gpu/drm/vkms/vkms_composer.c | 27 +++ include/drm/drm_colorop.h| 2 ++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/vkms_colorop.c b/drivers/gpu/drm/vkms/vkms_colorop.c index d2db366da6d3..a0e54b2c1f7a 100644 --- a/drivers/gpu/drm/vkms/vkms_colorop.c +++ b/drivers/gpu/drm/vkms/vkms_colorop.c @@ -35,7 +35,37 @@ const int vkms_initialize_color_pipeline(struct drm_plane *plane, struct drm_pro prev_op = op; - /* 2nd op: 1d curve */ + /* 2nd op: 3x4 matrix */ + op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL); + if (!op) { + DRM_ERROR("KMS: Failed to allocate colorop\n"); + return -ENOMEM; + } + + ret = drm_colorop_ctm_3x4_init(dev, op, plane); + if (ret) + return ret; + + drm_colorop_set_next_property(prev_op, op); + + prev_op = op; + + /* 3rd op: 3x4 matrix */ + op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL); + if (!op) { + DRM_ERROR("KMS: Failed to allocate colorop\n"); + return -ENOMEM; + } + + ret = drm_colorop_ctm_3x4_init(dev, op, plane); + if (ret) + return ret; + + drm_colorop_set_next_property(prev_op, op); + + prev_op = op; + + /* 4th op: 1d curve */ op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL); if (!op) { DRM_ERROR("KMS: Failed to allocate colorop\n"); diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index d2101fa55aa3..8bbfce651526 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -164,6 +164,30 @@ static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buff } } +static void apply_3x4_matrix(struct pixel_argb_s32 *pixel, const struct drm_color_ctm_3x4 *matrix) +{ + s64 rf, gf, bf; + + rf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[0]), drm_int2fixp(pixel->r)) + +drm_fixp_mul(drm_sm2fixp(matrix->matrix[1]), drm_int2fixp(pixel->g)) + +drm_fixp_mul(drm_sm2fixp(matrix->matrix[2]), drm_int2fixp(pixel->b)) + +drm_sm2fixp(matrix->matrix[3]); + + gf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[4]), drm_int2fixp(pixel->r)) + +drm_fixp_mul(drm_sm2fixp(matrix->matrix[5]), drm_int2fixp(pixel->g)) + +drm_fixp_mul(drm_sm2fixp(matrix->matrix[6]), drm_int2fixp(pixel->b)) + +drm_sm2fixp(matrix->matrix[7]); + + bf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[8]), drm_int2fixp(pixel->r)) + +drm_fixp_mul(drm_sm2fixp(matrix->matrix[9]), drm_int2fixp(pixel->g)) + +drm_fixp_mul(drm_sm2fixp(matrix->matrix[10]), drm_int2fixp(pixel->b)) + +drm_sm2fixp(matrix->matrix[11]); + + pixel->r = drm_fixp2int(rf); + pixel->g = drm_fixp2int(gf); + pixel->b = drm_fixp2int(bf); +} + static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop *colorop) { /* TODO is this right? */ @@ -185,6 +209,9 @@ static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop *colo DRM_DEBUG_DRIVER("unkown colorop 1D curve type %d\n", colorop_state->curve_1d_type); break; } + } else if (colorop->type == DRM_COLOROP_CTM_3X4) { + if (colorop_state->data) + apply_3x4_matrix(pixel, (struct drm_color_ctm_3x4 *) colorop_state->data->data); } } diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index 4aee29e161d6..8710e550790c 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -224,6 +224,8 @@ int drm_colorop_init(struct drm_device *dev, struct drm_colorop *colorop, int drm_colorop_curve_1d_init(struct drm_device *dev, struct drm_colorop *colorop, struct drm_plane *plane, u64 supported_tfs); +int drm_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop *colorop, +struct drm_plane *plane); struct drm_colorop_state * drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colorop); -- 2.44.0
[RFC PATCH v4 17/42] drm/vkms: Add enumerated 1D curve colorop
This patch introduces a VKMS color pipeline that includes two drm_colorops for named transfer functions. For now the only ones supported are sRGB EOTF, sRGB Inverse EOTF, and a Linear TF. We will expand this in the future but I don't want to do so without accompanying IGT tests. We introduce a new vkms_luts.c file that hard-codes sRGB EOTF, sRGB Inverse EOTF, and a linear EOTF LUT. These have been generated with 256 entries each as IGT is currently testing only 8 bpc surfaces. We will likely need higher precision but I'm reluctant to make that change without clear indication that we need it. We'll revisit and, if necessary, regenerate the LUTs when we have IGT tests for higher precision buffers. v4: - Drop _tf_ from color_pipeline init function - Pass supported TFs into colorop init - Create bypass pipeline in DRM helper (Pekka) v2: - Add commit description - Fix sRGB EOTF LUT definition - Add linear and sRGB inverse EOTF LUTs Signed-off-by: Harry Wentland Signed-off-by: Alex Hung --- drivers/gpu/drm/vkms/Makefile| 4 +- drivers/gpu/drm/vkms/vkms_colorop.c | 70 +++ drivers/gpu/drm/vkms/vkms_composer.c | 45 ++ drivers/gpu/drm/vkms/vkms_drv.h | 4 + drivers/gpu/drm/vkms/vkms_luts.c | 802 +++ drivers/gpu/drm/vkms/vkms_luts.h | 12 + drivers/gpu/drm/vkms/vkms_plane.c| 2 + 7 files changed, 938 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/vkms/vkms_colorop.c create mode 100644 drivers/gpu/drm/vkms/vkms_luts.c create mode 100644 drivers/gpu/drm/vkms/vkms_luts.h diff --git a/drivers/gpu/drm/vkms/Makefile b/drivers/gpu/drm/vkms/Makefile index 1b28a6a32948..c38455c46be4 100644 --- a/drivers/gpu/drm/vkms/Makefile +++ b/drivers/gpu/drm/vkms/Makefile @@ -6,6 +6,8 @@ vkms-y := \ vkms_formats.o \ vkms_crtc.o \ vkms_composer.o \ - vkms_writeback.o + vkms_writeback.o \ + vkms_colorop.o \ + vkms_luts.o obj-$(CONFIG_DRM_VKMS) += vkms.o diff --git a/drivers/gpu/drm/vkms/vkms_colorop.c b/drivers/gpu/drm/vkms/vkms_colorop.c new file mode 100644 index ..d2db366da6d3 --- /dev/null +++ b/drivers/gpu/drm/vkms/vkms_colorop.c @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#include +#include +#include +#include +#include + +#define MAX_COLOR_PIPELINES 5 + +static const u64 supported_tfs = + BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | + BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF); + +const int vkms_initialize_color_pipeline(struct drm_plane *plane, struct drm_prop_enum_list *list) +{ + + struct drm_colorop *op, *prev_op; + struct drm_device *dev = plane->dev; + int ret; + + /* 1st op: 1d curve */ + op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL); + if (!op) { + DRM_ERROR("KMS: Failed to allocate colorop\n"); + return -ENOMEM; + } + + ret = drm_colorop_curve_1d_init(dev, op, plane, supported_tfs); + if (ret) + return ret; + + list->type = op->base.id; + list->name = kasprintf(GFP_KERNEL, "Color Pipeline %d", op->base.id); + + prev_op = op; + + /* 2nd op: 1d curve */ + op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL); + if (!op) { + DRM_ERROR("KMS: Failed to allocate colorop\n"); + return -ENOMEM; + } + + ret = drm_colorop_curve_1d_init(dev, op, plane, supported_tfs); + if (ret) + return ret; + + drm_colorop_set_next_property(prev_op, op); + + return 0; +} + +int vkms_initialize_colorops(struct drm_plane *plane) +{ + struct drm_prop_enum_list pipelines[MAX_COLOR_PIPELINES]; + int len = 0; + int ret; + + /* Add color pipeline */ + ret = vkms_initialize_color_pipeline(plane, &(pipelines[len])); + if (ret) + return ret; + len++; + + /* Create COLOR_PIPELINE property and attach */ + drm_plane_create_color_pipeline_property(plane, pipelines, len); + + return 0; +} diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index b90e446d5954..9493bdb1ba3f 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -12,6 +12,7 @@ #include #include "vkms_drv.h" +#include "vkms_luts.h" static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha) { @@ -163,6 +164,47 @@ static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buff } } +static void pre_blend_color_transform(const struct vkms_plane_state *plane_state, struct line_buffer *output_buffer) +{ + struct drm_colorop *colorop = plane_state->base.base.color_pipeline; + + while (colorop) { + struct drm_colorop_state *colorop_state; + + if (!colorop) + return; + + /* TODO this is probably wrong */ + colorop_state = colorop->state; + +
[RFC PATCH v4 22/42] drm/vkms: Use s32 for internal color pipeline precision
Certain operations require us to preserve values below 0.0 and above 1.0 (0x0 and 0x respectively in 16 bpc unorm). One such operation is a BT709 encoding operation followed by its decoding operation, or the reverse. We'll use s32 values as intermediate in and outputs of our color operations, for the operations where it matters. For now this won't apply to LUT operations. We might want to update those to work on s32 as well, but it's unclear how that should work for unorm LUT definitions. We'll revisit that once we add LUT + CTM tests. In order to allow for this we'll also invert the nesting of our colorop processing loops. We now use the pixel iteration loop on the outside and the colorop iteration on the inside. v4: - Clarify that we're packing 16-bit UNORM into s32, not converting values to a different representation (Pekka) v3: - Use new colorop->next pointer Signed-off-by: Harry Wentland --- drivers/gpu/drm/vkms/vkms_composer.c | 57 +--- drivers/gpu/drm/vkms/vkms_drv.h | 4 ++ 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index 25b786b49db0..d2101fa55aa3 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -164,7 +164,7 @@ static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buff } } -static void apply_colorop(struct pixel_argb_u16 *pixel, struct drm_colorop *colorop) +static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop *colorop) { /* TODO is this right? */ struct drm_colorop_state *colorop_state = colorop->state; @@ -191,25 +191,56 @@ static void apply_colorop(struct pixel_argb_u16 *pixel, struct drm_colorop *colo static void pre_blend_color_transform(const struct vkms_plane_state *plane_state, struct line_buffer *output_buffer) { - struct drm_colorop *colorop = plane_state->base.base.color_pipeline; + struct drm_colorop *colorop; + struct pixel_argb_s32 pixel; - while (colorop) { - struct drm_colorop_state *colorop_state; + for (size_t x = 0; x < output_buffer->n_pixels; x++) { - if (!colorop) - return; + /* +* Some operations, such as applying a BT709 encoding matrix, +* followed by a decoding matrix, require that we preserve +* values above 1.0 and below 0.0 until the end of the pipeline. +* +* Pack the 16-bit UNORM values into s32 to give us head-room to +* avoid clipping until we're at the end of the pipeline. Clip +* intentionally at the end of the pipeline before packing +* UNORM values back into u16. +*/ + pixel.a = output_buffer->pixels[x].a; + pixel.r = output_buffer->pixels[x].r; + pixel.g = output_buffer->pixels[x].g; + pixel.b = output_buffer->pixels[x].b; - /* TODO this is probably wrong */ - colorop_state = colorop->state; + colorop = plane_state->base.base.color_pipeline; + while (colorop) { + struct drm_colorop_state *colorop_state; - if (!colorop_state) - return; + if (!colorop) + return; + + /* TODO this is probably wrong */ + colorop_state = colorop->state; + + if (!colorop_state) + return; - for (size_t x = 0; x < output_buffer->n_pixels; x++) if (!colorop_state->bypass) - apply_colorop(_buffer->pixels[x], colorop); + apply_colorop(, colorop); - colorop = colorop->next; + colorop = colorop->next; + } + + /* clamp pixel */ + pixel.a = max(min(pixel.a, 0x), 0x0); + pixel.r = max(min(pixel.r, 0x), 0x0); + pixel.g = max(min(pixel.g, 0x), 0x0); + pixel.b = max(min(pixel.b, 0x), 0x0); + + /* put back to output_buffer */ + output_buffer->pixels[x].a = pixel.a; + output_buffer->pixels[x].r = pixel.r; + output_buffer->pixels[x].g = pixel.g; + output_buffer->pixels[x].b = pixel.b; } } diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h index 2bcc24c196a2..fadb7685a360 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.h +++ b/drivers/gpu/drm/vkms/vkms_drv.h @@ -36,6 +36,10 @@ struct vkms_frame_info { unsigned int cpp; }; +struct pixel_argb_s32 { + s32 a, r, g, b; +}; + struct pixel_argb_u16 { u16 a, r, g, b; };
[RFC PATCH v4 16/42] drm/colorop: Add NEXT to colorop state print
v3: - Read NEXT ID from drm_colorop's next pointer Signed-off-by: Harry Wentland --- drivers/gpu/drm/drm_atomic.c | 1 + include/drm/drm_colorop.h| 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 27a8805c5fa1..d82858dabf06 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -794,6 +794,7 @@ static void drm_atomic_colorop_print_state(struct drm_printer *p, drm_printf(p, "\ttype=%s\n", drm_get_colorop_type_name(colorop->type)); drm_printf(p, "\tbypass=%u\n", state->bypass); drm_printf(p, "\tcurve_1d_type=%s\n", drm_get_colorop_curve_1d_type_name(state->curve_1d_type)); + drm_printf(p, "\tnext=%d\n", colorop->next ? colorop->next->base.id : 0); } static void drm_atomic_plane_print_state(struct drm_printer *p, diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index fc9a28d138b8..e85ed5aa68d0 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -237,6 +237,8 @@ const char *drm_get_colorop_type_name(enum drm_colorop_type type); const char *drm_get_colorop_curve_1d_type_name(enum drm_colorop_curve_1d_type type); void drm_colorop_set_next_property(struct drm_colorop *colorop, struct drm_colorop *next); +uint32_t drm_colorop_get_next_property(struct drm_colorop *colorop); +struct drm_colorop *drm_colorop_get_next(struct drm_colorop *colorop); #endif /* __DRM_COLOROP_H__ */ -- 2.44.0
[RFC PATCH v4 04/42] drm/vkms: Round fixp2int conversion in lerp_u16
fixp2int always rounds down, fixp2int_ceil rounds up. We need the new fixp2int_round. Signed-off-by: Harry Wentland --- drivers/gpu/drm/vkms/vkms_composer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index 3c99fb8b54e2..e70cd473e3be 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -98,7 +98,7 @@ static u16 lerp_u16(u16 a, u16 b, s64 t) s64 delta = drm_fixp_mul(b_fp - a_fp, t); - return drm_fixp2int(a_fp + delta); + return drm_fixp2int_round(a_fp + delta); } static s64 get_lut_index(const struct vkms_color_lut *lut, u16 channel_value) -- 2.44.0
[RFC PATCH v4 18/42] drm/vkms: Add kunit tests for linear and sRGB LUTs
Signed-off-by: Harry Wentland --- drivers/gpu/drm/vkms/tests/vkms_color_tests.c | 37 ++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/tests/vkms_color_tests.c b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c index fc73e48aa57c..e6ac01dee830 100644 --- a/drivers/gpu/drm/vkms/tests/vkms_color_tests.c +++ b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c @@ -31,7 +31,6 @@ const struct vkms_color_lut test_linear_lut = { .channel_value2index_ratio = 0xf000fll }; - static void vkms_color_test_get_lut_index(struct kunit *test) { int i; @@ -40,6 +39,19 @@ static void vkms_color_test_get_lut_index(struct kunit *test) for (i = 0; i < TEST_LUT_SIZE; i++) KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_linear_lut, test_linear_array[i].red)), i); + + KUNIT_EXPECT_EQ(test, drm_fixp2int(get_lut_index(_eotf, 0x0)), 0x0); + KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_eotf, 0x0)), 0x0); + KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_eotf, 0x101)), 0x1); + KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_eotf, 0x202)), 0x2); + + KUNIT_EXPECT_EQ(test, drm_fixp2int(get_lut_index(_inv_eotf, 0x0)), 0x0); + KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_inv_eotf, 0x0)), 0x0); + KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_inv_eotf, 0x101)), 0x1); + KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_inv_eotf, 0x202)), 0x2); + + KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_eotf, 0xfefe)), 0xfe); + KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_eotf, 0x)), 0xff); } static void vkms_color_test_lerp(struct kunit *test) @@ -148,9 +160,32 @@ static void vkms_color_test_lerp(struct kunit *test) KUNIT_EXPECT_EQ(test, lerp_u16(0x0, 0x1, 0x8000), 0x1); } +static void vkms_color_test_linear(struct kunit *test) +{ + for (int i = 0; i < LUT_SIZE; i++) { + int linear = apply_lut_to_channel_value(_eotf, i * 0x101, LUT_RED); + KUNIT_EXPECT_EQ(test, DIV_ROUND_CLOSEST(linear, 0x101), i); + } +} + +static void vkms_color_srgb_inv_srgb(struct kunit *test) +{ + u16 srgb, final; + + for (int i = 0; i < LUT_SIZE; i++) { + srgb = apply_lut_to_channel_value(_eotf, i * 0x101, LUT_RED); + final = apply_lut_to_channel_value(_inv_eotf, srgb, LUT_RED); + + KUNIT_EXPECT_GE(test, final / 0x101, i-1); + KUNIT_EXPECT_LE(test, final / 0x101, i+1); + } +} + static struct kunit_case vkms_color_test_cases[] = { KUNIT_CASE(vkms_color_test_get_lut_index), KUNIT_CASE(vkms_color_test_lerp), + KUNIT_CASE(vkms_color_test_linear), + KUNIT_CASE(vkms_color_srgb_inv_srgb), {} }; -- 2.44.0
[RFC PATCH v4 12/42] drm/colorop: Add BYPASS property
We want to be able to bypass each colorop at all times. Introduce a new BYPASS boolean property for this. Signed-off-by: Harry Wentland --- drivers/gpu/drm/drm_atomic_uapi.c | 6 +- drivers/gpu/drm/drm_colorop.c | 16 include/drm/drm_colorop.h | 20 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index fdd540cfe24f..87f00131be11 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -648,7 +648,9 @@ static int drm_atomic_colorop_set_property(struct drm_colorop *colorop, struct drm_colorop_state *state, struct drm_file *file_priv, struct drm_property *property, uint64_t val) { - if (property == colorop->curve_1d_type_property) { + if (property == colorop->bypass_property) { + state->bypass = val; + } else if (property == colorop->curve_1d_type_property) { state->curve_1d_type = val; } else { drm_dbg_atomic(colorop->dev, @@ -668,6 +670,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop, { if (property == colorop->type_property) { *val = colorop->type; + } else if (property == colorop->bypass_property) { + *val = state->bypass; } else if (property == colorop->curve_1d_type_property) { *val = state->curve_1d_type; } else { diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index f4740b6115d1..29979816a2d1 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -80,6 +80,18 @@ int drm_colorop_init(struct drm_device *dev, struct drm_colorop *colorop, colorop->type_property, colorop->type); + /* bypass */ + /* TODO can we reuse the mode_config->active_prop? */ + prop = drm_property_create_bool(dev, DRM_MODE_PROP_ATOMIC, + "BYPASS"); + if (!prop) + return -ENOMEM; + + colorop->bypass_property = prop; + drm_object_attach_property(>base, + colorop->bypass_property, + 1); + return ret; } EXPORT_SYMBOL(drm_colorop_init); @@ -123,6 +135,7 @@ int drm_colorop_curve_1d_init(struct drm_device *dev, struct drm_colorop *coloro /* initialize 1D curve only attribute */ prop = drm_property_create_enum(dev, DRM_MODE_PROP_ATOMIC, "CURVE_1D_TYPE", enum_list, len); + if (!prop) return -ENOMEM; @@ -138,6 +151,8 @@ static void __drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colo struct drm_colorop_state *state) { memcpy(state, colorop->state, sizeof(*state)); + + state->bypass = true; } struct drm_colorop_state * @@ -189,6 +204,7 @@ static void __drm_colorop_state_reset(struct drm_colorop_state *colorop_state, struct drm_colorop *colorop) { colorop_state->colorop = colorop; + colorop_state->bypass = true; } /** diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index 539db900f16f..28aa5c1c309e 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -45,6 +45,14 @@ struct drm_colorop_state { /* colorop properties */ + /** +* @bypass: +* +* True if colorop shall be bypassed. False if colorop is +* enabled. +*/ + bool bypass; + /** * @curve_1d_type: * @@ -132,6 +140,18 @@ struct drm_colorop { */ struct drm_property *type_property; + /** +* @bypass_property: +* +* Boolean property to control enablement of the color +* operation. Setting bypass to "true" shall always be supported +* in order to allow compositors to quickly fall back to +* alternate methods of color processing. This is important +* since setting color operations can fail due to unique +* HW constraints. +*/ + struct drm_property *bypass_property; + /** * @curve_1d_type: * -- 2.44.0
[RFC PATCH v4 05/42] drm/vkms: Create separate Kconfig file for VKMS
This aligns with most other DRM drivers and will allow us to add new VKMS config options without polluting the DRM Kconfig. v3: - Change SPDX to GPL-2.0-only to match DRM KConfig SPDX (Simon) Signed-off-by: Harry Wentland Reviewed-by: Simon Ser --- drivers/gpu/drm/Kconfig | 14 +- drivers/gpu/drm/vkms/Kconfig | 15 +++ 2 files changed, 16 insertions(+), 13 deletions(-) create mode 100644 drivers/gpu/drm/vkms/Kconfig diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 2520db0b776e..e3ea8793cb8a 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -289,19 +289,7 @@ config DRM_VGEM as used by Mesa's software renderer for enhanced performance. If M is selected the module will be called vgem. -config DRM_VKMS - tristate "Virtual KMS (EXPERIMENTAL)" - depends on DRM && MMU - select DRM_KMS_HELPER - select DRM_GEM_SHMEM_HELPER - select CRC32 - default n - help - Virtual Kernel Mode-Setting (VKMS) is used for testing or for - running GPU in a headless machines. Choose this option to get - a VKMS. - - If M is selected the module will be called vkms. +source "drivers/gpu/drm/vkms/Kconfig" source "drivers/gpu/drm/exynos/Kconfig" diff --git a/drivers/gpu/drm/vkms/Kconfig b/drivers/gpu/drm/vkms/Kconfig new file mode 100644 index ..b9ecdebecb0b --- /dev/null +++ b/drivers/gpu/drm/vkms/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config DRM_VKMS + tristate "Virtual KMS (EXPERIMENTAL)" + depends on DRM && MMU + select DRM_KMS_HELPER + select DRM_GEM_SHMEM_HELPER + select CRC32 + default n + help + Virtual Kernel Mode-Setting (VKMS) is used for testing or for + running GPU in a headless machines. Choose this option to get + a VKMS. + + If M is selected the module will be called vkms. -- 2.44.0
[RFC PATCH v4 07/42] drm/vkms: Avoid reading beyond LUT array
When the floor LUT index (drm_fixp2int(lut_index) is the last index of the array the ceil LUT index will point to an entry beyond the array. Make sure we guard against it and use the value of the floor LUT index. v3: - Drop bits from commit description that didn't contribute anything of value Fixes: db1f254f2cfaf ("drm/vkms: Add support to 1D gamma LUT") Signed-off-by: Harry Wentland Cc: Arthur Grillo Reviewed-by: Melissa Wen --- drivers/gpu/drm/vkms/vkms_composer.c | 14 ++ 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index d178f2a400f6..b90e446d5954 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -123,6 +123,8 @@ static u16 apply_lut_to_channel_value(const struct vkms_color_lut *lut, u16 chan enum lut_channel channel) { s64 lut_index = get_lut_index(lut, channel_value); + u16 *floor_lut_value, *ceil_lut_value; + u16 floor_channel_value, ceil_channel_value; /* * This checks if `struct drm_color_lut` has any gap added by the compiler @@ -130,11 +132,15 @@ static u16 apply_lut_to_channel_value(const struct vkms_color_lut *lut, u16 chan */ static_assert(sizeof(struct drm_color_lut) == sizeof(__u16) * 4); - u16 *floor_lut_value = (__u16 *)>base[drm_fixp2int(lut_index)]; - u16 *ceil_lut_value = (__u16 *)>base[drm_fixp2int_ceil(lut_index)]; + floor_lut_value = (__u16 *)>base[drm_fixp2int(lut_index)]; + if (drm_fixp2int(lut_index) == (lut->lut_length - 1)) + /* We're at the end of the LUT array, use same value for ceil and floor */ + ceil_lut_value = floor_lut_value; + else + ceil_lut_value = (__u16 *)>base[drm_fixp2int_ceil(lut_index)]; - u16 floor_channel_value = floor_lut_value[channel]; - u16 ceil_channel_value = ceil_lut_value[channel]; + floor_channel_value = floor_lut_value[channel]; + ceil_channel_value = ceil_lut_value[channel]; return lerp_u16(floor_channel_value, ceil_channel_value, lut_index & DRM_FIXED_DECIMAL_MASK); -- 2.44.0
[RFC PATCH v4 06/42] drm/vkms: Add kunit tests for VKMS LUT handling
Debugging LUT math is much easier when we can unit test it. Add kunit functionality to VKMS and add tests for - get_lut_index - lerp_u16 v4: - Test the critical points of the lerp function (Pekka) v3: - Use include way of testing static functions (Arthur) Signed-off-by: Harry Wentland Cc: Arthur Grillo --- drivers/gpu/drm/vkms/Kconfig | 5 + drivers/gpu/drm/vkms/tests/.kunitconfig | 4 + drivers/gpu/drm/vkms/tests/vkms_color_tests.c | 163 ++ drivers/gpu/drm/vkms/vkms_composer.c | 8 +- 4 files changed, 178 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/vkms/tests/.kunitconfig create mode 100644 drivers/gpu/drm/vkms/tests/vkms_color_tests.c diff --git a/drivers/gpu/drm/vkms/Kconfig b/drivers/gpu/drm/vkms/Kconfig index b9ecdebecb0b..c1f8b343ff0e 100644 --- a/drivers/gpu/drm/vkms/Kconfig +++ b/drivers/gpu/drm/vkms/Kconfig @@ -13,3 +13,8 @@ config DRM_VKMS a VKMS. If M is selected the module will be called vkms. + +config DRM_VKMS_KUNIT_TESTS + tristate "Tests for VKMS" if !KUNIT_ALL_TESTS + depends on DRM_VKMS && KUNIT + default KUNIT_ALL_TESTS diff --git a/drivers/gpu/drm/vkms/tests/.kunitconfig b/drivers/gpu/drm/vkms/tests/.kunitconfig new file mode 100644 index ..70e378228cbd --- /dev/null +++ b/drivers/gpu/drm/vkms/tests/.kunitconfig @@ -0,0 +1,4 @@ +CONFIG_KUNIT=y +CONFIG_DRM=y +CONFIG_DRM_VKMS=y +CONFIG_DRM_VKMS_KUNIT_TESTS=y diff --git a/drivers/gpu/drm/vkms/tests/vkms_color_tests.c b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c new file mode 100644 index ..fc73e48aa57c --- /dev/null +++ b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#include + +#include + +#define TEST_LUT_SIZE 16 + +static struct drm_color_lut test_linear_array[TEST_LUT_SIZE] = { + { 0x0, 0x0, 0x0, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, + { 0x, 0x, 0x, 0 }, +}; + +const struct vkms_color_lut test_linear_lut = { + .base = test_linear_array, + .lut_length = TEST_LUT_SIZE, + .channel_value2index_ratio = 0xf000fll +}; + + +static void vkms_color_test_get_lut_index(struct kunit *test) +{ + int i; + + KUNIT_EXPECT_EQ(test, drm_fixp2int(get_lut_index(_linear_lut, test_linear_array[0].red)), 0); + + for (i = 0; i < TEST_LUT_SIZE; i++) + KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_linear_lut, test_linear_array[i].red)), i); +} + +static void vkms_color_test_lerp(struct kunit *test) +{ + /*** half-way round down ***/ + s64 t = 0x8000 - 1; + KUNIT_EXPECT_EQ(test, lerp_u16(0x0, 0x10, t), 0x8); + + /* odd a */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0x10, t), 0x8); + + /* odd b */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0xf, t), 0x8); + + /* b = a */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x10, t), 0x10); + + /* b = a + 1 */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x11, t), 0x10); + + + /*** half-way round up ***/ + t = 0x8000; + KUNIT_EXPECT_EQ(test, lerp_u16(0x0, 0x10, t), 0x8); + + /* odd a */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0x10, t), 0x9); + + /* odd b */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0xf, t), 0x8); + + /* b = a */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x10, t), 0x10); + + /* b = a + 1 */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x11, t), 0x11); + + /*** t = 0.0 ***/ + t = 0x0; + KUNIT_EXPECT_EQ(test, lerp_u16(0x0, 0x10, t), 0x0); + + /* odd a */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0x10, t), 0x1); + + /* odd b */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0xf, t), 0x1); + + /* b = a */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x10, t), 0x10); + + /* b = a + 1 */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x11, t), 0x10); + + /*** t = 1.0 ***/ + t = 0x1; + KUNIT_EXPECT_EQ(test, lerp_u16(0x0, 0x10, t), 0x10); + + /* odd a */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0x10, t), 0x10); + + /* odd b */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0xf, t), 0xf); + + /* b = a */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x10, t), 0x10); + + /* b = a + 1 */ + KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x11, t), 0x11); + + + /*** t = 0.0 + 1 ***/ + t = 0x0 + 1; +
[RFC PATCH v4 01/42] drm: Don't treat 0 as -1 in drm_fixp2int_ceil
Unit testing this in VKMS shows that passing 0 into this function returns -1, which is highly counter- intuitive. Fix it by checking whether the input is >= 0 instead of > 0. Fixes: 64566b5e767f9 ("drm: Add drm_fixp_from_fraction and drm_fixp2int_ceil") Signed-off-by: Harry Wentland Reviewed-by: Simon Ser Reviewed-by: Melissa Wen --- include/drm/drm_fixed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index 6ea339d5de08..0c9f917a4d4b 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -95,7 +95,7 @@ static inline int drm_fixp2int_round(s64 a) static inline int drm_fixp2int_ceil(s64 a) { - if (a > 0) + if (a >= 0) return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE); else return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE); -- 2.44.0
[RFC PATCH v4 00/42] Color Pipeline API w/ VKMS
This is an RFC set for a color pipeline API, along with a sample implementation in VKMS. All the key API bits are here. VKMS now supports two named transfer function colorops and two matrix colorops. We have IGT tests that check all four of these colorops with a pixel-by-pixel comparison that checks that these colorops do what we expect them to do with a +/- 1 8 bpc code point margin. The big new change with v4 is the addition of an amdgpu color pipeline, for all AMD GPUs with DCN 3 and newer. Amdgpu now support the following: 1. 1D Curve EOTF 2. 3x4 CTM 3. Multiplier 4. 1D Curve Inverse EOTF 5. 1D LUT 6. 1D Curve EOTF 7. 1D LUT The supported curves for the 1D Curve type are: - sRGB EOTF and its inverse - PQ EOTF, scaled to [0.0, 125.0] and its inverse - BT.2020/BT.709 OETF and its inverse Note that the 1st and 5th colorops take the EOTF or Inverse OETF while the 3rd colorop takes the Inverse EOTF or OETF. We are working on two more ops for amdgpu, the HDR multiplier and the 3DLUT, which will give us this: 1. 1D Curve EOTF 2. 3x4 CTM 3. HDR Multiplier 4. 1D Curve Inverse EOTF 5. 1D LUT 6. 3D LUT 7. 1D Curve EOTF 8. 1D LUT This, essentially mirrors the color pipeline used by gamescope and presented by Melissa Wen, with the exception of the DEGAM LUT, which is not currently used. See [1] https://indico.freedesktop.org/event/4/contributions/186/attachments/138/218/xdc2023-TheRainbowTreasureMap-MelissaWen.pdf After this we'd like to also add the following ops: - Scaler (Informational only) - Color Encoding, to replace drm_plane's COLOR_ENCODING - Color Range, to replace drm_plane's COLOR_RANGE This patchset is grouped as follows: - Patches 1-3: couple general patches/fixes - Patches 4-7: introduce kunit to VKMS - Patch 7: description of motivation and details behind the Color Pipeline API. If you're reading nothing else but are interested in the topic I highly recommend you take a look at this. - Patches 7-27: DRM core and VKMS changes for color pipeline API - Patches 28-40: DRM core and amdgpu changes for color pipeline API VKMS patches could still be improved in a few ways, though the payoff might be limited and I would rather focus on other work at the moment. The most obvious thing to improve would be to eliminate the hard-coded LUTs for identity, and sRGB, and replace them with fixed-point math instead. There are plenty of things that I would like to see here but haven't had a chance to look at. These will (hopefully) be addressed in future iterations, either in VKMS or amdgpu: - Clear documentation for each drm_colorop_type - Add custom LUT colorops to VKMS - Add pre-blending 3DLUT - How to support HW which can't bypass entire pipeline? - Add ability to create colorops that don't have BYPASS - Can we do a LOAD / COMMIT model for LUTs (and other properties)? - read-only scaling colorop which defines scaling taps and position - read-only color format colorop to define supported color formats for a pipeline - named matrices, for things like converting YUV to RGB IGT tests can be found at https://gitlab.freedesktop.org/hwentland/igt-gpu-tools/-/merge_requests/1 IGT patches are also being sent to the igt-dev mailing list. If you prefer a gitlab MR for review you can find it at https://gitlab.freedesktop.org/hwentland/linux/-/merge_requests/5 v4: - Add amdgpu color pipeline (WIP) - Don't block setting of deprecated properties, instead pass client cap to atomic check so drivers can ignore these props - Drop IOCTL definitions (Pekka) - Use enum property for colorop TYPE (Pekka) - A few cleanups to the docs (Pekka) - Rework the TYPE enum to name relation to avoid code duplication (Pekka) - Add missing function declarations (Chaitanya Kumar Borah) - Allow setting of NEXT property to NULL in _set_ function (Chaitanya Kumar Borah) - Add helper for creation of pipeline drm_plane property (Pekka) - Always create Bypass pipeline (Pekka) - A bunch of changes to VKMS kunit tests (Pekka) - Fix index in CTM doc (Pekka) v3: - Abandon IOCTLs and discover colorops as clients iterate the pipeline - Remove need for libdrm - Add color_pipeline client cap and make mutually exclusive with COLOR_RANGE and COLOR_ENCODING properties - add CTM colorop to VKMS - Use include way for kunit testing static functions (Arthur) - Make TYPE a range property - Move enum drm_colorop_type to uapi header - and a bunch of smaller bits that are highlighted in the relevant commit description v2: - Rebased on drm-misc-next - Introduce a VKMS Kunit so we can test LUT functionality in vkms_composer - Incorporate feedback in color_pipeline.rst doc - Add support for sRGB inverse EOTF - Add 2nd enumerated TF colorop to VKMS - Fix LUTs and some issues with applying LUTs in VKMS Cc: Ville Syrjala Cc: Pekka Paalanen Cc: Simon Ser Cc: Harry Wentland Cc: Melissa Wen Cc: Jonas Ådahl Cc: Sebastian Wick Cc: Shashank Sharma Cc: Alexander Goins Cc:
[RFC PATCH v4 03/42] drm: Correctly round for fixp2int_round
A value of 0x8000 and higher should round up, and below should round down. VKMS Kunit tests for lerp_u16 showed that this is not the case. Fix it. 1 << (DRM_FIXED_POINT_HALF - 1) = 1 << 15 = 0x8000 This is not 0.5, but 0.0762939453125. Instead of some smart math use a simple if/else to round up or down. This helps people like me to understand what the function does. Signed-off-by: Harry Wentland --- include/drm/drm_fixed.h | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index cb842ba80ddd..8ee549f68537 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -77,6 +77,8 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B) #define DRM_FIXED_DIGITS_MASK (~DRM_FIXED_DECIMAL_MASK) #define DRM_FIXED_EPSILON 1LL #define DRM_FIXED_ALMOST_ONE (DRM_FIXED_ONE - DRM_FIXED_EPSILON) +#define DRM_FIXED_FRACTIONAL 0xll +#define DRM_FIXED_HALF 0x8000ll /** * @drm_sm2fixp @@ -106,11 +108,6 @@ static inline int drm_fixp2int(s64 a) return ((s64)a) >> DRM_FIXED_POINT; } -static inline int drm_fixp2int_round(s64 a) -{ - return drm_fixp2int(a + (1 << (DRM_FIXED_POINT_HALF - 1))); -} - static inline int drm_fixp2int_ceil(s64 a) { if (a >= 0) @@ -119,6 +116,14 @@ static inline int drm_fixp2int_ceil(s64 a) return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE); } +static inline int drm_fixp2int_round(s64 a) +{ + if ((a & DRM_FIXED_FRACTIONAL) < DRM_FIXED_HALF) + return drm_fixp2int(a); + else + return drm_fixp2int_ceil(a); +} + static inline unsigned drm_fixp_msbset(s64 a) { unsigned shift, sign = (a >> 63) & 1; -- 2.44.0
Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs
On 2024-02-26 17:53, Christian König wrote: > Am 26.02.24 um 17:50 schrieb Michel Dänzer: >> On 2024-02-26 17:46, Michel Dänzer wrote: >>> On 2024-02-26 17:34, Christian König wrote: >>> My question is why has it worked so far? I mean we are not doing this since yesterday and the problem only shows up now? >>> Yes, Wayland compositors are only starting to try and make use of this now. >> To expand on this, mutter will want to do something like this as well sooner >> or later. I suspect it's the same for others like kwin, sway etc. > > Yeah, but we have done similar things with X decades before. E.g. basically > the client sends a BO to the server for displaying it. The scenario in https://gitlab.freedesktop.org/mesa/mesa/-/issues/10635 is direct scanout of a client buffer on a secondary dGPU, while the compositor uses the APU as the primary compositing GPU. AFAIR Xorg has never supported direct scanout of client buffers in this scenario. Secondary GPU scanout is always done from a separate local buffer allocated by Xorg / the driver. This is Wayland compositors pushing the envelope. > Why we suddenly have to juggle with the fact that it is DMA-buf shared with > another device? The problematic case is if the Wayland compositor has to produce a composited frame (e.g. due to a notification or other window popping up over the fullscreen window), but the client hasn't attached a new buffer to the fullscreen surface, so the compositor has to use the contents of the same client buffer which is still being scanned out by the dGPU for compositing with the APU. -- Earthling Michel Dänzer| https://redhat.com Libre software enthusiast | Mesa and Xwayland developer
Re: [PATCH] drm/amdkfd: Use SQC when TCP would fail in gfx10.1 context save
On 2/23/2024 16:08, Laurent Morichetti wrote: > Similarly to gfx9, gfx10.1 drops vector stores when an xnack error is > raised. To work around this issue, use scalar stores instead of vector > stores when trapsts.xnack_error == 1. > > Signed-off-by: Laurent Morichetti Reviewed-by: Jay Cornwall
Re: [PATCH v3 3/3] drm/amdgpu: sync page table freeing with tlb flush
Am 26.02.24 um 17:52 schrieb Philip Yang: On 2024-02-23 08:42, Shashank Sharma wrote: This patch: - adds a new list in amdgou_vm to hold the VM PT entries being freed - waits for the TLB flush using the vm->tlb_flush_fence - actually frees the PT BOs V2: rebase V3: Do not attach the tlb_fence to the entries, rather add the entries to a list and delay their freeing (Christian) Cc: Christian König Cc: Alex Deucher Cc: Felix Kuehling Cc: Rajneesh Bhardwaj Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 6 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 6 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 51 --- 3 files changed, 58 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 67c690044b97..eebb73f2c2ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -939,6 +939,10 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, /* Makes sure no PD/PT is freed before the flush */ dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence, DMA_RESV_USAGE_BOOKKEEP); + + mutex_lock(>tlb_fence_lock); + vm->tlb_fence_last = *fence; + mutex_unlock(>tlb_fence_lock); } amdgpu_res_first(pages_addr ? NULL : res, offset, @@ -2212,6 +2216,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, INIT_LIST_HEAD(>freed); INIT_LIST_HEAD(>done); INIT_LIST_HEAD(>pt_freed); + INIT_LIST_HEAD(>tlb_flush_waitlist); INIT_WORK(>pt_free_work, amdgpu_vm_pt_free_work); INIT_KFIFO(vm->faults); @@ -2244,6 +2249,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->last_unlocked = dma_fence_get_stub(); vm->generation = 0; + mutex_init(>tlb_fence_lock); mutex_init(>eviction_lock); vm->evicting = false; vm->tlb_fence_context = dma_fence_context_alloc(1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 8e6fd25d07b7..77f10ed80973 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -334,6 +334,10 @@ struct amdgpu_vm { uint64_t*tlb_seq_cpu_addr; uint64_ttlb_fence_context; + struct mutex tlb_fence_lock; + struct dma_fence*tlb_fence_last; + struct list_headtlb_flush_waitlist; + atomic64_t kfd_last_flushed_seq; /* How many times we had to re-generate the page tables */ @@ -379,6 +383,8 @@ struct amdgpu_vm { /* cached fault info */ struct amdgpu_vm_fault_info fault_info; + + int count_bos; }; struct amdgpu_vm_manager { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 95dc0afdaffb..57ea95c5c085 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -643,13 +643,13 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) if (!entry->bo) return; - entry->bo->vm_bo = NULL; shadow = amdgpu_bo_shadowed(entry->bo); if (shadow) { ttm_bo_set_bulk_move(>tbo, NULL); amdgpu_bo_unref(); } ttm_bo_set_bulk_move(>bo->tbo, NULL); + entry->bo->vm_bo = NULL; spin_lock(>vm->status_lock); list_del(>vm_status); @@ -657,6 +657,38 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) amdgpu_bo_unref(>bo); } +static void amdgpu_vm_pt_flush_waitlist(struct amdgpu_vm *vm) +{ + struct amdgpu_vm_bo_base *entry, *next; + LIST_HEAD(tlb_flush_waitlist); + + if (!vm || list_empty(>tlb_flush_waitlist)) + return; + + /* Wait for pending TLB flush before freeing PT BOs */ + mutex_lock(>tlb_fence_lock); + if (vm->tlb_fence_last && !dma_fence_is_signaled(vm->tlb_fence_last)) { + if (dma_fence_wait_timeout(vm->tlb_fence_last, false, + MAX_SCHEDULE_TIMEOUT) <= 0) { + DRM_ERROR("Timedout waiting for TLB flush, not freeing PT BOs\n"); + mutex_unlock(>tlb_fence_lock); + return; + } + + vm->tlb_fence_last = NULL; + } + + /* Save the waitlist locally and reset the flushlist */ + list_splice_init(>tlb_flush_waitlist, _flush_waitlist); + mutex_unlock(>tlb_fence_lock); + + /* Now free the entries */ + list_for_each_entry_safe(entry, next, _flush_waitlist, vm_status) { + if (entry) + amdgpu_vm_pt_free(entry); + } +} + void amdgpu_vm_pt_free_work(struct work_struct
Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs
Am 26.02.24 um 17:50 schrieb Michel Dänzer: On 2024-02-26 17:46, Michel Dänzer wrote: On 2024-02-26 17:34, Christian König wrote: My question is why has it worked so far? I mean we are not doing this since yesterday and the problem only shows up now? Yes, Wayland compositors are only starting to try and make use of this now. To expand on this, mutter will want to do something like this as well sooner or later. I suspect it's the same for others like kwin, sway etc. Yeah, but we have done similar things with X decades before. E.g. basically the client sends a BO to the server for displaying it. Why we suddenly have to juggle with the fact that it is DMA-buf shared with another device? This has worked for at least a decade before. Regards, Christian.
Re: [PATCH v3 3/3] drm/amdgpu: sync page table freeing with tlb flush
On 2024-02-23 08:42, Shashank Sharma wrote: This patch: - adds a new list in amdgou_vm to hold the VM PT entries being freed - waits for the TLB flush using the vm->tlb_flush_fence - actually frees the PT BOs V2: rebase V3: Do not attach the tlb_fence to the entries, rather add the entries to a list and delay their freeing (Christian) Cc: Christian König Cc: Alex Deucher Cc: Felix Kuehling Cc: Rajneesh Bhardwaj Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 6 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 6 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 51 --- 3 files changed, 58 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 67c690044b97..eebb73f2c2ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -939,6 +939,10 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, /* Makes sure no PD/PT is freed before the flush */ dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence, DMA_RESV_USAGE_BOOKKEEP); + + mutex_lock(>tlb_fence_lock); + vm->tlb_fence_last = *fence; + mutex_unlock(>tlb_fence_lock); } amdgpu_res_first(pages_addr ? NULL : res, offset, @@ -2212,6 +2216,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, INIT_LIST_HEAD(>freed); INIT_LIST_HEAD(>done); INIT_LIST_HEAD(>pt_freed); + INIT_LIST_HEAD(>tlb_flush_waitlist); INIT_WORK(>pt_free_work, amdgpu_vm_pt_free_work); INIT_KFIFO(vm->faults); @@ -2244,6 +2249,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->last_unlocked = dma_fence_get_stub(); vm->generation = 0; + mutex_init(>tlb_fence_lock); mutex_init(>eviction_lock); vm->evicting = false; vm->tlb_fence_context = dma_fence_context_alloc(1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 8e6fd25d07b7..77f10ed80973 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -334,6 +334,10 @@ struct amdgpu_vm { uint64_t *tlb_seq_cpu_addr; uint64_t tlb_fence_context; + struct mutex tlb_fence_lock; + struct dma_fence *tlb_fence_last; + struct list_head tlb_flush_waitlist; + atomic64_t kfd_last_flushed_seq; /* How many times we had to re-generate the page tables */ @@ -379,6 +383,8 @@ struct amdgpu_vm { /* cached fault info */ struct amdgpu_vm_fault_info fault_info; + + int count_bos; }; struct amdgpu_vm_manager { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 95dc0afdaffb..57ea95c5c085 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -643,13 +643,13 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) if (!entry->bo) return; - entry->bo->vm_bo = NULL; shadow = amdgpu_bo_shadowed(entry->bo); if (shadow) { ttm_bo_set_bulk_move(>tbo, NULL); amdgpu_bo_unref(); } ttm_bo_set_bulk_move(>bo->tbo, NULL); + entry->bo->vm_bo = NULL; spin_lock(>vm->status_lock); list_del(>vm_status); @@ -657,6 +657,38 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) amdgpu_bo_unref(>bo); } +static void amdgpu_vm_pt_flush_waitlist(struct amdgpu_vm *vm) +{ + struct amdgpu_vm_bo_base *entry, *next; + LIST_HEAD(tlb_flush_waitlist); + + if (!vm || list_empty(>tlb_flush_waitlist)) + return; + + /* Wait for pending TLB flush before freeing PT BOs */ + mutex_lock(>tlb_fence_lock); + if (vm->tlb_fence_last && !dma_fence_is_signaled(vm->tlb_fence_last)) { + if (dma_fence_wait_timeout(vm->tlb_fence_last, false, + MAX_SCHEDULE_TIMEOUT) <= 0) { + DRM_ERROR("Timedout waiting for TLB flush, not freeing PT BOs\n"); + mutex_unlock(>tlb_fence_lock); + return; + } + + vm->tlb_fence_last = NULL; + } + + /* Save the waitlist locally and reset the flushlist */ + list_splice_init(>tlb_flush_waitlist, _flush_waitlist); + mutex_unlock(>tlb_fence_lock); + + /* Now free the entries */ + list_for_each_entry_safe(entry, next, _flush_waitlist, vm_status) { + if (entry) + amdgpu_vm_pt_free(entry); + } +} + void amdgpu_vm_pt_free_work(struct work_struct *work) { struct amdgpu_vm_bo_base *entry, *next; @@ -673,7 +705,7 @@ void amdgpu_vm_pt_free_work(struct work_struct *work) amdgpu_bo_reserve(vm->root.bo, true); list_for_each_entry_safe(entry, next, _freed, vm_status) - amdgpu_vm_pt_free(entry); + list_move(>vm_status, >tlb_flush_waitlist); amdgpu_bo_unreserve(vm->root.bo); } @@ -708,11 +740,17 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, return; } - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) - amdgpu_vm_pt_free(entry); + mutex_lock(>tlb_fence_lock); + + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) { + if
Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs
On 2024-02-26 17:46, Michel Dänzer wrote: > On 2024-02-26 17:34, Christian König wrote: > >> My question is why has it worked so far? I mean we are not doing this since >> yesterday and the problem only shows up now? > > Yes, Wayland compositors are only starting to try and make use of this now. To expand on this, mutter will want to do something like this as well sooner or later. I suspect it's the same for others like kwin, sway etc. -- Earthling Michel Dänzer| https://redhat.com Libre software enthusiast | Mesa and Xwayland developer
Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs
On 2024-02-26 17:34, Christian König wrote: > Am 26.02.24 um 17:27 schrieb Michel Dänzer: >> On 2024-02-26 16:58, Christian König wrote: >>> Am 23.02.24 um 17:43 schrieb Michel Dänzer: On 2024-02-23 11:04, Michel Dänzer wrote: > On 2024-02-23 10:34, Christian König wrote: >> Am 23.02.24 um 09:11 schrieb Michel Dänzer: >>> On 2024-02-23 08:06, Christian König wrote: Am 22.02.24 um 18:28 schrieb Michel Dänzer: > From: Michel Dänzer > > Pinning the BO storage to VRAM for scanout would make it inaccessible > to non-P2P dma-buf importers. Thinking more about it I don't think we can do this. Using the BO in a ping/pong fashion for scanout and DMA-buf is actually valid, you just can't do both at the same time. And if I'm not completely mistaken we actually have use cases for this at the moment, >>> Those use cases don't have P2P & CONFIG_DMABUF_MOVE_NOTIFY? >> Nope, we are basically talking about unit tests and examples for inter >> device operations. > Sounds like the "no user-space regressions" rule might not apply then. To clarify what I mean by that: "We can't fix this issue, because it would break some unit tests and examples" is similar to saying "We can't fix this KMS bug, because there are IGT tests expecting the buggy behaviour". In practice, the latter do get fixed, along with the IGT tests. >>> The problem here is that this is not a bug, but intentional behavior. >>> Exporting BOs and using them in scanout in a ping/pong fashion is perfectly >>> valid. >> The problem with the status quo is that it requires amdgpu-specific >> knowledge and worst-case pessimization in user space. > > Yeah, I'm perfectly aware of that. But that approach here really doesn't > seems to be valid. It's the only known sane approach at this point. There's no other proposal which allows using both BO sharing and scanout without pessimizing or hitting seemingly random CS ioctl failures. >>> We have use cases which depend on this behavior and I'm not going to break >>> those to fix your use case. >> It's not "my" use case, it's a Wayland compositor trying to make use of BO >> sharing and scanout without always pessimizing for the worst case. >> >> That's surely more of a real-world use case than unit tests and examples. > > I've looked a bit deeper into this and we have told customers for the last > ~10 years or so that this is how it is supposed to work and that they can use > this approach. > > So this is much more than unit tests and examples, we are changing existing > behavior which is clearly not a bug but intentional and have a very high > chance to break valid use cases. "Very high chance" sounds like you still don't know of any actual real-world use case relying on it though? > My question is why has it worked so far? I mean we are not doing this since > yesterday and the problem only shows up now? Yes, Wayland compositors are only starting to try and make use of this now. -- Earthling Michel Dänzer| https://redhat.com Libre software enthusiast | Mesa and Xwayland developer
Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs
Am 26.02.24 um 17:27 schrieb Michel Dänzer: On 2024-02-26 16:58, Christian König wrote: Am 23.02.24 um 17:43 schrieb Michel Dänzer: On 2024-02-23 11:04, Michel Dänzer wrote: On 2024-02-23 10:34, Christian König wrote: Am 23.02.24 um 09:11 schrieb Michel Dänzer: On 2024-02-23 08:06, Christian König wrote: Am 22.02.24 um 18:28 schrieb Michel Dänzer: From: Michel Dänzer Pinning the BO storage to VRAM for scanout would make it inaccessible to non-P2P dma-buf importers. Thinking more about it I don't think we can do this. Using the BO in a ping/pong fashion for scanout and DMA-buf is actually valid, you just can't do both at the same time. And if I'm not completely mistaken we actually have use cases for this at the moment, Those use cases don't have P2P & CONFIG_DMABUF_MOVE_NOTIFY? Nope, we are basically talking about unit tests and examples for inter device operations. Sounds like the "no user-space regressions" rule might not apply then. To clarify what I mean by that: "We can't fix this issue, because it would break some unit tests and examples" is similar to saying "We can't fix this KMS bug, because there are IGT tests expecting the buggy behaviour". In practice, the latter do get fixed, along with the IGT tests. The problem here is that this is not a bug, but intentional behavior. Exporting BOs and using them in scanout in a ping/pong fashion is perfectly valid. The problem with the status quo is that it requires amdgpu-specific knowledge and worst-case pessimization in user space. Yeah, I'm perfectly aware of that. But that approach here really doesn't seems to be valid. We have use cases which depend on this behavior and I'm not going to break those to fix your use case. It's not "my" use case, it's a Wayland compositor trying to make use of BO sharing and scanout without always pessimizing for the worst case. That's surely more of a real-world use case than unit tests and examples. I've looked a bit deeper into this and we have told customers for the last ~10 years or so that this is how it is supposed to work and that they can use this approach. So this is much more than unit tests and examples, we are changing existing behavior which is clearly not a bug but intentional and have a very high chance to break valid use cases. My question is why has it worked so far? I mean we are not doing this since yesterday and the problem only shows up now? While I see the use case something is still missing in that picture. Christian.
Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs
On 2024-02-26 16:58, Christian König wrote: > Am 23.02.24 um 17:43 schrieb Michel Dänzer: >> On 2024-02-23 11:04, Michel Dänzer wrote: >>> On 2024-02-23 10:34, Christian König wrote: Am 23.02.24 um 09:11 schrieb Michel Dänzer: > On 2024-02-23 08:06, Christian König wrote: >> Am 22.02.24 um 18:28 schrieb Michel Dänzer: >>> From: Michel Dänzer >>> >>> Pinning the BO storage to VRAM for scanout would make it inaccessible >>> to non-P2P dma-buf importers. >> Thinking more about it I don't think we can do this. >> >> Using the BO in a ping/pong fashion for scanout and DMA-buf is actually >> valid, you just can't do both at the same time. >> >> And if I'm not completely mistaken we actually have use cases for this >> at the moment, > Those use cases don't have P2P & CONFIG_DMABUF_MOVE_NOTIFY? Nope, we are basically talking about unit tests and examples for inter device operations. >>> Sounds like the "no user-space regressions" rule might not apply then. >> To clarify what I mean by that: >> >> "We can't fix this issue, because it would break some unit tests and >> examples" is similar to saying "We can't fix this KMS bug, because there are >> IGT tests expecting the buggy behaviour". In practice, the latter do get >> fixed, along with the IGT tests. > > The problem here is that this is not a bug, but intentional behavior. > Exporting BOs and using them in scanout in a ping/pong fashion is perfectly > valid. The problem with the status quo is that it requires amdgpu-specific knowledge and worst-case pessimization in user space. > We have use cases which depend on this behavior and I'm not going to break > those to fix your use case. It's not "my" use case, it's a Wayland compositor trying to make use of BO sharing and scanout without always pessimizing for the worst case. That's surely more of a real-world use case than unit tests and examples. -- Earthling Michel Dänzer| https://redhat.com Libre software enthusiast | Mesa and Xwayland developer
Re: [PATCH v3 1/3] drm/amdgpu: replace TLB seq callback with HW seq
[AMD Official Use Only - General] Please feel free to use: Reviewed-by: Shashank Sharma Regards Shashank From: Christian König Sent: Monday, February 26, 2024 3:45 PM To: Sharma, Shashank ; amd-gfx@lists.freedesktop.org Cc: Koenig, Christian ; Deucher, Alexander ; Kuehling, Felix ; Bhardwaj, Rajneesh Subject: Re: [PATCH v3 1/3] drm/amdgpu: replace TLB seq callback with HW seq Am 23.02.24 um 14:42 schrieb Shashank Sharma: > From: Christian König > > The callback we installed for the SDMA update were actually pretty > horrible. since we now have seq64 use that one and HW seq writes > instead. > > V2:(Shashank) > - rebased on amd-drm-staging-next > - changed amdgpu_seq64_gpu_addr > > Cc: Christian König > Cc: Alex Deucher > Cc: Felix Kuehling > Cc: Rajneesh Bhardwaj > Signed-off-by: Christian König Shashank can I get an rb on this patch here so that I can push it to amd-staging-drm-next? The patch was basically just to double check if the seq64 code works as intended. Thanks, Christian. > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 14 > drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 - > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 27 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c | 3 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 5 ++ > 7 files changed, 42 insertions(+), 89 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c > index 3d0d56087d41..300dc79fa2b9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c > @@ -199,6 +199,20 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 > va) >__clear_bit(bit_pos, adev->seq64.used); > } > > +/** > + * amdgpu_seq64_gpu_addr - Calculate GPU addr from va > + * > + * @adev: amdgpu_device pointer > + * @va: virtual address in client address space > + * > + * Calculate the GART address for a VA. > + */ > +u64 amdgpu_seq64_gpu_addr(struct amdgpu_device *adev, u64 va) > +{ > + return va - amdgpu_seq64_get_va_base(adev) + > + amdgpu_bo_gpu_offset(adev->seq64.sbo); > +} > + > /** >* amdgpu_seq64_fini - Cleanup seq64 driver >* > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h > index 4203b2ab318d..63e8ac0a2057 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h > @@ -43,6 +43,7 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 > gpu_addr); > int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, > struct amdgpu_bo_va **bo_va); > void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv > *fpriv); > +u64 amdgpu_seq64_gpu_addr(struct amdgpu_device *adev, u64 va); > > #endif > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index ed4a8c5d26d7..0960e0a665d3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -111,21 +111,6 @@ struct amdgpu_prt_cb { >struct dma_fence_cb cb; > }; > > -/** > - * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush > sequence > - */ > -struct amdgpu_vm_tlb_seq_struct { > - /** > - * @vm: pointer to the amdgpu_vm structure to set the fence sequence on > - */ > - struct amdgpu_vm *vm; > - > - /** > - * @cb: callback > - */ > - struct dma_fence_cb cb; > -}; > - > /** >* amdgpu_vm_set_pasid - manage pasid and vm ptr mapping >* > @@ -862,23 +847,6 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, >return r; > } > > -/** > - * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence > - * @fence: unused > - * @cb: the callback structure > - * > - * Increments the tlb sequence to make sure that future CS execute a VM > flush. > - */ > -static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, > - struct dma_fence_cb *cb) > -{ > - struct amdgpu_vm_tlb_seq_struct *tlb_cb; > - > - tlb_cb = container_of(cb, typeof(*tlb_cb), cb); > - atomic64_inc(_cb->vm->tlb_seq); > - kfree(tlb_cb); > -} > - > /** >* amdgpu_vm_update_range - update a range in the vm page table >* > @@ -911,7 +879,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, > struct amdgpu_vm *vm, > struct dma_fence **fence) > { >struct amdgpu_vm_update_params params; > - struct amdgpu_vm_tlb_seq_struct *tlb_cb; >struct amdgpu_res_cursor cursor; >enum amdgpu_sync_mode sync_mode; >int r, idx; > @@ -919,12 +886,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, > struct amdgpu_vm *vm, >if
Re: [PATCH] drm/amdgpu: disable ring_muxer if mcbp is off
Am 23.02.24 um 17:30 schrieb Alex Deucher: On Fri, Feb 23, 2024 at 4:48 AM Pierre-Eric Pelloux-Prayer wrote: Using the ring_muxer without preemption adds overhead for no reason since mcbp cannot be triggered. Moving back to a single queue in this case also helps when high priority app are used: in this case the gpu_scheduler priority handling will work as expected - much better than ring_muxer with its 2 independant schedulers competing for the same hardware queue. This change requires moving amdgpu_device_set_mcbp above amdgpu_device_ip_early_init because we use adev->gfx.mcbp. Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Alex Deucher Acked-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 21 - 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d534e192e260..40516d24026c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4054,13 +4054,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, return r; } + amdgpu_device_set_mcbp(adev); + /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) return r; - amdgpu_device_set_mcbp(adev); - /* Get rid of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, _kms_driver); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 169d45268ef6..f682f830f7f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2080,7 +2080,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; /* disable scheduler on the real ring */ - ring->no_scheduler = true; + ring->no_scheduler = adev->gfx.mcbp; ring->vm_hub = AMDGPU_GFXHUB(0); r = amdgpu_ring_init(adev, ring, 1024, >gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, @@ -2090,7 +2090,7 @@ static int gfx_v9_0_sw_init(void *handle) } /* set up the software rings */ - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { ring = >gfx.sw_gfx_ring[i]; ring->ring_obj = NULL; @@ -2181,7 +2181,7 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) amdgpu_ring_fini(>gfx.sw_gfx_ring[i]); amdgpu_ring_mux_fini(>gfx.muxer); @@ -5910,11 +5910,14 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, switch (me_id) { case 0: - if (adev->gfx.num_gfx_rings && - !amdgpu_mcbp_handle_trailing_fence_irq(>gfx.muxer)) { - /* Fence signals are handled on the software rings*/ - for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) - amdgpu_fence_process(>gfx.sw_gfx_ring[i]); + if (adev->gfx.num_gfx_rings) { + if (!adev->gfx.mcbp) { + amdgpu_fence_process(>gfx.gfx_ring[0]); + } else if (!amdgpu_mcbp_handle_trailing_fence_irq(>gfx.muxer)) { + /* Fence signals are handled on the software rings*/ + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) + amdgpu_fence_process(>gfx.sw_gfx_ring[i]); + } } break; case 1: @@ -7051,7 +7054,7 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = _v9_0_ring_funcs_gfx; - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) adev->gfx.sw_gfx_ring[i].funcs = _v9_0_sw_ring_funcs_gfx; } -- 2.40.1
Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs
Am 23.02.24 um 17:43 schrieb Michel Dänzer: On 2024-02-23 11:04, Michel Dänzer wrote: On 2024-02-23 10:34, Christian König wrote: Am 23.02.24 um 09:11 schrieb Michel Dänzer: On 2024-02-23 08:06, Christian König wrote: Am 22.02.24 um 18:28 schrieb Michel Dänzer: From: Michel Dänzer Pinning the BO storage to VRAM for scanout would make it inaccessible to non-P2P dma-buf importers. Thinking more about it I don't think we can do this. Using the BO in a ping/pong fashion for scanout and DMA-buf is actually valid, you just can't do both at the same time. And if I'm not completely mistaken we actually have use cases for this at the moment, Those use cases don't have P2P & CONFIG_DMABUF_MOVE_NOTIFY? Nope, we are basically talking about unit tests and examples for inter device operations. Sounds like the "no user-space regressions" rule might not apply then. To clarify what I mean by that: "We can't fix this issue, because it would break some unit tests and examples" is similar to saying "We can't fix this KMS bug, because there are IGT tests expecting the buggy behaviour". In practice, the latter do get fixed, along with the IGT tests. The problem here is that this is not a bug, but intentional behavior. Exporting BOs and using them in scanout in a ping/pong fashion is perfectly valid. We have use cases which depend on this behavior and I'm not going to break those to fix your use case. So as far as I can see this approach here is a no-go. Regards, Christian.
Re: [PATCH v3 2/3] drm/amdgpu: implement TLB flush fence
On 2024-02-23 11:58, Philip Yang wrote: On 2024-02-23 08:42, Shashank Sharma wrote: From: Christian König The problem is that when (for example) 4k pages are replaced with a single 2M page we need to wait for change to be flushed out by invalidating the TLB before the PT can be freed. Solve this by moving the TLB flush into a DMA-fence object which can be used to delay the freeing of the PT BOs until it is signaled. V2: (Shashank) - rebase - set dma_fence_error only in case of error - add tlb_flush fence only when PT/PD BO is locked (Felix) - use vm->pasid when f is NULL (Mukul) Cc: Christian Koenig Cc: Felix Kuehling Cc: Rajneesh Bhardwaj Cc: Alex Deucher Signed-off-by: Christian König Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 10 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 4 + .../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c | 106 ++ 4 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 4c989da4d2f3..fdbb3d770c7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ - amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \ + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \ + amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \ amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0960e0a665d3..67c690044b97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -932,6 +932,15 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto error_unlock; + /* Prepare a TLB flush fence to be attached to PTs */ + if (!unlocked && params.needs_flush && vm->is_compute_context) { + amdgpu_vm_tlb_fence_create(adev, vm, fence); + + /* Makes sure no PD/PT is freed before the flush */ + dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence, + DMA_RESV_USAGE_BOOKKEEP); + } + amdgpu_res_first(pages_addr ? NULL : res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, ); while (cursor.remaining) { @@ -2237,6 +2246,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, mutex_init(>eviction_lock); vm->evicting = false; + vm->tlb_fence_context = dma_fence_context_alloc(1); r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, false, , xcp_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index ac9380afcb69..8e6fd25d07b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -332,6 +332,7 @@ struct amdgpu_vm { atomic64_t tlb_seq; uint64_t tlb_seq_va; uint64_t *tlb_seq_cpu_addr; + uint64_t tlb_fence_context; atomic64_t kfd_last_flushed_seq; @@ -585,5 +586,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, uint64_t addr, uint32_t status, unsigned int vmhub); +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct dma_fence **fence); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c new file mode 100644 index ..569681badd7c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
Re: [PATCH v3 2/3] drm/amdgpu: implement TLB flush fence
Am 23.02.24 um 17:58 schrieb Philip Yang: On 2024-02-23 08:42, Shashank Sharma wrote: From: Christian König The problem is that when (for example) 4k pages are replaced with a single 2M page we need to wait for change to be flushed out by invalidating the TLB before the PT can be freed. Solve this by moving the TLB flush into a DMA-fence object which can be used to delay the freeing of the PT BOs until it is signaled. V2: (Shashank) - rebase - set dma_fence_error only in case of error - add tlb_flush fence only when PT/PD BO is locked (Felix) - use vm->pasid when f is NULL (Mukul) Cc: Christian Koenig Cc: Felix Kuehling Cc: Rajneesh Bhardwaj Cc: Alex Deucher Signed-off-by: Christian König Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 10 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 4 + .../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c | 106 ++ 4 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 4c989da4d2f3..fdbb3d770c7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ - amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \ + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \ + amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \ amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0960e0a665d3..67c690044b97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -932,6 +932,15 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto error_unlock; + /* Prepare a TLB flush fence to be attached to PTs */ + if (!unlocked && params.needs_flush && vm->is_compute_context) { + amdgpu_vm_tlb_fence_create(adev, vm, fence); + + /* Makes sure no PD/PT is freed before the flush */ + dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence, + DMA_RESV_USAGE_BOOKKEEP); + } + amdgpu_res_first(pages_addr ? NULL : res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, ); while (cursor.remaining) { @@ -2237,6 +2246,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, mutex_init(>eviction_lock); vm->evicting = false; + vm->tlb_fence_context = dma_fence_context_alloc(1); r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, false, , xcp_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index ac9380afcb69..8e6fd25d07b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -332,6 +332,7 @@ struct amdgpu_vm { atomic64_t tlb_seq; uint64_ttlb_seq_va; uint64_t*tlb_seq_cpu_addr; + uint64_ttlb_fence_context; atomic64_t kfd_last_flushed_seq; @@ -585,5 +586,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, uint64_t addr, uint32_t status, unsigned int vmhub); +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, +struct amdgpu_vm *vm, +struct dma_fence **fence); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c new file mode 100644 index ..569681badd7c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the
Re: [PATCH] drm/amdgpu: Fix missing break in ATOM_ARG_IMM Case of atom_get_src_int()
[Public] Reviewed-by: Alex Deucher From: SHANMUGAM, SRINIVASAN Sent: Saturday, February 24, 2024 1:38 AM To: Koenig, Christian ; Deucher, Alexander Cc: amd-gfx@lists.freedesktop.org ; SHANMUGAM, SRINIVASAN ; Jammy Zhou Subject: [PATCH] drm/amdgpu: Fix missing break in ATOM_ARG_IMM Case of atom_get_src_int() Missing break statement in the ATOM_ARG_IMM case of a switch statement, adds the missing break statement, ensuring that the program's control flow is as intended. Fixes the below: drivers/gpu/drm/amd/amdgpu/atom.c:323 atom_get_src_int() warn: ignoring unreachable code. Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)") Cc: Jammy Zhou Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam --- drivers/gpu/drm/amd/amdgpu/atom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index b888613f653f..72362df352f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -320,7 +320,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, DEBUG("IMM 0x%02X\n", val); return val; } - return 0; + break; case ATOM_ARG_PLL: idx = U8(*ptr); (*ptr)++; -- 2.34.1
Re: [PATCH v3 1/3] drm/amdgpu: replace TLB seq callback with HW seq
Am 23.02.24 um 14:42 schrieb Shashank Sharma: From: Christian König The callback we installed for the SDMA update were actually pretty horrible. since we now have seq64 use that one and HW seq writes instead. V2:(Shashank) - rebased on amd-drm-staging-next - changed amdgpu_seq64_gpu_addr Cc: Christian König Cc: Alex Deucher Cc: Felix Kuehling Cc: Rajneesh Bhardwaj Signed-off-by: Christian König Shashank can I get an rb on this patch here so that I can push it to amd-staging-drm-next? The patch was basically just to double check if the seq64 code works as intended. Thanks, Christian. --- drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 14 drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 - drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 27 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 5 ++ 7 files changed, 42 insertions(+), 89 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index 3d0d56087d41..300dc79fa2b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -199,6 +199,20 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va) __clear_bit(bit_pos, adev->seq64.used); } +/** + * amdgpu_seq64_gpu_addr - Calculate GPU addr from va + * + * @adev: amdgpu_device pointer + * @va: virtual address in client address space + * + * Calculate the GART address for a VA. + */ +u64 amdgpu_seq64_gpu_addr(struct amdgpu_device *adev, u64 va) +{ + return va - amdgpu_seq64_get_va_base(adev) + + amdgpu_bo_gpu_offset(adev->seq64.sbo); +} + /** * amdgpu_seq64_fini - Cleanup seq64 driver * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h index 4203b2ab318d..63e8ac0a2057 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h @@ -43,6 +43,7 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr); int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va); void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv); +u64 amdgpu_seq64_gpu_addr(struct amdgpu_device *adev, u64 va); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ed4a8c5d26d7..0960e0a665d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -111,21 +111,6 @@ struct amdgpu_prt_cb { struct dma_fence_cb cb; }; -/** - * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence - */ -struct amdgpu_vm_tlb_seq_struct { - /** -* @vm: pointer to the amdgpu_vm structure to set the fence sequence on -*/ - struct amdgpu_vm *vm; - - /** -* @cb: callback -*/ - struct dma_fence_cb cb; -}; - /** * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping * @@ -862,23 +847,6 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, return r; } -/** - * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence - * @fence: unused - * @cb: the callback structure - * - * Increments the tlb sequence to make sure that future CS execute a VM flush. - */ -static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, -struct dma_fence_cb *cb) -{ - struct amdgpu_vm_tlb_seq_struct *tlb_cb; - - tlb_cb = container_of(cb, typeof(*tlb_cb), cb); - atomic64_inc(_cb->vm->tlb_seq); - kfree(tlb_cb); -} - /** * amdgpu_vm_update_range - update a range in the vm page table * @@ -911,7 +879,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence) { struct amdgpu_vm_update_params params; - struct amdgpu_vm_tlb_seq_struct *tlb_cb; struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; int r, idx; @@ -919,12 +886,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (!drm_dev_enter(adev_to_drm(adev), )) return -ENODEV; - tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL); - if (!tlb_cb) { - r = -ENOMEM; - goto error_unlock; - } - /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache, * heavy-weight flush TLB unconditionally. */ @@ -942,6 +903,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.immediate = immediate; params.pages_addr = pages_addr; params.unlocked = unlocked; + params.needs_flush = flush_tlb;
Re: Kernel 6.7+ broke under-powering of my RX 6700XT. (Archlinux, mesa/amdgpu)
Back from vacations ... On Wed, 21 Feb 2024 at 16:39, Alex Deucher wrote: > > On Wed, Feb 21, 2024 at 1:06 AM Linux regression tracking (Thorsten > Leemhuis) wrote: > > > > On 20.02.24 21:18, Alex Deucher wrote: > > > On Tue, Feb 20, 2024 at 2:41 PM Romano wrote: > > >> > > >> If the increased low range is allowed via boot option, like in proposed > > >> patch, user clearly made an intentional decision. Undefined, but won't > > >> fry his hardware for sure. Undefined is also overclocking in that > > >> matter. You can go out of range with ratio of voltage vs frequency(still > > >> within vendor's limits) for example and crash the system. > > > > > > This whole thing reminds me of this: > > > https://xkcd.com/1172/ > > > The problem is another module parameter is another interface to > > > maintain and validate. > > > > Yup, of course, all that is understood. > > > > But we have this "no regressions" rule for a reason. Adhering to it > > strictly would afaics be counter-productive in this situation, but give > > users some way to manually do what was possible before out-of-the box > > IMHO is the minimum we should do. > > > > Maybe just allow that parameter only up to a certain recent GPU > > generation, that way you won't have to deal with that at some point in > > the future. > > The problem is the cumulative effect of all of these parameters. > Every time there is some change in the driver someone disagrees with > there is a push to add a module parameter for it. The driver already > has too many module parameters and it's hard to keep them all working > consistently and in every possible combination. Moreover, the module > options are supposed to be mainly for debugging. The driver sets > proper defaults for all chips to ensure proper operation, however lots > of random forums seem to treat them like they are the recipe for some > special sauce so users are constantly setting various combinations of > them because they read somewhere on a forum that it would make their > GPU run faster. More often than not this leads to problems. > > Even if we did make the option only valid for these specific chips, > there will be an expectation that future chips will support it as > well, because someone will hack the driver and test it and it may work > for them and then there will be a push to add it for those chips too. Chiming in here ... tldr; yes gpu drivers are ridiculously hard to get right, combinatorial explosion is a real issue and concern, it's not some hiding behind corporate rules - drm folks added module_param*unsafe to discourage users from playing around with options we need for debugging for very, very real reasons. We have aggressively removed tuning knobs in the past, and those we have in various drivers are causing endless amounts of pain. Also, the "no regression" rules is not ironclad, especially on power/perf regressions, or all the security fixes would be impossible to merge. First make it correct (even if the bug has gone unnoticed for forever), then make it fast/power efficient/pretty/whatever people fancy. Yes there's some exceptions like "my desktop is crawling like a slide-show and absolutely unusable" kind of regressions, but my understanding is this isn't the case here. So unless Dave or Linus are screaming and overruling Alex here, "do nothing" is my take here too. Cheers, Sima > > Alex > > > > Moreover, we've had a number of cases in the > > > past where users have under or overclocked and reported bugs or > > > stability issues and it did not come to light that they were doing > > > that until we'd already spent a good deal of time trying to debug the > > > issue. > > > > Taint the kernel when that module parameter is used? We iirc have a > > taint bit exactly for this sort of situation. Sure, such reports will > > still happen, but then you at least have an indicator to spot them. > > > > Ciao, Thorsten > > > > > This obviously can still happen if you allow any sort of over > > > or underclocking, but at least if you stick to the limits you are > > > staying within the bounding box of the design. > > > > > > Alex > > > > > >> On 2/20/24 19:09, Alex Deucher wrote: > > >>> On Tue, Feb 20, 2024 at 11:46 AM Romano wrote: > > For Windows, apps like MSI Afterburner is the one to try and what most > > people go for. Using it in the past myself, I would be surprised if it > > adhered to such a high min power cap. But even if it did, why would we > > have to. > > > > Relying on vendors cap in this case has already proven wrong because > > things worked for quite some time already and people reported saving > > significant amount of watts, in my case 90W(!) for <10% perf. > > > > Therefore this talk about safety seems rather strange to me and > > especially so when we are talking about min_cap. Or name me a single > > case where someone fried his card due to "too low power" set in said > > variable. Now there
RE: [PATCH] drm/amd/pm: Increase SMUv13.0.6 mode-2 reset time
[AMD Official Use Only - General] Reviewed-by: Asad Kamal Thanks & Regards Asad -Original Message- From: Lazar, Lijo Sent: Monday, February 26, 2024 4:08 PM To: amd-gfx@lists.freedesktop.org Cc: Zhang, Hawking ; Deucher, Alexander ; Kamal, Asad Subject: [PATCH] drm/amd/pm: Increase SMUv13.0.6 mode-2 reset time On SOCs with SMUv13.0.6, mode-2 reset takes a bit longer. Wait for 200ms before trying to restore config space after mode-2 reset. Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 29c102fe650d..2b7a60b23d6b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2307,8 +2307,8 @@ static int smu_v13_0_6_mode2_reset(struct smu_context *smu) ret = smu_cmn_send_msg_without_waiting(smu, (uint16_t)index, SMU_RESET_MODE_2); - /* This is similar to FLR, wait till max FLR timeout */ - msleep(100); + /* Reset takes a bit longer, wait for 200ms. */ + msleep(200); dev_dbg(smu->adev->dev, "restore config space...\n"); /* Restore the config space saved during init */ -- 2.25.1
Re: [PATCH v5 7/8] drm/amd/display: Introduce KUnit tests to dc_dmub_srv library
On Thu, 22 Feb 2024, Rodrigo Siqueira wrote: > diff --git a/drivers/gpu/drm/amd/display/test/kunit/.kunitconfig > b/drivers/gpu/drm/amd/display/test/kunit/.kunitconfig > index eb6f81601757..4c5861ad58bd 100644 > --- a/drivers/gpu/drm/amd/display/test/kunit/.kunitconfig > +++ b/drivers/gpu/drm/amd/display/test/kunit/.kunitconfig > @@ -4,5 +4,6 @@ CONFIG_DRM=y > CONFIG_DRM_AMDGPU=y > CONFIG_DRM_AMD_DC=y > CONFIG_AMD_DC_BASICS_KUNIT_TEST=y > +CONFIG_AMD_DC_KUNIT_TEST=y > CONFIG_DCE_KUNIT_TEST=y > CONFIG_DML_KUNIT_TEST=y A bit random patch to comment on, but this hunk demonstrates the point: Should all the configs have DRM_AMD_ prefix to put them in a "namespace"? BR, Jani. -- Jani Nikula, Intel
[PATCH] drm/amd/pm: Increase SMUv13.0.6 mode-2 reset time
On SOCs with SMUv13.0.6, mode-2 reset takes a bit longer. Wait for 200ms before trying to restore config space after mode-2 reset. Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 29c102fe650d..2b7a60b23d6b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2307,8 +2307,8 @@ static int smu_v13_0_6_mode2_reset(struct smu_context *smu) ret = smu_cmn_send_msg_without_waiting(smu, (uint16_t)index, SMU_RESET_MODE_2); - /* This is similar to FLR, wait till max FLR timeout */ - msleep(100); + /* Reset takes a bit longer, wait for 200ms. */ + msleep(200); dev_dbg(smu->adev->dev, "restore config space...\n"); /* Restore the config space saved during init */ -- 2.25.1
[PATCH v2] drm/amd/amdgpu: Fix potential ioremap() memory leaks in amdgpu_device_init()
This ensures that the memory mapped by ioremap for adev->rmmio, is properly handled in amdgpu_device_init(). If the function exits early due to an error, the memory is unmapped. If the function completes successfully, the memory remains mapped. Reported by smatch: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:4337 amdgpu_device_init() warn: 'adev->rmmio' from ioremap() not released on lines: 4035,4045,4051,4058,4068,4337 Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam --- v2: - updated commit message - use a goto label and error handling instead (Christian) drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1ef892bea488..d0e77bbee60e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4031,8 +4031,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, * early on during init and before calling to RREG32. */ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); - if (!adev->reset_domain) - return -ENOMEM; + if (!adev->reset_domain) { + r = -ENOMEM; + goto unmap_memory; + } /* detect hw virtualization here */ amdgpu_detect_virtualization(adev); @@ -4042,20 +4044,20 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_device_get_job_timeout_settings(adev); if (r) { dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - return r; + goto unmap_memory; } /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) - return r; + goto unmap_memory; amdgpu_device_set_mcbp(adev); /* Get rid of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, _kms_driver); if (r) - return r; + goto unmap_memory; /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); @@ -4065,7 +4067,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (adev->gmc.xgmi.supported) { r = adev->gfxhub.funcs->get_xgmi_info(adev); if (r) - return r; + goto unmap_memory; } /* enable PCIE atomic ops */ @@ -4334,6 +4336,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, failed: amdgpu_vf_error_trans_all(adev); +unmap_memory: + iounmap(adev->rmmio); return r; } -- 2.34.1
Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()
On Fri, 23 Feb 2024 14:50:49 -0500 Kent Overstreet wrote: > Tangentially related though, what would make me really happy is if we > could create the string with in the TP__fast_assign() section. I have to > have a bunch of annoying wrappers right now because the string length > has to be known when we invoke the tracepoint. You can use __string_len() to determine the string length in the tracepoint (which is executed in the TP_fast_assign() section). My clean up patches will make __assign_str_len() obsolete too (I'm working on them now), and you can just use __assign_str(). I noticed that I don't have a string_len example in the sample code and I'm actually writing it now. // cutting out everything else: TRACE_EVENT(foo_bar, TP_PROTO(const char *foo, int bar), TP_ARGS(foo, bar), TP_STRUCT__entry( __string_len( lstr, foo,bar < strlen(foo) ? bar : strlen(foo) ) ), TP_fast_assign( __assign_str(lstr, foo); // Note, the above is with my updates, without them, you need to duplicate the logic // __assign_str_len(lstr, foo, bar < strlen(foo) ? bar : strlen(foo)); ), TP_printk("%s", __get_str(lstr)) ); The above will allocate "bar < strlen(foo) ? bar : strlen(foo)" size on the ring buffer. As the size is already stored, my clean up code uses that instead of requiring duplicating the logic again. -- Steve
Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()
On 2/23/2024 9:56 AM, Steven Rostedt wrote: > From: "Steven Rostedt (Google)" > > [ >This is a treewide change. I will likely re-create this patch again in >the second week of the merge window of v6.9 and submit it then. Hoping >to keep the conflicts that it will cause to a minimum. > ] > > With the rework of how the __string() handles dynamic strings where it > saves off the source string in field in the helper structure[1], the > assignment of that value to the trace event field is stored in the helper > value and does not need to be passed in again. Just curious if this could be done piecemeal by first changing the macros to be variadic macros which allows you to ignore the extra argument. The callers could then be modified in their separate trees. And then once all the callers have be merged, the macros could be changed to no longer be variadic.
WARNING: in amdgpu_dm_atomic_commit_tail
Hello, this warning and drm error keep appearing here past wakeup from suspend: [ 247.716103] Freezing remaining freezable tasks [ 247.717379] Freezing remaining freezable tasks completed (elapsed 0.001 seconds) [ 247.717387] printk: Suspending console(s) (use no_console_suspend to debug) [ 247.871602] ACPI: EC: interrupt blocked [ 247.949021] ACPI: EC: interrupt unblocked [ 248.538831] [drm] PCIE GART of 512M enabled (table at 0x00803FD0). [ 248.538861] amdgpu :65:00.0: amdgpu: SMU is resuming... [ 248.541794] amdgpu :65:00.0: amdgpu: SMU is resumed successfully! [ 248.541832] amdgpu :65:00.0: [drm] *ERROR* Error queueing DMUB command: status=4 [ 248.548469] nvme nvme0: Shutdown timeout set to 8 seconds [ 248.573859] nvme nvme1: 15/0/0 default/read/poll queues [ 248.592079] [ cut here ] [ 248.592081] WARNING: CPU: 3 PID: 2146 at drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:8059 amdgpu_dm_atomic_commit_tail+0x35b9/0x3fe0 [amdgpu] [ 248.592528] Modules linked in: ccm hid_logitech_hidpp uhid xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables libcrc32c nfnetlink bridge stp llc cachefiles fscache netfs nvme_fabrics cmac algif_hash algif_skcipher af_alg bnep cdc_ncm cdc_ether usbnet snd_ctl_led snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi snd_soc_dmic snd_soc_ps_mach snd_ps_pdm_dma snd_hda_intel snd_sof_amd_acp63 edac_mce_amd snd_sof_amd_acp snd_intel_dspcfg snd_sof_xtensa_dsp snd_hda_codec snd_sof_pci snd_sof snd_hwdep snd_sof_utils mt7921e snd_hda_core kvm_amd mt7921_common snd_soc_core mt792x_lib mt76_connac_lib snd_compress snd_seq_midi kvm snd_seq_midi_event ac97_bus mt76 sunrpc crct10dif_pclmul binfmt_misc irqbypass snd_rawmidi snd_pci_ps uvcvideo crc32_pclmul videobuf2_vmalloc mac80211 polyval_clmulni snd_pcm uvc snd_seq btusb polyval_generic videobuf2_memops ghash_clmulni_intel videobuf2_v4l2 btrtl [ 248.592575] sha256_ssse3 btintel sha1_ssse3 btbcm snd_seq_device aesni_intel btmtk snd_acp_pci snd_timer videodev snd_acp_config crypto_simd cfg80211 snd bluetooth r8152 rtsx_pci_sdmmc videobuf2_common snd_soc_acpi hid_multitouch cryptd ucsi_acpi think_lmi nls_iso8859_1 rapl ecdh_generic ecc firmware_attributes_class ideapad_laptop mc typec_ucsi wmi_bmof mii thunderbolt sparse_keymap i2c_piix4 snd_acp_legacy_common soundcore ccp libarc4 rtsx_pci typec platform_profile i2c_hid_acpi i2c_hid sch_fq_codel msr efi_pstore ip_tables x_tables autofs4 joydev amdgpu i2c_algo_bit drm_ttm_helper ttm drm_exec drm_suballoc_helper input_leds amdxcp mfd_core drm_buddy serio_raw gpu_sched nvme drm_display_helper cec nvme_core t10_pi video wmi [ 248.592615] CPU: 3 PID: 2146 Comm: kworker/u32:18 Tainted: G W 6.7.6 #1 [ 248.592618] Hardware name: LENOVO 83AR/LNVNB161216, BIOS MDCN30WW 09/17/2023 [ 248.592620] Workqueue: events_unbound async_run_entry_fn [ 248.592628] RIP: 0010:amdgpu_dm_atomic_commit_tail+0x35b9/0x3fe0 [amdgpu] [ 248.592804] Code: 4c 89 e9 48 c7 c6 c0 26 3c c1 e8 d2 aa 9d de 49 8b 56 08 e9 2c d1 ff ff 0f 0b 49 8b 3f e8 7f bf 9b de 85 c0 0f 84 ad d5 ff ff <0f> 0b e9 a6 d5 ff ff 0f 0b e9 90 d4 ff ff 48 8b 85 90 fe ff ff 31 [ 248.592806] RSP: 0018:a85c8aaff880 EFLAGS: 00010282 [ 248.592808] RAX: ffea RBX: RCX: [ 248.592809] RDX: RSI: RDI: [ 248.592810] RBP: a85c8aaffac0 R08: R09: [ 248.592811] R10: R11: R12: 8b0c0e8b9000 [ 248.592811] R13: 8b0c0d7f6000 R14: 8b0c2176a080 R15: 8b0e2526dc00 [ 248.592812] FS: () GS:8b133ecc() knlGS: [ 248.592813] CS: 0010 DS: ES: CR0: 80050033 [ 248.592814] CR2: 7f104b3985a0 CR3: 0005f3a1d000 CR4: 00750ef0 [ 248.592816] PKRU: 5554 [ 248.592816] Call Trace: [ 248.592818] [ 248.592822] ? show_regs+0x6e/0x80 [ 248.592828] ? amdgpu_dm_atomic_commit_tail+0x35b9/0x3fe0 [amdgpu] [ 248.592971] ? __warn+0x8d/0x160 [ 248.592975] ? amdgpu_dm_atomic_commit_tail+0x35b9/0x3fe0 [amdgpu] [ 248.593109] ? report_bug+0x1b3/0x1c0 [ 248.593116] ? handle_bug+0x46/0x80 [ 248.593121] ? exc_invalid_op+0x19/0x80 [ 248.593122] ? asm_exc_invalid_op+0x1b/0x20 [ 248.593128] ? amdgpu_dm_atomic_commit_tail+0x35b9/0x3fe0 [amdgpu] [ 248.593266] ? kfree+0x79/0x120 [ 248.593271] ? dcn314_validate_bandwidth+0x2be/0x4a0 [amdgpu] [ 248.593454] ? amdgpu_bo_gpu_offset_no_check+0x3b/0x90 [amdgpu] [ 248.593564] commit_tail+0xc2/0x190 [ 248.593570] ? drm_atomic_helper_swap_state+0x246/0x380 [ 248.593571] drm_atomic_helper_commit+0x12f/0x160 [ 248.593573] drm_atomic_commit+0x93/0xd0 [ 248.593578] ? __drm_printfn_seq_file+0x30/0x30 [ 248.593581]
Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()
On Fri, 23 Feb 2024 13:46:53 -0500 Steven Rostedt wrote: > Now one thing I could do is to not remove the parameter, but just add: > > WARN_ON_ONCE((src) != __data_offsets->item##_ptr_); > > in the __assign_str() macro to make sure that it's still the same that is > assigned. But I'm not sure how useful that is, and still causes burden to > have it. I never really liked the passing of the string in two places to > begin with. Hmm, maybe I'll just add this patch for 6.9 and then in 6.10 do the parameter removal. -- Steve diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index 0c0f50bcdc56..7372e2c2a0c4 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -35,6 +35,7 @@ #define __assign_str(dst, src) do {\ char *__str__ = __get_str(dst); \ int __len__ = __get_dynamic_array_len(dst) - 1; \ + WARN_ON_ONCE((src) != __data_offsets.dst##_ptr_); \ memcpy(__str__, __data_offsets.dst##_ptr_ ? : \ EVENT_NULL_STR, __len__);\ __str__[__len__] = '\0';\
Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()
On Fri, Feb 23, 2024 at 01:46:53PM -0500, Steven Rostedt wrote: > On Fri, 23 Feb 2024 10:30:45 -0800 > Jeff Johnson wrote: > > > On 2/23/2024 9:56 AM, Steven Rostedt wrote: > > > From: "Steven Rostedt (Google)" > > > > > > [ > > >This is a treewide change. I will likely re-create this patch again in > > >the second week of the merge window of v6.9 and submit it then. Hoping > > >to keep the conflicts that it will cause to a minimum. > > > ] > > > > > > With the rework of how the __string() handles dynamic strings where it > > > saves off the source string in field in the helper structure[1], the > > > assignment of that value to the trace event field is stored in the helper > > > value and does not need to be passed in again. > > > > Just curious if this could be done piecemeal by first changing the > > macros to be variadic macros which allows you to ignore the extra > > argument. The callers could then be modified in their separate trees. > > And then once all the callers have be merged, the macros could be > > changed to no longer be variadic. > > I weighed doing that, but I think ripping off the band-aid is a better > approach. One thing I found is that leaving unused parameters in the macros > can cause bugs itself. I found one case doing my clean up, where an unused > parameter in one of the macros was bogus, and when I made it a used > parameter, it broke the build. > > I think for tree-wide changes, the preferred approach is to do one big > patch at once. And since this only affects TRACE_EVENT() macros, it > hopefully would not be too much of a burden (although out of tree users may > suffer from this, but do we care?) Agreed on doing it all at once, it'll be way less spam for people to deal with. Tangentially related though, what would make me really happy is if we could create the string with in the TP__fast_assign() section. I have to have a bunch of annoying wrappers right now because the string length has to be known when we invoke the tracepoint.
Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()
On Fri, 23 Feb 2024 10:30:45 -0800 Jeff Johnson wrote: > On 2/23/2024 9:56 AM, Steven Rostedt wrote: > > From: "Steven Rostedt (Google)" > > > > [ > >This is a treewide change. I will likely re-create this patch again in > >the second week of the merge window of v6.9 and submit it then. Hoping > >to keep the conflicts that it will cause to a minimum. > > ] > > > > With the rework of how the __string() handles dynamic strings where it > > saves off the source string in field in the helper structure[1], the > > assignment of that value to the trace event field is stored in the helper > > value and does not need to be passed in again. > > Just curious if this could be done piecemeal by first changing the > macros to be variadic macros which allows you to ignore the extra > argument. The callers could then be modified in their separate trees. > And then once all the callers have be merged, the macros could be > changed to no longer be variadic. I weighed doing that, but I think ripping off the band-aid is a better approach. One thing I found is that leaving unused parameters in the macros can cause bugs itself. I found one case doing my clean up, where an unused parameter in one of the macros was bogus, and when I made it a used parameter, it broke the build. I think for tree-wide changes, the preferred approach is to do one big patch at once. And since this only affects TRACE_EVENT() macros, it hopefully would not be too much of a burden (although out of tree users may suffer from this, but do we care?) Now one thing I could do is to not remove the parameter, but just add: WARN_ON_ONCE((src) != __data_offsets->item##_ptr_); in the __assign_str() macro to make sure that it's still the same that is assigned. But I'm not sure how useful that is, and still causes burden to have it. I never really liked the passing of the string in two places to begin with. -- Steve
Re: [PATCH] drm/amd/display: clean unnecessary braces
Hi, Christian! Ok, thanks for clarifying this for me. I'll continue analyzing the files here, now based on these points. Best regards Em qui., 22 de fev. de 2024 às 06:33, Christian König escreveu: > > Am 21.02.24 um 19:01 schrieb Rodrigo Siqueira Jordao: > > [SNIP] > >> diff --git > >> a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c > >> b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c > >> index 87760600e154..e179dea148e7 100644 > >> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c > >> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c > >> @@ -110,14 +110,12 @@ uint32_t > >> dcn32_helper_calculate_num_ways_for_subvp( > >> struct dc_state *context) > >> { > >> if (context->bw_ctx.bw.dcn.mall_subvp_size_bytes > 0) { > >> -if (dc->debug.force_subvp_num_ways) { > >> +if (dc->debug.force_subvp_num_ways) > >> return dc->debug.force_subvp_num_ways; > >> -} else { > >> +else > >> return dcn32_helper_mall_bytes_to_ways(dc, > >> context->bw_ctx.bw.dcn.mall_subvp_size_bytes); > >> -} > >> -} else { > >> +} else > > > > Actually, we want to keep the braces around the else part to keep the > > braces balanced with the if condition. > > Yeah, and checkpatch actually complains about that. E.g. you shouldn't > use "} else" or "else {", but always "} else {". > > So the patch is actually a bit bogus and introduces new coding style > warnings. > > Regards, > Christian. > > > > > Thanks > > Siqueira > > > >> return 0; > >> -} > >> } > >> void dcn32_merge_pipes_for_subvp(struct dc *dc, > >> @@ -250,9 +248,9 @@ bool dcn32_is_psr_capable(struct pipe_ctx *pipe) > >> { > >> bool psr_capable = false; > >> -if (pipe->stream && > >> pipe->stream->link->psr_settings.psr_version != > >> DC_PSR_VERSION_UNSUPPORTED) { > >> +if (pipe->stream && pipe->stream->link->psr_settings.psr_version > >> != DC_PSR_VERSION_UNSUPPORTED) > >> psr_capable = true; > >> -} > >> + > >> return psr_capable; > >> } > >> @@ -278,9 +276,9 @@ static void override_det_for_subvp(struct dc > >> *dc, struct dc_state *context, uint > >> if (pipe_ctx->stream && pipe_ctx->plane_state && > >> dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { > >> if (dcn32_allow_subvp_high_refresh_rate(dc, context, > >> pipe_ctx)) { > >> -if (pipe_ctx->stream->timing.v_addressable == 1080 > >> && pipe_ctx->stream->timing.h_addressable == 1920) { > >> +if (pipe_ctx->stream->timing.v_addressable == 1080 > >> && pipe_ctx->stream->timing.h_addressable == 1920) > >> fhd_count++; > >> -} > >> + > >> subvp_high_refresh_count++; > >> } > >> } > > >
Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()
On Fri, 23 Feb 2024 12:56:34 -0500 Steven Rostedt wrote: > Note, the same updates will need to be done for: > > __assign_str_len() > __assign_rel_str() > __assign_rel_str_len() Correction: The below macros do not pass in their source to the entry macros, so they will not need to be updated. -- Steve > __assign_bitmask() > __assign_rel_bitmask() > __assign_cpumask() > __assign_rel_cpumask()
Re: [PATCH] drm/amd/amdgpu: Fix potential ioremap() memory leaks in amdgpu_device_init()
Hi Christian, On 2/26/2024 1:46 PM, Christian König wrote: Am 24.02.24 um 07:38 schrieb Srinivasan Shanmugam: This ensures that the memory mapped by ioremap for adev->rmmio, is properly handled in amdgpu_device_init(). If the function exits early due to an error, the memory is unmapped. If the function completes successfully, the memory remains mapped. Fixes the below: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:4337 amdgpu_device_init() warn: 'adev->rmmio' from ioremap() not released on lines: 4035,4045,4051,4058,4068,4337 Hui? How do you got that warning? It was caught by smatch & will update the same in the commit message. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 ++ 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1ef892bea488..68bf5e910cb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4031,8 +4031,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, * early on during init and before calling to RREG32. */ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); - if (!adev->reset_domain) + if (!adev->reset_domain) { + iounmap(adev->rmmio); return -ENOMEM; + } Please use a goto label and error handling instead. Apart from that looks good to me. Thanks a lot for all your reviews, highly appreciate it, will send v2 for this. Best Wishes, Srini Regards, Christian. /* detect hw virtualization here */ amdgpu_detect_virtualization(adev); @@ -4042,20 +4044,25 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_device_get_job_timeout_settings(adev); if (r) { dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); + iounmap(adev->rmmio); return r; } /* early init functions */ r = amdgpu_device_ip_early_init(adev); - if (r) + if (r) { + iounmap(adev->rmmio); return r; + } amdgpu_device_set_mcbp(adev); /* Get rid of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, _kms_driver); - if (r) + if (r) { + iounmap(adev->rmmio); return r; + } /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); @@ -4064,8 +4071,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* Need to get xgmi info early to decide the reset behavior*/ if (adev->gmc.xgmi.supported) { r = adev->gfxhub.funcs->get_xgmi_info(adev); - if (r) + if (r) { + iounmap(adev->rmmio); return r; + } } /* enable PCIE atomic ops */ @@ -4334,6 +4343,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, failed: amdgpu_vf_error_trans_all(adev); + iounmap(adev->rmmio); return r; }
Re: [PATCH] drm/amd/amdgpu: Fix potential ioremap() memory leaks in amdgpu_device_init()
Am 24.02.24 um 07:38 schrieb Srinivasan Shanmugam: This ensures that the memory mapped by ioremap for adev->rmmio, is properly handled in amdgpu_device_init(). If the function exits early due to an error, the memory is unmapped. If the function completes successfully, the memory remains mapped. Fixes the below: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:4337 amdgpu_device_init() warn: 'adev->rmmio' from ioremap() not released on lines: 4035,4045,4051,4058,4068,4337 Hui? How do you got that warning? Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 ++ 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1ef892bea488..68bf5e910cb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4031,8 +4031,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, * early on during init and before calling to RREG32. */ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); - if (!adev->reset_domain) + if (!adev->reset_domain) { + iounmap(adev->rmmio); return -ENOMEM; + } Please use a goto label and error handling instead. Apart from that looks good to me. Regards, Christian. /* detect hw virtualization here */ amdgpu_detect_virtualization(adev); @@ -4042,20 +4044,25 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_device_get_job_timeout_settings(adev); if (r) { dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); + iounmap(adev->rmmio); return r; } /* early init functions */ r = amdgpu_device_ip_early_init(adev); - if (r) + if (r) { + iounmap(adev->rmmio); return r; + } amdgpu_device_set_mcbp(adev); /* Get rid of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, _kms_driver); - if (r) + if (r) { + iounmap(adev->rmmio); return r; + } /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); @@ -4064,8 +4071,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* Need to get xgmi info early to decide the reset behavior*/ if (adev->gmc.xgmi.supported) { r = adev->gfxhub.funcs->get_xgmi_info(adev); - if (r) + if (r) { + iounmap(adev->rmmio); return r; + } } /* enable PCIE atomic ops */ @@ -4334,6 +4343,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, failed: amdgpu_vf_error_trans_all(adev); + iounmap(adev->rmmio); return r; }