RE: [PATCH] Revert "drm/amd/pm: resolve reboot exception for si oland"

2024-02-26 Thread Wang, Yang(Kevin)
[AMD Official Use Only - General]

Reviewed-by: Yang Wang 

Best Regards,
Kevin

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Tuesday, February 27, 2024 6:47 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: [PATCH] Revert "drm/amd/pm: resolve reboot exception for si oland"

This reverts commit e490d60a2f76bff636c68ce4fe34c1b6c34bbd86.

This causes hangs on SI when DC is enabled and errors on driver reboot and 
power off cycles.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3216
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2755
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 29 ++
 1 file changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c 
b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index e145d53a794ba..a586e0b7c47d1 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -6927,6 +6927,23 @@ static int si_dpm_enable(struct amdgpu_device *adev)
return 0;
 }

+static int si_set_temperature_range(struct amdgpu_device *adev) {
+   int ret;
+
+   ret = si_thermal_enable_alert(adev, false);
+   if (ret)
+   return ret;
+   ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, 
R600_TEMP_RANGE_MAX);
+   if (ret)
+   return ret;
+   ret = si_thermal_enable_alert(adev, true);
+   if (ret)
+   return ret;
+
+   return ret;
+}
+
 static void si_dpm_disable(struct amdgpu_device *adev)  {
struct rv7xx_power_info *pi = rv770_get_pi(adev); @@ -7610,6 +7627,18 
@@ static int si_dpm_process_interrupt(struct amdgpu_device *adev,

 static int si_dpm_late_init(void *handle)  {
+   int ret;
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+   if (!adev->pm.dpm_enabled)
+   return 0;
+
+   ret = si_set_temperature_range(adev);
+   if (ret)
+   return ret;
+#if 0 //TODO ?
+   si_dpm_powergate_uvd(adev, true);
+#endif
return 0;
 }

--
2.43.2



[PATCH] Revert "drm/amd/pm: resolve reboot exception for si oland"

2024-02-26 Thread Alex Deucher
This reverts commit e490d60a2f76bff636c68ce4fe34c1b6c34bbd86.

This causes hangs on SI when DC is enabled and errors on driver
reboot and power off cycles.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3216
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2755
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 29 ++
 1 file changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c 
b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index e145d53a794ba..a586e0b7c47d1 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -6927,6 +6927,23 @@ static int si_dpm_enable(struct amdgpu_device *adev)
return 0;
 }
 
+static int si_set_temperature_range(struct amdgpu_device *adev)
+{
+   int ret;
+
+   ret = si_thermal_enable_alert(adev, false);
+   if (ret)
+   return ret;
+   ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, 
R600_TEMP_RANGE_MAX);
+   if (ret)
+   return ret;
+   ret = si_thermal_enable_alert(adev, true);
+   if (ret)
+   return ret;
+
+   return ret;
+}
+
 static void si_dpm_disable(struct amdgpu_device *adev)
 {
struct rv7xx_power_info *pi = rv770_get_pi(adev);
@@ -7610,6 +7627,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device 
*adev,
 
 static int si_dpm_late_init(void *handle)
 {
+   int ret;
+   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+   if (!adev->pm.dpm_enabled)
+   return 0;
+
+   ret = si_set_temperature_range(adev);
+   if (ret)
+   return ret;
+#if 0 //TODO ?
+   si_dpm_powergate_uvd(adev, true);
+#endif
return 0;
 }
 
-- 
2.43.2



[RFC PATCH v4 38/42] drm/colorop: Add 1D Curve Custom LUT type

2024-02-26 Thread Harry Wentland
From: Alex Hung 

We've previously introduced DRM_COLOROP_1D_CURVE for
pre-defined 1D curves. But we also have HW that supports
custom curves and userspace needs the ability to pass
custom curves, aka LUTs.

This patch introduces a new colorop type, called
DRM_COLOROP_1D_LUT that provides a SIZE property which
is used by a driver to advertise the supported SIZE
of the LUT, as well as a DATA property which userspace
uses to set the LUT.

DATA and size function in the same way as current drm_crtc
GAMMA and DEGAMMA LUTs.

Signed-off-by: Alex Hung 
Signed-off-by: Harry Wentland 
Co-developed-by: Harry Wentland 
---
 drivers/gpu/drm/drm_atomic.c  |  4 
 drivers/gpu/drm/drm_atomic_uapi.c |  5 +
 drivers/gpu/drm/drm_colorop.c | 36 +--
 include/drm/drm_colorop.h | 16 ++
 include/uapi/drm/drm_mode.h   |  1 +
 5 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 6e736ffa6d7c..f7d51839ca03 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -799,6 +799,10 @@ static void drm_atomic_colorop_print_state(struct 
drm_printer *p,
drm_printf(p, "\tcurve_1d_type=%s\n",
   
drm_get_colorop_curve_1d_type_name(state->curve_1d_type));
break;
+   case DRM_COLOROP_1D_LUT:
+   drm_printf(p, "\tsize=%d\n", state->size);
+   drm_printf(p, "\tdata blob id=%d\n", state->data ? 
state->data->base.id : 0);
+   break;
case DRM_COLOROP_CTM_3X4:
drm_printf(p, "\tdata blob id=%d\n", state->data ? 
state->data->base.id : 0);
break;
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index c7c1c614b0d9..6bfe857720cd 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -700,6 +700,9 @@ static int drm_atomic_color_set_data_property(struct 
drm_colorop *colorop,
bool replaced = false;
 
switch (colorop->type) {
+   case DRM_COLOROP_1D_LUT:
+   size = state->size * sizeof(struct drm_color_lut);
+   break;
case DRM_COLOROP_CTM_3X4:
size = sizeof(struct drm_color_ctm_3x4);
break;
@@ -749,6 +752,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop,
*val = state->bypass;
} else if (property == colorop->curve_1d_type_property) {
*val = state->curve_1d_type;
+   } else if (property == colorop->size_property) {
+   *val = state->size;
} else if (property == colorop->data_property) {
*val = (state->data) ? state->data->base.id : 0;
} else {
diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c
index b10cad5a7208..4452eaeeb242 100644
--- a/drivers/gpu/drm/drm_colorop.c
+++ b/drivers/gpu/drm/drm_colorop.c
@@ -34,6 +34,7 @@
 
 static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = {
{ DRM_COLOROP_1D_CURVE, "1D Curve" },
+   { DRM_COLOROP_1D_LUT, "1D Curve Custom LUT" },
{ DRM_COLOROP_CTM_3X4, "3x4 Matrix"}
 };
 
@@ -175,11 +176,41 @@ static int drm_colorop_create_data_prop(struct drm_device 
*dev, struct drm_color
 
colorop->data_property = prop;
drm_object_attach_property(>base,
-   colorop->data_property,
-   0);
+  colorop->data_property,
+  0);
 
return 0;
 }
+int drm_colorop_curve_1d_lut_init(struct drm_device *dev, struct drm_colorop 
*colorop,
+ struct drm_plane *plane, uint32_t lut_size)
+{
+   struct drm_property *prop;
+   int ret;
+
+   ret = drm_colorop_init(dev, colorop, plane, DRM_COLOROP_1D_LUT);
+   if (ret)
+   return ret;
+
+   /* initialize 1D LUT only attribute */
+   /* LUT size */
+   prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "SIZE", 
lut_size, lut_size);
+   if (!prop)
+   return -ENOMEM;
+
+   colorop->size_property = prop;
+   drm_object_attach_property(>base, colorop->size_property, 0);
+
+   /* data */
+   ret = drm_colorop_create_data_prop(dev, colorop);
+   if (ret)
+   return ret;
+
+   drm_colorop_reset(colorop);
+   colorop->state->size = lut_size;
+
+   return 0;
+}
+EXPORT_SYMBOL(drm_colorop_curve_1d_lut_init);
 
 int drm_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop 
*colorop,
 struct drm_plane *plane)
@@ -301,6 +332,7 @@ EXPORT_SYMBOL(drm_colorop_reset);
 
 static const char * const colorop_type_name[] = {
[DRM_COLOROP_1D_CURVE] = "1D Curve",
+   [DRM_COLOROP_1D_LUT] = "1D Curve Custom LUT",
[DRM_COLOROP_CTM_3X4] = "3x4 Matrix"
 };
 
diff --git 

[RFC PATCH v4 33/42] drm/amd/display: Add support for sRGB EOTF in BLND block

2024-02-26 Thread Harry Wentland
From: Alex Hung 

Expose a 3rd 1D curve colorop, with support for
DRM_COLOROP_1D_CURVE_SRGB_EOTF and program the BLND block
to perform the sRGB transform when the colorop is not in
bypass

With this change the following IGT test passes:
kms_colorop --run plane-XR30-XR30-srgb_eotf-srgb_inv_eotf-srgb_eotf

The color pipeline now consists of the following colorops:
1. 1D curve colorop w/ sRGB EOTF support
2. 1D curve colorop w/ sRGB Inverse EOTF support
3. 1D curve colorop w/ sRGB EOTF support

Signed-off-by: Alex Hung 
Signed-off-by: Harry Wentland 
Co-developed-by: Harry Wentland 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 77 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 18 +
 .../amd/display/amdgpu_dm/amdgpu_dm_colorop.h |  1 +
 3 files changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 8788cfd26abd..3e3ae2b58b06 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1267,6 +1267,72 @@ __set_dm_plane_colorop_shaper(struct drm_plane_state 
*plane_state,
return __set_colorop_in_shaper_1d_curve(dc_plane_state, colorop_state);
 }
 
+
+static int
+__set_colorop_1d_curve_blend_tf_lut(struct dc_plane_state *dc_plane_state,
+ struct drm_colorop_state *colorop_state)
+{
+
+   struct dc_transfer_func *tf = dc_plane_state->blend_tf;
+   struct drm_colorop *colorop = colorop_state->colorop;
+   struct drm_device *drm = colorop->dev;
+   const struct drm_color_lut *blend_lut;
+   uint32_t blend_size;
+
+   if (colorop->type != DRM_COLOROP_1D_CURVE &&
+   colorop_state->curve_1d_type != DRM_COLOROP_1D_CURVE_SRGB_EOTF)
+   return -EINVAL;
+
+   if (colorop_state->bypass) {
+   tf->type = TF_TYPE_BYPASS;
+   tf->tf = TRANSFER_FUNCTION_LINEAR;
+   return 0;
+   }
+
+   drm_dbg(drm, "Blend colorop with ID: %d\n", colorop->base.id);
+
+   if (colorop->type == DRM_COLOROP_1D_CURVE) {
+   tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+   tf->tf = 
amdgpu_colorop_tf_to_dc_tf(colorop_state->curve_1d_type);
+   tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+   return __set_input_tf(NULL, tf, blend_lut, blend_size);
+   }
+
+   return -EINVAL;
+}
+
+static int
+__set_dm_plane_colorop_blend(struct drm_plane_state *plane_state,
+struct dc_plane_state *dc_plane_state,
+struct drm_colorop *colorop)
+{
+   struct drm_colorop *old_colorop;
+   struct drm_colorop_state *colorop_state = NULL, *new_colorop_state;
+   struct drm_atomic_state *state = plane_state->state;
+   int i = 0;
+
+   old_colorop = colorop;
+
+   /* 3nd op: 1d curve - blend */
+   for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
+   if (new_colorop_state->colorop == old_colorop &&
+   new_colorop_state->curve_1d_type == 
DRM_COLOROP_1D_CURVE_SRGB_EOTF) {
+   colorop_state = new_colorop_state;
+   break;
+   }
+
+   if (new_colorop_state->colorop == old_colorop) {
+   colorop_state = new_colorop_state;
+   break;
+   }
+   }
+
+   if (!colorop_state)
+   return -EINVAL;
+
+   return __set_colorop_1d_curve_blend_tf_lut(dc_plane_state, 
colorop_state);
+}
+
 static int
 amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
 struct dc_plane_state *dc_plane_state)
@@ -1345,6 +1411,17 @@ amdgpu_dm_plane_set_colorop_properties(struct 
drm_plane_state *plane_state,
if (ret)
return ret;
 
+   /* 1D Curve - BLND TF */
+   colorop = colorop->next;
+   if (!colorop) {
+   drm_dbg(dev, "no Blend TF colorop found\n");
+   return -EINVAL;
+   }
+
+   ret = __set_dm_plane_colorop_blend(plane_state, dc_plane_state, 
colorop);
+   if (ret)
+   return ret;
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
index 0d1626abf577..449a2ad6a184 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
@@ -37,6 +37,9 @@ const u64 amdgpu_dm_supported_degam_tfs =
  const u64 amdgpu_dm_supported_shaper_tfs =
BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF);
 
+const u64 amdgpu_dm_supported_blnd_tfs =
+   BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF);
+
 int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct 
drm_prop_enum_list *list)
 {
struct drm_colorop *op, *prev_op;
@@ -72,5 

[RFC PATCH v4 08/42] drm/doc/rfc: Describe why prescriptive color pipeline is needed

2024-02-26 Thread Harry Wentland
v4:
 - Drop IOCTL docs since we dropped the IOCTLs (Pekka)
 - Clarify reading and setting of COLOR_PIPELINE prop (Pekka)
 - Add blurb about not requiring to reject a pipeline due to
   incompatible ops, as long as op can be bypassed (Pekka)
 - Dropped informational strings (such as input CSC) as they're
   not actually intended to be advertised (Pekka)

v3:
 - Describe DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE (Sebastian)
 - Ask for clear documentation of colorop behavior (Sebastian)

v2:
 - Update colorop visualizations to match reality (Sebastian, Alex Hung)
 - Updated wording (Pekka)
 - Change BYPASS wording to make it non-mandatory (Sebastian)
 - Drop cover-letter-like paragraph from COLOR_PIPELINE Plane Property
   section (Pekka)
 - Use PQ EOTF instead of its inverse in Pipeline Programming example (Melissa)
 - Add "Driver Implementer's Guide" section (Pekka)
 - Add "Driver Forward/Backward Compatibility" section (Sebastian, Pekka)

Signed-off-by: Harry Wentland 
---
 Documentation/gpu/rfc/color_pipeline.rst | 360 +++
 1 file changed, 360 insertions(+)
 create mode 100644 Documentation/gpu/rfc/color_pipeline.rst

diff --git a/Documentation/gpu/rfc/color_pipeline.rst 
b/Documentation/gpu/rfc/color_pipeline.rst
new file mode 100644
index ..6c653e17054a
--- /dev/null
+++ b/Documentation/gpu/rfc/color_pipeline.rst
@@ -0,0 +1,360 @@
+
+Linux Color Pipeline API
+
+
+What problem are we solving?
+
+
+We would like to support pre-, and post-blending complex color
+transformations in display controller hardware in order to allow for
+HW-supported HDR use-cases, as well as to provide support to
+color-managed applications, such as video or image editors.
+
+It is possible to support an HDR output on HW supporting the Colorspace
+and HDR Metadata drm_connector properties, but that requires the
+compositor or application to render and compose the content into one
+final buffer intended for display. Doing so is costly.
+
+Most modern display HW offers various 1D LUTs, 3D LUTs, matrices, and other
+operations to support color transformations. These operations are often
+implemented in fixed-function HW and therefore much more power efficient than
+performing similar operations via shaders or CPU.
+
+We would like to make use of this HW functionality to support complex color
+transformations with no, or minimal CPU or shader load.
+
+
+How are other OSes solving this problem?
+
+
+The most widely supported use-cases regard HDR content, whether video or
+gaming.
+
+Most OSes will specify the source content format (color gamut, encoding 
transfer
+function, and other metadata, such as max and average light levels) to a 
driver.
+Drivers will then program their fixed-function HW accordingly to map from a
+source content buffer's space to a display's space.
+
+When fixed-function HW is not available the compositor will assemble a shader 
to
+ask the GPU to perform the transformation from the source content format to the
+display's format.
+
+A compositor's mapping function and a driver's mapping function are usually
+entirely separate concepts. On OSes where a HW vendor has no insight into
+closed-source compositor code such a vendor will tune their color management
+code to visually match the compositor's. On other OSes, where both mapping
+functions are open to an implementer they will ensure both mappings match.
+
+This results in mapping algorithm lock-in, meaning that no-one alone can
+experiment with or introduce new mapping algorithms and achieve
+consistent results regardless of which implementation path is taken.
+
+Why is Linux different?
+===
+
+Unlike other OSes, where there is one compositor for one or more drivers, on
+Linux we have a many-to-many relationship. Many compositors; many drivers.
+In addition each compositor vendor or community has their own view of how
+color management should be done. This is what makes Linux so beautiful.
+
+This means that a HW vendor can now no longer tune their driver to one
+compositor, as tuning it to one could make it look fairly different from
+another compositor's color mapping.
+
+We need a better solution.
+
+
+Descriptive API
+===
+
+An API that describes the source and destination colorspaces is a descriptive
+API. It describes the input and output color spaces but does not describe
+how precisely they should be mapped. Such a mapping includes many minute
+design decision that can greatly affect the look of the final result.
+
+It is not feasible to describe such mapping with enough detail to ensure the
+same result from each implementation. In fact, these mappings are a very active
+research area.
+
+
+Prescriptive API
+
+
+A prescriptive API describes not the source and destination colorspaces. It
+instead prescribes a recipe for how to manipulate pixel values to arrive 

[RFC PATCH v4 02/42] drm: Add helper for conversion from signed-magnitude

2024-02-26 Thread Harry Wentland
CTM values are defined as signed-magnitude values. Add
a helper that converts from CTM signed-magnitude fixed
point value to the twos-complement value used by
drm_fixed.

Signed-off-by: Harry Wentland 
---
 include/drm/drm_fixed.h | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h
index 0c9f917a4d4b..cb842ba80ddd 100644
--- a/include/drm/drm_fixed.h
+++ b/include/drm/drm_fixed.h
@@ -78,6 +78,24 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B)
 #define DRM_FIXED_EPSILON  1LL
 #define DRM_FIXED_ALMOST_ONE   (DRM_FIXED_ONE - DRM_FIXED_EPSILON)
 
+/**
+ * @drm_sm2fixp
+ *
+ * Convert a 1.31.32 signed-magnitude fixed point to 32.32
+ * 2s-complement fixed point
+ *
+ * @return s64 2s-complement fixed point
+ */
+static inline s64 drm_sm2fixp(__u64 a)
+{
+   if ((a & (1LL << 63))) {
+   return -(a & 0x7fffll);
+   } else {
+   return a;
+   }
+
+}
+
 static inline s64 drm_int2fixp(int a)
 {
return ((s64)a) << DRM_FIXED_POINT;
-- 
2.44.0



[RFC PATCH v4 29/42] drm/amd/display: Add bypass COLOR PIPELINE

2024-02-26 Thread Harry Wentland
Add the default Bypass pipeline and ensure it passes the
kms_colorop test plane-XR30-XR30-bypass.

Signed-off-by: Harry Wentland 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 8a4c40b4c27e..c5c07b4cd6c9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1651,6 +1651,20 @@ dm_atomic_plane_get_property(struct drm_plane *plane,
 }
 #endif
 
+#define MAX_COLOR_PIPELINES 5
+
+static int
+dm_plane_init_colorops(struct drm_plane *plane)
+{
+   struct drm_prop_enum_list pipelines[MAX_COLOR_PIPELINES];
+   int len = 0;
+
+   /* Create COLOR_PIPELINE property and attach */
+   drm_plane_create_color_pipeline_property(plane, pipelines, len);
+
+   return 0;
+}
+
 static const struct drm_plane_funcs dm_plane_funcs = {
.update_plane   = drm_atomic_helper_update_plane,
.disable_plane  = drm_atomic_helper_disable_plane,
@@ -1744,7 +1758,12 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager 
*dm,
 
 #ifdef AMD_PRIVATE_COLOR
dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
+#else
+   res = dm_plane_init_colorops(plane);
+   if (res)
+   return res;
 #endif
+
/* Create (reset) the plane state */
if (plane->funcs->reset)
plane->funcs->reset(plane);
-- 
2.44.0



[RFC PATCH v4 27/42] drm/colorop: define a new macro for_each_new_colorop_in_state

2024-02-26 Thread Harry Wentland
From: Alex Hung 

Create a new macro for_each_new_colorop_in_state to access new
drm_colorop_state updated from uapi.

Signed-off-by: Alex Hung 
---
 include/drm/drm_atomic.h | 20 
 1 file changed, 20 insertions(+)

diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index 61e6b5553eec..a4c5ff99a515 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -979,6 +979,26 @@ void drm_state_dump(struct drm_device *dev, struct 
drm_printer *p);
  (new_colorop_state) = 
(__state)->colorops[__i].new_state, 1))
 
 
+/**
+ * for_each_new_colorop_in_state - iterate over all colorops in an atomic 
update
+ * @__state:  drm_atomic_state pointer
+ * @colorop:  drm_colorop iteration cursor
+ * @new_colorop_state:  drm_colorop_state iteration cursor for the new 
state
+ * @__i: int iteration cursor, for macro-internal use
+ *
+ * This iterates over all colorops in an atomic update, tracking new state. 
This is
+ * useful is useful in places where the state delta needs to be considered, for
+ * example in atomic check functions.
+ */
+#define for_each_new_colorop_in_state(__state, colorop, new_colorop_state, 
__i) \
+   for ((__i) = 0; \
+(__i) < (__state)->dev->mode_config.num_colorop;   \
+(__i)++)   \
+   for_each_if ((__state)->colorops[__i].ptr &&\
+((colorop) = (__state)->colorops[__i].ptr, \
+ (void)(colorop) /* Only to avoid 
unused-but-set-variable warning */, \
+ (new_colorop_state) = 
(__state)->colorops[__i].new_state, 1))
+
 /**
  * for_each_oldnew_plane_in_state - iterate over all planes in an atomic update
  * @__state:  drm_atomic_state pointer
-- 
2.44.0



[RFC PATCH v4 30/42] drm/amd/display: Skip color pipeline initialization for cursor plane

2024-02-26 Thread Harry Wentland
From: Alex Hung 

Signed-off-by: Alex Hung 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index c5c07b4cd6c9..d3f64f586243 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1659,6 +1659,9 @@ dm_plane_init_colorops(struct drm_plane *plane)
struct drm_prop_enum_list pipelines[MAX_COLOR_PIPELINES];
int len = 0;
 
+   if (plane->type == DRM_PLANE_TYPE_CURSOR)
+   return 0;
+
/* Create COLOR_PIPELINE property and attach */
drm_plane_create_color_pipeline_property(plane, pipelines, len);
 
-- 
2.44.0



[RFC PATCH v4 32/42] drm/amd/display: Add support for sRGB Inverse EOTF in SHAPER block

2024-02-26 Thread Harry Wentland
From: Alex Hung 

Expose a 2nd curve colorop with support for
DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF and program HW to
perform the sRGB Inverse EOTF on the shaper block
when the colorop is not in bypass.

With this change the follow IGT tests pass:
kms_colorop --run plane-XR30-XR30-srgb_inv_eotf
kms_colorop --run plane-XR30-XR30-srgb_eotf-srgb_inv_eotf

The color pipeline now consists of the following colorops:
1. 1D curve colorop w/ sRGB EOTF support
2. 1D curve colorop w/ sRGB Inverse EOTF support

Signed-off-by: Alex Hung 
Signed-off-by: Harry Wentland 
Co-developed-by: Harry Wentland 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 76 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 20 -
 .../amd/display/amdgpu_dm/amdgpu_dm_colorop.h |  1 +
 3 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 3ec759934669..8788cfd26abd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1203,6 +1203,70 @@ __set_dm_plane_colorop_degamma(struct drm_plane_state 
*plane_state,
return __set_colorop_in_tf_1d_curve(dc_plane_state, colorop_state);
 }
 
+static int
+__set_colorop_in_shaper_1d_curve(struct dc_plane_state *dc_plane_state,
+  struct drm_colorop_state *colorop_state)
+{
+   struct dc_transfer_func *tf = dc_plane_state->in_shaper_func;
+   struct drm_colorop *colorop = colorop_state->colorop;
+   struct drm_device *drm = colorop->dev;
+   const struct drm_color_lut *shaper_lut;
+   uint32_t shaper_size;
+
+   if (colorop->type != DRM_COLOROP_1D_CURVE &&
+   colorop_state->curve_1d_type != DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF)
+   return -EINVAL;
+
+   if (colorop_state->bypass) {
+   tf->type = TF_TYPE_BYPASS;
+   tf->tf = TRANSFER_FUNCTION_LINEAR;
+   return 0;
+   }
+
+   drm_dbg(drm, "Shaper colorop with ID: %d\n", colorop->base.id);
+
+   if (colorop->type == DRM_COLOROP_1D_CURVE) {
+   tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+   tf->tf = 
amdgpu_colorop_tf_to_dc_tf(colorop_state->curve_1d_type);
+   tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+   return __set_output_tf(tf, shaper_lut, shaper_size, false);
+   }
+
+   return -EINVAL;
+}
+
+static int
+__set_dm_plane_colorop_shaper(struct drm_plane_state *plane_state,
+ struct dc_plane_state *dc_plane_state,
+ struct drm_colorop *colorop)
+{
+   struct drm_colorop *old_colorop;
+   struct drm_colorop_state *colorop_state = NULL, *new_colorop_state;
+   struct drm_atomic_state *state = plane_state->state;
+   int i = 0;
+
+   old_colorop = colorop;
+
+   /* 2nd op: 1d curve - shaper */
+   for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
+   if (new_colorop_state->colorop == old_colorop &&
+   new_colorop_state->curve_1d_type == 
DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) {
+   colorop_state = new_colorop_state;
+   break;
+   }
+
+   if (new_colorop_state->colorop == old_colorop) {
+   colorop_state = new_colorop_state;
+   break;
+   }
+   }
+
+   if (!colorop_state)
+   return -EINVAL;
+
+   return __set_colorop_in_shaper_1d_curve(dc_plane_state, colorop_state);
+}
+
 static int
 amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
 struct dc_plane_state *dc_plane_state)
@@ -1258,6 +1322,7 @@ amdgpu_dm_plane_set_colorop_properties(struct 
drm_plane_state *plane_state,
   struct dc_plane_state *dc_plane_state)
 {
struct drm_colorop *colorop = plane_state->color_pipeline;
+   struct drm_device *dev = plane_state->plane->dev;
int ret;
 
/* 1D Curve - DEGAM TF */
@@ -1269,6 +1334,17 @@ amdgpu_dm_plane_set_colorop_properties(struct 
drm_plane_state *plane_state,
if (ret)
return ret;
 
+   /* 1D Curve - SHAPER TF */
+   colorop = colorop->next;
+   if (!colorop) {
+   drm_dbg(dev, "no Shaper TF colorop found\n");
+   return -EINVAL;
+   }
+
+   ret = __set_dm_plane_colorop_shaper(plane_state, dc_plane_state, 
colorop);
+   if (ret)
+   return ret;
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
index e8b7fc8bb0f1..0d1626abf577 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
@@ -34,9 +34,12 @@
 const u64 

[RFC PATCH v4 40/42] drm/amd/display: add 3x4 matrix colorop

2024-02-26 Thread Harry Wentland
From: Alex Hung 

This adds support for a 3x4 color transformation matrix.

With this change the following IGT tests pass:
kms_colorop --run plane-XR30-XR30-ctm_3x4_50_desat
kms_colorop --run plane-XR30-XR30-ctm_3x4_overdrive
kms_colorop --run plane-XR30-XR30-ctm_3x4_oversaturate
kms_colorop --run plane-XR30-XR30-ctm_3x4_bt709_enc
kms_colorop --run plane-XR30-XR30-ctm_3x4_bt709_dec

The color pipeline now consists of the following colorops:
1. 1D curve colorop
2. 3x4 CTM
3. 1D curve colorop
4. 1D LUT
5. 1D curve colorop
6. 1D LUT

Signed-off-by: Alex Hung 
Signed-off-by: Harry Wentland 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 50 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 15 ++
 2 files changed, 65 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index e7b51b29cc04..ef50640b362b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1212,6 +1212,45 @@ __set_dm_plane_colorop_degamma(struct drm_plane_state 
*plane_state,
return __set_colorop_in_tf_1d_curve(dc_plane_state, colorop_state);
 }
 
+static int
+__set_dm_plane_colorop_3x4_matrix(struct drm_plane_state *plane_state,
+ struct dc_plane_state *dc_plane_state,
+ struct drm_colorop *colorop)
+{
+   struct drm_colorop *old_colorop;
+   struct drm_colorop_state *colorop_state = NULL, *new_colorop_state;
+   struct drm_atomic_state *state = plane_state->state;
+   const struct drm_device *dev = colorop->dev;
+   const struct drm_property_blob *blob;
+   struct drm_color_ctm_3x4 *ctm = NULL;
+   int i = 0;
+
+   /* 3x4 matrix */
+   old_colorop = colorop;
+   for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
+   if (new_colorop_state->colorop == old_colorop &&
+   new_colorop_state->colorop->type == DRM_COLOROP_CTM_3X4) {
+   colorop_state = new_colorop_state;
+   break;
+   }
+   }
+
+   if (colorop_state && !colorop_state->bypass && colorop->type == 
DRM_COLOROP_CTM_3X4) {
+   drm_dbg(dev, "3x4 matrix colorop with ID: %d\n", 
colorop->base.id);
+   blob = colorop_state->data;
+   if (blob->length == sizeof(struct drm_color_ctm_3x4)) {
+   ctm = blob ? (struct drm_color_ctm_3x4 *) blob->data : 
NULL;
+   __drm_ctm_3x4_to_dc_matrix(ctm, 
dc_plane_state->gamut_remap_matrix.matrix);
+   dc_plane_state->gamut_remap_matrix.enable_remap = true;
+   
dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
+   } else
+   drm_warn(dev, "blob->length (%ld) isn't equal to 
drm_color_ctm_3x4 (%ld)\n",
+blob->length, sizeof(struct 
drm_color_ctm_3x4));
+   }
+
+   return 0;
+}
+
 static int
 __set_dm_plane_colorop_shaper(struct drm_plane_state *plane_state,
  struct dc_plane_state *dc_plane_state,
@@ -1411,6 +1450,17 @@ amdgpu_dm_plane_set_colorop_properties(struct 
drm_plane_state *plane_state,
if (ret)
return ret;
 
+   /* 3x4 matrix */
+   colorop = colorop->next;
+   if (!colorop) {
+   drm_dbg(dev, "no 3x4 matrix colorop found\n");
+   return -EINVAL;
+   }
+
+   ret = __set_dm_plane_colorop_3x4_matrix(plane_state, dc_plane_state, 
colorop);
+   if (ret)
+   return ret;
+
/* 1D Curve & LUT - SHAPER TF & LUT */
colorop = colorop->next;
if (!colorop) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
index 08480bf61dc5..ba42f1f6b620 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
@@ -69,6 +69,21 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane 
*plane, struct drm_pr
 
prev_op = op;
 
+   /* 3x4 matrix */
+   op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
+   if (!op) {
+   DRM_ERROR("KMS: Failed to allocate colorop\n");
+   return -ENOMEM;
+   }
+
+   ret = drm_colorop_ctm_3x4_init(dev, op, plane);
+   if (ret)
+   return ret;
+
+   drm_colorop_set_next_property(prev_op, op);
+
+   prev_op = op;
+
/* 1D curve - SHAPER TF */
op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
if (!op) {
-- 
2.44.0



[RFC PATCH v4 26/42] drm/colorop: pass plane_color_pipeline client cap to atomic check

2024-02-26 Thread Harry Wentland
Drivers will need to know whether an atomic check/commit
originated from a client with DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE
so they can ignore deprecated properties, like COLOR_ENCODING
and COLOR_RANGE.

Pass the plane_color_pipeline bit to drm_atomic_state.

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/drm_atomic_uapi.c |  1 +
 include/drm/drm_atomic.h  | 17 +
 2 files changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index 23b248987a7c..c7c1c614b0d9 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -1571,6 +1571,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
drm_modeset_acquire_init(, DRM_MODESET_ACQUIRE_INTERRUPTIBLE);
state->acquire_ctx = 
state->allow_modeset = !!(arg->flags & DRM_MODE_ATOMIC_ALLOW_MODESET);
+   state->plane_color_pipeline = file_priv->plane_color_pipeline;
 
 retry:
copied_objs = 0;
diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index 2346f19eda9f..61e6b5553eec 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -404,6 +404,23 @@ struct drm_atomic_state {
 * states.
 */
bool duplicated : 1;
+
+   /**
+* @plane_color_pipeline : 1
+*
+* Indicates whether this atomic state originated with a client that
+* set the DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE.
+*
+* Drivers and helper functions should use this to ignore legacy
+* properties that are incompatible with the drm_plane COLOR_PIPELINE
+* behavior, such as
+*  - COLOR_RANGE
+*  - COLOR_ENCODING
+* or any other driver-specific properties that might affect pixel
+* values.
+*/
+   bool plane_color_pipeline : 1;
+
struct __drm_colorops_state *colorops;
struct __drm_planes_state *planes;
struct __drm_crtcs_state *crtcs;
-- 
2.44.0



[RFC PATCH v4 13/42] drm/colorop: Add NEXT property

2024-02-26 Thread Harry Wentland
We'll construct color pipelines out of drm_colorop by
chaining them via the NEXT pointer. NEXT will point to
the next drm_colorop in the pipeline, or by 0 if we're
at the end of the pipeline.

v4:
 - Allow setting of NEXT property to NULL (Chaitanya Kumar Borah)

v3:
 - Add next pointer to colorop to be used by drivers
   and in DRM core

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/drm_colorop.c | 29 +
 include/drm/drm_colorop.h | 20 
 2 files changed, 49 insertions(+)

diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c
index 29979816a2d1..71c2286333a1 100644
--- a/drivers/gpu/drm/drm_colorop.c
+++ b/drivers/gpu/drm/drm_colorop.c
@@ -59,6 +59,7 @@ int drm_colorop_init(struct drm_device *dev, struct 
drm_colorop *colorop,
colorop->dev = dev;
colorop->type = type;
colorop->plane = plane;
+   colorop->next = NULL;
 
list_add_tail(>head, >colorop_list);
colorop->index = config->num_colorop++;
@@ -92,6 +93,15 @@ int drm_colorop_init(struct drm_device *dev, struct 
drm_colorop *colorop,
   colorop->bypass_property,
   1);
 
+   prop = drm_property_create_object(dev, DRM_MODE_PROP_IMMUTABLE | 
DRM_MODE_PROP_ATOMIC,
+ "NEXT", DRM_MODE_OBJECT_COLOROP);
+   if (!prop)
+   return -ENOMEM;
+   colorop->next_property = prop;
+   drm_object_attach_property(>base,
+  colorop->next_property,
+  0);
+
return ret;
 }
 EXPORT_SYMBOL(drm_colorop_init);
@@ -279,3 +289,22 @@ const char *drm_get_colorop_curve_1d_type_name(enum 
drm_colorop_curve_1d_type ty
 
return colorop_curve_1d_type_names[type];
 }
+
+/**
+ * drm_colorop_set_next_property - sets the next pointer
+ * @colorop: drm colorop
+ * @next: next colorop
+ *
+ * Should be used when constructing the color pipeline
+ */
+void drm_colorop_set_next_property(struct drm_colorop *colorop, struct 
drm_colorop *next)
+{
+   if (!colorop->next_property)
+   return;
+
+   drm_object_property_set_value(>base,
+ colorop->next_property,
+ next ? next->base.id : 0);
+   colorop->next = next;
+}
+EXPORT_SYMBOL(drm_colorop_set_next_property);
diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h
index 28aa5c1c309e..8060988b5892 100644
--- a/include/drm/drm_colorop.h
+++ b/include/drm/drm_colorop.h
@@ -132,6 +132,14 @@ struct drm_colorop {
 */
enum drm_colorop_type type;
 
+   /**
+* @next:
+*
+* Read-only
+* Pointer to next drm_colorop in pipeline
+*/
+   struct drm_colorop *next;
+
/**
 * @type_property:
 *
@@ -159,10 +167,20 @@ struct drm_colorop {
 */
struct drm_property *curve_1d_type_property;
 
+   /**
+* @next_property
+*
+* Read-only property to next colorop in the pipeline
+*/
+   struct drm_property *next_property;
+
 };
 
 #define obj_to_colorop(x) container_of(x, struct drm_colorop, base)
 
+
+
+
 /**
  * drm_crtc_find - look up a Colorop object from its ID
  * @dev: DRM device
@@ -213,5 +231,7 @@ static inline unsigned int drm_colorop_index(const struct 
drm_colorop *colorop)
 #define drm_for_each_colorop(colorop, dev) \
list_for_each_entry(colorop, &(dev)->mode_config.colorop_list, head)
 
+void drm_colorop_set_next_property(struct drm_colorop *colorop, struct 
drm_colorop *next);
+
 
 #endif /* __DRM_COLOROP_H__ */
-- 
2.44.0



[RFC PATCH v4 42/42] drm/amd/display: add multiplier colorop

2024-02-26 Thread Harry Wentland
From: Alex Hung 

This adds support for a multiplier. This multiplier is
programmed via the HDR Multiplier in DCN.

With this change the following IGT tests pass:
kms_colorop --run plane-XR30-XR30-multiply_125
kms_colorop --run plane-XR30-XR30-multiply_inv_125

The color pipeline now consists of the following colorops:
1. 1D curve colorop
2. 3x4 CTM
3. Multiplier
4. 1D curve colorop
5. 1D LUT
6. 1D curve colorop
7. 1D LUT

Signed-off-by: Alex Hung 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 40 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 15 +++
 2 files changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index ef50640b362b..b05e4fea8a08 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1251,6 +1251,35 @@ __set_dm_plane_colorop_3x4_matrix(struct drm_plane_state 
*plane_state,
return 0;
 }
 
+static int
+__set_dm_plane_colorop_multiplier(struct drm_plane_state *plane_state,
+ struct dc_plane_state *dc_plane_state,
+ struct drm_colorop *colorop)
+{
+   struct drm_colorop *old_colorop;
+   struct drm_colorop_state *colorop_state = NULL, *new_colorop_state;
+   struct drm_atomic_state *state = plane_state->state;
+   const struct drm_device *dev = colorop->dev;
+   int i = 0;
+
+   /* Multiplier */
+   old_colorop = colorop;
+   for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
+   if (new_colorop_state->colorop == old_colorop &&
+   new_colorop_state->colorop->type == DRM_COLOROP_MULTIPLIER) 
{
+   colorop_state = new_colorop_state;
+   break;
+   }
+   }
+
+   if (colorop_state && !colorop_state->bypass && colorop->type == 
DRM_COLOROP_MULTIPLIER) {
+   drm_dbg(dev, "Multiplier colorop with ID: %d\n", 
colorop->base.id);
+   dc_plane_state->hdr_mult = 
amdgpu_dm_fixpt_from_s3132(colorop_state->multiplier);
+   }
+
+   return 0;
+}
+
 static int
 __set_dm_plane_colorop_shaper(struct drm_plane_state *plane_state,
  struct dc_plane_state *dc_plane_state,
@@ -1461,6 +1490,17 @@ amdgpu_dm_plane_set_colorop_properties(struct 
drm_plane_state *plane_state,
if (ret)
return ret;
 
+   /* Multiplier */
+   colorop = colorop->next;
+   if (!colorop) {
+   drm_dbg(dev, "no multiplier colorop found\n");
+   return -EINVAL;
+   }
+
+   ret = __set_dm_plane_colorop_multiplier(plane_state, dc_plane_state, 
colorop);
+   if (ret)
+   return ret;
+
/* 1D Curve & LUT - SHAPER TF & LUT */
colorop = colorop->next;
if (!colorop) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
index ba42f1f6b620..b739d6cb3e6b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
@@ -84,6 +84,21 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane 
*plane, struct drm_pr
 
prev_op = op;
 
+   /* Multiplier */
+   op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
+   if (!op) {
+   DRM_ERROR("KMS: Failed to allocate colorop\n");
+   return -ENOMEM;
+   }
+
+   ret = drm_colorop_mult_init(dev, op, plane);
+   if (ret)
+   return ret;
+
+   drm_colorop_set_next_property(prev_op, op);
+
+   prev_op = op;
+
/* 1D curve - SHAPER TF */
op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
if (!op) {
-- 
2.44.0



[RFC PATCH v4 31/42] drm/amd/display: Add support for sRGB EOTF in DEGAM block

2024-02-26 Thread Harry Wentland
From: Alex Hung 

Expose one 1D curve colorop with support for
DRM_COLOROP_1D_CURVE_SRGB_EOTF and program HW to perform
the sRGB transform when the colorop is not in bypass.

With this change the following IGT test passes:
kms_colorop --run plane-XR30-XR30-srgb_eotf

The color pipeline now consists of a single colorop:
1. 1D curve colorop w/ sRGB EOTF

Signed-off-by: Alex Hung 
Signed-off-by: Harry Wentland 
Co-developed-by: Harry Wentland 
---
 .../gpu/drm/amd/display/amdgpu_dm/Makefile|  3 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 88 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 58 
 .../amd/display/amdgpu_dm/amdgpu_dm_colorop.h | 34 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 10 +++
 5 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.h

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile 
b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index ab2a97e354da..46158d67ab12 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -38,7 +38,8 @@ AMDGPUDM = \
amdgpu_dm_pp_smu.o \
amdgpu_dm_psr.o \
amdgpu_dm_replay.o \
-   amdgpu_dm_wb.o
+   amdgpu_dm_wb.o \
+   amdgpu_dm_colorop.o
 
 ifdef CONFIG_DRM_AMD_DC_FP
 AMDGPUDM += dc_fpu.o
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 9b527bffe11a..3ec759934669 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -668,6 +668,19 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
}
 }
 
+static enum dc_transfer_func_predefined
+amdgpu_colorop_tf_to_dc_tf(enum drm_colorop_curve_1d_type tf)
+{
+   switch (tf)
+   {
+   case DRM_COLOROP_1D_CURVE_SRGB_EOTF:
+   case DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF:
+   return TRANSFER_FUNCTION_SRGB;
+   default:
+   return TRANSFER_FUNCTION_LINEAR;;
+   }
+}
+
 static void __to_dc_lut3d_color(struct dc_rgb *rgb,
const struct drm_color_lut lut,
int bit_precision)
@@ -1137,6 +1150,59 @@ __set_dm_plane_degamma(struct drm_plane_state 
*plane_state,
return 0;
 }
 
+static int
+__set_colorop_in_tf_1d_curve(struct dc_plane_state *dc_plane_state,
+  struct drm_colorop_state *colorop_state)
+{
+   struct dc_transfer_func *tf = dc_plane_state->in_transfer_func;
+   struct drm_colorop *colorop = colorop_state->colorop;
+   struct drm_device *drm = colorop->dev;
+
+   if (colorop->type != DRM_COLOROP_1D_CURVE &&
+   colorop_state->curve_1d_type != DRM_COLOROP_1D_CURVE_SRGB_EOTF)
+   return -EINVAL;
+
+   if (colorop_state->bypass) {
+   tf->type = TF_TYPE_BYPASS;
+   tf->tf = TRANSFER_FUNCTION_LINEAR;
+   return 0;
+   }
+
+   drm_dbg(drm, "Degamma colorop with ID: %d\n", colorop->base.id);
+
+   tf->type = TF_TYPE_PREDEFINED;
+   tf->tf = amdgpu_colorop_tf_to_dc_tf(colorop_state->curve_1d_type);
+
+   return 0;
+}
+
+static int
+__set_dm_plane_colorop_degamma(struct drm_plane_state *plane_state,
+  struct dc_plane_state *dc_plane_state,
+  struct drm_colorop *colorop)
+{
+   struct drm_colorop *old_colorop;
+   struct drm_colorop_state *colorop_state = NULL, *new_colorop_state;
+   struct drm_atomic_state *state = plane_state->state;
+   int i = 0;
+
+   old_colorop = colorop;
+
+   /* 1st op: 1d curve - degamma */
+   for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
+   if (new_colorop_state->colorop == old_colorop &&
+   new_colorop_state->curve_1d_type == 
DRM_COLOROP_1D_CURVE_SRGB_EOTF) {
+   colorop_state = new_colorop_state;
+   break;
+   }
+   }
+
+   if (!colorop_state)
+   return -EINVAL;
+
+   return __set_colorop_in_tf_1d_curve(dc_plane_state, colorop_state);
+}
+
 static int
 amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
 struct dc_plane_state *dc_plane_state)
@@ -1187,6 +1253,25 @@ amdgpu_dm_plane_set_color_properties(struct 
drm_plane_state *plane_state,
return 0;
 }
 
+static int
+amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state,
+  struct dc_plane_state *dc_plane_state)
+{
+   struct drm_colorop *colorop = plane_state->color_pipeline;
+   int ret;
+
+   /* 1D Curve - DEGAM TF */
+   if (!colorop) {
+   return -EINVAL;
+   }
+
+   ret = 

[RFC PATCH v4 37/42] drm/amd/display: Add support for BT.709 and BT.2020 TFs

2024-02-26 Thread Harry Wentland
This adds support for the BT.709/BT.2020 transfer functions
on all current 1D curve plane colorops, i.e., on DEGAM, SHAPER,
and BLND blocks.

With this change the following IGT subtests pass:
kms_colorop --run plane-XR30-XR30-bt2020_inv_oetf
kms_colorop --run plane-XR30-XR30-bt2020_oetf

Signed-off-by: Harry Wentland 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c   | 11 ---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 10 +++---
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 20b7eb47388c..d5d356cf9fc6 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -677,6 +677,9 @@ amdgpu_colorop_tf_to_dc_tf(enum drm_colorop_curve_1d_type 
tf)
case DRM_COLOROP_1D_CURVE_SRGB_EOTF:
case DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF:
return TRANSFER_FUNCTION_SRGB;
+   case DRM_COLOROP_1D_CURVE_BT2020_INV_OETF:
+   case DRM_COLOROP_1D_CURVE_BT2020_OETF:
+   return TRANSFER_FUNCTION_BT709;
case DRM_COLOROP_1D_CURVE_PQ_125_EOTF:
case DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF:
return TRANSFER_FUNCTION_PQ;
@@ -1287,8 +1290,10 @@ __set_colorop_1d_curve_blend_tf_lut(struct 
dc_plane_state *dc_plane_state,
const struct drm_color_lut *blend_lut;
uint32_t blend_size;
 
-   if (colorop->type != DRM_COLOROP_1D_CURVE &&
-   colorop_state->curve_1d_type != DRM_COLOROP_1D_CURVE_SRGB_EOTF)
+   if (colorop->type != DRM_COLOROP_1D_CURVE)
+   return -EINVAL;
+
+   if (!(BIT(colorop_state->curve_1d_type) & amdgpu_dm_supported_blnd_tfs))
return -EINVAL;
 
if (colorop_state->bypass) {
@@ -1324,7 +1329,7 @@ __set_dm_plane_colorop_blend(struct drm_plane_state 
*plane_state,
/* 3nd op: 1d curve - blend */
for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
if (new_colorop_state->colorop == old_colorop &&
-   new_colorop_state->curve_1d_type == 
DRM_COLOROP_1D_CURVE_SRGB_EOTF) {
+   (BIT(new_colorop_state->curve_1d_type) & 
amdgpu_dm_supported_blnd_tfs)) {
colorop_state = new_colorop_state;
break;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
index f99d8e09d89b..bc66bd4f9fdd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
@@ -33,14 +33,18 @@
 
 const u64 amdgpu_dm_supported_degam_tfs =
BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) |
-   BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF);
+   BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) |
+   BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF);
 
 const u64 amdgpu_dm_supported_shaper_tfs =
BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) |
-   BIT(DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF);
+   BIT(DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF) |
+   BIT(DRM_COLOROP_1D_CURVE_BT2020_OETF);
 
 const u64 amdgpu_dm_supported_blnd_tfs =
-   BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF);
+   BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) |
+   BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) |
+   BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF);
 
 int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct 
drm_prop_enum_list *list)
 {
-- 
2.44.0



[RFC PATCH v4 20/42] drm/colorop: Add 3x4 CTM type

2024-02-26 Thread Harry Wentland
This type is used to support a 3x4 matrix in colorops. A 3x4
matrix uses the last column as a "bias" column. Some HW exposes
support for 3x4. The calculation looks like:

 out   matrixin
 |R|   |0  1  2  3 |   | R |
 |G| = |4  5  6  7 | x | G |
 |B|   |8  9  10 11|   | B |
   |1.0|

This is also the first colorop where we need a blob property to
program the property. For that we'll introduce a new DATA
property that can be used by all colorop TYPEs requiring a
blob. The way a DATA blob is read depends on the TYPE of
the colorop.

We only create the DATA property for property types that
need it.

v4:
 - Create helper function for creating 3x4 CTM colorop
 - Fix CTM indexes in comment (Pekka)

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/drm_atomic.c  | 14 ++-
 drivers/gpu/drm/drm_atomic_uapi.c | 29 ++
 drivers/gpu/drm/drm_colorop.c | 40 +++
 include/drm/drm_colorop.h | 19 +++
 include/uapi/drm/drm_mode.h   |  9 ++-
 5 files changed, 109 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index d82858dabf06..6e736ffa6d7c 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -793,7 +793,19 @@ static void drm_atomic_colorop_print_state(struct 
drm_printer *p,
drm_printf(p, "colorop[%u]:\n", colorop->base.id);
drm_printf(p, "\ttype=%s\n", drm_get_colorop_type_name(colorop->type));
drm_printf(p, "\tbypass=%u\n", state->bypass);
-   drm_printf(p, "\tcurve_1d_type=%s\n", 
drm_get_colorop_curve_1d_type_name(state->curve_1d_type));
+
+   switch (colorop->type) {
+   case DRM_COLOROP_1D_CURVE:
+   drm_printf(p, "\tcurve_1d_type=%s\n",
+  
drm_get_colorop_curve_1d_type_name(state->curve_1d_type));
+   break;
+   case DRM_COLOROP_CTM_3X4:
+   drm_printf(p, "\tdata blob id=%d\n", state->data ? 
state->data->base.id : 0);
+   break;
+   default:
+   break;
+   }
+
drm_printf(p, "\tnext=%d\n", colorop->next ? colorop->next->base.id : 
0);
 }
 
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index ff258b34544e..23b248987a7c 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -691,6 +691,30 @@ drm_atomic_plane_get_property(struct drm_plane *plane,
return 0;
 }
 
+static int drm_atomic_color_set_data_property(struct drm_colorop *colorop,
+   struct drm_colorop_state *state,
+   struct drm_property *property, uint64_t val)
+{
+   ssize_t elem_size = -1;
+   ssize_t size = -1;
+   bool replaced = false;
+
+   switch (colorop->type) {
+   case DRM_COLOROP_CTM_3X4:
+   size = sizeof(struct drm_color_ctm_3x4);
+   break;
+   default:
+   /* should never get here */
+   return -EINVAL;
+   }
+
+   return drm_property_replace_blob_from_id(colorop->dev,
+   >data,
+   val,
+   size,
+   elem_size,
+   );
+}
 
 static int drm_atomic_colorop_set_property(struct drm_colorop *colorop,
struct drm_colorop_state *state, struct drm_file *file_priv,
@@ -700,6 +724,9 @@ static int drm_atomic_colorop_set_property(struct 
drm_colorop *colorop,
state->bypass = val;
} else if (property == colorop->curve_1d_type_property) {
state->curve_1d_type = val;
+   } else if (property == colorop->data_property) {
+   return drm_atomic_color_set_data_property(colorop,
+   state, property, val);
} else {
drm_dbg_atomic(colorop->dev,
   "[COLOROP:%d:%d] unknown property 
[PROP:%d:%s]]\n",
@@ -722,6 +749,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop,
*val = state->bypass;
} else if (property == colorop->curve_1d_type_property) {
*val = state->curve_1d_type;
+   } else if (property == colorop->data_property) {
+   *val = (state->data) ? state->data->base.id : 0;
} else {
return -EINVAL;
}
diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c
index 71c2286333a1..7baa1eba 100644
--- a/drivers/gpu/drm/drm_colorop.c
+++ b/drivers/gpu/drm/drm_colorop.c
@@ -34,6 +34,7 @@
 
 static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = {
{ DRM_COLOROP_1D_CURVE, "1D Curve" },
+   { DRM_COLOROP_CTM_3X4, "3x4 Matrix"}
 };
 
 static const char * const colorop_curve_1d_type_names[] = {
@@ -93,6 +94,7 @@ int drm_colorop_init(struct drm_device *dev, struct 
drm_colorop 

[RFC PATCH v4 39/42] drm/amd/display: add shaper and blend colorops for 1D Curve Custom LUT

2024-02-26 Thread Harry Wentland
From: Alex Hung 

This patch adds colorops for custom 1D LUTs in the SHAPER and
BLND HW blocks.

With this change the following IGT tests pass:
kms_colorop --run plane-XR30-XR30-srgb_inv_eotf_lut
kms_colorop --run plane-XR30-XR30-srgb_inv_eotf_lut-srgb_eotf_lut

The color pipeline now consists of the following colorops:
1. 1D curve colorop
2. 1D curve colorop
3. 1D LUT
4. 1D curve colorop
5. 1D LUT

The 1D curve colorops support sRGB, BT2020, and PQ scaled to 125.0.

Signed-off-by: Alex Hung 
Signed-off-by: Harry Wentland 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 170 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c |  30 
 2 files changed, 119 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index d5d356cf9fc6..e7b51b29cc04 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1212,40 +1212,6 @@ __set_dm_plane_colorop_degamma(struct drm_plane_state 
*plane_state,
return __set_colorop_in_tf_1d_curve(dc_plane_state, colorop_state);
 }
 
-static int
-__set_colorop_in_shaper_1d_curve(struct dc_plane_state *dc_plane_state,
-  struct drm_colorop_state *colorop_state)
-{
-   struct dc_transfer_func *tf = dc_plane_state->in_shaper_func;
-   struct drm_colorop *colorop = colorop_state->colorop;
-   struct drm_device *drm = colorop->dev;
-   const struct drm_color_lut *shaper_lut;
-   uint32_t shaper_size;
-
-   if (colorop->type != DRM_COLOROP_1D_CURVE)
-   return -EINVAL;
-
-   if (!(BIT(colorop_state->curve_1d_type) & 
amdgpu_dm_supported_shaper_tfs))
-   return -EINVAL;
-
-   if (colorop_state->bypass) {
-   tf->type = TF_TYPE_BYPASS;
-   tf->tf = TRANSFER_FUNCTION_LINEAR;
-   return 0;
-   }
-
-   drm_dbg(drm, "Shaper colorop with ID: %d\n", colorop->base.id);
-
-   if (colorop->type == DRM_COLOROP_1D_CURVE) {
-   tf->type = TF_TYPE_DISTRIBUTED_POINTS;
-   tf->tf = 
amdgpu_colorop_tf_to_dc_tf(colorop_state->curve_1d_type);
-   tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
-   return __set_output_tf(tf, shaper_lut, shaper_size, false);
-   }
-
-   return -EINVAL;
-}
-
 static int
 __set_dm_plane_colorop_shaper(struct drm_plane_state *plane_state,
  struct dc_plane_state *dc_plane_state,
@@ -1254,64 +1220,61 @@ __set_dm_plane_colorop_shaper(struct drm_plane_state 
*plane_state,
struct drm_colorop *old_colorop;
struct drm_colorop_state *colorop_state = NULL, *new_colorop_state;
struct drm_atomic_state *state = plane_state->state;
+   enum dc_transfer_func_predefined default_tf = TRANSFER_FUNCTION_LINEAR;
+   struct dc_transfer_func *tf = dc_plane_state->in_shaper_func;
+   const struct drm_color_lut *shaper_lut;
+   struct drm_device *dev = colorop->dev;
+   uint32_t shaper_size;
int i = 0;
 
+   /* 1D Curve - SHAPER TF */
old_colorop = colorop;
-
-   /* 2nd op: 1d curve - shaper */
for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
if (new_colorop_state->colorop == old_colorop &&
(BIT(new_colorop_state->curve_1d_type) & 
amdgpu_dm_supported_shaper_tfs)) {
colorop_state = new_colorop_state;
break;
}
+   }
 
-   if (new_colorop_state->colorop == old_colorop) {
+   if (colorop_state && !colorop_state->bypass && colorop->type == 
DRM_COLOROP_1D_CURVE) {
+   drm_dbg(dev, "Shaper TF colorop with ID: %d\n", 
colorop->base.id);
+   tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+   tf->tf = default_tf = 
amdgpu_colorop_tf_to_dc_tf(colorop_state->curve_1d_type);
+   tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+   __set_output_tf(tf, shaper_lut, shaper_size, false);
+   }
+
+   /* 1D LUT - SHAPER LUT */
+   colorop = old_colorop->next;
+   if (!colorop) {
+   drm_dbg(dev, "no Shaper LUT colorop found\n");
+   return -EINVAL;
+   }
+
+   old_colorop = colorop;
+   for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
+   if (new_colorop_state->colorop == old_colorop &&
+   new_colorop_state->colorop->type == DRM_COLOROP_1D_LUT) {
colorop_state = new_colorop_state;
break;
}
}
 
-   if (!colorop_state)
-   return -EINVAL;
-
-   return __set_colorop_in_shaper_1d_curve(dc_plane_state, colorop_state);
-}
-
-
-static int
-__set_colorop_1d_curve_blend_tf_lut(struct dc_plane_state *dc_plane_state,
- 

[RFC PATCH v4 41/42] drm/colorop: Add mutliplier type

2024-02-26 Thread Harry Wentland
From: Alex Hung 

This introduces a new drm_colorop_type: DRM_COLOROP_MULTIPLIER.

It's a simple multiplier to all pixel values. The value is
specified via a S31.32 fixed point provided via the
"MULTIPLIER" property.

Signed-off-by: Alex Hung 
---
 drivers/gpu/drm/drm_atomic.c  |  3 +++
 drivers/gpu/drm/drm_atomic_uapi.c |  4 
 drivers/gpu/drm/drm_colorop.c | 29 +++--
 include/drm/drm_colorop.h | 16 
 include/uapi/drm/drm_mode.h   |  1 +
 5 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index f7d51839ca03..af0b6338a55c 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -806,6 +806,9 @@ static void drm_atomic_colorop_print_state(struct 
drm_printer *p,
case DRM_COLOROP_CTM_3X4:
drm_printf(p, "\tdata blob id=%d\n", state->data ? 
state->data->base.id : 0);
break;
+   case DRM_COLOROP_MULTIPLIER:
+   drm_printf(p, "\tmultiplier=%u\n", state->multiplier);
+   break;
default:
break;
}
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index 6bfe857720cd..b4ecda563728 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -727,6 +727,8 @@ static int drm_atomic_colorop_set_property(struct 
drm_colorop *colorop,
state->bypass = val;
} else if (property == colorop->curve_1d_type_property) {
state->curve_1d_type = val;
+   } else if (property == colorop->multiplier_property) {
+   state->multiplier = val;
} else if (property == colorop->data_property) {
return drm_atomic_color_set_data_property(colorop,
state, property, val);
@@ -752,6 +754,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop,
*val = state->bypass;
} else if (property == colorop->curve_1d_type_property) {
*val = state->curve_1d_type;
+   } else if (property == colorop->multiplier_property) {
+   *val = state->multiplier;
} else if (property == colorop->size_property) {
*val = state->size;
} else if (property == colorop->data_property) {
diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c
index 4452eaeeb242..c6cdd743de51 100644
--- a/drivers/gpu/drm/drm_colorop.c
+++ b/drivers/gpu/drm/drm_colorop.c
@@ -35,7 +35,8 @@
 static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = {
{ DRM_COLOROP_1D_CURVE, "1D Curve" },
{ DRM_COLOROP_1D_LUT, "1D Curve Custom LUT" },
-   { DRM_COLOROP_CTM_3X4, "3x4 Matrix"}
+   { DRM_COLOROP_CTM_3X4, "3x4 Matrix"},
+   { DRM_COLOROP_MULTIPLIER, "Multiplier"},
 };
 
 static const char * const colorop_curve_1d_type_names[] = {
@@ -231,6 +232,29 @@ int drm_colorop_ctm_3x4_init(struct drm_device *dev, 
struct drm_colorop *colorop
 }
 EXPORT_SYMBOL(drm_colorop_ctm_3x4_init);
 
+int drm_colorop_mult_init(struct drm_device *dev, struct drm_colorop *colorop,
+ struct drm_plane *plane)
+{
+   struct drm_property *prop;
+   int ret;
+
+   ret = drm_colorop_init(dev, colorop, plane, DRM_COLOROP_MULTIPLIER);
+   if (ret)
+   return ret;
+
+   prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, 
"MULTIPLIER", 0, U64_MAX);
+   if (!prop)
+   return -ENOMEM;
+
+   colorop->multiplier_property = prop;
+   drm_object_attach_property(>base, 
colorop->multiplier_property, 0);
+
+   drm_colorop_reset(colorop);
+
+   return 0;
+}
+EXPORT_SYMBOL(drm_colorop_mult_init);
+
 static void __drm_atomic_helper_colorop_duplicate_state(struct drm_colorop 
*colorop,
struct 
drm_colorop_state *state)
 {
@@ -333,7 +357,8 @@ EXPORT_SYMBOL(drm_colorop_reset);
 static const char * const colorop_type_name[] = {
[DRM_COLOROP_1D_CURVE] = "1D Curve",
[DRM_COLOROP_1D_LUT] = "1D Curve Custom LUT",
-   [DRM_COLOROP_CTM_3X4] = "3x4 Matrix"
+   [DRM_COLOROP_CTM_3X4] = "3x4 Matrix",
+   [DRM_COLOROP_MULTIPLIER] = "Multiplier",
 };
 
 /**
diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h
index 8adc7ece3bd1..f9f83644cc9f 100644
--- a/include/drm/drm_colorop.h
+++ b/include/drm/drm_colorop.h
@@ -64,6 +64,13 @@ struct drm_colorop_state {
 */
enum drm_colorop_curve_1d_type curve_1d_type;
 
+   /**
+* @multiplier:
+*
+* Multiplier to 'gain' the plane. Format is S31.32 sign-magnitude.
+*/
+   uint64_t multiplier;
+
/**
 * @size:
 *
@@ -186,6 +193,13 @@ struct drm_colorop {
 */
struct drm_property *curve_1d_type_property;
 
+   /**
+* @multiplier_property:
+*

[RFC PATCH v4 36/42] drm/colorop: add BT2020/BT709 OETF and Inverse OETF

2024-02-26 Thread Harry Wentland
The BT.709 and BT.2020 OETFs are the same, the only difference
being that the BT.2020 variant is defined with more precision
for 10 and 12-bit per color encodings.

Both are used as encoding functions for video content, and are
therefore defined as OETF (opto-electronic transfer function)
instead of as EOTF (electro-optical transfer function).

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/drm_colorop.c | 2 ++
 include/drm/drm_colorop.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c
index 781947e42b02..b10cad5a7208 100644
--- a/drivers/gpu/drm/drm_colorop.c
+++ b/drivers/gpu/drm/drm_colorop.c
@@ -40,6 +40,8 @@ static const struct drm_prop_enum_list 
drm_colorop_type_enum_list[] = {
 static const char * const colorop_curve_1d_type_names[] = {
[DRM_COLOROP_1D_CURVE_SRGB_EOTF] = "sRGB EOTF",
[DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF] = "sRGB Inverse EOTF",
+   [DRM_COLOROP_1D_CURVE_BT2020_INV_OETF] = "BT.2020 Inverse OETF",
+   [DRM_COLOROP_1D_CURVE_BT2020_OETF] = "BT.2020 OETF",
[DRM_COLOROP_1D_CURVE_PQ_125_EOTF] = "PQ 125 EOTF",
[DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF] = "PQ 125 Inverse EOTF",
 };
diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h
index e06d9ea28efd..28b3136dabad 100644
--- a/include/drm/drm_colorop.h
+++ b/include/drm/drm_colorop.h
@@ -33,6 +33,8 @@
 enum drm_colorop_curve_1d_type {
DRM_COLOROP_1D_CURVE_SRGB_EOTF,
DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF,
+   DRM_COLOROP_1D_CURVE_BT2020_INV_OETF,
+   DRM_COLOROP_1D_CURVE_BT2020_OETF,
DRM_COLOROP_1D_CURVE_PQ_125_EOTF,
DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF,
DRM_COLOROP_1D_CURVE_COUNT
-- 
2.44.0



[RFC PATCH v4 19/42] drm/colorop: Introduce DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE

2024-02-26 Thread Harry Wentland
With the introduction of the pre-blending color pipeline we
can no longer have color operations that don't have a clear
position in the color pipeline. We deprecate all existing
plane properties. For upstream drivers those are:
 - COLOR_ENCODING
 - COLOR_RANGE

Drivers are expected to ignore these properties when
programming the HW.

Setting of the COLOR_PIPELINE plane property or drm_colorop
properties is only allowed for userspace that sets this
client cap.

v4:
 - Don't block setting of COLOR_RANGE and COLOR_ENCODING
   when client cap is set

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/drm_atomic_uapi.c | 13 -
 drivers/gpu/drm/drm_ioctl.c   |  7 +++
 include/drm/drm_file.h|  7 +++
 include/uapi/drm/drm.h| 16 
 4 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index 86f77a9aa3a8..ff258b34544e 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -570,6 +570,12 @@ static int drm_atomic_plane_set_property(struct drm_plane 
*plane,
} else if (property == plane->color_range_property) {
state->color_range = val;
} else if (property == plane->color_pipeline_property) {
+   if (!file_priv->plane_color_pipeline) {
+   drm_dbg_atomic(dev,
+  "Setting COLOR_PIPELINE plane property 
not permitted unless DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE is set\n");
+   return -EINVAL;
+   }
+
/* find DRM colorop object */
struct drm_colorop *colorop = NULL;
colorop = drm_colorop_find(dev, file_priv, val);
@@ -1179,6 +1185,12 @@ int drm_atomic_set_property(struct drm_atomic_state 
*state,
break;
}
case DRM_MODE_OBJECT_COLOROP: {
+   if (!file_priv->plane_color_pipeline) {
+   drm_dbg_atomic(prop->dev,
+  "[OBJECT:%d] is a colorop but 
DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE not set\n",
+  obj->id);
+   ret = -EINVAL;
+   }
struct drm_colorop *colorop = obj_to_colorop(obj);
struct drm_colorop_state *colorop_state;
 
@@ -1191,7 +1203,6 @@ int drm_atomic_set_property(struct drm_atomic_state 
*state,
ret = drm_atomic_colorop_set_property(colorop,
colorop_state, file_priv,
prop, prop_value);
-
break;
}
default:
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index e368fc084c77..da59e37ae228 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -373,6 +373,13 @@ drm_setclientcap(struct drm_device *dev, void *data, 
struct drm_file *file_priv)
return -EINVAL;
file_priv->supports_virtualized_cursor_plane = req->value;
break;
+   case DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE:
+   if (!file_priv->atomic)
+   return -EINVAL;
+   if (req->value > 1)
+   return -EINVAL;
+   file_priv->plane_color_pipeline = req->value;
+   break;
default:
return -EINVAL;
}
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index ab230d3af138..63c1d29b8520 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -204,6 +204,13 @@ struct drm_file {
 */
bool writeback_connectors;
 
+   /**
+* @plane_color_pipeline:
+*
+* True if client understands plane color pipelines
+*/
+   bool plane_color_pipeline;
+
/**
 * @was_master:
 *
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 16122819edfe..2d74c49274ee 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -875,6 +875,22 @@ struct drm_get_cap {
  */
 #define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT6
 
+/**
+ * DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE
+ *
+ * If set to 1
+ * - the DRM core will allow setting of plane the COLOR_PIPELINE
+ *   property, as well as drm_colorop properties.
+ * - Drivers will ignore these properties
+ *   - COLOR_ENCODING
+ *   - COLOR_RANGE
+ *
+ * The client must enable _CLIENT_CAP_ATOMIC first.
+ *
+ * This capability is currently in development.
+ */
+#define DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE7
+
 /* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
 struct drm_set_client_cap {
__u64 capability;
-- 
2.44.0



[RFC PATCH v4 34/42] drm/colorop: Add PQ 125 EOTF and its inverse

2024-02-26 Thread Harry Wentland
The PQ function defines a mapping of code values to nits (cd/m^2).
The max code value maps to 10,000 nits.

Windows DWM's canonical composition color space (CCCS)  defaults
to composing SDR contents to 80 nits and uses a float value of
1.0 to represent this. For this reason AMD HW hard-codes a PQ
function that is scaled by 125, yielding 80 nit PQ values for
1.0 and 10,000 nits at 125.0.

This patch introduces this scaled PQ EOTF and its inverse as
1D curve types.

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/drm_colorop.c | 2 ++
 include/drm/drm_colorop.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c
index 7baa1eba..781947e42b02 100644
--- a/drivers/gpu/drm/drm_colorop.c
+++ b/drivers/gpu/drm/drm_colorop.c
@@ -40,6 +40,8 @@ static const struct drm_prop_enum_list 
drm_colorop_type_enum_list[] = {
 static const char * const colorop_curve_1d_type_names[] = {
[DRM_COLOROP_1D_CURVE_SRGB_EOTF] = "sRGB EOTF",
[DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF] = "sRGB Inverse EOTF",
+   [DRM_COLOROP_1D_CURVE_PQ_125_EOTF] = "PQ 125 EOTF",
+   [DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF] = "PQ 125 Inverse EOTF",
 };
 
 
diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h
index 8710e550790c..e06d9ea28efd 100644
--- a/include/drm/drm_colorop.h
+++ b/include/drm/drm_colorop.h
@@ -33,6 +33,8 @@
 enum drm_colorop_curve_1d_type {
DRM_COLOROP_1D_CURVE_SRGB_EOTF,
DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF,
+   DRM_COLOROP_1D_CURVE_PQ_125_EOTF,
+   DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF,
DRM_COLOROP_1D_CURVE_COUNT
 };
 
-- 
2.44.0



[RFC PATCH v4 21/42] drm/vkms: Pull apply_colorop out of pre_blend_color_transform

2024-02-26 Thread Harry Wentland
The if/switch statement is bound to grow with more types and
subtypes. Pull this out into its own funcion to make things more
manageable and readable.

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/vkms/vkms_composer.c | 48 
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
b/drivers/gpu/drm/vkms/vkms_composer.c
index 9493bdb1ba3f..25b786b49db0 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -164,6 +164,31 @@ static void apply_lut(const struct vkms_crtc_state 
*crtc_state, struct line_buff
}
 }
 
+static void apply_colorop(struct pixel_argb_u16 *pixel, struct drm_colorop 
*colorop)
+{
+   /* TODO is this right? */
+   struct drm_colorop_state *colorop_state = colorop->state;
+
+   if (colorop->type == DRM_COLOROP_1D_CURVE) {
+   switch (colorop_state->curve_1d_type) {
+   case DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF:
+   pixel->r = 
apply_lut_to_channel_value(_inv_eotf, pixel->r, LUT_RED);
+   pixel->g = 
apply_lut_to_channel_value(_inv_eotf, pixel->g, LUT_GREEN);
+   pixel->b = 
apply_lut_to_channel_value(_inv_eotf, pixel->b, LUT_BLUE);
+   break;
+   case DRM_COLOROP_1D_CURVE_SRGB_EOTF:
+   pixel->r = 
apply_lut_to_channel_value(_eotf, pixel->r, LUT_RED);
+   pixel->g = 
apply_lut_to_channel_value(_eotf, pixel->g, LUT_GREEN);
+   pixel->b = 
apply_lut_to_channel_value(_eotf, pixel->b, LUT_BLUE);
+   break;
+   default:
+   DRM_DEBUG_DRIVER("unkown colorop 1D curve type 
%d\n", colorop_state->curve_1d_type);
+   break;
+   }
+   }
+
+}
+
 static void pre_blend_color_transform(const struct vkms_plane_state 
*plane_state, struct line_buffer *output_buffer)
 {
struct drm_colorop *colorop = plane_state->base.base.color_pipeline;
@@ -180,26 +205,9 @@ static void pre_blend_color_transform(const struct 
vkms_plane_state *plane_state
if (!colorop_state)
return;
 
-   for (size_t x = 0; x < output_buffer->n_pixels; x++) {
-   struct pixel_argb_u16 *pixel = 
_buffer->pixels[x];
-
-   if (colorop->type == DRM_COLOROP_1D_CURVE &&
-   colorop_state->bypass == false) {
-   switch (colorop_state->curve_1d_type) {
-   case DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF:
-   pixel->r = 
apply_lut_to_channel_value(_inv_eotf, pixel->r, LUT_RED);
-   pixel->g = 
apply_lut_to_channel_value(_inv_eotf, pixel->g, LUT_GREEN);
-   pixel->b = 
apply_lut_to_channel_value(_inv_eotf, pixel->b, LUT_BLUE);
-   break;
-   case DRM_COLOROP_1D_CURVE_SRGB_EOTF:
-   default:
-   pixel->r = 
apply_lut_to_channel_value(_eotf, pixel->r, LUT_RED);
-   pixel->g = 
apply_lut_to_channel_value(_eotf, pixel->g, LUT_GREEN);
-   pixel->b = 
apply_lut_to_channel_value(_eotf, pixel->b, LUT_BLUE);
-   break;
-   }
-   }
-   }
+   for (size_t x = 0; x < output_buffer->n_pixels; x++)
+   if (!colorop_state->bypass)
+   apply_colorop(_buffer->pixels[x], 
colorop);
 
colorop = colorop->next;
}
-- 
2.44.0



[RFC PATCH v4 35/42] drm/amd/display: Enable support for PQ 125 EOTF and Inverse

2024-02-26 Thread Harry Wentland
This patchset enables support for the PQ_125 EOTF and its inverse
on all existing plane 1D curve colorops, i.e., on DEGAM, SHAPER,
and BLND blocks.

With this patchset the following IGT subtests are passing:
kms_colorop --run plane-XR30-XR30-pq_125_eotf
kms_colorop --run plane-XR30-XR30-pq_125_inv_eotf
kms_colorop --run plane-XR30-XR30-pq_125_eotf-pq_125_inv_eotf
kms_colorop --run plane-XR30-XR30-pq_125_eotf-pq_125_inv_eotf-pq_125_eotf

Signed-off-by: Harry Wentland 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c   | 20 +--
 .../amd/display/amdgpu_dm/amdgpu_dm_colorop.c |  8 +---
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 3e3ae2b58b06..20b7eb47388c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -25,6 +25,7 @@
 #include "amdgpu.h"
 #include "amdgpu_mode.h"
 #include "amdgpu_dm.h"
+#include "amdgpu_dm_colorop.h"
 #include "dc.h"
 #include "modules/color/color_gamma.h"
 #include "basics/conversion.h"
@@ -676,6 +677,9 @@ amdgpu_colorop_tf_to_dc_tf(enum drm_colorop_curve_1d_type 
tf)
case DRM_COLOROP_1D_CURVE_SRGB_EOTF:
case DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF:
return TRANSFER_FUNCTION_SRGB;
+   case DRM_COLOROP_1D_CURVE_PQ_125_EOTF:
+   case DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF:
+   return TRANSFER_FUNCTION_PQ;
default:
return TRANSFER_FUNCTION_LINEAR;;
}
@@ -1158,8 +1162,10 @@ __set_colorop_in_tf_1d_curve(struct dc_plane_state 
*dc_plane_state,
struct drm_colorop *colorop = colorop_state->colorop;
struct drm_device *drm = colorop->dev;
 
-   if (colorop->type != DRM_COLOROP_1D_CURVE &&
-   colorop_state->curve_1d_type != DRM_COLOROP_1D_CURVE_SRGB_EOTF)
+   if (colorop->type != DRM_COLOROP_1D_CURVE)
+   return -EINVAL;
+
+   if (!(BIT(colorop_state->curve_1d_type) & 
amdgpu_dm_supported_degam_tfs))
return -EINVAL;
 
if (colorop_state->bypass) {
@@ -1191,7 +1197,7 @@ __set_dm_plane_colorop_degamma(struct drm_plane_state 
*plane_state,
/* 1st op: 1d curve - degamma */
for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
if (new_colorop_state->colorop == old_colorop &&
-   new_colorop_state->curve_1d_type == 
DRM_COLOROP_1D_CURVE_SRGB_EOTF) {
+   (BIT(new_colorop_state->curve_1d_type) & 
amdgpu_dm_supported_degam_tfs)) {
colorop_state = new_colorop_state;
break;
}
@@ -1213,8 +1219,10 @@ __set_colorop_in_shaper_1d_curve(struct dc_plane_state 
*dc_plane_state,
const struct drm_color_lut *shaper_lut;
uint32_t shaper_size;
 
-   if (colorop->type != DRM_COLOROP_1D_CURVE &&
-   colorop_state->curve_1d_type != DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF)
+   if (colorop->type != DRM_COLOROP_1D_CURVE)
+   return -EINVAL;
+
+   if (!(BIT(colorop_state->curve_1d_type) & 
amdgpu_dm_supported_shaper_tfs))
return -EINVAL;
 
if (colorop_state->bypass) {
@@ -1250,7 +1258,7 @@ __set_dm_plane_colorop_shaper(struct drm_plane_state 
*plane_state,
/* 2nd op: 1d curve - shaper */
for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
if (new_colorop_state->colorop == old_colorop &&
-   new_colorop_state->curve_1d_type == 
DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) {
+   (BIT(new_colorop_state->curve_1d_type) & 
amdgpu_dm_supported_shaper_tfs)) {
colorop_state = new_colorop_state;
break;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
index 449a2ad6a184..f99d8e09d89b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
@@ -32,10 +32,12 @@
 #include "amdgpu_dm_colorop.h"
 
 const u64 amdgpu_dm_supported_degam_tfs =
-   BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF);
+   BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) |
+   BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF);
 
- const u64 amdgpu_dm_supported_shaper_tfs =
-   BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF);
+const u64 amdgpu_dm_supported_shaper_tfs =
+   BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) |
+   BIT(DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF);
 
 const u64 amdgpu_dm_supported_blnd_tfs =
BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF);
-- 
2.44.0



[RFC PATCH v4 15/42] drm/plane: Add COLOR PIPELINE property

2024-02-26 Thread Harry Wentland
We're adding a new enum COLOR PIPELINE property. This
property will have entries for each COLOR PIPELINE by
referencing the DRM object ID of the first drm_colorop
of the pipeline. 0 disables the entire COLOR PIPELINE.

Userspace can use this to discover the available color
pipelines, as well as set the desired one. The color
pipelines are programmed via properties on the actual
drm_colorop objects.

v4:
 - Add pipeline property creation helper (Pekka)
 - Fix function comment for
   drm_atomic_set_colorop_for_plane (Pekka)
 - Always create Bypass pipeline (Pekka)
 - Add missing function declaration (Chaitanya Kumar Borah)

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/drm_atomic.c  | 46 
 drivers/gpu/drm/drm_atomic_state_helper.c |  5 +++
 drivers/gpu/drm/drm_atomic_uapi.c | 42 ++
 drivers/gpu/drm/drm_plane.c   | 52 +++
 include/drm/drm_atomic.h  |  3 ++
 include/drm/drm_atomic_uapi.h |  2 +
 include/drm/drm_plane.h   | 11 +
 7 files changed, 161 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 3645c36d63b3..27a8805c5fa1 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1458,6 +1458,52 @@ drm_atomic_add_affected_planes(struct drm_atomic_state 
*state,
 }
 EXPORT_SYMBOL(drm_atomic_add_affected_planes);
 
+/**
+ * drm_atomic_add_affected_colorops - add colorops for plane
+ * @state: atomic state
+ * @plane: DRM plane
+ *
+ * This function walks the current configuration and adds all colorops
+ * currently used by @plane to the atomic configuration @state. This is useful
+ * when an atomic commit also needs to check all currently enabled colorop on
+ * @plane, e.g. when changing the mode. It's also useful when re-enabling a 
plane
+ * to avoid special code to force-enable all colorops.
+ *
+ * Since acquiring a colorop state will always also acquire the w/w mutex of 
the
+ * current plane for that colorop (if there is any) adding all the colorop 
states for
+ * a plane will not reduce parallelism of atomic updates.
+ *
+ * Returns:
+ * 0 on success or can fail with -EDEADLK or -ENOMEM. When the error is EDEADLK
+ * then the w/w mutex code has detected a deadlock and the entire atomic
+ * sequence must be restarted. All other errors are fatal.
+ */
+int
+drm_atomic_add_affected_colorops(struct drm_atomic_state *state,
+struct drm_plane *plane)
+{
+   struct drm_colorop *colorop;
+   struct drm_colorop_state *colorop_state;
+
+   WARN_ON(!drm_atomic_get_new_plane_state(state, plane));
+
+   drm_dbg_atomic(plane->dev,
+  "Adding all current colorops for [plane:%d:%s] to %p\n",
+  plane->base.id, plane->name, state);
+
+   drm_for_each_colorop(colorop, plane->dev) {
+   if (colorop->plane != plane)
+   continue;
+
+   colorop_state = drm_atomic_get_colorop_state(state, colorop);
+   if (IS_ERR(colorop_state))
+   return PTR_ERR(colorop_state);
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL(drm_atomic_add_affected_colorops);
+
 /**
  * drm_atomic_check_only - check whether a given config would work
  * @state: atomic configuration to check
diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c 
b/drivers/gpu/drm/drm_atomic_state_helper.c
index 519228eb1095..d1dd082b1286 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -267,6 +267,11 @@ void __drm_atomic_helper_plane_state_reset(struct 
drm_plane_state *plane_state,
plane_state->color_range = val;
}
 
+   if (plane->color_pipeline_property) {
+   /* default is always NULL, i.e., bypass */
+   plane_state->color_pipeline = NULL;
+   }
+
if (plane->zpos_property) {
if (!drm_object_property_get_default_value(>base,
   plane->zpos_property,
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index 87f00131be11..86f77a9aa3a8 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -256,6 +256,36 @@ drm_atomic_set_fb_for_plane(struct drm_plane_state 
*plane_state,
 }
 EXPORT_SYMBOL(drm_atomic_set_fb_for_plane);
 
+
+/**
+ * drm_atomic_set_colorop_for_plane - set colorop for plane
+ * @plane_state: atomic state object for the plane
+ * @colorop: colorop to use for the plane
+ *
+ * Helper function to select the color pipeline on a plane by setting
+ * it to the first drm_colorop element of the pipeline.
+ */
+void
+drm_atomic_set_colorop_for_plane(struct drm_plane_state *plane_state,
+struct drm_colorop *colorop)
+{
+   struct drm_plane *plane = plane_state->plane;
+
+   if (colorop)
+

[RFC PATCH v4 25/42] drm/vkms: Add tests for CTM handling

2024-02-26 Thread Harry Wentland
A whole slew of tests for CTM handling that greatly helped in
debugging the CTM code. The extent of tests might seem a bit
silly but they're fast and might someday help save someone
else's day when debugging this.

v4:
 - Comment on origin of bt709_enc matrix (Pekka)
 - Use full opaque alpha (Pekka)
 - Add additional check for Y < 0x (Pekka)
 - Remove unused code (Pekka)
 - Rename red, green, blue to Y, U, V where applicable

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/vkms/tests/vkms_color_tests.c | 251 ++
 drivers/gpu/drm/vkms/vkms_composer.c  |   2 +-
 2 files changed, 252 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vkms/tests/vkms_color_tests.c 
b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c
index e6ac01dee830..83d07f7bae37 100644
--- a/drivers/gpu/drm/vkms/tests/vkms_color_tests.c
+++ b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c
@@ -3,6 +3,7 @@
 #include 
 
 #include 
+#include 
 
 #define TEST_LUT_SIZE 16
 
@@ -181,11 +182,261 @@ static void vkms_color_srgb_inv_srgb(struct kunit *test)
}
 }
 
+#define FIXPT_HALF(DRM_FIXED_ONE >> 1)
+#define FIXPT_QUARTER (DRM_FIXED_ONE >> 2)
+
+const struct drm_color_ctm_3x4 test_matrix_3x4_50_desat = { {
+   FIXPT_HALF, FIXPT_QUARTER, FIXPT_QUARTER, 0,
+   FIXPT_QUARTER, FIXPT_HALF, FIXPT_QUARTER, 0,
+   FIXPT_QUARTER, FIXPT_QUARTER, FIXPT_HALF, 0
+} };
+
+static void vkms_color_ctm_3x4_50_desat(struct kunit *test)
+{
+   struct pixel_argb_s32 ref, out;
+
+   /* full white */
+   ref.a = 0x;
+   ref.r = 0x;
+   ref.g = 0x;
+   ref.b = 0x;
+
+   memcpy(, , sizeof(out));
+   apply_3x4_matrix(, _matrix_3x4_50_desat);
+
+   KUNIT_EXPECT_MEMEQ(test, , , sizeof(out));
+
+   /* full black */
+   ref.a = 0x;
+   ref.r = 0x0;
+   ref.g = 0x0;
+   ref.b = 0x0;
+
+   memcpy(, , sizeof(out));
+   apply_3x4_matrix(, _matrix_3x4_50_desat);
+
+   KUNIT_EXPECT_MEMEQ(test, , , sizeof(out));
+
+   /* 50% grey */
+   ref.a = 0x;
+   ref.r = 0x8000;
+   ref.g = 0x8000;
+   ref.b = 0x8000;
+
+   memcpy(, , sizeof(out));
+   apply_3x4_matrix(, _matrix_3x4_50_desat);
+
+   KUNIT_EXPECT_MEMEQ(test, , , sizeof(out));
+
+   /* full red to 50% desat */
+   ref.a = 0x;
+   ref.r = 0x7fff;
+   ref.g = 0x3fff;
+   ref.b = 0x3fff;
+
+   out.a = 0x;
+   out.r = 0x;
+   out.g = 0x0;
+   out.b = 0x0;
+
+   apply_3x4_matrix(, _matrix_3x4_50_desat);
+
+   KUNIT_EXPECT_MEMEQ(test, , , sizeof(out));
+}
+
+/*
+ * BT.709 encoding matrix
+ *
+ * Values printed from within IGT when converting
+ * igt_matrix_3x4_bt709_enc to the fixed-point format expected
+ * by DRM/KMS.
+ */
+const struct drm_color_ctm_3x4 test_matrix_3x4_bt709_enc = { {
+   0x366cf400ull, 0xb7175900ull, 0x000127bb300ull, 0,
+   0x80001993b3a0ull, 0x80005609fe80ull, 0x6f9db200ull, 0,
+   0x9d70a400ull, 0x80008f011100ull, 0x8e6f9330ull, 0
+} };
+
+static void vkms_color_ctm_3x4_bt709(struct kunit *test)
+{
+   struct pixel_argb_s32 out;
+
+   /* full white to bt709 */
+   out.a = 0x;
+   out.r = 0x;
+   out.g = 0x;
+   out.b = 0x;
+
+   apply_3x4_matrix(, _matrix_3x4_bt709_enc);
+
+   /* Y 255 */
+   KUNIT_EXPECT_GT(test, out.r, 0xfe00);
+   KUNIT_EXPECT_LT(test, out.r, 0x1);
+
+   /* U 0 */
+   KUNIT_EXPECT_LT(test, out.g, 0x0100);
+
+   /* V 0 */
+   KUNIT_EXPECT_LT(test, out.b, 0x0100);
+
+   /* full black to bt709 */
+   out.a = 0x;
+   out.r = 0x0;
+   out.g = 0x0;
+   out.b = 0x0;
+
+   apply_3x4_matrix(, _matrix_3x4_bt709_enc);
+
+   /* Y 0 */
+   KUNIT_EXPECT_LT(test, out.r, 0x100);
+
+   /* U 0 */
+   KUNIT_EXPECT_LT(test, out.g, 0x0100);
+
+   /* V 0 */
+   KUNIT_EXPECT_LT(test, out.b, 0x0100);
+
+   /* gray to bt709 */
+   out.a = 0x;
+   out.r = 0x7fff;
+   out.g = 0x7fff;
+   out.b = 0x7fff;
+
+   apply_3x4_matrix(, _matrix_3x4_bt709_enc);
+
+   /* Y 127 */
+   KUNIT_EXPECT_GT(test, out.r, 0x7e00);
+   KUNIT_EXPECT_LT(test, out.r, 0x8000);
+
+   /* U 0 */
+   KUNIT_EXPECT_LT(test, out.g, 0x0100);
+
+   /* V 0 */
+   KUNIT_EXPECT_LT(test, out.b, 0x0100);
+
+   /* == red 255 - bt709 enc == */
+   out.a = 0x;
+   out.r = 0x;
+   out.g = 0x0;
+   out.b = 0x0;
+
+   apply_3x4_matrix(, _matrix_3x4_bt709_enc);
+
+   /* Y 54 */
+   KUNIT_EXPECT_GT(test, out.r, 0x3500);
+   KUNIT_EXPECT_LT(test, out.r, 0x3700);
+
+   /* U 0 */
+   KUNIT_EXPECT_LT(test, out.g, 0x0100);
+
+   /* V 157 */
+   KUNIT_EXPECT_GT(test, out.b, 0x9C00);
+   KUNIT_EXPECT_LT(test, out.b, 0x9E00);
+
+
+   /* == green 255 - bt709 enc == */
+   out.a = 0x;
+   out.r = 0x0;
+  

[RFC PATCH v4 24/42] drm/tests: Add a few tests around drm_fixed.h

2024-02-26 Thread Harry Wentland
While working on the CTM implementation of VKMS I had to ascertain
myself of a few assumptions. One of those is whether drm_fixed.h
treats its numbers using signed-magnitude or twos-complement. It is
twos-complement.

In order to make someone else's day easier I am adding the
drm_test_int2fixp test that validates the above assumption.

I am also adding a test for the new sm2fixp function that converts
from a signed-magnitude fixed point to the twos-complement fixed
point.

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/tests/Makefile|  3 +-
 drivers/gpu/drm/tests/drm_fixp_test.c | 69 +++
 2 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tests/drm_fixp_test.c

diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
index d6183b3d7688..98468f7908dd 100644
--- a/drivers/gpu/drm/tests/Makefile
+++ b/drivers/gpu/drm/tests/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
drm_modes_test.o \
drm_plane_helper_test.o \
drm_probe_helper_test.o \
-   drm_rect_test.o
+   drm_rect_test.o \
+   drm_fixp_test.o
 
 CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
diff --git a/drivers/gpu/drm/tests/drm_fixp_test.c 
b/drivers/gpu/drm/tests/drm_fixp_test.c
new file mode 100644
index ..f420f173ff66
--- /dev/null
+++ b/drivers/gpu/drm/tests/drm_fixp_test.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ */
+
+#include 
+#include 
+
+static void drm_test_sm2fixp(struct kunit *test)
+{
+   KUNIT_EXPECT_EQ(test, 0x7fffll, ((1LL << 63) - 1));
+
+   /* 1 */
+   KUNIT_EXPECT_EQ(test, drm_int2fixp(1), drm_sm2fixp(1ull << 
DRM_FIXED_POINT));
+
+   /* -1 */
+   KUNIT_EXPECT_EQ(test, drm_int2fixp(-1), drm_sm2fixp((1ull << 63) | 
(1ull << DRM_FIXED_POINT)));
+
+   /* 0.5 */
+   KUNIT_EXPECT_EQ(test, drm_fixp_from_fraction(1, 2), drm_sm2fixp(1ull << 
(DRM_FIXED_POINT - 1)));
+
+   /* -0.5 */
+   KUNIT_EXPECT_EQ(test, drm_fixp_from_fraction(-1, 2), drm_sm2fixp((1ull 
<< 63) | (1ull << (DRM_FIXED_POINT - 1;
+
+}
+
+static void drm_test_int2fixp(struct kunit *test)
+{
+   /* 1 */
+   KUNIT_EXPECT_EQ(test, 1ll << 32, drm_int2fixp(1));
+
+   /* -1 */
+   KUNIT_EXPECT_EQ(test, -(1ll << 32), drm_int2fixp(-1));
+
+   /* 1 + (-1) = 0 */
+   KUNIT_EXPECT_EQ(test, 0, drm_int2fixp(1) + drm_int2fixp(-1));
+
+   /* 1 / 2 */
+   KUNIT_EXPECT_EQ(test, 1ll << 31, drm_fixp_from_fraction(1, 2));
+
+   /* -0.5 */
+   KUNIT_EXPECT_EQ(test, -(1ll << 31), drm_fixp_from_fraction(-1, 2));
+
+   /* (1 / 2) + (-1) = 0.5 */
+   KUNIT_EXPECT_EQ(test, 1ll << 31, drm_fixp_from_fraction(-1, 2) + 
drm_int2fixp(1));
+
+   /* (1 / 2) - 1) = 0.5 */
+   KUNIT_EXPECT_EQ(test, -(1ll << 31), drm_fixp_from_fraction(1, 2) + 
drm_int2fixp(-1));
+
+   /* (1 / 2) - 1) = 0.5 */
+   KUNIT_EXPECT_EQ(test, -(1ll << 31), drm_fixp_from_fraction(1, 2) - 
drm_int2fixp(1));
+
+}
+
+static struct kunit_case drm_fixp_tests[] = {
+   KUNIT_CASE(drm_test_int2fixp),
+   KUNIT_CASE(drm_test_sm2fixp),
+   { }
+};
+
+static struct kunit_suite drm_rect_test_suite = {
+   .name = "drm_fixp",
+   .test_cases = drm_fixp_tests,
+};
+
+kunit_test_suite(drm_rect_test_suite);
+
+MODULE_AUTHOR("AMD");
+MODULE_LICENSE("GPL and additional rights");
\ No newline at end of file
-- 
2.44.0



[RFC PATCH v4 10/42] drm/colorop: Add TYPE property

2024-02-26 Thread Harry Wentland
Add a read-only TYPE property. The TYPE specifies the colorop
type, such as enumerated curve, 1D LUT, CTM, 3D LUT, PWL LUT,
etc.

v4:
 - Use enum property for TYPE (Pekka)

v3:
 - Make TYPE a range property
 - Move enum drm_colorop_type to uapi header
 - Fix drm_get_colorop_type_name description

For now we're only introducing an enumerated 1D LUT type to
illustrate the concept.

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/drm_atomic.c  |  4 +--
 drivers/gpu/drm/drm_atomic_uapi.c |  8 +-
 drivers/gpu/drm/drm_colorop.c | 44 ++-
 include/drm/drm_colorop.h | 17 +++-
 include/uapi/drm/drm_mode.h   |  4 +++
 5 files changed, 72 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 62e87e6a9653..b400e32c9d39 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -634,8 +634,8 @@ drm_atomic_get_colorop_state(struct drm_atomic_state *state,
state->colorops[index].new_state = colorop_state;
colorop_state->state = state;
 
-   drm_dbg_atomic(colorop->dev, "Added [COLOROP:%d] %p state to %p\n",
-  colorop->base.id, colorop_state, state);
+   drm_dbg_atomic(colorop->dev, "Added [COLOROP:%d:%d] %p state to %p\n",
+  colorop->base.id, colorop->type, colorop_state, state);
 
return colorop_state;
 }
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index 1f9b6dfa8ca7..e3067c095c72 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -660,7 +660,13 @@ drm_atomic_colorop_get_property(struct drm_colorop 
*colorop,
const struct drm_colorop_state *state,
struct drm_property *property, uint64_t *val)
 {
-   return -EINVAL;
+   if (property == colorop->type_property) {
+   *val = colorop->type;
+   } else {
+   return -EINVAL;
+   }
+
+   return 0;
 }
 
 static int drm_atomic_set_writeback_fb_for_connector(
diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c
index a295ab96aee1..3a07a8fe2842 100644
--- a/drivers/gpu/drm/drm_colorop.c
+++ b/drivers/gpu/drm/drm_colorop.c
@@ -32,12 +32,17 @@
 
 /* TODO big colorop doc, including properties, etc. */
 
+static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = {
+   { DRM_COLOROP_1D_CURVE, "1D Curve" },
+};
+
 /* Init Helpers */
 
 int drm_colorop_init(struct drm_device *dev, struct drm_colorop *colorop,
-struct drm_plane *plane)
+struct drm_plane *plane, enum drm_colorop_type type)
 {
struct drm_mode_config *config = >mode_config;
+   struct drm_property *prop;
int ret = 0;
 
ret = drm_mode_object_add(dev, >base, DRM_MODE_OBJECT_COLOROP);
@@ -46,12 +51,29 @@ int drm_colorop_init(struct drm_device *dev, struct 
drm_colorop *colorop,
 
colorop->base.properties = >properties;
colorop->dev = dev;
+   colorop->type = type;
colorop->plane = plane;
 
list_add_tail(>head, >colorop_list);
colorop->index = config->num_colorop++;
 
/* add properties */
+
+   /* type */
+   prop = drm_property_create_enum(dev,
+   DRM_MODE_PROP_IMMUTABLE,
+   "TYPE", drm_colorop_type_enum_list,
+   ARRAY_SIZE(drm_colorop_type_enum_list));
+
+   if (!prop)
+   return -ENOMEM;
+
+   colorop->type_property = prop;
+
+   drm_object_attach_property(>base,
+  colorop->type_property,
+  colorop->type);
+
return ret;
 }
 EXPORT_SYMBOL(drm_colorop_init);
@@ -150,3 +172,23 @@ void drm_colorop_reset(struct drm_colorop *colorop)
__drm_colorop_reset(colorop, colorop->state);
 }
 EXPORT_SYMBOL(drm_colorop_reset);
+
+
+static const char * const colorop_type_name[] = {
+   [DRM_COLOROP_1D_CURVE] = "1D Curve",
+};
+
+/**
+ * drm_get_colorop_type_name - return a string for colorop type
+ * @type: colorop type to compute name of
+ *
+ * In contrast to the other drm_get_*_name functions this one here returns a
+ * const pointer and hence is threadsafe.
+ */
+const char *drm_get_colorop_type_name(enum drm_colorop_type type)
+{
+   if (WARN_ON(type >= ARRAY_SIZE(colorop_type_name)))
+   return "unknown";
+
+   return colorop_type_name[type];
+}
diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h
index e611f830f986..cb98c55f8387 100644
--- a/include/drm/drm_colorop.h
+++ b/include/drm/drm_colorop.h
@@ -103,6 +103,21 @@ struct drm_colorop {
/** @properties: property tracking for this plane */
struct drm_object_properties properties;
 
+   /**
+* @type:
+*
+* Read-only
+* Type of color operation
+

[RFC PATCH v4 14/42] drm/colorop: Add atomic state print for drm_colorop

2024-02-26 Thread Harry Wentland
Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/drm_atomic.c | 25 -
 include/drm/drm_colorop.h|  5 +
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index b400e32c9d39..3645c36d63b3 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -783,6 +783,19 @@ static int drm_atomic_plane_check(const struct 
drm_plane_state *old_plane_state,
return 0;
 }
 
+
+
+static void drm_atomic_colorop_print_state(struct drm_printer *p,
+   const struct drm_colorop_state *state)
+{
+   struct drm_colorop *colorop = state->colorop;
+
+   drm_printf(p, "colorop[%u]:\n", colorop->base.id);
+   drm_printf(p, "\ttype=%s\n", drm_get_colorop_type_name(colorop->type));
+   drm_printf(p, "\tbypass=%u\n", state->bypass);
+   drm_printf(p, "\tcurve_1d_type=%s\n", 
drm_get_colorop_curve_1d_type_name(state->curve_1d_type));
+}
+
 static void drm_atomic_plane_print_state(struct drm_printer *p,
const struct drm_plane_state *state)
 {
@@ -804,7 +817,8 @@ static void drm_atomic_plane_print_state(struct drm_printer 
*p,
drm_printf(p, "\tcolor-range=%s\n",
   drm_get_color_range_name(state->color_range));
drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed);
-
+   drm_printf(p, "\tcolor-pipeline=%d\n",
+  state->color_pipeline ? state->color_pipeline->base.id : 0);
if (plane->funcs->atomic_print_state)
plane->funcs->atomic_print_state(p, state);
 }
@@ -1840,6 +1854,7 @@ static void __drm_state_dump(struct drm_device *dev, 
struct drm_printer *p,
 bool take_locks)
 {
struct drm_mode_config *config = >mode_config;
+   struct drm_colorop *colorop;
struct drm_plane *plane;
struct drm_crtc *crtc;
struct drm_connector *connector;
@@ -1849,6 +1864,14 @@ static void __drm_state_dump(struct drm_device *dev, 
struct drm_printer *p,
if (!drm_drv_uses_atomic_modeset(dev))
return;
 
+   list_for_each_entry(colorop, >colorop_list, head) {
+   if (take_locks)
+   drm_modeset_lock(>plane->mutex, NULL);
+   drm_atomic_colorop_print_state(p, colorop->state);
+   if (take_locks)
+   drm_modeset_unlock(>plane->mutex);
+   }
+
list_for_each_entry(plane, >plane_list, head) {
if (take_locks)
drm_modeset_lock(>mutex, NULL);
diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h
index 8060988b5892..fc9a28d138b8 100644
--- a/include/drm/drm_colorop.h
+++ b/include/drm/drm_colorop.h
@@ -231,6 +231,11 @@ static inline unsigned int drm_colorop_index(const struct 
drm_colorop *colorop)
 #define drm_for_each_colorop(colorop, dev) \
list_for_each_entry(colorop, &(dev)->mode_config.colorop_list, head)
 
+const char *drm_get_color_pipeline_name(struct drm_colorop *colorop);
+
+const char *drm_get_colorop_type_name(enum drm_colorop_type type);
+const char *drm_get_colorop_curve_1d_type_name(enum drm_colorop_curve_1d_type 
type);
+
 void drm_colorop_set_next_property(struct drm_colorop *colorop, struct 
drm_colorop *next);
 
 
-- 
2.44.0



[RFC PATCH v4 28/42] drm/amd/display: Ignore deprecated props when plane_color_pipeline set

2024-02-26 Thread Harry Wentland
When the plane_color_pipeline bit is set we should ignore
deprecated properties, such as COLOR_RANGE and COLOR_ENCODING.

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f6575d7dee97..47c6fd33fd60 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4948,6 +4948,10 @@ fill_plane_color_attributes(const struct drm_plane_state 
*plane_state,
 
*color_space = COLOR_SPACE_SRGB;
 
+   /* Ignore properties when DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE is set */
+   if (plane_state->state && plane_state->state->plane_color_pipeline)
+   return 0;
+
/* DRM color properties only affect non-RGB formats. */
if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
return 0;
-- 
2.44.0



[RFC PATCH v4 09/42] drm/colorop: Introduce new drm_colorop mode object

2024-02-26 Thread Harry Wentland
This patches introduces a new drm_colorop mode object. This
object represents color transformations and can be used to
define color pipelines.

We also introduce the drm_colorop_state here, as well as
various helpers and state tracking bits.

v4:
 - Drop IOCTL definitions (Pekka)
 - add missing declaration (Chaitanya Kumar Borah)

v3:
 - Drop TODO for lock (it's handled in drm_modeset_drop_locks)
   (Melissa)
 - Don't get plane state when getting colorop state
 - Make some functions static (kernel test robot)

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/Makefile|   1 +
 drivers/gpu/drm/drm_atomic.c|  70 
 drivers/gpu/drm/drm_atomic_helper.c |  12 +++
 drivers/gpu/drm/drm_atomic_uapi.c   |  48 +
 drivers/gpu/drm/drm_colorop.c   | 152 ++
 drivers/gpu/drm/drm_mode_config.c   |   7 ++
 include/drm/drm_atomic.h|  82 +++
 include/drm/drm_atomic_uapi.h   |   1 +
 include/drm/drm_colorop.h   | 158 
 include/drm/drm_mode_config.h   |  18 
 include/drm/drm_plane.h |   2 +
 include/uapi/drm/drm_mode.h |   1 +
 12 files changed, 552 insertions(+)
 create mode 100644 drivers/gpu/drm/drm_colorop.c
 create mode 100644 include/drm/drm_colorop.h

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 104b42df2e95..4b14dcbb6117 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -16,6 +16,7 @@ drm-y := \
drm_client.o \
drm_client_modeset.o \
drm_color_mgmt.o \
+   drm_colorop.o \
drm_connector.o \
drm_crtc.o \
drm_displayid.o \
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index a91737adf8e7..62e87e6a9653 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -42,6 +42,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
@@ -108,6 +109,7 @@ void drm_atomic_state_default_release(struct 
drm_atomic_state *state)
kfree(state->connectors);
kfree(state->crtcs);
kfree(state->planes);
+   kfree(state->colorops);
kfree(state->private_objs);
 }
 EXPORT_SYMBOL(drm_atomic_state_default_release);
@@ -139,6 +141,10 @@ drm_atomic_state_init(struct drm_device *dev, struct 
drm_atomic_state *state)
sizeof(*state->planes), GFP_KERNEL);
if (!state->planes)
goto fail;
+   state->colorops = kcalloc(dev->mode_config.num_colorop,
+ sizeof(*state->colorops), GFP_KERNEL);
+   if (!state->colorops)
+   goto fail;
 
/*
 * Because drm_atomic_state can be committed asynchronously we need our
@@ -250,6 +256,20 @@ void drm_atomic_state_default_clear(struct 
drm_atomic_state *state)
state->planes[i].new_state = NULL;
}
 
+   for (i = 0; i < config->num_colorop; i++) {
+   struct drm_colorop *colorop = state->colorops[i].ptr;
+
+   if (!colorop)
+   continue;
+
+   drm_colorop_atomic_destroy_state(colorop,
+state->colorops[i].state);
+   state->colorops[i].ptr = NULL;
+   state->colorops[i].state = NULL;
+   state->colorops[i].old_state = NULL;
+   state->colorops[i].new_state = NULL;
+   }
+
for (i = 0; i < state->num_private_objs; i++) {
struct drm_private_obj *obj = state->private_objs[i].ptr;
 
@@ -571,6 +591,56 @@ drm_atomic_get_plane_state(struct drm_atomic_state *state,
 }
 EXPORT_SYMBOL(drm_atomic_get_plane_state);
 
+
+/**
+ * drm_atomic_get_colorop_state - get colorop state
+ * @state: global atomic state object
+ * @colorop: colorop to get state object for
+ *
+ * This function returns the colorop state for the given colorop, allocating it
+ * if needed. It will also grab the relevant plane lock to make sure that the
+ * state is consistent.
+ *
+ * Returns:
+ *
+ * Either the allocated state or the error code encoded into the pointer. When
+ * the error is EDEADLK then the w/w mutex code has detected a deadlock and the
+ * entire atomic sequence must be restarted. All other errors are fatal.
+ */
+struct drm_colorop_state *
+drm_atomic_get_colorop_state(struct drm_atomic_state *state,
+struct drm_colorop *colorop)
+{
+   int ret, index = drm_colorop_index(colorop);
+   struct drm_colorop_state *colorop_state;
+
+   WARN_ON(!state->acquire_ctx);
+
+   colorop_state = drm_atomic_get_existing_colorop_state(state, colorop);
+   if (colorop_state)
+   return colorop_state;
+
+   ret = drm_modeset_lock(>plane->mutex, state->acquire_ctx);
+   if (ret)
+   return ERR_PTR(ret);
+
+   colorop_state = drm_atomic_helper_colorop_duplicate_state(colorop);

[RFC PATCH v4 11/42] drm/colorop: Add 1D Curve subtype

2024-02-26 Thread Harry Wentland
v4:
 - Use drm_colorop_curve_1d_type_enum_list to get name (Pekka)
 - Create separate init function for 1D curve
 - Pass supported TFs into 1D curve init function

Signed-off-by: Harry Wentland 
Signed-off-by: Alex Hung 
Co-developed-by: Alex Hung 
---
 drivers/gpu/drm/drm_atomic_uapi.c | 18 +---
 drivers/gpu/drm/drm_colorop.c | 71 +++
 include/drm/drm_colorop.h | 24 +++
 3 files changed, 108 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index e3067c095c72..fdd540cfe24f 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -648,11 +648,17 @@ static int drm_atomic_colorop_set_property(struct 
drm_colorop *colorop,
struct drm_colorop_state *state, struct drm_file *file_priv,
struct drm_property *property, uint64_t val)
 {
-   drm_dbg_atomic(colorop->dev,
-   "[COLOROP:%d] unknown property [PROP:%d:%s]]\n",
-   colorop->base.id,
-   property->base.id, property->name);
-   return -EINVAL;
+   if (property == colorop->curve_1d_type_property) {
+   state->curve_1d_type = val;
+   } else {
+   drm_dbg_atomic(colorop->dev,
+  "[COLOROP:%d:%d] unknown property 
[PROP:%d:%s]]\n",
+  colorop->base.id, colorop->type,
+  property->base.id, property->name);
+   return -EINVAL;
+   }
+
+   return 0;
 }
 
 static int
@@ -662,6 +668,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop,
 {
if (property == colorop->type_property) {
*val = colorop->type;
+   } else if (property == colorop->curve_1d_type_property) {
+   *val = state->curve_1d_type;
} else {
return -EINVAL;
}
diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c
index 3a07a8fe2842..f4740b6115d1 100644
--- a/drivers/gpu/drm/drm_colorop.c
+++ b/drivers/gpu/drm/drm_colorop.c
@@ -36,6 +36,12 @@ static const struct drm_prop_enum_list 
drm_colorop_type_enum_list[] = {
{ DRM_COLOROP_1D_CURVE, "1D Curve" },
 };
 
+static const char * const colorop_curve_1d_type_names[] = {
+   [DRM_COLOROP_1D_CURVE_SRGB_EOTF] = "sRGB EOTF",
+   [DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF] = "sRGB Inverse EOTF",
+};
+
+
 /* Init Helpers */
 
 int drm_colorop_init(struct drm_device *dev, struct drm_colorop *colorop,
@@ -78,6 +84,56 @@ int drm_colorop_init(struct drm_device *dev, struct 
drm_colorop *colorop,
 }
 EXPORT_SYMBOL(drm_colorop_init);
 
+int drm_colorop_curve_1d_init(struct drm_device *dev, struct drm_colorop 
*colorop,
+ struct drm_plane *plane, u64 supported_tfs)
+{
+   struct drm_prop_enum_list enum_list[DRM_COLOROP_1D_CURVE_COUNT];
+   int i, len;
+
+   struct drm_property *prop;
+   int ret;
+
+   if (!supported_tfs) {
+   drm_err(dev,
+   "No supported TFs for new 1D curve colorop on 
[PLANE:%d:%s]\n",
+   plane->base.id, plane->name);
+   return -EINVAL;
+   }
+
+   if ((supported_tfs & -BIT(DRM_COLOROP_1D_CURVE_COUNT)) != 0) {
+   drm_err(dev, "Unknown TF provided on [PLANE:%d:%s]\n",
+   plane->base.id, plane->name);
+   return -EINVAL;
+   }
+
+   ret = drm_colorop_init(dev, colorop, plane, DRM_COLOROP_1D_CURVE);
+   if (ret)
+   return ret;
+
+   len = 0;
+   for (i = 0; i < DRM_COLOROP_1D_CURVE_COUNT; i++) {
+   if ((supported_tfs & BIT(i)) == 0)
+   continue;
+
+   enum_list[len].type = i;
+   enum_list[len].name = colorop_curve_1d_type_names[i];
+   len++;
+   }
+
+   /* initialize 1D curve only attribute */
+   prop = drm_property_create_enum(dev, DRM_MODE_PROP_ATOMIC, 
"CURVE_1D_TYPE",
+   enum_list, len);
+   if (!prop)
+   return -ENOMEM;
+
+   colorop->curve_1d_type_property = prop;
+   drm_object_attach_property(>base, 
colorop->curve_1d_type_property, 0);
+   drm_colorop_reset(colorop);
+
+   return 0;
+}
+EXPORT_SYMBOL(drm_colorop_curve_1d_init);
+
 static void __drm_atomic_helper_colorop_duplicate_state(struct drm_colorop 
*colorop,
struct 
drm_colorop_state *state)
 {
@@ -192,3 +248,18 @@ const char *drm_get_colorop_type_name(enum 
drm_colorop_type type)
 
return colorop_type_name[type];
 }
+
+/**
+ * drm_get_colorop_curve_1d_type_name - return a string for 1D curve type
+ * @range: 1d curve type to compute name of
+ *
+ * In contrast to the other drm_get_*_name functions this one here returns a
+ * const pointer and hence is threadsafe.
+ */
+const char 

[RFC PATCH v4 23/42] drm/vkms: add 3x4 matrix in color pipeline

2024-02-26 Thread Harry Wentland
We add two 3x4 matrices into the VKMS color pipeline. The reason
we're adding matrices is so that we can test that application
of a matrix and its inverse yields an output equal to the input
image.

One complication with the matrix implementation has to do with
the fact that the matrix entries are in signed-magnitude fixed
point, whereas the drm_fixed.h implementation uses 2s-complement.
The latter one is the one that we want for easy addition and
subtraction, so we convert all entries to 2s-complement.

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/vkms/vkms_colorop.c  | 32 +++-
 drivers/gpu/drm/vkms/vkms_composer.c | 27 +++
 include/drm/drm_colorop.h|  2 ++
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vkms/vkms_colorop.c 
b/drivers/gpu/drm/vkms/vkms_colorop.c
index d2db366da6d3..a0e54b2c1f7a 100644
--- a/drivers/gpu/drm/vkms/vkms_colorop.c
+++ b/drivers/gpu/drm/vkms/vkms_colorop.c
@@ -35,7 +35,37 @@ const int vkms_initialize_color_pipeline(struct drm_plane 
*plane, struct drm_pro
 
prev_op = op;
 
-   /* 2nd op: 1d curve */
+   /* 2nd op: 3x4 matrix */
+   op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
+   if (!op) {
+   DRM_ERROR("KMS: Failed to allocate colorop\n");
+   return -ENOMEM;
+   }
+
+   ret = drm_colorop_ctm_3x4_init(dev, op, plane);
+   if (ret)
+   return ret;
+
+   drm_colorop_set_next_property(prev_op, op);
+
+   prev_op = op;
+
+   /* 3rd op: 3x4 matrix */
+   op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
+   if (!op) {
+   DRM_ERROR("KMS: Failed to allocate colorop\n");
+   return -ENOMEM;
+   }
+
+   ret = drm_colorop_ctm_3x4_init(dev, op, plane);
+   if (ret)
+   return ret;
+
+   drm_colorop_set_next_property(prev_op, op);
+
+   prev_op = op;
+
+   /* 4th op: 1d curve */
op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
if (!op) {
DRM_ERROR("KMS: Failed to allocate colorop\n");
diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
b/drivers/gpu/drm/vkms/vkms_composer.c
index d2101fa55aa3..8bbfce651526 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -164,6 +164,30 @@ static void apply_lut(const struct vkms_crtc_state 
*crtc_state, struct line_buff
}
 }
 
+static void apply_3x4_matrix(struct pixel_argb_s32 *pixel, const struct 
drm_color_ctm_3x4 *matrix)
+{
+   s64 rf, gf, bf;
+
+   rf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[0]), 
drm_int2fixp(pixel->r)) +
+drm_fixp_mul(drm_sm2fixp(matrix->matrix[1]), 
drm_int2fixp(pixel->g)) +
+drm_fixp_mul(drm_sm2fixp(matrix->matrix[2]), 
drm_int2fixp(pixel->b)) +
+drm_sm2fixp(matrix->matrix[3]);
+
+   gf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[4]), 
drm_int2fixp(pixel->r)) +
+drm_fixp_mul(drm_sm2fixp(matrix->matrix[5]), 
drm_int2fixp(pixel->g)) +
+drm_fixp_mul(drm_sm2fixp(matrix->matrix[6]), 
drm_int2fixp(pixel->b)) +
+drm_sm2fixp(matrix->matrix[7]);
+
+   bf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[8]), 
drm_int2fixp(pixel->r)) +
+drm_fixp_mul(drm_sm2fixp(matrix->matrix[9]), 
drm_int2fixp(pixel->g)) +
+drm_fixp_mul(drm_sm2fixp(matrix->matrix[10]), 
drm_int2fixp(pixel->b)) +
+drm_sm2fixp(matrix->matrix[11]);
+
+   pixel->r = drm_fixp2int(rf);
+   pixel->g = drm_fixp2int(gf);
+   pixel->b = drm_fixp2int(bf);
+}
+
 static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop 
*colorop)
 {
/* TODO is this right? */
@@ -185,6 +209,9 @@ static void apply_colorop(struct pixel_argb_s32 *pixel, 
struct drm_colorop *colo
DRM_DEBUG_DRIVER("unkown colorop 1D curve type 
%d\n", colorop_state->curve_1d_type);
break;
}
+   } else if (colorop->type == DRM_COLOROP_CTM_3X4) {
+   if (colorop_state->data)
+   apply_3x4_matrix(pixel, (struct drm_color_ctm_3x4 *) 
colorop_state->data->data);
}
 
 }
diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h
index 4aee29e161d6..8710e550790c 100644
--- a/include/drm/drm_colorop.h
+++ b/include/drm/drm_colorop.h
@@ -224,6 +224,8 @@ int drm_colorop_init(struct drm_device *dev, struct 
drm_colorop *colorop,
 
 int drm_colorop_curve_1d_init(struct drm_device *dev, struct drm_colorop 
*colorop,
  struct drm_plane *plane, u64 supported_tfs);
+int drm_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop 
*colorop,
+struct drm_plane *plane);
 
 struct drm_colorop_state *
 drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colorop);
-- 
2.44.0



[RFC PATCH v4 17/42] drm/vkms: Add enumerated 1D curve colorop

2024-02-26 Thread Harry Wentland
This patch introduces a VKMS color pipeline that includes two
drm_colorops for named transfer functions. For now the only ones
supported are sRGB EOTF, sRGB Inverse EOTF, and a Linear TF.
We will expand this in the future but I don't want to do so
without accompanying IGT tests.

We introduce a new vkms_luts.c file that hard-codes sRGB EOTF,
sRGB Inverse EOTF, and a linear EOTF LUT. These have been
generated with 256 entries each as IGT is currently testing
only 8 bpc surfaces. We will likely need higher precision
but I'm reluctant to make that change without clear indication
that we need it. We'll revisit and, if necessary, regenerate
the LUTs when we have IGT tests for higher precision buffers.

v4:
 - Drop _tf_ from color_pipeline init function
 - Pass supported TFs into colorop init
 - Create bypass pipeline in DRM helper (Pekka)

v2:
 - Add commit description
 - Fix sRGB EOTF LUT definition
 - Add linear and sRGB inverse EOTF LUTs

Signed-off-by: Harry Wentland 
Signed-off-by: Alex Hung 
---
 drivers/gpu/drm/vkms/Makefile|   4 +-
 drivers/gpu/drm/vkms/vkms_colorop.c  |  70 +++
 drivers/gpu/drm/vkms/vkms_composer.c |  45 ++
 drivers/gpu/drm/vkms/vkms_drv.h  |   4 +
 drivers/gpu/drm/vkms/vkms_luts.c | 802 +++
 drivers/gpu/drm/vkms/vkms_luts.h |  12 +
 drivers/gpu/drm/vkms/vkms_plane.c|   2 +
 7 files changed, 938 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/vkms/vkms_colorop.c
 create mode 100644 drivers/gpu/drm/vkms/vkms_luts.c
 create mode 100644 drivers/gpu/drm/vkms/vkms_luts.h

diff --git a/drivers/gpu/drm/vkms/Makefile b/drivers/gpu/drm/vkms/Makefile
index 1b28a6a32948..c38455c46be4 100644
--- a/drivers/gpu/drm/vkms/Makefile
+++ b/drivers/gpu/drm/vkms/Makefile
@@ -6,6 +6,8 @@ vkms-y := \
vkms_formats.o \
vkms_crtc.o \
vkms_composer.o \
-   vkms_writeback.o
+   vkms_writeback.o \
+   vkms_colorop.o \
+   vkms_luts.o
 
 obj-$(CONFIG_DRM_VKMS) += vkms.o
diff --git a/drivers/gpu/drm/vkms/vkms_colorop.c 
b/drivers/gpu/drm/vkms/vkms_colorop.c
new file mode 100644
index ..d2db366da6d3
--- /dev/null
+++ b/drivers/gpu/drm/vkms/vkms_colorop.c
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define MAX_COLOR_PIPELINES 5
+
+static const u64 supported_tfs =
+   BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) |
+   BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF);
+
+const int vkms_initialize_color_pipeline(struct drm_plane *plane, struct 
drm_prop_enum_list *list)
+{
+
+   struct drm_colorop *op, *prev_op;
+   struct drm_device *dev = plane->dev;
+   int ret;
+
+   /* 1st op: 1d curve */
+   op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
+   if (!op) {
+   DRM_ERROR("KMS: Failed to allocate colorop\n");
+   return -ENOMEM;
+   }
+
+   ret = drm_colorop_curve_1d_init(dev, op, plane, supported_tfs);
+   if (ret)
+   return ret;
+
+   list->type = op->base.id;
+   list->name = kasprintf(GFP_KERNEL, "Color Pipeline %d", op->base.id);
+
+   prev_op = op;
+
+   /* 2nd op: 1d curve */
+   op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
+   if (!op) {
+   DRM_ERROR("KMS: Failed to allocate colorop\n");
+   return -ENOMEM;
+   }
+
+   ret = drm_colorop_curve_1d_init(dev, op, plane, supported_tfs);
+   if (ret)
+   return ret;
+
+   drm_colorop_set_next_property(prev_op, op);
+
+   return 0;
+}
+
+int vkms_initialize_colorops(struct drm_plane *plane)
+{
+   struct drm_prop_enum_list pipelines[MAX_COLOR_PIPELINES];
+   int len = 0;
+   int ret;
+
+   /* Add color pipeline */
+   ret = vkms_initialize_color_pipeline(plane, &(pipelines[len]));
+   if (ret)
+   return ret;
+   len++;
+
+   /* Create COLOR_PIPELINE property and attach */
+   drm_plane_create_color_pipeline_property(plane, pipelines, len);
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
b/drivers/gpu/drm/vkms/vkms_composer.c
index b90e446d5954..9493bdb1ba3f 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -12,6 +12,7 @@
 #include 
 
 #include "vkms_drv.h"
+#include "vkms_luts.h"
 
 static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha)
 {
@@ -163,6 +164,47 @@ static void apply_lut(const struct vkms_crtc_state 
*crtc_state, struct line_buff
}
 }
 
+static void pre_blend_color_transform(const struct vkms_plane_state 
*plane_state, struct line_buffer *output_buffer)
+{
+   struct drm_colorop *colorop = plane_state->base.base.color_pipeline;
+
+   while (colorop) {
+   struct drm_colorop_state *colorop_state;
+
+   if (!colorop)
+   return;
+
+   /* TODO this is probably wrong */
+   colorop_state = colorop->state;
+
+  

[RFC PATCH v4 22/42] drm/vkms: Use s32 for internal color pipeline precision

2024-02-26 Thread Harry Wentland
Certain operations require us to preserve values below 0.0 and
above 1.0 (0x0 and 0x respectively in 16 bpc unorm). One
such operation is a BT709 encoding operation followed by its
decoding operation, or the reverse.

We'll use s32 values as intermediate in and outputs of our
color operations, for the operations where it matters.

For now this won't apply to LUT operations. We might want to
update those to work on s32 as well, but it's unclear how
that should work for unorm LUT definitions. We'll revisit
that once we add LUT + CTM tests.

In order to allow for this we'll also invert the nesting of our
colorop processing loops. We now use the pixel iteration loop
on the outside and the colorop iteration on the inside.

v4:
 - Clarify that we're packing 16-bit UNORM into s32, not
   converting values to a different representation (Pekka)

v3:
 - Use new colorop->next pointer

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/vkms/vkms_composer.c | 57 +---
 drivers/gpu/drm/vkms/vkms_drv.h  |  4 ++
 2 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
b/drivers/gpu/drm/vkms/vkms_composer.c
index 25b786b49db0..d2101fa55aa3 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -164,7 +164,7 @@ static void apply_lut(const struct vkms_crtc_state 
*crtc_state, struct line_buff
}
 }
 
-static void apply_colorop(struct pixel_argb_u16 *pixel, struct drm_colorop 
*colorop)
+static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop 
*colorop)
 {
/* TODO is this right? */
struct drm_colorop_state *colorop_state = colorop->state;
@@ -191,25 +191,56 @@ static void apply_colorop(struct pixel_argb_u16 *pixel, 
struct drm_colorop *colo
 
 static void pre_blend_color_transform(const struct vkms_plane_state 
*plane_state, struct line_buffer *output_buffer)
 {
-   struct drm_colorop *colorop = plane_state->base.base.color_pipeline;
+   struct drm_colorop *colorop;
+   struct pixel_argb_s32 pixel;
 
-   while (colorop) {
-   struct drm_colorop_state *colorop_state;
+   for (size_t x = 0; x < output_buffer->n_pixels; x++) {
 
-   if (!colorop)
-   return;
+   /*
+* Some operations, such as applying a BT709 encoding matrix,
+* followed by a decoding matrix, require that we preserve
+* values above 1.0 and below 0.0 until the end of the pipeline.
+*
+* Pack the 16-bit UNORM values into s32 to give us head-room to
+* avoid clipping until we're at the end of the pipeline. Clip
+* intentionally at the end of the pipeline before packing
+* UNORM values back into u16.
+*/
+   pixel.a = output_buffer->pixels[x].a;
+   pixel.r = output_buffer->pixels[x].r;
+   pixel.g = output_buffer->pixels[x].g;
+   pixel.b = output_buffer->pixels[x].b;
 
-   /* TODO this is probably wrong */
-   colorop_state = colorop->state;
+   colorop = plane_state->base.base.color_pipeline;
+   while (colorop) {
+   struct drm_colorop_state *colorop_state;
 
-   if (!colorop_state)
-   return;
+   if (!colorop)
+   return;
+
+   /* TODO this is probably wrong */
+   colorop_state = colorop->state;
+
+   if (!colorop_state)
+   return;
 
-   for (size_t x = 0; x < output_buffer->n_pixels; x++)
if (!colorop_state->bypass)
-   apply_colorop(_buffer->pixels[x], 
colorop);
+   apply_colorop(, colorop);
 
-   colorop = colorop->next;
+   colorop = colorop->next;
+   }
+
+   /* clamp pixel */
+   pixel.a = max(min(pixel.a, 0x), 0x0);
+   pixel.r = max(min(pixel.r, 0x), 0x0);
+   pixel.g = max(min(pixel.g, 0x), 0x0);
+   pixel.b = max(min(pixel.b, 0x), 0x0);
+
+   /* put back to output_buffer */
+   output_buffer->pixels[x].a = pixel.a;
+   output_buffer->pixels[x].r = pixel.r;
+   output_buffer->pixels[x].g = pixel.g;
+   output_buffer->pixels[x].b = pixel.b;
}
 }
 
diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h
index 2bcc24c196a2..fadb7685a360 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.h
+++ b/drivers/gpu/drm/vkms/vkms_drv.h
@@ -36,6 +36,10 @@ struct vkms_frame_info {
unsigned int cpp;
 };
 
+struct pixel_argb_s32 {
+   s32 a, r, g, b;
+};
+
 struct pixel_argb_u16 {
u16 a, r, g, b;
 };

[RFC PATCH v4 16/42] drm/colorop: Add NEXT to colorop state print

2024-02-26 Thread Harry Wentland
v3:
 - Read NEXT ID from drm_colorop's next pointer

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/drm_atomic.c | 1 +
 include/drm/drm_colorop.h| 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 27a8805c5fa1..d82858dabf06 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -794,6 +794,7 @@ static void drm_atomic_colorop_print_state(struct 
drm_printer *p,
drm_printf(p, "\ttype=%s\n", drm_get_colorop_type_name(colorop->type));
drm_printf(p, "\tbypass=%u\n", state->bypass);
drm_printf(p, "\tcurve_1d_type=%s\n", 
drm_get_colorop_curve_1d_type_name(state->curve_1d_type));
+   drm_printf(p, "\tnext=%d\n", colorop->next ? colorop->next->base.id : 
0);
 }
 
 static void drm_atomic_plane_print_state(struct drm_printer *p,
diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h
index fc9a28d138b8..e85ed5aa68d0 100644
--- a/include/drm/drm_colorop.h
+++ b/include/drm/drm_colorop.h
@@ -237,6 +237,8 @@ const char *drm_get_colorop_type_name(enum drm_colorop_type 
type);
 const char *drm_get_colorop_curve_1d_type_name(enum drm_colorop_curve_1d_type 
type);
 
 void drm_colorop_set_next_property(struct drm_colorop *colorop, struct 
drm_colorop *next);
+uint32_t drm_colorop_get_next_property(struct drm_colorop *colorop);
+struct drm_colorop *drm_colorop_get_next(struct drm_colorop *colorop);
 
 
 #endif /* __DRM_COLOROP_H__ */
-- 
2.44.0



[RFC PATCH v4 04/42] drm/vkms: Round fixp2int conversion in lerp_u16

2024-02-26 Thread Harry Wentland
fixp2int always rounds down, fixp2int_ceil rounds up. We need
the new fixp2int_round.

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/vkms/vkms_composer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
b/drivers/gpu/drm/vkms/vkms_composer.c
index 3c99fb8b54e2..e70cd473e3be 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -98,7 +98,7 @@ static u16 lerp_u16(u16 a, u16 b, s64 t)
 
s64 delta = drm_fixp_mul(b_fp - a_fp,  t);
 
-   return drm_fixp2int(a_fp + delta);
+   return drm_fixp2int_round(a_fp + delta);
 }
 
 static s64 get_lut_index(const struct vkms_color_lut *lut, u16 channel_value)
-- 
2.44.0



[RFC PATCH v4 18/42] drm/vkms: Add kunit tests for linear and sRGB LUTs

2024-02-26 Thread Harry Wentland
Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/vkms/tests/vkms_color_tests.c | 37 ++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vkms/tests/vkms_color_tests.c 
b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c
index fc73e48aa57c..e6ac01dee830 100644
--- a/drivers/gpu/drm/vkms/tests/vkms_color_tests.c
+++ b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c
@@ -31,7 +31,6 @@ const struct vkms_color_lut test_linear_lut = {
.channel_value2index_ratio = 0xf000fll
 };
 
-
 static void vkms_color_test_get_lut_index(struct kunit *test)
 {
int i;
@@ -40,6 +39,19 @@ static void vkms_color_test_get_lut_index(struct kunit *test)
 
for (i = 0; i < TEST_LUT_SIZE; i++)
KUNIT_EXPECT_EQ(test, 
drm_fixp2int_ceil(get_lut_index(_linear_lut, test_linear_array[i].red)), 
i);
+
+   KUNIT_EXPECT_EQ(test, drm_fixp2int(get_lut_index(_eotf, 0x0)), 
0x0);
+   KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_eotf, 
0x0)), 0x0);
+   KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_eotf, 
0x101)), 0x1);
+   KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_eotf, 
0x202)), 0x2);
+
+   KUNIT_EXPECT_EQ(test, drm_fixp2int(get_lut_index(_inv_eotf, 0x0)), 
0x0);
+   KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_inv_eotf, 
0x0)), 0x0);
+   KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_inv_eotf, 
0x101)), 0x1);
+   KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_inv_eotf, 
0x202)), 0x2);
+
+   KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_eotf, 
0xfefe)), 0xfe);
+   KUNIT_EXPECT_EQ(test, drm_fixp2int_ceil(get_lut_index(_eotf, 
0x)), 0xff);
 }
 
 static void vkms_color_test_lerp(struct kunit *test)
@@ -148,9 +160,32 @@ static void vkms_color_test_lerp(struct kunit *test)
KUNIT_EXPECT_EQ(test, lerp_u16(0x0, 0x1, 0x8000), 0x1);
 }
 
+static void vkms_color_test_linear(struct kunit *test)
+{
+   for (int i = 0; i < LUT_SIZE; i++) {
+   int linear = apply_lut_to_channel_value(_eotf, i * 
0x101, LUT_RED);
+   KUNIT_EXPECT_EQ(test, DIV_ROUND_CLOSEST(linear, 0x101), i);
+   }
+}
+
+static void vkms_color_srgb_inv_srgb(struct kunit *test)
+{
+   u16 srgb, final;
+
+   for (int i = 0; i < LUT_SIZE; i++) {
+   srgb = apply_lut_to_channel_value(_eotf, i * 0x101, 
LUT_RED);
+   final = apply_lut_to_channel_value(_inv_eotf, srgb, 
LUT_RED);
+
+   KUNIT_EXPECT_GE(test, final / 0x101, i-1);
+   KUNIT_EXPECT_LE(test, final / 0x101, i+1);
+   }
+}
+
 static struct kunit_case vkms_color_test_cases[] = {
KUNIT_CASE(vkms_color_test_get_lut_index),
KUNIT_CASE(vkms_color_test_lerp),
+   KUNIT_CASE(vkms_color_test_linear),
+   KUNIT_CASE(vkms_color_srgb_inv_srgb),
{}
 };
 
-- 
2.44.0



[RFC PATCH v4 12/42] drm/colorop: Add BYPASS property

2024-02-26 Thread Harry Wentland
We want to be able to bypass each colorop at all times.
Introduce a new BYPASS boolean property for this.

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/drm_atomic_uapi.c |  6 +-
 drivers/gpu/drm/drm_colorop.c | 16 
 include/drm/drm_colorop.h | 20 
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index fdd540cfe24f..87f00131be11 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -648,7 +648,9 @@ static int drm_atomic_colorop_set_property(struct 
drm_colorop *colorop,
struct drm_colorop_state *state, struct drm_file *file_priv,
struct drm_property *property, uint64_t val)
 {
-   if (property == colorop->curve_1d_type_property) {
+   if (property == colorop->bypass_property) {
+   state->bypass = val;
+   } else if (property == colorop->curve_1d_type_property) {
state->curve_1d_type = val;
} else {
drm_dbg_atomic(colorop->dev,
@@ -668,6 +670,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop,
 {
if (property == colorop->type_property) {
*val = colorop->type;
+   } else if (property == colorop->bypass_property) {
+   *val = state->bypass;
} else if (property == colorop->curve_1d_type_property) {
*val = state->curve_1d_type;
} else {
diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c
index f4740b6115d1..29979816a2d1 100644
--- a/drivers/gpu/drm/drm_colorop.c
+++ b/drivers/gpu/drm/drm_colorop.c
@@ -80,6 +80,18 @@ int drm_colorop_init(struct drm_device *dev, struct 
drm_colorop *colorop,
   colorop->type_property,
   colorop->type);
 
+   /* bypass */
+   /* TODO can we reuse the mode_config->active_prop? */
+   prop = drm_property_create_bool(dev, DRM_MODE_PROP_ATOMIC,
+   "BYPASS");
+   if (!prop)
+   return -ENOMEM;
+
+   colorop->bypass_property = prop;
+   drm_object_attach_property(>base,
+  colorop->bypass_property,
+  1);
+
return ret;
 }
 EXPORT_SYMBOL(drm_colorop_init);
@@ -123,6 +135,7 @@ int drm_colorop_curve_1d_init(struct drm_device *dev, 
struct drm_colorop *coloro
/* initialize 1D curve only attribute */
prop = drm_property_create_enum(dev, DRM_MODE_PROP_ATOMIC, 
"CURVE_1D_TYPE",
enum_list, len);
+
if (!prop)
return -ENOMEM;
 
@@ -138,6 +151,8 @@ static void 
__drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colo
struct 
drm_colorop_state *state)
 {
memcpy(state, colorop->state, sizeof(*state));
+
+   state->bypass = true;
 }
 
 struct drm_colorop_state *
@@ -189,6 +204,7 @@ static void __drm_colorop_state_reset(struct 
drm_colorop_state *colorop_state,
  struct drm_colorop *colorop)
 {
colorop_state->colorop = colorop;
+   colorop_state->bypass = true;
 }
 
 /**
diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h
index 539db900f16f..28aa5c1c309e 100644
--- a/include/drm/drm_colorop.h
+++ b/include/drm/drm_colorop.h
@@ -45,6 +45,14 @@ struct drm_colorop_state {
 
/* colorop properties */
 
+   /**
+* @bypass:
+*
+* True if colorop shall be bypassed. False if colorop is
+* enabled.
+*/
+   bool bypass;
+
/**
 * @curve_1d_type:
 *
@@ -132,6 +140,18 @@ struct drm_colorop {
 */
struct drm_property *type_property;
 
+   /**
+* @bypass_property:
+*
+* Boolean property to control enablement of the color
+* operation. Setting bypass to "true" shall always be supported
+* in order to allow compositors to quickly fall back to
+* alternate methods of color processing. This is important
+* since setting color operations can fail due to unique
+* HW constraints.
+*/
+   struct drm_property *bypass_property;
+
/**
 * @curve_1d_type:
 *
-- 
2.44.0



[RFC PATCH v4 05/42] drm/vkms: Create separate Kconfig file for VKMS

2024-02-26 Thread Harry Wentland
This aligns with most other DRM drivers and will allow
us to add new VKMS config options without polluting
the DRM Kconfig.

v3:
 - Change SPDX to GPL-2.0-only to match DRM KConfig
   SPDX (Simon)

Signed-off-by: Harry Wentland 
Reviewed-by: Simon Ser 
---
 drivers/gpu/drm/Kconfig  | 14 +-
 drivers/gpu/drm/vkms/Kconfig | 15 +++
 2 files changed, 16 insertions(+), 13 deletions(-)
 create mode 100644 drivers/gpu/drm/vkms/Kconfig

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 2520db0b776e..e3ea8793cb8a 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -289,19 +289,7 @@ config DRM_VGEM
  as used by Mesa's software renderer for enhanced performance.
  If M is selected the module will be called vgem.
 
-config DRM_VKMS
-   tristate "Virtual KMS (EXPERIMENTAL)"
-   depends on DRM && MMU
-   select DRM_KMS_HELPER
-   select DRM_GEM_SHMEM_HELPER
-   select CRC32
-   default n
-   help
- Virtual Kernel Mode-Setting (VKMS) is used for testing or for
- running GPU in a headless machines. Choose this option to get
- a VKMS.
-
- If M is selected the module will be called vkms.
+source "drivers/gpu/drm/vkms/Kconfig"
 
 source "drivers/gpu/drm/exynos/Kconfig"
 
diff --git a/drivers/gpu/drm/vkms/Kconfig b/drivers/gpu/drm/vkms/Kconfig
new file mode 100644
index ..b9ecdebecb0b
--- /dev/null
+++ b/drivers/gpu/drm/vkms/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config DRM_VKMS
+   tristate "Virtual KMS (EXPERIMENTAL)"
+   depends on DRM && MMU
+   select DRM_KMS_HELPER
+   select DRM_GEM_SHMEM_HELPER
+   select CRC32
+   default n
+   help
+ Virtual Kernel Mode-Setting (VKMS) is used for testing or for
+ running GPU in a headless machines. Choose this option to get
+ a VKMS.
+
+ If M is selected the module will be called vkms.
-- 
2.44.0



[RFC PATCH v4 07/42] drm/vkms: Avoid reading beyond LUT array

2024-02-26 Thread Harry Wentland
When the floor LUT index (drm_fixp2int(lut_index) is the last
index of the array the ceil LUT index will point to an entry
beyond the array. Make sure we guard against it and use the
value of the floor LUT index.

v3:
 - Drop bits from commit description that didn't contribute
   anything of value

Fixes: db1f254f2cfaf ("drm/vkms: Add support to 1D gamma LUT")
Signed-off-by: Harry Wentland 
Cc: Arthur Grillo 
Reviewed-by: Melissa Wen 
---
 drivers/gpu/drm/vkms/vkms_composer.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
b/drivers/gpu/drm/vkms/vkms_composer.c
index d178f2a400f6..b90e446d5954 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -123,6 +123,8 @@ static u16 apply_lut_to_channel_value(const struct 
vkms_color_lut *lut, u16 chan
  enum lut_channel channel)
 {
s64 lut_index = get_lut_index(lut, channel_value);
+   u16 *floor_lut_value, *ceil_lut_value;
+   u16 floor_channel_value, ceil_channel_value;
 
/*
 * This checks if `struct drm_color_lut` has any gap added by the 
compiler
@@ -130,11 +132,15 @@ static u16 apply_lut_to_channel_value(const struct 
vkms_color_lut *lut, u16 chan
 */
static_assert(sizeof(struct drm_color_lut) == sizeof(__u16) * 4);
 
-   u16 *floor_lut_value = (__u16 *)>base[drm_fixp2int(lut_index)];
-   u16 *ceil_lut_value = (__u16 *)>base[drm_fixp2int_ceil(lut_index)];
+   floor_lut_value = (__u16 *)>base[drm_fixp2int(lut_index)];
+   if (drm_fixp2int(lut_index) == (lut->lut_length - 1))
+   /* We're at the end of the LUT array, use same value for ceil 
and floor */
+   ceil_lut_value = floor_lut_value;
+   else
+   ceil_lut_value = (__u16 
*)>base[drm_fixp2int_ceil(lut_index)];
 
-   u16 floor_channel_value = floor_lut_value[channel];
-   u16 ceil_channel_value = ceil_lut_value[channel];
+   floor_channel_value = floor_lut_value[channel];
+   ceil_channel_value = ceil_lut_value[channel];
 
return lerp_u16(floor_channel_value, ceil_channel_value,
lut_index & DRM_FIXED_DECIMAL_MASK);
-- 
2.44.0



[RFC PATCH v4 06/42] drm/vkms: Add kunit tests for VKMS LUT handling

2024-02-26 Thread Harry Wentland
Debugging LUT math is much easier when we can unit test
it. Add kunit functionality to VKMS and add tests for
 - get_lut_index
 - lerp_u16

v4:
 - Test the critical points of the lerp function (Pekka)

v3:
 - Use include way of testing static functions (Arthur)

Signed-off-by: Harry Wentland 
Cc: Arthur Grillo 
---
 drivers/gpu/drm/vkms/Kconfig  |   5 +
 drivers/gpu/drm/vkms/tests/.kunitconfig   |   4 +
 drivers/gpu/drm/vkms/tests/vkms_color_tests.c | 163 ++
 drivers/gpu/drm/vkms/vkms_composer.c  |   8 +-
 4 files changed, 178 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/vkms/tests/.kunitconfig
 create mode 100644 drivers/gpu/drm/vkms/tests/vkms_color_tests.c

diff --git a/drivers/gpu/drm/vkms/Kconfig b/drivers/gpu/drm/vkms/Kconfig
index b9ecdebecb0b..c1f8b343ff0e 100644
--- a/drivers/gpu/drm/vkms/Kconfig
+++ b/drivers/gpu/drm/vkms/Kconfig
@@ -13,3 +13,8 @@ config DRM_VKMS
  a VKMS.
 
  If M is selected the module will be called vkms.
+
+config DRM_VKMS_KUNIT_TESTS
+   tristate "Tests for VKMS" if !KUNIT_ALL_TESTS
+   depends on DRM_VKMS && KUNIT
+   default KUNIT_ALL_TESTS
diff --git a/drivers/gpu/drm/vkms/tests/.kunitconfig 
b/drivers/gpu/drm/vkms/tests/.kunitconfig
new file mode 100644
index ..70e378228cbd
--- /dev/null
+++ b/drivers/gpu/drm/vkms/tests/.kunitconfig
@@ -0,0 +1,4 @@
+CONFIG_KUNIT=y
+CONFIG_DRM=y
+CONFIG_DRM_VKMS=y
+CONFIG_DRM_VKMS_KUNIT_TESTS=y
diff --git a/drivers/gpu/drm/vkms/tests/vkms_color_tests.c 
b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c
new file mode 100644
index ..fc73e48aa57c
--- /dev/null
+++ b/drivers/gpu/drm/vkms/tests/vkms_color_tests.c
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include 
+
+#include 
+
+#define TEST_LUT_SIZE 16
+
+static struct drm_color_lut test_linear_array[TEST_LUT_SIZE] = {
+   { 0x0, 0x0, 0x0, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+   { 0x, 0x, 0x, 0 },
+};
+
+const struct vkms_color_lut test_linear_lut = {
+   .base = test_linear_array,
+   .lut_length = TEST_LUT_SIZE,
+   .channel_value2index_ratio = 0xf000fll
+};
+
+
+static void vkms_color_test_get_lut_index(struct kunit *test)
+{
+   int i;
+
+   KUNIT_EXPECT_EQ(test, drm_fixp2int(get_lut_index(_linear_lut, 
test_linear_array[0].red)), 0);
+
+   for (i = 0; i < TEST_LUT_SIZE; i++)
+   KUNIT_EXPECT_EQ(test, 
drm_fixp2int_ceil(get_lut_index(_linear_lut, test_linear_array[i].red)), 
i);
+}
+
+static void vkms_color_test_lerp(struct kunit *test)
+{
+   /*** half-way round down ***/
+   s64 t = 0x8000 - 1;
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x0, 0x10, t), 0x8);
+
+   /* odd a */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0x10, t), 0x8);
+
+   /* odd b */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0xf, t), 0x8);
+
+   /* b = a */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x10, t), 0x10);
+
+   /* b = a + 1 */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x11, t), 0x10);
+
+
+   /*** half-way round up ***/
+   t = 0x8000;
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x0, 0x10, t), 0x8);
+
+   /* odd a */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0x10, t), 0x9);
+
+   /* odd b */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0xf, t), 0x8);
+
+   /* b = a */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x10, t), 0x10);
+
+   /* b = a + 1 */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x11, t), 0x11);
+
+   /*** t = 0.0 ***/
+   t = 0x0;
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x0, 0x10, t), 0x0);
+
+   /* odd a */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0x10, t), 0x1);
+
+   /* odd b */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0xf, t), 0x1);
+
+   /* b = a */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x10, t), 0x10);
+
+   /* b = a + 1 */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x11, t), 0x10);
+
+   /*** t = 1.0 ***/
+   t = 0x1;
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x0, 0x10, t), 0x10);
+
+   /* odd a */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0x10, t), 0x10);
+
+   /* odd b */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x1, 0xf, t), 0xf);
+
+   /* b = a */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x10, t), 0x10);
+
+   /* b = a + 1 */
+   KUNIT_EXPECT_EQ(test, lerp_u16(0x10, 0x11, t), 0x11);
+
+
+   /*** t = 0.0 + 1 ***/
+   t = 0x0 + 1;
+   

[RFC PATCH v4 01/42] drm: Don't treat 0 as -1 in drm_fixp2int_ceil

2024-02-26 Thread Harry Wentland
Unit testing this in VKMS shows that passing 0 into
this function returns -1, which is highly counter-
intuitive. Fix it by checking whether the input is
>= 0 instead of > 0.

Fixes: 64566b5e767f9 ("drm: Add drm_fixp_from_fraction and drm_fixp2int_ceil")
Signed-off-by: Harry Wentland 
Reviewed-by: Simon Ser 
Reviewed-by: Melissa Wen 
---
 include/drm/drm_fixed.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h
index 6ea339d5de08..0c9f917a4d4b 100644
--- a/include/drm/drm_fixed.h
+++ b/include/drm/drm_fixed.h
@@ -95,7 +95,7 @@ static inline int drm_fixp2int_round(s64 a)
 
 static inline int drm_fixp2int_ceil(s64 a)
 {
-   if (a > 0)
+   if (a >= 0)
return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE);
else
return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE);
-- 
2.44.0



[RFC PATCH v4 00/42] Color Pipeline API w/ VKMS

2024-02-26 Thread Harry Wentland
This is an RFC set for a color pipeline API, along with a sample
implementation in VKMS. All the key API bits are here. VKMS now
supports two named transfer function colorops and two matrix
colorops. We have IGT tests that check all four of these colorops
with a pixel-by-pixel comparison that checks that these colorops
do what we expect them to do with a +/- 1 8 bpc code point margin.

The big new change with v4 is the addition of an amdgpu color
pipeline, for all AMD GPUs with DCN 3 and newer. Amdgpu now support
the following:

1. 1D Curve EOTF
2. 3x4 CTM
3. Multiplier
4. 1D Curve Inverse EOTF
5. 1D LUT
6. 1D Curve EOTF
7. 1D LUT

The supported curves for the 1D Curve type are:
- sRGB EOTF and its inverse
- PQ EOTF, scaled to [0.0, 125.0] and its inverse
- BT.2020/BT.709 OETF and its inverse

Note that the 1st and 5th colorops take the EOTF or Inverse
OETF while the 3rd colorop takes the Inverse EOTF or OETF.

We are working on two more ops for amdgpu, the HDR multiplier
and the 3DLUT, which will give us this:

1. 1D Curve EOTF
2. 3x4 CTM
3. HDR Multiplier
4. 1D Curve Inverse EOTF
5. 1D LUT
6. 3D LUT
7. 1D Curve EOTF
8. 1D LUT

This, essentially mirrors the color pipeline used by gamescope
and presented by Melissa Wen, with the exception of the DEGAM
LUT, which is not currently used. See
[1] 
https://indico.freedesktop.org/event/4/contributions/186/attachments/138/218/xdc2023-TheRainbowTreasureMap-MelissaWen.pdf

After this we'd like to also add the following ops:
- Scaler (Informational only)
- Color Encoding, to replace drm_plane's COLOR_ENCODING
- Color Range, to replace drm_plane's COLOR_RANGE

This patchset is grouped as follows:
 - Patches 1-3: couple general patches/fixes
 - Patches 4-7: introduce kunit to VKMS
 - Patch 7: description of motivation and details behind the
Color Pipeline API. If you're reading nothing else
but are interested in the topic I highly recommend
you take a look at this.
 - Patches 7-27: DRM core and VKMS changes for color pipeline API
 - Patches 28-40: DRM core and amdgpu changes for color pipeline API

VKMS patches could still be improved in a few ways, though the
payoff might be limited and I would rather focus on other work
at the moment. The most obvious thing to improve would be to
eliminate the hard-coded LUTs for identity, and sRGB, and replace
them with fixed-point math instead.

There are plenty of things that I would like to see here but
haven't had a chance to look at. These will (hopefully) be
addressed in future iterations, either in VKMS or amdgpu:
 - Clear documentation for each drm_colorop_type
 - Add custom LUT colorops to VKMS
 - Add pre-blending 3DLUT
 - How to support HW which can't bypass entire pipeline?
 - Add ability to create colorops that don't have BYPASS
 - Can we do a LOAD / COMMIT model for LUTs (and other properties)?
 - read-only scaling colorop which defines scaling taps and position
 - read-only color format colorop to define supported color formats
   for a pipeline
 - named matrices, for things like converting YUV to RGB

IGT tests can be found at
https://gitlab.freedesktop.org/hwentland/igt-gpu-tools/-/merge_requests/1

IGT patches are also being sent to the igt-dev mailing list.

If you prefer a gitlab MR for review you can find it at
https://gitlab.freedesktop.org/hwentland/linux/-/merge_requests/5

v4:
 - Add amdgpu color pipeline (WIP)
 - Don't block setting of deprecated properties, instead pass client cap
   to atomic check so drivers can ignore these props
 - Drop IOCTL definitions (Pekka)
 - Use enum property for colorop TYPE (Pekka)
 - A few cleanups to the docs (Pekka)
 - Rework the TYPE enum to name relation to avoid code duplication (Pekka)
 - Add missing function declarations (Chaitanya Kumar Borah)
 - Allow setting of NEXT property to NULL in _set_ function (Chaitanya Kumar 
Borah)
 - Add helper for creation of pipeline drm_plane property (Pekka)
 - Always create Bypass pipeline (Pekka)
 - A bunch of changes to VKMS kunit tests (Pekka)
 - Fix index in CTM doc (Pekka)

v3:
 - Abandon IOCTLs and discover colorops as clients iterate the pipeline
 - Remove need for libdrm
 - Add color_pipeline client cap and make mutually exclusive with
   COLOR_RANGE and COLOR_ENCODING properties
 - add CTM colorop to VKMS
 - Use include way for kunit testing static functions (Arthur)
 - Make TYPE a range property
 - Move enum drm_colorop_type to uapi header
 - and a bunch of smaller bits that are highlighted in the relevant commit
   description

v2:
 - Rebased on drm-misc-next
 - Introduce a VKMS Kunit so we can test LUT functionality in vkms_composer
 - Incorporate feedback in color_pipeline.rst doc
 - Add support for sRGB inverse EOTF
 - Add 2nd enumerated TF colorop to VKMS
 - Fix LUTs and some issues with applying LUTs in VKMS

Cc: Ville Syrjala 
Cc: Pekka Paalanen 
Cc: Simon Ser 
Cc: Harry Wentland 
Cc: Melissa Wen 
Cc: Jonas Ådahl 
Cc: Sebastian Wick 
Cc: Shashank Sharma 
Cc: Alexander Goins 
Cc: 

[RFC PATCH v4 03/42] drm: Correctly round for fixp2int_round

2024-02-26 Thread Harry Wentland
A value of 0x8000 and higher should round up, and
below should round down. VKMS Kunit tests for lerp_u16
showed that this is not the case. Fix it.

1 << (DRM_FIXED_POINT_HALF - 1) =
1 << 15 = 0x8000

This is not 0.5, but 0.0762939453125.

Instead of some smart math use a simple if/else to
round up or down. This helps people like me to understand
what the function does.

Signed-off-by: Harry Wentland 
---
 include/drm/drm_fixed.h | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h
index cb842ba80ddd..8ee549f68537 100644
--- a/include/drm/drm_fixed.h
+++ b/include/drm/drm_fixed.h
@@ -77,6 +77,8 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B)
 #define DRM_FIXED_DIGITS_MASK  (~DRM_FIXED_DECIMAL_MASK)
 #define DRM_FIXED_EPSILON  1LL
 #define DRM_FIXED_ALMOST_ONE   (DRM_FIXED_ONE - DRM_FIXED_EPSILON)
+#define DRM_FIXED_FRACTIONAL   0xll
+#define DRM_FIXED_HALF 0x8000ll
 
 /**
  * @drm_sm2fixp
@@ -106,11 +108,6 @@ static inline int drm_fixp2int(s64 a)
return ((s64)a) >> DRM_FIXED_POINT;
 }
 
-static inline int drm_fixp2int_round(s64 a)
-{
-   return drm_fixp2int(a + (1 << (DRM_FIXED_POINT_HALF - 1)));
-}
-
 static inline int drm_fixp2int_ceil(s64 a)
 {
if (a >= 0)
@@ -119,6 +116,14 @@ static inline int drm_fixp2int_ceil(s64 a)
return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE);
 }
 
+static inline int drm_fixp2int_round(s64 a)
+{
+   if ((a & DRM_FIXED_FRACTIONAL) < DRM_FIXED_HALF)
+   return drm_fixp2int(a);
+   else
+   return drm_fixp2int_ceil(a);
+}
+
 static inline unsigned drm_fixp_msbset(s64 a)
 {
unsigned shift, sign = (a >> 63) & 1;
-- 
2.44.0



Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs

2024-02-26 Thread Michel Dänzer
On 2024-02-26 17:53, Christian König wrote:
> Am 26.02.24 um 17:50 schrieb Michel Dänzer:
>> On 2024-02-26 17:46, Michel Dänzer wrote:
>>> On 2024-02-26 17:34, Christian König wrote:
>>>
 My question is why has it worked so far? I mean we are not doing this 
 since yesterday and the problem only shows up now?
>>> Yes, Wayland compositors are only starting to try and make use of this now.
>> To expand on this, mutter will want to do something like this as well sooner 
>> or later. I suspect it's the same for others like kwin, sway etc.
> 
> Yeah, but we have done similar things with X decades before. E.g. basically 
> the client sends a BO to the server for displaying it.

The scenario in https://gitlab.freedesktop.org/mesa/mesa/-/issues/10635 is 
direct scanout of a client buffer on a secondary dGPU, while the compositor 
uses the APU as the primary compositing GPU.

AFAIR Xorg has never supported direct scanout of client buffers in this 
scenario. Secondary GPU scanout is always done from a separate local buffer 
allocated by Xorg / the driver.

This is Wayland compositors pushing the envelope.


> Why we suddenly have to juggle with the fact that it is DMA-buf shared with 
> another device?

The problematic case is if the Wayland compositor has to produce a composited 
frame (e.g. due to a notification or other window popping up over the 
fullscreen window), but the client hasn't attached a new buffer to the 
fullscreen surface, so the compositor has to use the contents of the same 
client buffer which is still being scanned out by the dGPU for compositing with 
the APU.


-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer



Re: [PATCH] drm/amdkfd: Use SQC when TCP would fail in gfx10.1 context save

2024-02-26 Thread Jay Cornwall
On 2/23/2024 16:08, Laurent Morichetti wrote:
> Similarly to gfx9, gfx10.1 drops vector stores when an xnack error is
> raised. To work around this issue, use scalar stores instead of vector
> stores when trapsts.xnack_error == 1.
> 
> Signed-off-by: Laurent Morichetti 

Reviewed-by: Jay Cornwall 



Re: [PATCH v3 3/3] drm/amdgpu: sync page table freeing with tlb flush

2024-02-26 Thread Christian König

Am 26.02.24 um 17:52 schrieb Philip Yang:


On 2024-02-23 08:42, Shashank Sharma wrote:


This patch:
- adds a new list in amdgou_vm to hold the VM PT entries being freed
- waits for the TLB flush using the vm->tlb_flush_fence
- actually frees the PT BOs

V2: rebase
V3: Do not attach the tlb_fence to the entries, rather add the entries
 to a list and delay their freeing (Christian)

Cc: Christian König
Cc: Alex Deucher
Cc: Felix Kuehling
Cc: Rajneesh Bhardwaj
Signed-off-by: Shashank Sharma
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  6 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  6 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 51 ---
  3 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 67c690044b97..eebb73f2c2ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -939,6 +939,10 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
/* Makes sure no PD/PT is freed before the flush */
dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
   DMA_RESV_USAGE_BOOKKEEP);
+
+   mutex_lock(>tlb_fence_lock);
+   vm->tlb_fence_last = *fence;
+   mutex_unlock(>tlb_fence_lock);
}
  
  	amdgpu_res_first(pages_addr ? NULL : res, offset,

@@ -2212,6 +2216,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
INIT_LIST_HEAD(>freed);
INIT_LIST_HEAD(>done);
INIT_LIST_HEAD(>pt_freed);
+   INIT_LIST_HEAD(>tlb_flush_waitlist);
INIT_WORK(>pt_free_work, amdgpu_vm_pt_free_work);
INIT_KFIFO(vm->faults);
  
@@ -2244,6 +2249,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,

vm->last_unlocked = dma_fence_get_stub();
vm->generation = 0;
  
+	mutex_init(>tlb_fence_lock);

mutex_init(>eviction_lock);
vm->evicting = false;
vm->tlb_fence_context = dma_fence_context_alloc(1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 8e6fd25d07b7..77f10ed80973 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -334,6 +334,10 @@ struct amdgpu_vm {
uint64_t*tlb_seq_cpu_addr;
uint64_ttlb_fence_context;
  
+	struct mutex 		tlb_fence_lock;

+   struct dma_fence*tlb_fence_last;
+   struct list_headtlb_flush_waitlist;
+
atomic64_t  kfd_last_flushed_seq;
  
  	/* How many times we had to re-generate the page tables */

@@ -379,6 +383,8 @@ struct amdgpu_vm {
  
  	/* cached fault info */

struct amdgpu_vm_fault_info fault_info;
+
+   int count_bos;
  };
  
  struct amdgpu_vm_manager {

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 95dc0afdaffb..57ea95c5c085 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -643,13 +643,13 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base 
*entry)
if (!entry->bo)
return;
  
-	entry->bo->vm_bo = NULL;

shadow = amdgpu_bo_shadowed(entry->bo);
if (shadow) {
ttm_bo_set_bulk_move(>tbo, NULL);
amdgpu_bo_unref();
}
ttm_bo_set_bulk_move(>bo->tbo, NULL);
+   entry->bo->vm_bo = NULL;
  
  	spin_lock(>vm->status_lock);

list_del(>vm_status);
@@ -657,6 +657,38 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base 
*entry)
amdgpu_bo_unref(>bo);
  }
  
+static void amdgpu_vm_pt_flush_waitlist(struct amdgpu_vm *vm)

+{
+   struct amdgpu_vm_bo_base *entry, *next;
+   LIST_HEAD(tlb_flush_waitlist);
+
+   if (!vm || list_empty(>tlb_flush_waitlist))
+   return;
+
+   /* Wait for pending TLB flush before freeing PT BOs */
+   mutex_lock(>tlb_fence_lock);
+   if (vm->tlb_fence_last && !dma_fence_is_signaled(vm->tlb_fence_last)) {
+   if (dma_fence_wait_timeout(vm->tlb_fence_last, false,
+  MAX_SCHEDULE_TIMEOUT) <= 0) {
+   DRM_ERROR("Timedout waiting for TLB flush, not freeing PT 
BOs\n");
+   mutex_unlock(>tlb_fence_lock);
+   return;
+   }
+
+   vm->tlb_fence_last = NULL;
+   }
+
+   /* Save the waitlist locally and reset the flushlist */
+   list_splice_init(>tlb_flush_waitlist, _flush_waitlist);
+   mutex_unlock(>tlb_fence_lock);
+
+   /* Now free the entries */
+   list_for_each_entry_safe(entry, next, _flush_waitlist, vm_status) {
+   if (entry)
+   amdgpu_vm_pt_free(entry);
+   }
+}
+
  void amdgpu_vm_pt_free_work(struct work_struct 

Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs

2024-02-26 Thread Christian König

Am 26.02.24 um 17:50 schrieb Michel Dänzer:

On 2024-02-26 17:46, Michel Dänzer wrote:

On 2024-02-26 17:34, Christian König wrote:


My question is why has it worked so far? I mean we are not doing this since 
yesterday and the problem only shows up now?

Yes, Wayland compositors are only starting to try and make use of this now.

To expand on this, mutter will want to do something like this as well sooner or 
later. I suspect it's the same for others like kwin, sway etc.


Yeah, but we have done similar things with X decades before. E.g. 
basically the client sends a BO to the server for displaying it.


Why we suddenly have to juggle with the fact that it is DMA-buf shared 
with another device? This has worked for at least a decade before.


Regards,
Christian.


Re: [PATCH v3 3/3] drm/amdgpu: sync page table freeing with tlb flush

2024-02-26 Thread Philip Yang

  


On 2024-02-23 08:42, Shashank Sharma
  wrote:


  This patch:
- adds a new list in amdgou_vm to hold the VM PT entries being freed
- waits for the TLB flush using the vm->tlb_flush_fence
- actually frees the PT BOs

V2: rebase
V3: Do not attach the tlb_fence to the entries, rather add the entries
to a list and delay their freeing (Christian)

Cc: Christian König 
Cc: Alex Deucher 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  6 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  6 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 51 ---
 3 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 67c690044b97..eebb73f2c2ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -939,6 +939,10 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		/* Makes sure no PD/PT is freed before the flush */
 		dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
    DMA_RESV_USAGE_BOOKKEEP);
+
+		mutex_lock(>tlb_fence_lock);
+		vm->tlb_fence_last = *fence;
+		mutex_unlock(>tlb_fence_lock);
 	}
 
 	amdgpu_res_first(pages_addr ? NULL : res, offset,
@@ -2212,6 +2216,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	INIT_LIST_HEAD(>freed);
 	INIT_LIST_HEAD(>done);
 	INIT_LIST_HEAD(>pt_freed);
+	INIT_LIST_HEAD(>tlb_flush_waitlist);
 	INIT_WORK(>pt_free_work, amdgpu_vm_pt_free_work);
 	INIT_KFIFO(vm->faults);
 
@@ -2244,6 +2249,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	vm->last_unlocked = dma_fence_get_stub();
 	vm->generation = 0;
 
+	mutex_init(>tlb_fence_lock);
 	mutex_init(>eviction_lock);
 	vm->evicting = false;
 	vm->tlb_fence_context = dma_fence_context_alloc(1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 8e6fd25d07b7..77f10ed80973 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -334,6 +334,10 @@ struct amdgpu_vm {
 	uint64_t		*tlb_seq_cpu_addr;
 	uint64_t		tlb_fence_context;
 
+	struct mutex 		tlb_fence_lock;
+	struct dma_fence	*tlb_fence_last;
+	struct list_head	tlb_flush_waitlist;
+
 	atomic64_t		kfd_last_flushed_seq;
 
 	/* How many times we had to re-generate the page tables */
@@ -379,6 +383,8 @@ struct amdgpu_vm {
 
 	/* cached fault info */
 	struct amdgpu_vm_fault_info fault_info;
+
+	int count_bos;
 };
 
 struct amdgpu_vm_manager {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 95dc0afdaffb..57ea95c5c085 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -643,13 +643,13 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
 	if (!entry->bo)
 		return;
 
-	entry->bo->vm_bo = NULL;
 	shadow = amdgpu_bo_shadowed(entry->bo);
 	if (shadow) {
 		ttm_bo_set_bulk_move(>tbo, NULL);
 		amdgpu_bo_unref();
 	}
 	ttm_bo_set_bulk_move(>bo->tbo, NULL);
+	entry->bo->vm_bo = NULL;
 
 	spin_lock(>vm->status_lock);
 	list_del(>vm_status);
@@ -657,6 +657,38 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
 	amdgpu_bo_unref(>bo);
 }
 
+static void amdgpu_vm_pt_flush_waitlist(struct amdgpu_vm *vm)
+{
+	struct amdgpu_vm_bo_base *entry, *next;
+	LIST_HEAD(tlb_flush_waitlist);
+
+	if (!vm || list_empty(>tlb_flush_waitlist))
+		return;
+
+	/* Wait for pending TLB flush before freeing PT BOs */
+	mutex_lock(>tlb_fence_lock);
+	if (vm->tlb_fence_last && !dma_fence_is_signaled(vm->tlb_fence_last)) {
+		if (dma_fence_wait_timeout(vm->tlb_fence_last, false,
+	   MAX_SCHEDULE_TIMEOUT) <= 0) {
+			DRM_ERROR("Timedout waiting for TLB flush, not freeing PT BOs\n");
+			mutex_unlock(>tlb_fence_lock);
+			return;
+		}
+
+		vm->tlb_fence_last = NULL;
+	}
+
+	/* Save the waitlist locally and reset the flushlist */
+	list_splice_init(>tlb_flush_waitlist, _flush_waitlist);
+	mutex_unlock(>tlb_fence_lock);
+
+	/* Now free the entries */
+	list_for_each_entry_safe(entry, next, _flush_waitlist, vm_status) {
+		if (entry)
+			amdgpu_vm_pt_free(entry);
+	}
+}
+
 void amdgpu_vm_pt_free_work(struct work_struct *work)
 {
 	struct amdgpu_vm_bo_base *entry, *next;
@@ -673,7 +705,7 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)
 	amdgpu_bo_reserve(vm->root.bo, true);
 
 	list_for_each_entry_safe(entry, next, _freed, vm_status)
-		amdgpu_vm_pt_free(entry);
+		list_move(>vm_status, >tlb_flush_waitlist);
 
 	amdgpu_bo_unreserve(vm->root.bo);
 }
@@ -708,11 +740,17 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
 		return;
 	}
 
-	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
-		amdgpu_vm_pt_free(entry);
+	mutex_lock(>tlb_fence_lock);
+
+	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) {
+		if 

Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs

2024-02-26 Thread Michel Dänzer
On 2024-02-26 17:46, Michel Dänzer wrote:
> On 2024-02-26 17:34, Christian König wrote:
> 
>> My question is why has it worked so far? I mean we are not doing this since 
>> yesterday and the problem only shows up now?
> 
> Yes, Wayland compositors are only starting to try and make use of this now.

To expand on this, mutter will want to do something like this as well sooner or 
later. I suspect it's the same for others like kwin, sway etc.


-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer



Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs

2024-02-26 Thread Michel Dänzer
On 2024-02-26 17:34, Christian König wrote:
> Am 26.02.24 um 17:27 schrieb Michel Dänzer:
>> On 2024-02-26 16:58, Christian König wrote:
>>> Am 23.02.24 um 17:43 schrieb Michel Dänzer:
 On 2024-02-23 11:04, Michel Dänzer wrote:
> On 2024-02-23 10:34, Christian König wrote:
>> Am 23.02.24 um 09:11 schrieb Michel Dänzer:
>>> On 2024-02-23 08:06, Christian König wrote:
 Am 22.02.24 um 18:28 schrieb Michel Dänzer:
> From: Michel Dänzer 
>
> Pinning the BO storage to VRAM for scanout would make it inaccessible
> to non-P2P dma-buf importers.
 Thinking more about it I don't think we can do this.

 Using the BO in a ping/pong fashion for scanout and DMA-buf is 
 actually valid, you just can't do both at the same time.

 And if I'm not completely mistaken we actually have use cases for this 
 at the moment,
>>> Those use cases don't have P2P & CONFIG_DMABUF_MOVE_NOTIFY?
>> Nope, we are basically talking about unit tests and examples for inter 
>> device operations.
> Sounds like the "no user-space regressions" rule might not apply then.
 To clarify what I mean by that:

 "We can't fix this issue, because it would break some unit tests and 
 examples" is similar to saying "We can't fix this KMS bug, because there 
 are IGT tests expecting the buggy behaviour". In practice, the latter do 
 get fixed, along with the IGT tests.
>>> The problem here is that this is not a bug, but intentional behavior. 
>>> Exporting BOs and using them in scanout in a ping/pong fashion is perfectly 
>>> valid.
>> The problem with the status quo is that it requires amdgpu-specific 
>> knowledge and worst-case pessimization in user space.
> 
> Yeah, I'm perfectly aware of that. But that approach here really doesn't 
> seems to be valid.

It's the only known sane approach at this point. There's no other proposal 
which allows using both BO sharing and scanout without pessimizing or hitting 
seemingly random CS ioctl failures.


>>> We have use cases which depend on this behavior and I'm not going to break 
>>> those to fix your use case.
>> It's not "my" use case, it's a Wayland compositor trying to make use of BO 
>> sharing and scanout without always pessimizing for the worst case.
>>
>> That's surely more of a real-world use case than unit tests and examples.
> 
> I've looked a bit deeper into this and we have told customers for the last 
> ~10 years or so that this is how it is supposed to work and that they can use 
> this approach.
> 
> So this is much more than unit tests and examples, we are changing existing 
> behavior which is clearly not a bug but intentional and have a very high 
> chance to break valid use cases.

"Very high chance" sounds like you still don't know of any actual real-world 
use case relying on it though?


> My question is why has it worked so far? I mean we are not doing this since 
> yesterday and the problem only shows up now?

Yes, Wayland compositors are only starting to try and make use of this now.


-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer



Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs

2024-02-26 Thread Christian König

Am 26.02.24 um 17:27 schrieb Michel Dänzer:

On 2024-02-26 16:58, Christian König wrote:

Am 23.02.24 um 17:43 schrieb Michel Dänzer:

On 2024-02-23 11:04, Michel Dänzer wrote:

On 2024-02-23 10:34, Christian König wrote:

Am 23.02.24 um 09:11 schrieb Michel Dänzer:

On 2024-02-23 08:06, Christian König wrote:

Am 22.02.24 um 18:28 schrieb Michel Dänzer:

From: Michel Dänzer

Pinning the BO storage to VRAM for scanout would make it inaccessible
to non-P2P dma-buf importers.

Thinking more about it I don't think we can do this.

Using the BO in a ping/pong fashion for scanout and DMA-buf is actually valid, 
you just can't do both at the same time.

And if I'm not completely mistaken we actually have use cases for this at the 
moment,

Those use cases don't have P2P & CONFIG_DMABUF_MOVE_NOTIFY?

Nope, we are basically talking about unit tests and examples for inter device 
operations.

Sounds like the "no user-space regressions" rule might not apply then.

To clarify what I mean by that:

"We can't fix this issue, because it would break some unit tests and examples" is similar 
to saying "We can't fix this KMS bug, because there are IGT tests expecting the buggy 
behaviour". In practice, the latter do get fixed, along with the IGT tests.

The problem here is that this is not a bug, but intentional behavior. Exporting 
BOs and using them in scanout in a ping/pong fashion is perfectly valid.

The problem with the status quo is that it requires amdgpu-specific knowledge 
and worst-case pessimization in user space.


Yeah, I'm perfectly aware of that. But that approach here really doesn't 
seems to be valid.



We have use cases which depend on this behavior and I'm not going to break 
those to fix your use case.

It's not "my" use case, it's a Wayland compositor trying to make use of BO 
sharing and scanout without always pessimizing for the worst case.

That's surely more of a real-world use case than unit tests and examples.


I've looked a bit deeper into this and we have told customers for the 
last ~10 years or so that this is how it is supposed to work and that 
they can use this approach.


So this is much more than unit tests and examples, we are changing 
existing behavior which is clearly not a bug but intentional and have a 
very high chance to break valid use cases.


My question is why has it worked so far? I mean we are not doing this 
since yesterday and the problem only shows up now?


While I see the use case something is still missing in that picture.

Christian.

Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs

2024-02-26 Thread Michel Dänzer
On 2024-02-26 16:58, Christian König wrote:
> Am 23.02.24 um 17:43 schrieb Michel Dänzer:
>> On 2024-02-23 11:04, Michel Dänzer wrote:
>>> On 2024-02-23 10:34, Christian König wrote:
 Am 23.02.24 um 09:11 schrieb Michel Dänzer:
> On 2024-02-23 08:06, Christian König wrote:
>> Am 22.02.24 um 18:28 schrieb Michel Dänzer:
>>> From: Michel Dänzer 
>>>
>>> Pinning the BO storage to VRAM for scanout would make it inaccessible
>>> to non-P2P dma-buf importers.
>> Thinking more about it I don't think we can do this.
>>
>> Using the BO in a ping/pong fashion for scanout and DMA-buf is actually 
>> valid, you just can't do both at the same time.
>>
>> And if I'm not completely mistaken we actually have use cases for this 
>> at the moment,
> Those use cases don't have P2P & CONFIG_DMABUF_MOVE_NOTIFY?
 Nope, we are basically talking about unit tests and examples for inter 
 device operations.
>>> Sounds like the "no user-space regressions" rule might not apply then.
>> To clarify what I mean by that:
>>
>> "We can't fix this issue, because it would break some unit tests and 
>> examples" is similar to saying "We can't fix this KMS bug, because there are 
>> IGT tests expecting the buggy behaviour". In practice, the latter do get 
>> fixed, along with the IGT tests.
> 
> The problem here is that this is not a bug, but intentional behavior. 
> Exporting BOs and using them in scanout in a ping/pong fashion is perfectly 
> valid.

The problem with the status quo is that it requires amdgpu-specific knowledge 
and worst-case pessimization in user space.


> We have use cases which depend on this behavior and I'm not going to break 
> those to fix your use case.

It's not "my" use case, it's a Wayland compositor trying to make use of BO 
sharing and scanout without always pessimizing for the worst case.

That's surely more of a real-world use case than unit tests and examples.


-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer



Re: [PATCH v3 1/3] drm/amdgpu: replace TLB seq callback with HW seq

2024-02-26 Thread Sharma, Shashank
[AMD Official Use Only - General]

Please feel free to use:
Reviewed-by: Shashank Sharma 

Regards
Shashank

From: Christian König 
Sent: Monday, February 26, 2024 3:45 PM
To: Sharma, Shashank ; amd-gfx@lists.freedesktop.org 

Cc: Koenig, Christian ; Deucher, Alexander 
; Kuehling, Felix ; 
Bhardwaj, Rajneesh 
Subject: Re: [PATCH v3 1/3] drm/amdgpu: replace TLB seq callback with HW seq

Am 23.02.24 um 14:42 schrieb Shashank Sharma:
> From: Christian König 
>
> The callback we installed for the SDMA update were actually pretty
> horrible. since we now have seq64 use that one and HW seq writes
> instead.
>
> V2:(Shashank)
>   - rebased on amd-drm-staging-next
>   - changed amdgpu_seq64_gpu_addr
>
> Cc: Christian König 
> Cc: Alex Deucher 
> Cc: Felix Kuehling 
> Cc: Rajneesh Bhardwaj 
> Signed-off-by: Christian König 

Shashank can I get an rb on this patch here so that I can push it to
amd-staging-drm-next?

The patch was basically just to double check if the seq64 code works as
intended.

Thanks,
Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c   | 14 
>   drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h   |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 79 -
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  | 27 ++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  3 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c   |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c |  5 ++
>   7 files changed, 42 insertions(+), 89 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
> index 3d0d56087d41..300dc79fa2b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
> @@ -199,6 +199,20 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 
> va)
>__clear_bit(bit_pos, adev->seq64.used);
>   }
>
> +/**
> + * amdgpu_seq64_gpu_addr - Calculate GPU addr from va
> + *
> + * @adev: amdgpu_device pointer
> + * @va: virtual address in client address space
> + *
> + * Calculate the GART address for a VA.
> + */
> +u64 amdgpu_seq64_gpu_addr(struct amdgpu_device *adev, u64 va)
> +{
> + return va - amdgpu_seq64_get_va_base(adev) +
> + amdgpu_bo_gpu_offset(adev->seq64.sbo);
> +}
> +
>   /**
>* amdgpu_seq64_fini - Cleanup seq64 driver
>*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
> index 4203b2ab318d..63e8ac0a2057 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
> @@ -43,6 +43,7 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 
> gpu_addr);
>   int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> struct amdgpu_bo_va **bo_va);
>   void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv 
> *fpriv);
> +u64 amdgpu_seq64_gpu_addr(struct amdgpu_device *adev, u64 va);
>
>   #endif
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ed4a8c5d26d7..0960e0a665d3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -111,21 +111,6 @@ struct amdgpu_prt_cb {
>struct dma_fence_cb cb;
>   };
>
> -/**
> - * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush 
> sequence
> - */
> -struct amdgpu_vm_tlb_seq_struct {
> - /**
> -  * @vm: pointer to the amdgpu_vm structure to set the fence sequence on
> -  */
> - struct amdgpu_vm *vm;
> -
> - /**
> -  * @cb: callback
> -  */
> - struct dma_fence_cb cb;
> -};
> -
>   /**
>* amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
>*
> @@ -862,23 +847,6 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>return r;
>   }
>
> -/**
> - * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
> - * @fence: unused
> - * @cb: the callback structure
> - *
> - * Increments the tlb sequence to make sure that future CS execute a VM 
> flush.
> - */
> -static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
> -  struct dma_fence_cb *cb)
> -{
> - struct amdgpu_vm_tlb_seq_struct *tlb_cb;
> -
> - tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
> - atomic64_inc(_cb->vm->tlb_seq);
> - kfree(tlb_cb);
> -}
> -
>   /**
>* amdgpu_vm_update_range - update a range in the vm page table
>*
> @@ -911,7 +879,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
> struct amdgpu_vm *vm,
>   struct dma_fence **fence)
>   {
>struct amdgpu_vm_update_params params;
> - struct amdgpu_vm_tlb_seq_struct *tlb_cb;
>struct amdgpu_res_cursor cursor;
>enum amdgpu_sync_mode sync_mode;
>int r, idx;
> @@ -919,12 +886,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
> struct amdgpu_vm *vm,
>if 

Re: [PATCH] drm/amdgpu: disable ring_muxer if mcbp is off

2024-02-26 Thread Christian König

Am 23.02.24 um 17:30 schrieb Alex Deucher:

On Fri, Feb 23, 2024 at 4:48 AM Pierre-Eric Pelloux-Prayer
 wrote:

Using the ring_muxer without preemption adds overhead for no
reason since mcbp cannot be triggered.

Moving back to a single queue in this case also helps when
high priority app are used: in this case the gpu_scheduler
priority handling will work as expected - much better than
ring_muxer with its 2 independant schedulers competing for
the same hardware queue.

This change requires moving amdgpu_device_set_mcbp above
amdgpu_device_ip_early_init because we use adev->gfx.mcbp.

Signed-off-by: Pierre-Eric Pelloux-Prayer 

Reviewed-by: Alex Deucher 


Acked-by: Christian König 




---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  4 ++--
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 21 -
  2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d534e192e260..40516d24026c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4054,13 +4054,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 return r;
 }

+   amdgpu_device_set_mcbp(adev);
+
 /* early init functions */
 r = amdgpu_device_ip_early_init(adev);
 if (r)
 return r;

-   amdgpu_device_set_mcbp(adev);
-
 /* Get rid of things like offb */
 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, 
_kms_driver);
 if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 169d45268ef6..f682f830f7f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2080,7 +2080,7 @@ static int gfx_v9_0_sw_init(void *handle)
 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;

 /* disable scheduler on the real ring */
-   ring->no_scheduler = true;
+   ring->no_scheduler = adev->gfx.mcbp;
 ring->vm_hub = AMDGPU_GFXHUB(0);
 r = amdgpu_ring_init(adev, ring, 1024, >gfx.eop_irq,
  AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
@@ -2090,7 +2090,7 @@ static int gfx_v9_0_sw_init(void *handle)
 }

 /* set up the software rings */
-   if (adev->gfx.num_gfx_rings) {
+   if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
 ring = >gfx.sw_gfx_ring[i];
 ring->ring_obj = NULL;
@@ -2181,7 +2181,7 @@ static int gfx_v9_0_sw_fini(void *handle)
 int i;
 struct amdgpu_device *adev = (struct amdgpu_device *)handle;

-   if (adev->gfx.num_gfx_rings) {
+   if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
 amdgpu_ring_fini(>gfx.sw_gfx_ring[i]);
 amdgpu_ring_mux_fini(>gfx.muxer);
@@ -5910,11 +5910,14 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,

 switch (me_id) {
 case 0:
-   if (adev->gfx.num_gfx_rings &&
-   !amdgpu_mcbp_handle_trailing_fence_irq(>gfx.muxer)) {
-   /* Fence signals are handled on the software rings*/
-   for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
-   amdgpu_fence_process(>gfx.sw_gfx_ring[i]);
+   if (adev->gfx.num_gfx_rings) {
+   if (!adev->gfx.mcbp) {
+   amdgpu_fence_process(>gfx.gfx_ring[0]);
+   } else if 
(!amdgpu_mcbp_handle_trailing_fence_irq(>gfx.muxer)) {
+   /* Fence signals are handled on the software 
rings*/
+   for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+   
amdgpu_fence_process(>gfx.sw_gfx_ring[i]);
+   }
 }
 break;
 case 1:
@@ -7051,7 +7054,7 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device 
*adev)
 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 adev->gfx.gfx_ring[i].funcs = _v9_0_ring_funcs_gfx;

-   if (adev->gfx.num_gfx_rings) {
+   if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
 adev->gfx.sw_gfx_ring[i].funcs = 
_v9_0_sw_ring_funcs_gfx;
 }
--
2.40.1





Re: [PATCH 1/3] drm/amdgpu: Refuse to create a KMS FB for non-P2P exported dma-bufs

2024-02-26 Thread Christian König

Am 23.02.24 um 17:43 schrieb Michel Dänzer:

On 2024-02-23 11:04, Michel Dänzer wrote:

On 2024-02-23 10:34, Christian König wrote:

Am 23.02.24 um 09:11 schrieb Michel Dänzer:

On 2024-02-23 08:06, Christian König wrote:

Am 22.02.24 um 18:28 schrieb Michel Dänzer:

From: Michel Dänzer 

Pinning the BO storage to VRAM for scanout would make it inaccessible
to non-P2P dma-buf importers.

Thinking more about it I don't think we can do this.

Using the BO in a ping/pong fashion for scanout and DMA-buf is actually valid, 
you just can't do both at the same time.

And if I'm not completely mistaken we actually have use cases for this at the 
moment,

Those use cases don't have P2P & CONFIG_DMABUF_MOVE_NOTIFY?

Nope, we are basically talking about unit tests and examples for inter device 
operations.

Sounds like the "no user-space regressions" rule might not apply then.

To clarify what I mean by that:

"We can't fix this issue, because it would break some unit tests and examples" is similar 
to saying "We can't fix this KMS bug, because there are IGT tests expecting the buggy 
behaviour". In practice, the latter do get fixed, along with the IGT tests.


The problem here is that this is not a bug, but intentional behavior. 
Exporting BOs and using them in scanout in a ping/pong fashion is 
perfectly valid.


We have use cases which depend on this behavior and I'm not going to 
break those to fix your use case.


So as far as I can see this approach here is a no-go.

Regards,
Christian.



Re: [PATCH v3 2/3] drm/amdgpu: implement TLB flush fence

2024-02-26 Thread Philip Yang

  


On 2024-02-23 11:58, Philip Yang wrote:


  
  
  
  On 2024-02-23 08:42, Shashank Sharma
wrote:
  
  
From: Christian König 

The problem is that when (for example) 4k pages are replaced
with a single 2M page we need to wait for change to be flushed
out by invalidating the TLB before the PT can be freed.

Solve this by moving the TLB flush into a DMA-fence object which
can be used to delay the freeing of the PT BOs until it is signaled.

V2: (Shashank)
- rebase
- set dma_fence_error only in case of error
- add tlb_flush fence only when PT/PD BO is locked (Felix)
- use vm->pasid when f is NULL (Mukul)

Cc: Christian Koenig 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Cc: Alex Deucher 
Signed-off-by: Christian König 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  10 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   4 +
 .../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c  | 106 ++
 4 files changed, 122 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4c989da4d2f3..fdbb3d770c7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
 	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
 	atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
-	amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
+	amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+	amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
 	amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0960e0a665d3..67c690044b97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -932,6 +932,15 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (r)
 		goto error_unlock;
 
+	/* Prepare a TLB flush fence to be attached to PTs */
+	if (!unlocked && params.needs_flush && vm->is_compute_context) {
+		amdgpu_vm_tlb_fence_create(adev, vm, fence);
+
+		/* Makes sure no PD/PT is freed before the flush */
+		dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+   DMA_RESV_USAGE_BOOKKEEP);
+	}
+
 	amdgpu_res_first(pages_addr ? NULL : res, offset,
 			 (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, );
 	while (cursor.remaining) {
@@ -2237,6 +2246,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	mutex_init(>eviction_lock);
 	vm->evicting = false;
+	vm->tlb_fence_context = dma_fence_context_alloc(1);
 
 	r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
 false, , xcp_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ac9380afcb69..8e6fd25d07b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -332,6 +332,7 @@ struct amdgpu_vm {
 	atomic64_t		tlb_seq;
 	uint64_t		tlb_seq_va;
 	uint64_t		*tlb_seq_cpu_addr;
+	uint64_t		tlb_fence_context;
 
 	atomic64_t		kfd_last_flushed_seq;
 
@@ -585,5 +586,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
   uint64_t addr,
   uint32_t status,
   unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
new file mode 100644
index ..569681badd7c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 

Re: [PATCH v3 2/3] drm/amdgpu: implement TLB flush fence

2024-02-26 Thread Christian König




Am 23.02.24 um 17:58 schrieb Philip Yang:



On 2024-02-23 08:42, Shashank Sharma wrote:

From: Christian König

The problem is that when (for example) 4k pages are replaced
with a single 2M page we need to wait for change to be flushed
out by invalidating the TLB before the PT can be freed.

Solve this by moving the TLB flush into a DMA-fence object which
can be used to delay the freeing of the PT BOs until it is signaled.

V2: (Shashank)
 - rebase
 - set dma_fence_error only in case of error
 - add tlb_flush fence only when PT/PD BO is locked (Felix)
 - use vm->pasid when f is NULL (Mukul)

Cc: Christian Koenig
Cc: Felix Kuehling
Cc: Rajneesh Bhardwaj
Cc: Alex Deucher
Signed-off-by: Christian König
Signed-off-by: Shashank Sharma
---
  drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  10 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   4 +
  .../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c  | 106 ++
  4 files changed, 122 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4c989da4d2f3..fdbb3d770c7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
amdgpu_kms.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
-   amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
+   amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+   amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0960e0a665d3..67c690044b97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -932,6 +932,15 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
if (r)
goto error_unlock;
  
+	/* Prepare a TLB flush fence to be attached to PTs */

+   if (!unlocked && params.needs_flush && vm->is_compute_context) {
+   amdgpu_vm_tlb_fence_create(adev, vm, fence);
+
+   /* Makes sure no PD/PT is freed before the flush */
+   dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+  DMA_RESV_USAGE_BOOKKEEP);
+   }
+
amdgpu_res_first(pages_addr ? NULL : res, offset,
 (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, );
while (cursor.remaining) {
@@ -2237,6 +2246,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
  
  	mutex_init(>eviction_lock);

vm->evicting = false;
+   vm->tlb_fence_context = dma_fence_context_alloc(1);
  
  	r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,

false, , xcp_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ac9380afcb69..8e6fd25d07b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -332,6 +332,7 @@ struct amdgpu_vm {
atomic64_t  tlb_seq;
uint64_ttlb_seq_va;
uint64_t*tlb_seq_cpu_addr;
+   uint64_ttlb_fence_context;
  
  	atomic64_t		kfd_last_flushed_seq;
  
@@ -585,5 +586,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,

  uint64_t addr,
  uint32_t status,
  unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+struct amdgpu_vm *vm,
+struct dma_fence **fence);
  
  #endif

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
new file mode 100644
index ..569681badd7c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the 

Re: [PATCH] drm/amdgpu: Fix missing break in ATOM_ARG_IMM Case of atom_get_src_int()

2024-02-26 Thread Deucher, Alexander
[Public]

Reviewed-by: Alex Deucher 

From: SHANMUGAM, SRINIVASAN 
Sent: Saturday, February 24, 2024 1:38 AM
To: Koenig, Christian ; Deucher, Alexander 

Cc: amd-gfx@lists.freedesktop.org ; SHANMUGAM, 
SRINIVASAN ; Jammy Zhou 
Subject: [PATCH] drm/amdgpu: Fix missing break in ATOM_ARG_IMM Case of 
atom_get_src_int()

Missing break statement in the ATOM_ARG_IMM case of a switch statement,
adds the missing break statement, ensuring that the program's control
flow is as intended.

Fixes the below:
drivers/gpu/drm/amd/amdgpu/atom.c:323 atom_get_src_int() warn: ignoring 
unreachable code.

Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)")
Cc: Jammy Zhou 
Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
 drivers/gpu/drm/amd/amdgpu/atom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c 
b/drivers/gpu/drm/amd/amdgpu/atom.c
index b888613f653f..72362df352f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -320,7 +320,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, 
uint8_t attr,
 DEBUG("IMM 0x%02X\n", val);
 return val;
 }
-   return 0;
+   break;
 case ATOM_ARG_PLL:
 idx = U8(*ptr);
 (*ptr)++;
--
2.34.1



Re: [PATCH v3 1/3] drm/amdgpu: replace TLB seq callback with HW seq

2024-02-26 Thread Christian König

Am 23.02.24 um 14:42 schrieb Shashank Sharma:

From: Christian König 

The callback we installed for the SDMA update were actually pretty
horrible. since we now have seq64 use that one and HW seq writes
instead.

V2:(Shashank)
  - rebased on amd-drm-staging-next
  - changed amdgpu_seq64_gpu_addr

Cc: Christian König 
Cc: Alex Deucher 
Cc: Felix Kuehling 
Cc: Rajneesh Bhardwaj 
Signed-off-by: Christian König 


Shashank can I get an rb on this patch here so that I can push it to 
amd-staging-drm-next?


The patch was basically just to double check if the seq64 code works as 
intended.


Thanks,
Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c   | 14 
  drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h   |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 79 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  | 27 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c   |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c |  5 ++
  7 files changed, 42 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
index 3d0d56087d41..300dc79fa2b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -199,6 +199,20 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va)
__clear_bit(bit_pos, adev->seq64.used);
  }
  
+/**

+ * amdgpu_seq64_gpu_addr - Calculate GPU addr from va
+ *
+ * @adev: amdgpu_device pointer
+ * @va: virtual address in client address space
+ *
+ * Calculate the GART address for a VA.
+ */
+u64 amdgpu_seq64_gpu_addr(struct amdgpu_device *adev, u64 va)
+{
+   return va - amdgpu_seq64_get_va_base(adev) +
+   amdgpu_bo_gpu_offset(adev->seq64.sbo);
+}
+
  /**
   * amdgpu_seq64_fini - Cleanup seq64 driver
   *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
index 4203b2ab318d..63e8ac0a2057 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
@@ -43,6 +43,7 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 
gpu_addr);
  int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 struct amdgpu_bo_va **bo_va);
  void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv 
*fpriv);
+u64 amdgpu_seq64_gpu_addr(struct amdgpu_device *adev, u64 va);
  
  #endif
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index ed4a8c5d26d7..0960e0a665d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -111,21 +111,6 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
  };
  
-/**

- * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence
- */
-struct amdgpu_vm_tlb_seq_struct {
-   /**
-* @vm: pointer to the amdgpu_vm structure to set the fence sequence on
-*/
-   struct amdgpu_vm *vm;
-
-   /**
-* @cb: callback
-*/
-   struct dma_fence_cb cb;
-};
-
  /**
   * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
   *
@@ -862,23 +847,6 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
return r;
  }
  
-/**

- * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
- * @fence: unused
- * @cb: the callback structure
- *
- * Increments the tlb sequence to make sure that future CS execute a VM flush.
- */
-static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
-struct dma_fence_cb *cb)
-{
-   struct amdgpu_vm_tlb_seq_struct *tlb_cb;
-
-   tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
-   atomic64_inc(_cb->vm->tlb_seq);
-   kfree(tlb_cb);
-}
-
  /**
   * amdgpu_vm_update_range - update a range in the vm page table
   *
@@ -911,7 +879,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
   struct dma_fence **fence)
  {
struct amdgpu_vm_update_params params;
-   struct amdgpu_vm_tlb_seq_struct *tlb_cb;
struct amdgpu_res_cursor cursor;
enum amdgpu_sync_mode sync_mode;
int r, idx;
@@ -919,12 +886,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
if (!drm_dev_enter(adev_to_drm(adev), ))
return -ENODEV;
  
-	tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);

-   if (!tlb_cb) {
-   r = -ENOMEM;
-   goto error_unlock;
-   }
-
/* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
 * heavy-weight flush TLB unconditionally.
 */
@@ -942,6 +903,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
params.immediate = immediate;
params.pages_addr = pages_addr;
params.unlocked = unlocked;
+   params.needs_flush = flush_tlb;

Re: Kernel 6.7+ broke under-powering of my RX 6700XT. (Archlinux, mesa/amdgpu)

2024-02-26 Thread Daniel Vetter
Back from vacations ...

On Wed, 21 Feb 2024 at 16:39, Alex Deucher  wrote:
>
> On Wed, Feb 21, 2024 at 1:06 AM Linux regression tracking (Thorsten
> Leemhuis)  wrote:
> >
> > On 20.02.24 21:18, Alex Deucher wrote:
> > > On Tue, Feb 20, 2024 at 2:41 PM Romano  wrote:
> > >>
> > >> If the increased low range is allowed via boot option, like in proposed
> > >> patch, user clearly made an intentional decision. Undefined, but won't
> > >> fry his hardware for sure. Undefined is also overclocking in that
> > >> matter. You can go out of range with ratio of voltage vs frequency(still
> > >> within vendor's limits) for example and crash the system.
> > >
> > > This whole thing reminds me of this:
> > > https://xkcd.com/1172/
> > > The problem is another module parameter is another interface to
> > > maintain and validate.
> >
> > Yup, of course, all that is understood.
> >
> > But we have this "no regressions" rule for a reason. Adhering to it
> > strictly would afaics be counter-productive in this situation, but give
> > users some way to manually do what was possible before out-of-the box
> > IMHO is the minimum we should do.
> >
> > Maybe just allow that parameter only up to a certain recent GPU
> > generation, that way you won't have to deal with that at some point in
> > the future.
>
> The problem is the cumulative effect of all of these parameters.
> Every time there is some change in the driver someone disagrees with
> there is a push to add a module parameter for it.  The driver already
> has too many module parameters and it's hard to keep them all working
> consistently and in every possible combination.  Moreover, the module
> options are supposed to be mainly for debugging.  The driver sets
> proper defaults for all chips to ensure proper operation, however lots
> of random forums seem to treat them like they are the recipe for some
> special sauce so users are constantly setting various combinations of
> them because they read somewhere on a forum that it would make their
> GPU run faster.  More often than not this leads to problems.
>
> Even if we did make the option only valid for these specific chips,
> there will be an expectation that future chips will support it as
> well, because someone will hack the driver and test it and it may work
> for them and then there will be a push to add it for those chips too.

Chiming in here ...

tldr; yes

gpu drivers are ridiculously hard to get right, combinatorial
explosion is a real issue and concern, it's not some hiding behind
corporate rules - drm folks added module_param*unsafe to discourage
users from playing around with options we need for debugging for very,
very real reasons. We have aggressively removed tuning knobs in the
past, and those we have in various drivers are causing endless amounts
of pain.

Also, the "no regression" rules is not ironclad, especially on
power/perf regressions, or all the security fixes would be impossible
to merge. First make it correct (even if the bug has gone unnoticed
for forever), then make it fast/power efficient/pretty/whatever people
fancy. Yes there's some exceptions like "my desktop is crawling like a
slide-show and absolutely unusable" kind of regressions, but my
understanding is this isn't the case here.

So unless Dave or Linus are screaming and overruling Alex here, "do
nothing" is my take here too.

Cheers, Sima

>
> Alex
>
> > >  Moreover, we've had a number of cases in the
> > > past where users have under or overclocked and reported bugs or
> > > stability issues and it did not come to light that they were doing
> > > that until we'd already spent a good deal of time trying to debug the
> > > issue.
> >
> > Taint the kernel when that module parameter is used? We iirc have a
> > taint bit exactly for this sort of situation. Sure, such reports will
> > still happen, but then you at least have an indicator to spot them.
> >
> > Ciao, Thorsten
> >
> > >  This obviously can still happen if you allow any sort of over
> > > or underclocking, but at least if you stick to the limits you are
> > > staying within the bounding box of the design.
> > >
> > > Alex
> > >
> > >> On 2/20/24 19:09, Alex Deucher wrote:
> > >>> On Tue, Feb 20, 2024 at 11:46 AM Romano  wrote:
> >  For Windows, apps like MSI Afterburner is the one to try and what most
> >  people go for. Using it in the past myself, I would be surprised if it
> >  adhered to such a high min power cap. But even if it did, why would we
> >  have to.
> > 
> >  Relying on vendors cap in this case has already proven wrong because
> >  things worked for quite some time already and people reported saving
> >  significant amount of watts, in my case 90W(!) for <10% perf.
> > 
> >  Therefore this talk about safety seems rather strange to me and
> >  especially so when we are talking about min_cap. Or name me a single
> >  case where someone fried his card due to "too low power" set in said
> >  variable. Now there 

RE: [PATCH] drm/amd/pm: Increase SMUv13.0.6 mode-2 reset time

2024-02-26 Thread Kamal, Asad
[AMD Official Use Only - General]

Reviewed-by: Asad Kamal 

Thanks & Regards
Asad

-Original Message-
From: Lazar, Lijo 
Sent: Monday, February 26, 2024 4:08 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Deucher, Alexander 
; Kamal, Asad 
Subject: [PATCH] drm/amd/pm: Increase SMUv13.0.6 mode-2 reset time

On SOCs with SMUv13.0.6, mode-2 reset takes a bit longer. Wait for 200ms before 
trying to restore config space after mode-2 reset.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 29c102fe650d..2b7a60b23d6b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2307,8 +2307,8 @@ static int smu_v13_0_6_mode2_reset(struct smu_context 
*smu)
ret = smu_cmn_send_msg_without_waiting(smu, (uint16_t)index,
   SMU_RESET_MODE_2);

-   /* This is similar to FLR, wait till max FLR timeout */
-   msleep(100);
+   /* Reset takes a bit longer, wait for 200ms. */
+   msleep(200);

dev_dbg(smu->adev->dev, "restore config space...\n");
/* Restore the config space saved during init */
--
2.25.1



Re: [PATCH v5 7/8] drm/amd/display: Introduce KUnit tests to dc_dmub_srv library

2024-02-26 Thread Jani Nikula
On Thu, 22 Feb 2024, Rodrigo Siqueira  wrote:
> diff --git a/drivers/gpu/drm/amd/display/test/kunit/.kunitconfig 
> b/drivers/gpu/drm/amd/display/test/kunit/.kunitconfig
> index eb6f81601757..4c5861ad58bd 100644
> --- a/drivers/gpu/drm/amd/display/test/kunit/.kunitconfig
> +++ b/drivers/gpu/drm/amd/display/test/kunit/.kunitconfig
> @@ -4,5 +4,6 @@ CONFIG_DRM=y
>  CONFIG_DRM_AMDGPU=y
>  CONFIG_DRM_AMD_DC=y
>  CONFIG_AMD_DC_BASICS_KUNIT_TEST=y
> +CONFIG_AMD_DC_KUNIT_TEST=y
>  CONFIG_DCE_KUNIT_TEST=y
>  CONFIG_DML_KUNIT_TEST=y

A bit random patch to comment on, but this hunk demonstrates the point:

Should all the configs have DRM_AMD_ prefix to put them in a
"namespace"?


BR,
Jani.


-- 
Jani Nikula, Intel


[PATCH] drm/amd/pm: Increase SMUv13.0.6 mode-2 reset time

2024-02-26 Thread Lijo Lazar
On SOCs with SMUv13.0.6, mode-2 reset takes a bit longer. Wait for 200ms
before trying to restore config space after mode-2 reset.

Signed-off-by: Lijo Lazar 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 29c102fe650d..2b7a60b23d6b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2307,8 +2307,8 @@ static int smu_v13_0_6_mode2_reset(struct smu_context 
*smu)
ret = smu_cmn_send_msg_without_waiting(smu, (uint16_t)index,
   SMU_RESET_MODE_2);
 
-   /* This is similar to FLR, wait till max FLR timeout */
-   msleep(100);
+   /* Reset takes a bit longer, wait for 200ms. */
+   msleep(200);
 
dev_dbg(smu->adev->dev, "restore config space...\n");
/* Restore the config space saved during init */
-- 
2.25.1



[PATCH v2] drm/amd/amdgpu: Fix potential ioremap() memory leaks in amdgpu_device_init()

2024-02-26 Thread Srinivasan Shanmugam
This ensures that the memory mapped by ioremap for adev->rmmio, is
properly handled in amdgpu_device_init(). If the function exits early
due to an error, the memory is unmapped. If the function completes
successfully, the memory remains mapped.

Reported by smatch:
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:4337 amdgpu_device_init() warn: 
'adev->rmmio' from ioremap() not released on lines: 
4035,4045,4051,4058,4068,4337

Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
v2: 
 - updated commit message
 - use a goto label and error handling instead (Christian)

 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1ef892bea488..d0e77bbee60e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4031,8 +4031,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 * early on during init and before calling to RREG32.
 */
adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, 
"amdgpu-reset-dev");
-   if (!adev->reset_domain)
-   return -ENOMEM;
+   if (!adev->reset_domain) {
+   r = -ENOMEM;
+   goto unmap_memory;
+   }
 
/* detect hw virtualization here */
amdgpu_detect_virtualization(adev);
@@ -4042,20 +4044,20 @@ int amdgpu_device_init(struct amdgpu_device *adev,
r = amdgpu_device_get_job_timeout_settings(adev);
if (r) {
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
-   return r;
+   goto unmap_memory;
}
 
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
-   return r;
+   goto unmap_memory;
 
amdgpu_device_set_mcbp(adev);
 
/* Get rid of things like offb */
r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, 
_kms_driver);
if (r)
-   return r;
+   goto unmap_memory;
 
/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);
@@ -4065,7 +4067,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (adev->gmc.xgmi.supported) {
r = adev->gfxhub.funcs->get_xgmi_info(adev);
if (r)
-   return r;
+   goto unmap_memory;
}
 
/* enable PCIE atomic ops */
@@ -4334,6 +4336,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 failed:
amdgpu_vf_error_trans_all(adev);
 
+unmap_memory:
+   iounmap(adev->rmmio);
return r;
 }
 
-- 
2.34.1



Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()

2024-02-26 Thread Steven Rostedt
On Fri, 23 Feb 2024 14:50:49 -0500
Kent Overstreet  wrote:

> Tangentially related though, what would make me really happy is if we
> could create the string with in the TP__fast_assign() section. I have to
> have a bunch of annoying wrappers right now because the string length
> has to be known when we invoke the tracepoint.

You can use __string_len() to determine the string length in the tracepoint
(which is executed in the TP_fast_assign() section).

My clean up patches will make __assign_str_len() obsolete too (I'm working
on them now), and you can just use __assign_str().

I noticed that I don't have a string_len example in the sample code and I'm
actually writing it now.

// cutting out everything else:

TRACE_EVENT(foo_bar,

TP_PROTO(const char *foo, int bar),

TP_ARGS(foo, bar),

TP_STRUCT__entry(
__string_len(   lstr,   foo,bar < strlen(foo) ? bar : 
strlen(foo) )
),

TP_fast_assign(
__assign_str(lstr, foo);

// Note, the above is with my updates, without them, you need to duplicate the 
logic

//  __assign_str_len(lstr, foo, bar < strlen(foo) ? bar : 
strlen(foo));
),

TP_printk("%s", __get_str(lstr))
);


The above will allocate "bar < strlen(foo) ? bar : strlen(foo)" size on the
ring buffer. As the size is already stored, my clean up code uses that
instead of requiring duplicating the logic again.

-- Steve


Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()

2024-02-26 Thread Jeff Johnson
On 2/23/2024 9:56 AM, Steven Rostedt wrote:
> From: "Steven Rostedt (Google)" 
> 
> [
>This is a treewide change. I will likely re-create this patch again in
>the second week of the merge window of v6.9 and submit it then. Hoping
>to keep the conflicts that it will cause to a minimum.
> ]
> 
> With the rework of how the __string() handles dynamic strings where it
> saves off the source string in field in the helper structure[1], the
> assignment of that value to the trace event field is stored in the helper
> value and does not need to be passed in again.

Just curious if this could be done piecemeal by first changing the
macros to be variadic macros which allows you to ignore the extra
argument. The callers could then be modified in their separate trees.
And then once all the callers have be merged, the macros could be
changed to no longer be variadic.


WARNING: in amdgpu_dm_atomic_commit_tail

2024-02-26 Thread Karsten Wiese
Hello,

this warning and drm error keep appearing here past wakeup from suspend:

[  247.716103] Freezing remaining freezable tasks
[  247.717379] Freezing remaining freezable tasks completed (elapsed
0.001 seconds)
[  247.717387] printk: Suspending console(s) (use no_console_suspend to debug)
[  247.871602] ACPI: EC: interrupt blocked
[  247.949021] ACPI: EC: interrupt unblocked
[  248.538831] [drm] PCIE GART of 512M enabled (table at 0x00803FD0).
[  248.538861] amdgpu :65:00.0: amdgpu: SMU is resuming...
[  248.541794] amdgpu :65:00.0: amdgpu: SMU is resumed successfully!
[  248.541832] amdgpu :65:00.0: [drm] *ERROR* Error queueing DMUB
command: status=4
[  248.548469] nvme nvme0: Shutdown timeout set to 8 seconds
[  248.573859] nvme nvme1: 15/0/0 default/read/poll queues
[  248.592079] [ cut here ]
[  248.592081] WARNING: CPU: 3 PID: 2146 at
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:8059
amdgpu_dm_atomic_commit_tail+0x35b9/0x3fe0 [amdgpu]
[  248.592528] Modules linked in: ccm hid_logitech_hidpp uhid
xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4
xt_tcpudp nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6
nf_defrag_ipv4 nf_tables libcrc32c nfnetlink bridge stp llc cachefiles
fscache netfs nvme_fabrics cmac algif_hash algif_skcipher af_alg bnep
cdc_ncm cdc_ether usbnet snd_ctl_led snd_hda_codec_realtek
snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi snd_soc_dmic
snd_soc_ps_mach snd_ps_pdm_dma snd_hda_intel snd_sof_amd_acp63
edac_mce_amd snd_sof_amd_acp snd_intel_dspcfg snd_sof_xtensa_dsp
snd_hda_codec snd_sof_pci snd_sof snd_hwdep snd_sof_utils mt7921e
snd_hda_core kvm_amd mt7921_common snd_soc_core mt792x_lib
mt76_connac_lib snd_compress snd_seq_midi kvm snd_seq_midi_event
ac97_bus mt76 sunrpc crct10dif_pclmul binfmt_misc irqbypass
snd_rawmidi snd_pci_ps uvcvideo crc32_pclmul videobuf2_vmalloc
mac80211 polyval_clmulni snd_pcm uvc snd_seq btusb polyval_generic
videobuf2_memops ghash_clmulni_intel videobuf2_v4l2 btrtl
[  248.592575]  sha256_ssse3 btintel sha1_ssse3 btbcm snd_seq_device
aesni_intel btmtk snd_acp_pci snd_timer videodev snd_acp_config
crypto_simd cfg80211 snd bluetooth r8152 rtsx_pci_sdmmc
videobuf2_common snd_soc_acpi hid_multitouch cryptd ucsi_acpi
think_lmi nls_iso8859_1 rapl ecdh_generic ecc
firmware_attributes_class ideapad_laptop mc typec_ucsi wmi_bmof mii
thunderbolt sparse_keymap i2c_piix4 snd_acp_legacy_common soundcore
ccp libarc4 rtsx_pci typec platform_profile i2c_hid_acpi i2c_hid
sch_fq_codel msr efi_pstore ip_tables x_tables autofs4 joydev amdgpu
i2c_algo_bit drm_ttm_helper ttm drm_exec drm_suballoc_helper
input_leds amdxcp mfd_core drm_buddy serio_raw gpu_sched nvme
drm_display_helper cec nvme_core t10_pi video wmi
[  248.592615] CPU: 3 PID: 2146 Comm: kworker/u32:18 Tainted: G
W  6.7.6 #1
[  248.592618] Hardware name: LENOVO 83AR/LNVNB161216, BIOS MDCN30WW 09/17/2023
[  248.592620] Workqueue: events_unbound async_run_entry_fn
[  248.592628] RIP: 0010:amdgpu_dm_atomic_commit_tail+0x35b9/0x3fe0 [amdgpu]
[  248.592804] Code: 4c 89 e9 48 c7 c6 c0 26 3c c1 e8 d2 aa 9d de 49
8b 56 08 e9 2c d1 ff ff 0f 0b 49 8b 3f e8 7f bf 9b de 85 c0 0f 84 ad
d5 ff ff <0f> 0b e9 a6 d5 ff ff 0f 0b e9 90 d4 ff ff 48 8b 85 90 fe ff
ff 31
[  248.592806] RSP: 0018:a85c8aaff880 EFLAGS: 00010282
[  248.592808] RAX: ffea RBX:  RCX: 
[  248.592809] RDX:  RSI:  RDI: 
[  248.592810] RBP: a85c8aaffac0 R08:  R09: 
[  248.592811] R10:  R11:  R12: 8b0c0e8b9000
[  248.592811] R13: 8b0c0d7f6000 R14: 8b0c2176a080 R15: 8b0e2526dc00
[  248.592812] FS:  () GS:8b133ecc()
knlGS:
[  248.592813] CS:  0010 DS:  ES:  CR0: 80050033
[  248.592814] CR2: 7f104b3985a0 CR3: 0005f3a1d000 CR4: 00750ef0
[  248.592816] PKRU: 5554
[  248.592816] Call Trace:
[  248.592818]  
[  248.592822]  ? show_regs+0x6e/0x80
[  248.592828]  ? amdgpu_dm_atomic_commit_tail+0x35b9/0x3fe0 [amdgpu]
[  248.592971]  ? __warn+0x8d/0x160
[  248.592975]  ? amdgpu_dm_atomic_commit_tail+0x35b9/0x3fe0 [amdgpu]
[  248.593109]  ? report_bug+0x1b3/0x1c0
[  248.593116]  ? handle_bug+0x46/0x80
[  248.593121]  ? exc_invalid_op+0x19/0x80
[  248.593122]  ? asm_exc_invalid_op+0x1b/0x20
[  248.593128]  ? amdgpu_dm_atomic_commit_tail+0x35b9/0x3fe0 [amdgpu]
[  248.593266]  ? kfree+0x79/0x120
[  248.593271]  ? dcn314_validate_bandwidth+0x2be/0x4a0 [amdgpu]
[  248.593454]  ? amdgpu_bo_gpu_offset_no_check+0x3b/0x90 [amdgpu]
[  248.593564]  commit_tail+0xc2/0x190
[  248.593570]  ? drm_atomic_helper_swap_state+0x246/0x380
[  248.593571]  drm_atomic_helper_commit+0x12f/0x160
[  248.593573]  drm_atomic_commit+0x93/0xd0
[  248.593578]  ? __drm_printfn_seq_file+0x30/0x30
[  248.593581]  

Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()

2024-02-26 Thread Steven Rostedt
On Fri, 23 Feb 2024 13:46:53 -0500
Steven Rostedt  wrote:

> Now one thing I could do is to not remove the parameter, but just add:
> 
>   WARN_ON_ONCE((src) != __data_offsets->item##_ptr_);
> 
> in the __assign_str() macro to make sure that it's still the same that is
> assigned. But I'm not sure how useful that is, and still causes burden to
> have it. I never really liked the passing of the string in two places to
> begin with.

Hmm, maybe I'll just add this patch for 6.9 and then in 6.10 do the
parameter removal.

-- Steve

diff --git a/include/trace/stages/stage6_event_callback.h 
b/include/trace/stages/stage6_event_callback.h
index 0c0f50bcdc56..7372e2c2a0c4 100644
--- a/include/trace/stages/stage6_event_callback.h
+++ b/include/trace/stages/stage6_event_callback.h
@@ -35,6 +35,7 @@ #define __assign_str(dst, src)
do {\
char *__str__ = __get_str(dst); \
int __len__ = __get_dynamic_array_len(dst) - 1; \
+   WARN_ON_ONCE((src) != __data_offsets.dst##_ptr_);   \
memcpy(__str__, __data_offsets.dst##_ptr_ ? :   \
   EVENT_NULL_STR, __len__);\
__str__[__len__] = '\0';\


Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()

2024-02-26 Thread Kent Overstreet
On Fri, Feb 23, 2024 at 01:46:53PM -0500, Steven Rostedt wrote:
> On Fri, 23 Feb 2024 10:30:45 -0800
> Jeff Johnson  wrote:
> 
> > On 2/23/2024 9:56 AM, Steven Rostedt wrote:
> > > From: "Steven Rostedt (Google)" 
> > > 
> > > [
> > >This is a treewide change. I will likely re-create this patch again in
> > >the second week of the merge window of v6.9 and submit it then. Hoping
> > >to keep the conflicts that it will cause to a minimum.
> > > ]
> > > 
> > > With the rework of how the __string() handles dynamic strings where it
> > > saves off the source string in field in the helper structure[1], the
> > > assignment of that value to the trace event field is stored in the helper
> > > value and does not need to be passed in again.  
> > 
> > Just curious if this could be done piecemeal by first changing the
> > macros to be variadic macros which allows you to ignore the extra
> > argument. The callers could then be modified in their separate trees.
> > And then once all the callers have be merged, the macros could be
> > changed to no longer be variadic.
> 
> I weighed doing that, but I think ripping off the band-aid is a better
> approach. One thing I found is that leaving unused parameters in the macros
> can cause bugs itself. I found one case doing my clean up, where an unused
> parameter in one of the macros was bogus, and when I made it a used
> parameter, it broke the build.
> 
> I think for tree-wide changes, the preferred approach is to do one big
> patch at once. And since this only affects TRACE_EVENT() macros, it
> hopefully would not be too much of a burden (although out of tree users may
> suffer from this, but do we care?)

Agreed on doing it all at once, it'll be way less spam for people to
deal with.

Tangentially related though, what would make me really happy is if we
could create the string with in the TP__fast_assign() section. I have to
have a bunch of annoying wrappers right now because the string length
has to be known when we invoke the tracepoint.


Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()

2024-02-26 Thread Steven Rostedt
On Fri, 23 Feb 2024 10:30:45 -0800
Jeff Johnson  wrote:

> On 2/23/2024 9:56 AM, Steven Rostedt wrote:
> > From: "Steven Rostedt (Google)" 
> > 
> > [
> >This is a treewide change. I will likely re-create this patch again in
> >the second week of the merge window of v6.9 and submit it then. Hoping
> >to keep the conflicts that it will cause to a minimum.
> > ]
> > 
> > With the rework of how the __string() handles dynamic strings where it
> > saves off the source string in field in the helper structure[1], the
> > assignment of that value to the trace event field is stored in the helper
> > value and does not need to be passed in again.  
> 
> Just curious if this could be done piecemeal by first changing the
> macros to be variadic macros which allows you to ignore the extra
> argument. The callers could then be modified in their separate trees.
> And then once all the callers have be merged, the macros could be
> changed to no longer be variadic.

I weighed doing that, but I think ripping off the band-aid is a better
approach. One thing I found is that leaving unused parameters in the macros
can cause bugs itself. I found one case doing my clean up, where an unused
parameter in one of the macros was bogus, and when I made it a used
parameter, it broke the build.

I think for tree-wide changes, the preferred approach is to do one big
patch at once. And since this only affects TRACE_EVENT() macros, it
hopefully would not be too much of a burden (although out of tree users may
suffer from this, but do we care?)

Now one thing I could do is to not remove the parameter, but just add:

WARN_ON_ONCE((src) != __data_offsets->item##_ptr_);

in the __assign_str() macro to make sure that it's still the same that is
assigned. But I'm not sure how useful that is, and still causes burden to
have it. I never really liked the passing of the string in two places to
begin with.

-- Steve


Re: [PATCH] drm/amd/display: clean unnecessary braces

2024-02-26 Thread Tulio Moreira Fernandes
Hi, Christian!

Ok, thanks for clarifying this for me.

I'll continue analyzing the files here, now based on these points.

Best regards


Em qui., 22 de fev. de 2024 às 06:33, Christian König
 escreveu:
>
> Am 21.02.24 um 19:01 schrieb Rodrigo Siqueira Jordao:
> > [SNIP]
> >> diff --git
> >> a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
> >> b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
> >> index 87760600e154..e179dea148e7 100644
> >> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
> >> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
> >> @@ -110,14 +110,12 @@ uint32_t
> >> dcn32_helper_calculate_num_ways_for_subvp(
> >>   struct dc_state *context)
> >>   {
> >>   if (context->bw_ctx.bw.dcn.mall_subvp_size_bytes > 0) {
> >> -if (dc->debug.force_subvp_num_ways) {
> >> +if (dc->debug.force_subvp_num_ways)
> >>   return dc->debug.force_subvp_num_ways;
> >> -} else {
> >> +else
> >>   return dcn32_helper_mall_bytes_to_ways(dc,
> >> context->bw_ctx.bw.dcn.mall_subvp_size_bytes);
> >> -}
> >> -} else {
> >> +} else
> >
> > Actually, we want to keep the braces around the else part to keep the
> > braces balanced with the if condition.
>
> Yeah, and checkpatch actually complains about that. E.g. you shouldn't
> use "} else" or "else {", but always "} else {".
>
> So the patch is actually a bit bogus and introduces new coding style
> warnings.
>
> Regards,
> Christian.
>
> >
> > Thanks
> > Siqueira
> >
> >>   return 0;
> >> -}
> >>   }
> >> void dcn32_merge_pipes_for_subvp(struct dc *dc,
> >> @@ -250,9 +248,9 @@ bool dcn32_is_psr_capable(struct pipe_ctx *pipe)
> >>   {
> >>   bool psr_capable = false;
> >>   -if (pipe->stream &&
> >> pipe->stream->link->psr_settings.psr_version !=
> >> DC_PSR_VERSION_UNSUPPORTED) {
> >> +if (pipe->stream && pipe->stream->link->psr_settings.psr_version
> >> != DC_PSR_VERSION_UNSUPPORTED)
> >>   psr_capable = true;
> >> -}
> >> +
> >>   return psr_capable;
> >>   }
> >>   @@ -278,9 +276,9 @@ static void override_det_for_subvp(struct dc
> >> *dc, struct dc_state *context, uint
> >>   if (pipe_ctx->stream && pipe_ctx->plane_state &&
> >> dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
> >>   if (dcn32_allow_subvp_high_refresh_rate(dc, context,
> >> pipe_ctx)) {
> >>   -if (pipe_ctx->stream->timing.v_addressable == 1080
> >> && pipe_ctx->stream->timing.h_addressable == 1920) {
> >> +if (pipe_ctx->stream->timing.v_addressable == 1080
> >> && pipe_ctx->stream->timing.h_addressable == 1920)
> >>   fhd_count++;
> >> -}
> >> +
> >>   subvp_high_refresh_count++;
> >>   }
> >>   }
> >
>


Re: [FYI][PATCH] tracing/treewide: Remove second parameter of __assign_str()

2024-02-26 Thread Steven Rostedt
On Fri, 23 Feb 2024 12:56:34 -0500
Steven Rostedt  wrote:

> Note, the same updates will need to be done for:
> 
>   __assign_str_len()
>   __assign_rel_str()
>   __assign_rel_str_len()

Correction: The below macros do not pass in their source to the entry
macros, so they will not need to be updated.

-- Steve

>   __assign_bitmask()
>   __assign_rel_bitmask()
>   __assign_cpumask()
>   __assign_rel_cpumask()



Re: [PATCH] drm/amd/amdgpu: Fix potential ioremap() memory leaks in amdgpu_device_init()

2024-02-26 Thread SRINIVASAN SHANMUGAM

Hi Christian,

On 2/26/2024 1:46 PM, Christian König wrote:

Am 24.02.24 um 07:38 schrieb Srinivasan Shanmugam:

This ensures that the memory mapped by ioremap for adev->rmmio, is
properly handled in amdgpu_device_init(). If the function exits early
due to an error, the memory is unmapped. If the function completes
successfully, the memory remains mapped.

Fixes the below:
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:4337 amdgpu_device_init() 
warn: 'adev->rmmio' from ioremap() not released on lines: 
4035,4045,4051,4058,4068,4337


Hui? How do you got that warning?


It was caught by smatch & will update the same in the commit message.


Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 ++
  1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 1ef892bea488..68bf5e910cb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4031,8 +4031,10 @@ int amdgpu_device_init(struct amdgpu_device 
*adev,

   * early on during init and before calling to RREG32.
   */
  adev->reset_domain = 
amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");

-    if (!adev->reset_domain)
+    if (!adev->reset_domain) {
+    iounmap(adev->rmmio);
  return -ENOMEM;
+    }


Please use a goto label and error handling instead. Apart from that 
looks good to me.


Thanks a lot for all your reviews, highly appreciate it, will send v2 
for this.


Best Wishes,

Srini



Regards,
Christian.


    /* detect hw virtualization here */
  amdgpu_detect_virtualization(adev);
@@ -4042,20 +4044,25 @@ int amdgpu_device_init(struct amdgpu_device 
*adev,

  r = amdgpu_device_get_job_timeout_settings(adev);
  if (r) {
  dev_err(adev->dev, "invalid lockup_timeout parameter 
syntax\n");

+    iounmap(adev->rmmio);
  return r;
  }
    /* early init functions */
  r = amdgpu_device_ip_early_init(adev);
-    if (r)
+    if (r) {
+    iounmap(adev->rmmio);
  return r;
+    }
    amdgpu_device_set_mcbp(adev);
    /* Get rid of things like offb */
  r = 
drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, 
_kms_driver);

-    if (r)
+    if (r) {
+    iounmap(adev->rmmio);
  return r;
+    }
    /* Enable TMZ based on IP_VERSION */
  amdgpu_gmc_tmz_set(adev);
@@ -4064,8 +4071,10 @@ int amdgpu_device_init(struct amdgpu_device 
*adev,

  /* Need to get xgmi info early to decide the reset behavior*/
  if (adev->gmc.xgmi.supported) {
  r = adev->gfxhub.funcs->get_xgmi_info(adev);
-    if (r)
+    if (r) {
+    iounmap(adev->rmmio);
  return r;
+    }
  }
    /* enable PCIE atomic ops */
@@ -4334,6 +4343,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  failed:
  amdgpu_vf_error_trans_all(adev);
  +    iounmap(adev->rmmio);
  return r;
  }




Re: [PATCH] drm/amd/amdgpu: Fix potential ioremap() memory leaks in amdgpu_device_init()

2024-02-26 Thread Christian König

Am 24.02.24 um 07:38 schrieb Srinivasan Shanmugam:

This ensures that the memory mapped by ioremap for adev->rmmio, is
properly handled in amdgpu_device_init(). If the function exits early
due to an error, the memory is unmapped. If the function completes
successfully, the memory remains mapped.

Fixes the below:
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:4337 amdgpu_device_init() warn: 
'adev->rmmio' from ioremap() not released on lines: 
4035,4045,4051,4058,4068,4337


Hui? How do you got that warning?



Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 ++
  1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1ef892bea488..68bf5e910cb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4031,8 +4031,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 * early on during init and before calling to RREG32.
 */
adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, 
"amdgpu-reset-dev");
-   if (!adev->reset_domain)
+   if (!adev->reset_domain) {
+   iounmap(adev->rmmio);
return -ENOMEM;
+   }


Please use a goto label and error handling instead. Apart from that 
looks good to me.


Regards,
Christian.

  
  	/* detect hw virtualization here */

amdgpu_detect_virtualization(adev);
@@ -4042,20 +4044,25 @@ int amdgpu_device_init(struct amdgpu_device *adev,
r = amdgpu_device_get_job_timeout_settings(adev);
if (r) {
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
+   iounmap(adev->rmmio);
return r;
}
  
  	/* early init functions */

r = amdgpu_device_ip_early_init(adev);
-   if (r)
+   if (r) {
+   iounmap(adev->rmmio);
return r;
+   }
  
  	amdgpu_device_set_mcbp(adev);
  
  	/* Get rid of things like offb */

r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, 
_kms_driver);
-   if (r)
+   if (r) {
+   iounmap(adev->rmmio);
return r;
+   }
  
  	/* Enable TMZ based on IP_VERSION */

amdgpu_gmc_tmz_set(adev);
@@ -4064,8 +4071,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* Need to get xgmi info early to decide the reset behavior*/
if (adev->gmc.xgmi.supported) {
r = adev->gfxhub.funcs->get_xgmi_info(adev);
-   if (r)
+   if (r) {
+   iounmap(adev->rmmio);
return r;
+   }
}
  
  	/* enable PCIE atomic ops */

@@ -4334,6 +4343,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  failed:
amdgpu_vf_error_trans_all(adev);
  
+	iounmap(adev->rmmio);

return r;
  }