[PATCH 2/2] drm/amdgpu: added a sysfs interface for thermal throttling

2023-02-14 Thread kunliu13
implement apu_thermal_cap r/w callback for vangogh

Jira ID: SWDEV-354511
Signed-off-by: Kun Liu 
---
 .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index cb10c7e31..d211b1dfe 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -1590,6 +1590,27 @@ static int vangogh_read_sensor(struct smu_context *smu,
return ret;
 }
 
+static int vangogh_get_apu_thermal_limit(struct smu_context *smu, uint32_t 
*limit)
+{
+   int ret = -EINVAL;
+
+   ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_GetThermalLimit,
+ 0, limit);
+   return ret;
+}
+
+int vangogh_set_apu_thermal_limit(struct smu_context *smu, uint32_t limit)
+{
+   int ret = -EINVAL;
+
+   ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_SetReducedThermalLimit,
+ limit, NULL);
+   return ret;
+}
+
+
 static int vangogh_set_watermarks_table(struct smu_context *smu,
   struct pp_smu_wm_range_sets 
*clock_ranges)
 {
@@ -2425,6 +2446,8 @@ static const struct pptable_funcs vangogh_ppt_funcs = {
.dpm_set_jpeg_enable = vangogh_dpm_set_jpeg_enable,
.is_dpm_running = vangogh_is_dpm_running,
.read_sensor = vangogh_read_sensor,
+   .get_apu_thermal_limit = vangogh_get_apu_thermal_limit,
+   .set_apu_thermal_limit = vangogh_set_apu_thermal_limit,
.get_enabled_mask = smu_cmn_get_enabled_mask,
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
.set_watermarks_table = vangogh_set_watermarks_table,
-- 
2.25.1



Re: [PATCH 2/2] drm/amdgpu: added a sysfs interface for thermal throttling

2023-02-14 Thread Christian König

Am 14.02.23 um 09:02 schrieb kunliu13:

implement apu_thermal_cap r/w callback for vangogh

Jira ID: SWDEV-354511
Signed-off-by: Kun Liu 
---
  .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 23 +++
  1 file changed, 23 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index cb10c7e31..d211b1dfe 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -1590,6 +1590,27 @@ static int vangogh_read_sensor(struct smu_context *smu,
return ret;
  }
  
+static int vangogh_get_apu_thermal_limit(struct smu_context *smu, uint32_t *limit)

+{
+   int ret = -EINVAL;


Generally please don't initialize return values if it isn't necessary!

This is constantly reported by automated checkers as bad coding style 
since it prevent the compiler from properly warn if an uninitialized 
variable is used.


Regards,
Christian.


+
+   ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_GetThermalLimit,
+ 0, limit);
+   return ret;
+}
+
+int vangogh_set_apu_thermal_limit(struct smu_context *smu, uint32_t limit)
+{
+   int ret = -EINVAL;
+
+   ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_SetReducedThermalLimit,
+ limit, NULL);
+   return ret;
+}
+
+
  static int vangogh_set_watermarks_table(struct smu_context *smu,
   struct pp_smu_wm_range_sets 
*clock_ranges)
  {
@@ -2425,6 +2446,8 @@ static const struct pptable_funcs vangogh_ppt_funcs = {
.dpm_set_jpeg_enable = vangogh_dpm_set_jpeg_enable,
.is_dpm_running = vangogh_is_dpm_running,
.read_sensor = vangogh_read_sensor,
+   .get_apu_thermal_limit = vangogh_get_apu_thermal_limit,
+   .set_apu_thermal_limit = vangogh_set_apu_thermal_limit,
.get_enabled_mask = smu_cmn_get_enabled_mask,
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
.set_watermarks_table = vangogh_set_watermarks_table,




RE: [PATCH 2/2] drm/amdgpu: added a sysfs interface for thermal throttling

2023-02-14 Thread Quan, Evan
[AMD Official Use Only - General]



> -Original Message-
> From: kunliu13 
> Sent: Tuesday, February 14, 2023 4:03 PM
> To: Limonciello, Mario ; Liang, Richard qi
> ; Yuan, Perry ; amd-
> g...@lists.freedesktop.org
> Cc: Deucher, Alexander ; Du, Xiaojian
> ; Quan, Evan ; Liu, Kun
> 
> Subject: [PATCH 2/2] drm/amdgpu: added a sysfs interface for thermal
> throttling
> 
> implement apu_thermal_cap r/w callback for vangogh
> 
> Jira ID: SWDEV-354511
[Quan, Evan] Please drop this internal link.
> Signed-off-by: Kun Liu 
> ---
>  .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 23
> +++
>  1 file changed, 23 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> index cb10c7e31..d211b1dfe 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> @@ -1590,6 +1590,27 @@ static int vangogh_read_sensor(struct
> smu_context *smu,
>   return ret;
>  }
> 
> +static int vangogh_get_apu_thermal_limit(struct smu_context *smu,
> uint32_t *limit)
> +{
> + int ret = -EINVAL;
> +
> + ret = smu_cmn_send_smc_msg_with_param(smu,
> +   SMU_MSG_GetThermalLimit,
> +   0, limit);
> + return ret;
[Quan, Evan] "ret" here and below in vangogh_set_apu_thermal_limit seem 
unnecessary.
Other that those, the patch is reviewed-by: Evan Quan 

Evan
> +}
> +
> +int vangogh_set_apu_thermal_limit(struct smu_context *smu, uint32_t
> limit)
> +{
> + int ret = -EINVAL;
> +
> + ret = smu_cmn_send_smc_msg_with_param(smu,
> +
> SMU_MSG_SetReducedThermalLimit,
> +   limit, NULL);
> + return ret;
> +}
> +
> +
>  static int vangogh_set_watermarks_table(struct smu_context *smu,
>  struct pp_smu_wm_range_sets
> *clock_ranges)
>  {
> @@ -2425,6 +2446,8 @@ static const struct pptable_funcs
> vangogh_ppt_funcs = {
>   .dpm_set_jpeg_enable = vangogh_dpm_set_jpeg_enable,
>   .is_dpm_running = vangogh_is_dpm_running,
>   .read_sensor = vangogh_read_sensor,
> + .get_apu_thermal_limit = vangogh_get_apu_thermal_limit,
> + .set_apu_thermal_limit = vangogh_set_apu_thermal_limit,
>   .get_enabled_mask = smu_cmn_get_enabled_mask,
>   .get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
>   .set_watermarks_table = vangogh_set_watermarks_table,
> --
> 2.25.1


RE: [PATCH 1/2] drm/amdgpu: added a sysfs interface for thermal throttling

2023-02-14 Thread Quan, Evan
[AMD Official Use Only - General]



> -Original Message-
> From: kunliu13 
> Sent: Tuesday, February 14, 2023 3:54 PM
> To: Limonciello, Mario ; Liang, Richard qi
> ; Yuan, Perry ; amd-
> g...@lists.freedesktop.org
> Cc: Deucher, Alexander ; Du, Xiaojian
> ; Quan, Evan ; Liu, Kun
> 
> Subject: [PATCH 1/2] drm/amdgpu: added a sysfs interface for thermal
> throttling
> 
> added a sysfs interface for thermal throttling, then userspace can get/update
> thermal limit
> 
> Jira ID: SWDEV-354511
[Quan, Evan] Please drop this internal link. Other than this, the patch is 
Reviewed-by: Evan Quan 

Evan
> Signed-off-by: Kun Liu 
> 
> Change-Id: I9948cb8966b731d2d74d7aad87cbcdc840dd34c8
> ---
>  .../gpu/drm/amd/include/kgd_pp_interface.h|  2 +
>  drivers/gpu/drm/amd/pm/amdgpu_dpm.c   | 28 +++
>  drivers/gpu/drm/amd/pm/amdgpu_pm.c| 76
> +++
>  drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h   |  3 +
>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 24 ++
>  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 12 +++
>  6 files changed, 145 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> index f3d64c78f..8394464ea 100644
> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> @@ -331,6 +331,8 @@ struct amd_pm_funcs {
>   int (*get_mclk_od)(void *handle);
>   int (*set_mclk_od)(void *handle, uint32_t value);
>   int (*read_sensor)(void *handle, int idx, void *value, int *size);
> + int (*get_apu_thermal_limit)(void *handle, uint32_t *limit);
> + int (*set_apu_thermal_limit)(void *handle, uint32_t limit);
>   enum amd_dpm_forced_level (*get_performance_level)(void
> *handle);
>   enum amd_pm_state_type (*get_current_power_state)(void
> *handle);
>   int (*get_fan_speed_rpm)(void *handle, uint32_t *rpm);
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> index 1b300c569..d9a9cf189 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> @@ -438,6 +438,34 @@ int amdgpu_dpm_read_sensor(struct
> amdgpu_device *adev, enum amd_pp_sensors senso
>   return ret;
>  }
> 
> +int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev,
> uint32_t *limit)
> +{
> + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> + int ret = -EINVAL;
> +
> + if (pp_funcs && pp_funcs->get_apu_thermal_limit) {
> + mutex_lock(&adev->pm.mutex);
> + ret = pp_funcs->get_apu_thermal_limit(adev-
> >powerplay.pp_handle, limit);
> + mutex_unlock(&adev->pm.mutex);
> + }
> +
> + return ret;
> +}
> +
> +int amdgpu_dpm_set_apu_thermal_limit(struct amdgpu_device *adev,
> uint32_t limit)
> +{
> + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> + int ret = -EINVAL;
> +
> + if (pp_funcs && pp_funcs->set_apu_thermal_limit) {
> + mutex_lock(&adev->pm.mutex);
> + ret = pp_funcs->set_apu_thermal_limit(adev-
> >powerplay.pp_handle, limit);
> + mutex_unlock(&adev->pm.mutex);
> + }
> +
> + return ret;
> +}
> +
>  void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
>  {
>   const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> index 236657eec..99b249e55 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> @@ -1685,6 +1685,81 @@ static ssize_t
> amdgpu_set_thermal_throttling_logging(struct device *dev,
>   return count;
>  }
> 
> +/**
> + * DOC: apu_thermal_cap
> + *
> + * The amdgpu driver provides a sysfs API for retrieving/updating thermal
> + * limit temperature in millidegrees Celsius
> + *
> + * Reading back the file shows you core limit value
> + *
> + * Writing an integer to the file, sets a new thermal limit. The value
> + * should be between 0 and 100. If the value is less than 0 or greater
> + * than 100, then the write request will be ignored.
> + */
> +static ssize_t amdgpu_get_apu_thermal_cap(struct device *dev,
> +  struct device_attribute *attr,
> +  char *buf)
> +{
> + int ret, size = 0;
> + u32 limit;
> + struct drm_device *ddev = dev_get_drvdata(dev);
> + struct amdgpu_device *adev = drm_to_adev(ddev);
> +
> + ret = pm_runtime_get_sync(ddev->dev);
> + if (ret < 0) {
> + pm_runtime_put_autosuspend(ddev->dev);
> + return size;
> + }
> +
> + ret = amdgpu_dpm_get_apu_thermal_limit(adev, &limit);
> + if (!ret)
> + size = sysfs_emit(buf, "%u\n", limit);
> + else
> + size = sysfs_emit(buf, "failed to get thermal limit\n");
> +
> + pm_runtime_mark_last_busy(ddev->dev);
> + pm_runtime_put_autosuspend(ddev->d

Re: [RFC PATCH v2 00/18] Add DRM CRTC 3D LUT interface

2023-02-14 Thread Pekka Paalanen
On Mon, 13 Feb 2023 18:26:55 -0100
Melissa Wen  wrote:

> On 02/10, Pekka Paalanen wrote:
> > On Thu, 9 Feb 2023 13:27:02 -0100
> > Melissa Wen  wrote:
> >   
> > > On 01/31, Pekka Paalanen wrote:  
> > > > On Mon, 9 Jan 2023 14:38:09 -0100
> > > > Melissa Wen  wrote:
> > > > 
> > > > > On 01/09, Melissa Wen wrote:
> > > > > > Hi,
> > > > > > 
> > > > > > After collecting comments in different places, here is a second 
> > > > > > version
> > > > > > of the work on adding DRM CRTC 3D LUT support to the current DRM 
> > > > > > color
> > > > > > mgmt interface. In comparison to previous proposals [1][2][3], here 
> > > > > > we
> > > > > > add 3D LUT before gamma 1D LUT, but also a shaper 1D LUT before 3D 
> > > > > > LUT,
> > > > > > that means the following DRM CRTC color correction pipeline:
> > > > > > 
> > > > > > Blend -> Degamma 1D LUT -> CTM -> Shaper 1D LUT -> 3D LUT -> Gamma 
> > > > > > 1D LUT
> > > > 
> > > > Hi Melissa,
> > > > 
> > > > that makes sense to me, for CRTCs. It would be really good to have that
> > > > as a diagram in the KMS UAPI documentation.
> > > > 
> > > 
> > > Hi Pekka,
> > > 
> > > Thanks for your feedbacks and your time reviewing this proposal.  
> > 
> > No problem, and sorry it took so long!
> > 
> > I'm just finishing the catch-up with everything that happened during
> > winter holidays.
> >   
> > > > If someone wants to add a 3D LUT to KMS planes as well, then I'm not
> > > > sure if it should be this order or swapped. I will probably have an
> > > > opinion about that once Weston is fully HDR capable and has been tried
> > > > in the wild for a while with the HDR color operations fine-tuned based
> > > > on community feedback. IOW, not for a long time. The YUV to RGB
> > > > conversion factors in there as well.
> > > > 
> > > I see, this is also the reason I reuse here Alex Hung's proposal for
> > > pre-blending API. I'll work on better documentation.
> > >   
> > > > 
> > > > > > 
> > > > > > and we also add a DRM CRTC LUT3D_MODE property, based on Alex Hung
> > > > > > proposal for pre-blending 3D LUT [4] (Thanks!), instead of just a
> > > > > > LUT3D_SIZE, that allows userspace to use different supported 
> > > > > > settings of
> > > > > > 3D LUT, fitting VA-API and new color API better. In this sense, I
> > > > > > adjusted the pre-blending proposal for post-blending usage.
> > > > > > 
> > > > > > Patches 1-6 targets the addition of shaper LUT and 3D LUT 
> > > > > > properties to
> > > > > > the current DRM CRTC color mgmt pipeline. Patch 6 can be considered 
> > > > > > an
> > > > > > extra/optional patch to define a default value for LUT3D_MODE, 
> > > > > > inspired
> > > > > > by what we do for the plane blend mode property (pre-multiplied).
> > > > > > 
> > > > > > Patches 7-18 targets AMD display code to enable shaper and 3D LUT 
> > > > > > usage
> > > > > > on DCN 301 (our HW case). Patches 7-9 performs code cleanups on 
> > > > > > current
> > > > > > AMD DM colors code, patch 10 updates AMD stream in case of user 3D 
> > > > > > LUT
> > > > > > changes, patch 11/12 rework AMD MPC 3D LUT resource handling by 
> > > > > > context
> > > > > > for DCN 301 (easily extendible to other DCN families). Finally, from
> > > > > > 13-18, we wire up SHAPER LUT, LUT3D and LUT3D MODE to AMD display
> > > > > > driver, exposing modes supported by HW and programming user shaper 
> > > > > > and
> > > > > > 3D LUT accordingly.
> > > > > > 
> > > > > > Our target userspace is Gamescope/SteamOS.
> > > > > > 
> > > > > > Basic IGT tests were based on [5][6] and are available here 
> > > > > > (in-progress):
> > > > > > https://gitlab.freedesktop.org/mwen/igt-gpu-tools/-/commits/crtc-lut3d-api
> > > > > > 
> > > > > > [1] 
> > > > > > https://lore.kernel.org/all/20201221015730.28333-1-laurent.pinchart+rene...@ideasonboard.com/
> > > > > > [2] 
> > > > > > https://github.com/vsyrjala/linux/commit/4d28e8ddf2a076f30f9e5bdc17cbb4656fe23e69
> > > > > > [3] 
> > > > > > https://lore.kernel.org/amd-gfx/20220619223104.667413-1-m...@igalia.com/
> > > > > > [4] 
> > > > > > https://lore.kernel.org/dri-devel/20221004211451.1475215-1-alex.h...@amd.com/
> > > > > > [5] https://patchwork.freedesktop.org/series/90165/
> > > > > > [6] https://patchwork.freedesktop.org/series/109402/
> > > > > > [VA_API] 
> > > > > > http://intel.github.io/libva/structVAProcFilterParameterBuffer3DLUT.html
> > > > > > [KMS_pipe_API] 
> > > > > > https://gitlab.freedesktop.org/pq/color-and-hdr/-/issues/11
> > > > > > 
> > > > > > Let me know your thoughts.  
> > > > > 
> > > > > +Simon Ser, +Pekka Paalanen who might also be interested in this 
> > > > > series.
> > > > 
> > > > Unfortunately I don't have the patch emails to reply to, so here's a
> > > > messy bunch of comments. I'll concentrate on the UAPI design as always. 
> > > >
> > > 
> > > Sorry, the patchset is here: 
> > > https://lore.kernel.org/dri-devel/20230109143846.1966301-1-m...@igalia.com/
> > > In the nex

Re: [RFC PATCH v2 00/18] Add DRM CRTC 3D LUT interface

2023-02-14 Thread Pekka Paalanen
On Mon, 13 Feb 2023 18:45:40 -0100
Melissa Wen  wrote:

> On 02/13, Ville Syrjälä wrote:
> > On Mon, Feb 13, 2023 at 11:01:31AM +0200, Pekka Paalanen wrote:  
> > > On Fri, 10 Feb 2023 14:47:50 -0500
> > > Harry Wentland  wrote:
> > >   
> > > > On 2/10/23 04:28, Pekka Paalanen wrote:  
> > > > > On Thu, 9 Feb 2023 13:27:02 -0100
> > > > > Melissa Wen  wrote:
> > > > > 
> > > > >> On 01/31, Pekka Paalanen wrote:
> > > > >>> On Mon, 9 Jan 2023 14:38:09 -0100
> > > > >>> Melissa Wen  wrote:
> > > > >>>   
> > > >  On 01/09, Melissa Wen wrote:  
> > > > > Hi,
> > > > >
> > > > > After collecting comments in different places, here is a second 
> > > > > version
> > > > > of the work on adding DRM CRTC 3D LUT support to the current DRM 
> > > > > color
> > > > > mgmt interface. In comparison to previous proposals [1][2][3], 
> > > > > here we
> > > > > add 3D LUT before gamma 1D LUT, but also a shaper 1D LUT before 
> > > > > 3D LUT,
> > > > > that means the following DRM CRTC color correction pipeline:
> > > > >
> > > > > Blend -> Degamma 1D LUT -> CTM -> Shaper 1D LUT -> 3D LUT -> 
> > > > > Gamma 1D LUT  
> > > 
> > > ...
> > >   
> > > > >>> +/*
> > > > >>> + * struct drm_mode_lut3d_mode - 3D LUT mode information.
> > > > >>> + * @lut_size: number of valid points on every dimension of 3D LUT.
> > > > >>> + * @lut_stride: number of points on every dimension of 3D LUT.
> > > > >>> + * @bit_depth: number of bits of RGB. If color_mode defines 
> > > > >>> entries with higher
> > > > >>> + * bit_depth the least significant bits will be 
> > > > >>> truncated.
> > > > >>> + * @color_format: fourcc values, ex. DRM_FORMAT_XRGB16161616 or 
> > > > >>> DRM_FORMAT_XBGR16161616.
> > > > >>> + * @flags: flags for hardware-sepcific features
> > > > >>> + */
> > > > >>> +struct drm_mode_lut3d_mode {
> > > > >>> +   __u16 lut_size;
> > > > >>> +   __u16 lut_stride[3];
> > > > >>> +   __u16 bit_depth;
> > > > >>> +   __u32 color_format;
> > > > >>> +   __u32 flags;
> > > > >>> +};  
> > > 
> > > ...
> > >   
> > > > >>> What is "number of bits of RGB"? Input precision? Output precision?
> > > > >>> Integer or floating point?  
> > > > >>
> > > > >> It's the bit depth of the 3D LUT values, the same for every 
> > > > >> channels. In
> > > > >> the AMD case, it's supports 10-bit and 12-bit, for example.
> > > > > 
> > > > > Ok. So e.g. r5g6b5 is not a possible 3D LUT element type on any
> > > > > hardware ever?
> > > > > 
> > > > 
> > > > I haven't had a chance to go through all patches yet but if this is
> > > > modeled after Alex Hung's work this should be covered by color_format.
> > > > The idea is that color_format takes a FOURCC value and defines the
> > > > format of the entries in the 3DLUT blob.
> > > > 
> > > > The bit_depth describes the actual bit depth that the HW supports.
> > > > E.g., color_format could be DRM_FORMAT_XRGB16161616 but HW might only
> > > > support 12-bit precision. In that case the least significant bits get
> > > > truncated.
> > > > 
> > > > One could define the bit_depth per color, but I'm not sure that'll be
> > > > necessary.  
> > > 
> > > Exactly. I just have no idea how sure we should be about that.
> > >   
> > > > > What exactly is the truncation the comment refers to?
> > > > > 
> > > > > It sounds like if input has higher precision than the LUT elements,
> > > > > then "truncation" occurs. I can kind of see that, but I also think it
> > > > > is a false characterisation. The LUT input precision affects the
> > > > > precision of LUT indexing and the precision of interpolation between
> > > > > the LUT elements. I would not expect those two precisions to be
> > > > > truncated to the LUT element precision (but they could be truncated to
> > > > > something else hardware specific). Instead, I do expect the
> > > > > interpolation result to be truncated to the LUT output precision, 
> > > > > which
> > > > > probably is the same as the LUT element precision, but not 
> > > > > necessarily.
> > > > > 
> > > > > Maybe the comment about truncation should simply be removed? The 
> > > > > result
> > > > > is obvious if we know the LUT input, element, and output precision, 
> > > > > and
> > > > > what exactly happens with the indexing and interpolation is probably
> > > > > good enough to be left hardware-specific if it is difficult to 
> > > > > describe
> > > > > in generic terms across different hardware.
> > > > > 
> > > > 
> > > > Maybe it makes sense to just drop the bit_depth field.  
> > > 
> > > Well, it's really interesting information for userspace, but maybe it
> > > should have a more holistic design. Precision is a factor, when
> > > userspace considers whether it can use KMS hardware for a conversion or
> > > not. Unfortunately, none of the existing KMS color pipeline elements
> > > have any information on precision IIRC, so there is more to be fixed.
> > > 
> > > The 

RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no new bad page

2023-02-14 Thread Zhang, Hawking
[AMD Official Use Only - General]

-EINVAL looks better than EEXIST, but it still doesn't fit the case? 
Alternatively, Can we compare the count before and after 
amdgpu_ras_add_bad_pages to decide whether reset ue_count to 0 or not. That 
could be straightforward than struggling for returning which error code in this 
specific case.

Regards,
Hawking

-Original Message-
From: Yang, Stanley 
Sent: Tuesday, February 14, 2023 10:42
To: Zhou1, Tao ; Zhang, Hawking ; 
amd-gfx@lists.freedesktop.org; Chai, Thomas ; Li, Candice 

Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no new bad 
page

[AMD Official Use Only - General]



> -Original Message-
> From: Zhou1, Tao 
> Sent: Monday, February 13, 2023 4:25 PM
> To: Zhang, Hawking ; amd-
> g...@lists.freedesktop.org; Yang, Stanley ; Chai,
> Thomas ; Li, Candice 
> Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no
> new bad page
>
> [AMD Official Use Only - General]
>
> > -Original Message-
> > From: Zhang, Hawking 
> > Sent: Friday, February 10, 2023 11:02 PM
> > To: Zhou1, Tao ; amd-gfx@lists.freedesktop.org;
> > Yang, Stanley ; Chai, Thomas
> > ; Li, Candice 
> > Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if
> > no new bad page
> >
> > [AMD Official Use Only - General]
> >
> > +   /* if no new bad page is found, no need to
> > + increase ue count
> */
> > +   if (ret == -EEXIST)
> > +   err_data->ue_count = 0;
> >
> > Returning EEXIST in such case is not reasonable. Might consider
> > return a bool for
> > amdgpu_ras_add_bad_pages: true means it does add some new bad page;
> > false means it doesn't change anything.
> >
> > Regards,
> > Hawking
>
> [Tao] but it can returns -ENOMEM, amdgpu_ras_load_bad_pages and
> amdgpu_ras_recovery_init also need to check the return value. I'd like
> to keep the type of return value unchanged.
> How about -EINVAL?

Stanley: How about return -EALREADY?

Regards,
Stanley
>
> >
> > -Original Message-
> > From: Zhou1, Tao 
> > Sent: Friday, February 10, 2023 16:45
> > To: amd-gfx@lists.freedesktop.org; Zhang, Hawking
> > ; Yang, Stanley ;
> Chai,
> > Thomas ; Li, Candice 
> > Cc: Zhou1, Tao 
> > Subject: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no
> new
> > bad page
> >
> > If a UMC bad page is reserved but not freed by an application, the
> > application may trigger uncorrectable error repeatly by accessing the page.
> >
> > Signed-off-by: Tao Zhou 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 -
> > drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 6 +-
> >  2 files changed, 13 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> > index e85c4689ce2c..eafe01a24349 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> > @@ -2049,7 +2049,7 @@ int amdgpu_ras_add_bad_pages(struct
> > amdgpu_device *adev,  {
> > struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
> > struct ras_err_handler_data *data;
> > -   int ret = 0;
> > +   int ret = 0, old_cnt;
> > uint32_t i;
> >
> > if (!con || !con->eh_data || !bps || pages <= 0) @@ -2060,6
> > +2060,8 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device
> *adev,
> > if (!data)
> > goto out;
> >
> > +   old_cnt = data->count;
> > +
> > for (i = 0; i < pages; i++) {
> > if (amdgpu_ras_check_bad_page_unlock(con,
> > bps[i].retired_page <<
> > AMDGPU_GPU_PAGE_SHIFT)) @@ -2079,6
> > +2081,11 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device
> *adev,
> > data->count++;
> > data->space_left--;
> > }
> > +
> > +   /* all pages have been reserved before, no new bad page */
> > +   if (old_cnt == data->count)
> > +   ret = -EEXIST;
> > +
> >  out:
> > mutex_unlock(&con->recovery_lock);
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > index 1c7fcb4f2380..772c431e4065 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > @@ -145,8 +145,12 @@ static int
> amdgpu_umc_do_page_retirement(struct
> > amdgpu_device *adev,
> >
> > if ((amdgpu_bad_page_threshold != 0) &&
> > err_data->err_addr_cnt) {
> > -   amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
> > +   ret = amdgpu_ras_add_bad_pages(adev,
> > + err_data->err_addr,
> >
> > err_data->err_addr_cnt);
> > +   /* if no new bad page is found, no need to
> > + increase ue count
> */
> > +   if (ret == -EEXIST)
> > +   err_data->ue_count = 0;
> > +
> > am

Re: [PATCH] drm/amd/amdgpu: fix warining during suspend

2023-02-14 Thread Jocelyn Falempe

On 13/02/2023 11:52, Jack Xiao wrote:

Freeing memory was warned during suspend.
Move the self test out of suspend.


Thanks a lot, this fixes the following warning during suspend/resume on 
v6.2-rc8


WARNING: CPU: 2 PID: 3980 at 
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c:425 
amdgpu_bo_free_kernel+0xf5/0x110 [amdgpu]


Tested-by: Jocelyn Falempe 

--

Jocelyn



RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no new bad page

2023-02-14 Thread Zhou1, Tao
[AMD Official Use Only - General]

OK, I'll add a new function to do the check.

Tao

> -Original Message-
> From: Zhang, Hawking 
> Sent: Tuesday, February 14, 2023 6:03 PM
> To: Yang, Stanley ; Zhou1, Tao
> ; amd-gfx@lists.freedesktop.org; Chai, Thomas
> ; Li, Candice 
> Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no new
> bad page
>
> [AMD Official Use Only - General]
>
> -EINVAL looks better than EEXIST, but it still doesn't fit the case? 
> Alternatively,
> Can we compare the count before and after amdgpu_ras_add_bad_pages to
> decide whether reset ue_count to 0 or not. That could be straightforward than
> struggling for returning which error code in this specific case.
>
> Regards,
> Hawking
>
> -Original Message-
> From: Yang, Stanley 
> Sent: Tuesday, February 14, 2023 10:42
> To: Zhou1, Tao ; Zhang, Hawking
> ; amd-gfx@lists.freedesktop.org; Chai, Thomas
> ; Li, Candice 
> Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no new
> bad page
>
> [AMD Official Use Only - General]
>
>
>
> > -Original Message-
> > From: Zhou1, Tao 
> > Sent: Monday, February 13, 2023 4:25 PM
> > To: Zhang, Hawking ; amd-
> > g...@lists.freedesktop.org; Yang, Stanley ; Chai,
> > Thomas ; Li, Candice 
> > Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no
> > new bad page
> >
> > [AMD Official Use Only - General]
> >
> > > -Original Message-
> > > From: Zhang, Hawking 
> > > Sent: Friday, February 10, 2023 11:02 PM
> > > To: Zhou1, Tao ; amd-gfx@lists.freedesktop.org;
> > > Yang, Stanley ; Chai, Thomas
> > > ; Li, Candice 
> > > Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if
> > > no new bad page
> > >
> > > [AMD Official Use Only - General]
> > >
> > > +   /* if no new bad page is found, no need to
> > > + increase ue count
> > */
> > > +   if (ret == -EEXIST)
> > > +   err_data->ue_count = 0;
> > >
> > > Returning EEXIST in such case is not reasonable. Might consider
> > > return a bool for
> > > amdgpu_ras_add_bad_pages: true means it does add some new bad page;
> > > false means it doesn't change anything.
> > >
> > > Regards,
> > > Hawking
> >
> > [Tao] but it can returns -ENOMEM, amdgpu_ras_load_bad_pages and
> > amdgpu_ras_recovery_init also need to check the return value. I'd like
> > to keep the type of return value unchanged.
> > How about -EINVAL?
>
> Stanley: How about return -EALREADY?
>
> Regards,
> Stanley
> >
> > >
> > > -Original Message-
> > > From: Zhou1, Tao 
> > > Sent: Friday, February 10, 2023 16:45
> > > To: amd-gfx@lists.freedesktop.org; Zhang, Hawking
> > > ; Yang, Stanley ;
> > Chai,
> > > Thomas ; Li, Candice 
> > > Cc: Zhou1, Tao 
> > > Subject: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no
> > new
> > > bad page
> > >
> > > If a UMC bad page is reserved but not freed by an application, the
> > > application may trigger uncorrectable error repeatly by accessing the 
> > > page.
> > >
> > > Signed-off-by: Tao Zhou 
> > > ---
> > >  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 -
> > > drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 6 +-
> > >  2 files changed, 13 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> > > index e85c4689ce2c..eafe01a24349 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> > > @@ -2049,7 +2049,7 @@ int amdgpu_ras_add_bad_pages(struct
> > > amdgpu_device *adev,  {
> > > struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
> > > struct ras_err_handler_data *data;
> > > -   int ret = 0;
> > > +   int ret = 0, old_cnt;
> > > uint32_t i;
> > >
> > > if (!con || !con->eh_data || !bps || pages <= 0) @@ -2060,6
> > > +2060,8 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device
> > *adev,
> > > if (!data)
> > > goto out;
> > >
> > > +   old_cnt = data->count;
> > > +
> > > for (i = 0; i < pages; i++) {
> > > if (amdgpu_ras_check_bad_page_unlock(con,
> > > bps[i].retired_page <<
> > > AMDGPU_GPU_PAGE_SHIFT)) @@ -2079,6
> > > +2081,11 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device
> > *adev,
> > > data->count++;
> > > data->space_left--;
> > > }
> > > +
> > > +   /* all pages have been reserved before, no new bad page */
> > > +   if (old_cnt == data->count)
> > > +   ret = -EEXIST;
> > > +
> > >  out:
> > > mutex_unlock(&con->recovery_lock);
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > > index 1c7fcb4f2380..772c431e4065 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > > @@ -145,8 +145,12 @@ static

Re: [Intel-gfx] [PATCH 3/3] drm/ttm: Change the meaning of the fields in the drm_mm_nodes structure from pfn to bytes v2

2023-02-14 Thread Matthew Auld
On Tue, 14 Feb 2023 at 07:43, Christian König
 wrote:
>
> From: Somalapuram Amaranath 
>
> Change the ttm_range_man_alloc() allocation from pages to size in bytes.
> Fix the dependent drm_mm_nodes start and size from pages to bytes.
>
> v2 (chk): Change the drm_mm_node usage in amdgpu as well. re-order the
>   patch to be independent of the resource->start change.
>
> Signed-off-by: Somalapuram Amaranath 
> Reviewed-by: Christian König 
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c| 15 ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h |  8 
>  drivers/gpu/drm/i915/i915_scatterlist.c|  6 +++---
>  drivers/gpu/drm/ttm/ttm_range_manager.c| 17 -
>  4 files changed, 23 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> index 44367f03316f..c90423cd1292 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> @@ -116,7 +116,6 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager 
> *man,
>   struct ttm_resource **res)
>  {
> struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
> -   uint32_t num_pages = PFN_UP(tbo->base.size);
> struct ttm_range_mgr_node *node;
> int r;
>
> @@ -134,17 +133,19 @@ static int amdgpu_gtt_mgr_new(struct 
> ttm_resource_manager *man,
> if (place->lpfn) {
> spin_lock(&mgr->lock);
> r = drm_mm_insert_node_in_range(&mgr->mm, &node->mm_nodes[0],
> -   num_pages, 
> tbo->page_alignment,
> -   0, place->fpfn, place->lpfn,
> +   tbo->base.size,
> +   tbo->page_alignment << 
> PAGE_SHIFT, 0,
> +   place->fpfn << PAGE_SHIFT,
> +   place->lpfn << PAGE_SHIFT,
> DRM_MM_INSERT_BEST);
> spin_unlock(&mgr->lock);
> if (unlikely(r))
> goto err_free;
>
> -   node->base.start = node->mm_nodes[0].start;
> +   node->base.start = node->mm_nodes[0].start >> PAGE_SHIFT;
> } else {
> node->mm_nodes[0].start = 0;
> -   node->mm_nodes[0].size = PFN_UP(node->base.size);
> +   node->mm_nodes[0].size = node->base.size;
> node->base.start = AMDGPU_BO_INVALID_OFFSET;
> }
>
> @@ -285,8 +286,8 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, 
> uint64_t gtt_size)
>
> ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size);
>
> -   start = AMDGPU_GTT_MAX_TRANSFER_SIZE * 
> AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
> -   size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
> +   start = (AMDGPU_GTT_MAX_TRANSFER_SIZE * 
> AMDGPU_GTT_NUM_TRANSFER_WINDOWS) << PAGE_SHIFT;
> +   size = adev->gmc.gart_size - start;
> drm_mm_init(&mgr->mm, start, size);
> spin_lock_init(&mgr->lock);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> index 5c4f93ee0c57..5c78f0b09351 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> @@ -94,8 +94,8 @@ static inline void amdgpu_res_first(struct ttm_resource 
> *res,
> while (start >= node->size << PAGE_SHIFT)
> start -= node++->size << PAGE_SHIFT;
>
> -   cur->start = (node->start << PAGE_SHIFT) + start;
> -   cur->size = min((node->size << PAGE_SHIFT) - start, size);
> +   cur->start = node->start + start;
> +   cur->size = min(node->size - start, size);
> cur->remaining = size;
> cur->node = node;
> break;
> @@ -155,8 +155,8 @@ static inline void amdgpu_res_next(struct 
> amdgpu_res_cursor *cur, uint64_t size)
> node = cur->node;
>
> cur->node = ++node;
> -   cur->start = node->start << PAGE_SHIFT;
> -   cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
> +   cur->start = node->start;
> +   cur->size = min(node->size, cur->remaining);
> break;
> default:
> return;
> diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c 
> b/drivers/gpu/drm/i915/i915_scatterlist.c
> index 756289e43dff..7defda1219d0 100644
> --- a/drivers/gpu/drm/i915/i915_scatterlist.c
> +++ b/drivers/gpu/drm/i915/i915_scatterlist.c
> @@ -94,7 +94,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct 
> drm_mm_node *node,
> if (!rsgt)
> return ERR_PTR(-ENOMEM);
>
> 

Re: [Intel-gfx] [PATCH 3/3] drm/ttm: Change the meaning of the fields in the drm_mm_nodes structure from pfn to bytes v2

2023-02-14 Thread Christian König

Am 14.02.23 um 11:31 schrieb Matthew Auld:

On Tue, 14 Feb 2023 at 07:43, Christian König
 wrote:

From: Somalapuram Amaranath 

Change the ttm_range_man_alloc() allocation from pages to size in bytes.
Fix the dependent drm_mm_nodes start and size from pages to bytes.

v2 (chk): Change the drm_mm_node usage in amdgpu as well. re-order the
   patch to be independent of the resource->start change.

Signed-off-by: Somalapuram Amaranath 
Reviewed-by: Christian König 
Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c| 15 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h |  8 
  drivers/gpu/drm/i915/i915_scatterlist.c|  6 +++---
  drivers/gpu/drm/ttm/ttm_range_manager.c| 17 -
  4 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 44367f03316f..c90423cd1292 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -116,7 +116,6 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager 
*man,
   struct ttm_resource **res)
  {
 struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
-   uint32_t num_pages = PFN_UP(tbo->base.size);
 struct ttm_range_mgr_node *node;
 int r;

@@ -134,17 +133,19 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager 
*man,
 if (place->lpfn) {
 spin_lock(&mgr->lock);
 r = drm_mm_insert_node_in_range(&mgr->mm, &node->mm_nodes[0],
-   num_pages, tbo->page_alignment,
-   0, place->fpfn, place->lpfn,
+   tbo->base.size,
+   tbo->page_alignment << 
PAGE_SHIFT, 0,
+   place->fpfn << PAGE_SHIFT,
+   place->lpfn << PAGE_SHIFT,
 DRM_MM_INSERT_BEST);
 spin_unlock(&mgr->lock);
 if (unlikely(r))
 goto err_free;

-   node->base.start = node->mm_nodes[0].start;
+   node->base.start = node->mm_nodes[0].start >> PAGE_SHIFT;
 } else {
 node->mm_nodes[0].start = 0;
-   node->mm_nodes[0].size = PFN_UP(node->base.size);
+   node->mm_nodes[0].size = node->base.size;
 node->base.start = AMDGPU_BO_INVALID_OFFSET;
 }

@@ -285,8 +286,8 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, 
uint64_t gtt_size)

 ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size);

-   start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
-   size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
+   start = (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS) 
<< PAGE_SHIFT;
+   size = adev->gmc.gart_size - start;
 drm_mm_init(&mgr->mm, start, size);
 spin_lock_init(&mgr->lock);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 5c4f93ee0c57..5c78f0b09351 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -94,8 +94,8 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
 while (start >= node->size << PAGE_SHIFT)
 start -= node++->size << PAGE_SHIFT;

-   cur->start = (node->start << PAGE_SHIFT) + start;
-   cur->size = min((node->size << PAGE_SHIFT) - start, size);
+   cur->start = node->start + start;
+   cur->size = min(node->size - start, size);
 cur->remaining = size;
 cur->node = node;
 break;
@@ -155,8 +155,8 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor 
*cur, uint64_t size)
 node = cur->node;

 cur->node = ++node;
-   cur->start = node->start << PAGE_SHIFT;
-   cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+   cur->start = node->start;
+   cur->size = min(node->size, cur->remaining);
 break;
 default:
 return;
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c 
b/drivers/gpu/drm/i915/i915_scatterlist.c
index 756289e43dff..7defda1219d0 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.c
+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
@@ -94,7 +94,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct 
drm_mm_node *node,
 if (!rsgt)
 return ERR_PTR(-ENOMEM);

-   i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT);
+   i915_refct_sgt_init(rsgt, node->size);
 st = &rsgt->table;

RE: [RFC PATCH v2 00/18] Add DRM CRTC 3D LUT interface

2023-02-14 Thread Sharma, Shashank
[AMD Official Use Only - General]

+ Uday, for awareness. 

Regards
Shashank
-Original Message-
From: Pekka Paalanen 
Sent: 14 February 2023 10:28
To: Melissa Wen 
Cc: Ville Syrjälä ; 
dri-de...@lists.freedesktop.org; airl...@gmail.com; 
laurent.pinchart+rene...@ideasonboard.com; Sharma, Shashank 
; Siqueira, Rodrigo ; 
amd-gfx@lists.freedesktop.org; Hung, Alex ; Wentland, Harry 
; tzimmerm...@suse.de; Li, Sun peng (Leo) 
; maarten.lankho...@linux.intel.com; mrip...@kernel.org; 
seanp...@chromium.org; dan...@ffwll.ch; Lakha, Bhawanpreet 
; Kim, Sung joon ; 
cont...@emersion.fr; Pan, Xinhui ; Koenig, Christian 
; kernel-...@igalia.com; Deucher, Alexander 
; Kazlauskas, Nicholas 
; Joshua Ashton 
Subject: Re: [RFC PATCH v2 00/18] Add DRM CRTC 3D LUT interface

On Mon, 13 Feb 2023 18:45:40 -0100
Melissa Wen  wrote:

> On 02/13, Ville Syrjälä wrote:
> > On Mon, Feb 13, 2023 at 11:01:31AM +0200, Pekka Paalanen wrote:  
> > > On Fri, 10 Feb 2023 14:47:50 -0500 Harry Wentland 
> > >  wrote:
> > >   
> > > > On 2/10/23 04:28, Pekka Paalanen wrote:  
> > > > > On Thu, 9 Feb 2023 13:27:02 -0100 Melissa Wen 
> > > > >  wrote:
> > > > > 
> > > > >> On 01/31, Pekka Paalanen wrote:
> > > > >>> On Mon, 9 Jan 2023 14:38:09 -0100 Melissa Wen 
> > > > >>>  wrote:
> > > > >>>   
> > > >  On 01/09, Melissa Wen wrote:  
> > > > > Hi,
> > > > >
> > > > > After collecting comments in different places, here is a 
> > > > > second version of the work on adding DRM CRTC 3D LUT 
> > > > > support to the current DRM color mgmt interface. In 
> > > > > comparison to previous proposals [1][2][3], here we add 3D 
> > > > > LUT before gamma 1D LUT, but also a shaper 1D LUT before 3D LUT, 
> > > > > that means the following DRM CRTC color correction pipeline:
> > > > >
> > > > > Blend -> Degamma 1D LUT -> CTM -> Shaper 1D LUT -> 3D LUT -> 
> > > > > Gamma 1D LUT  
> > > 
> > > ...
> > >   
> > > > >>> +/*
> > > > >>> + * struct drm_mode_lut3d_mode - 3D LUT mode information.
> > > > >>> + * @lut_size: number of valid points on every dimension of 3D LUT.
> > > > >>> + * @lut_stride: number of points on every dimension of 3D LUT.
> > > > >>> + * @bit_depth: number of bits of RGB. If color_mode defines 
> > > > >>> entries with higher
> > > > >>> + * bit_depth the least significant bits will be 
> > > > >>> truncated.
> > > > >>> + * @color_format: fourcc values, ex. DRM_FORMAT_XRGB16161616 or 
> > > > >>> DRM_FORMAT_XBGR16161616.
> > > > >>> + * @flags: flags for hardware-sepcific features  */ struct 
> > > > >>> +drm_mode_lut3d_mode {
> > > > >>> +   __u16 lut_size;
> > > > >>> +   __u16 lut_stride[3];
> > > > >>> +   __u16 bit_depth;
> > > > >>> +   __u32 color_format;
> > > > >>> +   __u32 flags;
> > > > >>> +};
> > > 
> > > ...
> > >   
> > > > >>> What is "number of bits of RGB"? Input precision? Output precision?
> > > > >>> Integer or floating point?  
> > > > >>
> > > > >> It's the bit depth of the 3D LUT values, the same for every 
> > > > >> channels. In
> > > > >> the AMD case, it's supports 10-bit and 12-bit, for example.
> > > > > 
> > > > > Ok. So e.g. r5g6b5 is not a possible 3D LUT element type on 
> > > > > any hardware ever?
> > > > > 
> > > > 
> > > > I haven't had a chance to go through all patches yet but if this 
> > > > is modeled after Alex Hung's work this should be covered by 
> > > > color_format.
> > > > The idea is that color_format takes a FOURCC value and defines 
> > > > the format of the entries in the 3DLUT blob.
> > > > 
> > > > The bit_depth describes the actual bit depth that the HW supports.
> > > > E.g., color_format could be DRM_FORMAT_XRGB16161616 but HW might 
> > > > only support 12-bit precision. In that case the least 
> > > > significant bits get truncated.
> > > > 
> > > > One could define the bit_depth per color, but I'm not sure 
> > > > that'll be necessary.
> > > 
> > > Exactly. I just have no idea how sure we should be about that.
> > >   
> > > > > What exactly is the truncation the comment refers to?
> > > > > 
> > > > > It sounds like if input has higher precision than the LUT 
> > > > > elements, then "truncation" occurs. I can kind of see that, 
> > > > > but I also think it is a false characterisation. The LUT input 
> > > > > precision affects the precision of LUT indexing and the 
> > > > > precision of interpolation between the LUT elements. I would 
> > > > > not expect those two precisions to be truncated to the LUT 
> > > > > element precision (but they could be truncated to something 
> > > > > else hardware specific). Instead, I do expect the 
> > > > > interpolation result to be truncated to the LUT output precision, 
> > > > > which probably is the same as the LUT element precision, but not 
> > > > > necessarily.
> > > > > 
> > > > > Maybe the comment about truncation should simply be removed? 
> > > > > The result is obvious if we know the LUT input, element, and 
> > > > > 

Re: [PATCH 06/10] drm/amd/display: Fix implicit enum conversion

2023-02-14 Thread Michel Dänzer
On 2/13/23 21:49, Arthur Grillo wrote:
> Make implicit enum conversion to avoid -Wenum-conversion warning, such
> as:
> 
> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn21/display_mode_vba_21.c:4109:88:
>  warning: implicit conversion from ‘enum ’ to ‘enum 
> odm_combine_mode’ [-Wenum-conversion]
>  4109 | 
> locals->ODMCombineEnablePerState[i][k] = true;
>   |   
>  ^
> 
> [...]
> 
> @@ -3897,14 +3898,14 @@ void 
> dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
>   
> mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = 
> mode_lib->vba.PixelClock[k] / 2
>   * (1 + 
> mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
>  
> - locals->ODMCombineEnablePerState[i][k] = false;
> + locals->ODMCombineEnablePerState[i][k] = (enum 
> odm_combine_mode)false;

dm_odm_combine_mode_disabled would seem clearer than (enum 
odm_combine_mode)false.


> - 
> locals->ODMCombineEnablePerState[i][k] = true;
> + 
> locals->ODMCombineEnablePerState[i][k] = (enum odm_combine_mode)true;

I'm not sure which enum value (enum odm_combine_mode)true will be converted to, 
probably dm_odm_combine_mode_2to1? Is that really appropriate everywhere true 
is used? If so, again 
dm_odm_combine_mode_2to1 would seem clearer.


-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer



[PATCH 0/6] Trivial code cleanup around color resources

2023-02-14 Thread Melissa Wen
Hi,

Sorry for the noise, but while I've been working on wiring 3D LUT
support to AMD display driver [1] I found some annoying code style
issues in the shared-code part. So I'm just sending what I've been
cleaning to better examine the code.

Most seem trivial, except the last one "remove unused
_calculate_degamma_curve" since this could just be a matter of missing
parts. If so, happy to remove the patch and include a comment describing
the situation (or the potential usage of it).

Thanks,

Melissa

[1] https://lore.kernel.org/dri-devel/20230109143846.1966301-1-m...@igalia.com/

Melissa Wen (6):
  drm/amd/display: ident braces in dcn30_acquire_post_bldn_3dlut
correctly
  drm/amd/display: clean code-style issues in dcn30_set_mpc_shaper_3dlut
  drm/amd/display: camel case cleanup in color_gamma file
  drm/amd/display: unset initial value for tf since it's never used
  drm/amd/display: remove unused func declaration from resource headers
  drm/amd/display: remove unused _calculate_degamma_curve function

 .../drm/amd/display/dc/dcn30/dcn30_hwseq.c|  37 ++---
 .../drm/amd/display/dc/dcn30/dcn30_resource.c |   2 +-
 drivers/gpu/drm/amd/display/dc/inc/resource.h |   4 -
 .../amd/display/modules/color/color_gamma.c   | 140 --
 .../amd/display/modules/color/color_gamma.h   |   3 -
 5 files changed, 48 insertions(+), 138 deletions(-)

-- 
2.39.0



[PATCH 1/6] drm/amd/display: ident braces in dcn30_acquire_post_bldn_3dlut correctly

2023-02-14 Thread Melissa Wen
Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index feb4bb491525..60bb5634b6e2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -1477,8 +1477,8 @@ bool dcn30_acquire_post_bldn_3dlut(
state->bits.mpc_rmu2_mux = mpcc_id;
ret = true;
break;
-   }
}
+   }
return ret;
 }
 
-- 
2.39.0



[PATCH 3/6] drm/amd/display: camel case cleanup in color_gamma file

2023-02-14 Thread Melissa Wen
Rename mapUserRamp to map_user_ramp and doClamping to do_clamping

Signed-off-by: Melissa Wen 
---
 .../amd/display/modules/color/color_gamma.c   | 50 ++-
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c 
b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index f6034213c700..fdb20dfc70c9 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -1715,8 +1715,8 @@ static bool map_regamma_hw_to_x_user(
const struct pwl_float_data_ex *rgb_regamma,
uint32_t hw_points_num,
struct dc_transfer_func_distributed_points *tf_pts,
-   bool mapUserRamp,
-   bool doClamping)
+   bool map_user_ramp,
+   bool do_clamping)
 {
/* setup to spare calculated ideal regamma values */
 
@@ -1724,7 +1724,7 @@ static bool map_regamma_hw_to_x_user(
struct hw_x_point *coords = coords_x;
const struct pwl_float_data_ex *regamma = rgb_regamma;
 
-   if (ramp && mapUserRamp) {
+   if (ramp && map_user_ramp) {
copy_rgb_regamma_to_coordinates_x(coords,
hw_points_num,
rgb_regamma);
@@ -1744,7 +1744,7 @@ static bool map_regamma_hw_to_x_user(
}
}
 
-   if (doClamping) {
+   if (do_clamping) {
/* this should be named differently, all it does is clamp to 
0-1 */
build_new_custom_resulted_curve(hw_points_num, tf_pts);
}
@@ -1875,7 +1875,7 @@ bool calculate_user_regamma_ramp(struct dc_transfer_func 
*output_tf,
 
 bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps,
struct dc_transfer_func *input_tf,
-   const struct dc_gamma *ramp, bool mapUserRamp)
+   const struct dc_gamma *ramp, bool map_user_ramp)
 {
struct dc_transfer_func_distributed_points *tf_pts = &input_tf->tf_pts;
struct dividers dividers;
@@ -1891,12 +1891,12 @@ bool mod_color_calculate_degamma_params(struct 
dc_color_caps *dc_caps,
return false;
 
/* we can use hardcoded curve for plain SRGB TF
-* If linear, it's bypass if on user ramp
+* If linear, it's bypass if no user ramp
 */
if (input_tf->type == TF_TYPE_PREDEFINED) {
if ((input_tf->tf == TRANSFER_FUNCTION_SRGB ||
input_tf->tf == TRANSFER_FUNCTION_LINEAR) &&
-   !mapUserRamp)
+   !map_user_ramp)
return true;
 
if (dc_caps != NULL &&
@@ -1919,7 +1919,7 @@ bool mod_color_calculate_degamma_params(struct 
dc_color_caps *dc_caps,
 
input_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
 
-   if (mapUserRamp && ramp && ramp->type == GAMMA_RGB_256) {
+   if (map_user_ramp && ramp && ramp->type == GAMMA_RGB_256) {
rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS,
sizeof(*rgb_user),
GFP_KERNEL);
@@ -2007,7 +2007,7 @@ bool mod_color_calculate_degamma_params(struct 
dc_color_caps *dc_caps,
map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
coordinates_x, axis_x, curve,
MAX_HW_POINTS, tf_pts,
-   mapUserRamp && ramp && ramp->type == 
GAMMA_RGB_256,
+   map_user_ramp && ramp && ramp->type == 
GAMMA_RGB_256,
true);
}
 
@@ -2112,9 +2112,11 @@ static bool calculate_curve(enum 
dc_transfer_func_predefined trans,
 }
 
 bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
-   const struct dc_gamma *ramp, bool mapUserRamp, bool 
canRomBeUsed,
-   const struct hdr_tm_params *fs_params,
-   struct calculate_buffer *cal_buffer)
+   const struct dc_gamma *ramp,
+   bool map_user_ramp,
+   bool can_rom_be_used,
+   const struct hdr_tm_params *fs_params,
+   struct calculate_buffer *cal_buffer)
 {
struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts;
struct dividers dividers;
@@ -2124,26 +2126,26 @@ bool mod_color_calculate_regamma_params(struct 
dc_transfer_func *output_tf,
struct gamma_pixel *axis_x = NULL;
struct pixel_gamma_point *coeff = NULL;
enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
-   bool doClamping = true;
+   bool do_clamping = true;
bool ret = false;
 
if (output_tf->type == TF_TYPE_BYPASS)
return false;
 
/* we can use hardcoded curve for plain SRGB TF */
-   i

[PATCH 5/6] drm/amd/display: remove unused func declaration from resource headers

2023-02-14 Thread Melissa Wen
The function resource_validate_ctx_update_pointer_after_copy() is
declared in resource.h but never defined, therefore, remove its
declaration from headers.

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/dc/inc/resource.h | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h 
b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index 4ab029e3326d..fa6da93caa88 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -165,10 +165,6 @@ bool resource_validate_attach_surfaces(
struct dc_state *context,
const struct resource_pool *pool);
 
-void resource_validate_ctx_update_pointer_after_copy(
-   const struct dc_state *src_ctx,
-   struct dc_state *dst_ctx);
-
 enum dc_status resource_map_clock_resources(
const struct dc *dc,
struct dc_state *context,
-- 
2.39.0



[PATCH 2/6] drm/amd/display: clean code-style issues in dcn30_set_mpc_shaper_3dlut

2023-02-14 Thread Melissa Wen
This function has many conditions and all code style issues (identation,
missing braces, etc.) make reading it really annoying.

Signed-off-by: Melissa Wen 
---
 .../drm/amd/display/dc/dcn30/dcn30_hwseq.c| 37 ++-
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
index 9ce86f288130..df787fcf8e86 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
@@ -90,8 +90,8 @@ bool dcn30_set_blend_lut(
return result;
 }
 
-static bool dcn30_set_mpc_shaper_3dlut(
-   struct pipe_ctx *pipe_ctx, const struct dc_stream_state *stream)
+static bool dcn30_set_mpc_shaper_3dlut(struct pipe_ctx *pipe_ctx,
+  const struct dc_stream_state *stream)
 {
struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
int mpcc_id = pipe_ctx->plane_res.hubp->inst;
@@ -103,19 +103,18 @@ static bool dcn30_set_mpc_shaper_3dlut(
const struct pwl_params *shaper_lut = NULL;
//get the shaper lut params
if (stream->func_shaper) {
-   if (stream->func_shaper->type == TF_TYPE_HWPWL)
+   if (stream->func_shaper->type == TF_TYPE_HWPWL) {
shaper_lut = &stream->func_shaper->pwl;
-   else if (stream->func_shaper->type == 
TF_TYPE_DISTRIBUTED_POINTS) {
-   cm_helper_translate_curve_to_hw_format(
-   stream->func_shaper,
-   &dpp_base->shaper_params, true);
+   } else if (stream->func_shaper->type == 
TF_TYPE_DISTRIBUTED_POINTS) {
+   
cm_helper_translate_curve_to_hw_format(stream->func_shaper,
+  
&dpp_base->shaper_params, true);
shaper_lut = &dpp_base->shaper_params;
}
}
 
if (stream->lut3d_func &&
-   stream->lut3d_func->state.bits.initialized == 1 &&
-   stream->lut3d_func->state.bits.rmu_idx_valid == 1) {
+   stream->lut3d_func->state.bits.initialized == 1 &&
+   stream->lut3d_func->state.bits.rmu_idx_valid == 1) {
if (stream->lut3d_func->state.bits.rmu_mux_num == 0)
mpcc_id_projected = 
stream->lut3d_func->state.bits.mpc_rmu0_mux;
else if (stream->lut3d_func->state.bits.rmu_mux_num == 1)
@@ -124,20 +123,22 @@ static bool dcn30_set_mpc_shaper_3dlut(
mpcc_id_projected = 
stream->lut3d_func->state.bits.mpc_rmu2_mux;
if (mpcc_id_projected != mpcc_id)
BREAK_TO_DEBUGGER();
-   /*find the reason why logical layer assigned a differant 
mpcc_id into acquire_post_bldn_3dlut*/
+   /* find the reason why logical layer assigned a different
+* mpcc_id into acquire_post_bldn_3dlut
+*/
acquired_rmu = mpc->funcs->acquire_rmu(mpc, mpcc_id,
-   stream->lut3d_func->state.bits.rmu_mux_num);
+  
stream->lut3d_func->state.bits.rmu_mux_num);
if (acquired_rmu != stream->lut3d_func->state.bits.rmu_mux_num)
BREAK_TO_DEBUGGER();
-   result = mpc->funcs->program_3dlut(mpc,
-   
&stream->lut3d_func->lut_3d,
-   
stream->lut3d_func->state.bits.rmu_mux_num);
+
+   result = mpc->funcs->program_3dlut(mpc, 
&stream->lut3d_func->lut_3d,
+  
stream->lut3d_func->state.bits.rmu_mux_num);
result = mpc->funcs->program_shaper(mpc, shaper_lut,
-   stream->lut3d_func->state.bits.rmu_mux_num);
-   } else
-   /*loop through the available mux and release the requested 
mpcc_id*/
+   
stream->lut3d_func->state.bits.rmu_mux_num);
+   } else {
+   // loop through the available mux and release the requested 
mpcc_id
mpc->funcs->release_rmu(mpc, mpcc_id);
-
+   }
 
return result;
 }
-- 
2.39.0



[PATCH 4/6] drm/amd/display: unset initial value for tf since it's never used

2023-02-14 Thread Melissa Wen
In mod_color_calculate_{degamma/regamma}_params(), a tf variable is
initialized as TRANSFER_FUNCTION_SRGB but tf is only used after tf =
input->tf, therefore, better to just remove this initial value and avoid
misleading interpretations.

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c 
b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index fdb20dfc70c9..6e606b11286a 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -1883,7 +1883,7 @@ bool mod_color_calculate_degamma_params(struct 
dc_color_caps *dc_caps,
struct pwl_float_data_ex *curve = NULL;
struct gamma_pixel *axis_x = NULL;
struct pixel_gamma_point *coeff = NULL;
-   enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
+   enum dc_transfer_func_predefined tf;
uint32_t i;
bool ret = false;
 
@@ -2125,7 +2125,7 @@ bool mod_color_calculate_regamma_params(struct 
dc_transfer_func *output_tf,
struct pwl_float_data_ex *rgb_regamma = NULL;
struct gamma_pixel *axis_x = NULL;
struct pixel_gamma_point *coeff = NULL;
-   enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
+   enum dc_transfer_func_predefined tf;
bool do_clamping = true;
bool ret = false;
 
-- 
2.39.0



[PATCH 6/6] drm/amd/display: remove unused _calculate_degamma_curve function

2023-02-14 Thread Melissa Wen
We don't use this function anywhere, therefore, remove it.

Signed-off-by: Melissa Wen 
---
 .../amd/display/modules/color/color_gamma.c   | 86 ---
 .../amd/display/modules/color/color_gamma.h   |  3 -
 2 files changed, 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c 
b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index 6e606b11286a..67a062af3ab0 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -2217,89 +2217,3 @@ bool mod_color_calculate_regamma_params(struct 
dc_transfer_func *output_tf,
 rgb_user_alloc_fail:
return ret;
 }
-
-bool  mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
-   struct dc_transfer_func_distributed_points 
*points)
-{
-   uint32_t i;
-   bool ret = false;
-   struct pwl_float_data_ex *rgb_degamma = NULL;
-
-   if (trans == TRANSFER_FUNCTION_UNITY ||
-   trans == TRANSFER_FUNCTION_LINEAR) {
-
-   for (i = 0; i <= MAX_HW_POINTS ; i++) {
-   points->red[i]= coordinates_x[i].x;
-   points->green[i]  = coordinates_x[i].x;
-   points->blue[i]   = coordinates_x[i].x;
-   }
-   ret = true;
-   } else if (trans == TRANSFER_FUNCTION_PQ) {
-   rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
-  sizeof(*rgb_degamma),
-  GFP_KERNEL);
-   if (!rgb_degamma)
-   goto rgb_degamma_alloc_fail;
-
-
-   build_de_pq(rgb_degamma,
-   MAX_HW_POINTS,
-   coordinates_x);
-   for (i = 0; i <= MAX_HW_POINTS ; i++) {
-   points->red[i]= rgb_degamma[i].r;
-   points->green[i]  = rgb_degamma[i].g;
-   points->blue[i]   = rgb_degamma[i].b;
-   }
-   ret = true;
-
-   kvfree(rgb_degamma);
-   } else if (trans == TRANSFER_FUNCTION_SRGB ||
-   trans == TRANSFER_FUNCTION_BT709 ||
-   trans == TRANSFER_FUNCTION_GAMMA22 ||
-   trans == TRANSFER_FUNCTION_GAMMA24 ||
-   trans == TRANSFER_FUNCTION_GAMMA26) {
-   rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
-  sizeof(*rgb_degamma),
-  GFP_KERNEL);
-   if (!rgb_degamma)
-   goto rgb_degamma_alloc_fail;
-
-   build_degamma(rgb_degamma,
-   MAX_HW_POINTS,
-   coordinates_x,
-   trans);
-   for (i = 0; i <= MAX_HW_POINTS ; i++) {
-   points->red[i]= rgb_degamma[i].r;
-   points->green[i]  = rgb_degamma[i].g;
-   points->blue[i]   = rgb_degamma[i].b;
-   }
-   ret = true;
-
-   kvfree(rgb_degamma);
-   } else if (trans == TRANSFER_FUNCTION_HLG) {
-   rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
-  sizeof(*rgb_degamma),
-  GFP_KERNEL);
-   if (!rgb_degamma)
-   goto rgb_degamma_alloc_fail;
-
-   build_hlg_degamma(rgb_degamma,
-   MAX_HW_POINTS,
-   coordinates_x,
-   80, 1000);
-   for (i = 0; i <= MAX_HW_POINTS ; i++) {
-   points->red[i]= rgb_degamma[i].r;
-   points->green[i]  = rgb_degamma[i].g;
-   points->blue[i]   = rgb_degamma[i].b;
-   }
-   ret = true;
-   kvfree(rgb_degamma);
-   }
-   points->end_exponent = 0;
-   points->x_point_at_y1_red = 1;
-   points->x_point_at_y1_green = 1;
-   points->x_point_at_y1_blue = 1;
-
-rgb_degamma_alloc_fail:
-   return ret;
-}
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h 
b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
index 2893abf48208..ee5c466613de 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
@@ -115,9 +115,6 @@ bool mod_color_calculate_degamma_params(struct 
dc_color_caps *dc_caps,
struct dc_transfer_func *output_tf,
const struct dc_gamma *ramp, bool mapUserRamp);
 
-bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
-   struct dc_transfer_func_distributed_points 
*points);
-
 bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf,
c

Re: [PATCH 0/6] Trivial code cleanup around color resources

2023-02-14 Thread Christian König

Am 14.02.23 um 13:14 schrieb Melissa Wen:

Hi,

Sorry for the noise, but while I've been working on wiring 3D LUT
support to AMD display driver [1] I found some annoying code style
issues in the shared-code part. So I'm just sending what I've been
cleaning to better examine the code.

Most seem trivial, except the last one "remove unused
_calculate_degamma_curve" since this could just be a matter of missing
parts. If so, happy to remove the patch and include a comment describing
the situation (or the potential usage of it).


The display stack is not my field of expertise, but those cleanups are 
so obvious that I think I can safely give my Reviewed-by: Christian 
König  for the entire series.


Thanks,
Christian.



Thanks,

Melissa

[1] https://lore.kernel.org/dri-devel/20230109143846.1966301-1-m...@igalia.com/

Melissa Wen (6):
   drm/amd/display: ident braces in dcn30_acquire_post_bldn_3dlut
 correctly
   drm/amd/display: clean code-style issues in dcn30_set_mpc_shaper_3dlut
   drm/amd/display: camel case cleanup in color_gamma file
   drm/amd/display: unset initial value for tf since it's never used
   drm/amd/display: remove unused func declaration from resource headers
   drm/amd/display: remove unused _calculate_degamma_curve function

  .../drm/amd/display/dc/dcn30/dcn30_hwseq.c|  37 ++---
  .../drm/amd/display/dc/dcn30/dcn30_resource.c |   2 +-
  drivers/gpu/drm/amd/display/dc/inc/resource.h |   4 -
  .../amd/display/modules/color/color_gamma.c   | 140 --
  .../amd/display/modules/color/color_gamma.h   |   3 -
  5 files changed, 48 insertions(+), 138 deletions(-)





[PATCH] drm/gem: Expose the buffer object handle to userspace last

2023-02-14 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Currently drm_gem_handle_create_tail exposes the handle to userspace
before the buffer object constructions is complete. This allowing
of working against a partially constructed object, which may also be in
the process of having its creation fail, can have a range of negative
outcomes.

A lot of those will depend on what the individual drivers are doing in
their obj->funcs->open() callbacks, and also with a common failure mode
being -ENOMEM from drm_vma_node_allow.

We can make sure none of this can happen by allocating a handle last,
although with a downside that more of the function now runs under the
dev->object_name_lock.

Looking into the individual drivers open() hooks, we have
amdgpu_gem_object_open which seems like it could have a potential security
issue without this change.

A couple drivers like qxl_gem_object_open and vmw_gem_object_open
implement no-op hooks so no impact for them.

A bunch of other require a deeper look by individual owners to asses for
impact. Those are lima_gem_object_open, nouveau_gem_object_open,
panfrost_gem_open, radeon_gem_object_open and virtio_gpu_gem_object_open.

Putting aside the risk assesment of the above, some common scenarios to
think about are along these lines:

1)
Userspace closes a handle by speculatively "guessing" it from a second
thread.

This results in an unreachable buffer object so, a memory leak.

2)
Same as 1), but object is in the process of getting closed (failed
creation).

The second thread is then able to re-cycle the handle and idr_remove would
in the first thread would then remove the handle it does not own from the
idr.

3)
Going back to the earlier per driver problem space - individual impact
assesment of allowing a second thread to access and operate on a partially
constructed handle / object. (Can something crash? Leak information?)

In terms of identifying when the problem started I will tag some patches
as references, but not all, if even any, of them actually point to a
broken state. I am just identifying points at which more opportunity for
issues to arise was added.

References: 304eda32920b ("drm/gem: add hooks to notify driver when object 
handle is created/destroyed")
References: ca481c9b2a3a ("drm/gem: implement vma access management")
References: b39b5394fabc ("drm/gem: Add drm_gem_object_funcs")
Cc: dri-de...@lists.freedesktop.org
Cc: Rob Clark 
Cc: Ben Skeggs 
Cc: David Herrmann 
Cc: Noralf Trønnes 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: amd-gfx@lists.freedesktop.org
Cc: l...@lists.freedesktop.org
Cc: nouv...@lists.freedesktop.org
Cc: Steven Price 
Cc: virtualizat...@lists.linux-foundation.org
Cc: spice-de...@lists.freedesktop.org
Cc: Zack Rusin 
---
 drivers/gpu/drm/drm_gem.c | 48 +++
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index aa15c52ae182..e3d897bca0f2 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -356,52 +356,52 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
   u32 *handlep)
 {
struct drm_device *dev = obj->dev;
-   u32 handle;
int ret;
 
WARN_ON(!mutex_is_locked(&dev->object_name_lock));
if (obj->handle_count++ == 0)
drm_gem_object_get(obj);
 
+   ret = drm_vma_node_allow(&obj->vma_node, file_priv);
+   if (ret)
+   goto err_put;
+
+   if (obj->funcs->open) {
+   ret = obj->funcs->open(obj, file_priv);
+   if (ret)
+   goto err_revoke;
+   }
+
/*
-* Get the user-visible handle using idr.  Preload and perform
-* allocation under our spinlock.
+* Get the user-visible handle using idr as the _last_ step.
+* Preload and perform allocation under our spinlock.
 */
idr_preload(GFP_KERNEL);
spin_lock(&file_priv->table_lock);
-
ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT);
-
spin_unlock(&file_priv->table_lock);
idr_preload_end();
 
-   mutex_unlock(&dev->object_name_lock);
if (ret < 0)
-   goto err_unref;
-
-   handle = ret;
+   goto err_close;
 
-   ret = drm_vma_node_allow(&obj->vma_node, file_priv);
-   if (ret)
-   goto err_remove;
+   mutex_unlock(&dev->object_name_lock);
 
-   if (obj->funcs->open) {
-   ret = obj->funcs->open(obj, file_priv);
-   if (ret)
-   goto err_revoke;
-   }
+   *handlep = ret;
 
-   *handlep = handle;
return 0;
 
+err_close:
+   if (obj->funcs->close)
+   obj->funcs->close(obj, file_priv);
 err_revoke:
drm_vma_node_revoke(&obj->vma_node, file_priv);
-err_remove:
-   spin_lock(&file_priv->table_lock);
-   idr_remove(&file_priv->object_idr, handle);
-   spin_unlock(&file_priv->table_lock);
-err_unref:
-  

Re: [PATCH] drm/gem: Expose the buffer object handle to userspace last

2023-02-14 Thread Christian König

Am 14.02.23 um 13:50 schrieb Tvrtko Ursulin:

From: Tvrtko Ursulin 

Currently drm_gem_handle_create_tail exposes the handle to userspace
before the buffer object constructions is complete. This allowing
of working against a partially constructed object, which may also be in
the process of having its creation fail, can have a range of negative
outcomes.

A lot of those will depend on what the individual drivers are doing in
their obj->funcs->open() callbacks, and also with a common failure mode
being -ENOMEM from drm_vma_node_allow.

We can make sure none of this can happen by allocating a handle last,
although with a downside that more of the function now runs under the
dev->object_name_lock.

Looking into the individual drivers open() hooks, we have
amdgpu_gem_object_open which seems like it could have a potential security
issue without this change.

A couple drivers like qxl_gem_object_open and vmw_gem_object_open
implement no-op hooks so no impact for them.

A bunch of other require a deeper look by individual owners to asses for
impact. Those are lima_gem_object_open, nouveau_gem_object_open,
panfrost_gem_open, radeon_gem_object_open and virtio_gpu_gem_object_open.

Putting aside the risk assesment of the above, some common scenarios to
think about are along these lines:

1)
Userspace closes a handle by speculatively "guessing" it from a second
thread.

This results in an unreachable buffer object so, a memory leak.

2)
Same as 1), but object is in the process of getting closed (failed
creation).

The second thread is then able to re-cycle the handle and idr_remove would
in the first thread would then remove the handle it does not own from the
idr.

3)
Going back to the earlier per driver problem space - individual impact
assesment of allowing a second thread to access and operate on a partially
constructed handle / object. (Can something crash? Leak information?)

In terms of identifying when the problem started I will tag some patches
as references, but not all, if even any, of them actually point to a
broken state. I am just identifying points at which more opportunity for
issues to arise was added.


Yes I've looked into this once as well, but couldn't completely solve it 
for some reason.


Give me a day or two to get this tested and all the logic swapped back 
into my head again.


Christian.



References: 304eda32920b ("drm/gem: add hooks to notify driver when object handle is 
created/destroyed")
References: ca481c9b2a3a ("drm/gem: implement vma access management")
References: b39b5394fabc ("drm/gem: Add drm_gem_object_funcs")
Cc: dri-de...@lists.freedesktop.org
Cc: Rob Clark 
Cc: Ben Skeggs 
Cc: David Herrmann 
Cc: Noralf Trønnes 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: amd-gfx@lists.freedesktop.org
Cc: l...@lists.freedesktop.org
Cc: nouv...@lists.freedesktop.org
Cc: Steven Price 
Cc: virtualizat...@lists.linux-foundation.org
Cc: spice-de...@lists.freedesktop.org
Cc: Zack Rusin 
---
  drivers/gpu/drm/drm_gem.c | 48 +++
  1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index aa15c52ae182..e3d897bca0f2 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -356,52 +356,52 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
   u32 *handlep)
  {
struct drm_device *dev = obj->dev;
-   u32 handle;
int ret;
  
  	WARN_ON(!mutex_is_locked(&dev->object_name_lock));

if (obj->handle_count++ == 0)
drm_gem_object_get(obj);
  
+	ret = drm_vma_node_allow(&obj->vma_node, file_priv);

+   if (ret)
+   goto err_put;
+
+   if (obj->funcs->open) {
+   ret = obj->funcs->open(obj, file_priv);
+   if (ret)
+   goto err_revoke;
+   }
+
/*
-* Get the user-visible handle using idr.  Preload and perform
-* allocation under our spinlock.
+* Get the user-visible handle using idr as the _last_ step.
+* Preload and perform allocation under our spinlock.
 */
idr_preload(GFP_KERNEL);
spin_lock(&file_priv->table_lock);
-
ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT);
-
spin_unlock(&file_priv->table_lock);
idr_preload_end();
  
-	mutex_unlock(&dev->object_name_lock);

if (ret < 0)
-   goto err_unref;
-
-   handle = ret;
+   goto err_close;
  
-	ret = drm_vma_node_allow(&obj->vma_node, file_priv);

-   if (ret)
-   goto err_remove;
+   mutex_unlock(&dev->object_name_lock);
  
-	if (obj->funcs->open) {

-   ret = obj->funcs->open(obj, file_priv);
-   if (ret)
-   goto err_revoke;
-   }
+   *handlep = ret;
  
-	*handlep = handle;

return 0;
  
+err_close:

+   if (obj->funcs->close)
+   obj->funcs->close(obj, file_priv);
  err_revo

Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum

2023-02-14 Thread Sebastian Wick
On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä
 wrote:
>
> On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote:
> >
> >
> > On 2/3/23 10:19, Ville Syrjälä wrote:
> > > On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote:
> > >>
> > >>
> > >> On 2/3/23 07:59, Sebastian Wick wrote:
> > >>> On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä
> > >>>  wrote:
> > 
> >  On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote:
> > > Userspace has no way of controlling or knowing the pixel encoding
> > > currently, so there is no way for it to ever get the right values 
> > > here.
> > 
> >  That applies to a lot of the other values as well (they are
> >  explicitly RGB or YCC). The idea was that this property sets the
> >  infoframe/MSA/SDP value exactly, and other properties should be
> >  added to for use userspace to control the pixel encoding/colorspace
> >  conversion(if desired, or userspace just makes sure to
> >  directly feed in correct kind of data).
> > >>>
> > >>> I'm all for getting userspace control over pixel encoding but even
> > >>> then the kernel always knows which pixel encoding is selected and
> > >>> which InfoFrame has to be sent. Is there a reason why userspace would
> > >>> want to control the variant explicitly to the wrong value?
> > >>>
> > >>
> > >> I've asked this before but haven't seen an answer: Is there an existing
> > >> upstream userspace project that makes use of this property (other than
> > >> what Joshua is working on in gamescope right now)? That would help us
> > >> understand the intent better.
> > >
> > > The intent was to control the infoframe colorimetry bits,
> > > nothing more. No idea what real userspace there was, if any.
> > >
> > >>
> > >> I don't think giving userspace explicit control over the exact infoframe
> > >> values is the right thing to do.
> > >
> > > Only userspace knows what kind of data it's stuffing into
> > > the pixels (and/or how it configures the csc units/etc.) to
> > > generate them.
> > >
> >
> > Yes, but userspace doesn't control or know whether we drive
> > RGB or YCbCr on the wire. In fact, in some cases our driver
> > needs to fallback to YCbCr420 for bandwidth reasons. There
> > is currently no way for userspace to know that and I don't
> > think it makes sense.
>
> People want that control as well for whatever reason. We've
> been asked to allow YCbCr 4:4:4 output many times.

I don't really think it's a question of if we want it but rather how
we get there. Harry is completely right that if we would make the
subsampling controllable by user space instead of the kernel handling
it magically, user space which does not adapt to the new control won't
be able to light up some modes which worked before.

This is obviously a problem and not one we can easily fix. We would
need a new cap for user space to signal "I know that I can control
bpc, subsampling and compression to lower the bandwidth and light up
modes which otherwise fail". That cap would also remove all the
properties which require kernel magic to work (that's also what I
proposed for my KMS color pipeline API).

We all want to expose more of the scanout capability and give user
space more control but I don't think an incremental approach works
here and we would all do better if we accept that the current API
requires kernel magic to work and has a few implicit assumptions baked
in.

With all that being said, I think the right decision here is to

1. Ignore subsampling for now
2. Let the kernel select YCC or RGB on the cable
3. Let the kernel figure out the conversion between RGB and YCC based
on the color space selected
4. Let the kernel send the correct infoframe based on the selected
color space and cable encoding
5. Only expose color spaces for which the kernel can do the conversion
and send the infoframe
6. Work on the new API which is hidden behind a cap

> The automagic 4:2:0 fallback I think is rather fundementally
> incompatible with fancy color management. How would we even
> know whether to use eg. BT.2020 vs. BT.709 matrix? In i915
> that stuff is just always BT.709 limited range, no questions
> asked.
>
> So I think if userspace wants real color management it's
> going to have to set up the whole pipeline. And for that
> we need at least one new property to control the RGB->YCbCr
> conversion (or to explicitly avoid it).
>
> And given that the proposed patch just swept all the
> non-BT.2020 issues under the rug makes me think no
> one has actually come up with any kind of consistent
> plan for anything else really.
>
> >
> > Userspace needs full control of framebuffer pixel formats,
> > as well as control over DEGAMMA, GAMMA, CTM color operations.
> > It also needs to be able to select whether to drive the panel
> > as sRGB or BT.2020/PQ but it doesn't make sense for it to
> > control the pixel encoding on the wire (RGB vs YCbCr).
> >
> > > I really don't want a repeat of the disaster of the
> > > 'B

[PATCH v2 0/8] Re-design doorbell framework for usermode queues

2023-02-14 Thread Shashank Sharma
From: Shashank Sharma 

This patch series re-designs the current doorbell handling of the AMDGPU
driver and prepares it for Usermode queues. The fundamental changes are:

- Introduce and accommodate a new GEM domain for doorbells.
- Prepare the AMDGPU ttm backend for handling doorbell memory.
- Create a doorbell BO for kernel-level doorbell opertations.
- Rename, move and re-arrange some existing structures.

The idea is that a usermode app can directly allocate a page from
the doorbell bar and use it's offsets for different usermode queues.

Corresponding libdrm changes (just addition of this new flag):
https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/286

Alex Deucher (6):
  drm/amdgpu: add UAPI for allocating doorbell memory
  drm/amdgpu: replace aper_base_kaddr with vram_aper_base_kaddr
  drm/amdgpu: rename gmc.aper_base/size
  drm/amdgpu: rename doorbell variables
  drm/amdgpu: accommodate DOMAIN/PL_DOORBELL
  drm/amdgpu: get doorbell memory

Shashank Sharma (2):
  drm/amdgpu: create doorbell kernel object
  drm/amdgpu: start using kernel doorbell bo

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   | 13 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 67 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c   |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   | 32 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h   |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 51 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  | 11 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 15 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 10 +--
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c   | 10 +--
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c|  6 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c| 12 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c| 10 +--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 10 +--
 drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c   |  4 +-
 drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c   |  4 +-
 drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c   |  4 +-
 drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c   |  4 +-
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   |  4 +-
 drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c   |  4 +-
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c   | 10 +--
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c   | 10 +--
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  4 +-
 include/uapi/drm/amdgpu_drm.h|  7 +-
 31 files changed, 193 insertions(+), 137 deletions(-)

-- 
2.34.1



[PATCH v2 2/8] drm/amdgpu: replace aper_base_kaddr with vram_aper_base_kaddr

2023-02-14 Thread Shashank Sharma
From: Alex Deucher 

To differentiate it from the doorbell BAR.

V2: Added Christian's A-B

Acked-by: Christian Koenig 

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 14 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 10 +-
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +-
 5 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2f28a8c02f64..0b6a394e109b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -354,12 +354,12 @@ size_t amdgpu_device_aper_access(struct amdgpu_device 
*adev, loff_t pos,
size_t count = 0;
uint64_t last;
 
-   if (!adev->mman.aper_base_kaddr)
+   if (!adev->mman.vram_aper_base_kaddr)
return 0;
 
last = min(pos + size, adev->gmc.visible_vram_size);
if (last > pos) {
-   addr = adev->mman.aper_base_kaddr + pos;
+   addr = adev->mman.vram_aper_base_kaddr + pos;
count = last - pos;
 
if (write) {
@@ -3954,9 +3954,9 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device 
*adev)
 
iounmap(adev->rmmio);
adev->rmmio = NULL;
-   if (adev->mman.aper_base_kaddr)
-   iounmap(adev->mman.aper_base_kaddr);
-   adev->mman.aper_base_kaddr = NULL;
+   if (adev->mman.vram_aper_base_kaddr)
+   iounmap(adev->mman.vram_aper_base_kaddr);
+   adev->mman.vram_aper_base_kaddr = NULL;
 
/* Memory manager related */
if (!adev->gmc.xgmi.connected_to_cpu) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 55e0284b2bdd..73b831b47892 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -578,9 +578,9 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device 
*bdev,
if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
return -EINVAL;
 
-   if (adev->mman.aper_base_kaddr &&
+   if (adev->mman.vram_aper_base_kaddr &&
mem->placement & TTM_PL_FLAG_CONTIGUOUS)
-   mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
+   mem->bus.addr = (u8 *)adev->mman.vram_aper_base_kaddr +
mem->bus.offset;
 
mem->bus.offset += adev->gmc.aper_base;
@@ -1752,12 +1752,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 #ifdef CONFIG_64BIT
 #ifdef CONFIG_X86
if (adev->gmc.xgmi.connected_to_cpu)
-   adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
+   adev->mman.vram_aper_base_kaddr = 
ioremap_cache(adev->gmc.aper_base,
adev->gmc.visible_vram_size);
 
else
 #endif
-   adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
+   adev->mman.vram_aper_base_kaddr = 
ioremap_wc(adev->gmc.aper_base,
adev->gmc.visible_vram_size);
 #endif
 
@@ -1904,9 +1904,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
 
-   if (adev->mman.aper_base_kaddr)
-   iounmap(adev->mman.aper_base_kaddr);
-   adev->mman.aper_base_kaddr = NULL;
+   if (adev->mman.vram_aper_base_kaddr)
+   iounmap(adev->mman.vram_aper_base_kaddr);
+   adev->mman.vram_aper_base_kaddr = NULL;
 
drm_dev_exit(idx);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index e2cd5894afc9..929bc8abac28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -50,7 +50,7 @@ struct amdgpu_gtt_mgr {
 struct amdgpu_mman {
struct ttm_device   bdev;
boolinitialized;
-   void __iomem*aper_base_kaddr;
+   void __iomem*vram_aper_base_kaddr;
 
/* buffer handling */
const struct amdgpu_buffer_funcs*buffer_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index bd3e3e23a939..f39d4f593a2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -611,10 +611,10 @@ static int psp_v11_0_memory_training(struct psp_context 
*psp, uint32_t ops)
 */
sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
 
-   if (adev->gmc.visible_vram_size < sz || 
!adev->mman.aper_base_kaddr) {
-   DRM_ERROR("visible

[PATCH v2 6/8] drm/amdgpu: get doorbell memory

2023-02-14 Thread Shashank Sharma
From: Alex Deucher 

This patch adds section for doorbell memory in memory status
reporting functions like vm/bo_get_memory.

V2: Rebase

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  9 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 ++-
 5 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index 99a7855ab1bc..202df09ba5de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -60,7 +60,7 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
 
-   uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
+   uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0, doorbell_mem = 0;
ktime_t usage[AMDGPU_HW_IP_NUM];
uint32_t bus, dev, fn, domain;
unsigned int hw_ip;
@@ -75,7 +75,7 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
if (ret)
return;
 
-   amdgpu_vm_get_memory(vm, &vram_mem, >t_mem, &cpu_mem);
+   amdgpu_vm_get_memory(vm, &vram_mem, >t_mem, &cpu_mem, &doorbell_mem);
amdgpu_bo_unreserve(vm->root.bo);
 
amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ff9979fecfd2..90e97abc1454 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1275,7 +1275,8 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 }
 
 void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,
-   uint64_t *gtt_mem, uint64_t *cpu_mem)
+ uint64_t *gtt_mem, uint64_t *cpu_mem,
+ uint64_t *doorbell_mem)
 {
unsigned int domain;
 
@@ -1287,6 +1288,9 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t 
*vram_mem,
case AMDGPU_GEM_DOMAIN_GTT:
*gtt_mem += amdgpu_bo_size(bo);
break;
+   case AMDGPU_GEM_DOMAIN_DOORBELL:
+   *doorbell_mem += amdgpu_bo_size(bo);
+   break;
case AMDGPU_GEM_DOMAIN_CPU:
default:
*cpu_mem += amdgpu_bo_size(bo);
@@ -1565,6 +1569,9 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, 
struct seq_file *m)
case AMDGPU_GEM_DOMAIN_GTT:
placement = " GTT";
break;
+   case AMDGPU_GEM_DOMAIN_DOORBELL:
+   placement = "DOOR";
+   break;
case AMDGPU_GEM_DOMAIN_CPU:
default:
placement = " CPU";
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 93207badf83f..bf9759758f0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -326,7 +326,8 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, 
bool intr);
 u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
 u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
 void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,
-   uint64_t *gtt_mem, uint64_t *cpu_mem);
+ uint64_t *gtt_mem, uint64_t *cpu_mem,
+ uint64_t *doorbell_mem);
 void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo);
 int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
 struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dc379dc22c77..1561d138945b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -918,7 +918,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
 }
 
 void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
-   uint64_t *gtt_mem, uint64_t *cpu_mem)
+ uint64_t *gtt_mem, uint64_t *cpu_mem,
+ uint64_t *doorbell_mem)
 {
struct amdgpu_bo_va *bo_va, *tmp;
 
@@ -927,37 +928,37 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t 
*vram_mem,
if (!bo_va->base.bo)
continue;
amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
-   gtt_mem, cpu_mem);
+gtt_mem, cpu_mem, doorbell_mem);
}
list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) {
if (!bo_va->base.bo)
c

[PATCH v2 1/8] drm/amdgpu: add UAPI for allocating doorbell memory

2023-02-14 Thread Shashank Sharma
From: Alex Deucher 

This patch adds flags for a new gem domain AMDGPU_GEM_DOMAIN_DOORBELL
in the UAPI layer.

V2: Drop 'memory' from description (Christian)

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
---
 include/uapi/drm/amdgpu_drm.h | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4038abe8505a..cc5d551abda5 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -94,6 +94,9 @@ extern "C" {
  *
  * %AMDGPU_GEM_DOMAIN_OA   Ordered append, used by 3D or Compute engines
  * for appending data.
+ *
+ * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for
+ * signalling user mode queues.
  */
 #define AMDGPU_GEM_DOMAIN_CPU  0x1
 #define AMDGPU_GEM_DOMAIN_GTT  0x2
@@ -101,12 +104,14 @@ extern "C" {
 #define AMDGPU_GEM_DOMAIN_GDS  0x8
 #define AMDGPU_GEM_DOMAIN_GWS  0x10
 #define AMDGPU_GEM_DOMAIN_OA   0x20
+#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40
 #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \
 AMDGPU_GEM_DOMAIN_GTT | \
 AMDGPU_GEM_DOMAIN_VRAM | \
 AMDGPU_GEM_DOMAIN_GDS | \
 AMDGPU_GEM_DOMAIN_GWS | \
-AMDGPU_GEM_DOMAIN_OA)
+AMDGPU_GEM_DOMAIN_OA | \
+AMDGPU_GEM_DOMAIN_DOORBELL)
 
 /* Flag that CPU access will be required for the case of VRAM domain */
 #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED  (1 << 0)
-- 
2.34.1



[PATCH v2 5/8] drm/amdgpu: accommodate DOMAIN/PL_DOORBELL

2023-02-14 Thread Shashank Sharma
From: Alex Deucher 

This patch adds changes:
- to accommodate the new GEM domain DOORBELL
- to accommodate the new TTM PL DOORBELL

to manage doorbell allocations as GEM Objects.

V2: Addressed reviwe comments from Christian
- drop the doorbell changes for pinning/unpinning
- drop the doorbell changes for dma-buf map
- drop the doorbell changes for sgt
- no need to handle TTM_PL_FLAG_CONTIGUOUS for doorbell

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 11 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 11 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h|  1 +
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b48c9fd60c43..ff9979fecfd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -147,6 +147,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo 
*abo, u32 domain)
c++;
}
 
+   if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {
+   places[c].fpfn = 0;
+   places[c].lpfn = 0;
+   places[c].mem_type = AMDGPU_PL_DOORBELL;
+   places[c].flags = 0;
+   c++;
+   }
+
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0;
places[c].lpfn = 0;
@@ -466,7 +474,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device 
*adev,
goto fail;
}
 
-   /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */
+   /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU,  
AMDGPU_GEM_DOMAIN_DOORBELL */
return true;
 
 fail:
@@ -1014,6 +1022,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo)
} else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
}
+
 }
 
 static const char *amdgpu_vram_names[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0e8f580769ab..e9dc24191fc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -128,6 +128,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
case AMDGPU_PL_GDS:
case AMDGPU_PL_GWS:
case AMDGPU_PL_OA:
+   case AMDGPU_PL_DOORBELL:
placement->num_placement = 0;
placement->num_busy_placement = 0;
return;
@@ -500,9 +501,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, 
bool evict,
if (old_mem->mem_type == AMDGPU_PL_GDS ||
old_mem->mem_type == AMDGPU_PL_GWS ||
old_mem->mem_type == AMDGPU_PL_OA ||
+   old_mem->mem_type == AMDGPU_PL_DOORBELL ||
new_mem->mem_type == AMDGPU_PL_GDS ||
new_mem->mem_type == AMDGPU_PL_GWS ||
-   new_mem->mem_type == AMDGPU_PL_OA) {
+   new_mem->mem_type == AMDGPU_PL_OA ||
+   new_mem->mem_type == AMDGPU_PL_DOORBELL) {
/* Nothing to save here */
ttm_bo_move_null(bo, new_mem);
goto out;
@@ -586,6 +589,11 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device 
*bdev,
mem->bus.offset += adev->gmc.vram_aper_base;
mem->bus.is_iomem = true;
break;
+   case AMDGPU_PL_DOORBELL:
+   mem->bus.offset = mem->start << PAGE_SHIFT;
+   mem->bus.offset += adev->doorbell.doorbell_aper_base;
+   mem->bus.is_iomem = true;
+   break;
default:
return -EINVAL;
}
@@ -1267,6 +1275,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, 
struct ttm_resource *mem)
flags |= AMDGPU_PTE_VALID;
 
if (mem && (mem->mem_type == TTM_PL_TT ||
+   mem->mem_type == AMDGPU_PL_DOORBELL ||
mem->mem_type == AMDGPU_PL_PREEMPT)) {
flags |= AMDGPU_PTE_SYSTEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 967b265dbfa1..9cf5d8419965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -33,6 +33,7 @@
 #define AMDGPU_PL_GWS  (TTM_PL_PRIV + 1)
 #define AMDGPU_PL_OA   (TTM_PL_PRIV + 2)
 #define AMDGPU_PL_PREEMPT  (TTM_PL_PRIV + 3)
+#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4)
 
 #define AMDGPU_GTT_MAX_TRANSFER_SIZE   512
 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS2
-- 
2.34.1



[PATCH v2 3/8] drm/amdgpu: rename gmc.aper_base/size

2023-02-14 Thread Shashank Sharma
From: Alex Deucher 

This patch renames aper_base and aper_size parameters (in adev->gmc),
to vram_aper_base and vram_aper_size, to differentiate it from the
doorbell BAR.

V2: rebase

Cc: Alex Deucher 
Cc: Christian Koenig 

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   |  6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   | 12 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  |  8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 10 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c   | 10 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c|  6 +++---
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c| 12 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c| 10 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 10 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  4 ++--
 14 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index f99d4873bf22..58689b2a2d1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -438,7 +438,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device 
*adev,
mem_info->vram_width = adev->gmc.vram_width;
 
pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
-   &adev->gmc.aper_base,
+   &adev->gmc.vram_aper_base,
mem_info->local_mem_size_public,
mem_info->local_mem_size_private);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0b6a394e109b..45588b7919fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3961,7 +3961,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device 
*adev)
/* Memory manager related */
if (!adev->gmc.xgmi.connected_to_cpu) {
arch_phys_wc_del(adev->gmc.vram_mtrr);
-   arch_io_free_memtype_wc(adev->gmc.aper_base, 
adev->gmc.aper_size);
+   arch_io_free_memtype_wc(adev->gmc.vram_aper_base, 
adev->gmc.vram_aper_size);
}
 }
 
@@ -5562,14 +5562,14 @@ bool amdgpu_device_is_peer_accessible(struct 
amdgpu_device *adev,
uint64_t address_mask = peer_adev->dev->dma_mask ?
~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
resource_size_t aper_limit =
-   adev->gmc.aper_base + adev->gmc.aper_size - 1;
+   adev->gmc.vram_aper_base + adev->gmc.vram_aper_size - 1;
bool p2p_access =
!adev->gmc.xgmi.connected_to_cpu &&
!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
 
return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
-   !(adev->gmc.aper_base & address_mask ||
+   !(adev->gmc.vram_aper_base & address_mask ||
  aper_limit & address_mask));
 #else
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 02a4c93673ce..c7e64e234de6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -775,7 +775,7 @@ uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, 
struct amdgpu_bo *bo)
  */
 uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo 
*bo)
 {
-   return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + 
adev->gmc.aper_base;
+   return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + 
adev->gmc.vram_aper_base;
 }
 
 int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 0305b660cd17..bb7076ecbf01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -167,8 +167,8 @@ struct amdgpu_gmc {
 * gart/vram_start/end field as the later is from
 * GPU's view and aper_base is from CPU's view.
 */
-   resource_size_t aper_size;
-   resource_size_t aper_base;
+   resource_size_t vram_aper_size;
+   resource_size_t vram_aper_base;
/* for some chips with <= 32MB we need to lie
 * about vram size near mc fb location */
u64 mc_vram_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 25a68de0..b48c9fd60c43 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/

[PATCH v2 7/8] drm/amdgpu: create doorbell kernel object

2023-02-14 Thread Shashank Sharma
From: Shashank Sharma 

This patch does the following:
- Initializes TTM range management for domain DOORBELL.
- Introduces a kernel bo for doorbell management in form of 
mman.doorbell_kernel_bo.
  This bo holds the kernel doorbell space now.
- Removes ioremapping of doorbell-kernel memory, as its not required now.

V2:
- Addressed review comments from Christian:
- do not use kernel_create_at(0), use kernel_create() instead.
- do not use ttm_resource_manager, use range_manager instead.
- do not ioremap doorbell, TTM will do that.
- Split one big patch into 2

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 22 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  7 +++
 2 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e9dc24191fc8..086e83c17c0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1879,12 +1879,32 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
}
 
+   r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, 
adev->doorbell.doorbell_aper_size);
+   if (r) {
+   DRM_ERROR("Failed initializing oa heap.\n");
+   return r;
+   }
+
if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&adev->mman.sdma_access_bo, NULL,
&adev->mman.sdma_access_ptr))
DRM_WARN("Debug VRAM access will use slowpath MM access\n");
 
+   /* Create a doorbell BO for kernel usages */
+   r = amdgpu_bo_create_kernel(adev,
+   adev->mman.doorbell_kernel_bo_size,
+   PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_DOORBELL,
+   &adev->mman.doorbell_kernel_bo,
+   &adev->mman.doorbell_gpu_addr,
+   (void **)&adev->mman.doorbell_cpu_addr);
+
+   if (r) {
+   DRM_ERROR("Failed to create doorbell BO, err=%d\n", r);
+   return r;
+   }
+
return 0;
 }
 
@@ -1908,6 +1928,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
  NULL, NULL);
amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
&adev->mman.sdma_access_ptr);
+   amdgpu_bo_free_kernel(&adev->mman.doorbell_kernel_bo,
+ NULL, (void **)&adev->mman.doorbell_cpu_addr);
amdgpu_ttm_fw_reserve_vram_fini(adev);
amdgpu_ttm_drv_reserve_vram_fini(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 9cf5d8419965..50748ff1dd3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -97,6 +97,13 @@ struct amdgpu_mman {
/* PAGE_SIZE'd BO for process memory r/w over SDMA. */
struct amdgpu_bo*sdma_access_bo;
void*sdma_access_ptr;
+
+   /* doorbells reserved for the kernel driver */
+   u32 num_kernel_doorbells;   /* Number of doorbells 
actually reserved for kernel */
+   uint64_tdoorbell_kernel_bo_size;
+   uint64_tdoorbell_gpu_addr;
+   struct amdgpu_bo*doorbell_kernel_bo;
+   u32 *doorbell_cpu_addr;
 };
 
 struct amdgpu_copy_mem {
-- 
2.34.1



[PATCH v2 8/8] drm/amdgpu: start using kernel doorbell bo

2023-02-14 Thread Shashank Sharma
From: Shashank Sharma 

This patch does the following:

- Adds new variables like mman.doorbell_bo_size/gpu_addr/cpu_addr.
  The cpu_addr ptr will be used now for doorbell read/write from
  doorbell BAR.
- Adjusts the existing code to use kernel doorbell BO's size and its
  cpu_address.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  5 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 33 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h |  1 -
 3 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 0493c64e9d0a..87f486f522ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -109,11 +109,10 @@ static void amdgpu_doorbell_get_kfd_info(struct 
amdgpu_device *adev,
*aperture_base = adev->doorbell.doorbell_aper_base;
*aperture_size = 0;
*start_offset = 0;
-   } else if (adev->doorbell.doorbell_aper_size > 
adev->doorbell.num_doorbells *
-   sizeof(u32)) {
+   } else if (adev->doorbell.doorbell_aper_size > 
adev->mman.doorbell_kernel_bo_size) {
*aperture_base = adev->doorbell.doorbell_aper_base;
*aperture_size = adev->doorbell.doorbell_aper_size;
-   *start_offset = adev->doorbell.num_doorbells * sizeof(u32);
+   *start_offset = adev->mman.doorbell_kernel_bo_size;
} else {
*aperture_base = 0;
*aperture_size = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 43c1b67c2778..fde199434579 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -596,8 +596,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 
index)
if (amdgpu_device_skip_hw_access(adev))
return 0;
 
-   if (index < adev->doorbell.num_doorbells) {
-   return readl(adev->mman.doorbell_aper_base_kaddr + index);
+   if (index < adev->mman.num_kernel_doorbells) {
+   return readl(adev->mman.doorbell_cpu_addr + index);
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
return 0;
@@ -619,8 +619,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 
index, u32 v)
if (amdgpu_device_skip_hw_access(adev))
return;
 
-   if (index < adev->doorbell.num_doorbells) {
-   writel(v, adev->mman.doorbell_aper_base_kaddr + index);
+   if (index < adev->mman.num_kernel_doorbells) {
+   writel(v, adev->mman.doorbell_cpu_addr + index);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
}
@@ -640,8 +640,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 
index)
if (amdgpu_device_skip_hw_access(adev))
return 0;
 
-   if (index < adev->doorbell.num_doorbells) {
-   return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index));
+   if (index < adev->mman.num_kernel_doorbells) {
+   return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_cpu_addr + index));
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
return 0;
@@ -663,8 +663,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 
index, u64 v)
if (amdgpu_device_skip_hw_access(adev))
return;
 
-   if (index < adev->doorbell.num_doorbells) {
-   atomic64_set((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr 
+ index), v);
+   if (index < adev->mman.num_kernel_doorbells) {
+   atomic64_set((atomic64_t *)(adev->mman.doorbell_cpu_addr + 
index), v);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
}
@@ -1037,7 +1037,7 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)
if (adev->asic_type < CHIP_BONAIRE) {
adev->doorbell.doorbell_aper_base = 0;
adev->doorbell.doorbell_aper_size = 0;
-   adev->doorbell.num_doorbells = 0;
+   adev->mman.num_kernel_doorbells = 0;
adev->mman.doorbell_aper_base_kaddr = NULL;
return 0;
}
@@ -1052,13 +1052,13 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)
adev->doorbell.doorbell_aper_size = pci_resource_len(adev->pdev, 2);
 
if (adev->enable_mes) {
-   adev->doorbell.num_doorbells =
+   adev->mman.num_kernel_doorbells =
adev->doorbell.doorbell_aper_size / sizeof(u32);
} else {
-

[PATCH v2 4/8] drm/amdgpu: rename doorbell variables

2023-02-14 Thread Shashank Sharma
From: Alex Deucher 

This patch:
- renames the adev->doorbell.base to adev->doorbell.doorbell_aper_base
- renames the adev->doorbell.size to adev->doorbell.doorbell_aper_size
- moves the adev->doorbell.ptr to adev->mman.doorbell_aper_base_kaddr

rest of the changes are just to accommodate these variable name changes.

V2: Mered 2 patches into this one doorbell clean-up patch.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  8 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 34 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h |  5 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c   |  4 +--
 drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c   |  4 +--
 drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c   |  4 +--
 drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c   |  4 +--
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   |  4 +--
 drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c   |  4 +--
 12 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 58689b2a2d1c..0493c64e9d0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -106,13 +106,13 @@ static void amdgpu_doorbell_get_kfd_info(struct 
amdgpu_device *adev,
 * not initialized as AMDGPU manages the whole
 * doorbell space.
 */
-   *aperture_base = adev->doorbell.base;
+   *aperture_base = adev->doorbell.doorbell_aper_base;
*aperture_size = 0;
*start_offset = 0;
-   } else if (adev->doorbell.size > adev->doorbell.num_doorbells *
+   } else if (adev->doorbell.doorbell_aper_size > 
adev->doorbell.num_doorbells *
sizeof(u32)) {
-   *aperture_base = adev->doorbell.base;
-   *aperture_size = adev->doorbell.size;
+   *aperture_base = adev->doorbell.doorbell_aper_base;
+   *aperture_size = adev->doorbell.doorbell_aper_size;
*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
} else {
*aperture_base = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 45588b7919fe..43c1b67c2778 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -597,7 +597,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 
index)
return 0;
 
if (index < adev->doorbell.num_doorbells) {
-   return readl(adev->doorbell.ptr + index);
+   return readl(adev->mman.doorbell_aper_base_kaddr + index);
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
return 0;
@@ -620,7 +620,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 
index, u32 v)
return;
 
if (index < adev->doorbell.num_doorbells) {
-   writel(v, adev->doorbell.ptr + index);
+   writel(v, adev->mman.doorbell_aper_base_kaddr + index);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
}
@@ -641,7 +641,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 
index)
return 0;
 
if (index < adev->doorbell.num_doorbells) {
-   return atomic64_read((atomic64_t *)(adev->doorbell.ptr + 
index));
+   return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index));
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
return 0;
@@ -664,7 +664,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 
index, u64 v)
return;
 
if (index < adev->doorbell.num_doorbells) {
-   atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
+   atomic64_set((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr 
+ index), v);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
}
@@ -1035,10 +1035,10 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)
 
/* No doorbell on SI hardware generation */
if (adev->asic_type < CHIP_BONAIRE) {
-   adev->doorbell.base = 0;
-   adev->doorbell.size = 0;
+   adev->doorbell.doorbell_aper_base = 0;
+   adev->doorbell.doorbell_aper_size = 0;
adev->doorbell.num_doorbells = 0;
-   adev->doorbell.ptr = NULL;
+   adev->mman.doorbell_aper_base_kaddr = NULL;
ret

Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum

2023-02-14 Thread Harry Wentland



On 2/14/23 10:49, Sebastian Wick wrote:
> On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä
>  wrote:
>>
>> On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote:
>>>
>>>
>>> On 2/3/23 10:19, Ville Syrjälä wrote:
 On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote:
>
>
> On 2/3/23 07:59, Sebastian Wick wrote:
>> On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä
>>  wrote:
>>>
>>> On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote:
 Userspace has no way of controlling or knowing the pixel encoding
 currently, so there is no way for it to ever get the right values here.
>>>
>>> That applies to a lot of the other values as well (they are
>>> explicitly RGB or YCC). The idea was that this property sets the
>>> infoframe/MSA/SDP value exactly, and other properties should be
>>> added to for use userspace to control the pixel encoding/colorspace
>>> conversion(if desired, or userspace just makes sure to
>>> directly feed in correct kind of data).
>>
>> I'm all for getting userspace control over pixel encoding but even
>> then the kernel always knows which pixel encoding is selected and
>> which InfoFrame has to be sent. Is there a reason why userspace would
>> want to control the variant explicitly to the wrong value?
>>
>
> I've asked this before but haven't seen an answer: Is there an existing
> upstream userspace project that makes use of this property (other than
> what Joshua is working on in gamescope right now)? That would help us
> understand the intent better.

 The intent was to control the infoframe colorimetry bits,
 nothing more. No idea what real userspace there was, if any.

>
> I don't think giving userspace explicit control over the exact infoframe
> values is the right thing to do.

 Only userspace knows what kind of data it's stuffing into
 the pixels (and/or how it configures the csc units/etc.) to
 generate them.

>>>
>>> Yes, but userspace doesn't control or know whether we drive
>>> RGB or YCbCr on the wire. In fact, in some cases our driver
>>> needs to fallback to YCbCr420 for bandwidth reasons. There
>>> is currently no way for userspace to know that and I don't
>>> think it makes sense.
>>
>> People want that control as well for whatever reason. We've
>> been asked to allow YCbCr 4:4:4 output many times.
> 
> I don't really think it's a question of if we want it but rather how
> we get there. Harry is completely right that if we would make the
> subsampling controllable by user space instead of the kernel handling
> it magically, user space which does not adapt to the new control won't
> be able to light up some modes which worked before.
> 

Thanks for continuing this discussion and touching on the model of how
we get to where we want to go.

> This is obviously a problem and not one we can easily fix. We would
> need a new cap for user space to signal "I know that I can control
> bpc, subsampling and compression to lower the bandwidth and light up
> modes which otherwise fail". That cap would also remove all the
> properties which require kernel magic to work (that's also what I
> proposed for my KMS color pipeline API).
> 
> We all want to expose more of the scanout capability and give user
> space more control but I don't think an incremental approach works
> here and we would all do better if we accept that the current API
> requires kernel magic to work and has a few implicit assumptions baked
> in.
> 
> With all that being said, I think the right decision here is to
> 
> 1. Ignore subsampling for now
> 2. Let the kernel select YCC or RGB on the cable
> 3. Let the kernel figure out the conversion between RGB and YCC based
> on the color space selected
> 4. Let the kernel send the correct infoframe based on the selected
> color space and cable encoding
> 5. Only expose color spaces for which the kernel can do the conversion
> and send the infoframe

I agree. We don't want to break or change existing behavior (that is
used by userspace) and this will get us far without breaking things.

> 6. Work on the new API which is hidden behind a cap
> 

I assume you mean something like
https://gitlab.freedesktop.org/pq/color-and-hdr/-/issues/11

Above you say that you don't think an incremental approach works
here. Can you elaborate?

>From what I've seen recently I am inclined to favor an incremental
approach more. The reason is that any API, or portion thereof, is
useless unless it's enabled full stack. When it isn't it becomes
dead code quickly, or never really works because we overlooked
one thing. The colorspace debacle shows how even something as
simple as extra enum values in KMS APIs shouldn't be added unless
someone in a canonical upstream project actually uses them. I
would argue that such a canonical upstream project actually has
to be a production environment and not something like Weston.

[linux-next:master] BUILD REGRESSION 3ebb0ac55efaf1d0fb1b106f852c114e5021f7eb

2023-02-14 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 3ebb0ac55efaf1d0fb1b106f852c114e5021f7eb  Add linux-next specific 
files for 20230214

Error/Warning reports:

https://lore.kernel.org/oe-kbuild-all/202301300743.bp7dpazv-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202301302110.metnwkbd-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202302061911.c7xvhx9v-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202302062224.byzetxh1-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202302092211.54eydhyh-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202302111601.jty4lkra-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202302112104.g75cghzd-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202302142145.in5wznpf-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

Documentation/riscv/uabi.rst:24: WARNING: Enumerated list ends without a blank 
line; unexpected unindent.
Documentation/sphinx/templates/kernel-toc.html: 1:36 Invalid token: #}
ERROR: modpost: "devm_platform_ioremap_resource" [drivers/dma/fsl-edma.ko] 
undefined!
ERROR: modpost: "devm_platform_ioremap_resource" [drivers/dma/idma64.ko] 
undefined!
arch/mips/kernel/vpe.c:643:41: error: 'struct module' has no member named 
'mod_mem'
drivers/cxl/core/region.c:2628:6: warning: variable 'rc' is used uninitialized 
whenever 'if' condition is false [-Wsometimes-uninitialized]
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_optc.c:294:6: warning: no 
previous prototype for 'optc3_wait_drr_doublebuffer_pending_clear' 
[-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_hubbub.c:1011:6: warning: 
no previous prototype for 'hubbub31_init' [-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_hubbub.c:948:6: warning: 
no previous prototype for 'hubbub32_init' [-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_hubp.c:158:6: warning: no 
previous prototype for 'hubp32_init' [-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_resource_helpers.c:62:18: 
warning: variable 'cursor_bpp' set but not used [-Wunused-but-set-variable]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_capability.c:1296:32:
 warning: variable 'result_write_min_hblank' set but not used 
[-Wunused-but-set-variable]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_capability.c:280:42:
 warning: variable 'ds_port' set but not used [-Wunused-but-set-variable]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_training.c:1586:38:
 warning: variable 'result' set but not used [-Wunused-but-set-variable]
ftrace-ops.c:(.init.text+0x2c3): undefined reference to `__udivdi3'

Unverified Error/Warning (likely false positive, please contact us if 
interested):

drivers/clk/ingenic/jz4760-cgu.c:80 jz4760_cgu_calc_m_n_od() error: 
uninitialized symbol 'od'.
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c:415 bnxt_rdma_aux_device_init() 
warn: possible memory leak of 'edev'
drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c:1678 
rtl8188e_handle_ra_tx_report2() warn: ignoring unreachable code.
drivers/pci/setup-bus.c:1916:21-24: ERROR: invalid reference to the index 
variable of the iterator on line 1890
drivers/usb/gadget/composite.c:2082:33: sparse: sparse: restricted __le16 
degrades to integer
drivers/usb/host/xhci-plat.c:371 xhci_generic_plat_probe() error: we previously 
assumed 'sysdev' could be null (see line 361)

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- alpha-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-ds_port-set-but-not-used
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-result_write_min_hblank-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_training.c:warning:variable-result-set-but-not-used
|-- alpha-randconfig-c041-20230212
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-ds_port-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-result_write_min_hblank-set-but-not-used
|-- alpha-randconfig-s042-20230212
|   `-- 
drivers-usb-gadget-composite.c:sparse:sparse:restricted-__le16-degrades-to-integer
|-- arc-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-ds_port-set-but-not-used
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-result_write_min_hblank-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_training.c:warning:varia

Re: [PATCH 01/10] drm/amd/display: Turn global functions into static

2023-02-14 Thread Alex Deucher
On Mon, Feb 13, 2023 at 3:49 PM Arthur Grillo  wrote:
>
> Turn global functions that are only used locally into static ones. This
> reduces the number of -Wmissing-prototypes warnings.
>
> Signed-off-by: Arthur Grillo 

The first hunk was already fixed, but I applied the second hunk.

Alex

> ---
>  drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 2 +-
>  drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c 
> b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
> index 8c368bcc8e7e..a737782b2840 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
> @@ -87,7 +87,7 @@ static int dcn315_get_active_display_cnt_wa(
> return display_count;
>  }
>
> -bool should_disable_otg(struct pipe_ctx *pipe)
> +static bool should_disable_otg(struct pipe_ctx *pipe)
>  {
> bool ret = true;
>
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c 
> b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
> index 27dc8c9955f4..3c7cb3dc046b 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
> +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
> @@ -37,7 +37,7 @@
>  #include "soc15_hw_ip.h"
>  #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
>
> -enum dc_irq_source to_dal_irq_source_dcn201(
> +static enum dc_irq_source to_dal_irq_source_dcn201(
> struct irq_service *irq_service,
> uint32_t src_id,
> uint32_t ext_id)
> --
> 2.39.1
>


Re: [PATCH 0/6] Trivial code cleanup around color resources

2023-02-14 Thread Alex Deucher
Applied.  Thanks!

Alex

On Tue, Feb 14, 2023 at 7:48 AM Christian König
 wrote:
>
> Am 14.02.23 um 13:14 schrieb Melissa Wen:
> > Hi,
> >
> > Sorry for the noise, but while I've been working on wiring 3D LUT
> > support to AMD display driver [1] I found some annoying code style
> > issues in the shared-code part. So I'm just sending what I've been
> > cleaning to better examine the code.
> >
> > Most seem trivial, except the last one "remove unused
> > _calculate_degamma_curve" since this could just be a matter of missing
> > parts. If so, happy to remove the patch and include a comment describing
> > the situation (or the potential usage of it).
>
> The display stack is not my field of expertise, but those cleanups are
> so obvious that I think I can safely give my Reviewed-by: Christian
> König  for the entire series.
>
> Thanks,
> Christian.
>
> >
> > Thanks,
> >
> > Melissa
> >
> > [1] 
> > https://lore.kernel.org/dri-devel/20230109143846.1966301-1-m...@igalia.com/
> >
> > Melissa Wen (6):
> >drm/amd/display: ident braces in dcn30_acquire_post_bldn_3dlut
> >  correctly
> >drm/amd/display: clean code-style issues in dcn30_set_mpc_shaper_3dlut
> >drm/amd/display: camel case cleanup in color_gamma file
> >drm/amd/display: unset initial value for tf since it's never used
> >drm/amd/display: remove unused func declaration from resource headers
> >drm/amd/display: remove unused _calculate_degamma_curve function
> >
> >   .../drm/amd/display/dc/dcn30/dcn30_hwseq.c|  37 ++---
> >   .../drm/amd/display/dc/dcn30/dcn30_resource.c |   2 +-
> >   drivers/gpu/drm/amd/display/dc/inc/resource.h |   4 -
> >   .../amd/display/modules/color/color_gamma.c   | 140 --
> >   .../amd/display/modules/color/color_gamma.h   |   3 -
> >   5 files changed, 48 insertions(+), 138 deletions(-)
> >
>


Re: [PATCH 02/10] drm/amd/display: Add function prototypes to headers

2023-02-14 Thread Alex Deucher
Applied.  Thanks!

On Mon, Feb 13, 2023 at 3:50 PM Arthur Grillo  wrote:
>
> Add function prototypes to headers to reduce the number of
> -Wmissing-prototypes warnings.
>
> Signed-off-by: Arthur Grillo 
> ---
>  drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h | 2 ++
>  drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h | 2 ++
>  drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h   | 2 ++
>  3 files changed, 6 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h 
> b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h
> index e015e5a6c866..89d6208287b5 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h
> +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h
> @@ -133,6 +133,8 @@
>  int hubbub31_init_dchub_sys_ctx(struct hubbub *hubbub,
> struct dcn_hubbub_phys_addr_config *pa_config);
>
> +void hubbub31_init(struct hubbub *hubbub);
> +
>  void hubbub31_construct(struct dcn20_hubbub *hubbub3,
> struct dc_context *ctx,
> const struct dcn_hubbub_registers *hubbub_regs,
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h 
> b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h
> index bdc146890fca..b20eb04724bb 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h
> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h
> @@ -204,6 +204,8 @@ void hubbub32_force_usr_retraining_allow(struct hubbub 
> *hubbub, bool allow);
>
>  void hubbub32_force_wm_propagate_to_pipes(struct hubbub *hubbub);
>
> +void hubbub32_init(struct hubbub *hubbub);
> +
>  void dcn32_program_det_size(struct hubbub *hubbub, int hubp_inst, unsigned 
> int det_buffer_size_in_kbyte);
>
>  void hubbub32_construct(struct dcn20_hubbub *hubbub2,
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h 
> b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h
> index 56ef71151536..4cdbf63c952b 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h
> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h
> @@ -61,6 +61,8 @@ void hubp32_phantom_hubp_post_enable(struct hubp *hubp);
>  void hubp32_cursor_set_attributes(struct hubp *hubp,
> const struct dc_cursor_attributes *attr);
>
> +void hubp32_init(struct hubp *hubp);
> +
>  bool hubp32_construct(
> struct dcn20_hubp *hubp2,
> struct dc_context *ctx,
> --
> 2.39.1
>


Re: [PATCH 03/10] drm/amd/amdgpu: Add function prototypes to headers

2023-02-14 Thread Alex Deucher
Applied.  Thanks!

On Mon, Feb 13, 2023 at 3:50 PM Arthur Grillo  wrote:
>
> Add function prototypes to headers to reduce the number of
> -Wmissing-prototypes warnings.
>
> Signed-off-by: Arthur Grillo 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
> index bee93ab4298f..b03321e7d2d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
> @@ -538,6 +538,7 @@ struct amdgpu_firmware {
>
>  void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
>  void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
> +void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr);
>  void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr);
>  void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr);
>  void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr);
> --
> 2.39.1
>


Re: [PATCH 04/10] drm/amd/display: Add previously missing includes

2023-02-14 Thread Alex Deucher
Applied.  Thanks!

On Mon, Feb 13, 2023 at 3:50 PM Arthur Grillo  wrote:
>
> Add includes that were previously missing to reduce the number of
> -Wmissing-prototypes warnings.
>
> Signed-off-by: Arthur Grillo 
> ---
>  drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c   | 1 +
>  drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c | 1 +
>  2 files changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c 
> b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
> index 330d7cbc7398..3069af3684c6 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
> @@ -30,6 +30,7 @@
>  #include "dcn30/dcn30_hwseq.h"
>  #include "dcn31/dcn31_hwseq.h"
>  #include "dcn32_hwseq.h"
> +#include "dcn32_init.h"
>
>  static const struct hw_sequencer_funcs dcn32_funcs = {
> .program_gamut_remap = dcn10_program_gamut_remap,
> diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c 
> b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c
> index 04838a31e513..257f4fc065a5 100644
> --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c
> +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c
> @@ -24,6 +24,7 @@
>   */
>  #include "dc_link.h"
>  #include "link_dp_trace.h"
> +#include "link.h"
>
>  void dp_trace_init(struct dc_link *link)
>  {
> --
> 2.39.1
>


Re: [PATCH 05/10] drm/amd/display: Fix excess arguments on kernel-doc

2023-02-14 Thread Alex Deucher
Applied.  Thanks!

On Mon, Feb 13, 2023 at 3:50 PM Arthur Grillo  wrote:
>
> Remove arguments present on kernel-doc that are not present on the
> function declaration and add the new ones if present.
>
> Signed-off-by: Arthur Grillo 
> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c| 15 +++
>  drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  |  2 +-
>  .../gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c |  2 +-
>  3 files changed, 9 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> index 3d36329be384..40e6b22daa22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> @@ -273,8 +273,6 @@ static void sdma_v6_0_ring_emit_ib(struct amdgpu_ring 
> *ring,
>   * sdma_v6_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
>   *
>   * @ring: amdgpu ring pointer
> - * @job: job to retrieve vmid from
> - * @ib: IB object to schedule
>   *
>   * flush the IB by graphics cache rinse.
>   */
> @@ -326,7 +324,9 @@ static void sdma_v6_0_ring_emit_hdp_flush(struct 
> amdgpu_ring *ring)
>   * sdma_v6_0_ring_emit_fence - emit a fence on the DMA ring
>   *
>   * @ring: amdgpu ring pointer
> - * @fence: amdgpu fence object
> + * @addr: address
> + * @seq: fence seq number
> + * @flags: fence flags
>   *
>   * Add a DMA fence packet to the ring to write
>   * the fence seq number and DMA trap packet to generate
> @@ -1060,10 +1060,9 @@ static void sdma_v6_0_vm_copy_pte(struct amdgpu_ib *ib,
>   *
>   * @ib: indirect buffer to fill with commands
>   * @pe: addr of the page entry
> - * @addr: dst addr to write into pe
> + * @value: dst addr to write into pe
>   * @count: number of page entries to update
>   * @incr: increase next addr by incr bytes
> - * @flags: access flags
>   *
>   * Update PTEs by writing them manually using sDMA.
>   */
> @@ -1167,7 +1166,6 @@ static void sdma_v6_0_ring_emit_pipeline_sync(struct 
> amdgpu_ring *ring)
>   * sdma_v6_0_ring_emit_vm_flush - vm flush using sDMA
>   *
>   * @ring: amdgpu_ring pointer
> - * @vm: amdgpu_vm pointer
>   *
>   * Update the page table base and flush the VM TLB
>   * using sDMA.
> @@ -1591,10 +1589,11 @@ static void sdma_v6_0_set_irq_funcs(struct 
> amdgpu_device *adev)
>  /**
>   * sdma_v6_0_emit_copy_buffer - copy buffer using the sDMA engine
>   *
> - * @ring: amdgpu_ring structure holding ring information
> + * @ib: indirect buffer to fill with commands
>   * @src_offset: src GPU address
>   * @dst_offset: dst GPU address
>   * @byte_count: number of bytes to xfer
> + * @tmz: if a secure copy should be used
>   *
>   * Copy GPU buffers using the DMA engine.
>   * Used by the amdgpu ttm implementation to move pages if
> @@ -1620,7 +1619,7 @@ static void sdma_v6_0_emit_copy_buffer(struct amdgpu_ib 
> *ib,
>  /**
>   * sdma_v6_0_emit_fill_buffer - fill buffer using the sDMA engine
>   *
> - * @ring: amdgpu_ring structure holding ring information
> + * @ib: indirect buffer to fill
>   * @src_data: value to write to buffer
>   * @dst_offset: dst GPU address
>   * @byte_count: number of bytes to xfer
> diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c 
> b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
> index 6ccf477d1c4d..c2092775ca88 100644
> --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
> +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
> @@ -698,7 +698,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
>   *
>   * @dc: [in] current dc state
>   * @context: [in] new dc state
> - * @cmd: [in] DMUB cmd to be populated with SubVP info
> + * @enable: [in] if true enables the pipes population
>   *
>   * This function loops through each pipe and populates the DMUB SubVP CMD 
> info
>   * based on the pipe (e.g. SubVP, VBLANK).
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c 
> b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
> index f607a0e28f14..f62368da875d 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
> @@ -581,7 +581,7 @@ static void dpp1_dscl_set_manual_ratio_init(
>   * dpp1_dscl_set_recout - Set the first pixel of RECOUT in the OTG active 
> area
>   *
>   * @dpp: DPP data struct
> - * @recount: Rectangle information
> + * @recout: Rectangle information
>   *
>   * This function sets the MPC RECOUT_START and RECOUT_SIZE registers based on
>   * the values specified in the recount parameter.
> --
> 2.39.1
>


Re: [PATCH 09/10] drm/amd/display: Make variables declaration inside ifdef guard

2023-02-14 Thread Alex Deucher
Applied.  Thanks!

On Mon, Feb 13, 2023 at 3:50 PM Arthur Grillo  wrote:
>
> Make variables declaration inside ifdef guard, as they are only used
> inside the same ifdef guard. This remove some of the
> -Wunused-but-set-variable warning.
>
> Signed-off-by: Arthur Grillo 
> ---
>  .../gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c| 4 
>  .../gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c  | 4 
>  2 files changed, 8 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c 
> b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
> index ec351c8418cb..27f488405335 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
> @@ -878,7 +878,9 @@ static bool CalculatePrefetchSchedule(
> double DSTTotalPixelsAfterScaler;
> double LineTime;
> double dst_y_prefetch_equ;
> +#ifdef __DML_VBA_DEBUG__
> double Tsw_oto;
> +#endif
> double prefetch_bw_oto;
> double prefetch_bw_pr;
> double Tvm_oto;
> @@ -1060,7 +1062,9 @@ static bool CalculatePrefetchSchedule(
>
> min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, 
> PrefetchSourceLinesC) / max_vratio_pre);
> Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / 
> LineTime, min_Lsw), 1) / 4;
> +#ifdef __DML_VBA_DEBUG__
> Tsw_oto = Lsw_oto * LineTime;
> +#endif
>
>
>  #ifdef __DML_VBA_DEBUG__
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c 
> b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
> index 950669f2c10d..0fd3889c2061 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
> @@ -900,7 +900,9 @@ static bool CalculatePrefetchSchedule(
> double DSTTotalPixelsAfterScaler;
> double LineTime;
> double dst_y_prefetch_equ;
> +#ifdef __DML_VBA_DEBUG__
> double Tsw_oto;
> +#endif
> double prefetch_bw_oto;
> double prefetch_bw_pr;
> double Tvm_oto;
> @@ -1082,7 +1084,9 @@ static bool CalculatePrefetchSchedule(
>
> min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, 
> PrefetchSourceLinesC) / max_vratio_pre);
> Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / 
> LineTime, min_Lsw), 1) / 4;
> +#ifdef __DML_VBA_DEBUG__
> Tsw_oto = Lsw_oto * LineTime;
> +#endif
>
>
>  #ifdef __DML_VBA_DEBUG__
> --
> 2.39.1
>


Re: [PATCH v2 1/8] drm/amdgpu: add UAPI for allocating doorbell memory

2023-02-14 Thread Christian König

Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Alex Deucher 

This patch adds flags for a new gem domain AMDGPU_GEM_DOMAIN_DOORBELL
in the UAPI layer.

V2: Drop 'memory' from description (Christian)

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
---
  include/uapi/drm/amdgpu_drm.h | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4038abe8505a..cc5d551abda5 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -94,6 +94,9 @@ extern "C" {
   *
   * %AMDGPU_GEM_DOMAIN_OA  Ordered append, used by 3D or Compute engines
   * for appending data.
+ *
+ * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for
+ * signalling user mode queues.


Maybe write "for signaling events to the firmware, used especially for 
user mode queues.".


With or without that Reviewed-by: Christian König 

Christian.


   */
  #define AMDGPU_GEM_DOMAIN_CPU 0x1
  #define AMDGPU_GEM_DOMAIN_GTT 0x2
@@ -101,12 +104,14 @@ extern "C" {
  #define AMDGPU_GEM_DOMAIN_GDS 0x8
  #define AMDGPU_GEM_DOMAIN_GWS 0x10
  #define AMDGPU_GEM_DOMAIN_OA  0x20
+#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40
  #define AMDGPU_GEM_DOMAIN_MASK(AMDGPU_GEM_DOMAIN_CPU | \
 AMDGPU_GEM_DOMAIN_GTT | \
 AMDGPU_GEM_DOMAIN_VRAM | \
 AMDGPU_GEM_DOMAIN_GDS | \
 AMDGPU_GEM_DOMAIN_GWS | \
-AMDGPU_GEM_DOMAIN_OA)
+AMDGPU_GEM_DOMAIN_OA | \
+AMDGPU_GEM_DOMAIN_DOORBELL)
  
  /* Flag that CPU access will be required for the case of VRAM domain */

  #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0)




Re: [PATCH v2 2/8] drm/amdgpu: replace aper_base_kaddr with vram_aper_base_kaddr

2023-02-14 Thread Christian König

Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Alex Deucher 

To differentiate it from the doorbell BAR.


Since we removed the manual ioremap() for the doorbell BAR today we 
don't really need that patch any more, don't we?


On the other hand renaming the field still makes a lot of sense for 
better documenting what it's good for.


Christian.



V2: Added Christian's A-B

Acked-by: Christian Koenig 

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 14 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h|  2 +-
  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 10 +-
  drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +-
  5 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2f28a8c02f64..0b6a394e109b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -354,12 +354,12 @@ size_t amdgpu_device_aper_access(struct amdgpu_device 
*adev, loff_t pos,
size_t count = 0;
uint64_t last;
  
-	if (!adev->mman.aper_base_kaddr)

+   if (!adev->mman.vram_aper_base_kaddr)
return 0;
  
  	last = min(pos + size, adev->gmc.visible_vram_size);

if (last > pos) {
-   addr = adev->mman.aper_base_kaddr + pos;
+   addr = adev->mman.vram_aper_base_kaddr + pos;
count = last - pos;
  
  		if (write) {

@@ -3954,9 +3954,9 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device 
*adev)
  
  	iounmap(adev->rmmio);

adev->rmmio = NULL;
-   if (adev->mman.aper_base_kaddr)
-   iounmap(adev->mman.aper_base_kaddr);
-   adev->mman.aper_base_kaddr = NULL;
+   if (adev->mman.vram_aper_base_kaddr)
+   iounmap(adev->mman.vram_aper_base_kaddr);
+   adev->mman.vram_aper_base_kaddr = NULL;
  
  	/* Memory manager related */

if (!adev->gmc.xgmi.connected_to_cpu) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 55e0284b2bdd..73b831b47892 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -578,9 +578,9 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device 
*bdev,
if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
return -EINVAL;
  
-		if (adev->mman.aper_base_kaddr &&

+   if (adev->mman.vram_aper_base_kaddr &&
mem->placement & TTM_PL_FLAG_CONTIGUOUS)
-   mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
+   mem->bus.addr = (u8 *)adev->mman.vram_aper_base_kaddr +
mem->bus.offset;
  
  		mem->bus.offset += adev->gmc.aper_base;

@@ -1752,12 +1752,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
  #ifdef CONFIG_64BIT
  #ifdef CONFIG_X86
if (adev->gmc.xgmi.connected_to_cpu)
-   adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
+   adev->mman.vram_aper_base_kaddr = 
ioremap_cache(adev->gmc.aper_base,
adev->gmc.visible_vram_size);
  
  	else

  #endif
-   adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
+   adev->mman.vram_aper_base_kaddr = 
ioremap_wc(adev->gmc.aper_base,
adev->gmc.visible_vram_size);
  #endif
  
@@ -1904,9 +1904,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
  
  	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
  
-		if (adev->mman.aper_base_kaddr)

-   iounmap(adev->mman.aper_base_kaddr);
-   adev->mman.aper_base_kaddr = NULL;
+   if (adev->mman.vram_aper_base_kaddr)
+   iounmap(adev->mman.vram_aper_base_kaddr);
+   adev->mman.vram_aper_base_kaddr = NULL;
  
  		drm_dev_exit(idx);

}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index e2cd5894afc9..929bc8abac28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -50,7 +50,7 @@ struct amdgpu_gtt_mgr {
  struct amdgpu_mman {
struct ttm_device   bdev;
boolinitialized;
-   void __iomem*aper_base_kaddr;
+   void __iomem*vram_aper_base_kaddr;
  
  	/* buffer handling */

const struct amdgpu_buffer_funcs*buffer_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index bd3e3e23a939..f39d4f593a2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -611,10 +611,10 @@ static int psp_v11_0_memory_training(struct psp_context 

Re: [PATCH v2 3/8] drm/amdgpu: rename gmc.aper_base/size

2023-02-14 Thread Christian König

Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Alex Deucher 

This patch renames aper_base and aper_size parameters (in adev->gmc),
to vram_aper_base and vram_aper_size, to differentiate it from the
doorbell BAR.

V2: rebase

Cc: Alex Deucher 
Cc: Christian Koenig 

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 


Acked-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   |  6 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |  4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   | 12 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  |  8 
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 10 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c   | 10 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c|  6 +++---
  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c| 12 ++--
  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c| 10 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 10 +-
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  4 ++--
  14 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index f99d4873bf22..58689b2a2d1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -438,7 +438,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device 
*adev,
mem_info->vram_width = adev->gmc.vram_width;
  
  	pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",

-   &adev->gmc.aper_base,
+   &adev->gmc.vram_aper_base,
mem_info->local_mem_size_public,
mem_info->local_mem_size_private);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 0b6a394e109b..45588b7919fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3961,7 +3961,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device 
*adev)
/* Memory manager related */
if (!adev->gmc.xgmi.connected_to_cpu) {
arch_phys_wc_del(adev->gmc.vram_mtrr);
-   arch_io_free_memtype_wc(adev->gmc.aper_base, 
adev->gmc.aper_size);
+   arch_io_free_memtype_wc(adev->gmc.vram_aper_base, 
adev->gmc.vram_aper_size);
}
  }
  
@@ -5562,14 +5562,14 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,

uint64_t address_mask = peer_adev->dev->dma_mask ?
~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
resource_size_t aper_limit =
-   adev->gmc.aper_base + adev->gmc.aper_size - 1;
+   adev->gmc.vram_aper_base + adev->gmc.vram_aper_size - 1;
bool p2p_access =
!adev->gmc.xgmi.connected_to_cpu &&
!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
  
  	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&

adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
-   !(adev->gmc.aper_base & address_mask ||
+   !(adev->gmc.vram_aper_base & address_mask ||
  aper_limit & address_mask));
  #else
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 02a4c93673ce..c7e64e234de6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -775,7 +775,7 @@ uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, 
struct amdgpu_bo *bo)
   */
  uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo 
*bo)
  {
-   return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + 
adev->gmc.aper_base;
+   return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + 
adev->gmc.vram_aper_base;
  }
  
  int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 0305b660cd17..bb7076ecbf01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -167,8 +167,8 @@ struct amdgpu_gmc {
 * gart/vram_start/end field as the later is from
 * GPU's view and aper_base is from CPU's view.
 */
-   resource_size_t aper_size;
-   resource_size_t aper_base;
+   resource_size_t vram_aper_size;
+   resource_size_t vram_aper_base;
/* for some chips with <= 32MB we need to lie
 * about vram size near mc fb location */
u64 mc_vram_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 25a68de0..b48

Re: [PATCH v2 4/8] drm/amdgpu: rename doorbell variables

2023-02-14 Thread Christian König




Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Alex Deucher 

This patch:
- renames the adev->doorbell.base to adev->doorbell.doorbell_aper_base
- renames the adev->doorbell.size to adev->doorbell.doorbell_aper_size
- moves the adev->doorbell.ptr to adev->mman.doorbell_aper_base_kaddr

rest of the changes are just to accommodate these variable name changes.

V2: Mered 2 patches into this one doorbell clean-up patch.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  8 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 34 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h |  5 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|  2 +-
  drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c   |  4 +--
  drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c   |  4 +--
  drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c   |  4 +--
  drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c   |  4 +--
  drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   |  4 +--
  drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c   |  4 +--
  12 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 58689b2a2d1c..0493c64e9d0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -106,13 +106,13 @@ static void amdgpu_doorbell_get_kfd_info(struct 
amdgpu_device *adev,
 * not initialized as AMDGPU manages the whole
 * doorbell space.
 */
-   *aperture_base = adev->doorbell.base;
+   *aperture_base = adev->doorbell.doorbell_aper_base;
*aperture_size = 0;
*start_offset = 0;
-   } else if (adev->doorbell.size > adev->doorbell.num_doorbells *
+   } else if (adev->doorbell.doorbell_aper_size > 
adev->doorbell.num_doorbells *
sizeof(u32)) {
-   *aperture_base = adev->doorbell.base;
-   *aperture_size = adev->doorbell.size;
+   *aperture_base = adev->doorbell.doorbell_aper_base;
+   *aperture_size = adev->doorbell.doorbell_aper_size;


Well that now looks a bit duplicated and you completely remove 
doorbell_aper_base_kaddr later on, right?


Christian.


*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
} else {
*aperture_base = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 45588b7919fe..43c1b67c2778 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -597,7 +597,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 
index)
return 0;
  
  	if (index < adev->doorbell.num_doorbells) {

-   return readl(adev->doorbell.ptr + index);
+   return readl(adev->mman.doorbell_aper_base_kaddr + index);
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
return 0;
@@ -620,7 +620,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 
index, u32 v)
return;
  
  	if (index < adev->doorbell.num_doorbells) {

-   writel(v, adev->doorbell.ptr + index);
+   writel(v, adev->mman.doorbell_aper_base_kaddr + index);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
}
@@ -641,7 +641,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 
index)
return 0;
  
  	if (index < adev->doorbell.num_doorbells) {

-   return atomic64_read((atomic64_t *)(adev->doorbell.ptr + 
index));
+   return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index));
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
return 0;
@@ -664,7 +664,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 
index, u64 v)
return;
  
  	if (index < adev->doorbell.num_doorbells) {

-   atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
+   atomic64_set((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr 
+ index), v);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
}
@@ -1035,10 +1035,10 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)
  
  	/* No doorbell on SI hardware generation */

if (adev->asic_type < CHIP_BONAIRE) {
-   adev->doorbell.base = 0;
-   adev->doorbell.size = 0;
+   adev->doorbell.doorbell_aper_base = 0;
+   adev->doorbell.doorbell_aper_size = 0;

Re: [PATCH v2 5/8] drm/amdgpu: accommodate DOMAIN/PL_DOORBELL

2023-02-14 Thread Christian König

Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Alex Deucher 

This patch adds changes:
- to accommodate the new GEM domain DOORBELL
- to accommodate the new TTM PL DOORBELL

to manage doorbell allocations as GEM Objects.

V2: Addressed reviwe comments from Christian
 - drop the doorbell changes for pinning/unpinning
 - drop the doorbell changes for dma-buf map
 - drop the doorbell changes for sgt
 - no need to handle TTM_PL_FLAG_CONTIGUOUS for doorbell

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 11 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 11 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h|  1 +
  3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b48c9fd60c43..ff9979fecfd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -147,6 +147,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo 
*abo, u32 domain)
c++;
}
  
+	if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {

+   places[c].fpfn = 0;
+   places[c].lpfn = 0;
+   places[c].mem_type = AMDGPU_PL_DOORBELL;
+   places[c].flags = 0;
+   c++;
+   }
+


Mhm, do we need to increase AMDGPU_BO_MAX_PLACEMENTS?

I think the answer is *no* since mixing DOORBELL with CPU, GTT or VRAM 
placement doesn't make sense, but do we enforce that somewhere?



if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0;
places[c].lpfn = 0;
@@ -466,7 +474,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device 
*adev,
goto fail;
}
  
-	/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */

+   /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU,  
AMDGPU_GEM_DOMAIN_DOORBELL */


Should we enforce that user space can only allocate 1 page doorbells?


return true;
  
  fail:

@@ -1014,6 +1022,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo)
} else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
}
+


Unrelated change.

Regards,
Christian.


  }
  
  static const char *amdgpu_vram_names[] = {

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0e8f580769ab..e9dc24191fc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -128,6 +128,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
case AMDGPU_PL_GDS:
case AMDGPU_PL_GWS:
case AMDGPU_PL_OA:
+   case AMDGPU_PL_DOORBELL:
placement->num_placement = 0;
placement->num_busy_placement = 0;
return;
@@ -500,9 +501,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, 
bool evict,
if (old_mem->mem_type == AMDGPU_PL_GDS ||
old_mem->mem_type == AMDGPU_PL_GWS ||
old_mem->mem_type == AMDGPU_PL_OA ||
+   old_mem->mem_type == AMDGPU_PL_DOORBELL ||
new_mem->mem_type == AMDGPU_PL_GDS ||
new_mem->mem_type == AMDGPU_PL_GWS ||
-   new_mem->mem_type == AMDGPU_PL_OA) {
+   new_mem->mem_type == AMDGPU_PL_OA ||
+   new_mem->mem_type == AMDGPU_PL_DOORBELL) {
/* Nothing to save here */
ttm_bo_move_null(bo, new_mem);
goto out;
@@ -586,6 +589,11 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device 
*bdev,
mem->bus.offset += adev->gmc.vram_aper_base;
mem->bus.is_iomem = true;
break;
+   case AMDGPU_PL_DOORBELL:
+   mem->bus.offset = mem->start << PAGE_SHIFT;
+   mem->bus.offset += adev->doorbell.doorbell_aper_base;
+   mem->bus.is_iomem = true;
+   break;
default:
return -EINVAL;
}
@@ -1267,6 +1275,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, 
struct ttm_resource *mem)
flags |= AMDGPU_PTE_VALID;
  
  	if (mem && (mem->mem_type == TTM_PL_TT ||

+   mem->mem_type == AMDGPU_PL_DOORBELL ||
mem->mem_type == AMDGPU_PL_PREEMPT)) {
flags |= AMDGPU_PTE_SYSTEM;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

index 967b265dbfa1..9cf5d8419965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -33,6 +33,7 @@
  #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
  #define AMDGPU_PL_OA  (TTM_PL_PRIV + 2)
  #define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3)
+#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4)
  
  #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512

  #define AMDGPU_GTT_NUM_TR

Re: [PATCH v2 7/8] drm/amdgpu: create doorbell kernel object

2023-02-14 Thread Christian König

Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Shashank Sharma 

This patch does the following:
- Initializes TTM range management for domain DOORBELL.
- Introduces a kernel bo for doorbell management in form of 
mman.doorbell_kernel_bo.
   This bo holds the kernel doorbell space now.
- Removes ioremapping of doorbell-kernel memory, as its not required now.

V2:
- Addressed review comments from Christian:
 - do not use kernel_create_at(0), use kernel_create() instead.
 - do not use ttm_resource_manager, use range_manager instead.
 - do not ioremap doorbell, TTM will do that.
- Split one big patch into 2

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 22 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  7 +++
  2 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e9dc24191fc8..086e83c17c0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1879,12 +1879,32 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
}
  
+	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.doorbell_aper_size);

+   if (r) {
+   DRM_ERROR("Failed initializing oa heap.\n");
+   return r;
+   }
+
if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&adev->mman.sdma_access_bo, NULL,
&adev->mman.sdma_access_ptr))
DRM_WARN("Debug VRAM access will use slowpath MM access\n");
  
+	/* Create a doorbell BO for kernel usages */

+   r = amdgpu_bo_create_kernel(adev,
+   adev->mman.doorbell_kernel_bo_size,
+   PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_DOORBELL,
+   &adev->mman.doorbell_kernel_bo,
+   &adev->mman.doorbell_gpu_addr,
+   (void **)&adev->mman.doorbell_cpu_addr);
+
+   if (r) {
+   DRM_ERROR("Failed to create doorbell BO, err=%d\n", r);
+   return r;
+   }
+


I would even move this before the SDMA VRAM buffer since the later is 
only nice to have while the doorbell is mandatory to have.



return 0;
  }
  
@@ -1908,6 +1928,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)

  NULL, NULL);
amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
&adev->mman.sdma_access_ptr);
+   amdgpu_bo_free_kernel(&adev->mman.doorbell_kernel_bo,
+ NULL, (void **)&adev->mman.doorbell_cpu_addr);
amdgpu_ttm_fw_reserve_vram_fini(adev);
amdgpu_ttm_drv_reserve_vram_fini(adev);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

index 9cf5d8419965..50748ff1dd3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -97,6 +97,13 @@ struct amdgpu_mman {
/* PAGE_SIZE'd BO for process memory r/w over SDMA. */
struct amdgpu_bo*sdma_access_bo;
void*sdma_access_ptr;
+
+   /* doorbells reserved for the kernel driver */
+   u32 num_kernel_doorbells;   /* Number of doorbells 
actually reserved for kernel */
+   uint64_tdoorbell_kernel_bo_size;


That looks like duplicated information. We should only keep either the 
number of kernel doorbells or the kernel doorbell bo size around, not both.


And BTW please no comment after structure members.

Christian.


+   uint64_tdoorbell_gpu_addr;
+   struct amdgpu_bo*doorbell_kernel_bo;
+   u32 *doorbell_cpu_addr;
  };
  
  struct amdgpu_copy_mem {




Re: [PATCH v2 8/8] drm/amdgpu: start using kernel doorbell bo

2023-02-14 Thread Christian König

Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Shashank Sharma 

This patch does the following:

- Adds new variables like mman.doorbell_bo_size/gpu_addr/cpu_addr.
   The cpu_addr ptr will be used now for doorbell read/write from
   doorbell BAR.
- Adjusts the existing code to use kernel doorbell BO's size and its
   cpu_address.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 


Maybe squash this one together with the previous patch.

But see below.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  5 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 33 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h |  1 -
  3 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 0493c64e9d0a..87f486f522ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -109,11 +109,10 @@ static void amdgpu_doorbell_get_kfd_info(struct 
amdgpu_device *adev,
*aperture_base = adev->doorbell.doorbell_aper_base;
*aperture_size = 0;
*start_offset = 0;
-   } else if (adev->doorbell.doorbell_aper_size > 
adev->doorbell.num_doorbells *
-   sizeof(u32)) {
+   } else if (adev->doorbell.doorbell_aper_size > 
adev->mman.doorbell_kernel_bo_size) {
*aperture_base = adev->doorbell.doorbell_aper_base;
*aperture_size = adev->doorbell.doorbell_aper_size;
-   *start_offset = adev->doorbell.num_doorbells * sizeof(u32);
+   *start_offset = adev->mman.doorbell_kernel_bo_size;
} else {
*aperture_base = 0;
*aperture_size = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 43c1b67c2778..fde199434579 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -596,8 +596,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 
index)
if (amdgpu_device_skip_hw_access(adev))
return 0;
  
-	if (index < adev->doorbell.num_doorbells) {

-   return readl(adev->mman.doorbell_aper_base_kaddr + index);
+   if (index < adev->mman.num_kernel_doorbells) {
+   return readl(adev->mman.doorbell_cpu_addr + index);
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
return 0;
@@ -619,8 +619,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 
index, u32 v)
if (amdgpu_device_skip_hw_access(adev))
return;
  
-	if (index < adev->doorbell.num_doorbells) {

-   writel(v, adev->mman.doorbell_aper_base_kaddr + index);
+   if (index < adev->mman.num_kernel_doorbells) {
+   writel(v, adev->mman.doorbell_cpu_addr + index);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
}
@@ -640,8 +640,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 
index)
if (amdgpu_device_skip_hw_access(adev))
return 0;
  
-	if (index < adev->doorbell.num_doorbells) {

-   return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index));
+   if (index < adev->mman.num_kernel_doorbells) {
+   return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_cpu_addr + index));
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
return 0;
@@ -663,8 +663,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 
index, u64 v)
if (amdgpu_device_skip_hw_access(adev))
return;
  
-	if (index < adev->doorbell.num_doorbells) {

-   atomic64_set((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr 
+ index), v);
+   if (index < adev->mman.num_kernel_doorbells) {
+   atomic64_set((atomic64_t *)(adev->mman.doorbell_cpu_addr + 
index), v);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
}
@@ -1037,7 +1037,7 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)
if (adev->asic_type < CHIP_BONAIRE) {
adev->doorbell.doorbell_aper_base = 0;
adev->doorbell.doorbell_aper_size = 0;
-   adev->doorbell.num_doorbells = 0;
+   adev->mman.num_kernel_doorbells = 0;
adev->mman.doorbell_aper_base_kaddr = NULL;
return 0;
}
@@ -1052,13 +1052,13 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)
adev->doorbell.doorbell_aper_size = pci_resource_len(adev->pdev, 2);
  
  	if (adev->enable_mes) {

-   adev->doorbell.num_doorbells =
+   adev->mman.num_kernel

Re: [PATCH v2 1/8] drm/amdgpu: add UAPI for allocating doorbell memory

2023-02-14 Thread Shashank Sharma



On 14/02/2023 19:22, Christian König wrote:

Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Alex Deucher 

This patch adds flags for a new gem domain AMDGPU_GEM_DOMAIN_DOORBELL
in the UAPI layer.

V2: Drop 'memory' from description (Christian)

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
---
  include/uapi/drm/amdgpu_drm.h | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/uapi/drm/amdgpu_drm.h 
b/include/uapi/drm/amdgpu_drm.h

index 4038abe8505a..cc5d551abda5 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -94,6 +94,9 @@ extern "C" {
   *
   * %AMDGPU_GEM_DOMAIN_OA    Ordered append, used by 3D or Compute 
engines

   * for appending data.
+ *
+ * %AMDGPU_GEM_DOMAIN_DOORBELL    Doorbell. It is an MMIO region for
+ * signalling user mode queues.


Maybe write "for signaling events to the firmware, used especially for 
user mode queues.".


With or without that Reviewed-by: Christian König 




Will add that, thanks.

- Shashank



Christian.


   */
  #define AMDGPU_GEM_DOMAIN_CPU    0x1
  #define AMDGPU_GEM_DOMAIN_GTT    0x2
@@ -101,12 +104,14 @@ extern "C" {
  #define AMDGPU_GEM_DOMAIN_GDS    0x8
  #define AMDGPU_GEM_DOMAIN_GWS    0x10
  #define AMDGPU_GEM_DOMAIN_OA    0x20
+#define AMDGPU_GEM_DOMAIN_DOORBELL    0x40
  #define AMDGPU_GEM_DOMAIN_MASK    (AMDGPU_GEM_DOMAIN_CPU | \
   AMDGPU_GEM_DOMAIN_GTT | \
   AMDGPU_GEM_DOMAIN_VRAM | \
   AMDGPU_GEM_DOMAIN_GDS | \
   AMDGPU_GEM_DOMAIN_GWS | \
- AMDGPU_GEM_DOMAIN_OA)
+ AMDGPU_GEM_DOMAIN_OA | \
+ AMDGPU_GEM_DOMAIN_DOORBELL)
    /* Flag that CPU access will be required for the case of VRAM 
domain */

  #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED    (1 << 0)




Re: [PATCH v2 2/8] drm/amdgpu: replace aper_base_kaddr with vram_aper_base_kaddr

2023-02-14 Thread Shashank Sharma



On 14/02/2023 19:24, Christian König wrote:

Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Alex Deucher 

To differentiate it from the doorbell BAR.


Since we removed the manual ioremap() for the doorbell BAR today we 
don't really need that patch any more, don't we?


On the other hand renaming the field still makes a lot of sense for 
better documenting what it's good for.


Christian.

Exactly, I was also in two minds about this patch, but then realized 
that if nothing, its making stuff more readable.


- Shashank




V2: Added Christian's A-B

Acked-by: Christian Koenig 

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 14 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  2 +-
  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 10 +-
  drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +-
  5 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 2f28a8c02f64..0b6a394e109b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -354,12 +354,12 @@ size_t amdgpu_device_aper_access(struct 
amdgpu_device *adev, loff_t pos,

  size_t count = 0;
  uint64_t last;
  -    if (!adev->mman.aper_base_kaddr)
+    if (!adev->mman.vram_aper_base_kaddr)
  return 0;
    last = min(pos + size, adev->gmc.visible_vram_size);
  if (last > pos) {
-    addr = adev->mman.aper_base_kaddr + pos;
+    addr = adev->mman.vram_aper_base_kaddr + pos;
  count = last - pos;
    if (write) {
@@ -3954,9 +3954,9 @@ static void amdgpu_device_unmap_mmio(struct 
amdgpu_device *adev)

    iounmap(adev->rmmio);
  adev->rmmio = NULL;
-    if (adev->mman.aper_base_kaddr)
-    iounmap(adev->mman.aper_base_kaddr);
-    adev->mman.aper_base_kaddr = NULL;
+    if (adev->mman.vram_aper_base_kaddr)
+    iounmap(adev->mman.vram_aper_base_kaddr);
+    adev->mman.vram_aper_base_kaddr = NULL;
    /* Memory manager related */
  if (!adev->gmc.xgmi.connected_to_cpu) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index 55e0284b2bdd..73b831b47892 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -578,9 +578,9 @@ static int amdgpu_ttm_io_mem_reserve(struct 
ttm_device *bdev,
  if ((mem->bus.offset + bus_size) > 
adev->gmc.visible_vram_size)

  return -EINVAL;
  -    if (adev->mman.aper_base_kaddr &&
+    if (adev->mman.vram_aper_base_kaddr &&
  mem->placement & TTM_PL_FLAG_CONTIGUOUS)
-    mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
+    mem->bus.addr = (u8 *)adev->mman.vram_aper_base_kaddr +
  mem->bus.offset;
    mem->bus.offset += adev->gmc.aper_base;
@@ -1752,12 +1752,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
  #ifdef CONFIG_64BIT
  #ifdef CONFIG_X86
  if (adev->gmc.xgmi.connected_to_cpu)
-    adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
+    adev->mman.vram_aper_base_kaddr = 
ioremap_cache(adev->gmc.aper_base,

  adev->gmc.visible_vram_size);
    else
  #endif
-    adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
+    adev->mman.vram_aper_base_kaddr = 
ioremap_wc(adev->gmc.aper_base,

  adev->gmc.visible_vram_size);
  #endif
  @@ -1904,9 +1904,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
    if (drm_dev_enter(adev_to_drm(adev), &idx)) {
  -    if (adev->mman.aper_base_kaddr)
-    iounmap(adev->mman.aper_base_kaddr);
-    adev->mman.aper_base_kaddr = NULL;
+    if (adev->mman.vram_aper_base_kaddr)
+    iounmap(adev->mman.vram_aper_base_kaddr);
+    adev->mman.vram_aper_base_kaddr = NULL;
    drm_dev_exit(idx);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

index e2cd5894afc9..929bc8abac28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -50,7 +50,7 @@ struct amdgpu_gtt_mgr {
  struct amdgpu_mman {
  struct ttm_device    bdev;
  bool    initialized;
-    void __iomem    *aper_base_kaddr;
+    void __iomem    *vram_aper_base_kaddr;
    /* buffer handling */
  const struct amdgpu_buffer_funcs    *buffer_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c

index bd3e3e23a939..f39d4f593a2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -611,10 +611,10 @@ static int psp_v11_0_memory_training(struct 
psp_context *psp, uint32_t ops)

   */
  sz = GDDR6_MEM_TRAINING_

Re: [PATCH v2 4/8] drm/amdgpu: rename doorbell variables

2023-02-14 Thread Shashank Sharma



On 14/02/2023 19:27, Christian König wrote:



Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Alex Deucher 

This patch:
- renames the adev->doorbell.base to adev->doorbell.doorbell_aper_base
- renames the adev->doorbell.size to adev->doorbell.doorbell_aper_size
- moves the adev->doorbell.ptr to adev->mman.doorbell_aper_base_kaddr

rest of the changes are just to accommodate these variable name changes.

V2: Mered 2 patches into this one doorbell clean-up patch.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  8 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 34 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h |  5 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    |  2 +-
  drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c   |  4 +--
  drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c   |  4 +--
  drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c   |  4 +--
  drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c   |  4 +--
  drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   |  4 +--
  drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c   |  4 +--
  12 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index 58689b2a2d1c..0493c64e9d0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -106,13 +106,13 @@ static void amdgpu_doorbell_get_kfd_info(struct 
amdgpu_device *adev,

   * not initialized as AMDGPU manages the whole
   * doorbell space.
   */
-    *aperture_base = adev->doorbell.base;
+    *aperture_base = adev->doorbell.doorbell_aper_base;
  *aperture_size = 0;
  *start_offset = 0;
-    } else if (adev->doorbell.size > adev->doorbell.num_doorbells *
+    } else if (adev->doorbell.doorbell_aper_size > 
adev->doorbell.num_doorbells *

  sizeof(u32)) {
-    *aperture_base = adev->doorbell.base;
-    *aperture_size = adev->doorbell.size;
+    *aperture_base = adev->doorbell.doorbell_aper_base;
+    *aperture_size = adev->doorbell.doorbell_aper_size;


Well that now looks a bit duplicated and you completely remove 
doorbell_aper_base_kaddr later on, right?


Christian.

Yeah, I still kept it as this cleanup is a bit peculiar and not easy to 
follow on review, but your right, lets keep it cleaner.


I will take remove doorbell_aper_base_kaddr.

- Shashank


  *start_offset = adev->doorbell.num_doorbells * sizeof(u32);
  } else {
  *aperture_base = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 45588b7919fe..43c1b67c2778 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -597,7 +597,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device 
*adev, u32 index)

  return 0;
    if (index < adev->doorbell.num_doorbells) {
-    return readl(adev->doorbell.ptr + index);
+    return readl(adev->mman.doorbell_aper_base_kaddr + index);
  } else {
  DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", 
index);

  return 0;
@@ -620,7 +620,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device 
*adev, u32 index, u32 v)

  return;
    if (index < adev->doorbell.num_doorbells) {
-    writel(v, adev->doorbell.ptr + index);
+    writel(v, adev->mman.doorbell_aper_base_kaddr + index);
  } else {
  DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", 
index);

  }
@@ -641,7 +641,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device 
*adev, u32 index)

  return 0;
    if (index < adev->doorbell.num_doorbells) {
-    return atomic64_read((atomic64_t *)(adev->doorbell.ptr + 
index));
+    return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index));

  } else {
  DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", 
index);

  return 0;
@@ -664,7 +664,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device 
*adev, u32 index, u64 v)

  return;
    if (index < adev->doorbell.num_doorbells) {
-    atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
+    atomic64_set((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index), v);

  } else {
  DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", 
index);

  }
@@ -1035,10 +1035,10 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)

    /* No doorbell on SI hardware generation */
  if (adev->asic_type < CHIP_BONAIRE) {
-    adev->doorbell.base = 0;
-    adev->doorbell.size = 0;
+    adev->doorbell.doorbell_aper_base = 0;
+    adev->doorbell.doorbell_aper_size = 0;
  ade

Re: [PATCH v2 5/8] drm/amdgpu: accommodate DOMAIN/PL_DOORBELL

2023-02-14 Thread Shashank Sharma



On 14/02/2023 19:31, Christian König wrote:

Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Alex Deucher 

This patch adds changes:
- to accommodate the new GEM domain DOORBELL
- to accommodate the new TTM PL DOORBELL

to manage doorbell allocations as GEM Objects.

V2: Addressed reviwe comments from Christian
 - drop the doorbell changes for pinning/unpinning
 - drop the doorbell changes for dma-buf map
 - drop the doorbell changes for sgt
 - no need to handle TTM_PL_FLAG_CONTIGUOUS for doorbell

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 11 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 11 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  1 +
  3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index b48c9fd60c43..ff9979fecfd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -147,6 +147,14 @@ void amdgpu_bo_placement_from_domain(struct 
amdgpu_bo *abo, u32 domain)

  c++;
  }
  +    if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {
+    places[c].fpfn = 0;
+    places[c].lpfn = 0;
+    places[c].mem_type = AMDGPU_PL_DOORBELL;
+    places[c].flags = 0;
+    c++;
+    }
+


Mhm, do we need to increase AMDGPU_BO_MAX_PLACEMENTS?

I think the answer is *no* since mixing DOORBELL with CPU, GTT or VRAM 
placement doesn't make sense, but do we enforce that somewhere?

I am not sure why do we need that ?



  if (domain & AMDGPU_GEM_DOMAIN_GTT) {
  places[c].fpfn = 0;
  places[c].lpfn = 0;
@@ -466,7 +474,7 @@ static bool amdgpu_bo_validate_size(struct 
amdgpu_device *adev,

  goto fail;
  }
  -    /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */
+    /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU,  
AMDGPU_GEM_DOMAIN_DOORBELL */


Should we enforce that user space can only allocate 1 page doorbells?


Should we add a per-PID basis check ?

- Shashank


  return true;
    fail:
@@ -1014,6 +1022,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo)
  } else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
  atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
  }
+


Unrelated change.


Noted

- Shashank



Regards,
Christian.


  }
    static const char *amdgpu_vram_names[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index 0e8f580769ab..e9dc24191fc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -128,6 +128,7 @@ static void amdgpu_evict_flags(struct 
ttm_buffer_object *bo,

  case AMDGPU_PL_GDS:
  case AMDGPU_PL_GWS:
  case AMDGPU_PL_OA:
+    case AMDGPU_PL_DOORBELL:
  placement->num_placement = 0;
  placement->num_busy_placement = 0;
  return;
@@ -500,9 +501,11 @@ static int amdgpu_bo_move(struct 
ttm_buffer_object *bo, bool evict,

  if (old_mem->mem_type == AMDGPU_PL_GDS ||
  old_mem->mem_type == AMDGPU_PL_GWS ||
  old_mem->mem_type == AMDGPU_PL_OA ||
+    old_mem->mem_type == AMDGPU_PL_DOORBELL ||
  new_mem->mem_type == AMDGPU_PL_GDS ||
  new_mem->mem_type == AMDGPU_PL_GWS ||
-    new_mem->mem_type == AMDGPU_PL_OA) {
+    new_mem->mem_type == AMDGPU_PL_OA ||
+    new_mem->mem_type == AMDGPU_PL_DOORBELL) {
  /* Nothing to save here */
  ttm_bo_move_null(bo, new_mem);
  goto out;
@@ -586,6 +589,11 @@ static int amdgpu_ttm_io_mem_reserve(struct 
ttm_device *bdev,

  mem->bus.offset += adev->gmc.vram_aper_base;
  mem->bus.is_iomem = true;
  break;
+    case AMDGPU_PL_DOORBELL:
+    mem->bus.offset = mem->start << PAGE_SHIFT;
+    mem->bus.offset += adev->doorbell.doorbell_aper_base;
+    mem->bus.is_iomem = true;
+    break;
  default:
  return -EINVAL;
  }
@@ -1267,6 +1275,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt 
*ttm, struct ttm_resource *mem)

  flags |= AMDGPU_PTE_VALID;
    if (mem && (mem->mem_type == TTM_PL_TT ||
+    mem->mem_type == AMDGPU_PL_DOORBELL ||
  mem->mem_type == AMDGPU_PL_PREEMPT)) {
  flags |= AMDGPU_PTE_SYSTEM;
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

index 967b265dbfa1..9cf5d8419965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -33,6 +33,7 @@
  #define AMDGPU_PL_GWS    (TTM_PL_PRIV + 1)
  #define AMDGPU_PL_OA    (TTM_PL_PRIV + 2)
  #define AMDGPU_PL_PREEMPT    (TTM_PL_PRIV + 3)
+#define AMDGPU_PL_DOORBELL    (TTM_PL_PRIV + 4)
    #define AMDGPU_GTT_MAX_TRANSFER_SIZE    512
  #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS    2




Re: [PATCH v2 7/8] drm/amdgpu: create doorbell kernel object

2023-02-14 Thread Shashank Sharma



On 14/02/2023 19:35, Christian König wrote:

Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Shashank Sharma 

This patch does the following:
- Initializes TTM range management for domain DOORBELL.
- Introduces a kernel bo for doorbell management in form of 
mman.doorbell_kernel_bo.

   This bo holds the kernel doorbell space now.
- Removes ioremapping of doorbell-kernel memory, as its not required 
now.


V2:
- Addressed review comments from Christian:
 - do not use kernel_create_at(0), use kernel_create() instead.
 - do not use ttm_resource_manager, use range_manager instead.
 - do not ioremap doorbell, TTM will do that.
- Split one big patch into 2

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 22 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  7 +++
  2 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index e9dc24191fc8..086e83c17c0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1879,12 +1879,32 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
  return r;
  }
  +    r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, 
adev->doorbell.doorbell_aper_size);

+    if (r) {
+    DRM_ERROR("Failed initializing oa heap.\n");
+    return r;
+    }
+
  if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
  AMDGPU_GEM_DOMAIN_GTT,
  &adev->mman.sdma_access_bo, NULL,
  &adev->mman.sdma_access_ptr))
  DRM_WARN("Debug VRAM access will use slowpath MM access\n");
  +    /* Create a doorbell BO for kernel usages */
+    r = amdgpu_bo_create_kernel(adev,
+    adev->mman.doorbell_kernel_bo_size,
+    PAGE_SIZE,
+    AMDGPU_GEM_DOMAIN_DOORBELL,
+    &adev->mman.doorbell_kernel_bo,
+    &adev->mman.doorbell_gpu_addr,
+    (void **)&adev->mman.doorbell_cpu_addr);
+
+    if (r) {
+    DRM_ERROR("Failed to create doorbell BO, err=%d\n", r);
+    return r;
+    }
+


I would even move this before the SDMA VRAM buffer since the later is 
only nice to have while the doorbell is mandatory to have.

Agree,



  return 0;
  }
  @@ -1908,6 +1928,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
    NULL, NULL);
  amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
  &adev->mman.sdma_access_ptr);
+ amdgpu_bo_free_kernel(&adev->mman.doorbell_kernel_bo,
+  NULL, (void **)&adev->mman.doorbell_cpu_addr);
  amdgpu_ttm_fw_reserve_vram_fini(adev);
  amdgpu_ttm_drv_reserve_vram_fini(adev);
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

index 9cf5d8419965..50748ff1dd3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -97,6 +97,13 @@ struct amdgpu_mman {
  /* PAGE_SIZE'd BO for process memory r/w over SDMA. */
  struct amdgpu_bo    *sdma_access_bo;
  void    *sdma_access_ptr;
+
+    /* doorbells reserved for the kernel driver */
+    u32    num_kernel_doorbells;    /* Number of doorbells 
actually reserved for kernel */

+    uint64_t    doorbell_kernel_bo_size;


That looks like duplicated information. We should only keep either the 
number of kernel doorbells or the kernel doorbell bo size around, not 
both.

Yeah, agree. I can remove one of these two.


And BTW please no comment after structure members.


Noted,

- Shashank


Christian.


+    uint64_t    doorbell_gpu_addr;
+    struct amdgpu_bo    *doorbell_kernel_bo;
+    u32    *doorbell_cpu_addr;
  };
    struct amdgpu_copy_mem {




Re: [PATCH v2 8/8] drm/amdgpu: start using kernel doorbell bo

2023-02-14 Thread Shashank Sharma



On 14/02/2023 19:40, Christian König wrote:

Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Shashank Sharma 

This patch does the following:

- Adds new variables like mman.doorbell_bo_size/gpu_addr/cpu_addr.
   The cpu_addr ptr will be used now for doorbell read/write from
   doorbell BAR.
- Adjusts the existing code to use kernel doorbell BO's size and its
   cpu_address.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 


Maybe squash this one together with the previous patch.


I just split it from the last patch in this series, thought it was too 
scattered and might not be


easy to review :D




But see below.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  5 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 33 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h |  1 -
  3 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index 0493c64e9d0a..87f486f522ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -109,11 +109,10 @@ static void amdgpu_doorbell_get_kfd_info(struct 
amdgpu_device *adev,

  *aperture_base = adev->doorbell.doorbell_aper_base;
  *aperture_size = 0;
  *start_offset = 0;
-    } else if (adev->doorbell.doorbell_aper_size > 
adev->doorbell.num_doorbells *

-    sizeof(u32)) {
+    } else if (adev->doorbell.doorbell_aper_size > 
adev->mman.doorbell_kernel_bo_size) {

  *aperture_base = adev->doorbell.doorbell_aper_base;
  *aperture_size = adev->doorbell.doorbell_aper_size;
-    *start_offset = adev->doorbell.num_doorbells * sizeof(u32);
+    *start_offset = adev->mman.doorbell_kernel_bo_size;
  } else {
  *aperture_base = 0;
  *aperture_size = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 43c1b67c2778..fde199434579 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -596,8 +596,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device 
*adev, u32 index)

  if (amdgpu_device_skip_hw_access(adev))
  return 0;
  -    if (index < adev->doorbell.num_doorbells) {
-    return readl(adev->mman.doorbell_aper_base_kaddr + index);
+    if (index < adev->mman.num_kernel_doorbells) {
+    return readl(adev->mman.doorbell_cpu_addr + index);
  } else {
  DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", 
index);

  return 0;
@@ -619,8 +619,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device 
*adev, u32 index, u32 v)

  if (amdgpu_device_skip_hw_access(adev))
  return;
  -    if (index < adev->doorbell.num_doorbells) {
-    writel(v, adev->mman.doorbell_aper_base_kaddr + index);
+    if (index < adev->mman.num_kernel_doorbells) {
+    writel(v, adev->mman.doorbell_cpu_addr + index);
  } else {
  DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", 
index);

  }
@@ -640,8 +640,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device 
*adev, u32 index)

  if (amdgpu_device_skip_hw_access(adev))
  return 0;
  -    if (index < adev->doorbell.num_doorbells) {
-    return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index));

+    if (index < adev->mman.num_kernel_doorbells) {
+    return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_cpu_addr + index));

  } else {
  DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", 
index);

  return 0;
@@ -663,8 +663,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device 
*adev, u32 index, u64 v)

  if (amdgpu_device_skip_hw_access(adev))
  return;
  -    if (index < adev->doorbell.num_doorbells) {
-    atomic64_set((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index), v);

+    if (index < adev->mman.num_kernel_doorbells) {
+    atomic64_set((atomic64_t *)(adev->mman.doorbell_cpu_addr + 
index), v);

  } else {
  DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", 
index);

  }
@@ -1037,7 +1037,7 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)

  if (adev->asic_type < CHIP_BONAIRE) {
  adev->doorbell.doorbell_aper_base = 0;
  adev->doorbell.doorbell_aper_size = 0;
-    adev->doorbell.num_doorbells = 0;
+    adev->mman.num_kernel_doorbells = 0;
  adev->mman.doorbell_aper_base_kaddr = NULL;
  return 0;
  }
@@ -1052,13 +1052,13 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)
  adev->doorbell.doorbell_aper_size = 
pci_resource_len(adev->pdev, 2);

    if (adev->enable_mes) {
-    adev->doorbell.num_doorbells =
+    adev->mman.num_kernel_doorbells =
  adev->doorbell.doorbell_aper_size / sizeof(u32);
  } else {
- 

Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum

2023-02-14 Thread Sebastian Wick
On Tue, Feb 14, 2023 at 5:57 PM Harry Wentland  wrote:
>
>
>
> On 2/14/23 10:49, Sebastian Wick wrote:
> > On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä
> >  wrote:
> >>
> >> On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote:
> >>>
> >>>
> >>> On 2/3/23 10:19, Ville Syrjälä wrote:
>  On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote:
> >
> >
> > On 2/3/23 07:59, Sebastian Wick wrote:
> >> On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä
> >>  wrote:
> >>>
> >>> On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote:
>  Userspace has no way of controlling or knowing the pixel encoding
>  currently, so there is no way for it to ever get the right values 
>  here.
> >>>
> >>> That applies to a lot of the other values as well (they are
> >>> explicitly RGB or YCC). The idea was that this property sets the
> >>> infoframe/MSA/SDP value exactly, and other properties should be
> >>> added to for use userspace to control the pixel encoding/colorspace
> >>> conversion(if desired, or userspace just makes sure to
> >>> directly feed in correct kind of data).
> >>
> >> I'm all for getting userspace control over pixel encoding but even
> >> then the kernel always knows which pixel encoding is selected and
> >> which InfoFrame has to be sent. Is there a reason why userspace would
> >> want to control the variant explicitly to the wrong value?
> >>
> >
> > I've asked this before but haven't seen an answer: Is there an existing
> > upstream userspace project that makes use of this property (other than
> > what Joshua is working on in gamescope right now)? That would help us
> > understand the intent better.
> 
>  The intent was to control the infoframe colorimetry bits,
>  nothing more. No idea what real userspace there was, if any.
> 
> >
> > I don't think giving userspace explicit control over the exact infoframe
> > values is the right thing to do.
> 
>  Only userspace knows what kind of data it's stuffing into
>  the pixels (and/or how it configures the csc units/etc.) to
>  generate them.
> 
> >>>
> >>> Yes, but userspace doesn't control or know whether we drive
> >>> RGB or YCbCr on the wire. In fact, in some cases our driver
> >>> needs to fallback to YCbCr420 for bandwidth reasons. There
> >>> is currently no way for userspace to know that and I don't
> >>> think it makes sense.
> >>
> >> People want that control as well for whatever reason. We've
> >> been asked to allow YCbCr 4:4:4 output many times.
> >
> > I don't really think it's a question of if we want it but rather how
> > we get there. Harry is completely right that if we would make the
> > subsampling controllable by user space instead of the kernel handling
> > it magically, user space which does not adapt to the new control won't
> > be able to light up some modes which worked before.
> >
>
> Thanks for continuing this discussion and touching on the model of how
> we get to where we want to go.
>
> > This is obviously a problem and not one we can easily fix. We would
> > need a new cap for user space to signal "I know that I can control
> > bpc, subsampling and compression to lower the bandwidth and light up
> > modes which otherwise fail". That cap would also remove all the
> > properties which require kernel magic to work (that's also what I
> > proposed for my KMS color pipeline API).
> >
> > We all want to expose more of the scanout capability and give user
> > space more control but I don't think an incremental approach works
> > here and we would all do better if we accept that the current API
> > requires kernel magic to work and has a few implicit assumptions baked
> > in.
> >
> > With all that being said, I think the right decision here is to
> >
> > 1. Ignore subsampling for now
> > 2. Let the kernel select YCC or RGB on the cable
> > 3. Let the kernel figure out the conversion between RGB and YCC based
> > on the color space selected
> > 4. Let the kernel send the correct infoframe based on the selected
> > color space and cable encoding
> > 5. Only expose color spaces for which the kernel can do the conversion
> > and send the infoframe
>
> I agree. We don't want to break or change existing behavior (that is
> used by userspace) and this will get us far without breaking things.
>
> > 6. Work on the new API which is hidden behind a cap
> >
>
> I assume you mean something like
> https://gitlab.freedesktop.org/pq/color-and-hdr/-/issues/11

Something like that, yes. The main point being a cap which removes a
lot of properties and sets new expectations between user space and
kernel. The actual API is not that important.

> Above you say that you don't think an incremental approach works
> here. Can you elaborate?

Backwards compatibility is really hard. If we add another property to
control e.g. the color range infoframe which doesn't magicall

Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum

2023-02-14 Thread Harry Wentland




On 2/14/23 14:45, Sebastian Wick wrote:

On Tue, Feb 14, 2023 at 5:57 PM Harry Wentland  wrote:




On 2/14/23 10:49, Sebastian Wick wrote:

On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä
 wrote:


On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote:



On 2/3/23 10:19, Ville Syrjälä wrote:

On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote:



On 2/3/23 07:59, Sebastian Wick wrote:

On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä
 wrote:


On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote:

Userspace has no way of controlling or knowing the pixel encoding
currently, so there is no way for it to ever get the right values here.


That applies to a lot of the other values as well (they are
explicitly RGB or YCC). The idea was that this property sets the
infoframe/MSA/SDP value exactly, and other properties should be
added to for use userspace to control the pixel encoding/colorspace
conversion(if desired, or userspace just makes sure to
directly feed in correct kind of data).


I'm all for getting userspace control over pixel encoding but even
then the kernel always knows which pixel encoding is selected and
which InfoFrame has to be sent. Is there a reason why userspace would
want to control the variant explicitly to the wrong value?



I've asked this before but haven't seen an answer: Is there an existing
upstream userspace project that makes use of this property (other than
what Joshua is working on in gamescope right now)? That would help us
understand the intent better.


The intent was to control the infoframe colorimetry bits,
nothing more. No idea what real userspace there was, if any.



I don't think giving userspace explicit control over the exact infoframe
values is the right thing to do.


Only userspace knows what kind of data it's stuffing into
the pixels (and/or how it configures the csc units/etc.) to
generate them.



Yes, but userspace doesn't control or know whether we drive
RGB or YCbCr on the wire. In fact, in some cases our driver
needs to fallback to YCbCr420 for bandwidth reasons. There
is currently no way for userspace to know that and I don't
think it makes sense.


People want that control as well for whatever reason. We've
been asked to allow YCbCr 4:4:4 output many times.


I don't really think it's a question of if we want it but rather how
we get there. Harry is completely right that if we would make the
subsampling controllable by user space instead of the kernel handling
it magically, user space which does not adapt to the new control won't
be able to light up some modes which worked before.



Thanks for continuing this discussion and touching on the model of how
we get to where we want to go.


This is obviously a problem and not one we can easily fix. We would
need a new cap for user space to signal "I know that I can control
bpc, subsampling and compression to lower the bandwidth and light up
modes which otherwise fail". That cap would also remove all the
properties which require kernel magic to work (that's also what I
proposed for my KMS color pipeline API).

We all want to expose more of the scanout capability and give user
space more control but I don't think an incremental approach works
here and we would all do better if we accept that the current API
requires kernel magic to work and has a few implicit assumptions baked
in.

With all that being said, I think the right decision here is to

1. Ignore subsampling for now
2. Let the kernel select YCC or RGB on the cable
3. Let the kernel figure out the conversion between RGB and YCC based
on the color space selected
4. Let the kernel send the correct infoframe based on the selected
color space and cable encoding
5. Only expose color spaces for which the kernel can do the conversion
and send the infoframe


I agree. We don't want to break or change existing behavior (that is
used by userspace) and this will get us far without breaking things.


6. Work on the new API which is hidden behind a cap



I assume you mean something like
https://gitlab.freedesktop.org/pq/color-and-hdr/-/issues/11


Something like that, yes. The main point being a cap which removes a
lot of properties and sets new expectations between user space and
kernel. The actual API is not that important.


Above you say that you don't think an incremental approach works
here. Can you elaborate?


Backwards compatibility is really hard. If we add another property to
control e.g. the color range infoframe which doesn't magically convert
colors, we now have to define how it interacts with the existing
property. We also have to figure out how a user space which doesn't
know about the new property behaves when another client has set that
property. If any property which currently might change the pixel
values is used, we can't expose the entire color pipeline because the
kernel might have to use some element in it to achieve its magic
conversion. So essentially you already have this hard device between
"old" and "new" and 

Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum

2023-02-14 Thread Ville Syrjälä
On Tue, Feb 14, 2023 at 08:45:00PM +0100, Sebastian Wick wrote:
> On Tue, Feb 14, 2023 at 5:57 PM Harry Wentland  wrote:
> >
> >
> >
> > On 2/14/23 10:49, Sebastian Wick wrote:
> > > On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä
> > >  wrote:
> > >>
> > >> On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote:
> > >>>
> > >>>
> > >>> On 2/3/23 10:19, Ville Syrjälä wrote:
> >  On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote:
> > >
> > >
> > > On 2/3/23 07:59, Sebastian Wick wrote:
> > >> On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä
> > >>  wrote:
> > >>>
> > >>> On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote:
> >  Userspace has no way of controlling or knowing the pixel encoding
> >  currently, so there is no way for it to ever get the right values 
> >  here.
> > >>>
> > >>> That applies to a lot of the other values as well (they are
> > >>> explicitly RGB or YCC). The idea was that this property sets the
> > >>> infoframe/MSA/SDP value exactly, and other properties should be
> > >>> added to for use userspace to control the pixel encoding/colorspace
> > >>> conversion(if desired, or userspace just makes sure to
> > >>> directly feed in correct kind of data).
> > >>
> > >> I'm all for getting userspace control over pixel encoding but even
> > >> then the kernel always knows which pixel encoding is selected and
> > >> which InfoFrame has to be sent. Is there a reason why userspace would
> > >> want to control the variant explicitly to the wrong value?
> > >>
> > >
> > > I've asked this before but haven't seen an answer: Is there an 
> > > existing
> > > upstream userspace project that makes use of this property (other than
> > > what Joshua is working on in gamescope right now)? That would help us
> > > understand the intent better.
> > 
> >  The intent was to control the infoframe colorimetry bits,
> >  nothing more. No idea what real userspace there was, if any.
> > 
> > >
> > > I don't think giving userspace explicit control over the exact 
> > > infoframe
> > > values is the right thing to do.
> > 
> >  Only userspace knows what kind of data it's stuffing into
> >  the pixels (and/or how it configures the csc units/etc.) to
> >  generate them.
> > 
> > >>>
> > >>> Yes, but userspace doesn't control or know whether we drive
> > >>> RGB or YCbCr on the wire. In fact, in some cases our driver
> > >>> needs to fallback to YCbCr420 for bandwidth reasons. There
> > >>> is currently no way for userspace to know that and I don't
> > >>> think it makes sense.
> > >>
> > >> People want that control as well for whatever reason. We've
> > >> been asked to allow YCbCr 4:4:4 output many times.
> > >
> > > I don't really think it's a question of if we want it but rather how
> > > we get there. Harry is completely right that if we would make the
> > > subsampling controllable by user space instead of the kernel handling
> > > it magically, user space which does not adapt to the new control won't
> > > be able to light up some modes which worked before.
> > >
> >
> > Thanks for continuing this discussion and touching on the model of how
> > we get to where we want to go.
> >
> > > This is obviously a problem and not one we can easily fix. We would
> > > need a new cap for user space to signal "I know that I can control
> > > bpc, subsampling and compression to lower the bandwidth and light up
> > > modes which otherwise fail". That cap would also remove all the
> > > properties which require kernel magic to work (that's also what I
> > > proposed for my KMS color pipeline API).
> > >
> > > We all want to expose more of the scanout capability and give user
> > > space more control but I don't think an incremental approach works
> > > here and we would all do better if we accept that the current API
> > > requires kernel magic to work and has a few implicit assumptions baked
> > > in.
> > >
> > > With all that being said, I think the right decision here is to
> > >
> > > 1. Ignore subsampling for now
> > > 2. Let the kernel select YCC or RGB on the cable
> > > 3. Let the kernel figure out the conversion between RGB and YCC based
> > > on the color space selected
> > > 4. Let the kernel send the correct infoframe based on the selected
> > > color space and cable encoding
> > > 5. Only expose color spaces for which the kernel can do the conversion
> > > and send the infoframe
> >
> > I agree. We don't want to break or change existing behavior (that is
> > used by userspace) and this will get us far without breaking things.
> >
> > > 6. Work on the new API which is hidden behind a cap
> > >
> >
> > I assume you mean something like
> > https://gitlab.freedesktop.org/pq/color-and-hdr/-/issues/11
> 
> Something like that, yes. The main point being a cap which removes a
> lot of properties and sets new expecta

[PATCH] drm/amd/display: only warn once in dce110_edp_wait_for_hpd_ready()

2023-02-14 Thread Hamza Mahfooz
Since, hot plugging eDP displays isn't supported, it is sufficient for
us to warn about the lack of a connected display once. So, use ASSERT()
in dce110_edp_wait_for_hpd_ready() instead of DC_LOG_WARNING().

Signed-off-by: Hamza Mahfooz 
---
 drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index fb3fd5b7c78b..0d4d3d586166 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -779,10 +779,8 @@ void dce110_edp_wait_for_hpd_ready(
 
dal_gpio_destroy_irq(&hpd);
 
-   if (false == edp_hpd_high) {
-   DC_LOG_WARNING(
-   "%s: wait timed out!\n", __func__);
-   }
+   /* ensure that the panel is detected */
+   ASSERT(edp_hpd_high);
 }
 
 void dce110_edp_power_control(
-- 
2.39.1



Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum

2023-02-14 Thread Sebastian Wick
On Tue, Feb 14, 2023 at 9:10 PM Ville Syrjälä
 wrote:
>
> On Tue, Feb 14, 2023 at 08:45:00PM +0100, Sebastian Wick wrote:
> > On Tue, Feb 14, 2023 at 5:57 PM Harry Wentland  
> > wrote:
> > >
> > >
> > >
> > > On 2/14/23 10:49, Sebastian Wick wrote:
> > > > On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä
> > > >  wrote:
> > > >>
> > > >> On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote:
> > > >>>
> > > >>>
> > > >>> On 2/3/23 10:19, Ville Syrjälä wrote:
> > >  On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote:
> > > >
> > > >
> > > > On 2/3/23 07:59, Sebastian Wick wrote:
> > > >> On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä
> > > >>  wrote:
> > > >>>
> > > >>> On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote:
> > >  Userspace has no way of controlling or knowing the pixel encoding
> > >  currently, so there is no way for it to ever get the right 
> > >  values here.
> > > >>>
> > > >>> That applies to a lot of the other values as well (they are
> > > >>> explicitly RGB or YCC). The idea was that this property sets the
> > > >>> infoframe/MSA/SDP value exactly, and other properties should be
> > > >>> added to for use userspace to control the pixel 
> > > >>> encoding/colorspace
> > > >>> conversion(if desired, or userspace just makes sure to
> > > >>> directly feed in correct kind of data).
> > > >>
> > > >> I'm all for getting userspace control over pixel encoding but even
> > > >> then the kernel always knows which pixel encoding is selected and
> > > >> which InfoFrame has to be sent. Is there a reason why userspace 
> > > >> would
> > > >> want to control the variant explicitly to the wrong value?
> > > >>
> > > >
> > > > I've asked this before but haven't seen an answer: Is there an 
> > > > existing
> > > > upstream userspace project that makes use of this property (other 
> > > > than
> > > > what Joshua is working on in gamescope right now)? That would help 
> > > > us
> > > > understand the intent better.
> > > 
> > >  The intent was to control the infoframe colorimetry bits,
> > >  nothing more. No idea what real userspace there was, if any.
> > > 
> > > >
> > > > I don't think giving userspace explicit control over the exact 
> > > > infoframe
> > > > values is the right thing to do.
> > > 
> > >  Only userspace knows what kind of data it's stuffing into
> > >  the pixels (and/or how it configures the csc units/etc.) to
> > >  generate them.
> > > 
> > > >>>
> > > >>> Yes, but userspace doesn't control or know whether we drive
> > > >>> RGB or YCbCr on the wire. In fact, in some cases our driver
> > > >>> needs to fallback to YCbCr420 for bandwidth reasons. There
> > > >>> is currently no way for userspace to know that and I don't
> > > >>> think it makes sense.
> > > >>
> > > >> People want that control as well for whatever reason. We've
> > > >> been asked to allow YCbCr 4:4:4 output many times.
> > > >
> > > > I don't really think it's a question of if we want it but rather how
> > > > we get there. Harry is completely right that if we would make the
> > > > subsampling controllable by user space instead of the kernel handling
> > > > it magically, user space which does not adapt to the new control won't
> > > > be able to light up some modes which worked before.
> > > >
> > >
> > > Thanks for continuing this discussion and touching on the model of how
> > > we get to where we want to go.
> > >
> > > > This is obviously a problem and not one we can easily fix. We would
> > > > need a new cap for user space to signal "I know that I can control
> > > > bpc, subsampling and compression to lower the bandwidth and light up
> > > > modes which otherwise fail". That cap would also remove all the
> > > > properties which require kernel magic to work (that's also what I
> > > > proposed for my KMS color pipeline API).
> > > >
> > > > We all want to expose more of the scanout capability and give user
> > > > space more control but I don't think an incremental approach works
> > > > here and we would all do better if we accept that the current API
> > > > requires kernel magic to work and has a few implicit assumptions baked
> > > > in.
> > > >
> > > > With all that being said, I think the right decision here is to
> > > >
> > > > 1. Ignore subsampling for now
> > > > 2. Let the kernel select YCC or RGB on the cable
> > > > 3. Let the kernel figure out the conversion between RGB and YCC based
> > > > on the color space selected
> > > > 4. Let the kernel send the correct infoframe based on the selected
> > > > color space and cable encoding
> > > > 5. Only expose color spaces for which the kernel can do the conversion
> > > > and send the infoframe
> > >
> > > I agree. We don't want to break or change existing behavior (that is
> > > used by userspace) and this will get

Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum

2023-02-14 Thread Ville Syrjälä
On Tue, Feb 14, 2023 at 10:18:49PM +0100, Sebastian Wick wrote:
> On Tue, Feb 14, 2023 at 9:10 PM Ville Syrjälä
>  wrote:
> >
> > On Tue, Feb 14, 2023 at 08:45:00PM +0100, Sebastian Wick wrote:
> > > On Tue, Feb 14, 2023 at 5:57 PM Harry Wentland  
> > > wrote:
> > > >
> > > >
> > > >
> > > > On 2/14/23 10:49, Sebastian Wick wrote:
> > > > > On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä
> > > > >  wrote:
> > > > >>
> > > > >> On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote:
> > > > >>>
> > > > >>>
> > > > >>> On 2/3/23 10:19, Ville Syrjälä wrote:
> > > >  On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote:
> > > > >
> > > > >
> > > > > On 2/3/23 07:59, Sebastian Wick wrote:
> > > > >> On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä
> > > > >>  wrote:
> > > > >>>
> > > > >>> On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote:
> > > >  Userspace has no way of controlling or knowing the pixel 
> > > >  encoding
> > > >  currently, so there is no way for it to ever get the right 
> > > >  values here.
> > > > >>>
> > > > >>> That applies to a lot of the other values as well (they are
> > > > >>> explicitly RGB or YCC). The idea was that this property sets the
> > > > >>> infoframe/MSA/SDP value exactly, and other properties should be
> > > > >>> added to for use userspace to control the pixel 
> > > > >>> encoding/colorspace
> > > > >>> conversion(if desired, or userspace just makes sure to
> > > > >>> directly feed in correct kind of data).
> > > > >>
> > > > >> I'm all for getting userspace control over pixel encoding but 
> > > > >> even
> > > > >> then the kernel always knows which pixel encoding is selected and
> > > > >> which InfoFrame has to be sent. Is there a reason why userspace 
> > > > >> would
> > > > >> want to control the variant explicitly to the wrong value?
> > > > >>
> > > > >
> > > > > I've asked this before but haven't seen an answer: Is there an 
> > > > > existing
> > > > > upstream userspace project that makes use of this property (other 
> > > > > than
> > > > > what Joshua is working on in gamescope right now)? That would 
> > > > > help us
> > > > > understand the intent better.
> > > > 
> > > >  The intent was to control the infoframe colorimetry bits,
> > > >  nothing more. No idea what real userspace there was, if any.
> > > > 
> > > > >
> > > > > I don't think giving userspace explicit control over the exact 
> > > > > infoframe
> > > > > values is the right thing to do.
> > > > 
> > > >  Only userspace knows what kind of data it's stuffing into
> > > >  the pixels (and/or how it configures the csc units/etc.) to
> > > >  generate them.
> > > > 
> > > > >>>
> > > > >>> Yes, but userspace doesn't control or know whether we drive
> > > > >>> RGB or YCbCr on the wire. In fact, in some cases our driver
> > > > >>> needs to fallback to YCbCr420 for bandwidth reasons. There
> > > > >>> is currently no way for userspace to know that and I don't
> > > > >>> think it makes sense.
> > > > >>
> > > > >> People want that control as well for whatever reason. We've
> > > > >> been asked to allow YCbCr 4:4:4 output many times.
> > > > >
> > > > > I don't really think it's a question of if we want it but rather how
> > > > > we get there. Harry is completely right that if we would make the
> > > > > subsampling controllable by user space instead of the kernel handling
> > > > > it magically, user space which does not adapt to the new control won't
> > > > > be able to light up some modes which worked before.
> > > > >
> > > >
> > > > Thanks for continuing this discussion and touching on the model of how
> > > > we get to where we want to go.
> > > >
> > > > > This is obviously a problem and not one we can easily fix. We would
> > > > > need a new cap for user space to signal "I know that I can control
> > > > > bpc, subsampling and compression to lower the bandwidth and light up
> > > > > modes which otherwise fail". That cap would also remove all the
> > > > > properties which require kernel magic to work (that's also what I
> > > > > proposed for my KMS color pipeline API).
> > > > >
> > > > > We all want to expose more of the scanout capability and give user
> > > > > space more control but I don't think an incremental approach works
> > > > > here and we would all do better if we accept that the current API
> > > > > requires kernel magic to work and has a few implicit assumptions baked
> > > > > in.
> > > > >
> > > > > With all that being said, I think the right decision here is to
> > > > >
> > > > > 1. Ignore subsampling for now
> > > > > 2. Let the kernel select YCC or RGB on the cable
> > > > > 3. Let the kernel figure out the conversion between RGB and YCC based
> > > > > on the color space selected
> > > > > 4. Let the kernel send the correc

RE: [PATCH 1/2] drm/amdgpu: optimize VRAM allocation when using drm buddy

2023-02-14 Thread Xiao, Shane

For public review

> -Original Message-
> From: Koenig, Christian 
> Sent: Wednesday, February 15, 2023 3:02 AM
> To: Xiao, Shane ; Paneer Selvam, Arunpravin
> 
> Subject: Re: [PATCH 1/2] drm/amdgpu: optimize VRAM allocation when using
> drm buddy
> 
> Am 14.02.23 um 15:53 schrieb Xiao, Shane:
> >> -Original Message-
> >> From: Koenig, Christian 
> >> Sent: Tuesday, February 14, 2023 8:41 PM
> >> To: Xiao, Shane ; brahma_sw_dev
> >> 
> >> Cc: Paneer Selvam, Arunpravin 
> >> Subject: Re: [PATCH 1/2] drm/amdgpu: optimize VRAM allocation when
> >> using drm buddy
> >>
> >> Am 14.02.23 um 12:18 schrieb Shane Xiao:
> >>> Since the VRAM manager changed from drm mm to drm buddy. It's not
> >>> necessary to allocate 2MB aligned VRAM for more than 2MB unaligned
> >>> size, and then do trim. This method improves the allocation
> >>> efficiency and reduces memory fragmentation.
> >> Well that is a trade off.
> >>
> >> Allocating the BO as one contiguous chunk and then trimming is
> >> beneficial because if we then later need it contiguous we don't need
> >> to re-allocate and copy. This can be needed to display something for
> example.

Hi Christian,

This case means that you allocate BO that is unnecessary to be continuous at 
first time,
and latter the BO should be continuous. I'm not familiar with display. Could 
you give me 
a few more specific examples ?
   

> > Yes, I agree that one contiguous chunk may get beneficial sometimes.
> > But as far as I know, you cannot guarantee that amdgpu_vram_mgr_new
> can get one contiguous chunk  if you don't set TTM_PL_FLAG_CONTIGUOUS
> flags.
> > For example, if you want to allocate 4M+4K BO, it will allocate one 4M block
> + one 2M block which is unnecessary to be continuous, then 2M block will be
> trimmed.
> 
> Oh, that's indeed not something which should happen. Sounds more like a
> bug fix then.

Yes, I think this case should not be happened. 
Actually, I'm not sure that why the allocated BO should be aligned with 
pages_per_block, which is set to 2MB by default.
Does this help improve performance when allocating 2M or above BO?
From my point of view, the TLB may be one of reason of this. But I'm not sure 
about this.

Best Regards,
Shane 

> 
> >
> >> On the other hand I completely agree allocating big and then trimming
> >> creates more fragmentation than necessary.
> >>
> >> Do you have some test case which can show the difference?
> > I have use rocrtst to show the difference.
> > The attachment is shown that after applying this patch, the order < 9 total
> vram size decrease from 99MB to 43MB.
> > And the latter has more higher order block memory.
> 
> Arun can you take a look? That problem here sounds important.
> 
> Thanks,
> Christian.
> 
> >
> >> BTW: No need to discuss that on the internal mailing list, please use
> >> the public one instead.
> >>
> > I will send it to public. Thank you for your remind.
> >
> > Best Regards,
> > Shane
> >
> >> Regards,
> >> Christian.
> >>
> >>> Signed-off-by: Shane Xiao 
> >>> ---
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 2 +-
> >>>1 file changed, 1 insertion(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> >>> index 75c80c557b6e..3fea58f9427c 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> >>> @@ -453,7 +453,7 @@ static int amdgpu_vram_mgr_new(struct
> >> ttm_resource_manager *man,
> >>>   /* Limit maximum size to 2GiB due to SG table 
> >>> limitations */
> >>>   size = min(remaining_size, 2ULL << 30);
> >>>
> >>> - if (size >= (u64)pages_per_block << PAGE_SHIFT)
> >>> + if (!(size % ((u64)pages_per_block << PAGE_SHIFT)))
> >>>   min_block_size = (u64)pages_per_block <<
> >> PAGE_SHIFT;
> >>>   cur_size = size;



Re: [PATCH] drm/amd/display: Remove duplicate/repeating expression

2023-02-14 Thread Alex Deucher
Applied.  Thanks!

On Fri, Feb 10, 2023 at 10:22 AM Harry Wentland  wrote:
>
> On 2/10/23 05:11, Deepak R Varma wrote:
> > Remove duplicate or repeating expressions in the if condition
> > evaluation. Issue identified using doubletest.cocci Coccinelle semantic
> > patch.
> >
> > Signed-off-by: Deepak R Varma 
>
> Reviewed-by: Harry Wentland 
>
> Harry
>
> > ---
> >  .../gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git 
> > a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c 
> > b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
> > index 61ee9ba063a7..6576b897a512 100644
> > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
> > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
> > @@ -51,7 +51,7 @@ static bool CalculateBytePerPixelAnd256BBlockSizes(
> >   *BytePerPixelDETC = 0;
> >   *BytePerPixelY = 4;
> >   *BytePerPixelC = 0;
> > - } else if (SourcePixelFormat == dm_444_16 || SourcePixelFormat == 
> > dm_444_16) {
> > + } else if (SourcePixelFormat == dm_444_16) {
> >   *BytePerPixelDETY = 2;
> >   *BytePerPixelDETC = 0;
> >   *BytePerPixelY = 2;
>


Re: [PATCH] drm/amd/display: Remove duplicate/repeating expressions

2023-02-14 Thread Alex Deucher
Applied.  Thanks!

Alex

On Fri, Feb 10, 2023 at 2:37 PM Harry Wentland  wrote:
>
> On 2/10/23 05:00, Deepak R Varma wrote:
> > Remove duplicate or repeating expressions in the if condition
> > evaluation. Issue identified using doubletest.cocci Coccinelle semantic
> > patch.
> >
> > Signed-off-by: Deepak R Varma 
>
> Reviewed-by: Harry Wentland 
>
> Harry
>
> > ---
> >  .../gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c| 4 +---
> >  1 file changed, 1 insertion(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
> > b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
> > index 4b8f5fa0f0ad..ae89760d887d 100644
> > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
> > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
> > @@ -2335,8 +2335,7 @@ void 
> > dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
> >
> >   if (mode_lib->vba.DSCEnable[k] && 
> > mode_lib->vba.ForcedOutputLinkBPP[k] != 0)
> >   mode_lib->vba.DSCOnlyIfNecessaryWithBPP = 
> > true;
> > - if ((mode_lib->vba.DSCEnable[k] || 
> > mode_lib->vba.DSCEnable[k])
> > - && mode_lib->vba.OutputFormat[k] == 
> > dm_n422
> > + if (mode_lib->vba.DSCEnable[k] && 
> > mode_lib->vba.OutputFormat[k] == dm_n422
> >   && !mode_lib->vba.DSC422NativeSupport)
> >   mode_lib->vba.DSC422NativeNotSupported = true;
> >
> > @@ -3639,7 +3638,6 @@ void 
> > dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
> >   if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
> >   && mode_lib->vba.SourcePixelFormat[k] 
> > != dm_444_32
> >   && mode_lib->vba.SourcePixelFormat[k] 
> > != dm_444_16
> > - && mode_lib->vba.SourcePixelFormat[k] 
> > != dm_444_16
> >   && mode_lib->vba.SourcePixelFormat[k] 
> > != dm_444_8
> >   && mode_lib->vba.SourcePixelFormat[k] 
> > != dm_rgbe) {
> >   if (mode_lib->vba.ViewportWidthChroma[k] > 
> > mode_lib->vba.SurfaceWidthC[k]
>


Re: [PATCH] drm/amd/display: avoid unaligned access warnings

2023-02-14 Thread Alex Deucher
Applied.  Thanks!

On Tue, Feb 14, 2023 at 1:56 AM Jonathan Gray  wrote:
>
> When building on OpenBSD/arm64 with clang 15, unaligned access
> warnings are seen when a union is embedded inside a packed struct.
>
> drm/amd/display/dmub/inc/dmub_cmd.h:941:18: error: field
>   cursor_copy_src within 'struct dmub_rb_cmd_mall' is less aligned than
>   'union dmub_addr' and is usually due to 'struct dmub_rb_cmd_mall'
>   being packed, which can lead to unaligned accesses
>   [-Werror,-Wunaligned-access]
> union dmub_addr cursor_copy_src; /**< Cursor copy address */
> ^
> drm/amd/display/dmub/inc/dmub_cmd.h:942:18: error: field cursor_copy_dst
>   within 'struct dmub_rb_cmd_mall' is less aligned than
>   'union dmub_addr' and is usually due to 'struct dmub_rb_cmd_mall'
>   being packed, which can lead to unaligned accesses
>   [-Werror,-Wunaligned-access]
> union dmub_addr cursor_copy_dst; /**< Cursor copy destination */
> ^
>
> Add pragma pack around dmub_addr to avoid this.
>
> Signed-off-by: Jonathan Gray 
> ---
>  drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h 
> b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
> index 33907feefebb..dc92d06572a3 100644
> --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
> +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
> @@ -162,6 +162,7 @@ extern "C" {
>  #define dmub_udelay(microseconds) udelay(microseconds)
>  #endif
>
> +#pragma pack(push, 1)
>  /**
>   * union dmub_addr - DMUB physical/virtual 64-bit address.
>   */
> @@ -172,6 +173,7 @@ union dmub_addr {
> } u; /*<< Low/high bit access */
> uint64_t quad_part; /*<< 64 bit address */
>  };
> +#pragma pack(pop)
>
>  /**
>   * Dirty rect definition.
> --
> 2.39.1
>


Re: [PATCH] drm/amd/pm: avoid unaligned access warnings

2023-02-14 Thread Alex Deucher
Applied.  Thanks!

Alex

On Tue, Feb 14, 2023 at 1:58 AM Jonathan Gray  wrote:
>
> When building on OpenBSD/arm64 with clang 15, unaligned access
> warnings are seen when a union is embedded inside a packed struct.
>
> drm/amd/pm/powerplay/hwmgr/vega20_pptable.h:136:17: error: field
>   smcPPTable within 'struct _ATOM_VEGA20_POWERPLAYTABLE' is less aligned
>   than 'PPTable_t' and is usually due to
>   'struct _ATOM_VEGA20_POWERPLAYTABLE' being packed, which can lead to
>unaligned accesses [-Werror,-Wunaligned-access]
>   PPTable_t smcPPTable;
> ^
>
> Make PPTable_t packed to avoid this.
>
> Signed-off-by: Jonathan Gray 
> ---
>  drivers/gpu/drm/amd/pm/powerplay/inc/smu11_driver_if.h  | 2 ++
>  drivers/gpu/drm/amd/pm/powerplay/inc/smu9_driver_if.h   | 2 ++
>  drivers/gpu/drm/amd/pm/powerplay/inc/vega12/smu9_driver_if.h| 2 ++
>  .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_arcturus.h | 2 ++
>  .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_navi10.h   | 2 ++
>  .../amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h   | 2 ++
>  .../drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h| 2 ++
>  .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h  | 2 ++
>  .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h  | 2 ++
>  9 files changed, 18 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/smu11_driver_if.h 
> b/drivers/gpu/drm/amd/pm/powerplay/inc/smu11_driver_if.h
> index fdc6b7a57bc9..c2efc70ef288 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/inc/smu11_driver_if.h
> +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/smu11_driver_if.h
> @@ -358,6 +358,7 @@ typedef struct {
>QuadraticInt_t SsCurve;
>  } DpmDescriptor_t;
>
> +#pragma pack(push, 1)
>  typedef struct {
>uint32_t Version;
>
> @@ -609,6 +610,7 @@ typedef struct {
>uint32_t MmHubPadding[8];
>
>  } PPTable_t;
> +#pragma pack(pop)
>
>  typedef struct {
>
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/smu9_driver_if.h 
> b/drivers/gpu/drm/amd/pm/powerplay/inc/smu9_driver_if.h
> index 2818c98ff5ca..faae4b918d90 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/inc/smu9_driver_if.h
> +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/smu9_driver_if.h
> @@ -122,6 +122,7 @@ typedef struct {
>uint16_t Vid;  /* min voltage in SVI2 VID */
>  } DisplayClockTable_t;
>
> +#pragma pack(push, 1)
>  typedef struct {
>/* PowerTune */
>uint16_t SocketPowerLimit; /* Watts */
> @@ -323,6 +324,7 @@ typedef struct {
>uint32_t MmHubPadding[3]; /* SMU internal use */
>
>  } PPTable_t;
> +#pragma pack(pop)
>
>  typedef struct {
>uint16_t MinClock; // This is either DCEFCLK or SOCCLK (in MHz)
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/vega12/smu9_driver_if.h 
> b/drivers/gpu/drm/amd/pm/powerplay/inc/vega12/smu9_driver_if.h
> index b6ffd08784e7..6456bea5d2d5 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/inc/vega12/smu9_driver_if.h
> +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/vega12/smu9_driver_if.h
> @@ -245,6 +245,7 @@ typedef struct {
>QuadraticInt_t SsCurve;
>  } DpmDescriptor_t;
>
> +#pragma pack(push, 1)
>  typedef struct {
>uint32_t Version;
>
> @@ -508,6 +509,7 @@ typedef struct {
>uint32_t MmHubPadding[7];
>
>  } PPTable_t;
> +#pragma pack(pop)
>
>  typedef struct {
>
> diff --git 
> a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_arcturus.h 
> b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_arcturus.h
> index 43d43d6addc0..d518dee18e1b 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_arcturus.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_arcturus.h
> @@ -464,6 +464,7 @@ typedef struct {
>uint16_t   Padding16;
>  } DpmDescriptor_t;
>
> +#pragma pack(push, 1)
>  typedef struct {
>uint32_t Version;
>
> @@ -733,6 +734,7 @@ typedef struct {
>uint32_t MmHubPadding[8]; // SMU internal use
>
>  } PPTable_t;
> +#pragma pack(pop)
>
>  typedef struct {
>// Time constant parameters for clock averages in ms
> diff --git 
> a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_navi10.h 
> b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_navi10.h
> index 04752ade1016..c5c1943fb6a1 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_navi10.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_navi10.h
> @@ -515,6 +515,7 @@ typedef struct {
>uint32_t BoardLevelEnergyAccumulator;
>  } OutOfBandMonitor_t;
>
> +#pragma pack(push, 1)
>  typedef struct {
>uint32_t Version;
>
> @@ -814,6 +815,7 @@ typedef struct {
>uint32_t MmHubPadding[8]; // SMU internal use
>
>  } PPTable_t;
> +#pragma pack(pop)
>
>  typedef struct {
>// Time constant parameters for clock averages in ms
> diff --git 
> a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h 
> b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h
> index 351a4af429b3..aa6d29de4

[PATCH] drm/amdgpu: remove TOPDOWN flags when allocating VRAM in large bar system

2023-02-14 Thread Shane Xiao
Since VRAM manager is changed from drm mm to drm buddy, the
TOP_DOWN flag should not be set by default in the large bar system.
Removing this flag helps improve drm buddy allactor efficiency and
reduce the risk of splitting higher order block into lower order.

Signed-off-by: Shane Xiao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 2d237f3d3a2e..1c3e647400bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -139,7 +139,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, 
u32 domain)
 
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
places[c].lpfn = visible_pfn;
-   else
+   else if (adev->gmc.real_vram_size != 
adev->gmc.visible_vram_size)
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
 
if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-- 
2.25.1



Re: [PATCH v2 5/8] drm/amdgpu: accommodate DOMAIN/PL_DOORBELL

2023-02-14 Thread Christian König

Am 14.02.23 um 20:24 schrieb Shashank Sharma:


On 14/02/2023 19:31, Christian König wrote:

Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Alex Deucher 

This patch adds changes:
- to accommodate the new GEM domain DOORBELL
- to accommodate the new TTM PL DOORBELL

to manage doorbell allocations as GEM Objects.

V2: Addressed reviwe comments from Christian
 - drop the doorbell changes for pinning/unpinning
 - drop the doorbell changes for dma-buf map
 - drop the doorbell changes for sgt
 - no need to handle TTM_PL_FLAG_CONTIGUOUS for doorbell

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 11 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 11 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  1 +
  3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index b48c9fd60c43..ff9979fecfd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -147,6 +147,14 @@ void amdgpu_bo_placement_from_domain(struct 
amdgpu_bo *abo, u32 domain)

  c++;
  }
  +    if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {
+    places[c].fpfn = 0;
+    places[c].lpfn = 0;
+    places[c].mem_type = AMDGPU_PL_DOORBELL;
+    places[c].flags = 0;
+    c++;
+    }
+


Mhm, do we need to increase AMDGPU_BO_MAX_PLACEMENTS?

I think the answer is *no* since mixing DOORBELL with CPU, GTT or 
VRAM placement doesn't make sense, but do we enforce that somewhere?

I am not sure why do we need that ?


Userspace could otherwise specify DOORBEEL|CPU|GTT|VRAM as placement 
which would overrun the array and be illegal.





  if (domain & AMDGPU_GEM_DOMAIN_GTT) {
  places[c].fpfn = 0;
  places[c].lpfn = 0;
@@ -466,7 +474,7 @@ static bool amdgpu_bo_validate_size(struct 
amdgpu_device *adev,

  goto fail;
  }
  -    /* TODO add more domains checks, such as 
AMDGPU_GEM_DOMAIN_CPU */
+    /* TODO add more domains checks, such as 
AMDGPU_GEM_DOMAIN_CPU,  AMDGPU_GEM_DOMAIN_DOORBELL */


Should we enforce that user space can only allocate 1 page doorbells?


Should we add a per-PID basis check ?


No, just a check that the allocation size of the doorbell BOs is just 1 
page.


Christian.



- Shashank


  return true;
    fail:
@@ -1014,6 +1022,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo)
  } else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
  atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
  }
+


Unrelated change.


Noted

- Shashank



Regards,
Christian.


  }
    static const char *amdgpu_vram_names[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index 0e8f580769ab..e9dc24191fc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -128,6 +128,7 @@ static void amdgpu_evict_flags(struct 
ttm_buffer_object *bo,

  case AMDGPU_PL_GDS:
  case AMDGPU_PL_GWS:
  case AMDGPU_PL_OA:
+    case AMDGPU_PL_DOORBELL:
  placement->num_placement = 0;
  placement->num_busy_placement = 0;
  return;
@@ -500,9 +501,11 @@ static int amdgpu_bo_move(struct 
ttm_buffer_object *bo, bool evict,

  if (old_mem->mem_type == AMDGPU_PL_GDS ||
  old_mem->mem_type == AMDGPU_PL_GWS ||
  old_mem->mem_type == AMDGPU_PL_OA ||
+    old_mem->mem_type == AMDGPU_PL_DOORBELL ||
  new_mem->mem_type == AMDGPU_PL_GDS ||
  new_mem->mem_type == AMDGPU_PL_GWS ||
-    new_mem->mem_type == AMDGPU_PL_OA) {
+    new_mem->mem_type == AMDGPU_PL_OA ||
+    new_mem->mem_type == AMDGPU_PL_DOORBELL) {
  /* Nothing to save here */
  ttm_bo_move_null(bo, new_mem);
  goto out;
@@ -586,6 +589,11 @@ static int amdgpu_ttm_io_mem_reserve(struct 
ttm_device *bdev,

  mem->bus.offset += adev->gmc.vram_aper_base;
  mem->bus.is_iomem = true;
  break;
+    case AMDGPU_PL_DOORBELL:
+    mem->bus.offset = mem->start << PAGE_SHIFT;
+    mem->bus.offset += adev->doorbell.doorbell_aper_base;
+    mem->bus.is_iomem = true;
+    break;
  default:
  return -EINVAL;
  }
@@ -1267,6 +1275,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt 
*ttm, struct ttm_resource *mem)

  flags |= AMDGPU_PTE_VALID;
    if (mem && (mem->mem_type == TTM_PL_TT ||
+    mem->mem_type == AMDGPU_PL_DOORBELL ||
  mem->mem_type == AMDGPU_PL_PREEMPT)) {
  flags |= AMDGPU_PTE_SYSTEM;
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

index 967b265dbfa1..9cf5d8419965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -33,6 +33,7 @@
  #define AMDGPU_PL_GWS    (TTM_PL_PRIV + 1)
  #define AMDGPU_PL_OA    

Re: [PATCH v2 8/8] drm/amdgpu: start using kernel doorbell bo

2023-02-14 Thread Christian König

Am 14.02.23 um 20:28 schrieb Shashank Sharma:


On 14/02/2023 19:40, Christian König wrote:

Am 14.02.23 um 17:15 schrieb Shashank Sharma:

From: Shashank Sharma 

This patch does the following:

- Adds new variables like mman.doorbell_bo_size/gpu_addr/cpu_addr.
   The cpu_addr ptr will be used now for doorbell read/write from
   doorbell BAR.
- Adjusts the existing code to use kernel doorbell BO's size and its
   cpu_address.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 


Maybe squash this one together with the previous patch.


I just split it from the last patch in this series, thought it was too 
scattered and might not be


easy to review :D


Yeah, ok good point as well :D

Christian.






But see below.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  5 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 33 
+---

  drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h |  1 -
  3 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index 0493c64e9d0a..87f486f522ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -109,11 +109,10 @@ static void 
amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,

  *aperture_base = adev->doorbell.doorbell_aper_base;
  *aperture_size = 0;
  *start_offset = 0;
-    } else if (adev->doorbell.doorbell_aper_size > 
adev->doorbell.num_doorbells *

-    sizeof(u32)) {
+    } else if (adev->doorbell.doorbell_aper_size > 
adev->mman.doorbell_kernel_bo_size) {

  *aperture_base = adev->doorbell.doorbell_aper_base;
  *aperture_size = adev->doorbell.doorbell_aper_size;
-    *start_offset = adev->doorbell.num_doorbells * sizeof(u32);
+    *start_offset = adev->mman.doorbell_kernel_bo_size;
  } else {
  *aperture_base = 0;
  *aperture_size = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 43c1b67c2778..fde199434579 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -596,8 +596,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device 
*adev, u32 index)

  if (amdgpu_device_skip_hw_access(adev))
  return 0;
  -    if (index < adev->doorbell.num_doorbells) {
-    return readl(adev->mman.doorbell_aper_base_kaddr + index);
+    if (index < adev->mman.num_kernel_doorbells) {
+    return readl(adev->mman.doorbell_cpu_addr + index);
  } else {
  DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", 
index);

  return 0;
@@ -619,8 +619,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device 
*adev, u32 index, u32 v)

  if (amdgpu_device_skip_hw_access(adev))
  return;
  -    if (index < adev->doorbell.num_doorbells) {
-    writel(v, adev->mman.doorbell_aper_base_kaddr + index);
+    if (index < adev->mman.num_kernel_doorbells) {
+    writel(v, adev->mman.doorbell_cpu_addr + index);
  } else {
  DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", 
index);

  }
@@ -640,8 +640,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device 
*adev, u32 index)

  if (amdgpu_device_skip_hw_access(adev))
  return 0;
  -    if (index < adev->doorbell.num_doorbells) {
-    return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index));

+    if (index < adev->mman.num_kernel_doorbells) {
+    return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_cpu_addr + index));

  } else {
  DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", 
index);

  return 0;
@@ -663,8 +663,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device 
*adev, u32 index, u64 v)

  if (amdgpu_device_skip_hw_access(adev))
  return;
  -    if (index < adev->doorbell.num_doorbells) {
-    atomic64_set((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index), v);

+    if (index < adev->mman.num_kernel_doorbells) {
+    atomic64_set((atomic64_t *)(adev->mman.doorbell_cpu_addr + 
index), v);

  } else {
  DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", 
index);

  }
@@ -1037,7 +1037,7 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)

  if (adev->asic_type < CHIP_BONAIRE) {
  adev->doorbell.doorbell_aper_base = 0;
  adev->doorbell.doorbell_aper_size = 0;
-    adev->doorbell.num_doorbells = 0;
+    adev->mman.num_kernel_doorbells = 0;
  adev->mman.doorbell_aper_base_kaddr = NULL;
  return 0;
  }
@@ -1052,13 +1052,13 @@ static int 
amdgpu_device_doorbell_init(struct amdgpu_device *adev)
  adev->doorbell.doorbell_aper_size = 
pci_resource_len(adev->pdev, 2);

    if (adev->enable_mes) {
-    adev->doorbell.num_doorbells =
+    adev->mman.num_kern

[PATCH] drm/amd/pm: re-enable ac/dc on smu_v13_0_0/10

2023-02-14 Thread Kenneth Feng
re-enable ac/dc on smu_v13_0_0/10

Signed-off-by: Kenneth Feng 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 7c906ab3ddd2..923a9fb3c887 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -147,6 +147,7 @@ static struct cmn2asic_msg_mapping 
smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel,  
 0),
MSG_MAP(AllowGpo,   PPSMC_MSG_SetGpoAllow,  
 0),
MSG_MAP(AllowIHHostInterrupt,   PPSMC_MSG_AllowIHHostInterrupt, 
  0),
+   MSG_MAP(ReenableAcDcInterrupt,  
PPSMC_MSG_ReenableAcDcInterrupt,   0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
-- 
2.25.1



Re: [PATCH] drm/amdgpu: remove TOPDOWN flags when allocating VRAM in large bar system

2023-02-14 Thread Christian König

Am 15.02.23 um 06:25 schrieb Shane Xiao:

Since VRAM manager is changed from drm mm to drm buddy, the
TOP_DOWN flag should not be set by default in the large bar system.
Removing this flag helps improve drm buddy allactor efficiency and
reduce the risk of splitting higher order block into lower order.

Signed-off-by: Shane Xiao 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 2d237f3d3a2e..1c3e647400bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -139,7 +139,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, 
u32 domain)
  
  		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)

places[c].lpfn = visible_pfn;
-   else
+   else if (adev->gmc.real_vram_size != 
adev->gmc.visible_vram_size)
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
  
  		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)




[PATCH] Revert "drm/amd/display: enable DPG when disabling plane for phantom pipe"

2023-02-14 Thread Qingqing Zhuo
This reverts commit b73cf50bd1d0008027cc1b41881b671d9c9054b9.

regression detected by the change. Revert until
fix is available.

Signed-off-by: Qingqing Zhuo 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 47 +---
 1 file changed, 1 insertion(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 510661d303e8..1c218c526650 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -74,8 +74,6 @@
 
 #include "dc_trace.h"
 
-#include "hw_sequencer_private.h"
-
 #include "dce/dmub_outbox.h"
 
 #define CTX \
@@ -1059,44 +1057,6 @@ static void apply_ctx_interdependent_lock(struct dc *dc, 
struct dc_state *contex
}
 }
 
-static void phantom_pipe_blank(
-   struct dc *dc,
-   struct timing_generator *tg,
-   int width,
-   int height)
-{
-   struct dce_hwseq *hws = dc->hwseq;
-   enum dc_color_space color_space;
-   struct tg_color black_color = {0};
-   struct output_pixel_processor *opp = NULL;
-   uint32_t num_opps, opp_id_src0, opp_id_src1;
-   uint32_t otg_active_width, otg_active_height;
-
-   /* program opp dpg blank color */
-   color_space = COLOR_SPACE_SRGB;
-   color_space_to_black_color(dc, color_space, &black_color);
-
-   otg_active_width = width;
-   otg_active_height = height;
-
-   /* get the OPTC source */
-   tg->funcs->get_optc_source(tg, &num_opps, &opp_id_src0, &opp_id_src1);
-   ASSERT(opp_id_src0 < dc->res_pool->res_cap->num_opp);
-   opp = dc->res_pool->opps[opp_id_src0];
-
-   opp->funcs->opp_set_disp_pattern_generator(
-   opp,
-   CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR,
-   CONTROLLER_DP_COLOR_SPACE_UDEFINED,
-   COLOR_DEPTH_UNDEFINED,
-   &black_color,
-   otg_active_width,
-   otg_active_height,
-   0);
-
-   hws->funcs.wait_for_blank_complete(opp);
-}
-
 static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 {
int i, j;
@@ -1155,13 +1115,8 @@ static void disable_dangling_plane(struct dc *dc, struct 
dc_state *context)
 * again for different use.
 */
if (old_stream->mall_stream_config.type == 
SUBVP_PHANTOM) {
-   if (tg->funcs->enable_crtc) {
-   int main_pipe_width, main_pipe_height;
-   main_pipe_width = 
old_stream->mall_stream_config.paired_stream->dst.width;
-   main_pipe_height = 
old_stream->mall_stream_config.paired_stream->dst.height;
-   phantom_pipe_blank(dc, tg, 
main_pipe_width, main_pipe_height);
+   if (tg->funcs->enable_crtc)
tg->funcs->enable_crtc(tg);
-   }
}
dc_rem_all_planes_for_stream(dc, old_stream, 
dangling_context);
disable_all_writeback_pipes_for_stream(dc, old_stream, 
dangling_context);
-- 
2.34.1