[PATCH 2/2] drm/amdgpu: added a sysfs interface for thermal throttling
implement apu_thermal_cap r/w callback for vangogh Jira ID: SWDEV-354511 Signed-off-by: Kun Liu --- .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 23 +++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index cb10c7e31..d211b1dfe 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -1590,6 +1590,27 @@ static int vangogh_read_sensor(struct smu_context *smu, return ret; } +static int vangogh_get_apu_thermal_limit(struct smu_context *smu, uint32_t *limit) +{ + int ret = -EINVAL; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GetThermalLimit, + 0, limit); + return ret; +} + +int vangogh_set_apu_thermal_limit(struct smu_context *smu, uint32_t limit) +{ + int ret = -EINVAL; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetReducedThermalLimit, + limit, NULL); + return ret; +} + + static int vangogh_set_watermarks_table(struct smu_context *smu, struct pp_smu_wm_range_sets *clock_ranges) { @@ -2425,6 +2446,8 @@ static const struct pptable_funcs vangogh_ppt_funcs = { .dpm_set_jpeg_enable = vangogh_dpm_set_jpeg_enable, .is_dpm_running = vangogh_is_dpm_running, .read_sensor = vangogh_read_sensor, + .get_apu_thermal_limit = vangogh_get_apu_thermal_limit, + .set_apu_thermal_limit = vangogh_set_apu_thermal_limit, .get_enabled_mask = smu_cmn_get_enabled_mask, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .set_watermarks_table = vangogh_set_watermarks_table, -- 2.25.1
Re: [PATCH 2/2] drm/amdgpu: added a sysfs interface for thermal throttling
Am 14.02.23 um 09:02 schrieb kunliu13: implement apu_thermal_cap r/w callback for vangogh Jira ID: SWDEV-354511 Signed-off-by: Kun Liu --- .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 23 +++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index cb10c7e31..d211b1dfe 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -1590,6 +1590,27 @@ static int vangogh_read_sensor(struct smu_context *smu, return ret; } +static int vangogh_get_apu_thermal_limit(struct smu_context *smu, uint32_t *limit) +{ + int ret = -EINVAL; Generally please don't initialize return values if it isn't necessary! This is constantly reported by automated checkers as bad coding style since it prevent the compiler from properly warn if an uninitialized variable is used. Regards, Christian. + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GetThermalLimit, + 0, limit); + return ret; +} + +int vangogh_set_apu_thermal_limit(struct smu_context *smu, uint32_t limit) +{ + int ret = -EINVAL; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetReducedThermalLimit, + limit, NULL); + return ret; +} + + static int vangogh_set_watermarks_table(struct smu_context *smu, struct pp_smu_wm_range_sets *clock_ranges) { @@ -2425,6 +2446,8 @@ static const struct pptable_funcs vangogh_ppt_funcs = { .dpm_set_jpeg_enable = vangogh_dpm_set_jpeg_enable, .is_dpm_running = vangogh_is_dpm_running, .read_sensor = vangogh_read_sensor, + .get_apu_thermal_limit = vangogh_get_apu_thermal_limit, + .set_apu_thermal_limit = vangogh_set_apu_thermal_limit, .get_enabled_mask = smu_cmn_get_enabled_mask, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .set_watermarks_table = vangogh_set_watermarks_table,
RE: [PATCH 2/2] drm/amdgpu: added a sysfs interface for thermal throttling
[AMD Official Use Only - General] > -Original Message- > From: kunliu13 > Sent: Tuesday, February 14, 2023 4:03 PM > To: Limonciello, Mario ; Liang, Richard qi > ; Yuan, Perry ; amd- > g...@lists.freedesktop.org > Cc: Deucher, Alexander ; Du, Xiaojian > ; Quan, Evan ; Liu, Kun > > Subject: [PATCH 2/2] drm/amdgpu: added a sysfs interface for thermal > throttling > > implement apu_thermal_cap r/w callback for vangogh > > Jira ID: SWDEV-354511 [Quan, Evan] Please drop this internal link. > Signed-off-by: Kun Liu > --- > .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 23 > +++ > 1 file changed, 23 insertions(+) > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c > b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c > index cb10c7e31..d211b1dfe 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c > @@ -1590,6 +1590,27 @@ static int vangogh_read_sensor(struct > smu_context *smu, > return ret; > } > > +static int vangogh_get_apu_thermal_limit(struct smu_context *smu, > uint32_t *limit) > +{ > + int ret = -EINVAL; > + > + ret = smu_cmn_send_smc_msg_with_param(smu, > + SMU_MSG_GetThermalLimit, > + 0, limit); > + return ret; [Quan, Evan] "ret" here and below in vangogh_set_apu_thermal_limit seem unnecessary. Other that those, the patch is reviewed-by: Evan Quan Evan > +} > + > +int vangogh_set_apu_thermal_limit(struct smu_context *smu, uint32_t > limit) > +{ > + int ret = -EINVAL; > + > + ret = smu_cmn_send_smc_msg_with_param(smu, > + > SMU_MSG_SetReducedThermalLimit, > + limit, NULL); > + return ret; > +} > + > + > static int vangogh_set_watermarks_table(struct smu_context *smu, > struct pp_smu_wm_range_sets > *clock_ranges) > { > @@ -2425,6 +2446,8 @@ static const struct pptable_funcs > vangogh_ppt_funcs = { > .dpm_set_jpeg_enable = vangogh_dpm_set_jpeg_enable, > .is_dpm_running = vangogh_is_dpm_running, > .read_sensor = vangogh_read_sensor, > + .get_apu_thermal_limit = vangogh_get_apu_thermal_limit, > + .set_apu_thermal_limit = vangogh_set_apu_thermal_limit, > .get_enabled_mask = smu_cmn_get_enabled_mask, > .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, > .set_watermarks_table = vangogh_set_watermarks_table, > -- > 2.25.1
RE: [PATCH 1/2] drm/amdgpu: added a sysfs interface for thermal throttling
[AMD Official Use Only - General] > -Original Message- > From: kunliu13 > Sent: Tuesday, February 14, 2023 3:54 PM > To: Limonciello, Mario ; Liang, Richard qi > ; Yuan, Perry ; amd- > g...@lists.freedesktop.org > Cc: Deucher, Alexander ; Du, Xiaojian > ; Quan, Evan ; Liu, Kun > > Subject: [PATCH 1/2] drm/amdgpu: added a sysfs interface for thermal > throttling > > added a sysfs interface for thermal throttling, then userspace can get/update > thermal limit > > Jira ID: SWDEV-354511 [Quan, Evan] Please drop this internal link. Other than this, the patch is Reviewed-by: Evan Quan Evan > Signed-off-by: Kun Liu > > Change-Id: I9948cb8966b731d2d74d7aad87cbcdc840dd34c8 > --- > .../gpu/drm/amd/include/kgd_pp_interface.h| 2 + > drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 28 +++ > drivers/gpu/drm/amd/pm/amdgpu_pm.c| 76 > +++ > drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 + > drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 24 ++ > drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 12 +++ > 6 files changed, 145 insertions(+) > > diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h > b/drivers/gpu/drm/amd/include/kgd_pp_interface.h > index f3d64c78f..8394464ea 100644 > --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h > +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h > @@ -331,6 +331,8 @@ struct amd_pm_funcs { > int (*get_mclk_od)(void *handle); > int (*set_mclk_od)(void *handle, uint32_t value); > int (*read_sensor)(void *handle, int idx, void *value, int *size); > + int (*get_apu_thermal_limit)(void *handle, uint32_t *limit); > + int (*set_apu_thermal_limit)(void *handle, uint32_t limit); > enum amd_dpm_forced_level (*get_performance_level)(void > *handle); > enum amd_pm_state_type (*get_current_power_state)(void > *handle); > int (*get_fan_speed_rpm)(void *handle, uint32_t *rpm); > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > index 1b300c569..d9a9cf189 100644 > --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > @@ -438,6 +438,34 @@ int amdgpu_dpm_read_sensor(struct > amdgpu_device *adev, enum amd_pp_sensors senso > return ret; > } > > +int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev, > uint32_t *limit) > +{ > + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; > + int ret = -EINVAL; > + > + if (pp_funcs && pp_funcs->get_apu_thermal_limit) { > + mutex_lock(&adev->pm.mutex); > + ret = pp_funcs->get_apu_thermal_limit(adev- > >powerplay.pp_handle, limit); > + mutex_unlock(&adev->pm.mutex); > + } > + > + return ret; > +} > + > +int amdgpu_dpm_set_apu_thermal_limit(struct amdgpu_device *adev, > uint32_t limit) > +{ > + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; > + int ret = -EINVAL; > + > + if (pp_funcs && pp_funcs->set_apu_thermal_limit) { > + mutex_lock(&adev->pm.mutex); > + ret = pp_funcs->set_apu_thermal_limit(adev- > >powerplay.pp_handle, limit); > + mutex_unlock(&adev->pm.mutex); > + } > + > + return ret; > +} > + > void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) > { > const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c > b/drivers/gpu/drm/amd/pm/amdgpu_pm.c > index 236657eec..99b249e55 100644 > --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c > +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c > @@ -1685,6 +1685,81 @@ static ssize_t > amdgpu_set_thermal_throttling_logging(struct device *dev, > return count; > } > > +/** > + * DOC: apu_thermal_cap > + * > + * The amdgpu driver provides a sysfs API for retrieving/updating thermal > + * limit temperature in millidegrees Celsius > + * > + * Reading back the file shows you core limit value > + * > + * Writing an integer to the file, sets a new thermal limit. The value > + * should be between 0 and 100. If the value is less than 0 or greater > + * than 100, then the write request will be ignored. > + */ > +static ssize_t amdgpu_get_apu_thermal_cap(struct device *dev, > + struct device_attribute *attr, > + char *buf) > +{ > + int ret, size = 0; > + u32 limit; > + struct drm_device *ddev = dev_get_drvdata(dev); > + struct amdgpu_device *adev = drm_to_adev(ddev); > + > + ret = pm_runtime_get_sync(ddev->dev); > + if (ret < 0) { > + pm_runtime_put_autosuspend(ddev->dev); > + return size; > + } > + > + ret = amdgpu_dpm_get_apu_thermal_limit(adev, &limit); > + if (!ret) > + size = sysfs_emit(buf, "%u\n", limit); > + else > + size = sysfs_emit(buf, "failed to get thermal limit\n"); > + > + pm_runtime_mark_last_busy(ddev->dev); > + pm_runtime_put_autosuspend(ddev->d
Re: [RFC PATCH v2 00/18] Add DRM CRTC 3D LUT interface
On Mon, 13 Feb 2023 18:26:55 -0100 Melissa Wen wrote: > On 02/10, Pekka Paalanen wrote: > > On Thu, 9 Feb 2023 13:27:02 -0100 > > Melissa Wen wrote: > > > > > On 01/31, Pekka Paalanen wrote: > > > > On Mon, 9 Jan 2023 14:38:09 -0100 > > > > Melissa Wen wrote: > > > > > > > > > On 01/09, Melissa Wen wrote: > > > > > > Hi, > > > > > > > > > > > > After collecting comments in different places, here is a second > > > > > > version > > > > > > of the work on adding DRM CRTC 3D LUT support to the current DRM > > > > > > color > > > > > > mgmt interface. In comparison to previous proposals [1][2][3], here > > > > > > we > > > > > > add 3D LUT before gamma 1D LUT, but also a shaper 1D LUT before 3D > > > > > > LUT, > > > > > > that means the following DRM CRTC color correction pipeline: > > > > > > > > > > > > Blend -> Degamma 1D LUT -> CTM -> Shaper 1D LUT -> 3D LUT -> Gamma > > > > > > 1D LUT > > > > > > > > Hi Melissa, > > > > > > > > that makes sense to me, for CRTCs. It would be really good to have that > > > > as a diagram in the KMS UAPI documentation. > > > > > > > > > > Hi Pekka, > > > > > > Thanks for your feedbacks and your time reviewing this proposal. > > > > No problem, and sorry it took so long! > > > > I'm just finishing the catch-up with everything that happened during > > winter holidays. > > > > > > If someone wants to add a 3D LUT to KMS planes as well, then I'm not > > > > sure if it should be this order or swapped. I will probably have an > > > > opinion about that once Weston is fully HDR capable and has been tried > > > > in the wild for a while with the HDR color operations fine-tuned based > > > > on community feedback. IOW, not for a long time. The YUV to RGB > > > > conversion factors in there as well. > > > > > > > I see, this is also the reason I reuse here Alex Hung's proposal for > > > pre-blending API. I'll work on better documentation. > > > > > > > > > > > > > > > > > > > and we also add a DRM CRTC LUT3D_MODE property, based on Alex Hung > > > > > > proposal for pre-blending 3D LUT [4] (Thanks!), instead of just a > > > > > > LUT3D_SIZE, that allows userspace to use different supported > > > > > > settings of > > > > > > 3D LUT, fitting VA-API and new color API better. In this sense, I > > > > > > adjusted the pre-blending proposal for post-blending usage. > > > > > > > > > > > > Patches 1-6 targets the addition of shaper LUT and 3D LUT > > > > > > properties to > > > > > > the current DRM CRTC color mgmt pipeline. Patch 6 can be considered > > > > > > an > > > > > > extra/optional patch to define a default value for LUT3D_MODE, > > > > > > inspired > > > > > > by what we do for the plane blend mode property (pre-multiplied). > > > > > > > > > > > > Patches 7-18 targets AMD display code to enable shaper and 3D LUT > > > > > > usage > > > > > > on DCN 301 (our HW case). Patches 7-9 performs code cleanups on > > > > > > current > > > > > > AMD DM colors code, patch 10 updates AMD stream in case of user 3D > > > > > > LUT > > > > > > changes, patch 11/12 rework AMD MPC 3D LUT resource handling by > > > > > > context > > > > > > for DCN 301 (easily extendible to other DCN families). Finally, from > > > > > > 13-18, we wire up SHAPER LUT, LUT3D and LUT3D MODE to AMD display > > > > > > driver, exposing modes supported by HW and programming user shaper > > > > > > and > > > > > > 3D LUT accordingly. > > > > > > > > > > > > Our target userspace is Gamescope/SteamOS. > > > > > > > > > > > > Basic IGT tests were based on [5][6] and are available here > > > > > > (in-progress): > > > > > > https://gitlab.freedesktop.org/mwen/igt-gpu-tools/-/commits/crtc-lut3d-api > > > > > > > > > > > > [1] > > > > > > https://lore.kernel.org/all/20201221015730.28333-1-laurent.pinchart+rene...@ideasonboard.com/ > > > > > > [2] > > > > > > https://github.com/vsyrjala/linux/commit/4d28e8ddf2a076f30f9e5bdc17cbb4656fe23e69 > > > > > > [3] > > > > > > https://lore.kernel.org/amd-gfx/20220619223104.667413-1-m...@igalia.com/ > > > > > > [4] > > > > > > https://lore.kernel.org/dri-devel/20221004211451.1475215-1-alex.h...@amd.com/ > > > > > > [5] https://patchwork.freedesktop.org/series/90165/ > > > > > > [6] https://patchwork.freedesktop.org/series/109402/ > > > > > > [VA_API] > > > > > > http://intel.github.io/libva/structVAProcFilterParameterBuffer3DLUT.html > > > > > > [KMS_pipe_API] > > > > > > https://gitlab.freedesktop.org/pq/color-and-hdr/-/issues/11 > > > > > > > > > > > > Let me know your thoughts. > > > > > > > > > > +Simon Ser, +Pekka Paalanen who might also be interested in this > > > > > series. > > > > > > > > Unfortunately I don't have the patch emails to reply to, so here's a > > > > messy bunch of comments. I'll concentrate on the UAPI design as always. > > > > > > > > > > Sorry, the patchset is here: > > > https://lore.kernel.org/dri-devel/20230109143846.1966301-1-m...@igalia.com/ > > > In the nex
Re: [RFC PATCH v2 00/18] Add DRM CRTC 3D LUT interface
On Mon, 13 Feb 2023 18:45:40 -0100 Melissa Wen wrote: > On 02/13, Ville Syrjälä wrote: > > On Mon, Feb 13, 2023 at 11:01:31AM +0200, Pekka Paalanen wrote: > > > On Fri, 10 Feb 2023 14:47:50 -0500 > > > Harry Wentland wrote: > > > > > > > On 2/10/23 04:28, Pekka Paalanen wrote: > > > > > On Thu, 9 Feb 2023 13:27:02 -0100 > > > > > Melissa Wen wrote: > > > > > > > > > >> On 01/31, Pekka Paalanen wrote: > > > > >>> On Mon, 9 Jan 2023 14:38:09 -0100 > > > > >>> Melissa Wen wrote: > > > > >>> > > > > On 01/09, Melissa Wen wrote: > > > > > Hi, > > > > > > > > > > After collecting comments in different places, here is a second > > > > > version > > > > > of the work on adding DRM CRTC 3D LUT support to the current DRM > > > > > color > > > > > mgmt interface. In comparison to previous proposals [1][2][3], > > > > > here we > > > > > add 3D LUT before gamma 1D LUT, but also a shaper 1D LUT before > > > > > 3D LUT, > > > > > that means the following DRM CRTC color correction pipeline: > > > > > > > > > > Blend -> Degamma 1D LUT -> CTM -> Shaper 1D LUT -> 3D LUT -> > > > > > Gamma 1D LUT > > > > > > ... > > > > > > > >>> +/* > > > > >>> + * struct drm_mode_lut3d_mode - 3D LUT mode information. > > > > >>> + * @lut_size: number of valid points on every dimension of 3D LUT. > > > > >>> + * @lut_stride: number of points on every dimension of 3D LUT. > > > > >>> + * @bit_depth: number of bits of RGB. If color_mode defines > > > > >>> entries with higher > > > > >>> + * bit_depth the least significant bits will be > > > > >>> truncated. > > > > >>> + * @color_format: fourcc values, ex. DRM_FORMAT_XRGB16161616 or > > > > >>> DRM_FORMAT_XBGR16161616. > > > > >>> + * @flags: flags for hardware-sepcific features > > > > >>> + */ > > > > >>> +struct drm_mode_lut3d_mode { > > > > >>> + __u16 lut_size; > > > > >>> + __u16 lut_stride[3]; > > > > >>> + __u16 bit_depth; > > > > >>> + __u32 color_format; > > > > >>> + __u32 flags; > > > > >>> +}; > > > > > > ... > > > > > > > >>> What is "number of bits of RGB"? Input precision? Output precision? > > > > >>> Integer or floating point? > > > > >> > > > > >> It's the bit depth of the 3D LUT values, the same for every > > > > >> channels. In > > > > >> the AMD case, it's supports 10-bit and 12-bit, for example. > > > > > > > > > > Ok. So e.g. r5g6b5 is not a possible 3D LUT element type on any > > > > > hardware ever? > > > > > > > > > > > > > I haven't had a chance to go through all patches yet but if this is > > > > modeled after Alex Hung's work this should be covered by color_format. > > > > The idea is that color_format takes a FOURCC value and defines the > > > > format of the entries in the 3DLUT blob. > > > > > > > > The bit_depth describes the actual bit depth that the HW supports. > > > > E.g., color_format could be DRM_FORMAT_XRGB16161616 but HW might only > > > > support 12-bit precision. In that case the least significant bits get > > > > truncated. > > > > > > > > One could define the bit_depth per color, but I'm not sure that'll be > > > > necessary. > > > > > > Exactly. I just have no idea how sure we should be about that. > > > > > > > > What exactly is the truncation the comment refers to? > > > > > > > > > > It sounds like if input has higher precision than the LUT elements, > > > > > then "truncation" occurs. I can kind of see that, but I also think it > > > > > is a false characterisation. The LUT input precision affects the > > > > > precision of LUT indexing and the precision of interpolation between > > > > > the LUT elements. I would not expect those two precisions to be > > > > > truncated to the LUT element precision (but they could be truncated to > > > > > something else hardware specific). Instead, I do expect the > > > > > interpolation result to be truncated to the LUT output precision, > > > > > which > > > > > probably is the same as the LUT element precision, but not > > > > > necessarily. > > > > > > > > > > Maybe the comment about truncation should simply be removed? The > > > > > result > > > > > is obvious if we know the LUT input, element, and output precision, > > > > > and > > > > > what exactly happens with the indexing and interpolation is probably > > > > > good enough to be left hardware-specific if it is difficult to > > > > > describe > > > > > in generic terms across different hardware. > > > > > > > > > > > > > Maybe it makes sense to just drop the bit_depth field. > > > > > > Well, it's really interesting information for userspace, but maybe it > > > should have a more holistic design. Precision is a factor, when > > > userspace considers whether it can use KMS hardware for a conversion or > > > not. Unfortunately, none of the existing KMS color pipeline elements > > > have any information on precision IIRC, so there is more to be fixed. > > > > > > The
RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no new bad page
[AMD Official Use Only - General] -EINVAL looks better than EEXIST, but it still doesn't fit the case? Alternatively, Can we compare the count before and after amdgpu_ras_add_bad_pages to decide whether reset ue_count to 0 or not. That could be straightforward than struggling for returning which error code in this specific case. Regards, Hawking -Original Message- From: Yang, Stanley Sent: Tuesday, February 14, 2023 10:42 To: Zhou1, Tao ; Zhang, Hawking ; amd-gfx@lists.freedesktop.org; Chai, Thomas ; Li, Candice Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no new bad page [AMD Official Use Only - General] > -Original Message- > From: Zhou1, Tao > Sent: Monday, February 13, 2023 4:25 PM > To: Zhang, Hawking ; amd- > g...@lists.freedesktop.org; Yang, Stanley ; Chai, > Thomas ; Li, Candice > Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no > new bad page > > [AMD Official Use Only - General] > > > -Original Message- > > From: Zhang, Hawking > > Sent: Friday, February 10, 2023 11:02 PM > > To: Zhou1, Tao ; amd-gfx@lists.freedesktop.org; > > Yang, Stanley ; Chai, Thomas > > ; Li, Candice > > Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if > > no new bad page > > > > [AMD Official Use Only - General] > > > > + /* if no new bad page is found, no need to > > + increase ue count > */ > > + if (ret == -EEXIST) > > + err_data->ue_count = 0; > > > > Returning EEXIST in such case is not reasonable. Might consider > > return a bool for > > amdgpu_ras_add_bad_pages: true means it does add some new bad page; > > false means it doesn't change anything. > > > > Regards, > > Hawking > > [Tao] but it can returns -ENOMEM, amdgpu_ras_load_bad_pages and > amdgpu_ras_recovery_init also need to check the return value. I'd like > to keep the type of return value unchanged. > How about -EINVAL? Stanley: How about return -EALREADY? Regards, Stanley > > > > > -Original Message- > > From: Zhou1, Tao > > Sent: Friday, February 10, 2023 16:45 > > To: amd-gfx@lists.freedesktop.org; Zhang, Hawking > > ; Yang, Stanley ; > Chai, > > Thomas ; Li, Candice > > Cc: Zhou1, Tao > > Subject: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no > new > > bad page > > > > If a UMC bad page is reserved but not freed by an application, the > > application may trigger uncorrectable error repeatly by accessing the page. > > > > Signed-off-by: Tao Zhou > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 - > > drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 6 +- > > 2 files changed, 13 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > index e85c4689ce2c..eafe01a24349 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > @@ -2049,7 +2049,7 @@ int amdgpu_ras_add_bad_pages(struct > > amdgpu_device *adev, { > > struct amdgpu_ras *con = amdgpu_ras_get_context(adev); > > struct ras_err_handler_data *data; > > - int ret = 0; > > + int ret = 0, old_cnt; > > uint32_t i; > > > > if (!con || !con->eh_data || !bps || pages <= 0) @@ -2060,6 > > +2060,8 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device > *adev, > > if (!data) > > goto out; > > > > + old_cnt = data->count; > > + > > for (i = 0; i < pages; i++) { > > if (amdgpu_ras_check_bad_page_unlock(con, > > bps[i].retired_page << > > AMDGPU_GPU_PAGE_SHIFT)) @@ -2079,6 > > +2081,11 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device > *adev, > > data->count++; > > data->space_left--; > > } > > + > > + /* all pages have been reserved before, no new bad page */ > > + if (old_cnt == data->count) > > + ret = -EEXIST; > > + > > out: > > mutex_unlock(&con->recovery_lock); > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c > > index 1c7fcb4f2380..772c431e4065 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c > > @@ -145,8 +145,12 @@ static int > amdgpu_umc_do_page_retirement(struct > > amdgpu_device *adev, > > > > if ((amdgpu_bad_page_threshold != 0) && > > err_data->err_addr_cnt) { > > - amdgpu_ras_add_bad_pages(adev, err_data->err_addr, > > + ret = amdgpu_ras_add_bad_pages(adev, > > + err_data->err_addr, > > > > err_data->err_addr_cnt); > > + /* if no new bad page is found, no need to > > + increase ue count > */ > > + if (ret == -EEXIST) > > + err_data->ue_count = 0; > > + > > am
Re: [PATCH] drm/amd/amdgpu: fix warining during suspend
On 13/02/2023 11:52, Jack Xiao wrote: Freeing memory was warned during suspend. Move the self test out of suspend. Thanks a lot, this fixes the following warning during suspend/resume on v6.2-rc8 WARNING: CPU: 2 PID: 3980 at drivers/gpu/drm/amd/amdgpu/amdgpu_object.c:425 amdgpu_bo_free_kernel+0xf5/0x110 [amdgpu] Tested-by: Jocelyn Falempe -- Jocelyn
RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no new bad page
[AMD Official Use Only - General] OK, I'll add a new function to do the check. Tao > -Original Message- > From: Zhang, Hawking > Sent: Tuesday, February 14, 2023 6:03 PM > To: Yang, Stanley ; Zhou1, Tao > ; amd-gfx@lists.freedesktop.org; Chai, Thomas > ; Li, Candice > Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no new > bad page > > [AMD Official Use Only - General] > > -EINVAL looks better than EEXIST, but it still doesn't fit the case? > Alternatively, > Can we compare the count before and after amdgpu_ras_add_bad_pages to > decide whether reset ue_count to 0 or not. That could be straightforward than > struggling for returning which error code in this specific case. > > Regards, > Hawking > > -Original Message- > From: Yang, Stanley > Sent: Tuesday, February 14, 2023 10:42 > To: Zhou1, Tao ; Zhang, Hawking > ; amd-gfx@lists.freedesktop.org; Chai, Thomas > ; Li, Candice > Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no new > bad page > > [AMD Official Use Only - General] > > > > > -Original Message- > > From: Zhou1, Tao > > Sent: Monday, February 13, 2023 4:25 PM > > To: Zhang, Hawking ; amd- > > g...@lists.freedesktop.org; Yang, Stanley ; Chai, > > Thomas ; Li, Candice > > Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no > > new bad page > > > > [AMD Official Use Only - General] > > > > > -Original Message- > > > From: Zhang, Hawking > > > Sent: Friday, February 10, 2023 11:02 PM > > > To: Zhou1, Tao ; amd-gfx@lists.freedesktop.org; > > > Yang, Stanley ; Chai, Thomas > > > ; Li, Candice > > > Subject: RE: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if > > > no new bad page > > > > > > [AMD Official Use Only - General] > > > > > > + /* if no new bad page is found, no need to > > > + increase ue count > > */ > > > + if (ret == -EEXIST) > > > + err_data->ue_count = 0; > > > > > > Returning EEXIST in such case is not reasonable. Might consider > > > return a bool for > > > amdgpu_ras_add_bad_pages: true means it does add some new bad page; > > > false means it doesn't change anything. > > > > > > Regards, > > > Hawking > > > > [Tao] but it can returns -ENOMEM, amdgpu_ras_load_bad_pages and > > amdgpu_ras_recovery_init also need to check the return value. I'd like > > to keep the type of return value unchanged. > > How about -EINVAL? > > Stanley: How about return -EALREADY? > > Regards, > Stanley > > > > > > > > -Original Message- > > > From: Zhou1, Tao > > > Sent: Friday, February 10, 2023 16:45 > > > To: amd-gfx@lists.freedesktop.org; Zhang, Hawking > > > ; Yang, Stanley ; > > Chai, > > > Thomas ; Li, Candice > > > Cc: Zhou1, Tao > > > Subject: [PATCH] drm/amdgpu: don't increase UMC RAS UE count if no > > new > > > bad page > > > > > > If a UMC bad page is reserved but not freed by an application, the > > > application may trigger uncorrectable error repeatly by accessing the > > > page. > > > > > > Signed-off-by: Tao Zhou > > > --- > > > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 - > > > drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 6 +- > > > 2 files changed, 13 insertions(+), 2 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > > index e85c4689ce2c..eafe01a24349 100644 > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > > @@ -2049,7 +2049,7 @@ int amdgpu_ras_add_bad_pages(struct > > > amdgpu_device *adev, { > > > struct amdgpu_ras *con = amdgpu_ras_get_context(adev); > > > struct ras_err_handler_data *data; > > > - int ret = 0; > > > + int ret = 0, old_cnt; > > > uint32_t i; > > > > > > if (!con || !con->eh_data || !bps || pages <= 0) @@ -2060,6 > > > +2060,8 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device > > *adev, > > > if (!data) > > > goto out; > > > > > > + old_cnt = data->count; > > > + > > > for (i = 0; i < pages; i++) { > > > if (amdgpu_ras_check_bad_page_unlock(con, > > > bps[i].retired_page << > > > AMDGPU_GPU_PAGE_SHIFT)) @@ -2079,6 > > > +2081,11 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device > > *adev, > > > data->count++; > > > data->space_left--; > > > } > > > + > > > + /* all pages have been reserved before, no new bad page */ > > > + if (old_cnt == data->count) > > > + ret = -EEXIST; > > > + > > > out: > > > mutex_unlock(&con->recovery_lock); > > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c > > > index 1c7fcb4f2380..772c431e4065 100644 > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c > > > @@ -145,8 +145,12 @@ static
Re: [Intel-gfx] [PATCH 3/3] drm/ttm: Change the meaning of the fields in the drm_mm_nodes structure from pfn to bytes v2
On Tue, 14 Feb 2023 at 07:43, Christian König wrote: > > From: Somalapuram Amaranath > > Change the ttm_range_man_alloc() allocation from pages to size in bytes. > Fix the dependent drm_mm_nodes start and size from pages to bytes. > > v2 (chk): Change the drm_mm_node usage in amdgpu as well. re-order the > patch to be independent of the resource->start change. > > Signed-off-by: Somalapuram Amaranath > Reviewed-by: Christian König > Signed-off-by: Christian König > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c| 15 --- > drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 8 > drivers/gpu/drm/i915/i915_scatterlist.c| 6 +++--- > drivers/gpu/drm/ttm/ttm_range_manager.c| 17 - > 4 files changed, 23 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c > index 44367f03316f..c90423cd1292 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c > @@ -116,7 +116,6 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager > *man, > struct ttm_resource **res) > { > struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); > - uint32_t num_pages = PFN_UP(tbo->base.size); > struct ttm_range_mgr_node *node; > int r; > > @@ -134,17 +133,19 @@ static int amdgpu_gtt_mgr_new(struct > ttm_resource_manager *man, > if (place->lpfn) { > spin_lock(&mgr->lock); > r = drm_mm_insert_node_in_range(&mgr->mm, &node->mm_nodes[0], > - num_pages, > tbo->page_alignment, > - 0, place->fpfn, place->lpfn, > + tbo->base.size, > + tbo->page_alignment << > PAGE_SHIFT, 0, > + place->fpfn << PAGE_SHIFT, > + place->lpfn << PAGE_SHIFT, > DRM_MM_INSERT_BEST); > spin_unlock(&mgr->lock); > if (unlikely(r)) > goto err_free; > > - node->base.start = node->mm_nodes[0].start; > + node->base.start = node->mm_nodes[0].start >> PAGE_SHIFT; > } else { > node->mm_nodes[0].start = 0; > - node->mm_nodes[0].size = PFN_UP(node->base.size); > + node->mm_nodes[0].size = node->base.size; > node->base.start = AMDGPU_BO_INVALID_OFFSET; > } > > @@ -285,8 +286,8 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, > uint64_t gtt_size) > > ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size); > > - start = AMDGPU_GTT_MAX_TRANSFER_SIZE * > AMDGPU_GTT_NUM_TRANSFER_WINDOWS; > - size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; > + start = (AMDGPU_GTT_MAX_TRANSFER_SIZE * > AMDGPU_GTT_NUM_TRANSFER_WINDOWS) << PAGE_SHIFT; > + size = adev->gmc.gart_size - start; > drm_mm_init(&mgr->mm, start, size); > spin_lock_init(&mgr->lock); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h > index 5c4f93ee0c57..5c78f0b09351 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h > @@ -94,8 +94,8 @@ static inline void amdgpu_res_first(struct ttm_resource > *res, > while (start >= node->size << PAGE_SHIFT) > start -= node++->size << PAGE_SHIFT; > > - cur->start = (node->start << PAGE_SHIFT) + start; > - cur->size = min((node->size << PAGE_SHIFT) - start, size); > + cur->start = node->start + start; > + cur->size = min(node->size - start, size); > cur->remaining = size; > cur->node = node; > break; > @@ -155,8 +155,8 @@ static inline void amdgpu_res_next(struct > amdgpu_res_cursor *cur, uint64_t size) > node = cur->node; > > cur->node = ++node; > - cur->start = node->start << PAGE_SHIFT; > - cur->size = min(node->size << PAGE_SHIFT, cur->remaining); > + cur->start = node->start; > + cur->size = min(node->size, cur->remaining); > break; > default: > return; > diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c > b/drivers/gpu/drm/i915/i915_scatterlist.c > index 756289e43dff..7defda1219d0 100644 > --- a/drivers/gpu/drm/i915/i915_scatterlist.c > +++ b/drivers/gpu/drm/i915/i915_scatterlist.c > @@ -94,7 +94,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct > drm_mm_node *node, > if (!rsgt) > return ERR_PTR(-ENOMEM); > >
Re: [Intel-gfx] [PATCH 3/3] drm/ttm: Change the meaning of the fields in the drm_mm_nodes structure from pfn to bytes v2
Am 14.02.23 um 11:31 schrieb Matthew Auld: On Tue, 14 Feb 2023 at 07:43, Christian König wrote: From: Somalapuram Amaranath Change the ttm_range_man_alloc() allocation from pages to size in bytes. Fix the dependent drm_mm_nodes start and size from pages to bytes. v2 (chk): Change the drm_mm_node usage in amdgpu as well. re-order the patch to be independent of the resource->start change. Signed-off-by: Somalapuram Amaranath Reviewed-by: Christian König Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c| 15 --- drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 8 drivers/gpu/drm/i915/i915_scatterlist.c| 6 +++--- drivers/gpu/drm/ttm/ttm_range_manager.c| 17 - 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 44367f03316f..c90423cd1292 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -116,7 +116,6 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, struct ttm_resource **res) { struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); - uint32_t num_pages = PFN_UP(tbo->base.size); struct ttm_range_mgr_node *node; int r; @@ -134,17 +133,19 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, if (place->lpfn) { spin_lock(&mgr->lock); r = drm_mm_insert_node_in_range(&mgr->mm, &node->mm_nodes[0], - num_pages, tbo->page_alignment, - 0, place->fpfn, place->lpfn, + tbo->base.size, + tbo->page_alignment << PAGE_SHIFT, 0, + place->fpfn << PAGE_SHIFT, + place->lpfn << PAGE_SHIFT, DRM_MM_INSERT_BEST); spin_unlock(&mgr->lock); if (unlikely(r)) goto err_free; - node->base.start = node->mm_nodes[0].start; + node->base.start = node->mm_nodes[0].start >> PAGE_SHIFT; } else { node->mm_nodes[0].start = 0; - node->mm_nodes[0].size = PFN_UP(node->base.size); + node->mm_nodes[0].size = node->base.size; node->base.start = AMDGPU_BO_INVALID_OFFSET; } @@ -285,8 +286,8 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size); - start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; - size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; + start = (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS) << PAGE_SHIFT; + size = adev->gmc.gart_size - start; drm_mm_init(&mgr->mm, start, size); spin_lock_init(&mgr->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 5c4f93ee0c57..5c78f0b09351 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -94,8 +94,8 @@ static inline void amdgpu_res_first(struct ttm_resource *res, while (start >= node->size << PAGE_SHIFT) start -= node++->size << PAGE_SHIFT; - cur->start = (node->start << PAGE_SHIFT) + start; - cur->size = min((node->size << PAGE_SHIFT) - start, size); + cur->start = node->start + start; + cur->size = min(node->size - start, size); cur->remaining = size; cur->node = node; break; @@ -155,8 +155,8 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) node = cur->node; cur->node = ++node; - cur->start = node->start << PAGE_SHIFT; - cur->size = min(node->size << PAGE_SHIFT, cur->remaining); + cur->start = node->start; + cur->size = min(node->size, cur->remaining); break; default: return; diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 756289e43dff..7defda1219d0 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -94,7 +94,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, if (!rsgt) return ERR_PTR(-ENOMEM); - i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT); + i915_refct_sgt_init(rsgt, node->size); st = &rsgt->table;
RE: [RFC PATCH v2 00/18] Add DRM CRTC 3D LUT interface
[AMD Official Use Only - General] + Uday, for awareness. Regards Shashank -Original Message- From: Pekka Paalanen Sent: 14 February 2023 10:28 To: Melissa Wen Cc: Ville Syrjälä ; dri-de...@lists.freedesktop.org; airl...@gmail.com; laurent.pinchart+rene...@ideasonboard.com; Sharma, Shashank ; Siqueira, Rodrigo ; amd-gfx@lists.freedesktop.org; Hung, Alex ; Wentland, Harry ; tzimmerm...@suse.de; Li, Sun peng (Leo) ; maarten.lankho...@linux.intel.com; mrip...@kernel.org; seanp...@chromium.org; dan...@ffwll.ch; Lakha, Bhawanpreet ; Kim, Sung joon ; cont...@emersion.fr; Pan, Xinhui ; Koenig, Christian ; kernel-...@igalia.com; Deucher, Alexander ; Kazlauskas, Nicholas ; Joshua Ashton Subject: Re: [RFC PATCH v2 00/18] Add DRM CRTC 3D LUT interface On Mon, 13 Feb 2023 18:45:40 -0100 Melissa Wen wrote: > On 02/13, Ville Syrjälä wrote: > > On Mon, Feb 13, 2023 at 11:01:31AM +0200, Pekka Paalanen wrote: > > > On Fri, 10 Feb 2023 14:47:50 -0500 Harry Wentland > > > wrote: > > > > > > > On 2/10/23 04:28, Pekka Paalanen wrote: > > > > > On Thu, 9 Feb 2023 13:27:02 -0100 Melissa Wen > > > > > wrote: > > > > > > > > > >> On 01/31, Pekka Paalanen wrote: > > > > >>> On Mon, 9 Jan 2023 14:38:09 -0100 Melissa Wen > > > > >>> wrote: > > > > >>> > > > > On 01/09, Melissa Wen wrote: > > > > > Hi, > > > > > > > > > > After collecting comments in different places, here is a > > > > > second version of the work on adding DRM CRTC 3D LUT > > > > > support to the current DRM color mgmt interface. In > > > > > comparison to previous proposals [1][2][3], here we add 3D > > > > > LUT before gamma 1D LUT, but also a shaper 1D LUT before 3D LUT, > > > > > that means the following DRM CRTC color correction pipeline: > > > > > > > > > > Blend -> Degamma 1D LUT -> CTM -> Shaper 1D LUT -> 3D LUT -> > > > > > Gamma 1D LUT > > > > > > ... > > > > > > > >>> +/* > > > > >>> + * struct drm_mode_lut3d_mode - 3D LUT mode information. > > > > >>> + * @lut_size: number of valid points on every dimension of 3D LUT. > > > > >>> + * @lut_stride: number of points on every dimension of 3D LUT. > > > > >>> + * @bit_depth: number of bits of RGB. If color_mode defines > > > > >>> entries with higher > > > > >>> + * bit_depth the least significant bits will be > > > > >>> truncated. > > > > >>> + * @color_format: fourcc values, ex. DRM_FORMAT_XRGB16161616 or > > > > >>> DRM_FORMAT_XBGR16161616. > > > > >>> + * @flags: flags for hardware-sepcific features */ struct > > > > >>> +drm_mode_lut3d_mode { > > > > >>> + __u16 lut_size; > > > > >>> + __u16 lut_stride[3]; > > > > >>> + __u16 bit_depth; > > > > >>> + __u32 color_format; > > > > >>> + __u32 flags; > > > > >>> +}; > > > > > > ... > > > > > > > >>> What is "number of bits of RGB"? Input precision? Output precision? > > > > >>> Integer or floating point? > > > > >> > > > > >> It's the bit depth of the 3D LUT values, the same for every > > > > >> channels. In > > > > >> the AMD case, it's supports 10-bit and 12-bit, for example. > > > > > > > > > > Ok. So e.g. r5g6b5 is not a possible 3D LUT element type on > > > > > any hardware ever? > > > > > > > > > > > > > I haven't had a chance to go through all patches yet but if this > > > > is modeled after Alex Hung's work this should be covered by > > > > color_format. > > > > The idea is that color_format takes a FOURCC value and defines > > > > the format of the entries in the 3DLUT blob. > > > > > > > > The bit_depth describes the actual bit depth that the HW supports. > > > > E.g., color_format could be DRM_FORMAT_XRGB16161616 but HW might > > > > only support 12-bit precision. In that case the least > > > > significant bits get truncated. > > > > > > > > One could define the bit_depth per color, but I'm not sure > > > > that'll be necessary. > > > > > > Exactly. I just have no idea how sure we should be about that. > > > > > > > > What exactly is the truncation the comment refers to? > > > > > > > > > > It sounds like if input has higher precision than the LUT > > > > > elements, then "truncation" occurs. I can kind of see that, > > > > > but I also think it is a false characterisation. The LUT input > > > > > precision affects the precision of LUT indexing and the > > > > > precision of interpolation between the LUT elements. I would > > > > > not expect those two precisions to be truncated to the LUT > > > > > element precision (but they could be truncated to something > > > > > else hardware specific). Instead, I do expect the > > > > > interpolation result to be truncated to the LUT output precision, > > > > > which probably is the same as the LUT element precision, but not > > > > > necessarily. > > > > > > > > > > Maybe the comment about truncation should simply be removed? > > > > > The result is obvious if we know the LUT input, element, and > > > > >
Re: [PATCH 06/10] drm/amd/display: Fix implicit enum conversion
On 2/13/23 21:49, Arthur Grillo wrote: > Make implicit enum conversion to avoid -Wenum-conversion warning, such > as: > > drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn21/display_mode_vba_21.c:4109:88: > warning: implicit conversion from ‘enum ’ to ‘enum > odm_combine_mode’ [-Wenum-conversion] > 4109 | > locals->ODMCombineEnablePerState[i][k] = true; > | > ^ > > [...] > > @@ -3897,14 +3898,14 @@ void > dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l > > mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = > mode_lib->vba.PixelClock[k] / 2 > * (1 + > mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); > > - locals->ODMCombineEnablePerState[i][k] = false; > + locals->ODMCombineEnablePerState[i][k] = (enum > odm_combine_mode)false; dm_odm_combine_mode_disabled would seem clearer than (enum odm_combine_mode)false. > - > locals->ODMCombineEnablePerState[i][k] = true; > + > locals->ODMCombineEnablePerState[i][k] = (enum odm_combine_mode)true; I'm not sure which enum value (enum odm_combine_mode)true will be converted to, probably dm_odm_combine_mode_2to1? Is that really appropriate everywhere true is used? If so, again dm_odm_combine_mode_2to1 would seem clearer. -- Earthling Michel Dänzer| https://redhat.com Libre software enthusiast | Mesa and Xwayland developer
[PATCH 0/6] Trivial code cleanup around color resources
Hi, Sorry for the noise, but while I've been working on wiring 3D LUT support to AMD display driver [1] I found some annoying code style issues in the shared-code part. So I'm just sending what I've been cleaning to better examine the code. Most seem trivial, except the last one "remove unused _calculate_degamma_curve" since this could just be a matter of missing parts. If so, happy to remove the patch and include a comment describing the situation (or the potential usage of it). Thanks, Melissa [1] https://lore.kernel.org/dri-devel/20230109143846.1966301-1-m...@igalia.com/ Melissa Wen (6): drm/amd/display: ident braces in dcn30_acquire_post_bldn_3dlut correctly drm/amd/display: clean code-style issues in dcn30_set_mpc_shaper_3dlut drm/amd/display: camel case cleanup in color_gamma file drm/amd/display: unset initial value for tf since it's never used drm/amd/display: remove unused func declaration from resource headers drm/amd/display: remove unused _calculate_degamma_curve function .../drm/amd/display/dc/dcn30/dcn30_hwseq.c| 37 ++--- .../drm/amd/display/dc/dcn30/dcn30_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/inc/resource.h | 4 - .../amd/display/modules/color/color_gamma.c | 140 -- .../amd/display/modules/color/color_gamma.h | 3 - 5 files changed, 48 insertions(+), 138 deletions(-) -- 2.39.0
[PATCH 1/6] drm/amd/display: ident braces in dcn30_acquire_post_bldn_3dlut correctly
Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index feb4bb491525..60bb5634b6e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1477,8 +1477,8 @@ bool dcn30_acquire_post_bldn_3dlut( state->bits.mpc_rmu2_mux = mpcc_id; ret = true; break; - } } + } return ret; } -- 2.39.0
[PATCH 3/6] drm/amd/display: camel case cleanup in color_gamma file
Rename mapUserRamp to map_user_ramp and doClamping to do_clamping Signed-off-by: Melissa Wen --- .../amd/display/modules/color/color_gamma.c | 50 ++- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index f6034213c700..fdb20dfc70c9 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1715,8 +1715,8 @@ static bool map_regamma_hw_to_x_user( const struct pwl_float_data_ex *rgb_regamma, uint32_t hw_points_num, struct dc_transfer_func_distributed_points *tf_pts, - bool mapUserRamp, - bool doClamping) + bool map_user_ramp, + bool do_clamping) { /* setup to spare calculated ideal regamma values */ @@ -1724,7 +1724,7 @@ static bool map_regamma_hw_to_x_user( struct hw_x_point *coords = coords_x; const struct pwl_float_data_ex *regamma = rgb_regamma; - if (ramp && mapUserRamp) { + if (ramp && map_user_ramp) { copy_rgb_regamma_to_coordinates_x(coords, hw_points_num, rgb_regamma); @@ -1744,7 +1744,7 @@ static bool map_regamma_hw_to_x_user( } } - if (doClamping) { + if (do_clamping) { /* this should be named differently, all it does is clamp to 0-1 */ build_new_custom_resulted_curve(hw_points_num, tf_pts); } @@ -1875,7 +1875,7 @@ bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf, bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps, struct dc_transfer_func *input_tf, - const struct dc_gamma *ramp, bool mapUserRamp) + const struct dc_gamma *ramp, bool map_user_ramp) { struct dc_transfer_func_distributed_points *tf_pts = &input_tf->tf_pts; struct dividers dividers; @@ -1891,12 +1891,12 @@ bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps, return false; /* we can use hardcoded curve for plain SRGB TF -* If linear, it's bypass if on user ramp +* If linear, it's bypass if no user ramp */ if (input_tf->type == TF_TYPE_PREDEFINED) { if ((input_tf->tf == TRANSFER_FUNCTION_SRGB || input_tf->tf == TRANSFER_FUNCTION_LINEAR) && - !mapUserRamp) + !map_user_ramp) return true; if (dc_caps != NULL && @@ -1919,7 +1919,7 @@ bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps, input_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - if (mapUserRamp && ramp && ramp->type == GAMMA_RGB_256) { + if (map_user_ramp && ramp && ramp->type == GAMMA_RGB_256) { rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*rgb_user), GFP_KERNEL); @@ -2007,7 +2007,7 @@ bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps, map_regamma_hw_to_x_user(ramp, coeff, rgb_user, coordinates_x, axis_x, curve, MAX_HW_POINTS, tf_pts, - mapUserRamp && ramp && ramp->type == GAMMA_RGB_256, + map_user_ramp && ramp && ramp->type == GAMMA_RGB_256, true); } @@ -2112,9 +2112,11 @@ static bool calculate_curve(enum dc_transfer_func_predefined trans, } bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, - const struct dc_gamma *ramp, bool mapUserRamp, bool canRomBeUsed, - const struct hdr_tm_params *fs_params, - struct calculate_buffer *cal_buffer) + const struct dc_gamma *ramp, + bool map_user_ramp, + bool can_rom_be_used, + const struct hdr_tm_params *fs_params, + struct calculate_buffer *cal_buffer) { struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts; struct dividers dividers; @@ -2124,26 +2126,26 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, struct gamma_pixel *axis_x = NULL; struct pixel_gamma_point *coeff = NULL; enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; - bool doClamping = true; + bool do_clamping = true; bool ret = false; if (output_tf->type == TF_TYPE_BYPASS) return false; /* we can use hardcoded curve for plain SRGB TF */ - i
[PATCH 5/6] drm/amd/display: remove unused func declaration from resource headers
The function resource_validate_ctx_update_pointer_after_copy() is declared in resource.h but never defined, therefore, remove its declaration from headers. Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/display/dc/inc/resource.h | 4 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 4ab029e3326d..fa6da93caa88 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -165,10 +165,6 @@ bool resource_validate_attach_surfaces( struct dc_state *context, const struct resource_pool *pool); -void resource_validate_ctx_update_pointer_after_copy( - const struct dc_state *src_ctx, - struct dc_state *dst_ctx); - enum dc_status resource_map_clock_resources( const struct dc *dc, struct dc_state *context, -- 2.39.0
[PATCH 2/6] drm/amd/display: clean code-style issues in dcn30_set_mpc_shaper_3dlut
This function has many conditions and all code style issues (identation, missing braces, etc.) make reading it really annoying. Signed-off-by: Melissa Wen --- .../drm/amd/display/dc/dcn30/dcn30_hwseq.c| 37 ++- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 9ce86f288130..df787fcf8e86 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -90,8 +90,8 @@ bool dcn30_set_blend_lut( return result; } -static bool dcn30_set_mpc_shaper_3dlut( - struct pipe_ctx *pipe_ctx, const struct dc_stream_state *stream) +static bool dcn30_set_mpc_shaper_3dlut(struct pipe_ctx *pipe_ctx, + const struct dc_stream_state *stream) { struct dpp *dpp_base = pipe_ctx->plane_res.dpp; int mpcc_id = pipe_ctx->plane_res.hubp->inst; @@ -103,19 +103,18 @@ static bool dcn30_set_mpc_shaper_3dlut( const struct pwl_params *shaper_lut = NULL; //get the shaper lut params if (stream->func_shaper) { - if (stream->func_shaper->type == TF_TYPE_HWPWL) + if (stream->func_shaper->type == TF_TYPE_HWPWL) { shaper_lut = &stream->func_shaper->pwl; - else if (stream->func_shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format( - stream->func_shaper, - &dpp_base->shaper_params, true); + } else if (stream->func_shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { + cm_helper_translate_curve_to_hw_format(stream->func_shaper, + &dpp_base->shaper_params, true); shaper_lut = &dpp_base->shaper_params; } } if (stream->lut3d_func && - stream->lut3d_func->state.bits.initialized == 1 && - stream->lut3d_func->state.bits.rmu_idx_valid == 1) { + stream->lut3d_func->state.bits.initialized == 1 && + stream->lut3d_func->state.bits.rmu_idx_valid == 1) { if (stream->lut3d_func->state.bits.rmu_mux_num == 0) mpcc_id_projected = stream->lut3d_func->state.bits.mpc_rmu0_mux; else if (stream->lut3d_func->state.bits.rmu_mux_num == 1) @@ -124,20 +123,22 @@ static bool dcn30_set_mpc_shaper_3dlut( mpcc_id_projected = stream->lut3d_func->state.bits.mpc_rmu2_mux; if (mpcc_id_projected != mpcc_id) BREAK_TO_DEBUGGER(); - /*find the reason why logical layer assigned a differant mpcc_id into acquire_post_bldn_3dlut*/ + /* find the reason why logical layer assigned a different +* mpcc_id into acquire_post_bldn_3dlut +*/ acquired_rmu = mpc->funcs->acquire_rmu(mpc, mpcc_id, - stream->lut3d_func->state.bits.rmu_mux_num); + stream->lut3d_func->state.bits.rmu_mux_num); if (acquired_rmu != stream->lut3d_func->state.bits.rmu_mux_num) BREAK_TO_DEBUGGER(); - result = mpc->funcs->program_3dlut(mpc, - &stream->lut3d_func->lut_3d, - stream->lut3d_func->state.bits.rmu_mux_num); + + result = mpc->funcs->program_3dlut(mpc, &stream->lut3d_func->lut_3d, + stream->lut3d_func->state.bits.rmu_mux_num); result = mpc->funcs->program_shaper(mpc, shaper_lut, - stream->lut3d_func->state.bits.rmu_mux_num); - } else - /*loop through the available mux and release the requested mpcc_id*/ + stream->lut3d_func->state.bits.rmu_mux_num); + } else { + // loop through the available mux and release the requested mpcc_id mpc->funcs->release_rmu(mpc, mpcc_id); - + } return result; } -- 2.39.0
[PATCH 4/6] drm/amd/display: unset initial value for tf since it's never used
In mod_color_calculate_{degamma/regamma}_params(), a tf variable is initialized as TRANSFER_FUNCTION_SRGB but tf is only used after tf = input->tf, therefore, better to just remove this initial value and avoid misleading interpretations. Signed-off-by: Melissa Wen --- drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index fdb20dfc70c9..6e606b11286a 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1883,7 +1883,7 @@ bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps, struct pwl_float_data_ex *curve = NULL; struct gamma_pixel *axis_x = NULL; struct pixel_gamma_point *coeff = NULL; - enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; + enum dc_transfer_func_predefined tf; uint32_t i; bool ret = false; @@ -2125,7 +2125,7 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, struct pwl_float_data_ex *rgb_regamma = NULL; struct gamma_pixel *axis_x = NULL; struct pixel_gamma_point *coeff = NULL; - enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; + enum dc_transfer_func_predefined tf; bool do_clamping = true; bool ret = false; -- 2.39.0
[PATCH 6/6] drm/amd/display: remove unused _calculate_degamma_curve function
We don't use this function anywhere, therefore, remove it. Signed-off-by: Melissa Wen --- .../amd/display/modules/color/color_gamma.c | 86 --- .../amd/display/modules/color/color_gamma.h | 3 - 2 files changed, 89 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 6e606b11286a..67a062af3ab0 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -2217,89 +2217,3 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, rgb_user_alloc_fail: return ret; } - -bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, - struct dc_transfer_func_distributed_points *points) -{ - uint32_t i; - bool ret = false; - struct pwl_float_data_ex *rgb_degamma = NULL; - - if (trans == TRANSFER_FUNCTION_UNITY || - trans == TRANSFER_FUNCTION_LINEAR) { - - for (i = 0; i <= MAX_HW_POINTS ; i++) { - points->red[i]= coordinates_x[i].x; - points->green[i] = coordinates_x[i].x; - points->blue[i] = coordinates_x[i].x; - } - ret = true; - } else if (trans == TRANSFER_FUNCTION_PQ) { - rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, - sizeof(*rgb_degamma), - GFP_KERNEL); - if (!rgb_degamma) - goto rgb_degamma_alloc_fail; - - - build_de_pq(rgb_degamma, - MAX_HW_POINTS, - coordinates_x); - for (i = 0; i <= MAX_HW_POINTS ; i++) { - points->red[i]= rgb_degamma[i].r; - points->green[i] = rgb_degamma[i].g; - points->blue[i] = rgb_degamma[i].b; - } - ret = true; - - kvfree(rgb_degamma); - } else if (trans == TRANSFER_FUNCTION_SRGB || - trans == TRANSFER_FUNCTION_BT709 || - trans == TRANSFER_FUNCTION_GAMMA22 || - trans == TRANSFER_FUNCTION_GAMMA24 || - trans == TRANSFER_FUNCTION_GAMMA26) { - rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, - sizeof(*rgb_degamma), - GFP_KERNEL); - if (!rgb_degamma) - goto rgb_degamma_alloc_fail; - - build_degamma(rgb_degamma, - MAX_HW_POINTS, - coordinates_x, - trans); - for (i = 0; i <= MAX_HW_POINTS ; i++) { - points->red[i]= rgb_degamma[i].r; - points->green[i] = rgb_degamma[i].g; - points->blue[i] = rgb_degamma[i].b; - } - ret = true; - - kvfree(rgb_degamma); - } else if (trans == TRANSFER_FUNCTION_HLG) { - rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, - sizeof(*rgb_degamma), - GFP_KERNEL); - if (!rgb_degamma) - goto rgb_degamma_alloc_fail; - - build_hlg_degamma(rgb_degamma, - MAX_HW_POINTS, - coordinates_x, - 80, 1000); - for (i = 0; i <= MAX_HW_POINTS ; i++) { - points->red[i]= rgb_degamma[i].r; - points->green[i] = rgb_degamma[i].g; - points->blue[i] = rgb_degamma[i].b; - } - ret = true; - kvfree(rgb_degamma); - } - points->end_exponent = 0; - points->x_point_at_y1_red = 1; - points->x_point_at_y1_green = 1; - points->x_point_at_y1_blue = 1; - -rgb_degamma_alloc_fail: - return ret; -} diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h index 2893abf48208..ee5c466613de 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h @@ -115,9 +115,6 @@ bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps, struct dc_transfer_func *output_tf, const struct dc_gamma *ramp, bool mapUserRamp); -bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, - struct dc_transfer_func_distributed_points *points); - bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf, c
Re: [PATCH 0/6] Trivial code cleanup around color resources
Am 14.02.23 um 13:14 schrieb Melissa Wen: Hi, Sorry for the noise, but while I've been working on wiring 3D LUT support to AMD display driver [1] I found some annoying code style issues in the shared-code part. So I'm just sending what I've been cleaning to better examine the code. Most seem trivial, except the last one "remove unused _calculate_degamma_curve" since this could just be a matter of missing parts. If so, happy to remove the patch and include a comment describing the situation (or the potential usage of it). The display stack is not my field of expertise, but those cleanups are so obvious that I think I can safely give my Reviewed-by: Christian König for the entire series. Thanks, Christian. Thanks, Melissa [1] https://lore.kernel.org/dri-devel/20230109143846.1966301-1-m...@igalia.com/ Melissa Wen (6): drm/amd/display: ident braces in dcn30_acquire_post_bldn_3dlut correctly drm/amd/display: clean code-style issues in dcn30_set_mpc_shaper_3dlut drm/amd/display: camel case cleanup in color_gamma file drm/amd/display: unset initial value for tf since it's never used drm/amd/display: remove unused func declaration from resource headers drm/amd/display: remove unused _calculate_degamma_curve function .../drm/amd/display/dc/dcn30/dcn30_hwseq.c| 37 ++--- .../drm/amd/display/dc/dcn30/dcn30_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/inc/resource.h | 4 - .../amd/display/modules/color/color_gamma.c | 140 -- .../amd/display/modules/color/color_gamma.h | 3 - 5 files changed, 48 insertions(+), 138 deletions(-)
[PATCH] drm/gem: Expose the buffer object handle to userspace last
From: Tvrtko Ursulin Currently drm_gem_handle_create_tail exposes the handle to userspace before the buffer object constructions is complete. This allowing of working against a partially constructed object, which may also be in the process of having its creation fail, can have a range of negative outcomes. A lot of those will depend on what the individual drivers are doing in their obj->funcs->open() callbacks, and also with a common failure mode being -ENOMEM from drm_vma_node_allow. We can make sure none of this can happen by allocating a handle last, although with a downside that more of the function now runs under the dev->object_name_lock. Looking into the individual drivers open() hooks, we have amdgpu_gem_object_open which seems like it could have a potential security issue without this change. A couple drivers like qxl_gem_object_open and vmw_gem_object_open implement no-op hooks so no impact for them. A bunch of other require a deeper look by individual owners to asses for impact. Those are lima_gem_object_open, nouveau_gem_object_open, panfrost_gem_open, radeon_gem_object_open and virtio_gpu_gem_object_open. Putting aside the risk assesment of the above, some common scenarios to think about are along these lines: 1) Userspace closes a handle by speculatively "guessing" it from a second thread. This results in an unreachable buffer object so, a memory leak. 2) Same as 1), but object is in the process of getting closed (failed creation). The second thread is then able to re-cycle the handle and idr_remove would in the first thread would then remove the handle it does not own from the idr. 3) Going back to the earlier per driver problem space - individual impact assesment of allowing a second thread to access and operate on a partially constructed handle / object. (Can something crash? Leak information?) In terms of identifying when the problem started I will tag some patches as references, but not all, if even any, of them actually point to a broken state. I am just identifying points at which more opportunity for issues to arise was added. References: 304eda32920b ("drm/gem: add hooks to notify driver when object handle is created/destroyed") References: ca481c9b2a3a ("drm/gem: implement vma access management") References: b39b5394fabc ("drm/gem: Add drm_gem_object_funcs") Cc: dri-de...@lists.freedesktop.org Cc: Rob Clark Cc: Ben Skeggs Cc: David Herrmann Cc: Noralf Trønnes Cc: David Airlie Cc: Daniel Vetter Cc: amd-gfx@lists.freedesktop.org Cc: l...@lists.freedesktop.org Cc: nouv...@lists.freedesktop.org Cc: Steven Price Cc: virtualizat...@lists.linux-foundation.org Cc: spice-de...@lists.freedesktop.org Cc: Zack Rusin --- drivers/gpu/drm/drm_gem.c | 48 +++ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index aa15c52ae182..e3d897bca0f2 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -356,52 +356,52 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, u32 *handlep) { struct drm_device *dev = obj->dev; - u32 handle; int ret; WARN_ON(!mutex_is_locked(&dev->object_name_lock)); if (obj->handle_count++ == 0) drm_gem_object_get(obj); + ret = drm_vma_node_allow(&obj->vma_node, file_priv); + if (ret) + goto err_put; + + if (obj->funcs->open) { + ret = obj->funcs->open(obj, file_priv); + if (ret) + goto err_revoke; + } + /* -* Get the user-visible handle using idr. Preload and perform -* allocation under our spinlock. +* Get the user-visible handle using idr as the _last_ step. +* Preload and perform allocation under our spinlock. */ idr_preload(GFP_KERNEL); spin_lock(&file_priv->table_lock); - ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT); - spin_unlock(&file_priv->table_lock); idr_preload_end(); - mutex_unlock(&dev->object_name_lock); if (ret < 0) - goto err_unref; - - handle = ret; + goto err_close; - ret = drm_vma_node_allow(&obj->vma_node, file_priv); - if (ret) - goto err_remove; + mutex_unlock(&dev->object_name_lock); - if (obj->funcs->open) { - ret = obj->funcs->open(obj, file_priv); - if (ret) - goto err_revoke; - } + *handlep = ret; - *handlep = handle; return 0; +err_close: + if (obj->funcs->close) + obj->funcs->close(obj, file_priv); err_revoke: drm_vma_node_revoke(&obj->vma_node, file_priv); -err_remove: - spin_lock(&file_priv->table_lock); - idr_remove(&file_priv->object_idr, handle); - spin_unlock(&file_priv->table_lock); -err_unref: -
Re: [PATCH] drm/gem: Expose the buffer object handle to userspace last
Am 14.02.23 um 13:50 schrieb Tvrtko Ursulin: From: Tvrtko Ursulin Currently drm_gem_handle_create_tail exposes the handle to userspace before the buffer object constructions is complete. This allowing of working against a partially constructed object, which may also be in the process of having its creation fail, can have a range of negative outcomes. A lot of those will depend on what the individual drivers are doing in their obj->funcs->open() callbacks, and also with a common failure mode being -ENOMEM from drm_vma_node_allow. We can make sure none of this can happen by allocating a handle last, although with a downside that more of the function now runs under the dev->object_name_lock. Looking into the individual drivers open() hooks, we have amdgpu_gem_object_open which seems like it could have a potential security issue without this change. A couple drivers like qxl_gem_object_open and vmw_gem_object_open implement no-op hooks so no impact for them. A bunch of other require a deeper look by individual owners to asses for impact. Those are lima_gem_object_open, nouveau_gem_object_open, panfrost_gem_open, radeon_gem_object_open and virtio_gpu_gem_object_open. Putting aside the risk assesment of the above, some common scenarios to think about are along these lines: 1) Userspace closes a handle by speculatively "guessing" it from a second thread. This results in an unreachable buffer object so, a memory leak. 2) Same as 1), but object is in the process of getting closed (failed creation). The second thread is then able to re-cycle the handle and idr_remove would in the first thread would then remove the handle it does not own from the idr. 3) Going back to the earlier per driver problem space - individual impact assesment of allowing a second thread to access and operate on a partially constructed handle / object. (Can something crash? Leak information?) In terms of identifying when the problem started I will tag some patches as references, but not all, if even any, of them actually point to a broken state. I am just identifying points at which more opportunity for issues to arise was added. Yes I've looked into this once as well, but couldn't completely solve it for some reason. Give me a day or two to get this tested and all the logic swapped back into my head again. Christian. References: 304eda32920b ("drm/gem: add hooks to notify driver when object handle is created/destroyed") References: ca481c9b2a3a ("drm/gem: implement vma access management") References: b39b5394fabc ("drm/gem: Add drm_gem_object_funcs") Cc: dri-de...@lists.freedesktop.org Cc: Rob Clark Cc: Ben Skeggs Cc: David Herrmann Cc: Noralf Trønnes Cc: David Airlie Cc: Daniel Vetter Cc: amd-gfx@lists.freedesktop.org Cc: l...@lists.freedesktop.org Cc: nouv...@lists.freedesktop.org Cc: Steven Price Cc: virtualizat...@lists.linux-foundation.org Cc: spice-de...@lists.freedesktop.org Cc: Zack Rusin --- drivers/gpu/drm/drm_gem.c | 48 +++ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index aa15c52ae182..e3d897bca0f2 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -356,52 +356,52 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, u32 *handlep) { struct drm_device *dev = obj->dev; - u32 handle; int ret; WARN_ON(!mutex_is_locked(&dev->object_name_lock)); if (obj->handle_count++ == 0) drm_gem_object_get(obj); + ret = drm_vma_node_allow(&obj->vma_node, file_priv); + if (ret) + goto err_put; + + if (obj->funcs->open) { + ret = obj->funcs->open(obj, file_priv); + if (ret) + goto err_revoke; + } + /* -* Get the user-visible handle using idr. Preload and perform -* allocation under our spinlock. +* Get the user-visible handle using idr as the _last_ step. +* Preload and perform allocation under our spinlock. */ idr_preload(GFP_KERNEL); spin_lock(&file_priv->table_lock); - ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT); - spin_unlock(&file_priv->table_lock); idr_preload_end(); - mutex_unlock(&dev->object_name_lock); if (ret < 0) - goto err_unref; - - handle = ret; + goto err_close; - ret = drm_vma_node_allow(&obj->vma_node, file_priv); - if (ret) - goto err_remove; + mutex_unlock(&dev->object_name_lock); - if (obj->funcs->open) { - ret = obj->funcs->open(obj, file_priv); - if (ret) - goto err_revoke; - } + *handlep = ret; - *handlep = handle; return 0; +err_close: + if (obj->funcs->close) + obj->funcs->close(obj, file_priv); err_revo
Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum
On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä wrote: > > On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote: > > > > > > On 2/3/23 10:19, Ville Syrjälä wrote: > > > On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote: > > >> > > >> > > >> On 2/3/23 07:59, Sebastian Wick wrote: > > >>> On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä > > >>> wrote: > > > > On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote: > > > Userspace has no way of controlling or knowing the pixel encoding > > > currently, so there is no way for it to ever get the right values > > > here. > > > > That applies to a lot of the other values as well (they are > > explicitly RGB or YCC). The idea was that this property sets the > > infoframe/MSA/SDP value exactly, and other properties should be > > added to for use userspace to control the pixel encoding/colorspace > > conversion(if desired, or userspace just makes sure to > > directly feed in correct kind of data). > > >>> > > >>> I'm all for getting userspace control over pixel encoding but even > > >>> then the kernel always knows which pixel encoding is selected and > > >>> which InfoFrame has to be sent. Is there a reason why userspace would > > >>> want to control the variant explicitly to the wrong value? > > >>> > > >> > > >> I've asked this before but haven't seen an answer: Is there an existing > > >> upstream userspace project that makes use of this property (other than > > >> what Joshua is working on in gamescope right now)? That would help us > > >> understand the intent better. > > > > > > The intent was to control the infoframe colorimetry bits, > > > nothing more. No idea what real userspace there was, if any. > > > > > >> > > >> I don't think giving userspace explicit control over the exact infoframe > > >> values is the right thing to do. > > > > > > Only userspace knows what kind of data it's stuffing into > > > the pixels (and/or how it configures the csc units/etc.) to > > > generate them. > > > > > > > Yes, but userspace doesn't control or know whether we drive > > RGB or YCbCr on the wire. In fact, in some cases our driver > > needs to fallback to YCbCr420 for bandwidth reasons. There > > is currently no way for userspace to know that and I don't > > think it makes sense. > > People want that control as well for whatever reason. We've > been asked to allow YCbCr 4:4:4 output many times. I don't really think it's a question of if we want it but rather how we get there. Harry is completely right that if we would make the subsampling controllable by user space instead of the kernel handling it magically, user space which does not adapt to the new control won't be able to light up some modes which worked before. This is obviously a problem and not one we can easily fix. We would need a new cap for user space to signal "I know that I can control bpc, subsampling and compression to lower the bandwidth and light up modes which otherwise fail". That cap would also remove all the properties which require kernel magic to work (that's also what I proposed for my KMS color pipeline API). We all want to expose more of the scanout capability and give user space more control but I don't think an incremental approach works here and we would all do better if we accept that the current API requires kernel magic to work and has a few implicit assumptions baked in. With all that being said, I think the right decision here is to 1. Ignore subsampling for now 2. Let the kernel select YCC or RGB on the cable 3. Let the kernel figure out the conversion between RGB and YCC based on the color space selected 4. Let the kernel send the correct infoframe based on the selected color space and cable encoding 5. Only expose color spaces for which the kernel can do the conversion and send the infoframe 6. Work on the new API which is hidden behind a cap > The automagic 4:2:0 fallback I think is rather fundementally > incompatible with fancy color management. How would we even > know whether to use eg. BT.2020 vs. BT.709 matrix? In i915 > that stuff is just always BT.709 limited range, no questions > asked. > > So I think if userspace wants real color management it's > going to have to set up the whole pipeline. And for that > we need at least one new property to control the RGB->YCbCr > conversion (or to explicitly avoid it). > > And given that the proposed patch just swept all the > non-BT.2020 issues under the rug makes me think no > one has actually come up with any kind of consistent > plan for anything else really. > > > > > Userspace needs full control of framebuffer pixel formats, > > as well as control over DEGAMMA, GAMMA, CTM color operations. > > It also needs to be able to select whether to drive the panel > > as sRGB or BT.2020/PQ but it doesn't make sense for it to > > control the pixel encoding on the wire (RGB vs YCbCr). > > > > > I really don't want a repeat of the disaster of the > > > 'B
[PATCH v2 0/8] Re-design doorbell framework for usermode queues
From: Shashank Sharma This patch series re-designs the current doorbell handling of the AMDGPU driver and prepares it for Usermode queues. The fundamental changes are: - Introduce and accommodate a new GEM domain for doorbells. - Prepare the AMDGPU ttm backend for handling doorbell memory. - Create a doorbell BO for kernel-level doorbell opertations. - Rename, move and re-arrange some existing structures. The idea is that a usermode app can directly allocate a page from the doorbell bar and use it's offsets for different usermode queues. Corresponding libdrm changes (just addition of this new flag): https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/286 Alex Deucher (6): drm/amdgpu: add UAPI for allocating doorbell memory drm/amdgpu: replace aper_base_kaddr with vram_aper_base_kaddr drm/amdgpu: rename gmc.aper_base/size drm/amdgpu: rename doorbell variables drm/amdgpu: accommodate DOMAIN/PL_DOORBELL drm/amdgpu: get doorbell memory Shashank Sharma (2): drm/amdgpu: create doorbell kernel object drm/amdgpu: start using kernel doorbell bo drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 13 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 67 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 32 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 51 --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 11 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 10 +-- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 10 +-- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c| 6 +- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c| 12 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c| 10 +-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 10 +-- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 4 +- drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c | 4 +- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 4 +- drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c | 4 +- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 4 +- drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c | 4 +- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 10 +-- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +-- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 4 +- include/uapi/drm/amdgpu_drm.h| 7 +- 31 files changed, 193 insertions(+), 137 deletions(-) -- 2.34.1
[PATCH v2 2/8] drm/amdgpu: replace aper_base_kaddr with vram_aper_base_kaddr
From: Alex Deucher To differentiate it from the doorbell BAR. V2: Added Christian's A-B Acked-by: Christian Koenig Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 14 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h| 2 +- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 10 +- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2f28a8c02f64..0b6a394e109b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -354,12 +354,12 @@ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos, size_t count = 0; uint64_t last; - if (!adev->mman.aper_base_kaddr) + if (!adev->mman.vram_aper_base_kaddr) return 0; last = min(pos + size, adev->gmc.visible_vram_size); if (last > pos) { - addr = adev->mman.aper_base_kaddr + pos; + addr = adev->mman.vram_aper_base_kaddr + pos; count = last - pos; if (write) { @@ -3954,9 +3954,9 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; - if (adev->mman.aper_base_kaddr) - iounmap(adev->mman.aper_base_kaddr); - adev->mman.aper_base_kaddr = NULL; + if (adev->mman.vram_aper_base_kaddr) + iounmap(adev->mman.vram_aper_base_kaddr); + adev->mman.vram_aper_base_kaddr = NULL; /* Memory manager related */ if (!adev->gmc.xgmi.connected_to_cpu) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 55e0284b2bdd..73b831b47892 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -578,9 +578,9 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size) return -EINVAL; - if (adev->mman.aper_base_kaddr && + if (adev->mman.vram_aper_base_kaddr && mem->placement & TTM_PL_FLAG_CONTIGUOUS) - mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr + + mem->bus.addr = (u8 *)adev->mman.vram_aper_base_kaddr + mem->bus.offset; mem->bus.offset += adev->gmc.aper_base; @@ -1752,12 +1752,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) #ifdef CONFIG_64BIT #ifdef CONFIG_X86 if (adev->gmc.xgmi.connected_to_cpu) - adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, + adev->mman.vram_aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, adev->gmc.visible_vram_size); else #endif - adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, + adev->mman.vram_aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, adev->gmc.visible_vram_size); #endif @@ -1904,9 +1904,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) if (drm_dev_enter(adev_to_drm(adev), &idx)) { - if (adev->mman.aper_base_kaddr) - iounmap(adev->mman.aper_base_kaddr); - adev->mman.aper_base_kaddr = NULL; + if (adev->mman.vram_aper_base_kaddr) + iounmap(adev->mman.vram_aper_base_kaddr); + adev->mman.vram_aper_base_kaddr = NULL; drm_dev_exit(idx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index e2cd5894afc9..929bc8abac28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -50,7 +50,7 @@ struct amdgpu_gtt_mgr { struct amdgpu_mman { struct ttm_device bdev; boolinitialized; - void __iomem*aper_base_kaddr; + void __iomem*vram_aper_base_kaddr; /* buffer handling */ const struct amdgpu_buffer_funcs*buffer_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index bd3e3e23a939..f39d4f593a2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -611,10 +611,10 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) */ sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; - if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { - DRM_ERROR("visible
[PATCH v2 6/8] drm/amdgpu: get doorbell memory
From: Alex Deucher This patch adds section for doorbell memory in memory status reporting functions like vm/bo_get_memory. V2: Rebase Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 9 - drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 ++- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 99a7855ab1bc..202df09ba5de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -60,7 +60,7 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f) struct amdgpu_fpriv *fpriv = file->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; - uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0; + uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0, doorbell_mem = 0; ktime_t usage[AMDGPU_HW_IP_NUM]; uint32_t bus, dev, fn, domain; unsigned int hw_ip; @@ -75,7 +75,7 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f) if (ret) return; - amdgpu_vm_get_memory(vm, &vram_mem, >t_mem, &cpu_mem); + amdgpu_vm_get_memory(vm, &vram_mem, >t_mem, &cpu_mem, &doorbell_mem); amdgpu_bo_unreserve(vm->root.bo); amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index ff9979fecfd2..90e97abc1454 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1275,7 +1275,8 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, } void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, - uint64_t *gtt_mem, uint64_t *cpu_mem) + uint64_t *gtt_mem, uint64_t *cpu_mem, + uint64_t *doorbell_mem) { unsigned int domain; @@ -1287,6 +1288,9 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, case AMDGPU_GEM_DOMAIN_GTT: *gtt_mem += amdgpu_bo_size(bo); break; + case AMDGPU_GEM_DOMAIN_DOORBELL: + *doorbell_mem += amdgpu_bo_size(bo); + break; case AMDGPU_GEM_DOMAIN_CPU: default: *cpu_mem += amdgpu_bo_size(bo); @@ -1565,6 +1569,9 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) case AMDGPU_GEM_DOMAIN_GTT: placement = " GTT"; break; + case AMDGPU_GEM_DOMAIN_DOORBELL: + placement = "DOOR"; + break; case AMDGPU_GEM_DOMAIN_CPU: default: placement = " CPU"; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 93207badf83f..bf9759758f0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -326,7 +326,8 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr); u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, - uint64_t *gtt_mem, uint64_t *cpu_mem); + uint64_t *gtt_mem, uint64_t *cpu_mem, + uint64_t *doorbell_mem); void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo); int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index dc379dc22c77..1561d138945b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -918,7 +918,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, } void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, - uint64_t *gtt_mem, uint64_t *cpu_mem) + uint64_t *gtt_mem, uint64_t *cpu_mem, + uint64_t *doorbell_mem) { struct amdgpu_bo_va *bo_va, *tmp; @@ -927,37 +928,37 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, if (!bo_va->base.bo) continue; amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, - gtt_mem, cpu_mem); +gtt_mem, cpu_mem, doorbell_mem); } list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) { if (!bo_va->base.bo) c
[PATCH v2 1/8] drm/amdgpu: add UAPI for allocating doorbell memory
From: Alex Deucher This patch adds flags for a new gem domain AMDGPU_GEM_DOMAIN_DOORBELL in the UAPI layer. V2: Drop 'memory' from description (Christian) Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4038abe8505a..cc5d551abda5 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -94,6 +94,9 @@ extern "C" { * * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines * for appending data. + * + * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for + * signalling user mode queues. */ #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -101,12 +104,14 @@ extern "C" { #define AMDGPU_GEM_DOMAIN_GDS 0x8 #define AMDGPU_GEM_DOMAIN_GWS 0x10 #define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ AMDGPU_GEM_DOMAIN_GTT | \ AMDGPU_GEM_DOMAIN_VRAM | \ AMDGPU_GEM_DOMAIN_GDS | \ AMDGPU_GEM_DOMAIN_GWS | \ -AMDGPU_GEM_DOMAIN_OA) +AMDGPU_GEM_DOMAIN_OA | \ +AMDGPU_GEM_DOMAIN_DOORBELL) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) -- 2.34.1
[PATCH v2 5/8] drm/amdgpu: accommodate DOMAIN/PL_DOORBELL
From: Alex Deucher This patch adds changes: - to accommodate the new GEM domain DOORBELL - to accommodate the new TTM PL DOORBELL to manage doorbell allocations as GEM Objects. V2: Addressed reviwe comments from Christian - drop the doorbell changes for pinning/unpinning - drop the doorbell changes for dma-buf map - drop the doorbell changes for sgt - no need to handle TTM_PL_FLAG_CONTIGUOUS for doorbell Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 11 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 11 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h| 1 + 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index b48c9fd60c43..ff9979fecfd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -147,6 +147,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) c++; } + if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) { + places[c].fpfn = 0; + places[c].lpfn = 0; + places[c].mem_type = AMDGPU_PL_DOORBELL; + places[c].flags = 0; + c++; + } + if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; places[c].lpfn = 0; @@ -466,7 +474,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, goto fail; } - /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */ + /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_DOMAIN_DOORBELL */ return true; fail: @@ -1014,6 +1022,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo) } else if (bo->tbo.resource->mem_type == TTM_PL_TT) { atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size); } + } static const char *amdgpu_vram_names[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0e8f580769ab..e9dc24191fc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -128,6 +128,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, case AMDGPU_PL_GDS: case AMDGPU_PL_GWS: case AMDGPU_PL_OA: + case AMDGPU_PL_DOORBELL: placement->num_placement = 0; placement->num_busy_placement = 0; return; @@ -500,9 +501,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (old_mem->mem_type == AMDGPU_PL_GDS || old_mem->mem_type == AMDGPU_PL_GWS || old_mem->mem_type == AMDGPU_PL_OA || + old_mem->mem_type == AMDGPU_PL_DOORBELL || new_mem->mem_type == AMDGPU_PL_GDS || new_mem->mem_type == AMDGPU_PL_GWS || - new_mem->mem_type == AMDGPU_PL_OA) { + new_mem->mem_type == AMDGPU_PL_OA || + new_mem->mem_type == AMDGPU_PL_DOORBELL) { /* Nothing to save here */ ttm_bo_move_null(bo, new_mem); goto out; @@ -586,6 +589,11 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.offset += adev->gmc.vram_aper_base; mem->bus.is_iomem = true; break; + case AMDGPU_PL_DOORBELL: + mem->bus.offset = mem->start << PAGE_SHIFT; + mem->bus.offset += adev->doorbell.doorbell_aper_base; + mem->bus.is_iomem = true; + break; default: return -EINVAL; } @@ -1267,6 +1275,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) flags |= AMDGPU_PTE_VALID; if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_DOORBELL || mem->mem_type == AMDGPU_PL_PREEMPT)) { flags |= AMDGPU_PTE_SYSTEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 967b265dbfa1..9cf5d8419965 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -33,6 +33,7 @@ #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) #define AMDGPU_PL_OA (TTM_PL_PRIV + 2) #define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3) +#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4) #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS2 -- 2.34.1
[PATCH v2 3/8] drm/amdgpu: rename gmc.aper_base/size
From: Alex Deucher This patch renames aper_base and aper_size parameters (in adev->gmc), to vram_aper_base and vram_aper_size, to differentiate it from the doorbell BAR. V2: rebase Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 12 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 10 +- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 10 +- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c| 6 +++--- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c| 12 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c| 10 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 10 +- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 4 ++-- 14 files changed, 49 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index f99d4873bf22..58689b2a2d1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -438,7 +438,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, mem_info->vram_width = adev->gmc.vram_width; pr_debug("Address base: %pap public 0x%llx private 0x%llx\n", - &adev->gmc.aper_base, + &adev->gmc.vram_aper_base, mem_info->local_mem_size_public, mem_info->local_mem_size_private); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0b6a394e109b..45588b7919fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3961,7 +3961,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) /* Memory manager related */ if (!adev->gmc.xgmi.connected_to_cpu) { arch_phys_wc_del(adev->gmc.vram_mtrr); - arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); + arch_io_free_memtype_wc(adev->gmc.vram_aper_base, adev->gmc.vram_aper_size); } } @@ -5562,14 +5562,14 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, uint64_t address_mask = peer_adev->dev->dma_mask ? ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); resource_size_t aper_limit = - adev->gmc.aper_base + adev->gmc.aper_size - 1; + adev->gmc.vram_aper_base + adev->gmc.vram_aper_size - 1; bool p2p_access = !adev->gmc.xgmi.connected_to_cpu && !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && adev->gmc.real_vram_size == adev->gmc.visible_vram_size && - !(adev->gmc.aper_base & address_mask || + !(adev->gmc.vram_aper_base & address_mask || aper_limit & address_mask)); #else return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 02a4c93673ce..c7e64e234de6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -775,7 +775,7 @@ uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo) */ uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo) { - return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base; + return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.vram_aper_base; } int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 0305b660cd17..bb7076ecbf01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -167,8 +167,8 @@ struct amdgpu_gmc { * gart/vram_start/end field as the later is from * GPU's view and aper_base is from CPU's view. */ - resource_size_t aper_size; - resource_size_t aper_base; + resource_size_t vram_aper_size; + resource_size_t vram_aper_base; /* for some chips with <= 32MB we need to lie * about vram size near mc fb location */ u64 mc_vram_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 25a68de0..b48c9fd60c43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/
[PATCH v2 7/8] drm/amdgpu: create doorbell kernel object
From: Shashank Sharma This patch does the following: - Initializes TTM range management for domain DOORBELL. - Introduces a kernel bo for doorbell management in form of mman.doorbell_kernel_bo. This bo holds the kernel doorbell space now. - Removes ioremapping of doorbell-kernel memory, as its not required now. V2: - Addressed review comments from Christian: - do not use kernel_create_at(0), use kernel_create() instead. - do not use ttm_resource_manager, use range_manager instead. - do not ioremap doorbell, TTM will do that. - Split one big patch into 2 Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 22 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 7 +++ 2 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e9dc24191fc8..086e83c17c0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1879,12 +1879,32 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } + r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.doorbell_aper_size); + if (r) { + DRM_ERROR("Failed initializing oa heap.\n"); + return r; + } + if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr)) DRM_WARN("Debug VRAM access will use slowpath MM access\n"); + /* Create a doorbell BO for kernel usages */ + r = amdgpu_bo_create_kernel(adev, + adev->mman.doorbell_kernel_bo_size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_DOORBELL, + &adev->mman.doorbell_kernel_bo, + &adev->mman.doorbell_gpu_addr, + (void **)&adev->mman.doorbell_cpu_addr); + + if (r) { + DRM_ERROR("Failed to create doorbell BO, err=%d\n", r); + return r; + } + return 0; } @@ -1908,6 +1928,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) NULL, NULL); amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); + amdgpu_bo_free_kernel(&adev->mman.doorbell_kernel_bo, + NULL, (void **)&adev->mman.doorbell_cpu_addr); amdgpu_ttm_fw_reserve_vram_fini(adev); amdgpu_ttm_drv_reserve_vram_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 9cf5d8419965..50748ff1dd3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -97,6 +97,13 @@ struct amdgpu_mman { /* PAGE_SIZE'd BO for process memory r/w over SDMA. */ struct amdgpu_bo*sdma_access_bo; void*sdma_access_ptr; + + /* doorbells reserved for the kernel driver */ + u32 num_kernel_doorbells; /* Number of doorbells actually reserved for kernel */ + uint64_tdoorbell_kernel_bo_size; + uint64_tdoorbell_gpu_addr; + struct amdgpu_bo*doorbell_kernel_bo; + u32 *doorbell_cpu_addr; }; struct amdgpu_copy_mem { -- 2.34.1
[PATCH v2 8/8] drm/amdgpu: start using kernel doorbell bo
From: Shashank Sharma This patch does the following: - Adds new variables like mman.doorbell_bo_size/gpu_addr/cpu_addr. The cpu_addr ptr will be used now for doorbell read/write from doorbell BAR. - Adjusts the existing code to use kernel doorbell BO's size and its cpu_address. Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 1 - 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0493c64e9d0a..87f486f522ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -109,11 +109,10 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, *aperture_base = adev->doorbell.doorbell_aper_base; *aperture_size = 0; *start_offset = 0; - } else if (adev->doorbell.doorbell_aper_size > adev->doorbell.num_doorbells * - sizeof(u32)) { + } else if (adev->doorbell.doorbell_aper_size > adev->mman.doorbell_kernel_bo_size) { *aperture_base = adev->doorbell.doorbell_aper_base; *aperture_size = adev->doorbell.doorbell_aper_size; - *start_offset = adev->doorbell.num_doorbells * sizeof(u32); + *start_offset = adev->mman.doorbell_kernel_bo_size; } else { *aperture_base = 0; *aperture_size = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 43c1b67c2778..fde199434579 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -596,8 +596,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { - return readl(adev->mman.doorbell_aper_base_kaddr + index); + if (index < adev->mman.num_kernel_doorbells) { + return readl(adev->mman.doorbell_cpu_addr + index); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -619,8 +619,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { - writel(v, adev->mman.doorbell_aper_base_kaddr + index); + if (index < adev->mman.num_kernel_doorbells) { + writel(v, adev->mman.doorbell_cpu_addr + index); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -640,8 +640,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { - return atomic64_read((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index)); + if (index < adev->mman.num_kernel_doorbells) { + return atomic64_read((atomic64_t *)(adev->mman.doorbell_cpu_addr + index)); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -663,8 +663,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { - atomic64_set((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index), v); + if (index < adev->mman.num_kernel_doorbells) { + atomic64_set((atomic64_t *)(adev->mman.doorbell_cpu_addr + index), v); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -1037,7 +1037,7 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) if (adev->asic_type < CHIP_BONAIRE) { adev->doorbell.doorbell_aper_base = 0; adev->doorbell.doorbell_aper_size = 0; - adev->doorbell.num_doorbells = 0; + adev->mman.num_kernel_doorbells = 0; adev->mman.doorbell_aper_base_kaddr = NULL; return 0; } @@ -1052,13 +1052,13 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) adev->doorbell.doorbell_aper_size = pci_resource_len(adev->pdev, 2); if (adev->enable_mes) { - adev->doorbell.num_doorbells = + adev->mman.num_kernel_doorbells = adev->doorbell.doorbell_aper_size / sizeof(u32); } else { -
[PATCH v2 4/8] drm/amdgpu: rename doorbell variables
From: Alex Deucher This patch: - renames the adev->doorbell.base to adev->doorbell.doorbell_aper_base - renames the adev->doorbell.size to adev->doorbell.doorbell_aper_size - moves the adev->doorbell.ptr to adev->mman.doorbell_aper_base_kaddr rest of the changes are just to accommodate these variable name changes. V2: Mered 2 patches into this one doorbell clean-up patch. Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 8 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 34 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 5 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 2 +- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c | 4 +-- 12 files changed, 38 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 58689b2a2d1c..0493c64e9d0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -106,13 +106,13 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, * not initialized as AMDGPU manages the whole * doorbell space. */ - *aperture_base = adev->doorbell.base; + *aperture_base = adev->doorbell.doorbell_aper_base; *aperture_size = 0; *start_offset = 0; - } else if (adev->doorbell.size > adev->doorbell.num_doorbells * + } else if (adev->doorbell.doorbell_aper_size > adev->doorbell.num_doorbells * sizeof(u32)) { - *aperture_base = adev->doorbell.base; - *aperture_size = adev->doorbell.size; + *aperture_base = adev->doorbell.doorbell_aper_base; + *aperture_size = adev->doorbell.doorbell_aper_size; *start_offset = adev->doorbell.num_doorbells * sizeof(u32); } else { *aperture_base = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 45588b7919fe..43c1b67c2778 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -597,7 +597,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) return 0; if (index < adev->doorbell.num_doorbells) { - return readl(adev->doorbell.ptr + index); + return readl(adev->mman.doorbell_aper_base_kaddr + index); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -620,7 +620,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) return; if (index < adev->doorbell.num_doorbells) { - writel(v, adev->doorbell.ptr + index); + writel(v, adev->mman.doorbell_aper_base_kaddr + index); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -641,7 +641,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) return 0; if (index < adev->doorbell.num_doorbells) { - return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); + return atomic64_read((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index)); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -664,7 +664,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) return; if (index < adev->doorbell.num_doorbells) { - atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); + atomic64_set((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index), v); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -1035,10 +1035,10 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) /* No doorbell on SI hardware generation */ if (adev->asic_type < CHIP_BONAIRE) { - adev->doorbell.base = 0; - adev->doorbell.size = 0; + adev->doorbell.doorbell_aper_base = 0; + adev->doorbell.doorbell_aper_size = 0; adev->doorbell.num_doorbells = 0; - adev->doorbell.ptr = NULL; + adev->mman.doorbell_aper_base_kaddr = NULL; ret
Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum
On 2/14/23 10:49, Sebastian Wick wrote: > On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä > wrote: >> >> On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote: >>> >>> >>> On 2/3/23 10:19, Ville Syrjälä wrote: On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote: > > > On 2/3/23 07:59, Sebastian Wick wrote: >> On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä >> wrote: >>> >>> On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote: Userspace has no way of controlling or knowing the pixel encoding currently, so there is no way for it to ever get the right values here. >>> >>> That applies to a lot of the other values as well (they are >>> explicitly RGB or YCC). The idea was that this property sets the >>> infoframe/MSA/SDP value exactly, and other properties should be >>> added to for use userspace to control the pixel encoding/colorspace >>> conversion(if desired, or userspace just makes sure to >>> directly feed in correct kind of data). >> >> I'm all for getting userspace control over pixel encoding but even >> then the kernel always knows which pixel encoding is selected and >> which InfoFrame has to be sent. Is there a reason why userspace would >> want to control the variant explicitly to the wrong value? >> > > I've asked this before but haven't seen an answer: Is there an existing > upstream userspace project that makes use of this property (other than > what Joshua is working on in gamescope right now)? That would help us > understand the intent better. The intent was to control the infoframe colorimetry bits, nothing more. No idea what real userspace there was, if any. > > I don't think giving userspace explicit control over the exact infoframe > values is the right thing to do. Only userspace knows what kind of data it's stuffing into the pixels (and/or how it configures the csc units/etc.) to generate them. >>> >>> Yes, but userspace doesn't control or know whether we drive >>> RGB or YCbCr on the wire. In fact, in some cases our driver >>> needs to fallback to YCbCr420 for bandwidth reasons. There >>> is currently no way for userspace to know that and I don't >>> think it makes sense. >> >> People want that control as well for whatever reason. We've >> been asked to allow YCbCr 4:4:4 output many times. > > I don't really think it's a question of if we want it but rather how > we get there. Harry is completely right that if we would make the > subsampling controllable by user space instead of the kernel handling > it magically, user space which does not adapt to the new control won't > be able to light up some modes which worked before. > Thanks for continuing this discussion and touching on the model of how we get to where we want to go. > This is obviously a problem and not one we can easily fix. We would > need a new cap for user space to signal "I know that I can control > bpc, subsampling and compression to lower the bandwidth and light up > modes which otherwise fail". That cap would also remove all the > properties which require kernel magic to work (that's also what I > proposed for my KMS color pipeline API). > > We all want to expose more of the scanout capability and give user > space more control but I don't think an incremental approach works > here and we would all do better if we accept that the current API > requires kernel magic to work and has a few implicit assumptions baked > in. > > With all that being said, I think the right decision here is to > > 1. Ignore subsampling for now > 2. Let the kernel select YCC or RGB on the cable > 3. Let the kernel figure out the conversion between RGB and YCC based > on the color space selected > 4. Let the kernel send the correct infoframe based on the selected > color space and cable encoding > 5. Only expose color spaces for which the kernel can do the conversion > and send the infoframe I agree. We don't want to break or change existing behavior (that is used by userspace) and this will get us far without breaking things. > 6. Work on the new API which is hidden behind a cap > I assume you mean something like https://gitlab.freedesktop.org/pq/color-and-hdr/-/issues/11 Above you say that you don't think an incremental approach works here. Can you elaborate? >From what I've seen recently I am inclined to favor an incremental approach more. The reason is that any API, or portion thereof, is useless unless it's enabled full stack. When it isn't it becomes dead code quickly, or never really works because we overlooked one thing. The colorspace debacle shows how even something as simple as extra enum values in KMS APIs shouldn't be added unless someone in a canonical upstream project actually uses them. I would argue that such a canonical upstream project actually has to be a production environment and not something like Weston.
[linux-next:master] BUILD REGRESSION 3ebb0ac55efaf1d0fb1b106f852c114e5021f7eb
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master branch HEAD: 3ebb0ac55efaf1d0fb1b106f852c114e5021f7eb Add linux-next specific files for 20230214 Error/Warning reports: https://lore.kernel.org/oe-kbuild-all/202301300743.bp7dpazv-...@intel.com https://lore.kernel.org/oe-kbuild-all/202301302110.metnwkbd-...@intel.com https://lore.kernel.org/oe-kbuild-all/202302061911.c7xvhx9v-...@intel.com https://lore.kernel.org/oe-kbuild-all/202302062224.byzetxh1-...@intel.com https://lore.kernel.org/oe-kbuild-all/202302092211.54eydhyh-...@intel.com https://lore.kernel.org/oe-kbuild-all/202302111601.jty4lkra-...@intel.com https://lore.kernel.org/oe-kbuild-all/202302112104.g75cghzd-...@intel.com https://lore.kernel.org/oe-kbuild-all/202302142145.in5wznpf-...@intel.com Error/Warning: (recently discovered and may have been fixed) Documentation/riscv/uabi.rst:24: WARNING: Enumerated list ends without a blank line; unexpected unindent. Documentation/sphinx/templates/kernel-toc.html: 1:36 Invalid token: #} ERROR: modpost: "devm_platform_ioremap_resource" [drivers/dma/fsl-edma.ko] undefined! ERROR: modpost: "devm_platform_ioremap_resource" [drivers/dma/idma64.ko] undefined! arch/mips/kernel/vpe.c:643:41: error: 'struct module' has no member named 'mod_mem' drivers/cxl/core/region.c:2628:6: warning: variable 'rc' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_optc.c:294:6: warning: no previous prototype for 'optc3_wait_drr_doublebuffer_pending_clear' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_hubbub.c:1011:6: warning: no previous prototype for 'hubbub31_init' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_hubbub.c:948:6: warning: no previous prototype for 'hubbub32_init' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_hubp.c:158:6: warning: no previous prototype for 'hubp32_init' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_resource_helpers.c:62:18: warning: variable 'cursor_bpp' set but not used [-Wunused-but-set-variable] drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_capability.c:1296:32: warning: variable 'result_write_min_hblank' set but not used [-Wunused-but-set-variable] drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_capability.c:280:42: warning: variable 'ds_port' set but not used [-Wunused-but-set-variable] drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_training.c:1586:38: warning: variable 'result' set but not used [-Wunused-but-set-variable] ftrace-ops.c:(.init.text+0x2c3): undefined reference to `__udivdi3' Unverified Error/Warning (likely false positive, please contact us if interested): drivers/clk/ingenic/jz4760-cgu.c:80 jz4760_cgu_calc_m_n_od() error: uninitialized symbol 'od'. drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c:415 bnxt_rdma_aux_device_init() warn: possible memory leak of 'edev' drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c:1678 rtl8188e_handle_ra_tx_report2() warn: ignoring unreachable code. drivers/pci/setup-bus.c:1916:21-24: ERROR: invalid reference to the index variable of the iterator on line 1890 drivers/usb/gadget/composite.c:2082:33: sparse: sparse: restricted __le16 degrades to integer drivers/usb/host/xhci-plat.c:371 xhci_generic_plat_probe() error: we previously assumed 'sysdev' could be null (see line 361) Error/Warning ids grouped by kconfigs: gcc_recent_errors |-- alpha-allyesconfig | |-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-ds_port-set-but-not-used | |-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-result_write_min_hblank-set-but-not-used | `-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_training.c:warning:variable-result-set-but-not-used |-- alpha-randconfig-c041-20230212 | |-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-ds_port-set-but-not-used | `-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-result_write_min_hblank-set-but-not-used |-- alpha-randconfig-s042-20230212 | `-- drivers-usb-gadget-composite.c:sparse:sparse:restricted-__le16-degrades-to-integer |-- arc-allyesconfig | |-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-ds_port-set-but-not-used | |-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-result_write_min_hblank-set-but-not-used | `-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_training.c:warning:varia
Re: [PATCH 01/10] drm/amd/display: Turn global functions into static
On Mon, Feb 13, 2023 at 3:49 PM Arthur Grillo wrote: > > Turn global functions that are only used locally into static ones. This > reduces the number of -Wmissing-prototypes warnings. > > Signed-off-by: Arthur Grillo The first hunk was already fixed, but I applied the second hunk. Alex > --- > drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 2 +- > drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c | 2 +- > 2 files changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c > b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c > index 8c368bcc8e7e..a737782b2840 100644 > --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c > +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c > @@ -87,7 +87,7 @@ static int dcn315_get_active_display_cnt_wa( > return display_count; > } > > -bool should_disable_otg(struct pipe_ctx *pipe) > +static bool should_disable_otg(struct pipe_ctx *pipe) > { > bool ret = true; > > diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c > b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c > index 27dc8c9955f4..3c7cb3dc046b 100644 > --- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c > +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c > @@ -37,7 +37,7 @@ > #include "soc15_hw_ip.h" > #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" > > -enum dc_irq_source to_dal_irq_source_dcn201( > +static enum dc_irq_source to_dal_irq_source_dcn201( > struct irq_service *irq_service, > uint32_t src_id, > uint32_t ext_id) > -- > 2.39.1 >
Re: [PATCH 0/6] Trivial code cleanup around color resources
Applied. Thanks! Alex On Tue, Feb 14, 2023 at 7:48 AM Christian König wrote: > > Am 14.02.23 um 13:14 schrieb Melissa Wen: > > Hi, > > > > Sorry for the noise, but while I've been working on wiring 3D LUT > > support to AMD display driver [1] I found some annoying code style > > issues in the shared-code part. So I'm just sending what I've been > > cleaning to better examine the code. > > > > Most seem trivial, except the last one "remove unused > > _calculate_degamma_curve" since this could just be a matter of missing > > parts. If so, happy to remove the patch and include a comment describing > > the situation (or the potential usage of it). > > The display stack is not my field of expertise, but those cleanups are > so obvious that I think I can safely give my Reviewed-by: Christian > König for the entire series. > > Thanks, > Christian. > > > > > Thanks, > > > > Melissa > > > > [1] > > https://lore.kernel.org/dri-devel/20230109143846.1966301-1-m...@igalia.com/ > > > > Melissa Wen (6): > >drm/amd/display: ident braces in dcn30_acquire_post_bldn_3dlut > > correctly > >drm/amd/display: clean code-style issues in dcn30_set_mpc_shaper_3dlut > >drm/amd/display: camel case cleanup in color_gamma file > >drm/amd/display: unset initial value for tf since it's never used > >drm/amd/display: remove unused func declaration from resource headers > >drm/amd/display: remove unused _calculate_degamma_curve function > > > > .../drm/amd/display/dc/dcn30/dcn30_hwseq.c| 37 ++--- > > .../drm/amd/display/dc/dcn30/dcn30_resource.c | 2 +- > > drivers/gpu/drm/amd/display/dc/inc/resource.h | 4 - > > .../amd/display/modules/color/color_gamma.c | 140 -- > > .../amd/display/modules/color/color_gamma.h | 3 - > > 5 files changed, 48 insertions(+), 138 deletions(-) > > >
Re: [PATCH 02/10] drm/amd/display: Add function prototypes to headers
Applied. Thanks! On Mon, Feb 13, 2023 at 3:50 PM Arthur Grillo wrote: > > Add function prototypes to headers to reduce the number of > -Wmissing-prototypes warnings. > > Signed-off-by: Arthur Grillo > --- > drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h | 2 ++ > drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h | 2 ++ > drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h | 2 ++ > 3 files changed, 6 insertions(+) > > diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h > b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h > index e015e5a6c866..89d6208287b5 100644 > --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h > +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h > @@ -133,6 +133,8 @@ > int hubbub31_init_dchub_sys_ctx(struct hubbub *hubbub, > struct dcn_hubbub_phys_addr_config *pa_config); > > +void hubbub31_init(struct hubbub *hubbub); > + > void hubbub31_construct(struct dcn20_hubbub *hubbub3, > struct dc_context *ctx, > const struct dcn_hubbub_registers *hubbub_regs, > diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h > b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h > index bdc146890fca..b20eb04724bb 100644 > --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h > +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h > @@ -204,6 +204,8 @@ void hubbub32_force_usr_retraining_allow(struct hubbub > *hubbub, bool allow); > > void hubbub32_force_wm_propagate_to_pipes(struct hubbub *hubbub); > > +void hubbub32_init(struct hubbub *hubbub); > + > void dcn32_program_det_size(struct hubbub *hubbub, int hubp_inst, unsigned > int det_buffer_size_in_kbyte); > > void hubbub32_construct(struct dcn20_hubbub *hubbub2, > diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h > b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h > index 56ef71151536..4cdbf63c952b 100644 > --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h > +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h > @@ -61,6 +61,8 @@ void hubp32_phantom_hubp_post_enable(struct hubp *hubp); > void hubp32_cursor_set_attributes(struct hubp *hubp, > const struct dc_cursor_attributes *attr); > > +void hubp32_init(struct hubp *hubp); > + > bool hubp32_construct( > struct dcn20_hubp *hubp2, > struct dc_context *ctx, > -- > 2.39.1 >
Re: [PATCH 03/10] drm/amd/amdgpu: Add function prototypes to headers
Applied. Thanks! On Mon, Feb 13, 2023 at 3:50 PM Arthur Grillo wrote: > > Add function prototypes to headers to reduce the number of > -Wmissing-prototypes warnings. > > Signed-off-by: Arthur Grillo > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h > index bee93ab4298f..b03321e7d2d8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h > @@ -538,6 +538,7 @@ struct amdgpu_firmware { > > void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); > void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); > +void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr); > void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr); > void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr); > void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr); > -- > 2.39.1 >
Re: [PATCH 04/10] drm/amd/display: Add previously missing includes
Applied. Thanks! On Mon, Feb 13, 2023 at 3:50 PM Arthur Grillo wrote: > > Add includes that were previously missing to reduce the number of > -Wmissing-prototypes warnings. > > Signed-off-by: Arthur Grillo > --- > drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c | 1 + > drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c | 1 + > 2 files changed, 2 insertions(+) > > diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c > b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c > index 330d7cbc7398..3069af3684c6 100644 > --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c > +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c > @@ -30,6 +30,7 @@ > #include "dcn30/dcn30_hwseq.h" > #include "dcn31/dcn31_hwseq.h" > #include "dcn32_hwseq.h" > +#include "dcn32_init.h" > > static const struct hw_sequencer_funcs dcn32_funcs = { > .program_gamut_remap = dcn10_program_gamut_remap, > diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c > b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c > index 04838a31e513..257f4fc065a5 100644 > --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c > +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c > @@ -24,6 +24,7 @@ > */ > #include "dc_link.h" > #include "link_dp_trace.h" > +#include "link.h" > > void dp_trace_init(struct dc_link *link) > { > -- > 2.39.1 >
Re: [PATCH 05/10] drm/amd/display: Fix excess arguments on kernel-doc
Applied. Thanks! On Mon, Feb 13, 2023 at 3:50 PM Arthur Grillo wrote: > > Remove arguments present on kernel-doc that are not present on the > function declaration and add the new ones if present. > > Signed-off-by: Arthur Grillo > --- > drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c| 15 +++ > drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 2 +- > .../gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c | 2 +- > 3 files changed, 9 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c > index 3d36329be384..40e6b22daa22 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c > @@ -273,8 +273,6 @@ static void sdma_v6_0_ring_emit_ib(struct amdgpu_ring > *ring, > * sdma_v6_0_ring_emit_mem_sync - flush the IB by graphics cache rinse > * > * @ring: amdgpu ring pointer > - * @job: job to retrieve vmid from > - * @ib: IB object to schedule > * > * flush the IB by graphics cache rinse. > */ > @@ -326,7 +324,9 @@ static void sdma_v6_0_ring_emit_hdp_flush(struct > amdgpu_ring *ring) > * sdma_v6_0_ring_emit_fence - emit a fence on the DMA ring > * > * @ring: amdgpu ring pointer > - * @fence: amdgpu fence object > + * @addr: address > + * @seq: fence seq number > + * @flags: fence flags > * > * Add a DMA fence packet to the ring to write > * the fence seq number and DMA trap packet to generate > @@ -1060,10 +1060,9 @@ static void sdma_v6_0_vm_copy_pte(struct amdgpu_ib *ib, > * > * @ib: indirect buffer to fill with commands > * @pe: addr of the page entry > - * @addr: dst addr to write into pe > + * @value: dst addr to write into pe > * @count: number of page entries to update > * @incr: increase next addr by incr bytes > - * @flags: access flags > * > * Update PTEs by writing them manually using sDMA. > */ > @@ -1167,7 +1166,6 @@ static void sdma_v6_0_ring_emit_pipeline_sync(struct > amdgpu_ring *ring) > * sdma_v6_0_ring_emit_vm_flush - vm flush using sDMA > * > * @ring: amdgpu_ring pointer > - * @vm: amdgpu_vm pointer > * > * Update the page table base and flush the VM TLB > * using sDMA. > @@ -1591,10 +1589,11 @@ static void sdma_v6_0_set_irq_funcs(struct > amdgpu_device *adev) > /** > * sdma_v6_0_emit_copy_buffer - copy buffer using the sDMA engine > * > - * @ring: amdgpu_ring structure holding ring information > + * @ib: indirect buffer to fill with commands > * @src_offset: src GPU address > * @dst_offset: dst GPU address > * @byte_count: number of bytes to xfer > + * @tmz: if a secure copy should be used > * > * Copy GPU buffers using the DMA engine. > * Used by the amdgpu ttm implementation to move pages if > @@ -1620,7 +1619,7 @@ static void sdma_v6_0_emit_copy_buffer(struct amdgpu_ib > *ib, > /** > * sdma_v6_0_emit_fill_buffer - fill buffer using the sDMA engine > * > - * @ring: amdgpu_ring structure holding ring information > + * @ib: indirect buffer to fill > * @src_data: value to write to buffer > * @dst_offset: dst GPU address > * @byte_count: number of bytes to xfer > diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c > b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c > index 6ccf477d1c4d..c2092775ca88 100644 > --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c > +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c > @@ -698,7 +698,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, > * > * @dc: [in] current dc state > * @context: [in] new dc state > - * @cmd: [in] DMUB cmd to be populated with SubVP info > + * @enable: [in] if true enables the pipes population > * > * This function loops through each pipe and populates the DMUB SubVP CMD > info > * based on the pipe (e.g. SubVP, VBLANK). > diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c > b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c > index f607a0e28f14..f62368da875d 100644 > --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c > +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c > @@ -581,7 +581,7 @@ static void dpp1_dscl_set_manual_ratio_init( > * dpp1_dscl_set_recout - Set the first pixel of RECOUT in the OTG active > area > * > * @dpp: DPP data struct > - * @recount: Rectangle information > + * @recout: Rectangle information > * > * This function sets the MPC RECOUT_START and RECOUT_SIZE registers based on > * the values specified in the recount parameter. > -- > 2.39.1 >
Re: [PATCH 09/10] drm/amd/display: Make variables declaration inside ifdef guard
Applied. Thanks! On Mon, Feb 13, 2023 at 3:50 PM Arthur Grillo wrote: > > Make variables declaration inside ifdef guard, as they are only used > inside the same ifdef guard. This remove some of the > -Wunused-but-set-variable warning. > > Signed-off-by: Arthur Grillo > --- > .../gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c| 4 > .../gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c | 4 > 2 files changed, 8 insertions(+) > > diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c > b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c > index ec351c8418cb..27f488405335 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c > @@ -878,7 +878,9 @@ static bool CalculatePrefetchSchedule( > double DSTTotalPixelsAfterScaler; > double LineTime; > double dst_y_prefetch_equ; > +#ifdef __DML_VBA_DEBUG__ > double Tsw_oto; > +#endif > double prefetch_bw_oto; > double prefetch_bw_pr; > double Tvm_oto; > @@ -1060,7 +1062,9 @@ static bool CalculatePrefetchSchedule( > > min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, > PrefetchSourceLinesC) / max_vratio_pre); > Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / > LineTime, min_Lsw), 1) / 4; > +#ifdef __DML_VBA_DEBUG__ > Tsw_oto = Lsw_oto * LineTime; > +#endif > > > #ifdef __DML_VBA_DEBUG__ > diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c > b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c > index 950669f2c10d..0fd3889c2061 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c > @@ -900,7 +900,9 @@ static bool CalculatePrefetchSchedule( > double DSTTotalPixelsAfterScaler; > double LineTime; > double dst_y_prefetch_equ; > +#ifdef __DML_VBA_DEBUG__ > double Tsw_oto; > +#endif > double prefetch_bw_oto; > double prefetch_bw_pr; > double Tvm_oto; > @@ -1082,7 +1084,9 @@ static bool CalculatePrefetchSchedule( > > min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, > PrefetchSourceLinesC) / max_vratio_pre); > Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / > LineTime, min_Lsw), 1) / 4; > +#ifdef __DML_VBA_DEBUG__ > Tsw_oto = Lsw_oto * LineTime; > +#endif > > > #ifdef __DML_VBA_DEBUG__ > -- > 2.39.1 >
Re: [PATCH v2 1/8] drm/amdgpu: add UAPI for allocating doorbell memory
Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Alex Deucher This patch adds flags for a new gem domain AMDGPU_GEM_DOMAIN_DOORBELL in the UAPI layer. V2: Drop 'memory' from description (Christian) Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4038abe8505a..cc5d551abda5 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -94,6 +94,9 @@ extern "C" { * * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines * for appending data. + * + * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for + * signalling user mode queues. Maybe write "for signaling events to the firmware, used especially for user mode queues.". With or without that Reviewed-by: Christian König Christian. */ #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -101,12 +104,14 @@ extern "C" { #define AMDGPU_GEM_DOMAIN_GDS 0x8 #define AMDGPU_GEM_DOMAIN_GWS 0x10 #define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 #define AMDGPU_GEM_DOMAIN_MASK(AMDGPU_GEM_DOMAIN_CPU | \ AMDGPU_GEM_DOMAIN_GTT | \ AMDGPU_GEM_DOMAIN_VRAM | \ AMDGPU_GEM_DOMAIN_GDS | \ AMDGPU_GEM_DOMAIN_GWS | \ -AMDGPU_GEM_DOMAIN_OA) +AMDGPU_GEM_DOMAIN_OA | \ +AMDGPU_GEM_DOMAIN_DOORBELL) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0)
Re: [PATCH v2 2/8] drm/amdgpu: replace aper_base_kaddr with vram_aper_base_kaddr
Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Alex Deucher To differentiate it from the doorbell BAR. Since we removed the manual ioremap() for the doorbell BAR today we don't really need that patch any more, don't we? On the other hand renaming the field still makes a lot of sense for better documenting what it's good for. Christian. V2: Added Christian's A-B Acked-by: Christian Koenig Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 14 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h| 2 +- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 10 +- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2f28a8c02f64..0b6a394e109b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -354,12 +354,12 @@ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos, size_t count = 0; uint64_t last; - if (!adev->mman.aper_base_kaddr) + if (!adev->mman.vram_aper_base_kaddr) return 0; last = min(pos + size, adev->gmc.visible_vram_size); if (last > pos) { - addr = adev->mman.aper_base_kaddr + pos; + addr = adev->mman.vram_aper_base_kaddr + pos; count = last - pos; if (write) { @@ -3954,9 +3954,9 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; - if (adev->mman.aper_base_kaddr) - iounmap(adev->mman.aper_base_kaddr); - adev->mman.aper_base_kaddr = NULL; + if (adev->mman.vram_aper_base_kaddr) + iounmap(adev->mman.vram_aper_base_kaddr); + adev->mman.vram_aper_base_kaddr = NULL; /* Memory manager related */ if (!adev->gmc.xgmi.connected_to_cpu) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 55e0284b2bdd..73b831b47892 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -578,9 +578,9 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size) return -EINVAL; - if (adev->mman.aper_base_kaddr && + if (adev->mman.vram_aper_base_kaddr && mem->placement & TTM_PL_FLAG_CONTIGUOUS) - mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr + + mem->bus.addr = (u8 *)adev->mman.vram_aper_base_kaddr + mem->bus.offset; mem->bus.offset += adev->gmc.aper_base; @@ -1752,12 +1752,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) #ifdef CONFIG_64BIT #ifdef CONFIG_X86 if (adev->gmc.xgmi.connected_to_cpu) - adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, + adev->mman.vram_aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, adev->gmc.visible_vram_size); else #endif - adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, + adev->mman.vram_aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, adev->gmc.visible_vram_size); #endif @@ -1904,9 +1904,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) if (drm_dev_enter(adev_to_drm(adev), &idx)) { - if (adev->mman.aper_base_kaddr) - iounmap(adev->mman.aper_base_kaddr); - adev->mman.aper_base_kaddr = NULL; + if (adev->mman.vram_aper_base_kaddr) + iounmap(adev->mman.vram_aper_base_kaddr); + adev->mman.vram_aper_base_kaddr = NULL; drm_dev_exit(idx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index e2cd5894afc9..929bc8abac28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -50,7 +50,7 @@ struct amdgpu_gtt_mgr { struct amdgpu_mman { struct ttm_device bdev; boolinitialized; - void __iomem*aper_base_kaddr; + void __iomem*vram_aper_base_kaddr; /* buffer handling */ const struct amdgpu_buffer_funcs*buffer_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index bd3e3e23a939..f39d4f593a2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -611,10 +611,10 @@ static int psp_v11_0_memory_training(struct psp_context
Re: [PATCH v2 3/8] drm/amdgpu: rename gmc.aper_base/size
Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Alex Deucher This patch renames aper_base and aper_size parameters (in adev->gmc), to vram_aper_base and vram_aper_size, to differentiate it from the doorbell BAR. V2: rebase Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma Acked-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 12 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 10 +- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 10 +- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c| 6 +++--- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c| 12 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c| 10 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 10 +- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 4 ++-- 14 files changed, 49 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index f99d4873bf22..58689b2a2d1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -438,7 +438,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, mem_info->vram_width = adev->gmc.vram_width; pr_debug("Address base: %pap public 0x%llx private 0x%llx\n", - &adev->gmc.aper_base, + &adev->gmc.vram_aper_base, mem_info->local_mem_size_public, mem_info->local_mem_size_private); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0b6a394e109b..45588b7919fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3961,7 +3961,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) /* Memory manager related */ if (!adev->gmc.xgmi.connected_to_cpu) { arch_phys_wc_del(adev->gmc.vram_mtrr); - arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); + arch_io_free_memtype_wc(adev->gmc.vram_aper_base, adev->gmc.vram_aper_size); } } @@ -5562,14 +5562,14 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, uint64_t address_mask = peer_adev->dev->dma_mask ? ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); resource_size_t aper_limit = - adev->gmc.aper_base + adev->gmc.aper_size - 1; + adev->gmc.vram_aper_base + adev->gmc.vram_aper_size - 1; bool p2p_access = !adev->gmc.xgmi.connected_to_cpu && !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && adev->gmc.real_vram_size == adev->gmc.visible_vram_size && - !(adev->gmc.aper_base & address_mask || + !(adev->gmc.vram_aper_base & address_mask || aper_limit & address_mask)); #else return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 02a4c93673ce..c7e64e234de6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -775,7 +775,7 @@ uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo) */ uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo) { - return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base; + return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.vram_aper_base; } int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 0305b660cd17..bb7076ecbf01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -167,8 +167,8 @@ struct amdgpu_gmc { * gart/vram_start/end field as the later is from * GPU's view and aper_base is from CPU's view. */ - resource_size_t aper_size; - resource_size_t aper_base; + resource_size_t vram_aper_size; + resource_size_t vram_aper_base; /* for some chips with <= 32MB we need to lie * about vram size near mc fb location */ u64 mc_vram_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 25a68de0..b48
Re: [PATCH v2 4/8] drm/amdgpu: rename doorbell variables
Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Alex Deucher This patch: - renames the adev->doorbell.base to adev->doorbell.doorbell_aper_base - renames the adev->doorbell.size to adev->doorbell.doorbell_aper_size - moves the adev->doorbell.ptr to adev->mman.doorbell_aper_base_kaddr rest of the changes are just to accommodate these variable name changes. V2: Mered 2 patches into this one doorbell clean-up patch. Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 8 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 34 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 5 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 2 +- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c | 4 +-- 12 files changed, 38 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 58689b2a2d1c..0493c64e9d0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -106,13 +106,13 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, * not initialized as AMDGPU manages the whole * doorbell space. */ - *aperture_base = adev->doorbell.base; + *aperture_base = adev->doorbell.doorbell_aper_base; *aperture_size = 0; *start_offset = 0; - } else if (adev->doorbell.size > adev->doorbell.num_doorbells * + } else if (adev->doorbell.doorbell_aper_size > adev->doorbell.num_doorbells * sizeof(u32)) { - *aperture_base = adev->doorbell.base; - *aperture_size = adev->doorbell.size; + *aperture_base = adev->doorbell.doorbell_aper_base; + *aperture_size = adev->doorbell.doorbell_aper_size; Well that now looks a bit duplicated and you completely remove doorbell_aper_base_kaddr later on, right? Christian. *start_offset = adev->doorbell.num_doorbells * sizeof(u32); } else { *aperture_base = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 45588b7919fe..43c1b67c2778 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -597,7 +597,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) return 0; if (index < adev->doorbell.num_doorbells) { - return readl(adev->doorbell.ptr + index); + return readl(adev->mman.doorbell_aper_base_kaddr + index); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -620,7 +620,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) return; if (index < adev->doorbell.num_doorbells) { - writel(v, adev->doorbell.ptr + index); + writel(v, adev->mman.doorbell_aper_base_kaddr + index); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -641,7 +641,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) return 0; if (index < adev->doorbell.num_doorbells) { - return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); + return atomic64_read((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index)); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -664,7 +664,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) return; if (index < adev->doorbell.num_doorbells) { - atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); + atomic64_set((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index), v); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -1035,10 +1035,10 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) /* No doorbell on SI hardware generation */ if (adev->asic_type < CHIP_BONAIRE) { - adev->doorbell.base = 0; - adev->doorbell.size = 0; + adev->doorbell.doorbell_aper_base = 0; + adev->doorbell.doorbell_aper_size = 0;
Re: [PATCH v2 5/8] drm/amdgpu: accommodate DOMAIN/PL_DOORBELL
Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Alex Deucher This patch adds changes: - to accommodate the new GEM domain DOORBELL - to accommodate the new TTM PL DOORBELL to manage doorbell allocations as GEM Objects. V2: Addressed reviwe comments from Christian - drop the doorbell changes for pinning/unpinning - drop the doorbell changes for dma-buf map - drop the doorbell changes for sgt - no need to handle TTM_PL_FLAG_CONTIGUOUS for doorbell Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 11 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 11 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h| 1 + 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index b48c9fd60c43..ff9979fecfd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -147,6 +147,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) c++; } + if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) { + places[c].fpfn = 0; + places[c].lpfn = 0; + places[c].mem_type = AMDGPU_PL_DOORBELL; + places[c].flags = 0; + c++; + } + Mhm, do we need to increase AMDGPU_BO_MAX_PLACEMENTS? I think the answer is *no* since mixing DOORBELL with CPU, GTT or VRAM placement doesn't make sense, but do we enforce that somewhere? if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; places[c].lpfn = 0; @@ -466,7 +474,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, goto fail; } - /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */ + /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_DOMAIN_DOORBELL */ Should we enforce that user space can only allocate 1 page doorbells? return true; fail: @@ -1014,6 +1022,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo) } else if (bo->tbo.resource->mem_type == TTM_PL_TT) { atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size); } + Unrelated change. Regards, Christian. } static const char *amdgpu_vram_names[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0e8f580769ab..e9dc24191fc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -128,6 +128,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, case AMDGPU_PL_GDS: case AMDGPU_PL_GWS: case AMDGPU_PL_OA: + case AMDGPU_PL_DOORBELL: placement->num_placement = 0; placement->num_busy_placement = 0; return; @@ -500,9 +501,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (old_mem->mem_type == AMDGPU_PL_GDS || old_mem->mem_type == AMDGPU_PL_GWS || old_mem->mem_type == AMDGPU_PL_OA || + old_mem->mem_type == AMDGPU_PL_DOORBELL || new_mem->mem_type == AMDGPU_PL_GDS || new_mem->mem_type == AMDGPU_PL_GWS || - new_mem->mem_type == AMDGPU_PL_OA) { + new_mem->mem_type == AMDGPU_PL_OA || + new_mem->mem_type == AMDGPU_PL_DOORBELL) { /* Nothing to save here */ ttm_bo_move_null(bo, new_mem); goto out; @@ -586,6 +589,11 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.offset += adev->gmc.vram_aper_base; mem->bus.is_iomem = true; break; + case AMDGPU_PL_DOORBELL: + mem->bus.offset = mem->start << PAGE_SHIFT; + mem->bus.offset += adev->doorbell.doorbell_aper_base; + mem->bus.is_iomem = true; + break; default: return -EINVAL; } @@ -1267,6 +1275,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) flags |= AMDGPU_PTE_VALID; if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_DOORBELL || mem->mem_type == AMDGPU_PL_PREEMPT)) { flags |= AMDGPU_PTE_SYSTEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 967b265dbfa1..9cf5d8419965 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -33,6 +33,7 @@ #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) #define AMDGPU_PL_OA (TTM_PL_PRIV + 2) #define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3) +#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4) #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TR
Re: [PATCH v2 7/8] drm/amdgpu: create doorbell kernel object
Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Shashank Sharma This patch does the following: - Initializes TTM range management for domain DOORBELL. - Introduces a kernel bo for doorbell management in form of mman.doorbell_kernel_bo. This bo holds the kernel doorbell space now. - Removes ioremapping of doorbell-kernel memory, as its not required now. V2: - Addressed review comments from Christian: - do not use kernel_create_at(0), use kernel_create() instead. - do not use ttm_resource_manager, use range_manager instead. - do not ioremap doorbell, TTM will do that. - Split one big patch into 2 Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 22 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 7 +++ 2 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e9dc24191fc8..086e83c17c0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1879,12 +1879,32 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } + r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.doorbell_aper_size); + if (r) { + DRM_ERROR("Failed initializing oa heap.\n"); + return r; + } + if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr)) DRM_WARN("Debug VRAM access will use slowpath MM access\n"); + /* Create a doorbell BO for kernel usages */ + r = amdgpu_bo_create_kernel(adev, + adev->mman.doorbell_kernel_bo_size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_DOORBELL, + &adev->mman.doorbell_kernel_bo, + &adev->mman.doorbell_gpu_addr, + (void **)&adev->mman.doorbell_cpu_addr); + + if (r) { + DRM_ERROR("Failed to create doorbell BO, err=%d\n", r); + return r; + } + I would even move this before the SDMA VRAM buffer since the later is only nice to have while the doorbell is mandatory to have. return 0; } @@ -1908,6 +1928,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) NULL, NULL); amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); + amdgpu_bo_free_kernel(&adev->mman.doorbell_kernel_bo, + NULL, (void **)&adev->mman.doorbell_cpu_addr); amdgpu_ttm_fw_reserve_vram_fini(adev); amdgpu_ttm_drv_reserve_vram_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 9cf5d8419965..50748ff1dd3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -97,6 +97,13 @@ struct amdgpu_mman { /* PAGE_SIZE'd BO for process memory r/w over SDMA. */ struct amdgpu_bo*sdma_access_bo; void*sdma_access_ptr; + + /* doorbells reserved for the kernel driver */ + u32 num_kernel_doorbells; /* Number of doorbells actually reserved for kernel */ + uint64_tdoorbell_kernel_bo_size; That looks like duplicated information. We should only keep either the number of kernel doorbells or the kernel doorbell bo size around, not both. And BTW please no comment after structure members. Christian. + uint64_tdoorbell_gpu_addr; + struct amdgpu_bo*doorbell_kernel_bo; + u32 *doorbell_cpu_addr; }; struct amdgpu_copy_mem {
Re: [PATCH v2 8/8] drm/amdgpu: start using kernel doorbell bo
Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Shashank Sharma This patch does the following: - Adds new variables like mman.doorbell_bo_size/gpu_addr/cpu_addr. The cpu_addr ptr will be used now for doorbell read/write from doorbell BAR. - Adjusts the existing code to use kernel doorbell BO's size and its cpu_address. Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma Maybe squash this one together with the previous patch. But see below. --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 1 - 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0493c64e9d0a..87f486f522ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -109,11 +109,10 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, *aperture_base = adev->doorbell.doorbell_aper_base; *aperture_size = 0; *start_offset = 0; - } else if (adev->doorbell.doorbell_aper_size > adev->doorbell.num_doorbells * - sizeof(u32)) { + } else if (adev->doorbell.doorbell_aper_size > adev->mman.doorbell_kernel_bo_size) { *aperture_base = adev->doorbell.doorbell_aper_base; *aperture_size = adev->doorbell.doorbell_aper_size; - *start_offset = adev->doorbell.num_doorbells * sizeof(u32); + *start_offset = adev->mman.doorbell_kernel_bo_size; } else { *aperture_base = 0; *aperture_size = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 43c1b67c2778..fde199434579 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -596,8 +596,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { - return readl(adev->mman.doorbell_aper_base_kaddr + index); + if (index < adev->mman.num_kernel_doorbells) { + return readl(adev->mman.doorbell_cpu_addr + index); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -619,8 +619,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { - writel(v, adev->mman.doorbell_aper_base_kaddr + index); + if (index < adev->mman.num_kernel_doorbells) { + writel(v, adev->mman.doorbell_cpu_addr + index); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -640,8 +640,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { - return atomic64_read((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index)); + if (index < adev->mman.num_kernel_doorbells) { + return atomic64_read((atomic64_t *)(adev->mman.doorbell_cpu_addr + index)); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -663,8 +663,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { - atomic64_set((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index), v); + if (index < adev->mman.num_kernel_doorbells) { + atomic64_set((atomic64_t *)(adev->mman.doorbell_cpu_addr + index), v); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -1037,7 +1037,7 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) if (adev->asic_type < CHIP_BONAIRE) { adev->doorbell.doorbell_aper_base = 0; adev->doorbell.doorbell_aper_size = 0; - adev->doorbell.num_doorbells = 0; + adev->mman.num_kernel_doorbells = 0; adev->mman.doorbell_aper_base_kaddr = NULL; return 0; } @@ -1052,13 +1052,13 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) adev->doorbell.doorbell_aper_size = pci_resource_len(adev->pdev, 2); if (adev->enable_mes) { - adev->doorbell.num_doorbells = + adev->mman.num_kernel
Re: [PATCH v2 1/8] drm/amdgpu: add UAPI for allocating doorbell memory
On 14/02/2023 19:22, Christian König wrote: Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Alex Deucher This patch adds flags for a new gem domain AMDGPU_GEM_DOMAIN_DOORBELL in the UAPI layer. V2: Drop 'memory' from description (Christian) Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4038abe8505a..cc5d551abda5 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -94,6 +94,9 @@ extern "C" { * * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines * for appending data. + * + * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for + * signalling user mode queues. Maybe write "for signaling events to the firmware, used especially for user mode queues.". With or without that Reviewed-by: Christian König Will add that, thanks. - Shashank Christian. */ #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -101,12 +104,14 @@ extern "C" { #define AMDGPU_GEM_DOMAIN_GDS 0x8 #define AMDGPU_GEM_DOMAIN_GWS 0x10 #define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ AMDGPU_GEM_DOMAIN_GTT | \ AMDGPU_GEM_DOMAIN_VRAM | \ AMDGPU_GEM_DOMAIN_GDS | \ AMDGPU_GEM_DOMAIN_GWS | \ - AMDGPU_GEM_DOMAIN_OA) + AMDGPU_GEM_DOMAIN_OA | \ + AMDGPU_GEM_DOMAIN_DOORBELL) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0)
Re: [PATCH v2 2/8] drm/amdgpu: replace aper_base_kaddr with vram_aper_base_kaddr
On 14/02/2023 19:24, Christian König wrote: Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Alex Deucher To differentiate it from the doorbell BAR. Since we removed the manual ioremap() for the doorbell BAR today we don't really need that patch any more, don't we? On the other hand renaming the field still makes a lot of sense for better documenting what it's good for. Christian. Exactly, I was also in two minds about this patch, but then realized that if nothing, its making stuff more readable. - Shashank V2: Added Christian's A-B Acked-by: Christian Koenig Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 10 +- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2f28a8c02f64..0b6a394e109b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -354,12 +354,12 @@ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos, size_t count = 0; uint64_t last; - if (!adev->mman.aper_base_kaddr) + if (!adev->mman.vram_aper_base_kaddr) return 0; last = min(pos + size, adev->gmc.visible_vram_size); if (last > pos) { - addr = adev->mman.aper_base_kaddr + pos; + addr = adev->mman.vram_aper_base_kaddr + pos; count = last - pos; if (write) { @@ -3954,9 +3954,9 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; - if (adev->mman.aper_base_kaddr) - iounmap(adev->mman.aper_base_kaddr); - adev->mman.aper_base_kaddr = NULL; + if (adev->mman.vram_aper_base_kaddr) + iounmap(adev->mman.vram_aper_base_kaddr); + adev->mman.vram_aper_base_kaddr = NULL; /* Memory manager related */ if (!adev->gmc.xgmi.connected_to_cpu) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 55e0284b2bdd..73b831b47892 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -578,9 +578,9 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size) return -EINVAL; - if (adev->mman.aper_base_kaddr && + if (adev->mman.vram_aper_base_kaddr && mem->placement & TTM_PL_FLAG_CONTIGUOUS) - mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr + + mem->bus.addr = (u8 *)adev->mman.vram_aper_base_kaddr + mem->bus.offset; mem->bus.offset += adev->gmc.aper_base; @@ -1752,12 +1752,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) #ifdef CONFIG_64BIT #ifdef CONFIG_X86 if (adev->gmc.xgmi.connected_to_cpu) - adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, + adev->mman.vram_aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, adev->gmc.visible_vram_size); else #endif - adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, + adev->mman.vram_aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, adev->gmc.visible_vram_size); #endif @@ -1904,9 +1904,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) if (drm_dev_enter(adev_to_drm(adev), &idx)) { - if (adev->mman.aper_base_kaddr) - iounmap(adev->mman.aper_base_kaddr); - adev->mman.aper_base_kaddr = NULL; + if (adev->mman.vram_aper_base_kaddr) + iounmap(adev->mman.vram_aper_base_kaddr); + adev->mman.vram_aper_base_kaddr = NULL; drm_dev_exit(idx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index e2cd5894afc9..929bc8abac28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -50,7 +50,7 @@ struct amdgpu_gtt_mgr { struct amdgpu_mman { struct ttm_device bdev; bool initialized; - void __iomem *aper_base_kaddr; + void __iomem *vram_aper_base_kaddr; /* buffer handling */ const struct amdgpu_buffer_funcs *buffer_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index bd3e3e23a939..f39d4f593a2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -611,10 +611,10 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) */ sz = GDDR6_MEM_TRAINING_
Re: [PATCH v2 4/8] drm/amdgpu: rename doorbell variables
On 14/02/2023 19:27, Christian König wrote: Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Alex Deucher This patch: - renames the adev->doorbell.base to adev->doorbell.doorbell_aper_base - renames the adev->doorbell.size to adev->doorbell.doorbell_aper_size - moves the adev->doorbell.ptr to adev->mman.doorbell_aper_base_kaddr rest of the changes are just to accommodate these variable name changes. V2: Mered 2 patches into this one doorbell clean-up patch. Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 8 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 34 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 5 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 4 +-- drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c | 4 +-- 12 files changed, 38 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 58689b2a2d1c..0493c64e9d0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -106,13 +106,13 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, * not initialized as AMDGPU manages the whole * doorbell space. */ - *aperture_base = adev->doorbell.base; + *aperture_base = adev->doorbell.doorbell_aper_base; *aperture_size = 0; *start_offset = 0; - } else if (adev->doorbell.size > adev->doorbell.num_doorbells * + } else if (adev->doorbell.doorbell_aper_size > adev->doorbell.num_doorbells * sizeof(u32)) { - *aperture_base = adev->doorbell.base; - *aperture_size = adev->doorbell.size; + *aperture_base = adev->doorbell.doorbell_aper_base; + *aperture_size = adev->doorbell.doorbell_aper_size; Well that now looks a bit duplicated and you completely remove doorbell_aper_base_kaddr later on, right? Christian. Yeah, I still kept it as this cleanup is a bit peculiar and not easy to follow on review, but your right, lets keep it cleaner. I will take remove doorbell_aper_base_kaddr. - Shashank *start_offset = adev->doorbell.num_doorbells * sizeof(u32); } else { *aperture_base = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 45588b7919fe..43c1b67c2778 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -597,7 +597,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) return 0; if (index < adev->doorbell.num_doorbells) { - return readl(adev->doorbell.ptr + index); + return readl(adev->mman.doorbell_aper_base_kaddr + index); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -620,7 +620,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) return; if (index < adev->doorbell.num_doorbells) { - writel(v, adev->doorbell.ptr + index); + writel(v, adev->mman.doorbell_aper_base_kaddr + index); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -641,7 +641,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) return 0; if (index < adev->doorbell.num_doorbells) { - return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); + return atomic64_read((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index)); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -664,7 +664,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) return; if (index < adev->doorbell.num_doorbells) { - atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); + atomic64_set((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index), v); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -1035,10 +1035,10 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) /* No doorbell on SI hardware generation */ if (adev->asic_type < CHIP_BONAIRE) { - adev->doorbell.base = 0; - adev->doorbell.size = 0; + adev->doorbell.doorbell_aper_base = 0; + adev->doorbell.doorbell_aper_size = 0; ade
Re: [PATCH v2 5/8] drm/amdgpu: accommodate DOMAIN/PL_DOORBELL
On 14/02/2023 19:31, Christian König wrote: Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Alex Deucher This patch adds changes: - to accommodate the new GEM domain DOORBELL - to accommodate the new TTM PL DOORBELL to manage doorbell allocations as GEM Objects. V2: Addressed reviwe comments from Christian - drop the doorbell changes for pinning/unpinning - drop the doorbell changes for dma-buf map - drop the doorbell changes for sgt - no need to handle TTM_PL_FLAG_CONTIGUOUS for doorbell Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 11 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 11 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 + 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index b48c9fd60c43..ff9979fecfd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -147,6 +147,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) c++; } + if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) { + places[c].fpfn = 0; + places[c].lpfn = 0; + places[c].mem_type = AMDGPU_PL_DOORBELL; + places[c].flags = 0; + c++; + } + Mhm, do we need to increase AMDGPU_BO_MAX_PLACEMENTS? I think the answer is *no* since mixing DOORBELL with CPU, GTT or VRAM placement doesn't make sense, but do we enforce that somewhere? I am not sure why do we need that ? if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; places[c].lpfn = 0; @@ -466,7 +474,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, goto fail; } - /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */ + /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_DOMAIN_DOORBELL */ Should we enforce that user space can only allocate 1 page doorbells? Should we add a per-PID basis check ? - Shashank return true; fail: @@ -1014,6 +1022,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo) } else if (bo->tbo.resource->mem_type == TTM_PL_TT) { atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size); } + Unrelated change. Noted - Shashank Regards, Christian. } static const char *amdgpu_vram_names[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0e8f580769ab..e9dc24191fc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -128,6 +128,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, case AMDGPU_PL_GDS: case AMDGPU_PL_GWS: case AMDGPU_PL_OA: + case AMDGPU_PL_DOORBELL: placement->num_placement = 0; placement->num_busy_placement = 0; return; @@ -500,9 +501,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (old_mem->mem_type == AMDGPU_PL_GDS || old_mem->mem_type == AMDGPU_PL_GWS || old_mem->mem_type == AMDGPU_PL_OA || + old_mem->mem_type == AMDGPU_PL_DOORBELL || new_mem->mem_type == AMDGPU_PL_GDS || new_mem->mem_type == AMDGPU_PL_GWS || - new_mem->mem_type == AMDGPU_PL_OA) { + new_mem->mem_type == AMDGPU_PL_OA || + new_mem->mem_type == AMDGPU_PL_DOORBELL) { /* Nothing to save here */ ttm_bo_move_null(bo, new_mem); goto out; @@ -586,6 +589,11 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.offset += adev->gmc.vram_aper_base; mem->bus.is_iomem = true; break; + case AMDGPU_PL_DOORBELL: + mem->bus.offset = mem->start << PAGE_SHIFT; + mem->bus.offset += adev->doorbell.doorbell_aper_base; + mem->bus.is_iomem = true; + break; default: return -EINVAL; } @@ -1267,6 +1275,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) flags |= AMDGPU_PTE_VALID; if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_DOORBELL || mem->mem_type == AMDGPU_PL_PREEMPT)) { flags |= AMDGPU_PTE_SYSTEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 967b265dbfa1..9cf5d8419965 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -33,6 +33,7 @@ #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) #define AMDGPU_PL_OA (TTM_PL_PRIV + 2) #define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3) +#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4) #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
Re: [PATCH v2 7/8] drm/amdgpu: create doorbell kernel object
On 14/02/2023 19:35, Christian König wrote: Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Shashank Sharma This patch does the following: - Initializes TTM range management for domain DOORBELL. - Introduces a kernel bo for doorbell management in form of mman.doorbell_kernel_bo. This bo holds the kernel doorbell space now. - Removes ioremapping of doorbell-kernel memory, as its not required now. V2: - Addressed review comments from Christian: - do not use kernel_create_at(0), use kernel_create() instead. - do not use ttm_resource_manager, use range_manager instead. - do not ioremap doorbell, TTM will do that. - Split one big patch into 2 Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 22 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 7 +++ 2 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e9dc24191fc8..086e83c17c0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1879,12 +1879,32 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } + r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.doorbell_aper_size); + if (r) { + DRM_ERROR("Failed initializing oa heap.\n"); + return r; + } + if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr)) DRM_WARN("Debug VRAM access will use slowpath MM access\n"); + /* Create a doorbell BO for kernel usages */ + r = amdgpu_bo_create_kernel(adev, + adev->mman.doorbell_kernel_bo_size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_DOORBELL, + &adev->mman.doorbell_kernel_bo, + &adev->mman.doorbell_gpu_addr, + (void **)&adev->mman.doorbell_cpu_addr); + + if (r) { + DRM_ERROR("Failed to create doorbell BO, err=%d\n", r); + return r; + } + I would even move this before the SDMA VRAM buffer since the later is only nice to have while the doorbell is mandatory to have. Agree, return 0; } @@ -1908,6 +1928,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) NULL, NULL); amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); + amdgpu_bo_free_kernel(&adev->mman.doorbell_kernel_bo, + NULL, (void **)&adev->mman.doorbell_cpu_addr); amdgpu_ttm_fw_reserve_vram_fini(adev); amdgpu_ttm_drv_reserve_vram_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 9cf5d8419965..50748ff1dd3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -97,6 +97,13 @@ struct amdgpu_mman { /* PAGE_SIZE'd BO for process memory r/w over SDMA. */ struct amdgpu_bo *sdma_access_bo; void *sdma_access_ptr; + + /* doorbells reserved for the kernel driver */ + u32 num_kernel_doorbells; /* Number of doorbells actually reserved for kernel */ + uint64_t doorbell_kernel_bo_size; That looks like duplicated information. We should only keep either the number of kernel doorbells or the kernel doorbell bo size around, not both. Yeah, agree. I can remove one of these two. And BTW please no comment after structure members. Noted, - Shashank Christian. + uint64_t doorbell_gpu_addr; + struct amdgpu_bo *doorbell_kernel_bo; + u32 *doorbell_cpu_addr; }; struct amdgpu_copy_mem {
Re: [PATCH v2 8/8] drm/amdgpu: start using kernel doorbell bo
On 14/02/2023 19:40, Christian König wrote: Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Shashank Sharma This patch does the following: - Adds new variables like mman.doorbell_bo_size/gpu_addr/cpu_addr. The cpu_addr ptr will be used now for doorbell read/write from doorbell BAR. - Adjusts the existing code to use kernel doorbell BO's size and its cpu_address. Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma Maybe squash this one together with the previous patch. I just split it from the last patch in this series, thought it was too scattered and might not be easy to review :D But see below. --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 1 - 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0493c64e9d0a..87f486f522ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -109,11 +109,10 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, *aperture_base = adev->doorbell.doorbell_aper_base; *aperture_size = 0; *start_offset = 0; - } else if (adev->doorbell.doorbell_aper_size > adev->doorbell.num_doorbells * - sizeof(u32)) { + } else if (adev->doorbell.doorbell_aper_size > adev->mman.doorbell_kernel_bo_size) { *aperture_base = adev->doorbell.doorbell_aper_base; *aperture_size = adev->doorbell.doorbell_aper_size; - *start_offset = adev->doorbell.num_doorbells * sizeof(u32); + *start_offset = adev->mman.doorbell_kernel_bo_size; } else { *aperture_base = 0; *aperture_size = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 43c1b67c2778..fde199434579 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -596,8 +596,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { - return readl(adev->mman.doorbell_aper_base_kaddr + index); + if (index < adev->mman.num_kernel_doorbells) { + return readl(adev->mman.doorbell_cpu_addr + index); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -619,8 +619,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { - writel(v, adev->mman.doorbell_aper_base_kaddr + index); + if (index < adev->mman.num_kernel_doorbells) { + writel(v, adev->mman.doorbell_cpu_addr + index); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -640,8 +640,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { - return atomic64_read((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index)); + if (index < adev->mman.num_kernel_doorbells) { + return atomic64_read((atomic64_t *)(adev->mman.doorbell_cpu_addr + index)); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -663,8 +663,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { - atomic64_set((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index), v); + if (index < adev->mman.num_kernel_doorbells) { + atomic64_set((atomic64_t *)(adev->mman.doorbell_cpu_addr + index), v); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -1037,7 +1037,7 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) if (adev->asic_type < CHIP_BONAIRE) { adev->doorbell.doorbell_aper_base = 0; adev->doorbell.doorbell_aper_size = 0; - adev->doorbell.num_doorbells = 0; + adev->mman.num_kernel_doorbells = 0; adev->mman.doorbell_aper_base_kaddr = NULL; return 0; } @@ -1052,13 +1052,13 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) adev->doorbell.doorbell_aper_size = pci_resource_len(adev->pdev, 2); if (adev->enable_mes) { - adev->doorbell.num_doorbells = + adev->mman.num_kernel_doorbells = adev->doorbell.doorbell_aper_size / sizeof(u32); } else { -
Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum
On Tue, Feb 14, 2023 at 5:57 PM Harry Wentland wrote: > > > > On 2/14/23 10:49, Sebastian Wick wrote: > > On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä > > wrote: > >> > >> On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote: > >>> > >>> > >>> On 2/3/23 10:19, Ville Syrjälä wrote: > On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote: > > > > > > On 2/3/23 07:59, Sebastian Wick wrote: > >> On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä > >> wrote: > >>> > >>> On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote: > Userspace has no way of controlling or knowing the pixel encoding > currently, so there is no way for it to ever get the right values > here. > >>> > >>> That applies to a lot of the other values as well (they are > >>> explicitly RGB or YCC). The idea was that this property sets the > >>> infoframe/MSA/SDP value exactly, and other properties should be > >>> added to for use userspace to control the pixel encoding/colorspace > >>> conversion(if desired, or userspace just makes sure to > >>> directly feed in correct kind of data). > >> > >> I'm all for getting userspace control over pixel encoding but even > >> then the kernel always knows which pixel encoding is selected and > >> which InfoFrame has to be sent. Is there a reason why userspace would > >> want to control the variant explicitly to the wrong value? > >> > > > > I've asked this before but haven't seen an answer: Is there an existing > > upstream userspace project that makes use of this property (other than > > what Joshua is working on in gamescope right now)? That would help us > > understand the intent better. > > The intent was to control the infoframe colorimetry bits, > nothing more. No idea what real userspace there was, if any. > > > > > I don't think giving userspace explicit control over the exact infoframe > > values is the right thing to do. > > Only userspace knows what kind of data it's stuffing into > the pixels (and/or how it configures the csc units/etc.) to > generate them. > > >>> > >>> Yes, but userspace doesn't control or know whether we drive > >>> RGB or YCbCr on the wire. In fact, in some cases our driver > >>> needs to fallback to YCbCr420 for bandwidth reasons. There > >>> is currently no way for userspace to know that and I don't > >>> think it makes sense. > >> > >> People want that control as well for whatever reason. We've > >> been asked to allow YCbCr 4:4:4 output many times. > > > > I don't really think it's a question of if we want it but rather how > > we get there. Harry is completely right that if we would make the > > subsampling controllable by user space instead of the kernel handling > > it magically, user space which does not adapt to the new control won't > > be able to light up some modes which worked before. > > > > Thanks for continuing this discussion and touching on the model of how > we get to where we want to go. > > > This is obviously a problem and not one we can easily fix. We would > > need a new cap for user space to signal "I know that I can control > > bpc, subsampling and compression to lower the bandwidth and light up > > modes which otherwise fail". That cap would also remove all the > > properties which require kernel magic to work (that's also what I > > proposed for my KMS color pipeline API). > > > > We all want to expose more of the scanout capability and give user > > space more control but I don't think an incremental approach works > > here and we would all do better if we accept that the current API > > requires kernel magic to work and has a few implicit assumptions baked > > in. > > > > With all that being said, I think the right decision here is to > > > > 1. Ignore subsampling for now > > 2. Let the kernel select YCC or RGB on the cable > > 3. Let the kernel figure out the conversion between RGB and YCC based > > on the color space selected > > 4. Let the kernel send the correct infoframe based on the selected > > color space and cable encoding > > 5. Only expose color spaces for which the kernel can do the conversion > > and send the infoframe > > I agree. We don't want to break or change existing behavior (that is > used by userspace) and this will get us far without breaking things. > > > 6. Work on the new API which is hidden behind a cap > > > > I assume you mean something like > https://gitlab.freedesktop.org/pq/color-and-hdr/-/issues/11 Something like that, yes. The main point being a cap which removes a lot of properties and sets new expectations between user space and kernel. The actual API is not that important. > Above you say that you don't think an incremental approach works > here. Can you elaborate? Backwards compatibility is really hard. If we add another property to control e.g. the color range infoframe which doesn't magicall
Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum
On 2/14/23 14:45, Sebastian Wick wrote: On Tue, Feb 14, 2023 at 5:57 PM Harry Wentland wrote: On 2/14/23 10:49, Sebastian Wick wrote: On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä wrote: On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote: On 2/3/23 10:19, Ville Syrjälä wrote: On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote: On 2/3/23 07:59, Sebastian Wick wrote: On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä wrote: On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote: Userspace has no way of controlling or knowing the pixel encoding currently, so there is no way for it to ever get the right values here. That applies to a lot of the other values as well (they are explicitly RGB or YCC). The idea was that this property sets the infoframe/MSA/SDP value exactly, and other properties should be added to for use userspace to control the pixel encoding/colorspace conversion(if desired, or userspace just makes sure to directly feed in correct kind of data). I'm all for getting userspace control over pixel encoding but even then the kernel always knows which pixel encoding is selected and which InfoFrame has to be sent. Is there a reason why userspace would want to control the variant explicitly to the wrong value? I've asked this before but haven't seen an answer: Is there an existing upstream userspace project that makes use of this property (other than what Joshua is working on in gamescope right now)? That would help us understand the intent better. The intent was to control the infoframe colorimetry bits, nothing more. No idea what real userspace there was, if any. I don't think giving userspace explicit control over the exact infoframe values is the right thing to do. Only userspace knows what kind of data it's stuffing into the pixels (and/or how it configures the csc units/etc.) to generate them. Yes, but userspace doesn't control or know whether we drive RGB or YCbCr on the wire. In fact, in some cases our driver needs to fallback to YCbCr420 for bandwidth reasons. There is currently no way for userspace to know that and I don't think it makes sense. People want that control as well for whatever reason. We've been asked to allow YCbCr 4:4:4 output many times. I don't really think it's a question of if we want it but rather how we get there. Harry is completely right that if we would make the subsampling controllable by user space instead of the kernel handling it magically, user space which does not adapt to the new control won't be able to light up some modes which worked before. Thanks for continuing this discussion and touching on the model of how we get to where we want to go. This is obviously a problem and not one we can easily fix. We would need a new cap for user space to signal "I know that I can control bpc, subsampling and compression to lower the bandwidth and light up modes which otherwise fail". That cap would also remove all the properties which require kernel magic to work (that's also what I proposed for my KMS color pipeline API). We all want to expose more of the scanout capability and give user space more control but I don't think an incremental approach works here and we would all do better if we accept that the current API requires kernel magic to work and has a few implicit assumptions baked in. With all that being said, I think the right decision here is to 1. Ignore subsampling for now 2. Let the kernel select YCC or RGB on the cable 3. Let the kernel figure out the conversion between RGB and YCC based on the color space selected 4. Let the kernel send the correct infoframe based on the selected color space and cable encoding 5. Only expose color spaces for which the kernel can do the conversion and send the infoframe I agree. We don't want to break or change existing behavior (that is used by userspace) and this will get us far without breaking things. 6. Work on the new API which is hidden behind a cap I assume you mean something like https://gitlab.freedesktop.org/pq/color-and-hdr/-/issues/11 Something like that, yes. The main point being a cap which removes a lot of properties and sets new expectations between user space and kernel. The actual API is not that important. Above you say that you don't think an incremental approach works here. Can you elaborate? Backwards compatibility is really hard. If we add another property to control e.g. the color range infoframe which doesn't magically convert colors, we now have to define how it interacts with the existing property. We also have to figure out how a user space which doesn't know about the new property behaves when another client has set that property. If any property which currently might change the pixel values is used, we can't expose the entire color pipeline because the kernel might have to use some element in it to achieve its magic conversion. So essentially you already have this hard device between "old" and "new" and
Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum
On Tue, Feb 14, 2023 at 08:45:00PM +0100, Sebastian Wick wrote: > On Tue, Feb 14, 2023 at 5:57 PM Harry Wentland wrote: > > > > > > > > On 2/14/23 10:49, Sebastian Wick wrote: > > > On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä > > > wrote: > > >> > > >> On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote: > > >>> > > >>> > > >>> On 2/3/23 10:19, Ville Syrjälä wrote: > > On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote: > > > > > > > > > On 2/3/23 07:59, Sebastian Wick wrote: > > >> On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä > > >> wrote: > > >>> > > >>> On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote: > > Userspace has no way of controlling or knowing the pixel encoding > > currently, so there is no way for it to ever get the right values > > here. > > >>> > > >>> That applies to a lot of the other values as well (they are > > >>> explicitly RGB or YCC). The idea was that this property sets the > > >>> infoframe/MSA/SDP value exactly, and other properties should be > > >>> added to for use userspace to control the pixel encoding/colorspace > > >>> conversion(if desired, or userspace just makes sure to > > >>> directly feed in correct kind of data). > > >> > > >> I'm all for getting userspace control over pixel encoding but even > > >> then the kernel always knows which pixel encoding is selected and > > >> which InfoFrame has to be sent. Is there a reason why userspace would > > >> want to control the variant explicitly to the wrong value? > > >> > > > > > > I've asked this before but haven't seen an answer: Is there an > > > existing > > > upstream userspace project that makes use of this property (other than > > > what Joshua is working on in gamescope right now)? That would help us > > > understand the intent better. > > > > The intent was to control the infoframe colorimetry bits, > > nothing more. No idea what real userspace there was, if any. > > > > > > > > I don't think giving userspace explicit control over the exact > > > infoframe > > > values is the right thing to do. > > > > Only userspace knows what kind of data it's stuffing into > > the pixels (and/or how it configures the csc units/etc.) to > > generate them. > > > > >>> > > >>> Yes, but userspace doesn't control or know whether we drive > > >>> RGB or YCbCr on the wire. In fact, in some cases our driver > > >>> needs to fallback to YCbCr420 for bandwidth reasons. There > > >>> is currently no way for userspace to know that and I don't > > >>> think it makes sense. > > >> > > >> People want that control as well for whatever reason. We've > > >> been asked to allow YCbCr 4:4:4 output many times. > > > > > > I don't really think it's a question of if we want it but rather how > > > we get there. Harry is completely right that if we would make the > > > subsampling controllable by user space instead of the kernel handling > > > it magically, user space which does not adapt to the new control won't > > > be able to light up some modes which worked before. > > > > > > > Thanks for continuing this discussion and touching on the model of how > > we get to where we want to go. > > > > > This is obviously a problem and not one we can easily fix. We would > > > need a new cap for user space to signal "I know that I can control > > > bpc, subsampling and compression to lower the bandwidth and light up > > > modes which otherwise fail". That cap would also remove all the > > > properties which require kernel magic to work (that's also what I > > > proposed for my KMS color pipeline API). > > > > > > We all want to expose more of the scanout capability and give user > > > space more control but I don't think an incremental approach works > > > here and we would all do better if we accept that the current API > > > requires kernel magic to work and has a few implicit assumptions baked > > > in. > > > > > > With all that being said, I think the right decision here is to > > > > > > 1. Ignore subsampling for now > > > 2. Let the kernel select YCC or RGB on the cable > > > 3. Let the kernel figure out the conversion between RGB and YCC based > > > on the color space selected > > > 4. Let the kernel send the correct infoframe based on the selected > > > color space and cable encoding > > > 5. Only expose color spaces for which the kernel can do the conversion > > > and send the infoframe > > > > I agree. We don't want to break or change existing behavior (that is > > used by userspace) and this will get us far without breaking things. > > > > > 6. Work on the new API which is hidden behind a cap > > > > > > > I assume you mean something like > > https://gitlab.freedesktop.org/pq/color-and-hdr/-/issues/11 > > Something like that, yes. The main point being a cap which removes a > lot of properties and sets new expecta
[PATCH] drm/amd/display: only warn once in dce110_edp_wait_for_hpd_ready()
Since, hot plugging eDP displays isn't supported, it is sufficient for us to warn about the lack of a connected display once. So, use ASSERT() in dce110_edp_wait_for_hpd_ready() instead of DC_LOG_WARNING(). Signed-off-by: Hamza Mahfooz --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index fb3fd5b7c78b..0d4d3d586166 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -779,10 +779,8 @@ void dce110_edp_wait_for_hpd_ready( dal_gpio_destroy_irq(&hpd); - if (false == edp_hpd_high) { - DC_LOG_WARNING( - "%s: wait timed out!\n", __func__); - } + /* ensure that the panel is detected */ + ASSERT(edp_hpd_high); } void dce110_edp_power_control( -- 2.39.1
Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum
On Tue, Feb 14, 2023 at 9:10 PM Ville Syrjälä wrote: > > On Tue, Feb 14, 2023 at 08:45:00PM +0100, Sebastian Wick wrote: > > On Tue, Feb 14, 2023 at 5:57 PM Harry Wentland > > wrote: > > > > > > > > > > > > On 2/14/23 10:49, Sebastian Wick wrote: > > > > On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä > > > > wrote: > > > >> > > > >> On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote: > > > >>> > > > >>> > > > >>> On 2/3/23 10:19, Ville Syrjälä wrote: > > > On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote: > > > > > > > > > > > > On 2/3/23 07:59, Sebastian Wick wrote: > > > >> On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä > > > >> wrote: > > > >>> > > > >>> On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote: > > > Userspace has no way of controlling or knowing the pixel encoding > > > currently, so there is no way for it to ever get the right > > > values here. > > > >>> > > > >>> That applies to a lot of the other values as well (they are > > > >>> explicitly RGB or YCC). The idea was that this property sets the > > > >>> infoframe/MSA/SDP value exactly, and other properties should be > > > >>> added to for use userspace to control the pixel > > > >>> encoding/colorspace > > > >>> conversion(if desired, or userspace just makes sure to > > > >>> directly feed in correct kind of data). > > > >> > > > >> I'm all for getting userspace control over pixel encoding but even > > > >> then the kernel always knows which pixel encoding is selected and > > > >> which InfoFrame has to be sent. Is there a reason why userspace > > > >> would > > > >> want to control the variant explicitly to the wrong value? > > > >> > > > > > > > > I've asked this before but haven't seen an answer: Is there an > > > > existing > > > > upstream userspace project that makes use of this property (other > > > > than > > > > what Joshua is working on in gamescope right now)? That would help > > > > us > > > > understand the intent better. > > > > > > The intent was to control the infoframe colorimetry bits, > > > nothing more. No idea what real userspace there was, if any. > > > > > > > > > > > I don't think giving userspace explicit control over the exact > > > > infoframe > > > > values is the right thing to do. > > > > > > Only userspace knows what kind of data it's stuffing into > > > the pixels (and/or how it configures the csc units/etc.) to > > > generate them. > > > > > > >>> > > > >>> Yes, but userspace doesn't control or know whether we drive > > > >>> RGB or YCbCr on the wire. In fact, in some cases our driver > > > >>> needs to fallback to YCbCr420 for bandwidth reasons. There > > > >>> is currently no way for userspace to know that and I don't > > > >>> think it makes sense. > > > >> > > > >> People want that control as well for whatever reason. We've > > > >> been asked to allow YCbCr 4:4:4 output many times. > > > > > > > > I don't really think it's a question of if we want it but rather how > > > > we get there. Harry is completely right that if we would make the > > > > subsampling controllable by user space instead of the kernel handling > > > > it magically, user space which does not adapt to the new control won't > > > > be able to light up some modes which worked before. > > > > > > > > > > Thanks for continuing this discussion and touching on the model of how > > > we get to where we want to go. > > > > > > > This is obviously a problem and not one we can easily fix. We would > > > > need a new cap for user space to signal "I know that I can control > > > > bpc, subsampling and compression to lower the bandwidth and light up > > > > modes which otherwise fail". That cap would also remove all the > > > > properties which require kernel magic to work (that's also what I > > > > proposed for my KMS color pipeline API). > > > > > > > > We all want to expose more of the scanout capability and give user > > > > space more control but I don't think an incremental approach works > > > > here and we would all do better if we accept that the current API > > > > requires kernel magic to work and has a few implicit assumptions baked > > > > in. > > > > > > > > With all that being said, I think the right decision here is to > > > > > > > > 1. Ignore subsampling for now > > > > 2. Let the kernel select YCC or RGB on the cable > > > > 3. Let the kernel figure out the conversion between RGB and YCC based > > > > on the color space selected > > > > 4. Let the kernel send the correct infoframe based on the selected > > > > color space and cable encoding > > > > 5. Only expose color spaces for which the kernel can do the conversion > > > > and send the infoframe > > > > > > I agree. We don't want to break or change existing behavior (that is > > > used by userspace) and this will get
Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum
On Tue, Feb 14, 2023 at 10:18:49PM +0100, Sebastian Wick wrote: > On Tue, Feb 14, 2023 at 9:10 PM Ville Syrjälä > wrote: > > > > On Tue, Feb 14, 2023 at 08:45:00PM +0100, Sebastian Wick wrote: > > > On Tue, Feb 14, 2023 at 5:57 PM Harry Wentland > > > wrote: > > > > > > > > > > > > > > > > On 2/14/23 10:49, Sebastian Wick wrote: > > > > > On Fri, Feb 3, 2023 at 5:00 PM Ville Syrjälä > > > > > wrote: > > > > >> > > > > >> On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote: > > > > >>> > > > > >>> > > > > >>> On 2/3/23 10:19, Ville Syrjälä wrote: > > > > On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote: > > > > > > > > > > > > > > > On 2/3/23 07:59, Sebastian Wick wrote: > > > > >> On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä > > > > >> wrote: > > > > >>> > > > > >>> On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote: > > > > Userspace has no way of controlling or knowing the pixel > > > > encoding > > > > currently, so there is no way for it to ever get the right > > > > values here. > > > > >>> > > > > >>> That applies to a lot of the other values as well (they are > > > > >>> explicitly RGB or YCC). The idea was that this property sets the > > > > >>> infoframe/MSA/SDP value exactly, and other properties should be > > > > >>> added to for use userspace to control the pixel > > > > >>> encoding/colorspace > > > > >>> conversion(if desired, or userspace just makes sure to > > > > >>> directly feed in correct kind of data). > > > > >> > > > > >> I'm all for getting userspace control over pixel encoding but > > > > >> even > > > > >> then the kernel always knows which pixel encoding is selected and > > > > >> which InfoFrame has to be sent. Is there a reason why userspace > > > > >> would > > > > >> want to control the variant explicitly to the wrong value? > > > > >> > > > > > > > > > > I've asked this before but haven't seen an answer: Is there an > > > > > existing > > > > > upstream userspace project that makes use of this property (other > > > > > than > > > > > what Joshua is working on in gamescope right now)? That would > > > > > help us > > > > > understand the intent better. > > > > > > > > The intent was to control the infoframe colorimetry bits, > > > > nothing more. No idea what real userspace there was, if any. > > > > > > > > > > > > > > I don't think giving userspace explicit control over the exact > > > > > infoframe > > > > > values is the right thing to do. > > > > > > > > Only userspace knows what kind of data it's stuffing into > > > > the pixels (and/or how it configures the csc units/etc.) to > > > > generate them. > > > > > > > > >>> > > > > >>> Yes, but userspace doesn't control or know whether we drive > > > > >>> RGB or YCbCr on the wire. In fact, in some cases our driver > > > > >>> needs to fallback to YCbCr420 for bandwidth reasons. There > > > > >>> is currently no way for userspace to know that and I don't > > > > >>> think it makes sense. > > > > >> > > > > >> People want that control as well for whatever reason. We've > > > > >> been asked to allow YCbCr 4:4:4 output many times. > > > > > > > > > > I don't really think it's a question of if we want it but rather how > > > > > we get there. Harry is completely right that if we would make the > > > > > subsampling controllable by user space instead of the kernel handling > > > > > it magically, user space which does not adapt to the new control won't > > > > > be able to light up some modes which worked before. > > > > > > > > > > > > > Thanks for continuing this discussion and touching on the model of how > > > > we get to where we want to go. > > > > > > > > > This is obviously a problem and not one we can easily fix. We would > > > > > need a new cap for user space to signal "I know that I can control > > > > > bpc, subsampling and compression to lower the bandwidth and light up > > > > > modes which otherwise fail". That cap would also remove all the > > > > > properties which require kernel magic to work (that's also what I > > > > > proposed for my KMS color pipeline API). > > > > > > > > > > We all want to expose more of the scanout capability and give user > > > > > space more control but I don't think an incremental approach works > > > > > here and we would all do better if we accept that the current API > > > > > requires kernel magic to work and has a few implicit assumptions baked > > > > > in. > > > > > > > > > > With all that being said, I think the right decision here is to > > > > > > > > > > 1. Ignore subsampling for now > > > > > 2. Let the kernel select YCC or RGB on the cable > > > > > 3. Let the kernel figure out the conversion between RGB and YCC based > > > > > on the color space selected > > > > > 4. Let the kernel send the correc
RE: [PATCH 1/2] drm/amdgpu: optimize VRAM allocation when using drm buddy
For public review > -Original Message- > From: Koenig, Christian > Sent: Wednesday, February 15, 2023 3:02 AM > To: Xiao, Shane ; Paneer Selvam, Arunpravin > > Subject: Re: [PATCH 1/2] drm/amdgpu: optimize VRAM allocation when using > drm buddy > > Am 14.02.23 um 15:53 schrieb Xiao, Shane: > >> -Original Message- > >> From: Koenig, Christian > >> Sent: Tuesday, February 14, 2023 8:41 PM > >> To: Xiao, Shane ; brahma_sw_dev > >> > >> Cc: Paneer Selvam, Arunpravin > >> Subject: Re: [PATCH 1/2] drm/amdgpu: optimize VRAM allocation when > >> using drm buddy > >> > >> Am 14.02.23 um 12:18 schrieb Shane Xiao: > >>> Since the VRAM manager changed from drm mm to drm buddy. It's not > >>> necessary to allocate 2MB aligned VRAM for more than 2MB unaligned > >>> size, and then do trim. This method improves the allocation > >>> efficiency and reduces memory fragmentation. > >> Well that is a trade off. > >> > >> Allocating the BO as one contiguous chunk and then trimming is > >> beneficial because if we then later need it contiguous we don't need > >> to re-allocate and copy. This can be needed to display something for > example. Hi Christian, This case means that you allocate BO that is unnecessary to be continuous at first time, and latter the BO should be continuous. I'm not familiar with display. Could you give me a few more specific examples ? > > Yes, I agree that one contiguous chunk may get beneficial sometimes. > > But as far as I know, you cannot guarantee that amdgpu_vram_mgr_new > can get one contiguous chunk if you don't set TTM_PL_FLAG_CONTIGUOUS > flags. > > For example, if you want to allocate 4M+4K BO, it will allocate one 4M block > + one 2M block which is unnecessary to be continuous, then 2M block will be > trimmed. > > Oh, that's indeed not something which should happen. Sounds more like a > bug fix then. Yes, I think this case should not be happened. Actually, I'm not sure that why the allocated BO should be aligned with pages_per_block, which is set to 2MB by default. Does this help improve performance when allocating 2M or above BO? From my point of view, the TLB may be one of reason of this. But I'm not sure about this. Best Regards, Shane > > > > >> On the other hand I completely agree allocating big and then trimming > >> creates more fragmentation than necessary. > >> > >> Do you have some test case which can show the difference? > > I have use rocrtst to show the difference. > > The attachment is shown that after applying this patch, the order < 9 total > vram size decrease from 99MB to 43MB. > > And the latter has more higher order block memory. > > Arun can you take a look? That problem here sounds important. > > Thanks, > Christian. > > > > >> BTW: No need to discuss that on the internal mailing list, please use > >> the public one instead. > >> > > I will send it to public. Thank you for your remind. > > > > Best Regards, > > Shane > > > >> Regards, > >> Christian. > >> > >>> Signed-off-by: Shane Xiao > >>> --- > >>>drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 2 +- > >>>1 file changed, 1 insertion(+), 1 deletion(-) > >>> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > >>> index 75c80c557b6e..3fea58f9427c 100644 > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > >>> @@ -453,7 +453,7 @@ static int amdgpu_vram_mgr_new(struct > >> ttm_resource_manager *man, > >>> /* Limit maximum size to 2GiB due to SG table > >>> limitations */ > >>> size = min(remaining_size, 2ULL << 30); > >>> > >>> - if (size >= (u64)pages_per_block << PAGE_SHIFT) > >>> + if (!(size % ((u64)pages_per_block << PAGE_SHIFT))) > >>> min_block_size = (u64)pages_per_block << > >> PAGE_SHIFT; > >>> cur_size = size;
Re: [PATCH] drm/amd/display: Remove duplicate/repeating expression
Applied. Thanks! On Fri, Feb 10, 2023 at 10:22 AM Harry Wentland wrote: > > On 2/10/23 05:11, Deepak R Varma wrote: > > Remove duplicate or repeating expressions in the if condition > > evaluation. Issue identified using doubletest.cocci Coccinelle semantic > > patch. > > > > Signed-off-by: Deepak R Varma > > Reviewed-by: Harry Wentland > > Harry > > > --- > > .../gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git > > a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c > > b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c > > index 61ee9ba063a7..6576b897a512 100644 > > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c > > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c > > @@ -51,7 +51,7 @@ static bool CalculateBytePerPixelAnd256BBlockSizes( > > *BytePerPixelDETC = 0; > > *BytePerPixelY = 4; > > *BytePerPixelC = 0; > > - } else if (SourcePixelFormat == dm_444_16 || SourcePixelFormat == > > dm_444_16) { > > + } else if (SourcePixelFormat == dm_444_16) { > > *BytePerPixelDETY = 2; > > *BytePerPixelDETC = 0; > > *BytePerPixelY = 2; >
Re: [PATCH] drm/amd/display: Remove duplicate/repeating expressions
Applied. Thanks! Alex On Fri, Feb 10, 2023 at 2:37 PM Harry Wentland wrote: > > On 2/10/23 05:00, Deepak R Varma wrote: > > Remove duplicate or repeating expressions in the if condition > > evaluation. Issue identified using doubletest.cocci Coccinelle semantic > > patch. > > > > Signed-off-by: Deepak R Varma > > Reviewed-by: Harry Wentland > > Harry > > > --- > > .../gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c| 4 +--- > > 1 file changed, 1 insertion(+), 3 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c > > b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c > > index 4b8f5fa0f0ad..ae89760d887d 100644 > > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c > > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c > > @@ -2335,8 +2335,7 @@ void > > dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l > > > > if (mode_lib->vba.DSCEnable[k] && > > mode_lib->vba.ForcedOutputLinkBPP[k] != 0) > > mode_lib->vba.DSCOnlyIfNecessaryWithBPP = > > true; > > - if ((mode_lib->vba.DSCEnable[k] || > > mode_lib->vba.DSCEnable[k]) > > - && mode_lib->vba.OutputFormat[k] == > > dm_n422 > > + if (mode_lib->vba.DSCEnable[k] && > > mode_lib->vba.OutputFormat[k] == dm_n422 > > && !mode_lib->vba.DSC422NativeSupport) > > mode_lib->vba.DSC422NativeNotSupported = true; > > > > @@ -3639,7 +3638,6 @@ void > > dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l > > if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 > > && mode_lib->vba.SourcePixelFormat[k] > > != dm_444_32 > > && mode_lib->vba.SourcePixelFormat[k] > > != dm_444_16 > > - && mode_lib->vba.SourcePixelFormat[k] > > != dm_444_16 > > && mode_lib->vba.SourcePixelFormat[k] > > != dm_444_8 > > && mode_lib->vba.SourcePixelFormat[k] > > != dm_rgbe) { > > if (mode_lib->vba.ViewportWidthChroma[k] > > > mode_lib->vba.SurfaceWidthC[k] >
Re: [PATCH] drm/amd/display: avoid unaligned access warnings
Applied. Thanks! On Tue, Feb 14, 2023 at 1:56 AM Jonathan Gray wrote: > > When building on OpenBSD/arm64 with clang 15, unaligned access > warnings are seen when a union is embedded inside a packed struct. > > drm/amd/display/dmub/inc/dmub_cmd.h:941:18: error: field > cursor_copy_src within 'struct dmub_rb_cmd_mall' is less aligned than > 'union dmub_addr' and is usually due to 'struct dmub_rb_cmd_mall' > being packed, which can lead to unaligned accesses > [-Werror,-Wunaligned-access] > union dmub_addr cursor_copy_src; /**< Cursor copy address */ > ^ > drm/amd/display/dmub/inc/dmub_cmd.h:942:18: error: field cursor_copy_dst > within 'struct dmub_rb_cmd_mall' is less aligned than > 'union dmub_addr' and is usually due to 'struct dmub_rb_cmd_mall' > being packed, which can lead to unaligned accesses > [-Werror,-Wunaligned-access] > union dmub_addr cursor_copy_dst; /**< Cursor copy destination */ > ^ > > Add pragma pack around dmub_addr to avoid this. > > Signed-off-by: Jonathan Gray > --- > drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h > b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h > index 33907feefebb..dc92d06572a3 100644 > --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h > +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h > @@ -162,6 +162,7 @@ extern "C" { > #define dmub_udelay(microseconds) udelay(microseconds) > #endif > > +#pragma pack(push, 1) > /** > * union dmub_addr - DMUB physical/virtual 64-bit address. > */ > @@ -172,6 +173,7 @@ union dmub_addr { > } u; /*<< Low/high bit access */ > uint64_t quad_part; /*<< 64 bit address */ > }; > +#pragma pack(pop) > > /** > * Dirty rect definition. > -- > 2.39.1 >
Re: [PATCH] drm/amd/pm: avoid unaligned access warnings
Applied. Thanks! Alex On Tue, Feb 14, 2023 at 1:58 AM Jonathan Gray wrote: > > When building on OpenBSD/arm64 with clang 15, unaligned access > warnings are seen when a union is embedded inside a packed struct. > > drm/amd/pm/powerplay/hwmgr/vega20_pptable.h:136:17: error: field > smcPPTable within 'struct _ATOM_VEGA20_POWERPLAYTABLE' is less aligned > than 'PPTable_t' and is usually due to > 'struct _ATOM_VEGA20_POWERPLAYTABLE' being packed, which can lead to >unaligned accesses [-Werror,-Wunaligned-access] > PPTable_t smcPPTable; > ^ > > Make PPTable_t packed to avoid this. > > Signed-off-by: Jonathan Gray > --- > drivers/gpu/drm/amd/pm/powerplay/inc/smu11_driver_if.h | 2 ++ > drivers/gpu/drm/amd/pm/powerplay/inc/smu9_driver_if.h | 2 ++ > drivers/gpu/drm/amd/pm/powerplay/inc/vega12/smu9_driver_if.h| 2 ++ > .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_arcturus.h | 2 ++ > .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_navi10.h | 2 ++ > .../amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h | 2 ++ > .../drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h| 2 ++ > .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 2 ++ > .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h | 2 ++ > 9 files changed, 18 insertions(+) > > diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/smu11_driver_if.h > b/drivers/gpu/drm/amd/pm/powerplay/inc/smu11_driver_if.h > index fdc6b7a57bc9..c2efc70ef288 100644 > --- a/drivers/gpu/drm/amd/pm/powerplay/inc/smu11_driver_if.h > +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/smu11_driver_if.h > @@ -358,6 +358,7 @@ typedef struct { >QuadraticInt_t SsCurve; > } DpmDescriptor_t; > > +#pragma pack(push, 1) > typedef struct { >uint32_t Version; > > @@ -609,6 +610,7 @@ typedef struct { >uint32_t MmHubPadding[8]; > > } PPTable_t; > +#pragma pack(pop) > > typedef struct { > > diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/smu9_driver_if.h > b/drivers/gpu/drm/amd/pm/powerplay/inc/smu9_driver_if.h > index 2818c98ff5ca..faae4b918d90 100644 > --- a/drivers/gpu/drm/amd/pm/powerplay/inc/smu9_driver_if.h > +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/smu9_driver_if.h > @@ -122,6 +122,7 @@ typedef struct { >uint16_t Vid; /* min voltage in SVI2 VID */ > } DisplayClockTable_t; > > +#pragma pack(push, 1) > typedef struct { >/* PowerTune */ >uint16_t SocketPowerLimit; /* Watts */ > @@ -323,6 +324,7 @@ typedef struct { >uint32_t MmHubPadding[3]; /* SMU internal use */ > > } PPTable_t; > +#pragma pack(pop) > > typedef struct { >uint16_t MinClock; // This is either DCEFCLK or SOCCLK (in MHz) > diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/vega12/smu9_driver_if.h > b/drivers/gpu/drm/amd/pm/powerplay/inc/vega12/smu9_driver_if.h > index b6ffd08784e7..6456bea5d2d5 100644 > --- a/drivers/gpu/drm/amd/pm/powerplay/inc/vega12/smu9_driver_if.h > +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/vega12/smu9_driver_if.h > @@ -245,6 +245,7 @@ typedef struct { >QuadraticInt_t SsCurve; > } DpmDescriptor_t; > > +#pragma pack(push, 1) > typedef struct { >uint32_t Version; > > @@ -508,6 +509,7 @@ typedef struct { >uint32_t MmHubPadding[7]; > > } PPTable_t; > +#pragma pack(pop) > > typedef struct { > > diff --git > a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_arcturus.h > b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_arcturus.h > index 43d43d6addc0..d518dee18e1b 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_arcturus.h > +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_arcturus.h > @@ -464,6 +464,7 @@ typedef struct { >uint16_t Padding16; > } DpmDescriptor_t; > > +#pragma pack(push, 1) > typedef struct { >uint32_t Version; > > @@ -733,6 +734,7 @@ typedef struct { >uint32_t MmHubPadding[8]; // SMU internal use > > } PPTable_t; > +#pragma pack(pop) > > typedef struct { >// Time constant parameters for clock averages in ms > diff --git > a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_navi10.h > b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_navi10.h > index 04752ade1016..c5c1943fb6a1 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_navi10.h > +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_navi10.h > @@ -515,6 +515,7 @@ typedef struct { >uint32_t BoardLevelEnergyAccumulator; > } OutOfBandMonitor_t; > > +#pragma pack(push, 1) > typedef struct { >uint32_t Version; > > @@ -814,6 +815,7 @@ typedef struct { >uint32_t MmHubPadding[8]; // SMU internal use > > } PPTable_t; > +#pragma pack(pop) > > typedef struct { >// Time constant parameters for clock averages in ms > diff --git > a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h > b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h > index 351a4af429b3..aa6d29de4
[PATCH] drm/amdgpu: remove TOPDOWN flags when allocating VRAM in large bar system
Since VRAM manager is changed from drm mm to drm buddy, the TOP_DOWN flag should not be set by default in the large bar system. Removing this flag helps improve drm buddy allactor efficiency and reduce the risk of splitting higher order block into lower order. Signed-off-by: Shane Xiao --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 2d237f3d3a2e..1c3e647400bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -139,7 +139,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) places[c].lpfn = visible_pfn; - else + else if (adev->gmc.real_vram_size != adev->gmc.visible_vram_size) places[c].flags |= TTM_PL_FLAG_TOPDOWN; if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) -- 2.25.1
Re: [PATCH v2 5/8] drm/amdgpu: accommodate DOMAIN/PL_DOORBELL
Am 14.02.23 um 20:24 schrieb Shashank Sharma: On 14/02/2023 19:31, Christian König wrote: Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Alex Deucher This patch adds changes: - to accommodate the new GEM domain DOORBELL - to accommodate the new TTM PL DOORBELL to manage doorbell allocations as GEM Objects. V2: Addressed reviwe comments from Christian - drop the doorbell changes for pinning/unpinning - drop the doorbell changes for dma-buf map - drop the doorbell changes for sgt - no need to handle TTM_PL_FLAG_CONTIGUOUS for doorbell Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 11 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 11 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 + 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index b48c9fd60c43..ff9979fecfd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -147,6 +147,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) c++; } + if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) { + places[c].fpfn = 0; + places[c].lpfn = 0; + places[c].mem_type = AMDGPU_PL_DOORBELL; + places[c].flags = 0; + c++; + } + Mhm, do we need to increase AMDGPU_BO_MAX_PLACEMENTS? I think the answer is *no* since mixing DOORBELL with CPU, GTT or VRAM placement doesn't make sense, but do we enforce that somewhere? I am not sure why do we need that ? Userspace could otherwise specify DOORBEEL|CPU|GTT|VRAM as placement which would overrun the array and be illegal. if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; places[c].lpfn = 0; @@ -466,7 +474,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, goto fail; } - /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */ + /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_DOMAIN_DOORBELL */ Should we enforce that user space can only allocate 1 page doorbells? Should we add a per-PID basis check ? No, just a check that the allocation size of the doorbell BOs is just 1 page. Christian. - Shashank return true; fail: @@ -1014,6 +1022,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo) } else if (bo->tbo.resource->mem_type == TTM_PL_TT) { atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size); } + Unrelated change. Noted - Shashank Regards, Christian. } static const char *amdgpu_vram_names[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0e8f580769ab..e9dc24191fc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -128,6 +128,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, case AMDGPU_PL_GDS: case AMDGPU_PL_GWS: case AMDGPU_PL_OA: + case AMDGPU_PL_DOORBELL: placement->num_placement = 0; placement->num_busy_placement = 0; return; @@ -500,9 +501,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (old_mem->mem_type == AMDGPU_PL_GDS || old_mem->mem_type == AMDGPU_PL_GWS || old_mem->mem_type == AMDGPU_PL_OA || + old_mem->mem_type == AMDGPU_PL_DOORBELL || new_mem->mem_type == AMDGPU_PL_GDS || new_mem->mem_type == AMDGPU_PL_GWS || - new_mem->mem_type == AMDGPU_PL_OA) { + new_mem->mem_type == AMDGPU_PL_OA || + new_mem->mem_type == AMDGPU_PL_DOORBELL) { /* Nothing to save here */ ttm_bo_move_null(bo, new_mem); goto out; @@ -586,6 +589,11 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.offset += adev->gmc.vram_aper_base; mem->bus.is_iomem = true; break; + case AMDGPU_PL_DOORBELL: + mem->bus.offset = mem->start << PAGE_SHIFT; + mem->bus.offset += adev->doorbell.doorbell_aper_base; + mem->bus.is_iomem = true; + break; default: return -EINVAL; } @@ -1267,6 +1275,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) flags |= AMDGPU_PTE_VALID; if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_DOORBELL || mem->mem_type == AMDGPU_PL_PREEMPT)) { flags |= AMDGPU_PTE_SYSTEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 967b265dbfa1..9cf5d8419965 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -33,6 +33,7 @@ #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) #define AMDGPU_PL_OA
Re: [PATCH v2 8/8] drm/amdgpu: start using kernel doorbell bo
Am 14.02.23 um 20:28 schrieb Shashank Sharma: On 14/02/2023 19:40, Christian König wrote: Am 14.02.23 um 17:15 schrieb Shashank Sharma: From: Shashank Sharma This patch does the following: - Adds new variables like mman.doorbell_bo_size/gpu_addr/cpu_addr. The cpu_addr ptr will be used now for doorbell read/write from doorbell BAR. - Adjusts the existing code to use kernel doorbell BO's size and its cpu_address. Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma Maybe squash this one together with the previous patch. I just split it from the last patch in this series, thought it was too scattered and might not be easy to review :D Yeah, ok good point as well :D Christian. But see below. --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 1 - 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0493c64e9d0a..87f486f522ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -109,11 +109,10 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, *aperture_base = adev->doorbell.doorbell_aper_base; *aperture_size = 0; *start_offset = 0; - } else if (adev->doorbell.doorbell_aper_size > adev->doorbell.num_doorbells * - sizeof(u32)) { + } else if (adev->doorbell.doorbell_aper_size > adev->mman.doorbell_kernel_bo_size) { *aperture_base = adev->doorbell.doorbell_aper_base; *aperture_size = adev->doorbell.doorbell_aper_size; - *start_offset = adev->doorbell.num_doorbells * sizeof(u32); + *start_offset = adev->mman.doorbell_kernel_bo_size; } else { *aperture_base = 0; *aperture_size = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 43c1b67c2778..fde199434579 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -596,8 +596,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { - return readl(adev->mman.doorbell_aper_base_kaddr + index); + if (index < adev->mman.num_kernel_doorbells) { + return readl(adev->mman.doorbell_cpu_addr + index); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -619,8 +619,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { - writel(v, adev->mman.doorbell_aper_base_kaddr + index); + if (index < adev->mman.num_kernel_doorbells) { + writel(v, adev->mman.doorbell_cpu_addr + index); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -640,8 +640,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { - return atomic64_read((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index)); + if (index < adev->mman.num_kernel_doorbells) { + return atomic64_read((atomic64_t *)(adev->mman.doorbell_cpu_addr + index)); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; @@ -663,8 +663,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { - atomic64_set((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr + index), v); + if (index < adev->mman.num_kernel_doorbells) { + atomic64_set((atomic64_t *)(adev->mman.doorbell_cpu_addr + index), v); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } @@ -1037,7 +1037,7 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) if (adev->asic_type < CHIP_BONAIRE) { adev->doorbell.doorbell_aper_base = 0; adev->doorbell.doorbell_aper_size = 0; - adev->doorbell.num_doorbells = 0; + adev->mman.num_kernel_doorbells = 0; adev->mman.doorbell_aper_base_kaddr = NULL; return 0; } @@ -1052,13 +1052,13 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) adev->doorbell.doorbell_aper_size = pci_resource_len(adev->pdev, 2); if (adev->enable_mes) { - adev->doorbell.num_doorbells = + adev->mman.num_kern
[PATCH] drm/amd/pm: re-enable ac/dc on smu_v13_0_0/10
re-enable ac/dc on smu_v13_0_0/10 Signed-off-by: Kenneth Feng --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 7c906ab3ddd2..923a9fb3c887 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -147,6 +147,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, 0), MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), MSG_MAP(AllowIHHostInterrupt, PPSMC_MSG_AllowIHHostInterrupt, 0), + MSG_MAP(ReenableAcDcInterrupt, PPSMC_MSG_ReenableAcDcInterrupt, 0), }; static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { -- 2.25.1
Re: [PATCH] drm/amdgpu: remove TOPDOWN flags when allocating VRAM in large bar system
Am 15.02.23 um 06:25 schrieb Shane Xiao: Since VRAM manager is changed from drm mm to drm buddy, the TOP_DOWN flag should not be set by default in the large bar system. Removing this flag helps improve drm buddy allactor efficiency and reduce the risk of splitting higher order block into lower order. Signed-off-by: Shane Xiao Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 2d237f3d3a2e..1c3e647400bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -139,7 +139,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) places[c].lpfn = visible_pfn; - else + else if (adev->gmc.real_vram_size != adev->gmc.visible_vram_size) places[c].flags |= TTM_PL_FLAG_TOPDOWN; if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
[PATCH] Revert "drm/amd/display: enable DPG when disabling plane for phantom pipe"
This reverts commit b73cf50bd1d0008027cc1b41881b671d9c9054b9. regression detected by the change. Revert until fix is available. Signed-off-by: Qingqing Zhuo --- drivers/gpu/drm/amd/display/dc/core/dc.c | 47 +--- 1 file changed, 1 insertion(+), 46 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 510661d303e8..1c218c526650 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -74,8 +74,6 @@ #include "dc_trace.h" -#include "hw_sequencer_private.h" - #include "dce/dmub_outbox.h" #define CTX \ @@ -1059,44 +1057,6 @@ static void apply_ctx_interdependent_lock(struct dc *dc, struct dc_state *contex } } -static void phantom_pipe_blank( - struct dc *dc, - struct timing_generator *tg, - int width, - int height) -{ - struct dce_hwseq *hws = dc->hwseq; - enum dc_color_space color_space; - struct tg_color black_color = {0}; - struct output_pixel_processor *opp = NULL; - uint32_t num_opps, opp_id_src0, opp_id_src1; - uint32_t otg_active_width, otg_active_height; - - /* program opp dpg blank color */ - color_space = COLOR_SPACE_SRGB; - color_space_to_black_color(dc, color_space, &black_color); - - otg_active_width = width; - otg_active_height = height; - - /* get the OPTC source */ - tg->funcs->get_optc_source(tg, &num_opps, &opp_id_src0, &opp_id_src1); - ASSERT(opp_id_src0 < dc->res_pool->res_cap->num_opp); - opp = dc->res_pool->opps[opp_id_src0]; - - opp->funcs->opp_set_disp_pattern_generator( - opp, - CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR, - CONTROLLER_DP_COLOR_SPACE_UDEFINED, - COLOR_DEPTH_UNDEFINED, - &black_color, - otg_active_width, - otg_active_height, - 0); - - hws->funcs.wait_for_blank_complete(opp); -} - static void disable_dangling_plane(struct dc *dc, struct dc_state *context) { int i, j; @@ -1155,13 +1115,8 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) * again for different use. */ if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) { - if (tg->funcs->enable_crtc) { - int main_pipe_width, main_pipe_height; - main_pipe_width = old_stream->mall_stream_config.paired_stream->dst.width; - main_pipe_height = old_stream->mall_stream_config.paired_stream->dst.height; - phantom_pipe_blank(dc, tg, main_pipe_width, main_pipe_height); + if (tg->funcs->enable_crtc) tg->funcs->enable_crtc(tg); - } } dc_rem_all_planes_for_stream(dc, old_stream, dangling_context); disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context); -- 2.34.1