[PATCH 1/2] drm/amd/pm: expose swctf threshold setting for legacy powerplay
Preparation for coming optimization which eliminates the influence of GPU temperature momentary fluctuation. Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h| 2 ++ .../gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c | 4 +++- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c| 2 ++ drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c | 10 ++ drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c | 4 drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c | 4 drivers/gpu/drm/amd/pm/powerplay/inc/power_state.h | 1 + 7 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index a9161f3da8b5..7faad759a6cc 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -90,6 +90,8 @@ struct amdgpu_dpm_thermal { intmax_mem_crit_temp; /* memory max emergency(shutdown) temp */ intmax_mem_emergency_temp; + /* SWCTF threshold */ + intsw_ctf_threshold; /* was last interrupt low to high or high to low */ bool high_to_low; /* interrupt source */ diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c index 981dc8c7112d..90452b66e107 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c @@ -241,7 +241,8 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) TEMP_RANGE_MAX, TEMP_RANGE_MIN, TEMP_RANGE_MAX, - TEMP_RANGE_MAX}; + TEMP_RANGE_MAX, + 0}; struct amdgpu_device *adev = hwmgr->adev; if (!hwmgr->not_vf) @@ -265,6 +266,7 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) adev->pm.dpm.thermal.min_mem_temp = range.mem_min; adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max; + adev->pm.dpm.thermal.sw_ctf_threshold = range.sw_ctf_threshold; return ret; } diff --git 
a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index d82767866ac1..6d887ead2967 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -5433,6 +5433,8 @@ static int smu7_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->max = data->thermal_temp_setting.temperature_shutdown * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->sw_ctf_threshold = thermal_data->max; + return 0; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index 6f5161738bf8..d8cd23438b76 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -5242,6 +5242,9 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, { struct vega10_hwmgr *data = hwmgr->backend; PPTable_t *pp_table = &(data->smc_state_table.pp_table); + struct phm_ppt_v2_information *pp_table_info = + (struct phm_ppt_v2_information *)(hwmgr->pptable); + struct phm_tdp_table *tdp_table = pp_table_info->tdp_table; memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); @@ -5258,6 +5261,13 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + if (tdp_table->usSoftwareShutdownTemp > pp_table->ThotspotLimit && + tdp_table->usSoftwareShutdownTemp < VEGA10_THERMAL_MAXIMUM_ALERT_TEMP) + thermal_data->sw_ctf_threshold = tdp_table->usSoftwareShutdownTemp; + else + thermal_data->sw_ctf_threshold = VEGA10_THERMAL_MAXIMUM_ALERT_TEMP; + thermal_data->sw_ctf_threshold *= PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + return 0; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c index 
33f31461ea6c..1069eaaae2f8 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c @@ -2764,6 +2764,8 @@ static int vega12_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *thermal_data) { + struct phm_ppt_v3_information *pptable_information = + (struct phm_ppt_v3_information *)hwmgr->ppt
[PATCH 2/2] drm/amd/pm: avoid unintentional shutdown due to temperature momentary fluctuation
An intentional delay is added on soft ctf triggered. Then there will be a double check for the GPU temperature before taking further action. This can avoid unintended shutdown due to temperature momentary fluctuation. Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++ .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 48 +++ .../drm/amd/pm/powerplay/hwmgr/smu_helper.c | 27 --- drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h | 2 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 34 + drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 2 + .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 9 +--- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c| 9 +--- 8 files changed, 102 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e459381dc759..5ef1f31e703c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -287,6 +287,9 @@ extern int amdgpu_user_partt_mode; #define AMDGPU_SMARTSHIFT_MAX_BIAS (100) #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100) +/* Extra time delay(in ms) to eliminate the influence of temperature momentary fluctuation */ +#define AMDGPU_SWCTF_EXTRA_DELAY 50 + struct amdgpu_xcp_mgr; struct amdgpu_device; struct amdgpu_irq_src; diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 11b7b4cffaae..ff360c699171 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "amd_shared.h" #include "amd_powerplay.h" #include "power_state.h" @@ -91,6 +92,45 @@ static int pp_early_init(void *handle) return 0; } +static void pp_swctf_delayed_work_handler(struct work_struct *work) +{ + struct pp_hwmgr *hwmgr = + container_of(work, struct pp_hwmgr, swctf_delayed_work.work); + struct amdgpu_device *adev = hwmgr->adev; + struct amdgpu_dpm_thermal *range = + &adev->pm.dpm.thermal; + uint32_t 
gpu_temperature, size; + int ret; + + /* +* If the hotspot/edge temperature is confirmed as below SW CTF setting point +* after the delay enforced, nothing will be done. +* Otherwise, a graceful shutdown will be performed to prevent further damage. +*/ + if (range->sw_ctf_threshold && + hwmgr->hwmgr_func->read_sensor) { + ret = hwmgr->hwmgr_func->read_sensor(hwmgr, + AMDGPU_PP_SENSOR_HOTSPOT_TEMP, +&gpu_temperature, +&size); + /* +* For some legacy ASICs, hotspot temperature retrieving might be not +* supported. Check the edge temperature instead then. +*/ + if (ret == -EOPNOTSUPP) + ret = hwmgr->hwmgr_func->read_sensor(hwmgr, + AMDGPU_PP_SENSOR_EDGE_TEMP, +&gpu_temperature, +&size); + if (!ret && gpu_temperature / 1000 < range->sw_ctf_threshold) + return; + } + + dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n"); + dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n"); + orderly_poweroff(true); +} + static int pp_sw_init(void *handle) { struct amdgpu_device *adev = handle; @@ -101,6 +141,10 @@ static int pp_sw_init(void *handle) pr_debug("powerplay sw init %s\n", ret ? 
"failed" : "successfully"); + if (!ret) + INIT_DELAYED_WORK(&hwmgr->swctf_delayed_work, + pp_swctf_delayed_work_handler); + return ret; } @@ -135,6 +179,8 @@ static int pp_hw_fini(void *handle) struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + cancel_delayed_work_sync(&hwmgr->swctf_delayed_work); + hwmgr_hw_fini(hwmgr); return 0; @@ -221,6 +267,8 @@ static int pp_suspend(void *handle) struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + cancel_delayed_work_sync(&hwmgr->swctf_delayed_work); + return hwmgr_suspend(hwmgr); } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c index bfe80ac0ad8c..d0b1ab6c4523 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c @@ -603,21 +603,17 @@ int phm_irq_process(struct a
Re: [PATCH] drm: Remove the deprecated drm_put_dev() function
On Sun, 25 Jun 2023, Sui Jingfeng wrote: > As this function can be replaced with drm_dev_unregister() + drm_dev_put(), > it is already marked as deprecated, so remove it. No functional change. > > Signed-off-by: Sui Jingfeng > --- > drivers/gpu/drm/drm_drv.c | 28 > drivers/gpu/drm/drm_pci.c | 3 ++- > drivers/gpu/drm/radeon/radeon_drv.c | 3 ++- > include/drm/drm_drv.h | 1 - > 4 files changed, 4 insertions(+), 31 deletions(-) > > diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c > index 12687dd9e1ac..5057307fe22a 100644 > --- a/drivers/gpu/drm/drm_drv.c > +++ b/drivers/gpu/drm/drm_drv.c > @@ -406,34 +406,6 @@ void drm_minor_release(struct drm_minor *minor) > * possibly leaving the hardware enabled. > */ > > -/** > - * drm_put_dev - Unregister and release a DRM device > - * @dev: DRM device > - * > - * Called at module unload time or when a PCI device is unplugged. > - * > - * Cleans up all DRM device, calling drm_lastclose(). > - * > - * Note: Use of this function is deprecated. It will eventually go away > - * completely. Please use drm_dev_unregister() and drm_dev_put() explicitly > - * instead to make sure that the device isn't userspace accessible any more > - * while teardown is in progress, ensuring that userspace can't access an > - * inconsistent state. The last sentence is the crucial one. While the patch has no functional changes, I believe the goal never was to just mechanically replace one call with the two. BR, Jani. 
> - */ > -void drm_put_dev(struct drm_device *dev) > -{ > - DRM_DEBUG("\n"); > - > - if (!dev) { > - DRM_ERROR("cleanup called no dev\n"); > - return; > - } > - > - drm_dev_unregister(dev); > - drm_dev_put(dev); > -} > -EXPORT_SYMBOL(drm_put_dev); > - > /** > * drm_dev_enter - Enter device critical section > * @dev: DRM device > diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c > index 39d35fc3a43b..b3a68a92eaa6 100644 > --- a/drivers/gpu/drm/drm_pci.c > +++ b/drivers/gpu/drm/drm_pci.c > @@ -257,7 +257,8 @@ void drm_legacy_pci_exit(const struct drm_driver *driver, >legacy_dev_list) { > if (dev->driver == driver) { > list_del(&dev->legacy_dev_list); > - drm_put_dev(dev); > + drm_dev_unregister(dev); > + drm_dev_put(dev); > } > } > mutex_unlock(&legacy_dev_list_lock); > diff --git a/drivers/gpu/drm/radeon/radeon_drv.c > b/drivers/gpu/drm/radeon/radeon_drv.c > index e4374814f0ef..a4955ae10659 100644 > --- a/drivers/gpu/drm/radeon/radeon_drv.c > +++ b/drivers/gpu/drm/radeon/radeon_drv.c > @@ -357,7 +357,8 @@ radeon_pci_remove(struct pci_dev *pdev) > { > struct drm_device *dev = pci_get_drvdata(pdev); > > - drm_put_dev(dev); > + drm_dev_unregister(dev); > + drm_dev_put(dev); > } > > static void > diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h > index 89e2706cac56..289c97b12e82 100644 > --- a/include/drm/drm_drv.h > +++ b/include/drm/drm_drv.h > @@ -511,7 +511,6 @@ void drm_dev_unregister(struct drm_device *dev); > > void drm_dev_get(struct drm_device *dev); > void drm_dev_put(struct drm_device *dev); > -void drm_put_dev(struct drm_device *dev); > bool drm_dev_enter(struct drm_device *dev, int *idx); > void drm_dev_exit(int idx); > void drm_dev_unplug(struct drm_device *dev); -- Jani Nikula, Intel Open Source Graphics Center
Re: [PATCH] drm: Remove the deprecated drm_put_dev() function
Hi Am 25.06.23 um 07:09 schrieb Sui Jingfeng: As this function can be replaced with drm_dev_unregister() + drm_dev_put(), it is already marked as deprecated, so remove it. No functional change. Signed-off-by: Sui Jingfeng --- drivers/gpu/drm/drm_drv.c | 28 drivers/gpu/drm/drm_pci.c | 3 ++- drivers/gpu/drm/radeon/radeon_drv.c | 3 ++- include/drm/drm_drv.h | 1 - 4 files changed, 4 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 12687dd9e1ac..5057307fe22a 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -406,34 +406,6 @@ void drm_minor_release(struct drm_minor *minor) * possibly leaving the hardware enabled. */ -/** - * drm_put_dev - Unregister and release a DRM device - * @dev: DRM device - * - * Called at module unload time or when a PCI device is unplugged. - * - * Cleans up all DRM device, calling drm_lastclose(). - * - * Note: Use of this function is deprecated. It will eventually go away - * completely. Please use drm_dev_unregister() and drm_dev_put() explicitly - * instead to make sure that the device isn't userspace accessible any more - * while teardown is in progress, ensuring that userspace can't access an - * inconsistent state. 
- */ -void drm_put_dev(struct drm_device *dev) -{ - DRM_DEBUG("\n"); - - if (!dev) { - DRM_ERROR("cleanup called no dev\n"); - return; - } - - drm_dev_unregister(dev); - drm_dev_put(dev); -} -EXPORT_SYMBOL(drm_put_dev); - /** * drm_dev_enter - Enter device critical section * @dev: DRM device diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 39d35fc3a43b..b3a68a92eaa6 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -257,7 +257,8 @@ void drm_legacy_pci_exit(const struct drm_driver *driver, legacy_dev_list) { if (dev->driver == driver) { list_del(&dev->legacy_dev_list); - drm_put_dev(dev); + drm_dev_unregister(dev); + drm_dev_put(dev); } } mutex_unlock(&legacy_dev_list_lock); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index e4374814f0ef..a4955ae10659 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -357,7 +357,8 @@ radeon_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); - drm_put_dev(dev); Did you verify that dev cannot be NULL here? There was a check in drm_put_dev() for !dev. Best regards Thomas + drm_dev_unregister(dev); + drm_dev_put(dev); } static void diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 89e2706cac56..289c97b12e82 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -511,7 +511,6 @@ void drm_dev_unregister(struct drm_device *dev); void drm_dev_get(struct drm_device *dev); void drm_dev_put(struct drm_device *dev); -void drm_put_dev(struct drm_device *dev); bool drm_dev_enter(struct drm_device *dev, int *idx); void drm_dev_exit(int idx); void drm_dev_unplug(struct drm_device *dev); -- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Frankenstrasse 146, 90461 Nuernberg, Germany GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman HRB 36809 (AG Nuernberg) OpenPGP_signature Description: OpenPGP digital signature
RE: [PATCH 1/2] drm/amdgpu: make mcbp a per device setting
[AMD Official Use Only - General] Reviewed-and-tested-by: Jiadong Zhu -Original Message- From: amd-gfx On Behalf Of Alex Deucher Sent: Saturday, June 17, 2023 5:10 AM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander Subject: [PATCH 1/2] drm/amdgpu: make mcbp a per device setting So we can selectively enable it on certain devices. No intended functional change. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 7 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f39db4a2c2cf..78c6265fe79b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2551,7 +2551,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = true; /* right after GMC hw init, we create CSA */ - if (amdgpu_mcbp) { + if (adev->gfx.mcbp) { r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, @@ -3672,6 +3672,18 @@ static const struct attribute *amdgpu_dev_attributes[] = { NULL }; +static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) { + if (amdgpu_mcbp == 1) + adev->gfx.mcbp = true; + + if (amdgpu_sriov_vf(adev)) + adev->gfx.mcbp = true; + + if (adev->gfx.mcbp) + DRM_INFO("MCBP is enabled\n"); +} + /** * amdgpu_device_init - initialize the driver * @@ -3823,9 +3835,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); - if (amdgpu_mcbp) - DRM_INFO("MCBP is enabled\n"); - /* * Reset domain 
needs to be present early, before XGMI hive discovered * (if any) and intitialized to use reset sem and in_gpu reset flag @@ -3851,6 +3860,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + amdgpu_device_set_mcbp(adev); + /* Get rid of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ce0f7a8ad4b8..a4ff515ce896 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -434,6 +434,7 @@ struct amdgpu_gfx { uint16_txcc_mask; uint32_tnum_xcc_per_xcp; struct mutexpartition_mutex; + boolmcbp; /* mid command buffer preemption */ }; struct amdgpu_gfx_ras_reg_entry { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index e3531aa3c8bd..cca5a495611f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -805,7 +805,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) dev_info->ids_flags = 0; if (adev->flags & AMD_IS_APU) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION; - if (amdgpu_mcbp) + if (adev->gfx.mcbp) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; if (amdgpu_is_tmz(adev)) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ; @@ -1247,7 +1247,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto error_vm; } - if (amdgpu_mcbp) { + if (adev->gfx.mcbp) { uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 78ec3420ef85..dacf281d2b21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -72,7 +72,7 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, 
int r; /* don't enable OS preemption on SDMA under SRIOV */ - if (amdgpu_sriov_vf(adev) || vmid == 0 |
RE: [PATCH 2/2] drm/amdgpu: enable mcbp by default on gfx9
[AMD Official Use Only - General] Reviewed-and-tested-by: Jiadong Zhu -Original Message- From: amd-gfx On Behalf Of Alex Deucher Sent: Saturday, June 17, 2023 5:10 AM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander Subject: [PATCH 2/2] drm/amdgpu: enable mcbp by default on gfx9 It's required for high priority queues. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2535 Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 78c6265fe79b..3eb370b77ad9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3677,6 +3677,11 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) if (amdgpu_mcbp == 1) adev->gfx.mcbp = true; + if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) && + (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) && + adev->gfx.num_gfx_rings) + adev->gfx.mcbp = true; + if (amdgpu_sriov_vf(adev)) adev->gfx.mcbp = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 03874371af60..308149dd7d00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -180,7 +180,7 @@ uint amdgpu_dc_feature_mask = 2; uint amdgpu_dc_debug_mask; uint amdgpu_dc_visual_confirm; int amdgpu_async_gfx_ring = 1; -int amdgpu_mcbp; +int amdgpu_mcbp = -1; int amdgpu_discovery = -1; int amdgpu_mes; int amdgpu_mes_kiq; @@ -635,10 +635,10 @@ module_param_named(async_gfx_ring, amdgpu_async_gfx_ring, int, 0444); /** * DOC: mcbp (int) - * It is used to enable mid command buffer preemption. (0 = disabled (default), 1 = enabled) + * It is used to enable mid command buffer preemption. 
(0 = disabled, 1 + = enabled, -1 auto (default)) */ MODULE_PARM_DESC(mcbp, - "Enable Mid-command buffer preemption (0 = disabled (default), 1 = enabled)"); + "Enable Mid-command buffer preemption (0 = disabled, 1 = enabled), -1 += auto (default)"); module_param_named(mcbp, amdgpu_mcbp, int, 0444); /** -- 2.40.1
Re: [PATCH 3/3] drm/amdgpu: add new INFO ioctl query for the last GPU page fault
Am 25.05.23 um 18:52 schrieb Alex Deucher: Add a interface to query the last GPU page fault for the process. Useful for debugging context lost errors. v2: split vmhub representation between kernel and userspace Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238 libdrm MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238 Cc: samuel.pitoi...@gmail.com Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 16 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 13 ++--- include/uapi/drm/amdgpu_drm.h | 16 5 files changed, 59 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7300df2a342c..7e17b285decc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -112,9 +112,10 @@ *gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi * 3.53.0 - Support for GFX11 CP GFX shadowing * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support + * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 54 +#define KMS_DRIVER_MINOR 55 #define KMS_DRIVER_PATCHLEVEL 0 unsigned int amdgpu_vram_limit = UINT_MAX; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 41d047e5de69..bca2a56046ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1163,6 +1163,22 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return copy_to_user(out, max_ibs, min((size_t)size, sizeof(max_ibs))) ? 
-EFAULT : 0; } + case AMDGPU_INFO_GPUVM_FAULT: { + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct drm_amdgpu_info_gpuvm_fault gpuvm_fault; + + if (!vm) + return -EINVAL; + + memset(&gpuvm_fault, 0, sizeof(gpuvm_fault)); + gpuvm_fault.addr = vm->fault_info.addr; + gpuvm_fault.status = vm->fault_info.status; + gpuvm_fault.vmhub = vm->fault_info.vmhub; You need something to provide locking and barrier here. I suggest to just grab the xa lock. + + return copy_to_user(out, &gpuvm_fault, + min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0; + } default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 73e022f3daa4..c1b0c5f3c1f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2657,7 +2657,21 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, if (vm) { vm->fault_info.addr = addr; vm->fault_info.status = status; - vm->fault_info.vmhub = vmhub; + if (AMDGPU_IS_GFXHUB(vmhub)) { + vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX; + vm->fault_info.vmhub |= + (vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT; + } else if (AMDGPU_IS_MMHUB0(vmhub)) { + vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0; + vm->fault_info.vmhub |= + (vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT; + } else if (AMDGPU_IS_MMHUB1(vmhub)) { + vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1; + vm->fault_info.vmhub |= + (vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT; + } else { + WARN_ONCE(1, "Invalid vmhub %u\n", vmhub); + } } xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index fb66a413110c..1a34fea9acb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -116,9 +116,16 @@ struct amdgpu_mem_stats; * layout: max 8 
GFXHUB + 4 MMHUB0 + 1 MMHUB1 */ #define AMDGPU_MAX_VMHUBS 13 -#define AMDGPU_GFXHUB(x) (x) -#define AMDGPU_MMHUB0(x) (8 + x) -#define AMDGPU_MMHUB1(x) (8 + 4 + x) +#define AMDGPU_GFXHUB_START0 +#define AMDGPU_MMHUB0_START8 +#define AMDGPU_MMHUB1_START12 +#define AM
Re: [PATCH 2/2] drm/amd/pm: avoid unintentional shutdown due to temperature momentary fluctuation
On 6/26/2023 1:17 PM, Evan Quan wrote: An intentional delay is added on soft ctf triggered. Then there will be a double check for the GPU temperature before taking further action. This can avoid unintended shutdown due to temperature momentary fluctuation. Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++ .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 48 +++ .../drm/amd/pm/powerplay/hwmgr/smu_helper.c | 27 --- drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h | 2 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 34 + drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 2 + .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 9 +--- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c| 9 +--- 8 files changed, 102 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e459381dc759..5ef1f31e703c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -287,6 +287,9 @@ extern int amdgpu_user_partt_mode; #define AMDGPU_SMARTSHIFT_MAX_BIAS (100) #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100) +/* Extra time delay(in ms) to eliminate the influence of temperature momentary fluctuation */ +#define AMDGPU_SWCTF_EXTRA_DELAY 50 I think a delay of 10-15ms is good enough to filter out any spike. 
With that change, the series is Reviewed-by: Lijo Lazar Thanks, Lijo + struct amdgpu_xcp_mgr; struct amdgpu_device; struct amdgpu_irq_src; diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 11b7b4cffaae..ff360c699171 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "amd_shared.h" #include "amd_powerplay.h" #include "power_state.h" @@ -91,6 +92,45 @@ static int pp_early_init(void *handle) return 0; } +static void pp_swctf_delayed_work_handler(struct work_struct *work) +{ + struct pp_hwmgr *hwmgr = + container_of(work, struct pp_hwmgr, swctf_delayed_work.work); + struct amdgpu_device *adev = hwmgr->adev; + struct amdgpu_dpm_thermal *range = + &adev->pm.dpm.thermal; + uint32_t gpu_temperature, size; + int ret; + + /* +* If the hotspot/edge temperature is confirmed as below SW CTF setting point +* after the delay enforced, nothing will be done. +* Otherwise, a graceful shutdown will be performed to prevent further damage. +*/ + if (range->sw_ctf_threshold && + hwmgr->hwmgr_func->read_sensor) { + ret = hwmgr->hwmgr_func->read_sensor(hwmgr, + AMDGPU_PP_SENSOR_HOTSPOT_TEMP, +&gpu_temperature, +&size); + /* +* For some legacy ASICs, hotspot temperature retrieving might be not +* supported. Check the edge temperature instead then. 
+*/ + if (ret == -EOPNOTSUPP) + ret = hwmgr->hwmgr_func->read_sensor(hwmgr, + AMDGPU_PP_SENSOR_EDGE_TEMP, +&gpu_temperature, +&size); + if (!ret && gpu_temperature / 1000 < range->sw_ctf_threshold) + return; + } + + dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n"); + dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n"); + orderly_poweroff(true); +} + static int pp_sw_init(void *handle) { struct amdgpu_device *adev = handle; @@ -101,6 +141,10 @@ static int pp_sw_init(void *handle) pr_debug("powerplay sw init %s\n", ret ? "failed" : "successfully"); + if (!ret) + INIT_DELAYED_WORK(&hwmgr->swctf_delayed_work, + pp_swctf_delayed_work_handler); + return ret; } @@ -135,6 +179,8 @@ static int pp_hw_fini(void *handle) struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + cancel_delayed_work_sync(&hwmgr->swctf_delayed_work); + hwmgr_hw_fini(hwmgr); return 0; @@ -221,6 +267,8 @@ static int pp_suspend(void *handle) struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + cancel_delayed_work_sync(&hwmgr->swctf_delayed_work); + return hwmgr_suspend(hwmgr); } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c
Re: [PATCH] drm/amdgpu: add VISIBLE info in amdgpu_bo_print_info
[Public] Thanks Christian for the review. I'll remove the leading blanks before submitting the patch. Pierre-Eric From: Koenig, Christian Sent: Wednesday, June 21, 2023 5:00 PM To: Pelloux-Prayer, Pierre-Eric ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: add VISIBLE info in amdgpu_bo_print_info Am 21.06.23 um 16:35 schrieb Pierre-Eric Pelloux-Prayer: > This allows tools to distinguish between VRAM and visible VRAM. > > Use the opportunity to fix locking before accessing bo. > > Signed-off-by: Pierre-Eric Pelloux-Prayer > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 33 ++ > 1 file changed, 21 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > index ff73cc11d47e..f12f019d7f99 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > @@ -1583,18 +1583,27 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo > *bo, struct seq_file *m) >unsigned int pin_count; >u64 size; > > - domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); > - switch (domain) { > - case AMDGPU_GEM_DOMAIN_VRAM: > - placement = "VRAM"; > - break; > - case AMDGPU_GEM_DOMAIN_GTT: > - placement = " GTT"; > - break; > - case AMDGPU_GEM_DOMAIN_CPU: > - default: > - placement = " CPU"; > - break; > + if (dma_resv_trylock(bo->tbo.base.resv)) { > + unsigned int domain; > + domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); > + switch (domain) { > + case AMDGPU_GEM_DOMAIN_VRAM: > + if (amdgpu_bo_in_cpu_visible_vram(bo)) > + placement = "VRAM VISIBLE"; > + else > + placement = "VRAM"; > + break; > + case AMDGPU_GEM_DOMAIN_GTT: > + placement = " GTT"; We can probably drop the leading blank here and > + break; > + case AMDGPU_GEM_DOMAIN_CPU: > + default: > + placement = " CPU"; here when we don't keep the strings at the same length anyway. With that fixed the change is Reviewed-by: Christian König Regards, Christian. 
> + break; > + } > + dma_resv_unlock(bo->tbo.base.resv); > + } else { > + placement = "UNKNOWN"; >} > >size = amdgpu_bo_size(bo);
[PATCH] drm/amd/pm: Enable pp_feature attribute
on APUs with GFX v9.4.3 Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 9ec51f50fc52..9ef88a0b1b57 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2083,7 +2083,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ *states = ATTR_STATE_UNSUPPORTED; } } else if (DEVICE_ATTR_IS(pp_features)) { - if (adev->flags & AMD_IS_APU || gc_ver < IP_VERSION(9, 0, 0)) + if ((adev->flags & AMD_IS_APU && +gc_ver != IP_VERSION(9, 4, 3)) || + gc_ver < IP_VERSION(9, 0, 0)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(gpu_metrics)) { if (gc_ver < IP_VERSION(9, 1, 0)) -- 2.25.1
[PATCH v3] drm/amd/display: Remove unnecessary casts in amdgpu_dm_helpers.c
Fixes the following category of checkpatch complaints: WARNING: unnecessary cast may hide bugs, see http://c-faq.com/malloc/mallocnocast.html + char *buf = (char *)kvcalloc(total, sizeof(char), GFP_KERNEL); Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam --- v3: - Keeping same as v1 - so that variable "buf" remains local to the block, wherever it is declared, by having just removed the casting. drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index cd20cfc04996..4590deca25f8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -400,7 +400,7 @@ void dm_dtn_log_append_v(struct dc_context *ctx, total = log_ctx->pos + n + 1; if (total > log_ctx->size) { - char *buf = (char *)kvcalloc(total, sizeof(char), GFP_KERNEL); + char *buf = kvcalloc(total, sizeof(char), GFP_KERNEL); if (buf) { memcpy(buf, log_ctx->buf, log_ctx->pos); -- 2.25.1
Re: [PATCH] Revert "drm/amdgpu: Enable VM_CONTEXT1_CNTL after page table addr is set."
Am 31.05.23 um 16:39 schrieb Alex Deucher: This reverts commit f57a74f5b42d1627bd5366f88952d42819e91146. After talking this over with Christian, the original programming sequence was correct. The enable bit needs to be set before programming the rest of the context. Signed-off-by: Alex Deucher Cc: Zibin Liu Sorry for the delay, I'm only catching up to mails from last month by now. Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 5 + drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 5 + drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 5 + drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 5 + drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c | 5 + drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c | 5 + drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c| 5 + drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c| 5 + drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c| 6 +- drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c| 5 + drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c| 5 + drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c| 5 + drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c | 5 + drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c | 5 + drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c| 7 +-- 15 files changed, 15 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 52a1e79ee4d8..d94cc1ec7242 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -261,7 +261,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 0); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, num_level); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, @@ -302,9 +302,6 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i * 
hub->ctx_addr_distance, upper_32_bits(adev->vm_manager.max_pfn - 1)); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, - i * hub->ctx_distance, tmp); } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 108674f6eef0..4dabf910334b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -330,7 +330,7 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 0); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, num_level); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, @@ -377,9 +377,6 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i * hub->ctx_addr_distance, upper_32_bits(adev->vm_manager.max_pfn - 1)); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); - WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, - i * hub->ctx_distance, tmp); } } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 502cb6e1fe84..f173a61c6c15 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -288,7 +288,7 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i); - tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 0); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, adev->vm_manager.num_level); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, @@ -324,9 +324,6 
@@ static void gfxh
Re: [PATCH] drm/amd/pm: Enable pp_feature attribute
Acked-by: Alex Deucher On Mon, Jun 26, 2023 at 8:35 AM Lijo Lazar wrote: > > on APUs with GFX v9.4.3 > > Signed-off-by: Lijo Lazar > --- > drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c > b/drivers/gpu/drm/amd/pm/amdgpu_pm.c > index 9ec51f50fc52..9ef88a0b1b57 100644 > --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c > +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c > @@ -2083,7 +2083,9 @@ static int default_attr_update(struct amdgpu_device > *adev, struct amdgpu_device_ > *states = ATTR_STATE_UNSUPPORTED; > } > } else if (DEVICE_ATTR_IS(pp_features)) { > - if (adev->flags & AMD_IS_APU || gc_ver < IP_VERSION(9, 0, 0)) > + if ((adev->flags & AMD_IS_APU && > +gc_ver != IP_VERSION(9, 4, 3)) || > + gc_ver < IP_VERSION(9, 0, 0)) > *states = ATTR_STATE_UNSUPPORTED; > } else if (DEVICE_ATTR_IS(gpu_metrics)) { > if (gc_ver < IP_VERSION(9, 1, 0)) > -- > 2.25.1 >
Re: [PATCH 1/3] Revert "drm/amdgpu: change the reference clock for raven/raven2"
Hi guys, Vitaly and Jasber have been recently working on disabling the IGT tests for the TSC query on RV/RV2 (which I'm not very keen on). In addition to that, we have this RADV merge request here: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23481 What exactly is going on here? That sounds like we are working around a FW or, more specifically, a GFXOFF bug in userspace. Regards, Christian. Am 05.06.23 um 10:57 schrieb Michel Dänzer: On 6/2/23 20:43, Alex Deucher wrote: This reverts commit fbc24293ca16b3b9ef891fe32ccd04735a6f8dc1. This results in inconsistent timing reported via asynchronous GPU queries. Link: https://lists.freedesktop.org/archives/amd-gfx/2023-May/093731.html Cc: jesse.zh...@amd.com Cc: mic...@daenzer.net Signed-off-by: Alex Deucher The series is Reviewed-by: Michel Dänzer Thanks!
Re: [PATCH] drm/amd/display: Clean up warnings in amdgpu_dm _mst_types, _plane, _psr.c
On 6/23/23 23:49, Srinivasan Shanmugam wrote: > Fix the following warnings reported by checkpatch: > > WARNING: Missing a blank line after declarations > WARNING: Prefer 'unsigned int' to bare use of 'unsigned' > > Cc: Rodrigo Siqueira > Cc: Aurabindo Pillai > Signed-off-by: Srinivasan Shanmugam Hi Srini, I've seen a lot of these minor fixes from you. It's great. But please put them in a patchset when sending so (sensible) email clients can organize them. Ideally with a cover letter that describes overall what the patch set is trying to accomplish: git format-patch --cover-letter .. Thanks, Harry > --- > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 1 + > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 4 ++-- > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 1 + > 3 files changed, 4 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > index 46d0a8f57e55..95eefa6b4f2f 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c > @@ -296,6 +296,7 @@ static int dm_dp_mst_get_modes(struct drm_connector > *connector) > > if (!aconnector->edid) { > struct edid *edid; > + > edid = drm_dp_mst_get_edid(connector, > &aconnector->mst_root->mst_mgr, aconnector->mst_output_port); > > if (!edid) { > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c > index 322668973747..de1c7026ffcd 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c > @@ -164,7 +164,7 @@ static bool modifier_has_dcc(uint64_t modifier) > return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier); > } > > -static unsigned modifier_gfx9_swizzle_mode(uint64_t modifier) > +static unsigned int 
modifier_gfx9_swizzle_mode(uint64_t modifier) > { > if (modifier == DRM_FORMAT_MOD_LINEAR) > return 0; > @@ -581,7 +581,7 @@ static void add_gfx11_modifiers(struct amdgpu_device > *adev, > int pkrs = 0; > u32 gb_addr_config; > u8 i = 0; > - unsigned swizzle_r_x; > + unsigned int swizzle_r_x; > uint64_t modifier_r_x; > uint64_t modifier_dcc_best; > uint64_t modifier_dcc_4k; > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c > index d647f68fd563..be63d34400d4 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c > @@ -165,6 +165,7 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) >*/ > if (vsync_rate_hz != 0) { > unsigned int frame_time_microsec = 100 / vsync_rate_hz; > + > num_frames_static = (3 / frame_time_microsec) + 1; > } >
Re: [PATCH 1/3] Revert "drm/amdgpu: change the reference clock for raven/raven2"
On Mon, Jun 26, 2023 at 9:58 AM Christian König wrote: > > Hi guys, > > Vitaly and Jasber have been recently working on disabling the IGT tests > for the TSC query on RV/RV2 (which I'm not very keen on). > > And additional to that we have this RADV merge request here: > https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23481 > > What exactly is going on here? That sounds like we are working around a > FW or more specific GFXOFF bug in userspace. The clock source used by the gfx firmware on Raven is part of the gfx domain so it gets powered off when gfxoff turns off the graphics block. There is a clock source in an always on domain, but the gfx block doesn't use it. I don't know off hand if the clock source used by gfx can be changed or not on raven (IIRC, I don't think it can, which is why this was never fixed on RV). Since they are different clock sources, the GPU timestamp doesn't match the CPU timestamp. On renoir and newer APUs, gfx uses the always on clock source so it's always consistent between CPU and GPU. Alex > > Regards, > Christian. > > Am 05.06.23 um 10:57 schrieb Michel Dänzer: > > On 6/2/23 20:43, Alex Deucher wrote: > >> This reverts commit fbc24293ca16b3b9ef891fe32ccd04735a6f8dc1. > >> > >> This results in inconsistent timing reported via asynchronous > >> GPU queries. > >> > >> Link: https://lists.freedesktop.org/archives/amd-gfx/2023-May/093731.html > >> Cc: jesse.zh...@amd.com > >> Cc: mic...@daenzer.net > >> Signed-off-by: Alex Deucher > > The series is > > > > Reviewed-by: Michel Dänzer > > > > Thanks! > > > > >
[PATCH 1/2] drm/amdgpu:update kernel vcn ring test
add session context buffer to decoder ring test. v2 - put the buffer at the end of the IB (Christian) Signed-off-by: Saleemkhan Jamadar Acked-by: Leo Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 30 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 3 +++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 2d94f1b63bd6..04daaaf6ab34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -691,7 +691,8 @@ static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum, static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, struct amdgpu_ib *ib_msg, - struct dma_fence **fence) + struct dma_fence **fence, + uint64_t session_ctx_buf_gaddr) { struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; unsigned int ib_size_dw = 64; @@ -730,6 +731,14 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4; memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer)); + if (session_ctx_buf_gaddr) { + decode_buffer->valid_buf_flag |= + cpu_to_le32(AMDGPU_VCN_CMD_FLAG_SESSION_CONTEXT_BUFFER); + decode_buffer->session_context_buffer_address_hi = + cpu_to_le32(session_ctx_buf_gaddr >> 32); + decode_buffer->session_context_buffer_address_lo = + cpu_to_le32(session_ctx_buf_gaddr); + } decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER); decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32); decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr); @@ -763,20 +772,34 @@ int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; struct amdgpu_ib ib; + struct amdgpu_bo *session_ctx_buf = NULL; + void *cpu_addr = NULL; + uint64_t gpu_addr = 0; long r; + r = amdgpu_bo_create_kernel(ring->adev, 128*1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, + 
&session_ctx_buf, + &gpu_addr, + &cpu_addr); + if (r) { + dev_err(ring->adev->dev, "VCN ib test:%ld failed to allocate session ctx bo\n", r); + return r; + } + r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib); if (r) goto error; - r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL); + r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL, gpu_addr); if (r) goto error; r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib); if (r) goto error; - r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence); + r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence, gpu_addr); if (r) goto error; @@ -788,6 +811,7 @@ int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout) dma_fence_put(fence); error: + amdgpu_bo_free_kernel(&session_ctx_buf, &gpu_addr, &cpu_addr); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index f1397ef66fd7..06f9ee91a1e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -166,6 +166,7 @@ #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x0001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x0001 +#define AMDGPU_VCN_CMD_FLAG_SESSION_CONTEXT_BUFFER 0x0010 #define VCN_CODEC_DISABLE_MASK_AV1 (1 << 0) #define VCN_CODEC_DISABLE_MASK_VP9 (1 << 1) @@ -357,6 +358,8 @@ struct amdgpu_vcn_decode_buffer { uint32_t valid_buf_flag; uint32_t msg_buffer_address_hi; uint32_t msg_buffer_address_lo; + unsigned int session_context_buffer_address_hi; + unsigned int session_context_buffer_address_lo; uint32_t pad[30]; }; -- 2.25.1
[PATCH 2/2] drm/amdgpu:update kernel vcn ring test
add session context buffer to decoder ring test for vcn v1 to v3. Signed-off-by: Saleemkhan Jamadar --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 43 ++--- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 04daaaf6ab34..3e9c023e6c42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -518,9 +518,11 @@ int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring) static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_ib *ib_msg, - struct dma_fence **fence) + struct dma_fence **fence, + uint64_t session_ctx_buf_gaddr) { u64 addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); +struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; struct amdgpu_device *adev = ring->adev; struct dma_fence *f = NULL; struct amdgpu_job *job; @@ -534,6 +536,22 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, goto err; ib = &job->ibs[0]; + ib->length_dw = 0; +ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8; +ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER); +decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]); +ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4; +memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer)); + +if (session_ctx_buf_gaddr) { +decode_buffer->valid_buf_flag |= + cpu_to_le32(AMDGPU_VCN_CMD_FLAG_SESSION_CONTEXT_BUFFER); +decode_buffer->session_context_buffer_address_hi = + cpu_to_le32(session_ctx_buf_gaddr >> 32); +decode_buffer->session_context_buffer_address_lo = + cpu_to_le32(session_ctx_buf_gaddr); +} + ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0); ib->ptr[1] = addr; ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0); @@ -544,7 +562,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0); ib->ptr[i+1] = 0; } - ib->length_dw = 16; + 
ib->length_dw += 16; r = amdgpu_job_submit_direct(job, ring, &f); if (r) @@ -631,20 +649,34 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; struct amdgpu_ib ib; + struct amdgpu_bo *session_ctx_buf = NULL; +void *cpu_addr = NULL; +uint64_t gpu_addr = 0; long r; + r = amdgpu_bo_create_kernel(ring->adev, 128*1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, + &session_ctx_buf, + &gpu_addr, + &cpu_addr); + if (r) { + dev_err(ring->adev->dev, "VCN ib test:%ld failed to allocate session ctx bo\n", r); + return r; + } + r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib); if (r) goto error; - r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL); + r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL, gpu_addr); if (r) goto error; r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib); if (r) goto error; - r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence); + r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence, gpu_addr); if (r) goto error; @@ -656,6 +688,7 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) dma_fence_put(fence); error: + amdgpu_bo_free_kernel(&session_ctx_buf, &gpu_addr, &cpu_addr); return r; } @@ -692,7 +725,7 @@ static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum, static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, struct amdgpu_ib *ib_msg, struct dma_fence **fence, - uint64_t session_ctx_buf_gaddr) + uint64_t session_ctx_buf_gaddr) { struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; unsigned int ib_size_dw = 64; -- 2.25.1
[PATCH] drm/amd: Fix a documentation warning about excess parameters
`pcie_index` and `pcie_data` aren't used by amdgpu_device_indirect_wreg() since commit 65ba96e91b68 ("drm/amdgpu: Move to common indirect reg access helper") but the documentation wasn't updated. This causes a warning while building documentation. Fixes: 65ba96e91b68 ("drm/amdgpu: Move to common indirect reg access helper") Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 65fe0f3488679..a3dae8ffbdb10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -747,8 +747,6 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, * amdgpu_device_indirect_wreg - write an indirect register address * * @adev: amdgpu_device pointer - * @pcie_index: mmio register offset - * @pcie_data: mmio register offset * @reg_addr: indirect register offset * @reg_data: indirect register data * @@ -778,8 +776,6 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address * * @adev: amdgpu_device pointer - * @pcie_index: mmio register offset - * @pcie_data: mmio register offset * @reg_addr: indirect register offset * @reg_data: indirect register data * -- 2.34.1
[PATCH 1/5] drm/amd: Don't initialize PSP twice for Navi3x
PSP functions are already set by psp_early_init() so initializing them a second time is unnecessary. No intended functional changes. Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 4 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a150b7a4b4aae..eb687a338a1bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3621,10 +3621,6 @@ int amdgpu_psp_sysfs_init(struct amdgpu_device *adev) switch (adev->ip_versions[MP0_HWIP][0]) { case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): - if (!psp->adev) { - psp->adev = adev; - psp_v13_0_set_psp_funcs(psp); - } ret = sysfs_create_bin_file(&adev->dev->kobj, &psp_vbflash_bin_attr); if (ret) dev_err(adev->dev, "Failed to create device file psp_vbflash"); -- 2.34.1
[PATCH 3/5] drm/amd: Make flashing messages quieter
Debug messages related to the kernel process of flashing an updated IFWI are needlessly noisy and also confusing. Downgrade them to debug instead and clarify what they are actually doing. Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 4286c0b4beb90..93d014e69ee39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3531,7 +3531,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj, adev->psp.vbflash_image_size += count; mutex_unlock(&adev->psp.mutex); - dev_info(adev->dev, "VBIOS flash write PSP done"); + dev_dbg(adev->dev, "IFWI staged for update"); return count; } @@ -3551,7 +3551,7 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, if (adev->psp.vbflash_image_size == 0) return -EINVAL; - dev_info(adev->dev, "VBIOS flash to PSP started"); + dev_dbg(adev->dev, "PSP IFWI flash process initiated"); ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size, AMDGPU_GPU_PAGE_SIZE, @@ -3576,11 +3576,11 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, adev->psp.vbflash_image_size = 0; if (ret) { - dev_err(adev->dev, "Failed to load VBIOS FW, err = %d", ret); + dev_err(adev->dev, "Failed to load IFWI, err = %d", ret); return ret; } - dev_info(adev->dev, "VBIOS flash to PSP done"); + dev_dbg(adev->dev, "PSP IFWI flash process done"); return 0; } -- 2.34.1
[PATCH 4/5] drm/amd: Convert USB-C PD F/W attributes into groups
Rather than special casing the creation of the file, special case the visibility to the supported dGPUs. Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 40 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 1 - 2 files changed, 9 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 93d014e69ee39..7872004ed7f9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -45,9 +45,6 @@ #define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*3) -static int psp_sysfs_init(struct amdgpu_device *adev); -static void psp_sysfs_fini(struct amdgpu_device *adev); - static int psp_load_smu_fw(struct psp_context *psp); static int psp_rap_terminate(struct psp_context *psp); static int psp_securedisplay_terminate(struct psp_context *psp); @@ -456,14 +453,6 @@ static int psp_sw_init(void *handle) } } - if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) || - adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) { - ret= psp_sysfs_init(adev); - if (ret) { - return ret; - } - } - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, amdgpu_sriov_vf(adev) ? 
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, @@ -513,10 +502,6 @@ static int psp_sw_fini(void *handle) amdgpu_ucode_release(&psp->cap_fw); amdgpu_ucode_release(&psp->toc_fw); - if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) || - adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) - psp_sysfs_fini(adev); - kfree(cmd); cmd = NULL; @@ -3612,6 +3597,7 @@ static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL); static struct attribute *flash_attrs[] = { &dev_attr_psp_vbflash_status.attr, &psp_vbflash_bin_attr.attr, + &dev_attr_usbc_pd_fw.attr, NULL }; @@ -3625,9 +3611,16 @@ static umode_t amdgpu_flash_attr_is_visible(struct kobject *kobj, struct attribu return 0; switch (adev->ip_versions[MP0_HWIP][0]) { + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 7): + if (attr == &dev_attr_usbc_pd_fw.attr) + return 0660; + return 0; case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): - if (attr == &psp_vbflash_bin_attr.attr) + if (attr == &dev_attr_usbc_pd_fw.attr) + return 0; + else if (attr == &psp_vbflash_bin_attr.attr) return 0660; return 0440; default: @@ -3658,21 +3651,6 @@ const struct amd_ip_funcs psp_ip_funcs = { .set_powergating_state = psp_set_powergating_state, }; -static int psp_sysfs_init(struct amdgpu_device *adev) -{ - int ret = device_create_file(adev->dev, &dev_attr_usbc_pd_fw); - - if (ret) - DRM_ERROR("Failed to create USBC PD FW control file!"); - - return ret; -} - -static void psp_sysfs_fini(struct amdgpu_device *adev) -{ - device_remove_file(adev->dev, &dev_attr_usbc_pd_fw); -} - const struct amdgpu_ip_block_version psp_v3_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_PSP, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index b441c07e5a16f..619b27e891b5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -521,5 +521,4 @@ void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size int 
is_psp_fw_valid(struct psp_bin_desc bin); -int amdgpu_psp_sysfs_init(struct amdgpu_device *adev); #endif -- 2.34.1
[PATCH 2/5] drm/amd: Use attribute groups for PSP flashing attributes
Individually creating attributes can be racy, instead make attributes using attribute groups and control their visibility with an is_visible callback to only show when using appropriate products. Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 - drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c| 49 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h| 1 - 5 files changed, 27 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 02b827785e399..a7ef43e25c758 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1004,7 +1004,6 @@ struct amdgpu_device { boolhas_pr3; boolucode_sysfs_en; - boolpsp_sysfs_en; /* Chip product information */ charproduct_number[20]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5c7d40873ee20..65fe0f3488679 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3907,14 +3907,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, } else adev->ucode_sysfs_en = true; - r = amdgpu_psp_sysfs_init(adev); - if (r) { - adev->psp_sysfs_en = false; - if (!amdgpu_sriov_vf(adev)) - DRM_ERROR("Creating psp sysfs failed\n"); - } else - adev->psp_sysfs_en = true; - /* * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. 
* Otherwise the mgpu fan boost feature will be skipped due to the @@ -4064,8 +4056,6 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_pm_sysfs_fini(adev); if (adev->ucode_sysfs_en) amdgpu_ucode_sysfs_fini(adev); - if (adev->psp_sysfs_en) - amdgpu_psp_sysfs_fini(adev); sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); /* disable ras feature must before hw fini */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 393b6fb7a71d3..99b8d3113d6af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2827,11 +2827,13 @@ static struct pci_error_handlers amdgpu_pci_err_handler = { extern const struct attribute_group amdgpu_vram_mgr_attr_group; extern const struct attribute_group amdgpu_gtt_mgr_attr_group; extern const struct attribute_group amdgpu_vbios_version_attr_group; +extern const struct attribute_group amdgpu_flash_attr_group; static const struct attribute_group *amdgpu_sysfs_groups[] = { &amdgpu_vram_mgr_attr_group, &amdgpu_gtt_mgr_attr_group, &amdgpu_vbios_version_attr_group, + &amdgpu_flash_attr_group, NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index eb687a338a1bd..4286c0b4beb90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3584,6 +3584,13 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, return 0; } +static struct bin_attribute psp_vbflash_bin_attr = { + .attr = {.name = "psp_vbflash", .mode = 0660}, + .size = 0, + .write = amdgpu_psp_vbflash_write, + .read = amdgpu_psp_vbflash_read, +}; + static ssize_t amdgpu_psp_vbflash_status(struct device *dev, struct device_attribute *attr, char *buf) @@ -3600,39 +3607,39 @@ static ssize_t amdgpu_psp_vbflash_status(struct device *dev, return sysfs_emit(buf, "0x%x\n", vbflash_status); } +static DEVICE_ATTR(psp_vbflash_status, 0440, 
amdgpu_psp_vbflash_status, NULL); -static const struct bin_attribute psp_vbflash_bin_attr = { - .attr = {.name = "psp_vbflash", .mode = 0660}, - .size = 0, - .write = amdgpu_psp_vbflash_write, - .read = amdgpu_psp_vbflash_read, +static struct attribute *flash_attrs[] = { + &dev_attr_psp_vbflash_status.attr, + &psp_vbflash_bin_attr.attr, + NULL }; -static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL); - -int amdgpu_psp_sysfs_init(struct amdgpu_device *adev) +static umode_t amdgpu_flash_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx) { - int ret = 0; - struct psp_context *psp = &adev->psp; + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); if (amdgpu_sriov_vf(adev)) - return -E
[PATCH 5/5] drm/amd: Add documentation for how to flash a dGPU
Signed-off-by: Mario Limonciello --- Documentation/gpu/amdgpu/flashing.rst | 33 + Documentation/gpu/amdgpu/index.rst | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 16 3 files changed, 50 insertions(+) create mode 100644 Documentation/gpu/amdgpu/flashing.rst diff --git a/Documentation/gpu/amdgpu/flashing.rst b/Documentation/gpu/amdgpu/flashing.rst new file mode 100644 index 0..bd745c42a538f --- /dev/null +++ b/Documentation/gpu/amdgpu/flashing.rst @@ -0,0 +1,33 @@ +=== + dGPU firmware flashing +=== + +IFWI + +Flashing the dGPU integrated firmware image (IFWI) is supported by GPUs that +use the PSP to orchestrate the update (Navi3x or newer GPUs). +For supported GPUs, `amdgpu` will export a series of sysfs files that can be +used for the flash process. + +The IFWI flash process is: + +1. Ensure the IFWI image is intended for the dGPU on the system. +2. "Write" the IFWI image to the sysfs file `psp_vbflash`. This will stage the IFWI in memory. +3. "Read" from the `psp_vbflash` sysfs file to initiate the flash process. +4. Poll the `psp_vbflash_status` sysfs file to determine when the flash process completes. + +USB-C PD F/W + +On GPUs that support flashing an updated USB-C PD firmware image, the process +is done using the `usbc_pd_fw` sysfs file. + +* Reading the file will provide the current firmware version. +* Writing the name of a firmware payload stored in `/lib/firmware/amdgpu` to the sysfs file will initiate the flash process. + +The firmware payload stored in `/lib/firmware/amdgpu` can be named any name +as long as it doesn't conflict with other existing binaries that are used by +`amdgpu`. + +sysfs files +--- +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c diff --git a/Documentation/gpu/amdgpu/index.rst b/Documentation/gpu/amdgpu/index.rst index 03c2966cae798..912e699fd3731 100644 --- a/Documentation/gpu/amdgpu/index.rst +++ b/Documentation/gpu/amdgpu/index.rst @@ -10,6 +10,7 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures. 
module-parameters driver-core display/index + flashing xgmi ras thermal diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 7872004ed7f9b..047760bafcc23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3476,6 +3476,11 @@ void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size drm_dev_exit(idx); } +/** + * DOC: usbc_pd_fw + * Reading from this file will retrieve the USB-C PD firmware version. Writing to + * this file will trigger the update process. + */ static DEVICE_ATTR(usbc_pd_fw, S_IRUGO | S_IWUSR, psp_usbc_pd_fw_sysfs_read, psp_usbc_pd_fw_sysfs_write); @@ -3569,6 +3574,11 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, return 0; } +/** + * DOC: psp_vbflash + * Writing to this file will stage an IFWI for update. Reading from this file + * will trigger the update process. + */ static struct bin_attribute psp_vbflash_bin_attr = { .attr = {.name = "psp_vbflash", .mode = 0660}, .size = 0, @@ -3576,6 +3586,12 @@ static struct bin_attribute psp_vbflash_bin_attr = { .read = amdgpu_psp_vbflash_read, }; +/** + * DOC: psp_vbflash_status + * The status of the flash process. + * 0: IFWI flash not complete. + * 1: IFWI flash complete. + */ static ssize_t amdgpu_psp_vbflash_status(struct device *dev, struct device_attribute *attr, char *buf) -- 2.34.1
Re: [PATCH] drm/amd: Fix a documentation warning about excess parameters
On Mon, Jun 26, 2023 at 11:00 AM Mario Limonciello wrote: > > `pcie_index` and `pcie_data` aren't used by > amdgpu_device_indirect_wreg() since commit 65ba96e91b68 > ("drm/amdgpu: Move to common indirect reg access helper") but > the documentation wasn't updated. This causes a warning while > building documentation. > > Fixes: 65ba96e91b68 ("drm/amdgpu: Move to common indirect reg access helper") > Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 > 1 file changed, 4 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 65fe0f3488679..a3dae8ffbdb10 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -747,8 +747,6 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device > *adev, > * amdgpu_device_indirect_wreg - write an indirect register address > * > * @adev: amdgpu_device pointer > - * @pcie_index: mmio register offset > - * @pcie_data: mmio register offset > * @reg_addr: indirect register offset > * @reg_data: indirect register data > * > @@ -778,8 +776,6 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device > *adev, > * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address > * > * @adev: amdgpu_device pointer > - * @pcie_index: mmio register offset > - * @pcie_data: mmio register offset > * @reg_addr: indirect register offset > * @reg_data: indirect register data > * > -- > 2.34.1 >
Re: [PATCH] drm/amd: Fix a documentation warning about excess parameters
On 6/26/2023 10:05 AM, Alex Deucher wrote: On Mon, Jun 26, 2023 at 11:00 AM Mario Limonciello wrote: `pcie_index` and `pcie_data` aren't used by amdgpu_device_indirect_wreg() since commit 65ba96e91b68 ("drm/amdgpu: Move to common indirect reg access helper") but the documentation wasn't updated. This causes a warning while building documentation. Fixes: 65ba96e91b68 ("drm/amdgpu: Move to common indirect reg access helper") Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher It turns out that the exact same patch already landed in ASDN as: fbdfbe84aaf4 ("drm/amdgpu: Fix up kdoc in amdgpu_device.c") and I missed this. Sorry for that. --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 65fe0f3488679..a3dae8ffbdb10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -747,8 +747,6 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, * amdgpu_device_indirect_wreg - write an indirect register address * * @adev: amdgpu_device pointer - * @pcie_index: mmio register offset - * @pcie_data: mmio register offset * @reg_addr: indirect register offset * @reg_data: indirect register data * @@ -778,8 +776,6 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address * * @adev: amdgpu_device pointer - * @pcie_index: mmio register offset - * @pcie_data: mmio register offset * @reg_addr: indirect register offset * @reg_data: indirect register data * -- 2.34.1
Re: [PATCH v3] drm/amd/display: Remove unnecessary casts in amdgpu_dm_helpers.c
Am 26.06.23 um 14:43 schrieb Srinivasan Shanmugam: Fixes the following category of checkpatch complaints: WARNING: unnecessary cast may hide bugs, see http://c-faq.com/malloc/mallocnocast.html + char *buf = (char *)kvcalloc(total, sizeof(char), GFP_KERNEL); Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Christian König --- v3: - Keeping same as v1 - so that variable "buf" remains to local to the block, whereever it is declared, by having just removed the casting. drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index cd20cfc04996..4590deca25f8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -400,7 +400,7 @@ void dm_dtn_log_append_v(struct dc_context *ctx, total = log_ctx->pos + n + 1; if (total > log_ctx->size) { - char *buf = (char *)kvcalloc(total, sizeof(char), GFP_KERNEL); + char *buf = kvcalloc(total, sizeof(char), GFP_KERNEL); if (buf) { memcpy(buf, log_ctx->buf, log_ctx->pos);
Re: [PATCH 1/2] drm/amdgpu:update kernel vcn ring test
Am 26.06.23 um 16:50 schrieb Saleemkhan Jamadar: add session context buffer to decoder ring test. v2 - put the buffer at the end of the IB (Christian) Signed-off-by: Saleemkhan Jamadar Acked-by: Leo Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 30 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 3 +++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 2d94f1b63bd6..04daaaf6ab34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -691,7 +691,8 @@ static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum, static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, struct amdgpu_ib *ib_msg, - struct dma_fence **fence) + struct dma_fence **fence, + uint64_t session_ctx_buf_gaddr) That looks like it isn't correctly indented. { struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; unsigned int ib_size_dw = 64; @@ -730,6 +731,14 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4; memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer)); + if (session_ctx_buf_gaddr) { + decode_buffer->valid_buf_flag |= + cpu_to_le32(AMDGPU_VCN_CMD_FLAG_SESSION_CONTEXT_BUFFER); + decode_buffer->session_context_buffer_address_hi = + cpu_to_le32(session_ctx_buf_gaddr >> 32); + decode_buffer->session_context_buffer_address_lo = + cpu_to_le32(session_ctx_buf_gaddr); + } decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER); decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32); decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr); @@ -763,20 +772,34 @@ int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; struct amdgpu_ib ib; + struct amdgpu_bo *session_ctx_buf = NULL; + void *cpu_addr = NULL; + uint64_t gpu_addr = 0; long r; + r = 
amdgpu_bo_create_kernel(ring->adev, 128*1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, + &session_ctx_buf, + &gpu_addr, + &cpu_addr); That still creates a buffer for the session ctx instead of putting it into the IB. + if (r) { + dev_err(ring->adev->dev, "VCN ib test:%ld failed to allocate session ctx bo\n", r); + return r; + } + r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib); if (r) goto error; - r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL); + r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL, gpu_addr); if (r) goto error; r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib); if (r) goto error; - r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence); + r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence, gpu_addr); if (r) goto error; @@ -788,6 +811,7 @@ int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout) dma_fence_put(fence); error: + amdgpu_bo_free_kernel(&session_ctx_buf, &gpu_addr, &cpu_addr);ebd59851c796c Which is freed here. return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index f1397ef66fd7..06f9ee91a1e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -166,6 +166,7 @@ #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x0001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER0x0001 +#define AMDGPU_VCN_CMD_FLAG_SESSION_CONTEXT_BUFFER 0x0010 #define VCN_CODEC_DISABLE_MASK_AV1 (1 << 0) #define VCN_CODEC_DISABLE_MASK_VP9 (1 << 1) @@ -357,6 +358,8 @@ struct amdgpu_vcn_decode_buffer { uint32_t valid_buf_flag; uint32_t msg_buffer_address_hi; uint32_t msg_buffer_address_lo; + unsigned int session_context_buffer_address_hi; + unsigned int session_context_buffer_address_lo; uint32_t pad[30]; that here looks incorrect as well. Christian. };
Re: [RFC PATCH v3 1/4] drm/doc: Document DRM device reset expectations
Em 22/06/2023 05:12, Pekka Paalanen escreveu: On Wed, 21 Jun 2023 13:28:34 -0300 André Almeida wrote: Em 21/06/2023 04:58, Pekka Paalanen escreveu: On Tue, 20 Jun 2023 21:57:16 -0300 André Almeida wrote: Create a section that specifies how to deal with DRM device resets for kernel and userspace drivers. Signed-off-by: André Almeida Hi André, nice to see this! I ended up giving lots of grammar comments, but I'm not a native speaker. Generally it looks good to me. Thank you for your feedback :) --- Documentation/gpu/drm-uapi.rst | 65 ++ 1 file changed, 65 insertions(+) diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst index 65fb3036a580..da4f8a694d8d 100644 --- a/Documentation/gpu/drm-uapi.rst +++ b/Documentation/gpu/drm-uapi.rst @@ -285,6 +285,71 @@ for GPU1 and GPU2 from different vendors, and a third handler for mmapped regular files. Threads cause additional pain with signal handling as well. +Device reset + + +The GPU stack is really complex and is prone to errors, from hardware bugs, +faulty applications and everything in between the many layers. To recover +from this kind of state, sometimes is needed to reset the device. This section It seems unclear what "this kind of state" refers to, so maybe just write "errors"? Maybe: Some errors require resetting the device in order to make the device usable again. I presume that recovery does not mean that the failed job could recover. +describes what's the expectations for DRM and usermode drivers when a device +resets and how to propagate the reset status. + +Kernel Mode Driver +-- + +The KMD is responsible for checking if the device needs a reset, and to perform +it as needed. Usually a hung is detected when a job gets stuck executing. KMD s/hung/hang/ ? +then update it's internal reset tracking to be ready when userspace asks the updates its "update reset tracking"... do you mean that KMD records information about the reset in case userspace asks for it later? 
Yes, kernel drivers do annotate whenever a reset happens, so it can report to userspace when it asks about resets. For instance, this is the amdgpu implementation of AMDGPU_CTX_OP_QUERY_STATE2: https://elixir.bootlin.com/linux/v6.3.8/source/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c#L548 You can see there stored information about resets. Hi André, right. What I mean is, if I have to ask this, then that implies that the wording could be more clear. I don't know if "reset tracking" is some sub-system that is turned on and off as needed or what updating it would mean. Understood, I'll rewrite it to be more clear. +kernel about reset information. Drivers should implement the DRM_IOCTL_GET_RESET +for that. At this point, I'm not sure what "reset tracking" or "reset information" entails. Could something be said about those? >> + +User Mode Driver + + +The UMD should check before submitting new commands to the KMD if the device has +been reset, and this can be checked more often if it requires to. The +DRM_IOCTL_GET_RESET is the default interface for those kind of checks. After +detecting a reset, UMD will then proceed to report it to the application using +the appropriated API error code, as explained in the bellow section about s/bellow/below/ +robustness. + +Robustness +-- + +The only way to try to keep an application working after a reset is if it +complies with the robustness aspects of the graphical API that is using. that it is using. + +Graphical APIs provide ways to application to deal with device resets. However, provide ways for applications to deal with +there's no guarantee that the app will be correctly using such features, and UMD +can implement policies to close the app if it's a repeating offender, likely in +a broken loop. This is done to ensure that it doesn't keeps blocking the user does not keep I think contractions are usually avoided in documents, but I'm not bothering to flag them all. +interface to be correctly displayed. 
interface from being correctly displayed. + +OpenGL +~~ + +Apps using OpenGL can rely on ``GL_ARB_robustness`` to be robust. This extension +tells if a reset has happened, and if so, all the context state is considered +lost and the app proceeds by creating new ones. If robustness isn't in use, UMD +will terminate the app when a reset is detected, giving that the contexts are +lost and the app won't be able to figure this out and recreate the contexts. What about GL ES? Is GL_ARB_robustness implemented or even defined there? I found this: https://registry.khronos.org/OpenGL/extensions/EXT/EXT_robustness.txt "Since this is intended to be a version of ARB_robustness for OpenGL ES, it should be named accordingly." I can add this to this paragraph. Yes, please! I suppose there could be even more extensio
[PATCH] Revert "drm/amd/display: edp do not add non-edid timings"
This change causes a regression when the eDP and an external display are in mirror mode. When the external display supports a lower resolution than the eDP, using the eDP timing to drive the external display may cause corruption on the external display. This reverts commit aa9704d5127f06c9ffedb0480d2788b87fecedfb. Signed-off-by: Hersen Wu --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +--- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a46b8b47b756..073bf00c6fdc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7258,13 +7258,7 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector) drm_add_modes_noedid(connector, 1920, 1080); } else { amdgpu_dm_connector_ddc_get_modes(connector, edid); - /* most eDP supports only timings from its edid, -* usually only detailed timings are available -* from eDP edid. timings which are not from edid -* may damage eDP -*/ - if (connector->connector_type != DRM_MODE_CONNECTOR_eDP) - amdgpu_dm_connector_add_common_modes(encoder, connector); + amdgpu_dm_connector_add_common_modes(encoder, connector); amdgpu_dm_connector_add_freesync_modes(connector, edid); } amdgpu_dm_fbc_init(connector); -- 2.25.1
RE: [PATCH] Revert "drm/amd/display: edp do not add non-edid timings"
[Public] > This change causes regression when eDP and external display in mirror > mode. When external display supports low resolution than eDP, use eDP > timing to driver external display may cause corruption on external > display. > > This reverts commit aa9704d5127f06c9ffedb0480d2788b87fecedfb. > > Signed-off-by: Hersen Wu The original commit CC to stable, we need this to go to stable too. Here's some tags to pick up when merging. Cc: sta...@vger.kernel.org Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2655 Reviewed-by: Mario Limonciello > --- > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +--- > 1 file changed, 1 insertion(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > index a46b8b47b756..073bf00c6fdc 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > @@ -7258,13 +7258,7 @@ static int > amdgpu_dm_connector_get_modes(struct drm_connector *connector) > drm_add_modes_noedid(connector, 1920, > 1080); > } else { > amdgpu_dm_connector_ddc_get_modes(connector, edid); > - /* most eDP supports only timings from its edid, > - * usually only detailed timings are available > - * from eDP edid. timings which are not from edid > - * may damage eDP > - */ > - if (connector->connector_type != > DRM_MODE_CONNECTOR_eDP) > - > amdgpu_dm_connector_add_common_modes(encoder, connector); > + amdgpu_dm_connector_add_common_modes(encoder, > connector); > amdgpu_dm_connector_add_freesync_modes(connector, > edid); > } > amdgpu_dm_fbc_init(connector); > -- > 2.25.1
RE: [PATCH] Revert "drm/amd/display: edp do not add non-edid timings"
[Public] > -Original Message- > From: Limonciello, Mario > Sent: Monday, June 26, 2023 12:45 PM > To: Hersen Wu ; amd-gfx@lists.freedesktop.org; > Wentland, Harry > Cc: Wu, Hersen > Subject: RE: [PATCH] Revert "drm/amd/display: edp do not add non-edid > timings" > > > This change causes regression when eDP and external display in mirror > > mode. When external display supports low resolution than eDP, use eDP > > timing to driver external display may cause corruption on external > > display. > > > > This reverts commit aa9704d5127f06c9ffedb0480d2788b87fecedfb. One more thing - although this is the correct hash for ASDN, this merged into Linus' tree as e749dd10e5f292061ad63d2b030194bf7d7d452c. As this has to go back to stable trees properly, I think the hash should reflect what's in Linus' tree instead of what's in ASDN. > > > > Signed-off-by: Hersen Wu > > The original commit CC to stable, we need this to go to stable too. > > Here's some tags to pick up when merging. > > Cc: sta...@vger.kernel.org > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2655 > Reviewed-by: Mario Limonciello > > > --- > > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +--- > > 1 file changed, 1 insertion(+), 7 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > index a46b8b47b756..073bf00c6fdc 100644 > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > @@ -7258,13 +7258,7 @@ static int > > amdgpu_dm_connector_get_modes(struct drm_connector *connector) > > drm_add_modes_noedid(connector, 1920, > > 1080); > > } else { > > amdgpu_dm_connector_ddc_get_modes(connector, edid); > > - /* most eDP supports only timings from its edid, > > -* usually only detailed timings are available > > -* from eDP edid. 
timings which are not from edid > > -* may damage eDP > > -*/ > > - if (connector->connector_type != > > DRM_MODE_CONNECTOR_eDP) > > - > > amdgpu_dm_connector_add_common_modes(encoder, connector); > > + amdgpu_dm_connector_add_common_modes(encoder, > > connector); > > amdgpu_dm_connector_add_freesync_modes(connector, > > edid); > > } > > amdgpu_dm_fbc_init(connector); > > -- > > 2.25.1
[PATCH v4 1/1] drm/doc: Document DRM device reset expectations
Create a section that specifies how to deal with DRM device resets for kernel and userspace drivers. Signed-off-by: André Almeida --- Documentation/gpu/drm-uapi.rst | 68 ++ 1 file changed, 68 insertions(+) diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst index 65fb3036a580..25a11b9b98fa 100644 --- a/Documentation/gpu/drm-uapi.rst +++ b/Documentation/gpu/drm-uapi.rst @@ -285,6 +285,74 @@ for GPU1 and GPU2 from different vendors, and a third handler for mmapped regular files. Threads cause additional pain with signal handling as well. +Device reset + + +The GPU stack is really complex and is prone to errors, from hardware bugs, +faulty applications and everything in between the many layers. Some errors +require resetting the device in order to make the device usable again. This +section describes what the expectations are for DRM and usermode drivers when a +device resets and how to propagate the reset status. + +Kernel Mode Driver +-- + +The KMD is responsible for checking if the device needs a reset, and for performing +it as needed. Usually a hang is detected when a job gets stuck executing. KMD +should keep track of resets, because userspace can query any time about the +reset stats for a specific context. This is needed to propagate to the rest of +the stack that a reset has happened. Currently, this is implemented by each +driver separately, with no common DRM interface. + +User Mode Driver + + +The UMD should check before submitting new commands to the KMD if the device has +been reset, and this can be checked more often if required. After +detecting a reset, UMD will then proceed to report it to the application using +the appropriate API error code, as explained in the section below about +robustness. + +Robustness +-- + +The only way to try to keep an application working after a reset is if it +complies with the robustness aspects of the graphical API that it is using.
+ +Graphical APIs provide ways for applications to deal with device resets. However, +there is no guarantee that the app will be correctly using such features, and +UMD can implement policies to close the app if it is a repeating offender, +likely in a broken loop. This is done to ensure that it does not keep blocking +the user interface from being correctly displayed. This should be done even if +the app is correct but happens to trigger some bug in the hardware/driver. + +OpenGL +~~ + +Apps using OpenGL should use the available robust interfaces, like the +extension ``GL_ARB_robustness`` (or ``GL_EXT_robustness`` for OpenGL ES). This +interface tells if a reset has happened, and if so, all the context state is +considered lost and the app proceeds by creating new ones. If it is possible to +determine that robustness is not in use, UMD will terminate the app when a reset +is detected, given that the contexts are lost and the app won't be able to +figure this out and recreate the contexts. + +Vulkan +~~ + +Apps using Vulkan should check for ``VK_ERROR_DEVICE_LOST`` for submissions. +This error code means, among other things, that a device reset has happened and +it needs to recreate the contexts to keep going. + +Reporting causes of resets +--- + +Apart from propagating the reset through the stack so apps can recover, it's +really useful for driver developers to learn more about what caused the reset in +the first place. DRM devices should make use of devcoredump to store relevant +information about the reset, so this information can be added to user bug +reports. + .. _drm_driver_ioctl: IOCTL Support on Device Nodes -- 2.41.0
[PATCH v4 0/1] drm/doc: Document DRM device reset expectations
This v4 removes the common DRM ioctl and adds just the documentation for now, given that the lack of a common "DRM context" infrastructure makes it hard to implement. v3: https://lore.kernel.org/lkml/20230621005719.836857-1-andrealm...@igalia.com/ Changes: - Drop the ioctl - Addressed comments from Pekka, such as making the documentation clearer and more consistent. André Almeida (1): drm/doc: Document DRM device reset expectations Documentation/gpu/drm-uapi.rst | 68 ++ 1 file changed, 68 insertions(+) -- 2.41.0
Re: [PATCH 5/5] drm/amd: Add documentation for how to flash a dGPU
On Mon, Jun 26, 2023 at 11:04 AM Mario Limonciello wrote: > Needs a basic patch description. Even just "add documentation" With that fixed, the series is: Reviewed-by: Alex Deucher > Signed-off-by: Mario Limonciello > --- > Documentation/gpu/amdgpu/flashing.rst | 33 + > Documentation/gpu/amdgpu/index.rst | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 16 > 3 files changed, 50 insertions(+) > create mode 100644 Documentation/gpu/amdgpu/flashing.rst > > diff --git a/Documentation/gpu/amdgpu/flashing.rst > b/Documentation/gpu/amdgpu/flashing.rst > new file mode 100644 > index 0..bd745c42a538f > --- /dev/null > +++ b/Documentation/gpu/amdgpu/flashing.rst > @@ -0,0 +1,33 @@ > +=== > + dGPU firmware flashing > +=== > + > +IFWI > + > +Flashing the dGPU integrated firmware image (IFWI) is supported by GPUs that > +use the PSP to orchestrate the update (Navi3x or newer GPUs). > +For supported GPUs, `amdgpu` will export a series of sysfs files that can be > +used for the flash process. > + > +The IFWI flash process is: > + > +1. Ensure the IFWI image is intended for the dGPU on the system. > +2. "Write" the IFWI image to the sysfs file `psp_vbflash`. This will stage > the IFWI in memory. > +3. "Read" from the `psp_vbflash` sysfs file to initiate the flash process. > +4. Poll the `psp_vbflash_status` sysfs file to determine when the flash > process completes. > + > +USB-C PD F/W > + > +On GPUs that support flashing an updated USB-C PD firmware image, the process > +is done using the `usbc_pd_fw` sysfs file. > + > +* Reading the file will provide the current firmware version. > +* Writing the name of a firmware payload stored in `/lib/firmware/amdgpu` to > the sysfs file will initiate the flash process. > + > +The firmware payload stored in `/lib/firmware/amdgpu` can be named any name > +as long as it doesn't conflict with other existing binaries that are used by > +`amdgpu`. > + > +sysfs files > +--- > +.. 
kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > diff --git a/Documentation/gpu/amdgpu/index.rst > b/Documentation/gpu/amdgpu/index.rst > index 03c2966cae798..912e699fd3731 100644 > --- a/Documentation/gpu/amdgpu/index.rst > +++ b/Documentation/gpu/amdgpu/index.rst > @@ -10,6 +10,7 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) > architectures. > module-parameters > driver-core > display/index > + flashing > xgmi > ras > thermal > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > index 7872004ed7f9b..047760bafcc23 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > @@ -3476,6 +3476,11 @@ void psp_copy_fw(struct psp_context *psp, uint8_t > *start_addr, uint32_t bin_size > drm_dev_exit(idx); > } > > +/** > + * DOC: usbc_pd_fw > + * Reading from this file will retrieve the USB-C PD firmware version. > Writing to > + * this file will trigger the update process. > + */ > static DEVICE_ATTR(usbc_pd_fw, S_IRUGO | S_IWUSR, >psp_usbc_pd_fw_sysfs_read, >psp_usbc_pd_fw_sysfs_write); > @@ -3569,6 +3574,11 @@ static ssize_t amdgpu_psp_vbflash_read(struct file > *filp, struct kobject *kobj, > return 0; > } > > +/** > + * DOC: psp_vbflash > + * Writing to this file will stage an IFWI for update. Reading from this file > + * will trigger the update process. > + */ > static struct bin_attribute psp_vbflash_bin_attr = { > .attr = {.name = "psp_vbflash", .mode = 0660}, > .size = 0, > @@ -3576,6 +3586,12 @@ static struct bin_attribute psp_vbflash_bin_attr = { > .read = amdgpu_psp_vbflash_read, > }; > > +/** > + * DOC: psp_vbflash_status > + * The status of the flash process. > + * 0: IFWI flash not complete. > + * 1: IFWI flash complete. > + */ > static ssize_t amdgpu_psp_vbflash_status(struct device *dev, > struct device_attribute *attr, > char *buf) > -- > 2.34.1 >
Re: [PATCH v2 2/4] drm/amd/display: Set minimum requirement for using PSR-SU on Rembrandt
On 6/23/23 11:05, Mario Limonciello wrote: A number of parade TCONs are causing system hangs when utilized with older DMUB firmware and PSR-SU. Some changes have been introduced into DMUB firmware to add resilience against these failures. Don't allow running PSR-SU unless on the newer firmware. Cc: Sean Wang Cc: Marc Rossi Cc: Hamza Mahfooz Cc: Tsung-hua (Ryan) Lin Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2443 Signed-off-by: Mario Limonciello Reviewed-by: Leo Li --- v1->v2: * Fix a s/dcn314/dcn31/ mixup --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 3 ++- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 7 +++ drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 1 + drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 2 ++ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c | 5 + drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h | 2 ++ drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 10 ++ 7 files changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index d647f68fd563..4f61d4f257cd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -24,6 +24,7 @@ */ #include "amdgpu_dm_psr.h" +#include "dc_dmub_srv.h" #include "dc.h" #include "dm_helpers.h" #include "amdgpu_dm.h" @@ -50,7 +51,7 @@ static bool link_supports_psrsu(struct dc_link *link) !link->dpcd_caps.psr_info.psr2_su_y_granularity_cap) return false; - return true; + return dc_dmub_check_min_version(dc->ctx->dmub_srv->dmub); } /* diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index c52c40b16387..c753c6f30dd7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1011,3 +1011,10 @@ void dc_send_update_cursor_info_to_dmu( dm_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT); } } + 
+bool dc_dmub_check_min_version(struct dmub_srv *srv) +{ + if (!srv->hw_funcs.is_psrsu_supported) + return true; + return srv->hw_funcs.is_psrsu_supported(srv); +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index a5196a9292b3..099f94b6107c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -86,4 +86,5 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, struct dc_state *context, b void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv); void dc_send_update_cursor_info_to_dmu(struct pipe_ctx *pCtx, uint8_t pipe_idx); +bool dc_dmub_check_min_version(struct dmub_srv *srv); #endif /* _DMUB_DC_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 2a66a305679a..4585e0419da6 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -367,6 +367,8 @@ struct dmub_srv_hw_funcs { bool (*is_supported)(struct dmub_srv *dmub); + bool (*is_psrsu_supported)(struct dmub_srv *dmub); + bool (*is_hw_init)(struct dmub_srv *dmub); void (*enable_dmub_boot_options)(struct dmub_srv *dmub, diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index ebf7aeec4029..c8445d474107 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -302,6 +302,11 @@ bool dmub_dcn31_is_supported(struct dmub_srv *dmub) return supported; } +bool dmub_dcn31_is_psrsu_supported(struct dmub_srv *dmub) +{ + return dmub->fw_version >= DMUB_FW_VERSION(4, 0, 58); +} + void dmub_dcn31_set_gpint(struct dmub_srv *dmub, union dmub_gpint_data_register reg) { diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h index 7d5c10ee539b..89c5a948b67d 100644 --- 
a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h @@ -221,6 +221,8 @@ bool dmub_dcn31_is_hw_init(struct dmub_srv *dmub); bool dmub_dcn31_is_supported(struct dmub_srv *dmub); +bool dmub_dcn31_is_psrsu_supported(struct dmub_srv *dmub); + void dmub_dcn31_set_gpint(struct dmub_srv *dmub, union dmub_gpint_data_register reg); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 9e9a6a44a7ac..7a31e3e27bab 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -226,14 +226,16 @@ static bool dmub_srv_hw_setup
Re: [PATCH v2 3/4] drm/amd/display: Set minimum requirement for using PSR-SU on Phoenix
On 6/23/23 11:05, Mario Limonciello wrote: The same parade TCON issue can potentially happen on Phoenix, and the same PSR resilience changes have been ported into the DMUB firmware. Don't allow running PSR-SU unless on the newer firmware. Cc: Sean Wang Cc: Marc Rossi Cc: Hamza Mahfooz Cc: Tsung-hua (Ryan) Lin Signed-off-by: Mario Limonciello Reviewed-by: Leo Li --- v1->v2: * Fix a s/dcn31/dcn314/ mixup --- drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.c | 5 + drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.h | 2 ++ drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c| 1 + 3 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.c index 48a06dbd9be7..f161aeb7e7c4 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.c @@ -60,3 +60,8 @@ const struct dmub_srv_dcn31_regs dmub_srv_dcn314_regs = { { DMUB_DCN31_FIELDS() }, #undef DMUB_SF }; + +bool dmub_dcn314_is_psrsu_supported(struct dmub_srv *dmub) +{ + return dmub->fw_version >= DMUB_FW_VERSION(8, 0, 16); +} diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.h index 674267a2940e..f213bd82c911 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.h @@ -30,4 +30,6 @@ extern const struct dmub_srv_dcn31_regs dmub_srv_dcn314_regs; +bool dmub_dcn314_is_psrsu_supported(struct dmub_srv *dmub); + #endif /* _DMUB_DCN314_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 7a31e3e27bab..bdaf43892f47 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -228,6 +228,7 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) case DMUB_ASIC_DCN316: if (asic == DMUB_ASIC_DCN314) { dmub->regs_dcn31 = 
&dmub_srv_dcn314_regs; + funcs->is_psrsu_supported = dmub_dcn314_is_psrsu_supported; } else if (asic == DMUB_ASIC_DCN315) { dmub->regs_dcn31 = &dmub_srv_dcn315_regs; } else if (asic == DMUB_ASIC_DCN316) {
[PATCH] drm/amdgpu: rename psp_update_vcn_sram to a common name
Rename psp_update_vcn_sram to psp_execute_ucode_loading so that other clients can feel free to use it. Signed-off-by: Lang Yu --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 19 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 7 +-- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 7 --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 8 +--- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 6 -- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 6 -- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 5 +++-- 7 files changed, 35 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index aa69269169a1..33f8b8389979 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2931,15 +2931,16 @@ int psp_rlc_autoload_start(struct psp_context *psp) return ret; } -int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, - uint64_t cmd_gpu_addr, int cmd_size) -{ - struct amdgpu_firmware_info ucode = {0}; - - ucode.ucode_id = inst_idx ? 
AMDGPU_UCODE_ID_VCN1_RAM : - AMDGPU_UCODE_ID_VCN0_RAM; - ucode.mc_addr = cmd_gpu_addr; - ucode.ucode_size = cmd_size; +int psp_execute_ucode_loading(struct amdgpu_device *adev, + enum AMDGPU_UCODE_ID ucode_id, + uint64_t ucode_gpu_addr, + int ucode_size) +{ + struct amdgpu_firmware_info ucode = { + .ucode_id = ucode_id, + .mc_addr = ucode_gpu_addr, + .ucode_size = ucode_size, + }; return psp_execute_non_psp_fw_load(&adev->psp, &ucode); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 2cae0b1a0b8a..93849db18696 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -459,8 +459,11 @@ extern int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_inde uint32_t field_val, uint32_t mask, uint32_t msec_timeout); int psp_gpu_reset(struct amdgpu_device *adev); -int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, - uint64_t cmd_gpu_addr, int cmd_size); + +int psp_execute_ucode_loading(struct amdgpu_device *adev, + enum AMDGPU_UCODE_ID ucode_id, + uint64_t ucode_gpu_addr, + int ucode_size); int psp_ta_init_shared_buf(struct psp_context *psp, struct ta_mem_context *mem_ctx); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index c975aed2f6c7..e89b1e76 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -881,9 +881,10 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); if (indirect) - psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr, - (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr - - (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr)); + psp_execute_ucode_loading(adev, AMDGPU_UCODE_ID_VCN0_RAM, + adev->vcn.inst->dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr)); /* force RBC into idle state */ 
rb_bufsz = order_base_2(ring->ring_size); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index bb1875f926f1..e5df190a79a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -912,9 +912,11 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); if (indirect) - psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, - (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - - (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); + psp_execute_ucode_loading(adev, + inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM : AMDGPU_UCODE_ID_VCN0_RAM, + adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); ring = &adev->vcn.inst[inst_idx].ring_dec; /* force RBC into idle state */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/dr
Re: [PATCH] drm/amdgpu: rename psp_update_vcn_sram to a common name
Please ignore this patch, will send a new one. Regards, Lang On 06/27/ , Lang Yu wrote: > Rename psp_update_vcn_sram to psp_execute_ucode_loading > so that other clients can feel free to use it. > > Signed-off-by: Lang Yu > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 19 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 7 +-- > drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 7 --- > drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 8 +--- > drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 6 -- > drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 6 -- > drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 5 +++-- > 7 files changed, 35 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > index aa69269169a1..33f8b8389979 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > @@ -2931,15 +2931,16 @@ int psp_rlc_autoload_start(struct psp_context *psp) > return ret; > } > > -int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, > - uint64_t cmd_gpu_addr, int cmd_size) > -{ > - struct amdgpu_firmware_info ucode = {0}; > - > - ucode.ucode_id = inst_idx ? 
AMDGPU_UCODE_ID_VCN1_RAM : > - AMDGPU_UCODE_ID_VCN0_RAM; > - ucode.mc_addr = cmd_gpu_addr; > - ucode.ucode_size = cmd_size; > +int psp_execute_ucode_loading(struct amdgpu_device *adev, > + enum AMDGPU_UCODE_ID ucode_id, > + uint64_t ucode_gpu_addr, > + int ucode_size) > +{ > + struct amdgpu_firmware_info ucode = { > + .ucode_id = ucode_id, > + .mc_addr = ucode_gpu_addr, > + .ucode_size = ucode_size, > + }; > > return psp_execute_non_psp_fw_load(&adev->psp, &ucode); > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h > index 2cae0b1a0b8a..93849db18696 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h > @@ -459,8 +459,11 @@ extern int psp_wait_for_spirom_update(struct psp_context > *psp, uint32_t reg_inde > uint32_t field_val, uint32_t mask, uint32_t > msec_timeout); > > int psp_gpu_reset(struct amdgpu_device *adev); > -int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, > - uint64_t cmd_gpu_addr, int cmd_size); > + > +int psp_execute_ucode_loading(struct amdgpu_device *adev, > + enum AMDGPU_UCODE_ID ucode_id, > + uint64_t ucode_gpu_addr, > + int ucode_size); > > int psp_ta_init_shared_buf(struct psp_context *psp, > struct ta_mem_context *mem_ctx); > diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c > b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c > index c975aed2f6c7..e89b1e76 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c > @@ -881,9 +881,10 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device > *adev, bool indirect) > UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); > > if (indirect) > - psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr, > - > (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr - > - > (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr)); > + psp_execute_ucode_loading(adev, AMDGPU_UCODE_ID_VCN0_RAM, > + adev->vcn.inst->dpg_sram_gpu_addr, > + > 
(uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr - > + > (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr)); > > /* force RBC into idle state */ > rb_bufsz = order_base_2(ring->ring_size); > diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c > b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c > index bb1875f926f1..e5df190a79a0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c > @@ -912,9 +912,11 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device > *adev, int inst_idx, boo > UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); > > if (indirect) > - psp_update_vcn_sram(adev, inst_idx, > adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, > - > (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - > - > (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); > + psp_execute_ucode_loading(adev, > + inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM : > AMDGPU_UCODE_ID_VCN0_RAM, > + adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, > + > (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - > +
RE: [PATCH 2/2] drm/amd/pm: avoid unintentional shutdown due to temperature momentary fluctuation
[AMD Official Use Only - General] > -Original Message- > From: Lazar, Lijo > Sent: Monday, June 26, 2023 7:54 PM > To: Quan, Evan ; amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander > Subject: Re: [PATCH 2/2] drm/amd/pm: avoid unintentional shutdown due to > temperature momentary fluctuation > > > > On 6/26/2023 1:17 PM, Evan Quan wrote: > > An intentional delay is added on soft ctf triggered. Then there will > > be a double check for the GPU temperature before taking further > > action. This can avoid unintended shutdown due to temperature > > momentary fluctuation. > > > > Signed-off-by: Evan Quan > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++ > > .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 48 > +++ > > .../drm/amd/pm/powerplay/hwmgr/smu_helper.c | 27 --- > > drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h | 2 + > > drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 34 > + > > drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 2 + > > .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 9 +--- > > .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c| 9 +--- > > 8 files changed, 102 insertions(+), 32 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > index e459381dc759..5ef1f31e703c 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > @@ -287,6 +287,9 @@ extern int amdgpu_user_partt_mode; > > #define AMDGPU_SMARTSHIFT_MAX_BIAS (100) > > #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100) > > > > +/* Extra time delay(in ms) to eliminate the influence of temperature > momentary fluctuation */ > > +#define AMDGPU_SWCTF_EXTRA_DELAY 50 > > I think a delay of 10-15ms is good enough to filter out any spike. 50ms is required by our CE team for supporting the customer. It is also aligned with Windows side. Considering we cannot guard that(10-15ms is good), I think it's better to stick to the 50ms recommended setting. How do you think? 
Evan > > With that change, the series is > Reviewed-by: Lijo Lazar > > Thanks, > Lijo > > > + > > struct amdgpu_xcp_mgr; > > struct amdgpu_device; > > struct amdgpu_irq_src; > > diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > > b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > > index 11b7b4cffaae..ff360c699171 100644 > > --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > > +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > > @@ -26,6 +26,7 @@ > > #include > > #include > > #include > > +#include > > #include "amd_shared.h" > > #include "amd_powerplay.h" > > #include "power_state.h" > > @@ -91,6 +92,45 @@ static int pp_early_init(void *handle) > > return 0; > > } > > > > +static void pp_swctf_delayed_work_handler(struct work_struct *work) { > > + struct pp_hwmgr *hwmgr = > > + container_of(work, struct pp_hwmgr, > swctf_delayed_work.work); > > + struct amdgpu_device *adev = hwmgr->adev; > > + struct amdgpu_dpm_thermal *range = > > + &adev->pm.dpm.thermal; > > + uint32_t gpu_temperature, size; > > + int ret; > > + > > + /* > > +* If the hotspot/edge temperature is confirmed as below SW CTF > setting point > > +* after the delay enforced, nothing will be done. > > +* Otherwise, a graceful shutdown will be performed to prevent > further damage. > > +*/ > > + if (range->sw_ctf_threshold && > > + hwmgr->hwmgr_func->read_sensor) { > > + ret = hwmgr->hwmgr_func->read_sensor(hwmgr, > > + > AMDGPU_PP_SENSOR_HOTSPOT_TEMP, > > +&gpu_temperature, > > +&size); > > + /* > > +* For some legacy ASICs, hotspot temperature retrieving > might be not > > +* supported. Check the edge temperature instead then. 
> > +*/ > > + if (ret == -EOPNOTSUPP) > > + ret = hwmgr->hwmgr_func->read_sensor(hwmgr, > > + > AMDGPU_PP_SENSOR_EDGE_TEMP, > > + > &gpu_temperature, > > +&size); > > + if (!ret && gpu_temperature / 1000 < range- > >sw_ctf_threshold) > > + return; > > + } > > + > > + dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW > CTF) detected!\n"); > > + dev_emerg(adev->dev, "ERROR: System is going to shutdown due to > GPU SW CTF!\n"); > > + orderly_poweroff(true); > > +} > > + > > static int pp_sw_init(void *handle) > > { > > struct amdgpu_device *adev = handle; @@ -101,6 +141,10 @@ static > > int pp_sw_init(void *handle) > > > > pr_debug("powerplay sw init %s\n", ret ? "failed" : > > "successfully"); > > > > + if (!ret) > > + INIT_DELAYED_WORK(&hwmgr->swctf_delayed_work, > > + pp_swctf_delayed_work_handler); > > + > > return ret; > > } > > >
[PATCH 1/1] drm/amdgpu: remove duplicated doorbell range init for sdma v4.4.2
Handled in earlier phase Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 5 - 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 6be19ffc502b..f413898dda37 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -902,11 +902,6 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, WREG32_SDMA(i, regSDMA_CNTL, temp); if (!amdgpu_sriov_vf(adev)) { - ring = &adev->sdma.instance[i].ring; - adev->nbio.funcs->sdma_doorbell_range(adev, i, - ring->use_doorbell, ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { /* unhalt engine */ temp = RREG32_SDMA(i, regSDMA_F32_CNTL); -- 2.38.1
RE: [PATCH 1/1] drm/amdgpu: remove duplicated doorbell range init for sdma v4.4.2
[AMD Official Use Only - General] Reviewed-by: Hawking Zhang Regards, Hawking -Original Message- From: amd-gfx On Behalf Of Le Ma Sent: Tuesday, June 27, 2023 11:56 To: amd-gfx@lists.freedesktop.org Subject: [PATCH 1/1] drm/amdgpu: remove duplicated doorbell range init for sdma v4.4.2 Handled in earlier phase Signed-off-by: Le Ma --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 5 - 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 6be19ffc502b..f413898dda37 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -902,11 +902,6 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, WREG32_SDMA(i, regSDMA_CNTL, temp); if (!amdgpu_sriov_vf(adev)) { - ring = &adev->sdma.instance[i].ring; - adev->nbio.funcs->sdma_doorbell_range(adev, i, - ring->use_doorbell, ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { /* unhalt engine */ temp = RREG32_SDMA(i, regSDMA_F32_CNTL); -- 2.38.1
RE: [PATCH] drm/amd/pm: Add GFX v9.4.3 unique id to sysfs
[AMD Official Use Only - General] Thanks, Lijo -Original Message- From: amd-gfx On Behalf Of Lijo Lazar Sent: Wednesday, June 21, 2023 6:49 PM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Ma, Le ; Kasiviswanathan, Harish ; Kamal, Asad ; Zhang, Hawking Subject: [PATCH] drm/amd/pm: Add GFX v9.4.3 unique id to sysfs Expose unique id of GFX v9.4.3 ASICs as device attribute. Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 386ccf11e657..9ec51f50fc52 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2072,6 +2072,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): + case IP_VERSION(9, 4, 3): case IP_VERSION(10, 3, 0): case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): -- 2.25.1
[PATCH 2/2] drm/amdgpu: use psp_execute_load_ip_fw_cmd_buf instead
Replace the old ones with psp_execute_load_ip_fw_cmd_buf. Signed-off-by: Lang Yu --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 31 - drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 9 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 4 +--- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 4 +--- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 4 +--- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 4 +--- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 4 +--- 9 files changed, 20 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a1cb541f315f..b61963112118 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2474,21 +2474,11 @@ int psp_execute_load_ip_fw_cmd_buf(struct amdgpu_device *adev, return ret; } -static int psp_execute_non_psp_fw_load(struct psp_context *psp, - struct amdgpu_firmware_info *ucode) +static inline +int psp_execute_non_psp_fw_load(struct psp_context *psp, + struct amdgpu_firmware_info *ucode) { - int ret = 0; - struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - - ret = psp_prep_load_ip_fw_cmd_buf(ucode, cmd); - if (!ret) { - ret = psp_cmd_submit_buf(psp, ucode, cmd, -psp->fence_buf_mc_addr); - } - - release_psp_cmd_buf(psp); - - return ret; + return psp_execute_load_ip_fw_cmd_buf(psp->adev, ucode, 0, 0, 0); } static int psp_load_smu_fw(struct psp_context *psp) @@ -2946,19 +2936,6 @@ int psp_rlc_autoload_start(struct psp_context *psp) return ret; } -int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, - uint64_t cmd_gpu_addr, int cmd_size) -{ - struct amdgpu_firmware_info ucode = {0}; - - ucode.ucode_id = inst_idx ? 
AMDGPU_UCODE_ID_VCN1_RAM : - AMDGPU_UCODE_ID_VCN0_RAM; - ucode.mc_addr = cmd_gpu_addr; - ucode.ucode_size = cmd_size; - - return psp_execute_non_psp_fw_load(&adev->psp, &ucode); -} - int psp_ring_cmd_submit(struct psp_context *psp, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index bd324fed6237..e49984a9d570 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -459,8 +459,6 @@ extern int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_inde uint32_t field_val, uint32_t mask, uint32_t msec_timeout); int psp_gpu_reset(struct amdgpu_device *adev); -int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, - uint64_t cmd_gpu_addr, int cmd_size); int psp_execute_load_ip_fw_cmd_buf(struct amdgpu_device *adev, struct amdgpu_firmware_info *ucode, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index d37ebd4402ef..1805cd042d34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -1257,3 +1257,12 @@ int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev) return 0; } + +int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx) +{ + return psp_execute_load_ip_fw_cmd_buf(adev, NULL, + inst_idx ? 
AMDGPU_UCODE_ID_VCN1_RAM : AMDGPU_UCODE_ID_VCN0_RAM, + adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 92d5534df5f4..3ac5ad91ed08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -414,4 +414,6 @@ int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev); +int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index c975aed2f6c7..74cd1522067c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -881,9 +881,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); if (indirect) - psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr, - (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr
[PATCH 1/2] drm/amdgpu: extract a PSP function to execute IP FW loading commands
This function is more general and easy to use by more clients. Signed-off-by: Lang Yu --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 29 + drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 6 + 2 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a33c155dddcf..a1cb541f315f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2445,6 +2445,35 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, return ret; } +int psp_execute_load_ip_fw_cmd_buf(struct amdgpu_device *adev, + struct amdgpu_firmware_info *ucode, + uint32_t ucode_id, + uint64_t cmd_buf_gpu_addr, + int cmd_buf_size) +{ + struct amdgpu_firmware_info fw_info = { + .ucode_id = ucode_id, + .mc_addr = cmd_buf_gpu_addr, + .ucode_size = cmd_buf_size, + }; + struct psp_context *psp = &adev->psp; + struct psp_gfx_cmd_resp *cmd = + acquire_psp_cmd_buf(psp); + int ret; + + if (!ucode) + ucode = &fw_info; + + ret = psp_prep_load_ip_fw_cmd_buf(ucode, cmd); + if (!ret) + ret = psp_cmd_submit_buf(psp, ucode, cmd, +psp->fence_buf_mc_addr); + + release_psp_cmd_buf(psp); + + return ret; +} + static int psp_execute_non_psp_fw_load(struct psp_context *psp, struct amdgpu_firmware_info *ucode) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 4847aacdf9dc..bd324fed6237 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -462,6 +462,12 @@ int psp_gpu_reset(struct amdgpu_device *adev); int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, uint64_t cmd_gpu_addr, int cmd_size); +int psp_execute_load_ip_fw_cmd_buf(struct amdgpu_device *adev, + struct amdgpu_firmware_info *ucode, + uint32_t ucode_id, + uint64_t cmd_buf_gpu_addr, + int cmd_buf_size); + int psp_ta_init_shared_buf(struct psp_context *psp, struct ta_mem_context *mem_ctx); void psp_ta_free_shared_buf(struct 
ta_mem_context *mem_ctx); -- 2.25.1
Re: [PATCH 2/2] drm/amd/pm: avoid unintentional shutdown due to temperature momentary fluctuation
On 6/27/2023 9:02 AM, Quan, Evan wrote: [AMD Official Use Only - General] -Original Message- From: Lazar, Lijo Sent: Monday, June 26, 2023 7:54 PM To: Quan, Evan ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander Subject: Re: [PATCH 2/2] drm/amd/pm: avoid unintentional shutdown due to temperature momentary fluctuation On 6/26/2023 1:17 PM, Evan Quan wrote: An intentional delay is added on soft ctf triggered. Then there will be a double check for the GPU temperature before taking further action. This can avoid unintended shutdown due to temperature momentary fluctuation. Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++ .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 48 +++ .../drm/amd/pm/powerplay/hwmgr/smu_helper.c | 27 --- drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h | 2 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 34 + drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 2 + .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 9 +--- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c| 9 +--- 8 files changed, 102 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e459381dc759..5ef1f31e703c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -287,6 +287,9 @@ extern int amdgpu_user_partt_mode; #define AMDGPU_SMARTSHIFT_MAX_BIAS (100) #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100) +/* Extra time delay(in ms) to eliminate the influence of temperature momentary fluctuation */ +#define AMDGPU_SWCTF_EXTRA_DELAY 50 I think a delay of 10-15ms is good enough to filter out any spike. 50ms is required by our CE team for supporting the customer. It is also aligned with Windows side. Considering we cannot guard that(10-15ms is good), I think it's better to stick to the 50ms recommended setting. How do you think? 
IMO, a temperature reading consistently remaining high for 10-15 ms shouldn't be considered a spike since the thermal controller (given its clock) would have taken multiple readings by that time for the same sensor. I'm fine if you want to align with the Windows side. Thanks, Lijo Evan With that change, the series is Reviewed-by: Lijo Lazar Thanks, Lijo + struct amdgpu_xcp_mgr; struct amdgpu_device; struct amdgpu_irq_src; diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 11b7b4cffaae..ff360c699171 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "amd_shared.h" #include "amd_powerplay.h" #include "power_state.h" @@ -91,6 +92,45 @@ static int pp_early_init(void *handle) return 0; } +static void pp_swctf_delayed_work_handler(struct work_struct *work) { + struct pp_hwmgr *hwmgr = + container_of(work, struct pp_hwmgr, swctf_delayed_work.work); + struct amdgpu_device *adev = hwmgr->adev; + struct amdgpu_dpm_thermal *range = + &adev->pm.dpm.thermal; + uint32_t gpu_temperature, size; + int ret; + + /* +* If the hotspot/edge temperature is confirmed as below SW CTF setting point +* after the delay enforced, nothing will be done. +* Otherwise, a graceful shutdown will be performed to prevent further damage. +*/ + if (range->sw_ctf_threshold && + hwmgr->hwmgr_func->read_sensor) { + ret = hwmgr->hwmgr_func->read_sensor(hwmgr, + AMDGPU_PP_SENSOR_HOTSPOT_TEMP, +&gpu_temperature, +&size); + /* +* For some legacy ASICs, hotspot temperature retrieving might be not +* supported. Check the edge temperature instead then. 
+*/ + if (ret == -EOPNOTSUPP) + ret = hwmgr->hwmgr_func->read_sensor(hwmgr, + AMDGPU_PP_SENSOR_EDGE_TEMP, + &gpu_temperature, +&size); + if (!ret && gpu_temperature / 1000 < range->sw_ctf_threshold) + return; + } + + dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n"); + dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n"); + orderly_poweroff(true); +} + static int pp_sw_init(void *handle) { struct amdgpu_device *adev = handle; @@ -101,6 +141,10 @@ static int pp_sw_init(void *handle) pr_debug("powerplay sw init %s\n", ret ? "failed" : "successfully"); + if (!ret) + INIT_DELAYED_WORK(&hwmgr->swctf_delayed_work, + pp_swctf_delayed_work_handler); + return ret; } @@ -135,6 +179,8 @@ static int pp_hw_fini(void *handle) struct amdgpu_de
RE: [PATCH] drm/amd/pm: Add GFX v9.4.3 unique id to sysfs
[AMD Official Use Only - General] Reviewed-by: Yang Wang Best Regards, Kevin -Original Message- From: amd-gfx On Behalf Of Lazar, Lijo Sent: Tuesday, June 27, 2023 12:39 PM To: Lazar, Lijo ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Ma, Le ; Kasiviswanathan, Harish ; Kamal, Asad ; Zhang, Hawking Subject: RE: [PATCH] drm/amd/pm: Add GFX v9.4.3 unique id to sysfs [AMD Official Use Only - General] [AMD Official Use Only - General] Thanks, Lijo -Original Message- From: amd-gfx On Behalf Of Lijo Lazar Sent: Wednesday, June 21, 2023 6:49 PM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Ma, Le ; Kasiviswanathan, Harish ; Kamal, Asad ; Zhang, Hawking Subject: [PATCH] drm/amd/pm: Add GFX v9.4.3 unique id to sysfs Expose unique id of GFX v9.4.3 ASICs as device attribute. Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 386ccf11e657..9ec51f50fc52 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2072,6 +2072,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): + case IP_VERSION(9, 4, 3): case IP_VERSION(10, 3, 0): case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): -- 2.25.1 <>
[PATCH] drm/amdgpu: Keep non-psp path for partition switch
When PSP block is not present, use direct programming. Signed-off-by: Lijo Lazar Acked-by: Mangesh Gadre Tested-by: Mangesh Gadre --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 28 +++-- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 76b189bd244a..9e3b835bdbb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -623,12 +623,28 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, int num_xccs_per_xcp) { - int ret; - - ret = psp_spatial_partition(&adev->psp, NUM_XCC(adev->gfx.xcc_mask) / - num_xccs_per_xcp); - if (ret) - return ret; + int ret, i, num_xcc; + u32 tmp = 0; + + if (adev->psp.funcs) { + ret = psp_spatial_partition(&adev->psp, + NUM_XCC(adev->gfx.xcc_mask) / + num_xccs_per_xcp); + if (ret) + return ret; + } else { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + for (i = 0; i < num_xcc; i++) { + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, + num_xccs_per_xcp); + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, + i % num_xccs_per_xcp); + WREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL, +tmp); + } + ret = 0; + } adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp; -- 2.25.1
RE: [PATCH] drm/amdgpu: Keep non-psp path for partition switch
[AMD Official Use Only - General] Reviewed-by: Hawking Zhang Regards, Hawking -Original Message- From: Lazar, Lijo Sent: Tuesday, June 27, 2023 13:19 To: amd-gfx@lists.freedesktop.org Cc: Zhang, Hawking ; Deucher, Alexander ; Gadre, Mangesh Subject: [PATCH] drm/amdgpu: Keep non-psp path for partition switch When PSP block is not present, use direct programming. Signed-off-by: Lijo Lazar Acked-by: Mangesh Gadre Tested-by: Mangesh Gadre --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 28 +++-- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 76b189bd244a..9e3b835bdbb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -623,12 +623,28 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, int num_xccs_per_xcp) { - int ret; - - ret = psp_spatial_partition(&adev->psp, NUM_XCC(adev->gfx.xcc_mask) / - num_xccs_per_xcp); - if (ret) - return ret; + int ret, i, num_xcc; + u32 tmp = 0; + + if (adev->psp.funcs) { + ret = psp_spatial_partition(&adev->psp, + NUM_XCC(adev->gfx.xcc_mask) / + num_xccs_per_xcp); + if (ret) + return ret; + } else { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + for (i = 0; i < num_xcc; i++) { + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, + num_xccs_per_xcp); + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, + i % num_xccs_per_xcp); + WREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL, +tmp); + } + ret = 0; + } adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp; -- 2.25.1
RE: [PATCH] drm/amd/pm: Add GFX v9.4.3 unique id to sysfs
[AMD Official Use Only - General] Reviewed-by: Hawking Zhang Regards, Hawking -Original Message- From: Lazar, Lijo Sent: Tuesday, June 27, 2023 12:39 To: Lazar, Lijo ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Ma, Le ; Kasiviswanathan, Harish ; Kamal, Asad ; Zhang, Hawking Subject: RE: [PATCH] drm/amd/pm: Add GFX v9.4.3 unique id to sysfs [AMD Official Use Only - General] Thanks, Lijo -Original Message- From: amd-gfx On Behalf Of Lijo Lazar Sent: Wednesday, June 21, 2023 6:49 PM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Ma, Le ; Kasiviswanathan, Harish ; Kamal, Asad ; Zhang, Hawking Subject: [PATCH] drm/amd/pm: Add GFX v9.4.3 unique id to sysfs Expose unique id of GFX v9.4.3 ASICs as device attribute. Signed-off-by: Lijo Lazar --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 386ccf11e657..9ec51f50fc52 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2072,6 +2072,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): + case IP_VERSION(9, 4, 3): case IP_VERSION(10, 3, 0): case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): -- 2.25.1