On 11/3/25 12:36, Lijo Lazar wrote:
> For a mode-1 reset done at the end of S3 on PSPv11 dGPUs, only check if
> TOS is unloaded.
>
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4649
> Fixes : 440cec4ca1c2 ("drm/amdgpu: Wait for bootloader after PSPv11 reset")
>
> Signed-off-by: Lijo Lazar <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 9 +++++++--
> drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 26 ++++++++++++++++++++++++-
> 2 files changed, 32 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index cee90f9e58a9..3f42cf7c6193 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2627,9 +2627,14 @@ static int amdgpu_pmops_suspend_noirq(struct device
> *dev)
> {
> struct drm_device *drm_dev = dev_get_drvdata(dev);
> struct amdgpu_device *adev = drm_to_adev(drm_dev);
> + int r;
>
> - if (amdgpu_acpi_should_gpu_reset(adev))
> - return amdgpu_asic_reset(adev);
> + if (amdgpu_acpi_should_gpu_reset(adev)) {
> + amdgpu_device_lock_reset_domain(adev->reset_domain);
> + r = amdgpu_asic_reset(adev);
> + amdgpu_device_unlock_reset_domain(adev->reset_domain);
> + return r;
> + }
This chunk looks reasonable, but it would be even cleaner if we pushed
the reset to the reset queue.
Otherwise XGMI might not work any more.
>
> return 0;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> index 64b240b51f1a..a9be7a505026 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> @@ -142,13 +142,37 @@ static int psp_v11_0_init_microcode(struct psp_context
> *psp)
> return err;
> }
>
> -static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
> +static int psp_v11_wait_for_tos_unload(struct psp_context *psp)
> {
> struct amdgpu_device *adev = psp->adev;
> + uint32_t sol_reg1, sol_reg2;
> + int retry_loop;
>
> + /* Wait for the TOS to be unloaded */
> + for (retry_loop = 0; retry_loop < 20; retry_loop++) {
> + sol_reg1 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
> + usleep_range(1000, 2000);
> + sol_reg2 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
> + if (sol_reg1 == sol_reg2)
> + return 0;
> + }
> + dev_err(adev->dev, "TOS unload failed, C2PMSG_33: %x C2PMSG_81: %x",
> + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_33),
> + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81));
> +
> + return -ETIME;
> +}
I can't really judge that.
Regards,
Christian.
> +
> +static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
> +{
> + struct amdgpu_device *adev = psp->adev;
> int ret;
> int retry_loop;
>
> + /* For a reset done at the end of S3, only wait for TOS to be unloaded
> */
> + if (adev->in_s3 && !(adev->flags & AMD_IS_APU) && amdgpu_in_reset(adev))
> + return psp_v11_wait_for_tos_unload(psp);
> +
> for (retry_loop = 0; retry_loop < 20; retry_loop++) {
> /* Wait for bootloader to signify that is
> ready having bit 31 of C2PMSG_35 set to 1 */