On 11/3/25 12:36, Lijo Lazar wrote:
> For a mode-1 reset done at the end of S3 on PSPv11 dGPUs, only check if
> TOS is unloaded.
> 
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4649
> Fixes: 440cec4ca1c2 ("drm/amdgpu: Wait for bootloader after PSPv11 reset")
> 
> Signed-off-by: Lijo Lazar <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  9 +++++++--
>  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 26 ++++++++++++++++++++++++-
>  2 files changed, 32 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index cee90f9e58a9..3f42cf7c6193 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2627,9 +2627,14 @@ static int amdgpu_pmops_suspend_noirq(struct device 
> *dev)
>  {
>       struct drm_device *drm_dev = dev_get_drvdata(dev);
>       struct amdgpu_device *adev = drm_to_adev(drm_dev);
> +     int r;
>  
> -     if (amdgpu_acpi_should_gpu_reset(adev))
> -             return amdgpu_asic_reset(adev);
> +     if (amdgpu_acpi_should_gpu_reset(adev)) {
> +             amdgpu_device_lock_reset_domain(adev->reset_domain);
> +             r = amdgpu_asic_reset(adev);
> +             amdgpu_device_unlock_reset_domain(adev->reset_domain);
> +             return r;
> +     }

This chunk looks reasonable, but it would be even cleaner if we pushed
the reset to the reset queue.

Otherwise XGMI might not work any more.

>  
>       return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
> b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> index 64b240b51f1a..a9be7a505026 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> @@ -142,13 +142,37 @@ static int psp_v11_0_init_microcode(struct psp_context 
> *psp)
>       return err;
>  }
>  
> -static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
> +static int psp_v11_wait_for_tos_unload(struct psp_context *psp)
>  {
>       struct amdgpu_device *adev = psp->adev;
> +     uint32_t sol_reg1, sol_reg2;
> +     int retry_loop;
>  
> +     /* Wait for the TOS to be unloaded */
> +     for (retry_loop = 0; retry_loop < 20; retry_loop++) {
> +             sol_reg1 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
> +             usleep_range(1000, 2000);
> +             sol_reg2 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
> +             if (sol_reg1 == sol_reg2)
> +                     return 0;
> +     }
> +     dev_err(adev->dev, "TOS unload failed, C2PMSG_33: %x C2PMSG_81: %x",
> +             RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_33),
> +             RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81));
> +
> +     return -ETIME;
> +}

I can't really judge that.

Regards,
Christian.

> +
> +static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
> +{
> +     struct amdgpu_device *adev = psp->adev;
>       int ret;
>       int retry_loop;
>  
> +     /* For a reset done at the end of S3, only wait for TOS to be unloaded 
> */
> +     if (adev->in_s3 && !(adev->flags & AMD_IS_APU) && amdgpu_in_reset(adev))
> +             return psp_v11_wait_for_tos_unload(psp);
> +
>       for (retry_loop = 0; retry_loop < 20; retry_loop++) {
>               /* Wait for bootloader to signify that is
>                   ready having bit 31 of C2PMSG_35 set to 1 */

Reply via email to