[AMD Official Use Only - AMD Internal Distribution Only]

This series is Reviewed-by: Jesse Zhang <[email protected]>

> -----Original Message-----
> From: amd-gfx <[email protected]> On Behalf Of Alex
> Deucher
> Sent: Friday, October 24, 2025 2:45 AM
> To: [email protected]
> Cc: Deucher, Alexander <[email protected]>
> Subject: [PATCH 2/2] drm/amdgpu: move reset debug disable handling
>
> Move everything to the supported reset masks rather than having explicit
> misc checks for this.
>
> Signed-off-by: Alex Deucher <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  | 8 +++-----
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 3 ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 3 ++-
>  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c   | 6 ++++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c   | 3 ++-
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 2 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c  | 6 ++++--
> drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 8 ++++++--
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   | 3 ++-
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   | 6 ++++--
>  drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c   | 3 ++-
>  drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c   | 3 ++-
>  12 files changed, 32 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 3842a15e2df8a..3d396ab625f33 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -130,11 +130,9 @@ static enum drm_gpu_sched_stat
> amdgpu_job_timedout(struct drm_sched_job *s_job)
>       }
>
>       /* attempt a per ring reset */
> -     if (unlikely(adev->debug_disable_gpu_ring_reset)) {
> -             dev_err(adev->dev, "Ring reset disabled by debug mask\n");
> -     } else if (amdgpu_gpu_recovery &&
> -                amdgpu_ring_is_reset_type_supported(ring,
> AMDGPU_RESET_TYPE_PER_QUEUE) &&
> -                ring->funcs->reset) {
> +     if (amdgpu_gpu_recovery &&
> +         amdgpu_ring_is_reset_type_supported(ring,
> AMDGPU_RESET_TYPE_PER_QUEUE) &&
> +         ring->funcs->reset) {
>               dev_err(adev->dev, "Starting %s ring reset\n",
>                       s_job->sched->name);
>               ring->in_ring_reset = true;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 43f769fed810e..bf1b90a341d8d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -468,9 +468,6 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring,
> unsigned int vmid,
>       ktime_t deadline;
>       bool ret;
>
> -     if (unlikely(ring->adev->debug_disable_soft_recovery))
> -             return false;
> -
>       deadline = ktime_add_us(ktime_get(), 10000);
>
>       if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || 
> !fence)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 5bbd264f8357c..39b8adf23a9fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4959,7 +4959,8 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block
> *ip_block)
>               amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
>       adev->gfx.compute_supported_reset =
>               amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
> -     if (!amdgpu_sriov_vf(adev)) {
> +     if (!amdgpu_sriov_vf(adev) &&
> +         !adev->debug_disable_gpu_ring_reset) {
>               adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>               adev->gfx.gfx_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>       }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 6994fb2cbf917..e1785a8984662 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -1821,13 +1821,15 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block
> *ip_block)
>       case IP_VERSION(11, 0, 3):
>               if ((adev->gfx.me_fw_version >= 2280) &&
>                   (adev->gfx.mec_fw_version >= 2410) &&
> -                 !amdgpu_sriov_vf(adev)) {
> +                 !amdgpu_sriov_vf(adev) &&
> +                 !adev->debug_disable_gpu_ring_reset) {
>                       adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>                       adev->gfx.gfx_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>               }
>               break;
>       default:
> -             if (!amdgpu_sriov_vf(adev)) {
> +             if (!amdgpu_sriov_vf(adev) &&
> +                 !adev->debug_disable_gpu_ring_reset) {
>                       adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>                       adev->gfx.gfx_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>               }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> index 8d6000c7ce26d..b86a40e7c2d3b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> @@ -1548,7 +1548,8 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block
> *ip_block)
>       case IP_VERSION(12, 0, 1):
>               if ((adev->gfx.me_fw_version >= 2660) &&
>                   (adev->gfx.mec_fw_version >= 2920) &&
> -                 !amdgpu_sriov_vf(adev)) {
> +                 !amdgpu_sriov_vf(adev) &&
> +                 !adev->debug_disable_gpu_ring_reset) {
>                       adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>                       adev->gfx.gfx_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>               }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index f1a2efc2a8d0a..0148d7ff34d99 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -2409,7 +2409,7 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block
> *ip_block)
>               amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
>       adev->gfx.compute_supported_reset =
>               amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
> -     if (!amdgpu_sriov_vf(adev))
> +     if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset)
>               adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>
>       r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index e0b50c690f8cb..c4c551ef6b874 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -1149,14 +1149,16 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block
> *ip_block)
>       case IP_VERSION(9, 4, 3):
>       case IP_VERSION(9, 4, 4):
>               if ((adev->gfx.mec_fw_version >= 155) &&
> -                 !amdgpu_sriov_vf(adev)) {
> +                 !amdgpu_sriov_vf(adev) &&
> +                 !adev->debug_disable_gpu_ring_reset) {
>                       adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>                       adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_PIPE;
>               }
>               break;
>       case IP_VERSION(9, 5, 0):
>               if ((adev->gfx.mec_fw_version >= 21) &&
> -                 !amdgpu_sriov_vf(adev)) {
> +                 !amdgpu_sriov_vf(adev) &&
> +                 !adev->debug_disable_gpu_ring_reset) {
>                       adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>                       adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_PIPE;
>               }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index b95afb4afd032..5ec8e28980d5f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -2361,11 +2361,15 @@ static void sdma_v4_4_2_update_reset_mask(struct
> amdgpu_device *adev)
>       switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
>       case IP_VERSION(9, 4, 3):
>       case IP_VERSION(9, 4, 4):
> -             if ((adev->gfx.mec_fw_version >= 0xb0) &&
> amdgpu_dpm_reset_sdma_is_supported(adev))
> +             if ((adev->gfx.mec_fw_version >= 0xb0) &&
> +                 amdgpu_dpm_reset_sdma_is_supported(adev) &&
> +                 !adev->debug_disable_gpu_ring_reset)
>                       adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>               break;
>       case IP_VERSION(9, 5, 0):
> -             if ((adev->gfx.mec_fw_version >= 0xf) &&
> amdgpu_dpm_reset_sdma_is_supported(adev))
> +             if ((adev->gfx.mec_fw_version >= 0xf) &&
> +                 amdgpu_dpm_reset_sdma_is_supported(adev) &&
> +                 !adev->debug_disable_gpu_ring_reset)
>                       adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>               break;
>       default:
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 7dc67a22a7a01..8ddc4df06a1fd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1429,7 +1429,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block
> *ip_block)
>       case IP_VERSION(5, 0, 2):
>       case IP_VERSION(5, 0, 5):
>               if ((adev->sdma.instance[0].fw_version >= 35) &&
> -                 !amdgpu_sriov_vf(adev))
> +                 !amdgpu_sriov_vf(adev) &&
> +                 !adev->debug_disable_gpu_ring_reset)
>                       adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>               break;
>       default:
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index d3b2ac5813383..e163369773adc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -1348,12 +1348,14 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block
> *ip_block)
>       case IP_VERSION(5, 2, 3):
>       case IP_VERSION(5, 2, 4):
>               if ((adev->sdma.instance[0].fw_version >= 76) &&
> -                 !amdgpu_sriov_vf(adev))
> +                 !amdgpu_sriov_vf(adev) &&
> +                 !adev->debug_disable_gpu_ring_reset)
>                       adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>               break;
>       case IP_VERSION(5, 2, 5):
>               if ((adev->sdma.instance[0].fw_version >= 34) &&
> -                 !amdgpu_sriov_vf(adev))
> +                 !amdgpu_sriov_vf(adev) &&
> +                 !adev->debug_disable_gpu_ring_reset)
>                       adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>               break;
>       default:
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> index 032cabd8fa8d0..fbe166a4b9b88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> @@ -1356,7 +1356,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block
> *ip_block)
>       case IP_VERSION(6, 0, 2):
>       case IP_VERSION(6, 0, 3):
>               if ((adev->sdma.instance[0].fw_version >= 21) &&
> -                 !amdgpu_sriov_vf(adev))
> +                 !amdgpu_sriov_vf(adev) &&
> +                 !adev->debug_disable_gpu_ring_reset)
>                       adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>               break;
>       default:
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> index cb5a9daed63ce..007f527d54e7d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> @@ -1337,7 +1337,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block
> *ip_block)
>
>       adev->sdma.supported_reset =
>               amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
> -     if (!amdgpu_sriov_vf(adev))
> +     if (!amdgpu_sriov_vf(adev) &&
> +         !adev->debug_disable_gpu_ring_reset)
>               adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>
>       r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> --
> 2.51.0

Reply via email to