[AMD Official Use Only - AMD Internal Distribution Only] This series is Reviewed-by: Jesse Zhang <[email protected]>
> -----Original Message----- > From: amd-gfx <[email protected]> On Behalf Of Alex > Deucher > Sent: Friday, October 24, 2025 2:45 AM > To: [email protected] > Cc: Deucher, Alexander <[email protected]> > Subject: [PATCH 2/2] drm/amdgpu: move reset debug disable handling > > Move everything to the supported resets masks rather than having an explicit > misc > checks for this. > > Signed-off-by: Alex Deucher <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 8 +++----- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 3 --- > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 ++- > drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 6 ++++-- > drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 3 ++- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- > drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 ++++-- > drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 8 ++++++-- > drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 3 ++- > drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 6 ++++-- > drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 3 ++- > drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 3 ++- > 12 files changed, 32 insertions(+), 22 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > index 3842a15e2df8a..3d396ab625f33 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > @@ -130,11 +130,9 @@ static enum drm_gpu_sched_stat > amdgpu_job_timedout(struct drm_sched_job *s_job) > } > > /* attempt a per ring reset */ > - if (unlikely(adev->debug_disable_gpu_ring_reset)) { > - dev_err(adev->dev, "Ring reset disabled by debug mask\n"); > - } else if (amdgpu_gpu_recovery && > - amdgpu_ring_is_reset_type_supported(ring, > AMDGPU_RESET_TYPE_PER_QUEUE) && > - ring->funcs->reset) { > + if (amdgpu_gpu_recovery && > + amdgpu_ring_is_reset_type_supported(ring, > AMDGPU_RESET_TYPE_PER_QUEUE) && > + ring->funcs->reset) { > dev_err(adev->dev, "Starting %s ring reset\n", > s_job->sched->name); > ring->in_ring_reset = true; > diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > index 43f769fed810e..bf1b90a341d8d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > @@ -468,9 +468,6 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, > unsigned int vmid, > ktime_t deadline; > bool ret; > > - if (unlikely(ring->adev->debug_disable_soft_recovery)) > - return false; > - > deadline = ktime_add_us(ktime_get(), 10000); > > if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || > !fence) diff > --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index 5bbd264f8357c..39b8adf23a9fa 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -4959,7 +4959,8 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block > *ip_block) > amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); > adev->gfx.compute_supported_reset = > amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); > - if (!amdgpu_sriov_vf(adev)) { > + if (!amdgpu_sriov_vf(adev) && > + !adev->debug_disable_gpu_ring_reset) { > adev->gfx.compute_supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > adev->gfx.gfx_supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > index 6994fb2cbf917..e1785a8984662 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > @@ -1821,13 +1821,15 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block > *ip_block) > case IP_VERSION(11, 0, 3): > if ((adev->gfx.me_fw_version >= 2280) && > (adev->gfx.mec_fw_version >= 2410) && > - !amdgpu_sriov_vf(adev)) { > + !amdgpu_sriov_vf(adev) && > + !adev->debug_disable_gpu_ring_reset) { > adev->gfx.compute_supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > adev->gfx.gfx_supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > } > 
break; > default: > - if (!amdgpu_sriov_vf(adev)) { > + if (!amdgpu_sriov_vf(adev) && > + !adev->debug_disable_gpu_ring_reset) { > adev->gfx.compute_supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > adev->gfx.gfx_supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c > index 8d6000c7ce26d..b86a40e7c2d3b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c > @@ -1548,7 +1548,8 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block > *ip_block) > case IP_VERSION(12, 0, 1): > if ((adev->gfx.me_fw_version >= 2660) && > (adev->gfx.mec_fw_version >= 2920) && > - !amdgpu_sriov_vf(adev)) { > + !amdgpu_sriov_vf(adev) && > + !adev->debug_disable_gpu_ring_reset) { > adev->gfx.compute_supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > adev->gfx.gfx_supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index f1a2efc2a8d0a..0148d7ff34d99 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -2409,7 +2409,7 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block > *ip_block) > amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); > adev->gfx.compute_supported_reset = > amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); > - if (!amdgpu_sriov_vf(adev)) > + if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset) > adev->gfx.compute_supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > > r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); diff --git > a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > index e0b50c690f8cb..c4c551ef6b874 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > @@ -1149,14 +1149,16 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block > *ip_block) > case 
IP_VERSION(9, 4, 3): > case IP_VERSION(9, 4, 4): > if ((adev->gfx.mec_fw_version >= 155) && > - !amdgpu_sriov_vf(adev)) { > + !amdgpu_sriov_vf(adev) && > + !adev->debug_disable_gpu_ring_reset) { > adev->gfx.compute_supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > adev->gfx.compute_supported_reset |= > AMDGPU_RESET_TYPE_PER_PIPE; > } > break; > case IP_VERSION(9, 5, 0): > if ((adev->gfx.mec_fw_version >= 21) && > - !amdgpu_sriov_vf(adev)) { > + !amdgpu_sriov_vf(adev) && > + !adev->debug_disable_gpu_ring_reset) { > adev->gfx.compute_supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > adev->gfx.compute_supported_reset |= > AMDGPU_RESET_TYPE_PER_PIPE; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > index b95afb4afd032..5ec8e28980d5f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > @@ -2361,11 +2361,15 @@ static void sdma_v4_4_2_update_reset_mask(struct > amdgpu_device *adev) > switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { > case IP_VERSION(9, 4, 3): > case IP_VERSION(9, 4, 4): > - if ((adev->gfx.mec_fw_version >= 0xb0) && > amdgpu_dpm_reset_sdma_is_supported(adev)) > + if ((adev->gfx.mec_fw_version >= 0xb0) && > + amdgpu_dpm_reset_sdma_is_supported(adev) && > + !adev->debug_disable_gpu_ring_reset) > adev->sdma.supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > break; > case IP_VERSION(9, 5, 0): > - if ((adev->gfx.mec_fw_version >= 0xf) && > amdgpu_dpm_reset_sdma_is_supported(adev)) > + if ((adev->gfx.mec_fw_version >= 0xf) && > + amdgpu_dpm_reset_sdma_is_supported(adev) && > + !adev->debug_disable_gpu_ring_reset) > adev->sdma.supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > break; > default: > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > index 7dc67a22a7a01..8ddc4df06a1fd 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > @@ -1429,7 +1429,8 @@ 
static int sdma_v5_0_sw_init(struct amdgpu_ip_block > *ip_block) > case IP_VERSION(5, 0, 2): > case IP_VERSION(5, 0, 5): > if ((adev->sdma.instance[0].fw_version >= 35) && > - !amdgpu_sriov_vf(adev)) > + !amdgpu_sriov_vf(adev) && > + !adev->debug_disable_gpu_ring_reset) > adev->sdma.supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > break; > default: > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > index d3b2ac5813383..e163369773adc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > @@ -1348,12 +1348,14 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block > *ip_block) > case IP_VERSION(5, 2, 3): > case IP_VERSION(5, 2, 4): > if ((adev->sdma.instance[0].fw_version >= 76) && > - !amdgpu_sriov_vf(adev)) > + !amdgpu_sriov_vf(adev) && > + !adev->debug_disable_gpu_ring_reset) > adev->sdma.supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > break; > case IP_VERSION(5, 2, 5): > if ((adev->sdma.instance[0].fw_version >= 34) && > - !amdgpu_sriov_vf(adev)) > + !amdgpu_sriov_vf(adev) && > + !adev->debug_disable_gpu_ring_reset) > adev->sdma.supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > break; > default: > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c > index 032cabd8fa8d0..fbe166a4b9b88 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c > @@ -1356,7 +1356,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block > *ip_block) > case IP_VERSION(6, 0, 2): > case IP_VERSION(6, 0, 3): > if ((adev->sdma.instance[0].fw_version >= 21) && > - !amdgpu_sriov_vf(adev)) > + !amdgpu_sriov_vf(adev) && > + !adev->debug_disable_gpu_ring_reset) > adev->sdma.supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > break; > default: > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c > index cb5a9daed63ce..007f527d54e7d 100644 > --- 
a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c > @@ -1337,7 +1337,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block > *ip_block) > > adev->sdma.supported_reset = > amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring); > - if (!amdgpu_sriov_vf(adev)) > + if (!amdgpu_sriov_vf(adev) && > + !adev->debug_disable_gpu_ring_reset) > adev->sdma.supported_reset |= > AMDGPU_RESET_TYPE_PER_QUEUE; > > r = amdgpu_sdma_sysfs_reset_mask_init(adev); > -- > 2.51.0
