Re: [PATCH 2/5] drm/amdgpu/gfx11: enable per-pipe reset support for compute queues

2026-04-03 Thread Alex Deucher
On Fri, Apr 3, 2026 at 5:34 AM Jesse Zhang  wrote:
>
> Previously, the per-pipe reset sequence was incorrect, leading to unreliable
> recovery and potential firmware hangs. The reset logic has now been fixed
> to properly handle HQD cleanup while the pipe is held in reset before
> bringing it out of reset.
>
> Signed-off-by: Jesse Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 18b92990179d..43a89816f794 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -1851,6 +1851,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block 
> *ip_block)
> !adev->debug_disable_gpu_ring_reset) {
> adev->gfx.compute_supported_reset |= 
> AMDGPU_RESET_TYPE_PER_QUEUE;
> adev->gfx.gfx_supported_reset |= 
> AMDGPU_RESET_TYPE_PER_QUEUE;
> +   adev->gfx.compute_supported_reset |= 
> AMDGPU_RESET_TYPE_PER_PIPE;
> }
> break;
> default:
> @@ -1858,6 +1859,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block 
> *ip_block)
> !adev->debug_disable_gpu_ring_reset) {
> adev->gfx.compute_supported_reset |= 
> AMDGPU_RESET_TYPE_PER_QUEUE;
> adev->gfx.gfx_supported_reset |= 
> AMDGPU_RESET_TYPE_PER_QUEUE;
> +   adev->gfx.compute_supported_reset |= 
> AMDGPU_RESET_TYPE_PER_PIPE;
> }
> break;
> }
> @@ -6906,6 +6908,11 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring 
> *ring,
> return amdgpu_ring_reset_helper_end(ring, timedout_fence);
>  }
>
> +static bool gfx_v11_compute_pipe_reset_support(struct amdgpu_device *adev)
> +{
> +   return !!(adev->gfx.compute_supported_reset & 
> AMDGPU_RESET_TYPE_PER_PIPE);
> +}

I think you can replace both gfx_v11_compute_pipe_reset_support() and
gfx_v11_pipe_reset_support() with
amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_PIPE).

Alex

> +
>  /*
>   * With MEC pipe reset asserted, clear CP_HQD_ACTIVE / 
> CP_HQD_DEQUEUE_REQUEST for
>   * every queue on (me, pipe). HQDs must be torn down while pipe reset stays
> @@ -6941,7 +6948,7 @@ static int gfx_v11_0_reset_compute_pipe(struct 
> amdgpu_ring *ring)
> uint32_t reset_val, clean_val;
> int r;
>
> -   if (!gfx_v11_pipe_reset_support(adev))
> +   if (!gfx_v11_compute_pipe_reset_support(adev))
> return -EOPNOTSUPP;
>
> gfx_v11_0_set_safe_mode(adev, 0);
> --
> 2.49.0
>


[PATCH 2/5] drm/amdgpu/gfx11: enable per-pipe reset support for compute queues

2026-04-03 Thread Jesse Zhang
Previously, the per-pipe reset sequence was incorrect, leading to unreliable
recovery and potential firmware hangs. The reset logic has now been fixed
to properly handle HQD cleanup while the pipe is held in reset before
bringing it out of reset.

Signed-off-by: Jesse Zhang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 18b92990179d..43a89816f794 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1851,6 +1851,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block 
*ip_block)
!adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= 
AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= 
AMDGPU_RESET_TYPE_PER_QUEUE;
+   adev->gfx.compute_supported_reset |= 
AMDGPU_RESET_TYPE_PER_PIPE;
}
break;
default:
@@ -1858,6 +1859,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block 
*ip_block)
!adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= 
AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= 
AMDGPU_RESET_TYPE_PER_QUEUE;
+   adev->gfx.compute_supported_reset |= 
AMDGPU_RESET_TYPE_PER_PIPE;
}
break;
}
@@ -6906,6 +6908,11 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
return amdgpu_ring_reset_helper_end(ring, timedout_fence);
 }
 
+static bool gfx_v11_compute_pipe_reset_support(struct amdgpu_device *adev)
+{
+   return !!(adev->gfx.compute_supported_reset & 
AMDGPU_RESET_TYPE_PER_PIPE);
+}
+
 /*
  * With MEC pipe reset asserted, clear CP_HQD_ACTIVE / CP_HQD_DEQUEUE_REQUEST 
for
  * every queue on (me, pipe). HQDs must be torn down while pipe reset stays
@@ -6941,7 +6948,7 @@ static int gfx_v11_0_reset_compute_pipe(struct 
amdgpu_ring *ring)
uint32_t reset_val, clean_val;
int r;
 
-   if (!gfx_v11_pipe_reset_support(adev))
+   if (!gfx_v11_compute_pipe_reset_support(adev))
return -EOPNOTSUPP;
 
gfx_v11_0_set_safe_mode(adev, 0);
-- 
2.49.0