After a per-queue reset via MES, verify that the queue is functional by performing a ring test. If the test fails, fall back to a pipe reset to ensure proper recovery.

Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 4db6b4e398e5..bec99d149845 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -7044,13 +7044,16 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
 			       struct amdgpu_fence *timedout_fence)
 {
 	struct amdgpu_device *adev = ring->adev;
+	int reset_mode = AMDGPU_RESET_TYPE_PER_QUEUE;
 	int r = 0;
 
 	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
 
 	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
+pipe_reset:
 	if (r) {
 		dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+		reset_mode = AMDGPU_RESET_TYPE_PER_PIPE;
 		amdgpu_gfx_mec_pre_pipe_reset(adev, ring);
 		r = gfx_v11_0_reset_compute_pipe(ring);
 		if (r) {
@@ -7071,6 +7074,13 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
 		return r;
 	}
 
+	if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE) {
+		if (amdgpu_ring_reset_helper_end(ring, timedout_fence))
+			goto pipe_reset;
+		else
+			return 0;
+	}
+
 	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
 }
 
-- 
2.49.0
