There is some probability that reset workqueue is blocked by KIQ I/O for 10+ seconds after gpu hangs. So we need to add a in_reset check during each KIQ register poll.
Signed-off-by: Heng Zhou <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 89fc1015d3a6..b6a88103adfd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1102,6 +1102,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + if (amdgpu_in_reset(adev)) { + goto failed_kiq_read; + } msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); } @@ -1172,6 +1175,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + if (amdgpu_in_reset(adev)) { + goto failed_kiq_write; + } msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); } -- 2.43.0
