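There is some probability that the reset workqueue is blocked by KIQ I/O for
10+ seconds after a GPU hang.
So add an amdgpu_in_reset() check to each iteration of the KIQ register
polling loops, so that a pending KIQ register read or write bails out as soon
as a reset is in progress instead of retrying until the poll times out.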

Signed-off-by: Heng Zhou <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 89fc1015d3a6..7f02e36ccc1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1102,6 +1102,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_
 
        might_sleep();
        while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+               if (amdgpu_in_reset(adev))
+                       goto failed_kiq_read;
+
                msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
                r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
        }
@@ -1171,6 +1174,8 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3
 
        might_sleep();
        while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+               if (amdgpu_in_reset(adev))
+                       goto failed_kiq_write;
 
                msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
                r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-- 
2.43.0

Reply via email to