Thread 1: Runs amdgpu_userq_destroy(), which eventually removes the
queue from the doorbell xarray and sets userq_mgr = NULL.

Thread 2: An interrupt might have scheduled hang_detect_work, which
still needs userq_mgr to be valid but could instead see a NULL
pointer.

To fix that, make sure we cancel hang_detect_work again before
setting userq_mgr to NULL.

Signed-off-by: Sunil Khatri <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index c8f7bb23e2c3..57136b80d62d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -427,7 +427,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue)
        xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
        amdgpu_userq_fence_driver_free(queue);
        queue->fence_drv = NULL;
-       queue->userq_mgr = NULL;
        list_del(&queue->userq_va_list);
 
        up_read(&adev->reset_domain->sem);
@@ -635,6 +634,10 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que
        amdgpu_userq_cleanup(queue);
        mutex_unlock(&uq_mgr->userq_mutex);
 
+       /* In case an interrupt fired and a hang detection work is still pending */
+       cancel_delayed_work_sync(&queue->hang_detect_work);
+       queue->userq_mgr = NULL;
+
        amdgpu_bo_reserve(queue->db_obj.obj, true);
        amdgpu_bo_unpin(queue->db_obj.obj);
        amdgpu_bo_unreserve(queue->db_obj.obj);
-- 
2.34.1

Reply via email to