Signed-off-by: Sunil Khatri <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 16 ++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 1 +
2 files changed, 17 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 0a1fc45f5b4e..1440f51b667f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -109,6 +109,19 @@ static void amdgpu_userq_mgr_reset_work(struct work_struct *work)
if (!amdgpu_gpu_recovery)
return;
+ /*
+ * Prevent concurrent/duplicate reset executions. Both hang_detect_work
+ * (direct call) and evict_all (via schedule+flush_work) can invoke this
+ * function simultaneously. Use an atomic test-and-set so only the first
+ * caller proceeds; the second exits early.
+ *
+ * Note: amdgpu_in_reset() cannot be used here because in_gpu_reset is
+ * only set deep inside amdgpu_device_gpu_recover(), well after we've
+ * already entered this function.
+ */
+ if (atomic_cmpxchg(&uq_mgr->reset_in_progress, 0, 1) != 0)
+ return;
+
/*
* Iterate through all queue types to detect and reset problematic queues
* Process each queue type in the defined order
@@ -145,6 +158,8 @@ static void amdgpu_userq_mgr_reset_work(struct work_struct *work)
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
+
+ atomic_set(&uq_mgr->reset_in_progress, 0);
}
static void amdgpu_userq_hang_detect_work(struct work_struct *work)
@@ -1304,6 +1319,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f
INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
INIT_WORK(&userq_mgr->reset_work, amdgpu_userq_mgr_reset_work);
+ atomic_set(&userq_mgr->reset_in_progress, 0);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 49b33e2d6932..2748ecc0f6c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -129,6 +129,7 @@ struct amdgpu_userq_mgr {
* Reset work which is used when eviction fails.
*/
struct work_struct reset_work;
+ atomic_t reset_in_progress;
atomic_t userq_count[AMDGPU_RING_TYPE_MAX];
};