Detect queue reset while a queue-scoped WAIT_EVENT waiter is blocked.

If the selected queue enters the AMDGPU_USERQ_STATE_HUNG state while a
queue-scoped WAIT_EVENT wait is in progress, return -EIO instead of
waiting indefinitely.

The queue lookup uses the same doorbell-index-keyed user queue mapping
(adev->userq_doorbell_xa) that the existing USERQ_EOP path uses.

This does not change WAIT_EVENT UAPI semantics.

Cc: Alex Deucher <[email protected]>
Cc: Christian König <[email protected]>
Signed-off-by: Srinivasan Shanmugam <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c     | 19 +++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h     |  3 +++
 .../gpu/drm/amd/amdgpu/amdgpu_wait_event.c    | 14 ++++++++++++++
 3 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 29fd98b7ef50..1fe025785300 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -170,6 +170,25 @@ static void amdgpu_userq_hang_detect_work(struct work_struct *work)
        mutex_unlock(&uq_mgr->userq_mutex);
 }
 
+/* Return -EIO if the queue stored at @queue_id in userq_doorbell_xa is HUNG. */
+int amdgpu_userq_wait_event_check_queue_reset(struct drm_file *file_priv,
+                                             u32 queue_id)
+{
+       struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+       struct amdgpu_device *adev = fpriv->userq_mgr.adev;
+       struct amdgpu_usermode_queue *uq;
+       int ret = 0;
+
+       /* NOTE(review): presumably RCU keeps @uq alive for this read - confirm. */
+       rcu_read_lock();
+       uq = xa_load(&adev->userq_doorbell_xa, queue_id);
+       if (uq && uq->state == AMDGPU_USERQ_STATE_HUNG)
+               ret = -EIO;
+       rcu_read_unlock();
+
+       return ret;
+}
+
 /*
  * Start hang detection for a user queue fence. A delayed work will be scheduled
  * to check if the fence is still pending after the timeout period.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 725c33ab5c44..4f3e7807e561 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -159,6 +159,9 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev);
 int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost);
 void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue);
 
+int amdgpu_userq_wait_event_check_queue_reset(struct drm_file *file_priv,
+                                             u32 queue_id);
+
 int amdgpu_userq_input_va_validate(struct amdgpu_device *adev,
                                   struct amdgpu_usermode_queue *queue,
                                   u64 addr, u64 expected_size);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c
index f900a7f5f90e..3bdf5b4e40ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c
@@ -29,6 +29,9 @@
 #include "amdgpu.h"
 #include "amdgpu_wait_event.h"
 
+int amdgpu_userq_wait_event_check_queue_reset(struct drm_file *file_priv,
+                                             u32 queue_id);
+
 static int amdgpu_wait_event_do_compare(u64 addr, u64 value, u64 mask, u16 op)
 {
        u64 rvalue;
@@ -282,6 +285,13 @@ void amdgpu_wait_event_push_gpu_reset(struct amdgpu_wait_event_mgr *mgr,
        amdgpu_wait_event_push_common(mgr, &data);
 }
 
+static int amdgpu_wait_event_check_queue_reset(struct drm_file *file_priv,
+                                              const struct drm_amdgpu_wait_event *args)
+{
+       /* Thin wrapper: hand the waiter's queue_id to the userq HUNG-state check. */
+       return amdgpu_userq_wait_event_check_queue_reset(file_priv, args->queue_id);
+}
+
 int amdgpu_wait_event_drm_ioctl(struct drm_device *dev, void *data,
                                struct drm_file *file_priv)
 {
@@ -348,6 +358,10 @@ int amdgpu_wait_event_drm_ioctl(struct drm_device *dev, void *data,
                if (signal_pending(current))
                        return -ERESTARTSYS;
 
+               ret = amdgpu_wait_event_check_queue_reset(file_priv, args);
+               if (ret)
+                       return ret;
+
                if (!timeout) {
                        rec = amdgpu_wait_event_peek_match(mgr, args);
                        if (rec) {
-- 
2.34.1

Reply via email to