Detect queue reset while a queue-scoped WAIT_EVENT waiter is blocked. If the selected queue enters the AMDGPU_USERQ_STATE_HUNG state while a queue-scoped WAIT_EVENT is in progress, return -EIO to the waiter instead of letting it block indefinitely.
The queue lookup uses the same doorbell-index keyed user queue mapping used by the existing USERQ_EOP path. This does not change WAIT_EVENT UAPI semantics. Cc: Alex Deucher <[email protected]> Cc: Christian König <[email protected]> Signed-off-by: Srinivasan Shanmugam <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 3 +++ .../gpu/drm/amd/amdgpu/amdgpu_wait_event.c | 14 ++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 29fd98b7ef50..1fe025785300 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -170,6 +170,25 @@ static void amdgpu_userq_hang_detect_work(struct work_struct *work) mutex_unlock(&uq_mgr->userq_mutex); } +int amdgpu_userq_wait_event_check_queue_reset(struct drm_file *file_priv, + u32 queue_id) +{ + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_usermode_queue *queue; + + rcu_read_lock(); + queue = xa_load(&adev->userq_doorbell_xa, queue_id); + if (queue && queue->state == AMDGPU_USERQ_STATE_HUNG) { + rcu_read_unlock(); + return -EIO; + } + rcu_read_unlock(); + + return 0; +} + /* * Start hang detection for a user queue fence. A delayed work will be scheduled * to check if the fence is still pending after the timeout period. 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 725c33ab5c44..4f3e7807e561 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -159,6 +159,9 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev); int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost); void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue); +int amdgpu_userq_wait_event_check_queue_reset(struct drm_file *file_priv, + u32 queue_id); + int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue, u64 addr, u64 expected_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c index f900a7f5f90e..3bdf5b4e40ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_wait_event.c @@ -29,6 +29,9 @@ #include "amdgpu.h" #include "amdgpu_wait_event.h" +int amdgpu_userq_wait_event_check_queue_reset(struct drm_file *file_priv, + u32 queue_id); + static int amdgpu_wait_event_do_compare(u64 addr, u64 value, u64 mask, u16 op) { u64 rvalue; @@ -282,6 +285,13 @@ void amdgpu_wait_event_push_gpu_reset(struct amdgpu_wait_event_mgr *mgr, amdgpu_wait_event_push_common(mgr, &data); } +static int amdgpu_wait_event_check_queue_reset(struct drm_file *file_priv, + const struct drm_amdgpu_wait_event *args) +{ + return amdgpu_userq_wait_event_check_queue_reset(file_priv, + args->queue_id); +} + int amdgpu_wait_event_drm_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -348,6 +358,10 @@ int amdgpu_wait_event_drm_ioctl(struct drm_device *dev, void *data, if (signal_pending(current)) return -ERESTARTSYS; + ret = amdgpu_wait_event_check_queue_reset(file_priv, args); + if (ret) + return ret; + if (!timeout) { rec = amdgpu_wait_event_peek_match(mgr, args); if (rec) { -- 2.34.1
