On 4/8/26 10:30, Sunil Khatri wrote:
> Signal ioctl does not depend on the eviction fence, nor does it
> need to hold userq_mutex, as we just wait for userq fences
> to be signalled before we go to the suspend state in hardware
> and we preempt and unmap the queues.
> 
> This unblocks other threads which hold userq_mutex, allowing them
> to proceed without breaking the code.
> 
> Signed-off-by: Sunil Khatri <[email protected]>
> ---
>  .../drm/amd/amdgpu/amdgpu_eviction_fence.c    | 38 +++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c     | 18 ---------
>  2 files changed, 38 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
> index 5ae477c49a53..53f0bf6b3ca0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
> @@ -54,6 +54,41 @@ static const struct dma_fence_ops 
> amdgpu_eviction_fence_ops = {
>       .enable_signaling = amdgpu_eviction_fence_enable_signaling,
>  };
>  
> +static void
> +amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
> +{
> +     struct amdgpu_usermode_queue *queue;
> +     unsigned long queue_id = 0;
> +     struct dma_fence *f;
> +
> +     for (;;) {
> +             xa_lock(&uq_mgr->userq_xa);
> +             queue = xa_find(&uq_mgr->userq_xa, &queue_id, ULONG_MAX,
> +                             XA_PRESENT);
> +             if (!queue) {
> +                     xa_unlock(&uq_mgr->userq_xa);
> +                     break;
> +             }
> +
> +             kref_get(&queue->refcount);
> +             xa_unlock(&uq_mgr->userq_xa);
> +
> +             mutex_lock(&uq_mgr->userq_mutex);
> +             f = dma_fence_get(queue->last_fence);
> +             mutex_unlock(&uq_mgr->userq_mutex);
> +             if (!f) {
> +                     amdgpu_userq_put(queue);
> +                     queue_id++;
> +                     continue;
> +             }
> +
> +             dma_fence_wait(f, false);

That doesn't look correct to me. We need to hold the userq_mutex while waiting 
for this fence to make sure that userspace doesn't install a new one.

I've already pointed that out on teams.

Regards,
Christian.

> +             dma_fence_put(f);
> +             amdgpu_userq_put(queue);
> +             queue_id++;
> +     }
> +}
> +
>  static void
>  amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
>  {
> @@ -66,6 +101,9 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct 
> *work)
>       struct dma_fence *ev_fence;
>       bool cookie;
>  
> +     /* Wait for any pending userqueue fence work to finish */
> +     amdgpu_userq_wait_for_signal(uq_mgr);
> +
>       mutex_lock(&uq_mgr->userq_mutex);
>  
>       /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index c4e92113b557..1e8c08b63f65 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -1269,27 +1269,9 @@ void amdgpu_userq_reset_work(struct work_struct *work)
>       amdgpu_device_gpu_recover(adev, NULL, &reset_context);
>  }
>  
> -static void
> -amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
> -{
> -     struct amdgpu_usermode_queue *queue;
> -     unsigned long queue_id;
> -
> -     xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
> -             struct dma_fence *f = queue->last_fence;
> -
> -             if (!f)
> -                     continue;
> -
> -             dma_fence_wait(f, false);
> -     }
> -}
> -
>  void
>  amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr)
>  {
> -     /* Wait for any pending userqueue fence work to finish */
> -     amdgpu_userq_wait_for_signal(uq_mgr);
>       amdgpu_userq_evict_all(uq_mgr);
>  }
>  

Reply via email to