On 4/8/26 10:30, Sunil Khatri wrote:
> Signal ioctl does not depend on the eviction fence, nor does it
> need to hold userq_mutex, as we just wait for userq fences
> to be signalled before we go to the suspend state in hardware
> and preempt and unmap the queues.
>
> This unblocks other threads holding userq_mutex so they can
> proceed without breaking the code.
>
> Signed-off-by: Sunil Khatri <[email protected]>
> ---
> .../drm/amd/amdgpu/amdgpu_eviction_fence.c | 38 +++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 18 ---------
> 2 files changed, 38 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
> index 5ae477c49a53..53f0bf6b3ca0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
> @@ -54,6 +54,41 @@ static const struct dma_fence_ops
> amdgpu_eviction_fence_ops = {
> .enable_signaling = amdgpu_eviction_fence_enable_signaling,
> };
>
> +static void
> +amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
> +{
> + struct amdgpu_usermode_queue *queue;
> + unsigned long queue_id = 0;
> + struct dma_fence *f;
> +
> + for (;;) {
> + xa_lock(&uq_mgr->userq_xa);
> + queue = xa_find(&uq_mgr->userq_xa, &queue_id, ULONG_MAX,
> + XA_PRESENT);
> + if (!queue) {
> + xa_unlock(&uq_mgr->userq_xa);
> + break;
> + }
> +
> + kref_get(&queue->refcount);
> + xa_unlock(&uq_mgr->userq_xa);
> +
> + mutex_lock(&uq_mgr->userq_mutex);
> + f = dma_fence_get(queue->last_fence);
> + mutex_unlock(&uq_mgr->userq_mutex);
> + if (!f) {
> + amdgpu_userq_put(queue);
> + queue_id++;
> + continue;
> + }
> +
> + dma_fence_wait(f, false);
That doesn't look correct to me. We need to hold the userq_mutex while waiting
for this fence to make sure that userspace doesn't install a new one.
I've already pointed that out on teams.
Regards,
Christian.
> + dma_fence_put(f);
> + amdgpu_userq_put(queue);
> + queue_id++;
> + }
> +}
> +
> static void
> amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
> {
> @@ -66,6 +101,9 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct
> *work)
> struct dma_fence *ev_fence;
> bool cookie;
>
> + /* Wait for any pending userqueue fence work to finish */
> + amdgpu_userq_wait_for_signal(uq_mgr);
> +
> mutex_lock(&uq_mgr->userq_mutex);
>
> /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index c4e92113b557..1e8c08b63f65 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -1269,27 +1269,9 @@ void amdgpu_userq_reset_work(struct work_struct *work)
> amdgpu_device_gpu_recover(adev, NULL, &reset_context);
> }
>
> -static void
> -amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
> -{
> - struct amdgpu_usermode_queue *queue;
> - unsigned long queue_id;
> -
> - xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
> - struct dma_fence *f = queue->last_fence;
> -
> - if (!f)
> - continue;
> -
> - dma_fence_wait(f, false);
> - }
> -}
> -
> void
> amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr)
> {
> - /* Wait for any pending userqueue fence work to finish */
> - amdgpu_userq_wait_for_signal(uq_mgr);
> amdgpu_userq_evict_all(uq_mgr);
> }
>