On Fri, Sep 12, 2025 at 2:04 AM Prike Liang <[email protected]> wrote:
>
> When an user unmaps a userq VA, the driver must ensure
> the queue has no in-flight jobs. If there is pending work,
> the kernel should wait for the attached eviction (bookkeeping)
> fence to signal before deleting the mapping.
>
> Suggested-by: Christian König <[email protected]>
> Signed-off-by: Prike Liang <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 29 +++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 3 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 11 +++++++++
> 3 files changed, 43 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index a13f186f0a8a..e14dcdcfe36e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -1182,3 +1182,32 @@ int
> amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
> mutex_unlock(&adev->userq_mutex);
> return ret;
> }
> +
> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
> + struct amdgpu_bo_va_mapping *mapping,
> + uint64_t saddr)
> +{
> + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
> + struct amdgpu_bo_va *bo_va = mapping->bo_va;
> + struct dma_resv *resv = bo_va->base.bo->tbo.base.resv;
> + int ret;
> +
> + if (!ip_mask)
> + return 0;
> +
> + dev_warn(adev->dev, "now unmapping a vital queue va:%llx\n", saddr);
dev_warn_once() so we don't spam the logs.
> + /**
> + * The userq VA mapping reservation should include the eviction
> fence, if the eviction fence
> + * can't signal successfully during unmapping, then driver will warn
> to flag this improper unmap
> + * of the userq VA.
> + * Note: The eviction fence may be attached to different BOs, and
> this unmap is only for one kind
> + * of userq VAs, so at this point suppose the eviction fence is
> always unsignaled.
> + */
> + if (!dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) {
> + ret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
> true, MAX_SCHEDULE_TIMEOUT);
> + if (ret <= 0)
> + return -EBUSY;
> + }
> +
> + return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> index 8cd307be7256..c9a41876f10e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> @@ -157,4 +157,7 @@ int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64
> addr);
> int amdgpu_userq_buffer_vas_put(struct amdgpu_device *adev,
> struct amdgpu_usermode_queue *queue);
> bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue);
> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
> + struct amdgpu_bo_va_mapping *mapping,
> + uint64_t saddr);
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index bd12d8ff15a4..ccde1f040cef 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1941,6 +1941,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
> struct amdgpu_bo_va_mapping *mapping;
> struct amdgpu_vm *vm = bo_va->base.vm;
> bool valid = true;
> + int r;
>
> saddr /= AMDGPU_GPU_PAGE_SIZE;
>
> @@ -1961,6 +1962,16 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
> return -ENOENT;
> }
>
> + /* It's unlikely to happen that the mapping userq hasn't been idled
> + * during user requests GEM unmap IOCTL except for forcing the unmap
> + * from user space.
> + */
> + if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) {
> + r = amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr);
> + if (unlikely(r == -EBUSY))
> + dev_warn(adev->dev, "Here should be an improper unmap
> request from user space\n");
dev_warn_once().
This looks good to me, but it would be good to get Christian's input as well.
Alex
> + }
> +
> list_del(&mapping->list);
> amdgpu_vm_it_remove(mapping, &vm->va);
> mapping->bo_va = NULL;
> --
> 2.34.1
>