[Public]
Regards,
Prike
> -----Original Message-----
> From: Alex Deucher <[email protected]>
> Sent: Thursday, August 28, 2025 6:13 AM
> To: Liang, Prike <[email protected]>; Koenig, Christian
> <[email protected]>
> Cc: [email protected]; Deucher, Alexander
> <[email protected]>
> Subject: Re: [PATCH v9 08/14] drm/amdgpu: add userq object va track helpers
>
> On Wed, Aug 27, 2025 at 8:07 AM Liang, Prike
> <[email protected]> wrote:
> >
> > [Public]
> >
> > Regards,
> > Prike
> >
> > > -----Original Message-----
> > > From: Alex Deucher <[email protected]>
> > > Sent: Tuesday, August 26, 2025 11:22 PM
> > > To: Liang, Prike <[email protected]>
> > > Cc: [email protected]<mailto:[email protected]>;
> > > Deucher, Alexander
> > > <[email protected]<mailto:[email protected]>>; Koenig,
> > > Christian
> > > <[email protected]<mailto:[email protected]>>
> > > Subject: Re: [PATCH v9 08/14] drm/amdgpu: add userq object va track
> > > helpers
> > >
> > > On Tue, Aug 26, 2025 at 3:47 AM Prike Liang
> > > <[email protected]> wrote:
> > > >
> > > > Add the userq object virtual address get(), mapped() and put()
> > > > helpers for tracking the userq object VA usage.
> > > >
> > > > Signed-off-by: Prike Liang <[email protected]>
> > > > ---
> > > > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c  | 172 ++++++++++++++++++++-
> > > > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h  |  14 ++
> > > > drivers/gpu/drm/amd/amdgpu/mes_userqueue.c |   4 +
> > > > 3 files changed, 189 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > > > index 0aeb7a96ccbf..562d12f9d0d2 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > > > @@ -76,6 +76,174 @@ int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
> > > > return r;
> > > > }
> > > >
> > > > +int amdgpu_userq_buffer_va_get(struct amdgpu_vm *vm, u64 addr) {
> > > > + struct amdgpu_bo_va_mapping *mapping;
> > > > + u64 user_addr;
> > > > + int r;
> > > > +
> > > > + user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
> > > > + r = amdgpu_bo_reserve(vm->root.bo, false);
> > > > + if (r)
> > > > + return r;
> > > > +
> > > > + mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
> > > > + if (!mapping)
> > > > + goto out_err;
> > > > +
> > > > + /*
> > > > + * Need to unify the following userq va reference.
> > > > + * mqd bo
> > > > + * rptr bo
> > > > + * wptr bo
> > > > + * eop bo
> > > > + * shadow bo
> > > > + * csa bo
> > > > + */
> > > > + /*amdgpu_bo_ref(mapping->bo_va->base.bo);*/
> > > > + mapping->bo_va->queue_refcount++;
> > > > +
> > > > + amdgpu_bo_unreserve(vm->root.bo);
> > > > + return 0;
> > > > +
> > > > +out_err:
> > > > + amdgpu_bo_unreserve(vm->root.bo);
> > > > + return -EINVAL;
> > > > +}
> > > > +
> > > > +bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr) {
> > > > + struct amdgpu_bo_va_mapping *mapping;
> > > > + u64 user_addr;
> > > > + bool r;
> > > > +
> > > > + user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >>
> > > > + AMDGPU_GPU_PAGE_SHIFT;
> > > > +
> > > > + if (amdgpu_bo_reserve(vm->root.bo, false))
> > > > + return false;
> > > > +
> > > > + mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
> > > > + if (!IS_ERR_OR_NULL(mapping) && mapping->bo_va->queue_refcount > 0)
> > > > + r = true;
> > > > + else
> > > > + r = false;
> > > > + amdgpu_bo_unreserve(vm->root.bo);
> > > > +
> > > > + return r;
> > > > +}
> > > > +
> > > > +bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_vm *vm,
> > > > + struct amdgpu_usermode_queue *queue) {
> > > > +
> > > > + switch (queue->queue_type) {
> > > > + case AMDGPU_HW_IP_GFX:
> > > > + if (amdgpu_userq_buffer_va_mapped(vm, queue->queue_va) ||
> > > > + amdgpu_userq_buffer_va_mapped(vm, queue->rptr_va) ||
> > > > + amdgpu_userq_buffer_va_mapped(vm, queue->wptr_va) ||
> > > > + amdgpu_userq_buffer_va_mapped(vm, queue->shadow_va) ||
> > > > + amdgpu_userq_buffer_va_mapped(vm, queue->csa_va))
> > > > + return true;
> > > > + break;
> > > > + case AMDGPU_HW_IP_COMPUTE:
> > > > + if (amdgpu_userq_buffer_va_mapped(vm, queue->queue_va) ||
> > > > + amdgpu_userq_buffer_va_mapped(vm, queue->rptr_va) ||
> > > > + amdgpu_userq_buffer_va_mapped(vm, queue->wptr_va) ||
> > > > + amdgpu_userq_buffer_va_mapped(vm, queue->eop_va))
> > > > + return true;
> > > > + break;
> > > > + case AMDGPU_HW_IP_DMA:
> > > > + if (amdgpu_userq_buffer_va_mapped(vm, queue->queue_va) ||
> > > > + amdgpu_userq_buffer_va_mapped(vm, queue->rptr_va) ||
> > > > + amdgpu_userq_buffer_va_mapped(vm, queue->wptr_va) ||
> > > > + amdgpu_userq_buffer_va_mapped(vm, queue->csa_va))
> > > > + return true;
> > > > + break;
> > > > + default:
> > > > + break;
> > > > + }
> > > > +
> > > > + return false;
> > > > +}
> > > > +
> > > > +int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64 addr) {
> > > > + struct amdgpu_bo_va_mapping *mapping;
> > > > + u64 user_addr;
> > > > + int r;
> > > > +
> > > > + user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
> > > > + r = amdgpu_bo_reserve(vm->root.bo, false);
> > > > + if (r)
> > > > + return r;
> > > > +
> > > > + mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
> > > > + if (!mapping)
> > > > + goto out_err;
> > > > + /*
> > > > + * TODO: It requires figuring out the root cause of the userq va
> > > > + * mapping reference imbalance issue.
> > > > + */
> > > > + /*amdgpu_bo_unref(&mapping->bo_va->base.bo);*/
> > > > + mapping->bo_va->queue_refcount--;
> > > > +
> > > > + amdgpu_bo_unreserve(vm->root.bo);
> > > > + return 0;
> > > > +
> > > > +out_err:
> > > > + amdgpu_bo_unreserve(vm->root.bo);
> > > > + return -EINVAL;
> > > > +}
> > > > +
> > > > +static void amdgpu_userq_buffer_vas_get(struct amdgpu_vm *vm,
> > > > + struct amdgpu_usermode_queue *queue) {
> > > > +
> > > > +
> > > > + amdgpu_userq_buffer_va_get(vm, queue->queue_va);
> > > > + amdgpu_userq_buffer_va_get(vm, queue->rptr_va);
> > > > + amdgpu_userq_buffer_va_get(vm, queue->wptr_va);
> > > > +
> > > > + switch (queue->queue_type) {
> > > > + case AMDGPU_HW_IP_GFX:
> > > > + amdgpu_userq_buffer_va_get(vm, queue->shadow_va);
> > > > + amdgpu_userq_buffer_va_get(vm, queue->csa_va);
> > > > + break;
> > > > + case AMDGPU_HW_IP_COMPUTE:
> > > > + amdgpu_userq_buffer_va_get(vm, queue->eop_va);
> > > > + break;
> > > > + case AMDGPU_HW_IP_DMA:
> > > > + amdgpu_userq_buffer_va_get(vm, queue->csa_va);
> > > > + break;
> > > > + default:
> > > > + break;
> > > > + }
> > > > +}
> > > > +
> > > > +int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
> > > > + struct amdgpu_usermode_queue *queue) {
> > > > + amdgpu_userq_buffer_va_put(vm, queue->queue_va);
> > > > + amdgpu_userq_buffer_va_put(vm, queue->rptr_va);
> > > > + amdgpu_userq_buffer_va_put(vm, queue->wptr_va);
> > > > +
> > > > + switch (queue->queue_type) {
> > > > + case AMDGPU_HW_IP_GFX:
> > > > + amdgpu_userq_buffer_va_put(vm, queue->shadow_va);
> > > > + amdgpu_userq_buffer_va_put(vm, queue->csa_va);
> > > > + break;
> > > > + case AMDGPU_HW_IP_COMPUTE:
> > > > + amdgpu_userq_buffer_va_put(vm, queue->eop_va);
> > > > + break;
> > > > + case AMDGPU_HW_IP_DMA:
> > > > + amdgpu_userq_buffer_va_put(vm, queue->csa_va);
> > > > + break;
> > > > + default:
> > > > + break;
> > > > + }
> > > > + return 0;
> > > > +}
> > > > +
> > > > static int
> > > > amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
> > > > struct amdgpu_usermode_queue *queue)
> > > > @@ -445,6 +613,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> > > > queue->vm = &fpriv->vm;
> > > > queue->priority = priority;
> > > > queue->generation = amdgpu_vm_generation(adev,
> > > > &fpriv->vm);
> > > > + queue->queue_va = args->in.queue_va;
> > > > + queue->rptr_va = args->in.rptr_va;
> > > > + queue->wptr_va = args->in.wptr_va;
> > > >
> > > > db_info.queue_type = queue->queue_type;
> > > > db_info.doorbell_handle = queue->doorbell_handle;
> > > > @@ -475,7 +646,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> > > > goto unlock;
> > > > }
> > > >
> > > > -
> > > > qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
> > > > if (qid < 0) {
> > > > drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > > > index 0eb2a9c2e340..30067f80eadf 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > > > @@ -54,6 +54,13 @@ struct amdgpu_usermode_queue {
> > > > enum amdgpu_userq_state state;
> > > > uint64_t doorbell_handle;
> > > > uint64_t doorbell_index;
> > > > + uint64_t queue_va;
> > > > + uint64_t rptr_va;
> > > > + uint64_t wptr_va;
> > > > + uint64_t eop_va;
> > > > + uint64_t shadow_va;
> > > > + uint64_t csa_va;
> > >
> > > Just store a list of critical virtual addresses. Otherwise we are
> > > going to have a ton of IP specific things in here. For each critical
> > > address, just
> push the address on the list.
> > > Then in the VM unmap code, just walk the list for each queue and if
> > > the user tries to umap a critical buffer, preempt the queue and set an
> > > error on it.
> > Is tracking the queue_va enough to validate the userq VA unmap operation?
> > I proposed a similar solution for retrieving the userq by walking over the
> > VA list, but Christian rejected it because of the overhead.
> >
>
> I think queue creation and unmap are the only cases that we care about. We don't
> want to create a queue with an invalid address, and we don't want to unmap a
> virtual address that is critical for the queue before the queue is destroyed.
>
> I think all we need to do in the unmap case is to walk the critical VA list for
> each user queue associated with the VM and check it.
> @Christian Koenig did you have something else in mind?
• Limiting tracking to a subset of the user-queue VAs would lower the overhead.
Would tracking only queue_va, rptr_va, and wptr_va be sufficient to validate
user-queue VA unmaps?
• However, partial tracking risks missing invalid unmap events on untracked VAs.
• Patch #14 can detect an invalid user-queue VA unmap by checking
bo_va->queue_refcount, but if we only look up a partial VA list, we may still
fail to identify the specific queue whose VA was improperly unmapped, and
therefore cannot preempt and mark that queue as invalid. Thoughts? A rough
sketch of how I read the critical VA list suggestion is below.
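
To make sure I understand the proposal, here is a minimal sketch of the
per-queue critical VA list. The struct and helper names
(amdgpu_userq_critical_va, amdgpu_userq_track_critical_va,
amdgpu_userq_va_is_critical) are only placeholders for illustration, and
critical_va_list is an assumed new list_head member of
struct amdgpu_usermode_queue initialized at queue creation, not existing code:

/*
 * Sketch only: critical_va_list is an assumed new list_head member of
 * struct amdgpu_usermode_queue.
 */
struct amdgpu_userq_critical_va {
	struct list_head node;
	u64 va;			/* GPU VA the queue cannot run without */
};

/* Push one critical buffer VA onto the queue's list at creation time. */
static int amdgpu_userq_track_critical_va(struct amdgpu_usermode_queue *queue,
					  u64 va)
{
	struct amdgpu_userq_critical_va *entry;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->va = va;
	list_add_tail(&entry->node, &queue->critical_va_list);
	return 0;
}

/*
 * From the VM unmap path: walk each queue of the VM and check whether the
 * VA being unmapped is critical; if so, the caller would preempt the queue
 * and mark it invalid.
 */
static bool amdgpu_userq_va_is_critical(struct amdgpu_usermode_queue *queue,
					u64 va)
{
	struct amdgpu_userq_critical_va *entry;

	list_for_each_entry(entry, &queue->critical_va_list, node) {
		if (entry->va == va)
			return true;
	}

	return false;
}

If this matches what you had in mind, tracking all of the queue VAs this way
would also let us drop the IP-specific switch statements above.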
Thanks,
Prike
> Thanks,
>
> Alex
>
> > > Alex
> > >
> > > > +
> > > > uint64_t flags;
> > > > struct amdgpu_mqd_prop *userq_prop;
> > > > struct amdgpu_userq_mgr *userq_mgr;
> > > > @@ -137,4 +144,11 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
> > > >
> > > > int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
> > > > u64 expected_size);
> > > > +int amdgpu_userq_buffer_va_get(struct amdgpu_vm *vm, u64 addr);
> > > > +bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr);
> > > > +bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_vm *vm,
> > > > + struct amdgpu_usermode_queue *queue);
> > > > +int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64 addr);
> > > > +int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
> > > > + struct amdgpu_usermode_queue *queue);
> > > > #endif
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > > > b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > > > index 6e29e85bbf9f..42d6cd90be59 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > > > @@ -262,6 +262,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
> > > > userq_props->hqd_active = false;
> > > > userq_props->tmz_queue =
> > > > mqd_user->flags &
> > > > AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
> > > > + queue->eop_va = compute_mqd->eop_va;
> > > > kfree(compute_mqd);
> > > > } else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
> > > > struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
> > > > @@ -283,6 +284,8 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
> > > > userq_props->csa_addr = mqd_gfx_v11->csa_va;
> > > > userq_props->tmz_queue =
> > > > mqd_user->flags &
> > > > AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
> > > > + queue->shadow_va = mqd_gfx_v11->shadow_va;
> > > > + queue->csa_va = mqd_gfx_v11->csa_va;
> > > >
> > > > if (amdgpu_userq_input_va_validate(queue->vm, mqd_gfx_v11->shadow_va,
> > > > shadow_info.shadow_size)) {
> > > > @@ -314,6 +317,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
> > > > }
> > > >
> > > > userq_props->csa_addr = mqd_sdma_v11->csa_va;
> > > > + queue->csa_va = mqd_sdma_v11->csa_va;
> > > > kfree(mqd_sdma_v11);
> > > > }
> > > >
> > > > --
> > > > 2.34.1
> > > >