[AMD Official Use Only - AMD Internal Distribution Only] > -----Original Message----- > From: Alex Deucher <[email protected]> > Sent: Friday, September 19, 2025 8:54 PM > To: Christian König <[email protected]> > Cc: Deucher, Alexander <[email protected]>; Zhu, James > <[email protected]>; SHANMUGAM, SRINIVASAN > <[email protected]>; [email protected] > Subject: Re: [PATCH 2/2] drm/amdgpu: revert "rework reserved VMID handling" > > On Fri, Sep 19, 2025 at 8:29 AM Christian König > <[email protected]> wrote: > > > > This reverts commit e44a0fe630c58b0a87d8281f5c1077a3479e5fce. > > > > Initially we used VMID reservation to enforce isolation between > > processes. That has now been replaced by proper fence handling. > > > > Both OpenGL, RADV and ROCm developers requested a way to reserve a > > VMID for SPM, so restore that approach by reverting back to only > > allowing a single process to use the reserved VMID. > > > > Only compile tested for now. > > > > Signed-off-by: Christian König <[email protected]> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 61 > > ++++++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | > > 11 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 ++----- > > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- > > 4 files changed, 50 insertions(+), 41 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > > index cbdf108612d2..e35f7525fbff 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > > @@ -275,13 +275,12 @@ static int amdgpu_vmid_grab_reserved(struct > > amdgpu_vm *vm, { > > struct amdgpu_device *adev = ring->adev; > > unsigned vmhub = ring->vm_hub; > > - struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; > > uint64_t fence_context = adev->fence_context + ring->idx; > > bool needs_flush = vm->use_cpu_for_update; > > uint64_t updates = amdgpu_vm_tlb_seq(vm); > > int r; > > > > - *id = 
id_mgr->reserved; > > + *id = vm->reserved_vmid[vmhub]; > > if ((*id)->owner != vm->immediate.fence_context || > > !amdgpu_vmid_compatible(*id, job) || > > (*id)->flushed_updates < updates || @@ -474,40 +473,61 @@ > > bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) > > return vm->reserved_vmid[vmhub]; } > > > > -int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, > > +/* > > + * amdgpu_vmid_alloc_reserved - reserve a specific VMID for this vm > > + * @adev: amdgpu device structure > > + * @vm: the VM to reserve an ID for > > + * @vmhub: the VMHUB which should be used > > + * > > + * Mostly used to have a reserved VMID for debugging and SPM. > > + * > > + * Returns: 0 for success, -EINVAL if an ID is already reserved. > > I think -ENOENT or -ENOMEM make more sense. Other than that, looks good to > me. > Acked-by: Alex Deucher <[email protected]> > > > + */ > > +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct > > +amdgpu_vm *vm, > > unsigned vmhub) { > > struct amdgpu_vmid_mgr *id_mgr = > > &adev->vm_manager.id_mgr[vmhub]; > > + struct amdgpu_vmid *id; > > + int r = 0; > > > > mutex_lock(&id_mgr->lock); > > - > > - ++id_mgr->reserved_use_count; > > - if (!id_mgr->reserved) { > > - struct amdgpu_vmid *id; > > - > > - id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, > > - list); > > - /* Remove from normal round robin handling */ > > - list_del_init(&id->list); > > - id_mgr->reserved = id; > > + if (vm->reserved_vmid[vmhub]) > > + goto unlock; > > + if (id_mgr->reserved_vmid) { > > + r = -EINVAL; > > + goto unlock; > > } > > - > > + /* Remove from normal round robin handling */ > > + id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list); > > + list_del_init(&id->list); > > + vm->reserved_vmid[vmhub] = id; > > + id_mgr->reserved_vmid = true; > > mutex_unlock(&id_mgr->lock); > > + > > return 0; > > +unlock: > > + mutex_unlock(&id_mgr->lock); > > + return r; > > } > > > > -void 
amdgpu_vmid_free_reserved(struct amdgpu_device *adev, > > +/* > > + * amdgpu_vmid_free_reserved - free up a reserved VMID again > > + * @adev: amdgpu device structure > > + * @vm: the VM with the reserved ID > > + * @vmhub: the VMHUB which should be used */ void > > +amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct > > +amdgpu_vm *vm, > > unsigned vmhub) { > > struct amdgpu_vmid_mgr *id_mgr = > > &adev->vm_manager.id_mgr[vmhub]; > > > > mutex_lock(&id_mgr->lock); > > - if (!--id_mgr->reserved_use_count) { > > - /* give the reserved ID back to normal round robin */ > > - list_add(&id_mgr->reserved->list, &id_mgr->ids_lru); > > - id_mgr->reserved = NULL; > > + if (vm->reserved_vmid[vmhub]) { > > + list_add(&vm->reserved_vmid[vmhub]->list, > > + &id_mgr->ids_lru); > > + vm->reserved_vmid[vmhub] = NULL; > > + id_mgr->reserved_vmid = false; > > } > > - > > mutex_unlock(&id_mgr->lock); > > } > > > > @@ -574,7 +594,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device > > *adev) > > > > mutex_init(&id_mgr->lock); > > INIT_LIST_HEAD(&id_mgr->ids_lru); > > - id_mgr->reserved_use_count = 0; > > > > /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for > > both GC > and MM */ > > if (amdgpu_ip_version(adev, GC_HWIP, 0) < > > IP_VERSION(10, 0, 0)) diff --git > > a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > > index 240fa6751260..b3649cd3af56 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > > @@ -67,8 +67,7 @@ struct amdgpu_vmid_mgr { > > unsigned num_ids; > > struct list_head ids_lru; > > struct amdgpu_vmid ids[AMDGPU_NUM_VMID]; > > - struct amdgpu_vmid *reserved; > > - unsigned int reserved_use_count; > > + bool reserved_vmid; > > }; > > > > int amdgpu_pasid_alloc(unsigned int bits); @@ -79,10 +78,10 @@ void > > amdgpu_pasid_free_delayed(struct dma_resv *resv, bool > > amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, > > struct amdgpu_vmid *id); bool > > 
amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); > > -int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, > > - unsigned vmhub); > > -void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, > > - unsigned vmhub); > > +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct > amdgpu_vm *vm, > > + unsigned vmhub); void > > +amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm > *vm, > > + unsigned vmhub); > > int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > > struct amdgpu_job *job, struct dma_fence > > **fence); void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned > > vmhub, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > > index 1f8b43253eea..108d2a838ef0 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > > @@ -2788,10 +2788,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, > struct amdgpu_vm *vm) > > dma_fence_put(vm->last_update); > > > > for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) { > > - if (vm->reserved_vmid[i]) { > > - amdgpu_vmid_free_reserved(adev, i); > > - vm->reserved_vmid[i] = false; > > - } > > + amdgpu_vmid_free_reserved(adev, vm, i); > > } > > > > ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move); > > @@ -2887,6 +2884,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void > *data, struct drm_file *filp) > > union drm_amdgpu_vm *args = data; > > struct amdgpu_device *adev = drm_to_adev(dev); > > struct amdgpu_fpriv *fpriv = filp->driver_priv; > > + struct amdgpu_vm *vm = &fpriv->vm; > > > > /* No valid flags defined yet */ > > if (args->in.flags) > > @@ -2895,17 +2893,10 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void > *data, struct drm_file *filp) > > switch (args->in.op) { > > case AMDGPU_VM_OP_RESERVE_VMID: > > /* We only have requirement to reserve vmid from gfxhub */ > > - if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { > > - 
amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); > > - fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true; > > - } > > - > > + amdgpu_vmid_alloc_reserved(adev, vm, > > + AMDGPU_GFXHUB(0));
Thanks, Christian, for these patches. Should we return the error back to userspace (instead of ignoring it) for both vmid_alloc_reserved & free_reserved? Now isolation is done properly using fences (ordering of jobs). VMID reservation is now only for SPM/debug, so developers can get a stable VMID for testing and performance checks without mixing it with isolation. With this understanding, the series is: Acked-by: Srinivasan Shanmugam <[email protected]> > > break; > > case AMDGPU_VM_OP_UNRESERVE_VMID: > > - if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { > > - amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0)); > > - fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false; > > - } > > + amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0)); > > break; > > default: > > return -EINVAL; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > > index 829b400cb8c0..3b9d583358b0 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > > @@ -415,7 +415,7 @@ struct amdgpu_vm { > > struct dma_fence *last_unlocked; > > > > unsigned int pasid; > > - bool reserved_vmid[AMDGPU_MAX_VMHUBS]; > > + struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; > > > > /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ > > bool use_cpu_for_update; > > -- > > 2.43.0 > >
