On Fri, Sep 19, 2025 at 8:29 AM Christian König <[email protected]> wrote: > > This reverts commit e44a0fe630c58b0a87d8281f5c1077a3479e5fce. > > Initially we used VMID reservation to enforce isolation between > processes. That has now been replaced by proper fence handling. > > Both OpenGL, RADV and ROCm developers requested a way to reserve a VMID > for SPM, so restore that approach by reverting back to only allowing a > single process to use the reserved VMID. > > Only compile tested for now. > > Signed-off-by: Christian König <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 61 ++++++++++++++++--------- > drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 11 ++--- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 ++----- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- > 4 files changed, 50 insertions(+), 41 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > index cbdf108612d2..e35f7525fbff 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > @@ -275,13 +275,12 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm > *vm, > { > struct amdgpu_device *adev = ring->adev; > unsigned vmhub = ring->vm_hub; > - struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; > uint64_t fence_context = adev->fence_context + ring->idx; > bool needs_flush = vm->use_cpu_for_update; > uint64_t updates = amdgpu_vm_tlb_seq(vm); > int r; > > - *id = id_mgr->reserved; > + *id = vm->reserved_vmid[vmhub]; > if ((*id)->owner != vm->immediate.fence_context || > !amdgpu_vmid_compatible(*id, job) || > (*id)->flushed_updates < updates || > @@ -474,40 +473,61 @@ bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, > unsigned int vmhub) > return vm->reserved_vmid[vmhub]; > } > > -int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, > +/* > + * amdgpu_vmid_alloc_reserved - reserve a specific VMID for this vm > + * @adev: amdgpu device structure > + * @vm: the VM to reserve an ID for > + * @vmhub: the VMHUB which should be used > + * > + * Mostly used to have a reserved VMID for debugging and SPM. > + * > + * Returns: 0 for success, -EINVAL if an ID is already reserved.
I think -ENOENT or -ENOMEM make more sense. Other than that, looks good to me. Acked-by: Alex Deucher <[email protected]> > + */ > +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm > *vm, > unsigned vmhub) > { > struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; > + struct amdgpu_vmid *id; > + int r = 0; > > mutex_lock(&id_mgr->lock); > - > - ++id_mgr->reserved_use_count; > - if (!id_mgr->reserved) { > - struct amdgpu_vmid *id; > - > - id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, > - list); > - /* Remove from normal round robin handling */ > - list_del_init(&id->list); > - id_mgr->reserved = id; > + if (vm->reserved_vmid[vmhub]) > + goto unlock; > + if (id_mgr->reserved_vmid) { > + r = -EINVAL; > + goto unlock; > } > - > + /* Remove from normal round robin handling */ > + id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list); > + list_del_init(&id->list); > + vm->reserved_vmid[vmhub] = id; > + id_mgr->reserved_vmid = true; > mutex_unlock(&id_mgr->lock); > + > return 0; > +unlock: > + mutex_unlock(&id_mgr->lock); > + return r; > } > > -void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, > +/* > + * amdgpu_vmid_free_reserved - free up a reserved VMID again > + * @adev: amdgpu device structure > + * @vm: the VM with the reserved ID > + * @vmhub: the VMHUB which should be used > + */ > +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm > *vm, > unsigned vmhub) > { > struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; > > mutex_lock(&id_mgr->lock); > - if (!--id_mgr->reserved_use_count) { > - /* give the reserved ID back to normal round robin */ > - list_add(&id_mgr->reserved->list, &id_mgr->ids_lru); > - id_mgr->reserved = NULL; > + if (vm->reserved_vmid[vmhub]) { > + list_add(&vm->reserved_vmid[vmhub]->list, > + &id_mgr->ids_lru); > + vm->reserved_vmid[vmhub] = NULL; > + id_mgr->reserved_vmid = false; > } > - > mutex_unlock(&id_mgr->lock); > } > > @@ -574,7 +594,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) > > mutex_init(&id_mgr->lock); > INIT_LIST_HEAD(&id_mgr->ids_lru); > - id_mgr->reserved_use_count = 0; > > /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both > GC and MM */ > if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, > 0)) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > index 240fa6751260..b3649cd3af56 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > @@ -67,8 +67,7 @@ struct amdgpu_vmid_mgr { > unsigned num_ids; > struct list_head ids_lru; > struct amdgpu_vmid ids[AMDGPU_NUM_VMID]; > - struct amdgpu_vmid *reserved; > - unsigned int reserved_use_count; > + bool reserved_vmid; > }; > > int amdgpu_pasid_alloc(unsigned int bits); > @@ -79,10 +78,10 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, > bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, > struct amdgpu_vmid *id); > bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); > -int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, > - unsigned vmhub); > -void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, > - unsigned vmhub); > +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm > *vm, > + unsigned vmhub); > +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm > *vm, > + unsigned vmhub); > int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > struct amdgpu_job *job, struct dma_fence **fence); > void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 1f8b43253eea..108d2a838ef0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -2788,10 +2788,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct > amdgpu_vm *vm) > dma_fence_put(vm->last_update); > > for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) { > - if (vm->reserved_vmid[i]) { > - amdgpu_vmid_free_reserved(adev, i); > - vm->reserved_vmid[i] = false; > - } > + amdgpu_vmid_free_reserved(adev, vm, i); > } > > ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move); > @@ -2887,6 +2884,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, > struct drm_file *filp) > union drm_amdgpu_vm *args = data; > struct amdgpu_device *adev = drm_to_adev(dev); > struct amdgpu_fpriv *fpriv = filp->driver_priv; > + struct amdgpu_vm *vm = &fpriv->vm; > > /* No valid flags defined yet */ > if (args->in.flags) > @@ -2895,17 +2893,10 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void > *data, struct drm_file *filp) > switch (args->in.op) { > case AMDGPU_VM_OP_RESERVE_VMID: > /* We only have requirement to reserve vmid from gfxhub */ > - if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { > - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); > - fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true; > - } > - > + amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0)); > break; > case AMDGPU_VM_OP_UNRESERVE_VMID: > - if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { > - amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0)); > - fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false; > - } > + amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0)); > break; > default: > return -EINVAL; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > index 829b400cb8c0..3b9d583358b0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > @@ -415,7 +415,7 @@ struct amdgpu_vm { > struct dma_fence *last_unlocked; > > unsigned int pasid; > - bool reserved_vmid[AMDGPU_MAX_VMHUBS]; > + struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; > > /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ > bool use_cpu_for_update; > -- > 2.43.0 >
