amdgpu: revert "rework reserved VMID handling"

Alex Deucher Fri, 19 Sep 2025 08:24:14 -0700

On Fri, Sep 19, 2025 at 8:29 AM Christian König
<[email protected]> wrote:
>
> This reverts commit e44a0fe630c58b0a87d8281f5c1077a3479e5fce.
>
> Initially we used VMID reservation to enforce isolation between
> processes. That has now been replaced by proper fence handling.
>
> Both OpenGL, RADV and ROCm developers requested a way to reserve a VMID
> for SPM, so restore that approach by reverting back to only allowing a
> single process to use the reserved VMID.
>
> Only compile tested for now.
>
> Signed-off-by: Christian König <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 61 ++++++++++++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 11 ++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 17 ++-----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  2 +-
>  4 files changed, 50 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index cbdf108612d2..e35f7525fbff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -275,13 +275,12 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm 
> *vm,
>  {
>         struct amdgpu_device *adev = ring->adev;
>         unsigned vmhub = ring->vm_hub;
> -       struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>         uint64_t fence_context = adev->fence_context + ring->idx;
>         bool needs_flush = vm->use_cpu_for_update;
>         uint64_t updates = amdgpu_vm_tlb_seq(vm);
>         int r;
>
> -       *id = id_mgr->reserved;
> +       *id = vm->reserved_vmid[vmhub];
>         if ((*id)->owner != vm->immediate.fence_context ||
>             !amdgpu_vmid_compatible(*id, job) ||
>             (*id)->flushed_updates < updates ||
> @@ -474,40 +473,61 @@ bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, 
> unsigned int vmhub)
>         return vm->reserved_vmid[vmhub];
>  }
>
> -int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
> +/*
> + * amdgpu_vmid_alloc_reserved - reserve a specific VMID for this vm
> + * @adev: amdgpu device structure
> + * @vm: the VM to reserve an ID for
> + * @vmhub: the VMHUB which should be used
> + *
> + * Mostly used to have a reserved VMID for debugging and SPM.
> + *
> + * Returns: 0 for success, -EINVAL if an ID is already reserved.


I think -ENOENT or -ENOMEM make more sense.  Other than that, looks good to me.
Acked-by: Alex Deucher <[email protected]>

> + */
> +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm 
> *vm,
>                                unsigned vmhub)
>  {
>         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
> +       struct amdgpu_vmid *id;
> +       int r = 0;
>
>         mutex_lock(&id_mgr->lock);
> -
> -       ++id_mgr->reserved_use_count;
> -       if (!id_mgr->reserved) {
> -               struct amdgpu_vmid *id;
> -
> -               id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid,
> -                                     list);
> -               /* Remove from normal round robin handling */
> -               list_del_init(&id->list);
> -               id_mgr->reserved = id;
> +       if (vm->reserved_vmid[vmhub])
> +               goto unlock;
> +       if (id_mgr->reserved_vmid) {
> +               r = -EINVAL;
> +               goto unlock;
>         }
> -
> +       /* Remove from normal round robin handling */
> +       id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
> +       list_del_init(&id->list);
> +       vm->reserved_vmid[vmhub] = id;
> +       id_mgr->reserved_vmid = true;
>         mutex_unlock(&id_mgr->lock);
> +
>         return 0;
> +unlock:
> +       mutex_unlock(&id_mgr->lock);
> +       return r;
>  }
>
> -void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
> +/*
> + * amdgpu_vmid_free_reserved - free up a reserved VMID again
> + * @adev: amdgpu device structure
> + * @vm: the VM with the reserved ID
> + * @vmhub: the VMHUB which should be used
> + */
> +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm 
> *vm,
>                                unsigned vmhub)
>  {
>         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>
>         mutex_lock(&id_mgr->lock);
> -       if (!--id_mgr->reserved_use_count) {
> -               /* give the reserved ID back to normal round robin */
> -               list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
> -               id_mgr->reserved = NULL;
> +       if (vm->reserved_vmid[vmhub]) {
> +               list_add(&vm->reserved_vmid[vmhub]->list,
> +                       &id_mgr->ids_lru);
> +               vm->reserved_vmid[vmhub] = NULL;
> +               id_mgr->reserved_vmid = false;
>         }
> -
>         mutex_unlock(&id_mgr->lock);
>  }
>
> @@ -574,7 +594,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
>
>                 mutex_init(&id_mgr->lock);
>                 INIT_LIST_HEAD(&id_mgr->ids_lru);
> -               id_mgr->reserved_use_count = 0;
>
>                 /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both 
> GC and MM */
>                 if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 
> 0))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> index 240fa6751260..b3649cd3af56 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> @@ -67,8 +67,7 @@ struct amdgpu_vmid_mgr {
>         unsigned                num_ids;
>         struct list_head        ids_lru;
>         struct amdgpu_vmid      ids[AMDGPU_NUM_VMID];
> -       struct amdgpu_vmid      *reserved;
> -       unsigned int            reserved_use_count;
> +       bool                    reserved_vmid;
>  };
>
>  int amdgpu_pasid_alloc(unsigned int bits);
> @@ -79,10 +78,10 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
>  bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
>                                struct amdgpu_vmid *id);
>  bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub);
> -int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
> -                               unsigned vmhub);
> -void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
> -                               unsigned vmhub);
> +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm 
> *vm,
> +                              unsigned vmhub);
> +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm 
> *vm,
> +                              unsigned vmhub);
>  int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>                      struct amdgpu_job *job, struct dma_fence **fence);
>  void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 1f8b43253eea..108d2a838ef0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2788,10 +2788,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
> amdgpu_vm *vm)
>         dma_fence_put(vm->last_update);
>
>         for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
> -               if (vm->reserved_vmid[i]) {
> -                       amdgpu_vmid_free_reserved(adev, i);
> -                       vm->reserved_vmid[i] = false;
> -               }
> +               amdgpu_vmid_free_reserved(adev, vm, i);
>         }
>
>         ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
> @@ -2887,6 +2884,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, 
> struct drm_file *filp)
>         union drm_amdgpu_vm *args = data;
>         struct amdgpu_device *adev = drm_to_adev(dev);
>         struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +       struct amdgpu_vm *vm = &fpriv->vm;
>
>         /* No valid flags defined yet */
>         if (args->in.flags)
> @@ -2895,17 +2893,10 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void 
> *data, struct drm_file *filp)
>         switch (args->in.op) {
>         case AMDGPU_VM_OP_RESERVE_VMID:
>                 /* We only have requirement to reserve vmid from gfxhub */
> -               if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
> -                       amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
> -                       fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true;
> -               }
> -
> +               amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
>                 break;
>         case AMDGPU_VM_OP_UNRESERVE_VMID:
> -               if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
> -                       amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0));
> -                       fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false;
> -               }
> +               amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));
>                 break;
>         default:
>                 return -EINVAL;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 829b400cb8c0..3b9d583358b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -415,7 +415,7 @@ struct amdgpu_vm {
>         struct dma_fence        *last_unlocked;
>
>         unsigned int            pasid;
> -       bool                    reserved_vmid[AMDGPU_MAX_VMHUBS];
> +       struct amdgpu_vmid      *reserved_vmid[AMDGPU_MAX_VMHUBS];
>
>         /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
>         bool                                    use_cpu_for_update;
> --
> 2.43.0
>

Re: [PATCH 2/2] drm/amdgpu: revert "rework reserved VMID handling"

Reply via email to