[AMD Official Use Only - AMD Internal Distribution Only]

> -----Original Message-----
> From: Koenig, Christian <[email protected]>
> Sent: Wednesday, September 24, 2025 8:18 PM
> To: Zhang, Jesse(Jie) <[email protected]>; [email protected]
> Cc: Deucher, Alexander <[email protected]>
> Subject: Re: [v6 1/2] drm/amdgpu: Convert amdgpu userqueue management from
> IDR to XArray
>
> On 24.09.25 12:26, Zhang, Jesse(Jie) wrote:
> > [AMD Official Use Only - AMD Internal Distribution Only]
> >
> >> -----Original Message-----
> >> From: Koenig, Christian <[email protected]>
> >> Sent: Wednesday, September 24, 2025 5:29 PM
> >> To: Zhang, Jesse(Jie) <[email protected]>;
> >> [email protected]
> >> Cc: Deucher, Alexander <[email protected]>
> >> Subject: Re: [v6 1/2] drm/amdgpu: Convert amdgpu userqueue management
> >> from IDR to XArray
> >>
> >> On 24.09.25 09:34, Jesse.Zhang wrote:
> >>> This commit refactors the AMDGPU userqueue management subsystem to
> >>> replace IDR (ID Allocation) with XArray for improved performance,
> >>> scalability, and maintainability. The changes address several issues
> >>> with the previous IDR implementation and provide better locking semantics.
> >>>
> >>> Key changes:
> >>>
> >>> 1. **Global XArray Introduction**:
> >>>    - Added `userq_doorbell_xa` to `struct amdgpu_device` for global
> >>> queue
> >> tracking
> >>>    - Uses doorbell_index as key for efficient global lookup
> >>>    - Replaces the previous `userq_mgr_list` linked list approach
> >>>
> >>> 2. **Per-process XArray Conversion**:
> >>>    - Replaced `userq_idr` with `userq_mgr_xa` in `struct amdgpu_userq_mgr`
> >>>    - Maintains per-process queue tracking with queue_id as key
> >>>    - Uses XA_FLAGS_ALLOC for automatic ID allocation
> >>>
> >>> 3. **Locking Improvements**:
> >>>    - Removed global `userq_mutex` from `struct amdgpu_device`
> >>>    - Replaced with fine-grained XArray locking using XArray's
> >>> internal spinlocks
> >>>
> >>> 4. **Runtime Idle Check Optimization**:
> >>>    - Updated `amdgpu_runtime_idle_check_userq()` to use xa_empty
> >>>
> >>> 5. **Queue Management Functions**:
> >>>    - Converted all IDR operations to equivalent XArray functions:
> >>>      - `idr_alloc()` → `xa_alloc()`
> >>>      - `idr_find()` → `xa_load()`
> >>>      - `idr_remove()` → `xa_erase()`
> >>>      - `idr_for_each()` → `xa_for_each()`
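[Zhang, Jesse(Jie)] For reference, the mapping above boils down to the
following pattern (a minimal sketch with placeholder names such as
example_mgr, not the actual driver code). One subtlety when converting:
idr_alloc()'s upper bound is exclusive, while XA_LIMIT()'s is inclusive, so
the limits shift by one.

  #include <linux/xarray.h>

  struct example_mgr {
          struct xarray xa;               /* was: struct idr idr; */
  };

  static void example_mgr_init(struct example_mgr *mgr)
  {
          /* XA_FLAGS_ALLOC is required for xa_alloc() to work */
          xa_init_flags(&mgr->xa, XA_FLAGS_ALLOC);
  }

  static int example_add(struct example_mgr *mgr, void *obj, u32 *id)
  {
          /* was: idr_alloc(&mgr->idr, obj, 1, 1024, GFP_KERNEL), which
           * allocates from [1, 1024); XA_LIMIT(1, 1023) is the equivalent.
           */
          return xa_alloc(&mgr->xa, id, obj, XA_LIMIT(1, 1023), GFP_KERNEL);
  }

  static void *example_find(struct example_mgr *mgr, u32 id)
  {
          return xa_load(&mgr->xa, id);   /* was: idr_find(&mgr->idr, id) */
  }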
> >>>
> >>> Benefits:
> >>> - **Performance**: XArray provides better scalability for large
> >>> numbers of queues
> >>> - **Memory Efficiency**: Reduced memory overhead compared to IDR
> >>> - **Thread Safety**: Improved locking semantics with XArray's
> >>> internal spinlocks
> >>>
> >>> v2: rename userq_global_xa/userq_xa to userq_doorbell_xa/userq_mgr_xa
> >>>     Remove xa_lock and use its own lock.
> >>>
> >>> v3: Set queue->userq_mgr = uq_mgr in amdgpu_userq_create()
> >>>
> >>> Suggested-by: Christian König <[email protected]>
> >>> Signed-off-by: Jesse Zhang <[email protected]>
> >>> ---
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   8 +-
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |   3 +-
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |  16 +-
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c     | 143 +++++++++---------
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h     |   8 +-
> >>>  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   |   4 +-
> >>>  drivers/gpu/drm/amd/amdgpu/mes_userqueue.c    |  22 ++-
> >>>  7 files changed, 97 insertions(+), 107 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> index 2a0df4cabb99..ae10832a09f4 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> @@ -1174,6 +1174,12 @@ struct amdgpu_device {
> >>>      * queue fence.
> >>>      */
> >>>     struct xarray                   userq_xa;
> >>> +   /**
> >>> +    * @userq_doorbell_xa: Global user queue map (doorbell index → queue)
> >>> +    * Key: doorbell_index (unique global identifier for the queue)
> >>> +    * Value: struct amdgpu_usermode_queue
> >>> +    */
> >>> +   struct xarray userq_doorbell_xa;
> >>>
> >>>     /* df */
> >>>     struct amdgpu_df                df;
> >>> @@ -1308,8 +1314,6 @@ struct amdgpu_device {
> >>>      */
> >>>     bool                            apu_prefer_gtt;
> >>>
> >>> -   struct list_head                userq_mgr_list;
> >>> -   struct mutex                    userq_mutex;
> >>>     bool                            userq_halt_for_enforce_isolation;
> >>>     struct amdgpu_uid *uid_info;
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> index 0fdfde3dcb9f..e066f7ff2a82 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> @@ -4483,7 +4483,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >>>     mutex_init(&adev->gfx.userq_sch_mutex);
> >>>     mutex_init(&adev->gfx.workload_profile_mutex);
> >>>     mutex_init(&adev->vcn.workload_profile_mutex);
> >>> -   mutex_init(&adev->userq_mutex);
> >>>
> >>>     amdgpu_device_init_apu_flags(adev);
> >>>
> >>> @@ -4511,7 +4510,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >>>
> >>>     INIT_LIST_HEAD(&adev->pm.od_kobj_list);
> >>>
> >>> -   INIT_LIST_HEAD(&adev->userq_mgr_list);
> >>> +   xa_init(&adev->userq_doorbell_xa);
> >>>
> >>>     INIT_DELAYED_WORK(&adev->delayed_init_work,
> >>>                       amdgpu_device_delayed_init_work_handler);
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> index ece251cbe8c3..fdea7cd4b3ce 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> @@ -2771,22 +2771,8 @@ static int amdgpu_runtime_idle_check_userq(struct device *dev)
> >>>     struct pci_dev *pdev = to_pci_dev(dev);
> >>>     struct drm_device *drm_dev = pci_get_drvdata(pdev);
> >>>     struct amdgpu_device *adev = drm_to_adev(drm_dev);
> >>> -   struct amdgpu_usermode_queue *queue;
> >>> -   struct amdgpu_userq_mgr *uqm, *tmp;
> >>> -   int queue_id;
> >>> -   int ret = 0;
> >>> -
> >>> -   mutex_lock(&adev->userq_mutex);
> >>> -   list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
> >>> -           idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
> >>> -                   ret = -EBUSY;
> >>> -                   goto done;
> >>> -           }
> >>> -   }
> >>> -done:
> >>> -   mutex_unlock(&adev->userq_mutex);
> >>>
> >>> -   return ret;
> >>> +   return xa_empty(&adev->userq_doorbell_xa) ? 0 : -EBUSY;
> >>>  }
> >>>
> >>>  static int amdgpu_pmops_runtime_suspend(struct device *dev)
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >>> index b649ac0cedff..0712b88782c8 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >>> @@ -180,17 +180,28 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
> >>>  {
> >>>     struct amdgpu_device *adev = uq_mgr->adev;
> >>>     const struct amdgpu_userq_funcs *uq_funcs =
> >>> adev->userq_funcs[queue->queue_type];
> >>> +   unsigned long flags;
> >>>
> >>>     uq_funcs->mqd_destroy(uq_mgr, queue);
> >>>     amdgpu_userq_fence_driver_free(queue);
> >>> -   idr_remove(&uq_mgr->userq_idr, queue_id);
> >>> +
> >>> +   /* Use interrupt-safe locking since IRQ handlers may access these XArrays */
> >>
> >> Wait a second where are IRQ handlers accessing those XAs?
> > [Zhang, Jesse(Jie)] gfx_v11_0_eop_irq on gfx11 also accesses these XAs.
> > With lockdep enabled, a warning will appear without xa_lock_irqsave.
>
> Yeah, but only the fence_drv xa needs to be irq safe.
>
> If the uq_mgr->userq_mgr_xa is used in the irq handler then something seems
> to be wrong here.
>
> > We can replace it with xa_erase_irq, just like you suggested with 
> > xa_store_irq.
>
> Yeah that's certainly a good idea.
>
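[Zhang, Jesse(Jie)] To make that concrete, the cleanup path would then look
roughly like this (untested sketch; only the global doorbell XArray keeps the
IRQ-safe variant, since the per-process XArray should never be touched from
interrupt context):

  /* in amdgpu_userq_cleanup(), roughly: */
  xa_erase(&uq_mgr->userq_mgr_xa, (unsigned long)queue_id);
  xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);

xa_erase_irq() takes the xa_lock with spin_lock_irq() internally, so the
open-coded xa_lock_irqsave()/__xa_erase()/xa_unlock_irqrestore() sequence and
the local "flags" variable can go away.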
> >
> >>
> >>> +   xa_lock_irqsave(&uq_mgr->userq_mgr_xa, flags);
> >>> +   __xa_erase(&uq_mgr->userq_mgr_xa, (unsigned long)queue_id);
> >>> +   xa_unlock_irqrestore(&uq_mgr->userq_mgr_xa, flags);
> >>> +
> >>> +   xa_lock_irqsave(&adev->userq_doorbell_xa, flags);
> >>> +   __xa_erase(&adev->userq_doorbell_xa, queue->doorbell_index);
> >>> +   xa_unlock_irqrestore(&adev->userq_doorbell_xa, flags);
> >>> +
> >>> +   queue->userq_mgr = NULL;
> >>>     kfree(queue);
> >>>  }
> >>>
> >>>  static struct amdgpu_usermode_queue *
> >>>  amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
> >>>  {
> >>> -   return idr_find(&uq_mgr->userq_idr, qid);
> >>> +   return xa_load(&uq_mgr->userq_mgr_xa, qid);
> >>>  }
> >>>
> >>>  void
> >>> @@ -462,8 +473,10 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> >>>     struct amdgpu_db_info db_info;
> >>>     char *queue_name;
> >>>     bool skip_map_queue;
> >>> +   unsigned long flags;
> >>> +   u32 qid;
> >>>     uint64_t index;
> >>> -   int qid, r = 0;
> >>> +   int r = 0;
> >>>     int priority =
> >>>             (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
> >>>             AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
> >>> @@ -486,7 +499,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> >>>      *
> >>>      * This will also make sure we have a valid eviction fence ready to 
> >>> be used.
> >>>      */
> >>> -   mutex_lock(&adev->userq_mutex);
> >>>     amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr,
> >>> &fpriv->evf_mgr);
> >>>
> >>>     uq_funcs = adev->userq_funcs[args->in.ip_type];
> >>> @@ -546,9 +558,17 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> >>>             goto unlock;
> >>>     }
> >>>
> >>> +   xa_lock_irqsave(&adev->userq_doorbell_xa, flags);
> >>> +   r = xa_err(__xa_store(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
> >>> +   if (r) {
> >>> +           kfree(queue);
> >>> +           xa_unlock_irqrestore(&adev->userq_doorbell_xa, flags);
> >>> +           goto unlock;
> >>> +   }
> >>> +   xa_unlock_irqrestore(&adev->userq_doorbell_xa, flags);
> >>
> >> You can use xa_store_irq() here.
> >>
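[Zhang, Jesse(Jie)] Will do. Something like this (untested sketch), which
also drops the local "flags" variable:

  /* xa_store_irq() takes the xa_lock with spin_lock_irq() internally,
   * so the open-coded lock/store/unlock collapses into a single call:
   */
  r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue,
                          GFP_KERNEL));
  if (r) {
          kfree(queue);
          goto unlock;
  }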
> >>>
> >>> -   qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
> >>> -   if (qid < 0) {
> >>> +   r = xa_alloc(&uq_mgr->userq_mgr_xa, &qid, queue, XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
> >>> +   if (r) {
> >>>             drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
> >>>             amdgpu_userq_fence_driver_free(queue);
> >>>             uq_funcs->mqd_destroy(uq_mgr, queue);
> >>> @@ -556,6 +576,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> >>>             r = -ENOMEM;
> >>>             goto unlock;
> >>>     }
> >>> +   queue->userq_mgr = uq_mgr;
> >>>
> >>>     /* don't map the queue if scheduling is halted */
> >>>     if (adev->userq_halt_for_enforce_isolation &&
> >>> @@ -568,7 +589,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> >>>             r = amdgpu_userq_map_helper(uq_mgr, queue);
> >>>             if (r) {
> >>>                     drm_file_err(uq_mgr->file, "Failed to map Queue\n");
> >>> -                   idr_remove(&uq_mgr->userq_idr, qid);
> >>> +                   xa_erase(&uq_mgr->userq_mgr_xa, qid);
> >>>                     amdgpu_userq_fence_driver_free(queue);
> >>>                     uq_funcs->mqd_destroy(uq_mgr, queue);
> >>>                     kfree(queue);
> >>> @@ -591,7 +612,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> >>>
> >>>  unlock:
> >>>     mutex_unlock(&uq_mgr->userq_mutex);
> >>> -   mutex_unlock(&adev->userq_mutex);
> >>>
> >>>     return r;
> >>>  }
> >>> @@ -689,11 +709,11 @@ static int
> >>>  amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
> >>>  {
> >>>     struct amdgpu_usermode_queue *queue;
> >>> -   int queue_id;
> >>> +   unsigned long queue_id;
> >>>     int ret = 0, r;
> >>>
> >>>     /* Resume all the queues for this process */
> >>> -   idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
> >>> +   xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
> >>>             r = amdgpu_userq_restore_helper(uq_mgr, queue);
> >>>             if (r)
> >>>                     ret = r;
> >>> @@ -846,11 +866,11 @@ static int
> >>>  amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
> >>>  {
> >>>     struct amdgpu_usermode_queue *queue;
> >>> -   int queue_id;
> >>> +   unsigned long queue_id;
> >>>     int ret = 0, r;
> >>>
> >>>     /* Try to unmap all the queues in this process ctx */
> >>> -   idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
> >>> +   xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
> >>>             r = amdgpu_userq_preempt_helper(uq_mgr, queue);
> >>>             if (r)
> >>>                     ret = r;
> >>> @@ -865,9 +885,10 @@ static int
> >>>  amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
> >>>  {
> >>>     struct amdgpu_usermode_queue *queue;
> >>> -   int queue_id, ret;
> >>> +   unsigned long queue_id;
> >>> +   int ret;
> >>>
> >>> -   idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
> >>> +   xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
> >>>             struct dma_fence *f = queue->last_fence;
> >>>
> >>>             if (!f || dma_fence_is_signaled(f))
> >>> @@ -920,44 +941,30 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f
> >>>                       struct amdgpu_device *adev)
> >>>  {
> >>>     mutex_init(&userq_mgr->userq_mutex);
> >>> -   idr_init_base(&userq_mgr->userq_idr, 1);
> >>> +   xa_init_flags(&userq_mgr->userq_mgr_xa, XA_FLAGS_ALLOC);
> >>>     userq_mgr->adev = adev;
> >>>     userq_mgr->file = file_priv;
> >>>
> >>> -   mutex_lock(&adev->userq_mutex);
> >>> -   list_add(&userq_mgr->list, &adev->userq_mgr_list);
> >>> -   mutex_unlock(&adev->userq_mutex);
> >>> -
> >>>     INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
> >>>     return 0;
> >>>  }
> >>>
> >>>  void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
> >>>  {
> >>> -   struct amdgpu_device *adev = userq_mgr->adev;
> >>>     struct amdgpu_usermode_queue *queue;
> >>> -   struct amdgpu_userq_mgr *uqm, *tmp;
> >>> -   uint32_t queue_id;
> >>> +   unsigned long queue_id;
> >>>
> >>>     cancel_delayed_work_sync(&userq_mgr->resume_work);
> >>>
> >>> -   mutex_lock(&adev->userq_mutex);
> >>>     mutex_lock(&userq_mgr->userq_mutex);
> >>> -   idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) {
> >>> +   xa_for_each(&userq_mgr->userq_mgr_xa, queue_id, queue) {
> >>>             amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
> >>>             amdgpu_userq_unmap_helper(userq_mgr, queue);
> >>>             amdgpu_userq_cleanup(userq_mgr, queue, queue_id);
> >>>     }
> >>>
> >>> -   list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
> >>> -           if (uqm == userq_mgr) {
> >>> -                   list_del(&uqm->list);
> >>> -                   break;
> >>> -           }
> >>> -   }
> >>> -   idr_destroy(&userq_mgr->userq_idr);
> >>> +   xa_destroy(&userq_mgr->userq_mgr_xa);
> >>>     mutex_unlock(&userq_mgr->userq_mutex);
> >>> -   mutex_unlock(&adev->userq_mutex);
> >>>     mutex_destroy(&userq_mgr->userq_mutex);
> >>>  }
> >>>
> >>> @@ -965,25 +972,23 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
> >>>  {
> >>>     u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
> >>>     struct amdgpu_usermode_queue *queue;
> >>> -   struct amdgpu_userq_mgr *uqm, *tmp;
> >>> -   int queue_id;
> >>> +   struct amdgpu_userq_mgr *uqm;
> >>> +   unsigned long queue_id;
> >>>     int ret = 0, r;
> >>>
> >>>     if (!ip_mask)
> >>>             return 0;
> >>>
> >>> -   mutex_lock(&adev->userq_mutex);
> >>> -   list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
> >>> +   xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
> >>> +           uqm = queue->userq_mgr;
> >>
> >> That function is used during reset, isn't it?
> > [Zhang, Jesse(Jie)] No, amdgpu_userq_suspend/amdgpu_userq_resume are not
> > used during reset, but they are called when executing the suspend command.
> > I tried the suspend case (amdgpu_userq_suspend/amdgpu_userq_resume) and no
> > error occurred.
>
> Testing can only disprove a design and not validate it.
>
> During normal suspend/resume this isn't an issue because userspace is
> completely frozen.
>
> But when this is used during reset as well we need to be super careful to
> not access freed up memory.
>
> Regards,
> Christian.
>
> >> If yes we somehow need to make sure that both queue and userq_mgr are
> >> properly initialized and not freed up while the reset is ongoing.
> >>
> >> The easiest way of doing that is to hold the read side of the reset
> >> lock while creating/destroying queues and the manager data structure.
[Zhang, Jesse(Jie)] Thanks Christian, I will update the patch and hold the
read side of the reset lock via down_read(&adev->reset_domain->sem).
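A rough sketch of the plan (untested; error handling elided):

  /* e.g. around user queue creation/destruction: */
  down_read(&adev->reset_domain->sem);
  /* create or destroy the queue and update the XArrays */
  up_read(&adev->reset_domain->sem);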

Thanks
Jesse
> >>
> >>>             cancel_delayed_work_sync(&uqm->resume_work);
> >>>             mutex_lock(&uqm->userq_mutex);
> >>> -           idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
> >>> -                   r = amdgpu_userq_unmap_helper(uqm, queue);
> >>> -                   if (r)
> >>> -                           ret = r;
> >>> -           }
> >>> +           r = amdgpu_userq_unmap_helper(uqm, queue);
> >>> +           if (r)
> >>> +                   ret = r;
> >>>             mutex_unlock(&uqm->userq_mutex);
> >>>     }
> >>> -   mutex_unlock(&adev->userq_mutex);
> >>> +
> >>>     return ret;
> >>>  }
> >>>
> >>> @@ -991,24 +996,22 @@ int amdgpu_userq_resume(struct amdgpu_device *adev)
> >>>  {
> >>>     u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
> >>>     struct amdgpu_usermode_queue *queue;
> >>> -   struct amdgpu_userq_mgr *uqm, *tmp;
> >>> -   int queue_id;
> >>> +   struct amdgpu_userq_mgr *uqm;
> >>> +   unsigned long queue_id;
> >>>     int ret = 0, r;
> >>>
> >>>     if (!ip_mask)
> >>>             return 0;
> >>>
> >>> -   mutex_lock(&adev->userq_mutex);
> >>> -   list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
> >>> +   xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
> >>> +           uqm = queue->userq_mgr;
> >>
> >> Not sure about those ones here, but the same point that applies to
> >> the reset handling applies here as well.
> >>
> >> Apart from that the patch looks good to me.
> >>
> >> Regards,
> >> Christian.
> >>
> >>>             mutex_lock(&uqm->userq_mutex);
> >>> -           idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
> >>> -                   r = amdgpu_userq_map_helper(uqm, queue);
> >>> -                   if (r)
> >>> -                           ret = r;
> >>> -           }
> >>> +           r = amdgpu_userq_map_helper(uqm, queue);
> >>> +           if (r)
> >>> +                   ret = r;
> >>>             mutex_unlock(&uqm->userq_mutex);
> >>>     }
> >>> -   mutex_unlock(&adev->userq_mutex);
> >>> +
> >>>     return ret;
> >>>  }
> >>>
> >>> @@ -1017,33 +1020,31 @@ int
> >>> amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
> >>>  {
> >>>     u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
> >>>     struct amdgpu_usermode_queue *queue;
> >>> -   struct amdgpu_userq_mgr *uqm, *tmp;
> >>> -   int queue_id;
> >>> +   struct amdgpu_userq_mgr *uqm;
> >>> +   unsigned long queue_id;
> >>>     int ret = 0, r;
> >>>
> >>>     /* only need to stop gfx/compute */
> >>>     if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
> >>>             return 0;
> >>>
> >>> -   mutex_lock(&adev->userq_mutex);
> >>>     if (adev->userq_halt_for_enforce_isolation)
> >>>             dev_warn(adev->dev, "userq scheduling already stopped!\n");
> >>>     adev->userq_halt_for_enforce_isolation = true;
> >>> -   list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
> >>> +   xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
> >>> +           uqm = queue->userq_mgr;
> >>>             cancel_delayed_work_sync(&uqm->resume_work);
> >>>             mutex_lock(&uqm->userq_mutex);
> >>> -           idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
> >>> -                   if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
> >>> -                        (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
> >>> -                       (queue->xcp_id == idx)) {
> >>> -                           r = amdgpu_userq_preempt_helper(uqm, queue);
> >>> -                           if (r)
> >>> -                                   ret = r;
> >>> -                   }
> >>> +           if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
> >>> +                (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
> >>> +               (queue->xcp_id == idx)) {
> >>> +                   r = amdgpu_userq_preempt_helper(uqm, queue);
> >>> +                   if (r)
> >>> +                           ret = r;
> >>>             }
> >>>             mutex_unlock(&uqm->userq_mutex);
> >>>     }
> >>> -   mutex_unlock(&adev->userq_mutex);
> >>> +
> >>>     return ret;
> >>>  }
> >>>
> >>> @@ -1052,21 +1053,20 @@ int
> >>> amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
> >>>  {
> >>>     u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
> >>>     struct amdgpu_usermode_queue *queue;
> >>> -   struct amdgpu_userq_mgr *uqm, *tmp;
> >>> -   int queue_id;
> >>> +   struct amdgpu_userq_mgr *uqm;
> >>> +   unsigned long queue_id;
> >>>     int ret = 0, r;
> >>>
> >>>     /* only need to stop gfx/compute */
> >>>     if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
> >>>             return 0;
> >>>
> >>> -   mutex_lock(&adev->userq_mutex);
> >>>     if (!adev->userq_halt_for_enforce_isolation)
> >>>             dev_warn(adev->dev, "userq scheduling already started!\n");
> >>>     adev->userq_halt_for_enforce_isolation = false;
> >>> -   list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
> >>> +   xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
> >>> +           uqm = queue->userq_mgr;
> >>>             mutex_lock(&uqm->userq_mutex);
> >>> -           idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
> >>>                     if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
> >>>                          (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
> >>>                         (queue->xcp_id == idx)) {
> >>> @@ -1074,9 +1074,8 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
> >>>                             if (r)
> >>>                                     ret = r;
> >>>                     }
> >>> -           }
> >>>             mutex_unlock(&uqm->userq_mutex);
> >>>     }
> >>> -   mutex_unlock(&adev->userq_mutex);
> >>> +
> >>>     return ret;
> >>>  }
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >>> index c027dd916672..c562c8d2b53a 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >>> @@ -88,11 +88,15 @@ struct amdgpu_userq_funcs {
> >>>
> >>>  /* Usermode queues for gfx */
> >>>  struct amdgpu_userq_mgr {
> >>> -   struct idr                      userq_idr;
> >>> +   /**
> >>> +    * @userq_mgr_xa: Per-process user queue map (queue ID → queue)
> >>> +    * Key: queue_id (unique ID within the process's userq manager)
> >>> +    * Value: struct amdgpu_usermode_queue
> >>> +    */
> >>> +   struct xarray                   userq_mgr_xa;
> >>>     struct mutex                    userq_mutex;
> >>>     struct amdgpu_device            *adev;
> >>>     struct delayed_work             resume_work;
> >>> -   struct list_head                list;
> >>>     struct drm_file                 *file;
> >>>  };
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> >>> index 95e91d1dc58a..cc597fae8971 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> >>> @@ -537,7 +537,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
> >>>     }
> >>>
> >>>     /* Retrieve the user queue */
> >>> -   queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
> >>> +   queue = xa_load(&userq_mgr->userq_mgr_xa, args->queue_id);
> >>>     if (!queue) {
> >>>             r = -ENOENT;
> >>>             goto put_gobj_write;
> >>> @@ -899,7 +899,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> >>>              */
> >>>             num_fences = dma_fence_dedup_array(fences, num_fences);
> >>>
> >>> -           waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id);
> >>> +           waitq = xa_load(&userq_mgr->userq_mgr_xa, wait_info->waitq_id);
> >>>             if (!waitq) {
> >>>                     r = -EINVAL;
> >>>                     goto free_fences;
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> >>> b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> >>> index 2db9b2c63693..9e4229c79c69 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> >>> @@ -205,9 +205,9 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
> >>>     int db_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev);
> >>>     struct mes_detect_and_reset_queue_input input;
> >>>     struct amdgpu_usermode_queue *queue;
> >>> -   struct amdgpu_userq_mgr *uqm, *tmp;
> >>>     unsigned int hung_db_num = 0;
> >>> -   int queue_id, r, i;
> >>> +   unsigned long queue_id;
> >>> +   int r, i;
> >>>     u32 db_array[4];
> >>>
> >>>     if (db_array_size > 4) {
> >>> @@ -227,16 +227,14 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
> >>>     if (r) {
> >>>             dev_err(adev->dev, "Failed to detect and reset queues, err (%d)\n",
> >>>                     r);
> >>>     } else if (hung_db_num) {
> >>> -           list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
> >>> -                   idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
> >>> -                           if (queue->queue_type == queue_type) {
> >>> -                                   for (i = 0; i < hung_db_num; i++) {
> >>> -                                           if (queue->doorbell_index == db_array[i]) {
> >>> -                                                   queue->state = AMDGPU_USERQ_STATE_HUNG;
> >>> -                                                   atomic_inc(&adev->gpu_reset_counter);
> >>> -                                                   amdgpu_userq_fence_driver_force_completion(queue);
> >>> -                                                   drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
> >>> -                                           }
> >>> +           xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
> >>> +                   if (queue->queue_type == queue_type) {
> >>> +                           for (i = 0; i < hung_db_num; i++) {
> >>> +                                   if (queue->doorbell_index == db_array[i]) {
> >>> +                                           queue->state = AMDGPU_USERQ_STATE_HUNG;
> >>> +                                           atomic_inc(&adev->gpu_reset_counter);
> >>> +                                           amdgpu_userq_fence_driver_force_completion(queue);
> >>> +                                           drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
> >>>                                     }
> >>>                             }
> >>>                     }
> >
