> -----Original Message-----
> From: Alex Deucher <[email protected]>
> Sent: Friday, January 23, 2026 4:33 AM
> To: Zhang, Jesse(Jie) <[email protected]>
> Cc: [email protected]; Deucher, Alexander
> <[email protected]>; Koenig, Christian <[email protected]>
> Subject: Re: [PATCH 9/9] drm/amdgpu: Add queue update IOCTL support
>
> On Thu, Jan 22, 2026 at 4:37 AM Jesse.Zhang <[email protected]> wrote:
> >
> > Add AMDGPU_USERQ_UPDATE_QUEUE IOCTL to support updating user
> mode
> > queue properties after creation. This allows userspace to modify queue
> > attributes like ring buffer address, ring size, queue priority, and
> > queue percentage dynamically.
> >
> > Signed-off-by: Jesse Zhang <[email protected]>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   3 +
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |   1 +
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c  | 106 +++++++++++++++++++--
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h  |   6 ++
> >  drivers/gpu/drm/amd/amdgpu/mes_userqueue.c |   1 +
> >  include/uapi/drm/amdgpu_drm.h              |  13 +++
> >  6 files changed, 122 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index 246d74205b48..1679075f679b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -813,6 +813,9 @@ struct amdgpu_mqd_prop {
> >         uint32_t cu_mask_count;
> >         uint32_t cu_flags;
> >         bool is_user_cu_masked;
> > +       uint32_t queue_percentage;
> > +       /* used in gfx9 and gfx12.1 */
> > +       uint32_t pm4_target_xcc;
> >  };
> >
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index 9c425169a4f9..bc6cc1517221 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -3075,6 +3075,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
> >         DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> >         DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> >         DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_UPDATE_QUEUE, amdgpu_update_queue_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>
> I don't think we need a new IOCTL.  Just a new op for AMDGPU_USERQ.
> E.g., AMDGPU_USERQ_OP_MODIFY
Yes, will fix it.
Maybe we can also add a new op for the CU mask, e.g. AMDGPU_USERQ_OP_CU_MASK.
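
Roughly, the op switch in amdgpu_userq_ioctl() could be extended like this. Just a sketch to confirm the direction: the op values and the amdgpu_userq_modify()/amdgpu_userq_set_cu_mask() helpers and their args are placeholders, not a final interface, and the existing CREATE/FREE handling is assumed to stay as it is today:

  /* new ops for struct drm_amdgpu_userq_in::op, values illustrative */
  #define AMDGPU_USERQ_OP_MODIFY   3
  #define AMDGPU_USERQ_OP_CU_MASK  4

  int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *filp)
  {
          union drm_amdgpu_userq *args = data;
          int r;

          switch (args->in.op) {
          case AMDGPU_USERQ_OP_CREATE:
                  r = amdgpu_userq_create(filp, args);
                  break;
          case AMDGPU_USERQ_OP_FREE:
                  r = amdgpu_userq_destroy(filp, args->in.queue_id);
                  break;
          case AMDGPU_USERQ_OP_MODIFY:
                  /* ring address/size, priority, percentage on an existing queue */
                  r = amdgpu_userq_modify(filp, args);
                  break;
          case AMDGPU_USERQ_OP_CU_MASK:
                  r = amdgpu_userq_set_cu_mask(filp, args);
                  break;
          default:
                  r = -EINVAL;
                  break;
          }

          return r;
  }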

Thanks
Jesse
>
> >         DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SET_CU_MASK, amdgpu_userq_set_cu_mask_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> >         DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> >         DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > index 4cbf75723c08..922f73b92db3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > @@ -34,6 +34,26 @@
> >  #include "amdgpu_hmm.h"
> >  #include "amdgpu_userq_fence.h"
> >
> > +/* Mapping queue priority to pipe priority, indexed by queue priority */
> > +int amdgpu_userq_pipe_priority_map[] = {
> > +       AMDGPU_RING_PRIO_0,
> > +       AMDGPU_RING_PRIO_0,
> > +       AMDGPU_RING_PRIO_0,
> > +       AMDGPU_RING_PRIO_0,
> > +       AMDGPU_RING_PRIO_0,
> > +       AMDGPU_RING_PRIO_0,
> > +       AMDGPU_RING_PRIO_0,
> > +       AMDGPU_RING_PRIO_1,
> > +       AMDGPU_RING_PRIO_1,
> > +       AMDGPU_RING_PRIO_1,
> > +       AMDGPU_RING_PRIO_1,
> > +       AMDGPU_RING_PRIO_2,
> > +       AMDGPU_RING_PRIO_2,
> > +       AMDGPU_RING_PRIO_2,
> > +       AMDGPU_RING_PRIO_2,
> > +       AMDGPU_RING_PRIO_2
> > +};
> > +
> >  u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
> >  {
> >         int i;
> > @@ -907,7 +927,6 @@ static int amdgpu_userq_update_queue(struct amdgpu_usermode_queue *queue,
> >         struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
> >         struct amdgpu_device *adev = uq_mgr->adev;
> >         const struct amdgpu_userq_funcs *uq_funcs;
> > -       bool unmap_queue = false;
> >         int r;
> >
> >         uq_funcs = adev->userq_funcs[queue->queue_type];
> > @@ -923,23 +942,94 @@ static int amdgpu_userq_update_queue(struct amdgpu_usermode_queue *queue,
> >                 r = amdgpu_userq_unmap_helper(queue);
> >                 if (r)
> >                         return r;
> > -               unmap_queue = true;
> >         }
> >
> >         r = uq_funcs->mqd_update(queue, minfo);
> > +       if (r)
> > +               return r;
> >
> > -       if (unmap_queue) {
> > -               int map_r = amdgpu_userq_map_helper(queue);
> > -               if (map_r)
> > -                       dev_err(adev->dev, "Failed to remap queue %llu 
> > after update\n",
> > +       /*
> > +        * If the queue is considered active (has valid size, address, and
> percentage),
> > +        * we attempt to map it. This effectively starts the queue or 
> > restarts it
> > +        * if it was previously running.
> > +        */
> > +       if (AMDGPU_USERQ_IS_ACTIVE(queue)) {
> > +               r = amdgpu_userq_map_helper(queue);
> > +               if (r)
> > +                       drm_file_err(uq_mgr->file, "Failed to remap queue %llu after update\n",
> >                                 queue->doorbell_index);
> > -               if (!r)
> > -                       r = map_r;
> >         }
> >
> >         return r;
> >  }
> >
> > +int amdgpu_update_queue_ioctl(struct drm_device *dev, void *data,
> > +                             struct drm_file *filp)
> > +{
> > +       struct amdgpu_fpriv *fpriv = filp->driver_priv;
> > +       struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
> > +       struct amdgpu_userq_update_queue_args *args = data;
> > +       struct amdgpu_usermode_queue *queue;
> > +       struct amdgpu_mqd_prop *props;
> > +       int r;
> > +
> > +       /*
> > +        * Repurpose queue percentage to accommodate new features:
> > +        * bit 0-7: queue percentage
> > +        * bit 8-15: pm4_target_xcc
> > +        */
>
> Just make the target xcc an explicit parameter.
>
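
If the target xcc becomes an explicit parameter, the args could carry it directly instead of overloading queue_percentage. Something like this (struct and field names purely illustrative, not a proposed final UAPI):

  struct drm_amdgpu_userq_modify {
          __u64   ring_base_address;   /* 0 = leave unchanged */
          __u32   queue_id;
          __u32   ring_size;           /* power of two, 0 = leave unchanged */
          __u32   queue_priority;
          __u32   queue_percentage;    /* plain 0-100, no packed bits */
          __u32   pm4_target_xcc;      /* explicit, not bits 8-15 of the percentage */
          __u32   pad;
  };

so the driver side becomes a straight copy:

  props->queue_percentage = args->queue_percentage;
  props->pm4_target_xcc   = args->pm4_target_xcc;
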
> > +       if ((args->queue_percentage & 0xFF) >
> AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE) {
> > +               drm_file_err(uq_mgr->file, "Queue percentage must be 
> > between 0 to
> AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE\n");
> > +               return -EINVAL;
> > +       }
> > +
> > +       /* Validate priority */
> > +       if (args->queue_priority >
> AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM) {
> > +               drm_file_err(uq_mgr->file, "Queue priority must be between 
> > 0 to
> KFD_MAX_QUEUE_PRIORITY\n");
> > +               return -EINVAL;
> > +       }
> > +
> > +       /* Validate ring size */
> > +       if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
> > +               drm_file_err(uq_mgr->file, "Ring size must be a power of 2 
> > or 0\n");
> > +               return -EINVAL;
> > +       }
> > +
> > +       if (args->ring_size > 0 && args->ring_size < AMDGPU_GPU_PAGE_SIZE)
> {
> > +               args->ring_size = AMDGPU_GPU_PAGE_SIZE;
> > +               drm_file_err(uq_mgr->file, "Size clamped to
> AMDGPU_GPU_PAGE_SIZE\n");
> > +       }
> > +
> > +       if ((args->ring_base_address) &&
> > +               (!access_ok((const void __user *) args->ring_base_address,
> > +                       sizeof(uint64_t)))) {
> > +               drm_file_err(uq_mgr->file, "Can't access ring base 
> > address\n");
> > +               return -EFAULT;
> > +       }
> > +
> > +       mutex_lock(&uq_mgr->userq_mutex);
> > +       queue = amdgpu_userq_find(uq_mgr, args->queue_id);
> > +       if (!queue) {
> > +               mutex_unlock(&uq_mgr->userq_mutex);
> > +               return -EINVAL;
> > +       }
> > +
> > +       props = queue->userq_prop;
> > +       props->queue_size = args->ring_size;
> > +       props->hqd_base_gpu_addr = args->ring_base_address;
> > +       props->queue_percentage = args->queue_percentage & 0xFF;
> > +       /* bit 8-15 are repurposed to be PM4 target XCC */
>
> We should handle this explicitly rather than stashing it in percentage.
>
> > +       props->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
> > +       props->hqd_pipe_priority = amdgpu_userq_pipe_priority_map[args->queue_priority];
> > +       props->hqd_queue_priority = args->queue_priority;
> > +
> > +       r = amdgpu_userq_update_queue(queue, NULL);
> > +
> > +       mutex_unlock(&uq_mgr->userq_mutex);
> > +
> > +       return r;
> > +}
> > +
> >  int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void *data,
> >                                      struct drm_file *filp)
> >  {
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > index 43bf104d2fb8..c8ea81c17c6e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > @@ -31,6 +31,9 @@
> >  #define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base)
> >  #define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
> >  #define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name)
> > +#define AMDGPU_USERQ_IS_ACTIVE(q) ((q)->userq_prop->queue_size > 0 &&  \
> > +                           (q)->userq_prop->hqd_base_gpu_addr != 0 &&  \
> > +                           (q)->userq_prop->queue_percentage > 0)
> >
> >  enum amdgpu_userq_state {
> >         AMDGPU_USERQ_STATE_UNMAPPED = 0,
> > @@ -118,6 +121,9 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp
> >  int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void *data,
> > *data,
> >                                         struct drm_file *filp);
> >
> > +int amdgpu_update_queue_ioctl(struct drm_device *dev, void *data,
> > +                             struct drm_file *filp);
> > +
> >  int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct
> drm_file *file_priv,
> >                           struct amdgpu_device *adev);
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > index ae221eaa5b82..cfa3cb46a983 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > @@ -304,6 +304,7 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
> >         userq_props->use_doorbell = true;
> >         userq_props->doorbell_index = queue->doorbell_index;
> >         userq_props->fence_address = queue->fence_drv->gpu_addr;
> > +       userq_props->queue_percentage = 100;
> >
> >         if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
> >                 struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
> >
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index 41b6b3cea834..c42328453652 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -59,6 +59,7 @@ extern "C" {
> >  #define DRM_AMDGPU_USERQ_WAIT          0x18
> >  #define DRM_AMDGPU_GEM_LIST_HANDLES    0x19
> >  #define DRM_AMDGPU_USERQ_SET_CU_MASK   0x1a
> > +#define DRM_AMDGPU_USERQ_UPDATE_QUEUE  0x1b
> >
> >  #define DRM_IOCTL_AMDGPU_GEM_CREATE            DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >  #define DRM_IOCTL_AMDGPU_GEM_MMAP              DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > @@ -81,6 +82,7 @@ extern "C" {
> >  #define DRM_IOCTL_AMDGPU_USERQ_WAIT            DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
> >  #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES      DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
> >  #define DRM_IOCTL_AMDGPU_USERQ_SET_CU_MASK     DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SET_CU_MASK, struct amdgpu_userq_set_cu_mask_args)
> > +#define DRM_IOCTL_AMDGPU_USERQ_UPDATE_QUEUE    DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_UPDATE_QUEUE, struct amdgpu_userq_update_queue_args)
> >
> >  /**
> >   * DOC: memory domains
> > @@ -343,6 +345,7 @@ union drm_amdgpu_ctx {
> >  #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */
> >  /* for queues that need access to protected content */
> >  #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE  (1 << 2)
> > +#define AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE      100
> >
> >  /*
> >   * This structure is a container to pass input configuration
> > @@ -440,6 +443,16 @@ struct amdgpu_userq_set_cu_mask_args {
> >         __u64 cu_mask_ptr;
> >  };
> >
> > +
> > +/* IOCTL parameters used to set user queue updates */
> > +struct amdgpu_userq_update_queue_args {
> > +       __u64 ring_base_address;
> > +       __u32 queue_id;
> > +       __u32 ring_size;
>
> Do we actually need to be able to change the ring base address and size?  I 
> know
> KFD can do it, but we should verify if we really need to be able to change 
> the ring
> size at runtime.
>
> > +       __u32 queue_percentage;
> > +       __u32 queue_priority;
>
> We should probably also make these available for USERQ create as well.
> I would fold CU mask in as well.
>
> Alex
>
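
If these knobs move into the create path too, one option is to keep them optional with 0 meaning "use the default", so existing userspace keeps working. A very rough sketch of the extra fields (names, layout and placement purely illustrative):

  /* possible additions to the USERQ create (and modify) input */
  __u64   cu_mask_ptr;         /* pointer to a CU mask array, 0 = no mask */
  __u32   cu_mask_count;       /* length of the CU mask */
  __u32   queue_priority;      /* 0 = default priority */
  __u32   queue_percentage;    /* 0 = default (100%) */
  __u32   pad;
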
> > +};
> > +
> >  /* GFX V11 IP specific MQD parameters */
> >  struct drm_amdgpu_userq_mqd_gfx11 {
> >         /**
> > --
> > 2.49.0
> >
