[AMD Official Use Only - AMD Internal Distribution Only]
> -----Original Message-----
> From: Alex Deucher <[email protected]>
> Sent: Friday, January 23, 2026 4:33 AM
> To: Zhang, Jesse(Jie) <[email protected]>
> Cc: [email protected]; Deucher, Alexander
> <[email protected]>; Koenig, Christian <[email protected]>
> Subject: Re: [PATCH 9/9] drm/amdgpu: Add queue update IOCTL support
>
> On Thu, Jan 22, 2026 at 4:37 AM Jesse.Zhang <[email protected]> wrote:
> >
> > Add AMDGPU_USERQ_UPDATE_QUEUE IOCTL to support updating user
> mode
> > queue properties after creation. This allows userspace to modify queue
> > attributes like ring buffer address, ring size, queue priority, and
> > queue percentage dynamically.
> >
> > Signed-off-by: Jesse Zhang <[email protected]>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +
> > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 +
> > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 106 +++++++++++++++++++-
> -
> > drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 6 ++
> > drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 1 +
> > include/uapi/drm/amdgpu_drm.h | 13 +++
> > 6 files changed, 122 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index 246d74205b48..1679075f679b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -813,6 +813,9 @@ struct amdgpu_mqd_prop {
> > uint32_t cu_mask_count;
> > uint32_t cu_flags;
> > bool is_user_cu_masked;
> > + uint32_t queue_percentage;
> > + /* used in gfx9 and gfx12.1 */
> > + uint32_t pm4_target_xcc;
> > };
> >
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index 9c425169a4f9..bc6cc1517221 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -3075,6 +3075,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
> > DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl,
> DRM_AUTH|DRM_RENDER_ALLOW),
> > DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR,
> amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl,
> > DRM_AUTH|DRM_RENDER_ALLOW),
> > + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_UPDATE_QUEUE,
> > + amdgpu_update_queue_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>
> I don't think we need a new IOCTL. Just a new op for AMDGPU_USERQ.
> E.g., AMDGPU_USERQ_OP_MODIFY
Yes, I will fix it.
Maybe we can also add a new op for the CU mask, e.g. AMDGPU_USERQ_OP_CU_MASK.
Thanks,
Jesse
>
> > DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SET_CU_MASK,
> amdgpu_userq_set_cu_mask_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL,
> amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT,
> amdgpu_userq_wait_ioctl,
> > DRM_AUTH|DRM_RENDER_ALLOW), diff --git
> > a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > index 4cbf75723c08..922f73b92db3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > @@ -34,6 +34,26 @@
> > #include "amdgpu_hmm.h"
> > #include "amdgpu_userq_fence.h"
> >
> > +/* Mapping queue priority to pipe priority, indexed by queue priority
> > +*/ int amdgpu_userq_pipe_priority_map[] = {
> > + AMDGPU_RING_PRIO_0,
> > + AMDGPU_RING_PRIO_0,
> > + AMDGPU_RING_PRIO_0,
> > + AMDGPU_RING_PRIO_0,
> > + AMDGPU_RING_PRIO_0,
> > + AMDGPU_RING_PRIO_0,
> > + AMDGPU_RING_PRIO_0,
> > + AMDGPU_RING_PRIO_1,
> > + AMDGPU_RING_PRIO_1,
> > + AMDGPU_RING_PRIO_1,
> > + AMDGPU_RING_PRIO_1,
> > + AMDGPU_RING_PRIO_2,
> > + AMDGPU_RING_PRIO_2,
> > + AMDGPU_RING_PRIO_2,
> > + AMDGPU_RING_PRIO_2,
> > + AMDGPU_RING_PRIO_2
> > +};
> > +
> > u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev) {
> > int i;
> > @@ -907,7 +927,6 @@ static int amdgpu_userq_update_queue(struct
> amdgpu_usermode_queue *queue,
> > struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
> > struct amdgpu_device *adev = uq_mgr->adev;
> > const struct amdgpu_userq_funcs *uq_funcs;
> > - bool unmap_queue = false;
> > int r;
> >
> > uq_funcs = adev->userq_funcs[queue->queue_type];
> > @@ -923,23 +942,94 @@ static int amdgpu_userq_update_queue(struct
> amdgpu_usermode_queue *queue,
> > r = amdgpu_userq_unmap_helper(queue);
> > if (r)
> > return r;
> > - unmap_queue = true;
> > }
> >
> > r = uq_funcs->mqd_update(queue, minfo);
> > + if (r)
> > + return r;
> >
> > - if (unmap_queue) {
> > - int map_r = amdgpu_userq_map_helper(queue);
> > - if (map_r)
> > - dev_err(adev->dev, "Failed to remap queue %llu
> > after update\n",
> > + /*
> > + * If the queue is considered active (has valid size, address, and
> percentage),
> > + * we attempt to map it. This effectively starts the queue or
> > restarts it
> > + * if it was previously running.
> > + */
> > + if (AMDGPU_USERQ_IS_ACTIVE(queue)) {
> > + r = amdgpu_userq_map_helper(queue);
> > + if (r)
> > + drm_file_err(uq_mgr->file, "Failed to remap
> > + queue %llu after update\n",
> > queue->doorbell_index);
> > - if (!r)
> > - r = map_r;
> > }
> >
> > return r;
> > }
> >
> > +int amdgpu_update_queue_ioctl(struct drm_device *dev, void *data,
> > + struct drm_file *filp) {
> > + struct amdgpu_fpriv *fpriv = filp->driver_priv;
> > + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
> > + struct amdgpu_userq_update_queue_args *args = data;
> > + struct amdgpu_usermode_queue *queue;
> > + struct amdgpu_mqd_prop *props;
> > + int r;
> > +
> > + /*
> > + * Repurpose queue percentage to accommodate new features:
> > + * bit 0-7: queue percentage
> > + * bit 8-15: pm4_target_xcc
> > + */
>
> Just make the target xcc an explicit parameter.
>
> > + if ((args->queue_percentage & 0xFF) >
> AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE) {
> > + drm_file_err(uq_mgr->file, "Queue percentage must be
> > between 0 to
> AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE\n");
> > + return -EINVAL;
> > + }
> > +
> > + /* Validate priority */
> > + if (args->queue_priority >
> AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM) {
> > + drm_file_err(uq_mgr->file, "Queue priority must be between
> > 0 to
> KFD_MAX_QUEUE_PRIORITY\n");
> > + return -EINVAL;
> > + }
> > +
> > + /* Validate ring size */
> > + if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
> > + drm_file_err(uq_mgr->file, "Ring size must be a power of 2
> > or 0\n");
> > + return -EINVAL;
> > + }
> > +
> > + if (args->ring_size > 0 && args->ring_size < AMDGPU_GPU_PAGE_SIZE)
> {
> > + args->ring_size = AMDGPU_GPU_PAGE_SIZE;
> > + drm_file_err(uq_mgr->file, "Size clamped to
> AMDGPU_GPU_PAGE_SIZE\n");
> > + }
> > +
> > + if ((args->ring_base_address) &&
> > + (!access_ok((const void __user *) args->ring_base_address,
> > + sizeof(uint64_t)))) {
> > + drm_file_err(uq_mgr->file, "Can't access ring base
> > address\n");
> > + return -EFAULT;
> > + }
> > +
> > + mutex_lock(&uq_mgr->userq_mutex);
> > + queue = amdgpu_userq_find(uq_mgr, args->queue_id);
> > + if (!queue) {
> > + mutex_unlock(&uq_mgr->userq_mutex);
> > + return -EINVAL;
> > + }
> > +
> > + props = queue->userq_prop;
> > + props->queue_size = args->ring_size;
> > + props->hqd_base_gpu_addr = args->ring_base_address;
> > + props->queue_percentage = args->queue_percentage & 0xFF;
> > + /* bit 8-15 are repurposed to be PM4 target XCC */
>
> We should handle this explicitly rather than stashing it in percentage.
>
> > + props->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
> > + props->hqd_pipe_priority = amdgpu_userq_pipe_priority_map[args-
> >queue_priority];
> > + props->hqd_queue_priority = args->queue_priority;
> > +
> > + r = amdgpu_userq_update_queue(queue, NULL);
> > +
> > + mutex_unlock(&uq_mgr->userq_mutex);
> > +
> > + return r;
> > +}
> > +
> > int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void *data,
> > struct drm_file *filp) { diff
> > --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > index 43bf104d2fb8..c8ea81c17c6e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > @@ -31,6 +31,9 @@
> > #define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence,
> > base) #define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv,
> > userq_mgr) #define work_to_uq_mgr(w, name) container_of(w, struct
> > amdgpu_userq_mgr, name)
> > +#define AMDGPU_USERQ_IS_ACTIVE(q) ((q)->userq_prop->queue_size > 0
> && \
> > + (q)->userq_prop->hqd_base_gpu_addr != 0 && \
> > + (q)->userq_prop->queue_percentage > 0)
> >
> > enum amdgpu_userq_state {
> > AMDGPU_USERQ_STATE_UNMAPPED = 0, @@ -118,6 +121,9 @@ int
> > amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file
> > *filp int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void
> > *data,
> > struct drm_file *filp);
> >
> > +int amdgpu_update_queue_ioctl(struct drm_device *dev, void *data,
> > + struct drm_file *filp);
> > +
> > int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct
> drm_file *file_priv,
> > struct amdgpu_device *adev);
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > index ae221eaa5b82..cfa3cb46a983 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > @@ -304,6 +304,7 @@ static int mes_userq_mqd_create(struct
> amdgpu_usermode_queue *queue,
> > userq_props->use_doorbell = true;
> > userq_props->doorbell_index = queue->doorbell_index;
> > userq_props->fence_address = queue->fence_drv->gpu_addr;
> > + userq_props->queue_percentage = 100;
> >
> > if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
> > struct drm_amdgpu_userq_mqd_compute_gfx11
> > *compute_mqd; diff --git a/include/uapi/drm/amdgpu_drm.h
> > b/include/uapi/drm/amdgpu_drm.h index 41b6b3cea834..c42328453652
> > 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -59,6 +59,7 @@ extern "C" {
> > #define DRM_AMDGPU_USERQ_WAIT 0x18
> > #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
> > #define DRM_AMDGPU_USERQ_SET_CU_MASK 0x1a
> > +#define DRM_AMDGPU_USERQ_UPDATE_QUEUE 0x1b
> >
> > #define DRM_IOCTL_AMDGPU_GEM_CREATE
> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union
> drm_amdgpu_gem_create)
> > #define DRM_IOCTL_AMDGPU_GEM_MMAP
> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union
> drm_amdgpu_gem_mmap)
> > @@ -81,6 +82,7 @@ extern "C" {
> > #define DRM_IOCTL_AMDGPU_USERQ_WAIT
> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct
> drm_amdgpu_userq_wait)
> > #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES
> DRM_IOWR(DRM_COMMAND_BASE +
> > DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
> > #define DRM_IOCTL_AMDGPU_USERQ_SET_CU_MASK
> DRM_IOWR(DRM_COMMAND_BASE +
> > DRM_AMDGPU_USERQ_SET_CU_MASK, struct
> amdgpu_userq_set_cu_mask_args)
> > +#define DRM_IOCTL_AMDGPU_USERQ_UPDATE_QUEUE
> DRM_IOWR(DRM_COMMAND_BASE
> > ++ DRM_AMDGPU_USERQ_UPDATE_QUEUE, struct
> > +amdgpu_userq_update_queue_args)
> >
> > /**
> > * DOC: memory domains
> > @@ -343,6 +345,7 @@ union drm_amdgpu_ctx { #define
> > AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin
> only */
> > /* for queues that need access to protected content */ #define
> > AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2)
> > +#define AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE 100
> >
> > /*
> > * This structure is a container to pass input configuration @@
> > -440,6 +443,16 @@ struct amdgpu_userq_set_cu_mask_args {
> > __u64 cu_mask_ptr;
> > };
> >
> > +
> > +/* IOCTL parameters used to set user queue updates */ struct
> > +amdgpu_userq_update_queue_args {
> > + __u64 ring_base_address;
> > + __u32 queue_id;
> > + __u32 ring_size;
>
> Do we actually need to be able to change the ring base address and size? I
> know
> KFD can do it, but we should verify if we really need to be able to change
> the ring
> size at runtime.
>
> > + __u32 queue_percentage;
> > + __u32 queue_priority;
>
> We should probably also make these available for USERQ create as well.
> I would fold CU mask in as well.
>
> Alex
>
> > +};
> > +
> > /* GFX V11 IP specific MQD parameters */ struct
> > drm_amdgpu_userq_mqd_gfx11 {
> > /**
> > --
> > 2.49.0
> >