On Thu, Jan 22, 2026 at 4:37 AM Jesse.Zhang <[email protected]> wrote:
>
> Add AMDGPU_USERQ_UPDATE_QUEUE IOCTL to support updating user mode
> queue properties after creation. This allows userspace to modify
> queue attributes like ring buffer address, ring size, queue
> priority, and queue percentage dynamically.
>
> Signed-off-by: Jesse Zhang <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 106 +++++++++++++++++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 6 ++
> drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 1 +
> include/uapi/drm/amdgpu_drm.h | 13 +++
> 6 files changed, 122 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 246d74205b48..1679075f679b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -813,6 +813,9 @@ struct amdgpu_mqd_prop {
> uint32_t cu_mask_count;
> uint32_t cu_flags;
> bool is_user_cu_masked;
> + uint32_t queue_percentage;
> + /* used in gfx9 and gfx12.1 */
> + uint32_t pm4_target_xcc;
> };
>
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 9c425169a4f9..bc6cc1517221 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -3075,6 +3075,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
> DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_UPDATE_QUEUE, amdgpu_update_queue_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
I don't think we need a new IOCTL for this. Just add a new op for the
existing AMDGPU_USERQ ioctl, e.g. AMDGPU_USERQ_OP_MODIFY.
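Roughly something like this (untested sketch; amdgpu_userq_modify() is just a
placeholder name and I'm assuming 3 is the next free op value):

    /* include/uapi/drm/amdgpu_drm.h, next to the existing USERQ ops */
    #define AMDGPU_USERQ_OP_MODIFY 3

    /* amdgpu_userq.c, new case in the op switch of amdgpu_userq_ioctl() */
    case AMDGPU_USERQ_OP_MODIFY:
            /* hypothetical helper holding the body of
             * amdgpu_update_queue_ioctl() from this patch
             */
            r = amdgpu_userq_modify(filp, args);
            break;

That keeps everything behind the existing DRM_IOCTL_AMDGPU_USERQ entry and
doesn't burn another ioctl number.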
> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SET_CU_MASK, amdgpu_userq_set_cu_mask_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index 4cbf75723c08..922f73b92db3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -34,6 +34,26 @@
> #include "amdgpu_hmm.h"
> #include "amdgpu_userq_fence.h"
>
> +/* Mapping queue priority to pipe priority, indexed by queue priority */
> +int amdgpu_userq_pipe_priority_map[] = {
> + AMDGPU_RING_PRIO_0,
> + AMDGPU_RING_PRIO_0,
> + AMDGPU_RING_PRIO_0,
> + AMDGPU_RING_PRIO_0,
> + AMDGPU_RING_PRIO_0,
> + AMDGPU_RING_PRIO_0,
> + AMDGPU_RING_PRIO_0,
> + AMDGPU_RING_PRIO_1,
> + AMDGPU_RING_PRIO_1,
> + AMDGPU_RING_PRIO_1,
> + AMDGPU_RING_PRIO_1,
> + AMDGPU_RING_PRIO_2,
> + AMDGPU_RING_PRIO_2,
> + AMDGPU_RING_PRIO_2,
> + AMDGPU_RING_PRIO_2,
> + AMDGPU_RING_PRIO_2
> +};
> +
> u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
> {
> int i;
> @@ -907,7 +927,6 @@ static int amdgpu_userq_update_queue(struct amdgpu_usermode_queue *queue,
> struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
> struct amdgpu_device *adev = uq_mgr->adev;
> const struct amdgpu_userq_funcs *uq_funcs;
> - bool unmap_queue = false;
> int r;
>
> uq_funcs = adev->userq_funcs[queue->queue_type];
> @@ -923,23 +942,94 @@ static int amdgpu_userq_update_queue(struct amdgpu_usermode_queue *queue,
> r = amdgpu_userq_unmap_helper(queue);
> if (r)
> return r;
> - unmap_queue = true;
> }
>
> r = uq_funcs->mqd_update(queue, minfo);
> + if (r)
> + return r;
>
> - if (unmap_queue) {
> - int map_r = amdgpu_userq_map_helper(queue);
> - if (map_r)
> - dev_err(adev->dev, "Failed to remap queue %llu after update\n",
> + /*
> + * If the queue is considered active (has valid size, address, and percentage),
> + * we attempt to map it. This effectively starts the queue or restarts it
> + * if it was previously running.
> + */
> + if (AMDGPU_USERQ_IS_ACTIVE(queue)) {
> + r = amdgpu_userq_map_helper(queue);
> + if (r)
> + drm_file_err(uq_mgr->file, "Failed to remap queue %llu after update\n",
> queue->doorbell_index);
> - if (!r)
> - r = map_r;
> }
>
> return r;
> }
>
> +int amdgpu_update_queue_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp)
> +{
> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
> + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
> + struct amdgpu_userq_update_queue_args *args = data;
> + struct amdgpu_usermode_queue *queue;
> + struct amdgpu_mqd_prop *props;
> + int r;
> +
> + /*
> + * Repurpose queue percentage to accommodate new features:
> + * bit 0-7: queue percentage
> + * bit 8-15: pm4_target_xcc
> + */
Just make the target xcc an explicit parameter.
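I.e., in whatever uapi struct this ends up in, something like (sketch only):

    struct amdgpu_userq_update_queue_args {
            __u64 ring_base_address;
            __u32 queue_id;
            __u32 ring_size;
            __u32 queue_percentage;   /* just 0-100, nothing packed in */
            __u32 queue_priority;
            __u32 pm4_target_xcc;     /* explicit field instead of bits 8-15 */
            __u32 pad;                /* keep the struct 64-bit aligned */
    };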
> + if ((args->queue_percentage & 0xFF) > AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE) {
> + drm_file_err(uq_mgr->file, "Queue percentage must be between 0 to AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE\n");
> + return -EINVAL;
> + }
> +
> + /* Validate priority */
> + if (args->queue_priority > AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM) {
> + drm_file_err(uq_mgr->file, "Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
> + return -EINVAL;
> + }
> +
> + /* Validate ring size */
> + if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
> + drm_file_err(uq_mgr->file, "Ring size must be a power of 2 or 0\n");
> + return -EINVAL;
> + }
> +
> + if (args->ring_size > 0 && args->ring_size < AMDGPU_GPU_PAGE_SIZE) {
> + args->ring_size = AMDGPU_GPU_PAGE_SIZE;
> + drm_file_err(uq_mgr->file, "Size clamped to AMDGPU_GPU_PAGE_SIZE\n");
> + }
> +
> + if ((args->ring_base_address) &&
> + (!access_ok((const void __user *) args->ring_base_address,
> + sizeof(uint64_t)))) {
> + drm_file_err(uq_mgr->file, "Can't access ring base address\n");
> + return -EFAULT;
> + }
> +
> + mutex_lock(&uq_mgr->userq_mutex);
> + queue = amdgpu_userq_find(uq_mgr, args->queue_id);
> + if (!queue) {
> + mutex_unlock(&uq_mgr->userq_mutex);
> + return -EINVAL;
> + }
> +
> + props = queue->userq_prop;
> + props->queue_size = args->ring_size;
> + props->hqd_base_gpu_addr = args->ring_base_address;
> + props->queue_percentage = args->queue_percentage & 0xFF;
> + /* bit 8-15 are repurposed to be PM4 target XCC */
We should handle this explicitly rather than stashing it in percentage.
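With an explicit field (as sketched above) this side then simply becomes:

    props->queue_percentage = args->queue_percentage;
    props->pm4_target_xcc = args->pm4_target_xcc;

and userspace never needs to know about the bit packing.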
> + props->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
> + props->hqd_pipe_priority = amdgpu_userq_pipe_priority_map[args->queue_priority];
> + props->hqd_queue_priority = args->queue_priority;
> +
> + r = amdgpu_userq_update_queue(queue, NULL);
> +
> + mutex_unlock(&uq_mgr->userq_mutex);
> +
> + return r;
> +}
> +
> int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void *data,
> struct drm_file *filp)
> {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> index 43bf104d2fb8..c8ea81c17c6e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> @@ -31,6 +31,9 @@
> #define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base)
> #define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
> #define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name)
> +#define AMDGPU_USERQ_IS_ACTIVE(q) ((q)->userq_prop->queue_size > 0 && \
> + (q)->userq_prop->hqd_base_gpu_addr != 0 && \
> + (q)->userq_prop->queue_percentage > 0)
>
> enum amdgpu_userq_state {
> AMDGPU_USERQ_STATE_UNMAPPED = 0,
> @@ -118,6 +121,9 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp
> int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void *data,
> struct drm_file *filp);
>
> +int amdgpu_update_queue_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp);
> +
> int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
> struct amdgpu_device *adev);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> index ae221eaa5b82..cfa3cb46a983 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> @@ -304,6 +304,7 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
> userq_props->use_doorbell = true;
> userq_props->doorbell_index = queue->doorbell_index;
> userq_props->fence_address = queue->fence_drv->gpu_addr;
> + userq_props->queue_percentage = 100;
>
> if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
> struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 41b6b3cea834..c42328453652 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -59,6 +59,7 @@ extern "C" {
> #define DRM_AMDGPU_USERQ_WAIT 0x18
> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
> #define DRM_AMDGPU_USERQ_SET_CU_MASK 0x1a
> +#define DRM_AMDGPU_USERQ_UPDATE_QUEUE 0x1b
>
> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -81,6 +82,7 @@ extern "C" {
> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
> #define DRM_IOCTL_AMDGPU_USERQ_SET_CU_MASK DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SET_CU_MASK, struct amdgpu_userq_set_cu_mask_args)
> +#define DRM_IOCTL_AMDGPU_USERQ_UPDATE_QUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_UPDATE_QUEUE, struct amdgpu_userq_update_queue_args)
>
> /**
> * DOC: memory domains
> @@ -343,6 +345,7 @@ union drm_amdgpu_ctx {
> #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */
> /* for queues that need access to protected content */
> #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2)
> +#define AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE 100
>
> /*
> * This structure is a container to pass input configuration
> @@ -440,6 +443,16 @@ struct amdgpu_userq_set_cu_mask_args {
> __u64 cu_mask_ptr;
> };
>
> +
> +/* IOCTL parameters used to set user queue updates */
> +struct amdgpu_userq_update_queue_args {
> + __u64 ring_base_address;
> + __u32 queue_id;
> + __u32 ring_size;
Do we actually need to be able to change the ring base address and size?
I know KFD can do it, but we should verify that changing the ring size at
runtime is really needed before exposing it here.
> + __u32 queue_percentage;
> + __u32 queue_priority;
We should probably make these available at USERQ create time too, and I
would fold the CU mask in as well.
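E.g., roughly (sketch only; field names are placeholders, and growing
drm_amdgpu_userq_in would have to be done in a backwards compatible way):

    /* extra create-time inputs, reusable by a modify op as well */
    __u32 queue_priority;
    __u32 queue_percentage;
    __u32 pm4_target_xcc;
    __u32 cu_mask_count;   /* number of __u32s at cu_mask_ptr, 0 = keep default */
    __u64 cu_mask_ptr;     /* folds SET_CU_MASK into create/modify */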
Alex
> +};
> +
> /* GFX V11 IP specific MQD parameters */
> struct drm_amdgpu_userq_mqd_gfx11 {
> /**
> --
> 2.49.0
>