On Thu, Jan 22, 2026 at 4:37 AM Jesse.Zhang <[email protected]> wrote:
>
> Add AMDGPU_USERQ_UPDATE_QUEUE IOCTL to support updating user mode
> queue properties after creation. This allows userspace to modify
> queue attributes like ring buffer address, ring size, queue
> priority, and queue percentage dynamically.
>
> Signed-off-by: Jesse Zhang <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c  | 106 +++++++++++++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h  |   6 ++
>  drivers/gpu/drm/amd/amdgpu/mes_userqueue.c |   1 +
>  include/uapi/drm/amdgpu_drm.h              |  13 +++
>  6 files changed, 122 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 246d74205b48..1679075f679b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -813,6 +813,9 @@ struct amdgpu_mqd_prop {
>         uint32_t cu_mask_count;
>         uint32_t cu_flags;
>         bool is_user_cu_masked;
> +       uint32_t queue_percentage;
> +       /* used in gfx9 and gfx12.1 */
> +       uint32_t pm4_target_xcc;
>  };
>
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 9c425169a4f9..bc6cc1517221 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -3075,6 +3075,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
>         DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>         DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>         DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_UPDATE_QUEUE, amdgpu_update_queue_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),

I don't think we need a new IOCTL.  Just a new op for AMDGPU_USERQ.
E.g., AMDGPU_USERQ_OP_MODIFY
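
Roughly something like this (untested sketch; the op value and handler
names are just placeholders for whatever we settle on):

  /* include/uapi/drm/amdgpu_drm.h */
  #define AMDGPU_USERQ_OP_CREATE  1
  #define AMDGPU_USERQ_OP_FREE    2
  #define AMDGPU_USERQ_OP_MODIFY  3   /* update properties of an existing queue */

  /* amdgpu_userq.c: amdgpu_userq_ioctl() */
  switch (args->in.op) {
  case AMDGPU_USERQ_OP_CREATE:
          r = amdgpu_userq_create(filp, args);
          break;
  case AMDGPU_USERQ_OP_FREE:
          r = amdgpu_userq_destroy(filp, args->in.queue_id);
          break;
  case AMDGPU_USERQ_OP_MODIFY:
          r = amdgpu_userq_modify(filp, args);   /* new helper for this patch */
          break;
  default:
          return -EINVAL;
  }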

>         DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SET_CU_MASK, amdgpu_userq_set_cu_mask_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>         DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>         DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index 4cbf75723c08..922f73b92db3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -34,6 +34,26 @@
>  #include "amdgpu_hmm.h"
>  #include "amdgpu_userq_fence.h"
>
> +/* Mapping queue priority to pipe priority, indexed by queue priority */
> +int amdgpu_userq_pipe_priority_map[] = {
> +       AMDGPU_RING_PRIO_0,
> +       AMDGPU_RING_PRIO_0,
> +       AMDGPU_RING_PRIO_0,
> +       AMDGPU_RING_PRIO_0,
> +       AMDGPU_RING_PRIO_0,
> +       AMDGPU_RING_PRIO_0,
> +       AMDGPU_RING_PRIO_0,
> +       AMDGPU_RING_PRIO_1,
> +       AMDGPU_RING_PRIO_1,
> +       AMDGPU_RING_PRIO_1,
> +       AMDGPU_RING_PRIO_1,
> +       AMDGPU_RING_PRIO_2,
> +       AMDGPU_RING_PRIO_2,
> +       AMDGPU_RING_PRIO_2,
> +       AMDGPU_RING_PRIO_2,
> +       AMDGPU_RING_PRIO_2
> +};
> +
>  u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
>  {
>         int i;
> @@ -907,7 +927,6 @@ static int amdgpu_userq_update_queue(struct amdgpu_usermode_queue *queue,
>         struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
>         struct amdgpu_device *adev = uq_mgr->adev;
>         const struct amdgpu_userq_funcs *uq_funcs;
> -       bool unmap_queue = false;
>         int r;
>
>         uq_funcs = adev->userq_funcs[queue->queue_type];
> @@ -923,23 +942,94 @@ static int amdgpu_userq_update_queue(struct amdgpu_usermode_queue *queue,
>                 r = amdgpu_userq_unmap_helper(queue);
>                 if (r)
>                         return r;
> -               unmap_queue = true;
>         }
>
>         r = uq_funcs->mqd_update(queue, minfo);
> +       if (r)
> +               return r;
>
> -       if (unmap_queue) {
> -               int map_r = amdgpu_userq_map_helper(queue);
> -               if (map_r)
> -                       dev_err(adev->dev, "Failed to remap queue %llu after update\n",
> +       /*
> +        * If the queue is considered active (has valid size, address, and percentage),
> +        * we attempt to map it. This effectively starts the queue or restarts it
> +        * if it was previously running.
> +        */
> +       if (AMDGPU_USERQ_IS_ACTIVE(queue)) {
> +               r = amdgpu_userq_map_helper(queue);
> +               if (r)
> +                       drm_file_err(uq_mgr->file, "Failed to remap queue %llu after update\n",
>                                 queue->doorbell_index);
> -               if (!r)
> -                       r = map_r;
>         }
>
>         return r;
>  }
>
> +int amdgpu_update_queue_ioctl(struct drm_device *dev, void *data,
> +                             struct drm_file *filp)
> +{
> +       struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +       struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
> +       struct amdgpu_userq_update_queue_args *args = data;
> +       struct amdgpu_usermode_queue *queue;
> +       struct amdgpu_mqd_prop *props;
> +       int r;
> +
> +       /*
> +        * Repurpose queue percentage to accommodate new features:
> +        * bit 0-7: queue percentage
> +        * bit 8-15: pm4_target_xcc
> +        */

Just make the target xcc an explicit parameter.
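
I.e., keep the percentage as a plain 0-100 value and add a separate
field, e.g. (sketch only; layout/padding to be sorted out):

  struct amdgpu_userq_update_queue_args {
          __u64 ring_base_address;
          __u32 queue_id;
          __u32 ring_size;
          __u32 queue_percentage;   /* 0-100, nothing packed into it */
          __u32 queue_priority;
          __u32 pm4_target_xcc;     /* explicit, no bit shifting */
          __u32 pad;
  };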

> +       if ((args->queue_percentage & 0xFF) > AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE) {
> +               drm_file_err(uq_mgr->file, "Queue percentage must be between 0 to AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE\n");
> +               return -EINVAL;
> +       }
> +
> +       /* Validate priority */
> +       if (args->queue_priority > AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM) {
> +               drm_file_err(uq_mgr->file, "Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
> +               return -EINVAL;
> +       }
> +
> +       /* Validate ring size */
> +       if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
> +               drm_file_err(uq_mgr->file, "Ring size must be a power of 2 or 0\n");
> +               return -EINVAL;
> +       }
> +
> +       if (args->ring_size > 0 && args->ring_size < AMDGPU_GPU_PAGE_SIZE) {
> +               args->ring_size = AMDGPU_GPU_PAGE_SIZE;
> +               drm_file_err(uq_mgr->file, "Size clamped to AMDGPU_GPU_PAGE_SIZE\n");
> +       }
> +
> +       if ((args->ring_base_address) &&
> +               (!access_ok((const void __user *) args->ring_base_address,
> +                       sizeof(uint64_t)))) {
> +               drm_file_err(uq_mgr->file, "Can't access ring base address\n");
> +               return -EFAULT;
> +       }
> +
> +       mutex_lock(&uq_mgr->userq_mutex);
> +       queue = amdgpu_userq_find(uq_mgr, args->queue_id);
> +       if (!queue) {
> +               mutex_unlock(&uq_mgr->userq_mutex);
> +               return -EINVAL;
> +       }
> +
> +       props = queue->userq_prop;
> +       props->queue_size = args->ring_size;
> +       props->hqd_base_gpu_addr = args->ring_base_address;
> +       props->queue_percentage = args->queue_percentage & 0xFF;
> +       /* bit 8-15 are repurposed to be PM4 target XCC */

We should handle this explicitly rather than stashing it in percentage.
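
With an explicit field the driver side then just becomes (sketch):

          props->queue_percentage = args->queue_percentage;
          props->pm4_target_xcc = args->pm4_target_xcc;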

> +       props->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
> +       props->hqd_pipe_priority = amdgpu_userq_pipe_priority_map[args->queue_priority];
> +       props->hqd_queue_priority = args->queue_priority;
> +
> +       r = amdgpu_userq_update_queue(queue, NULL);
> +
> +       mutex_unlock(&uq_mgr->userq_mutex);
> +
> +       return r;
> +}
> +
>  int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void *data,
>                                      struct drm_file *filp)
>  {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> index 43bf104d2fb8..c8ea81c17c6e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> @@ -31,6 +31,9 @@
>  #define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base)
>  #define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
>  #define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name)
> +#define AMDGPU_USERQ_IS_ACTIVE(q) ((q)->userq_prop->queue_size > 0 &&  \
> +                           (q)->userq_prop->hqd_base_gpu_addr != 0 &&  \
> +                           (q)->userq_prop->queue_percentage > 0)
>
>  enum amdgpu_userq_state {
>         AMDGPU_USERQ_STATE_UNMAPPED = 0,
> @@ -118,6 +121,9 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp
>  int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void *data,
>                                         struct drm_file *filp);
>
> +int amdgpu_update_queue_ioctl(struct drm_device *dev, void *data,
> +                             struct drm_file *filp);
> +
>  int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
>                           struct amdgpu_device *adev);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> index ae221eaa5b82..cfa3cb46a983 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> @@ -304,6 +304,7 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
>         userq_props->use_doorbell = true;
>         userq_props->doorbell_index = queue->doorbell_index;
>         userq_props->fence_address = queue->fence_drv->gpu_addr;
> +       userq_props->queue_percentage = 100;
>
>         if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
>                 struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 41b6b3cea834..c42328453652 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -59,6 +59,7 @@ extern "C" {
>  #define DRM_AMDGPU_USERQ_WAIT          0x18
>  #define DRM_AMDGPU_GEM_LIST_HANDLES    0x19
>  #define DRM_AMDGPU_USERQ_SET_CU_MASK   0x1a
> +#define DRM_AMDGPU_USERQ_UPDATE_QUEUE  0x1b
>
>  #define DRM_IOCTL_AMDGPU_GEM_CREATE    DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>  #define DRM_IOCTL_AMDGPU_GEM_MMAP      DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -81,6 +82,7 @@ extern "C" {
>  #define DRM_IOCTL_AMDGPU_USERQ_WAIT    DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>  #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
>  #define DRM_IOCTL_AMDGPU_USERQ_SET_CU_MASK DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SET_CU_MASK, struct amdgpu_userq_set_cu_mask_args)
> +#define DRM_IOCTL_AMDGPU_USERQ_UPDATE_QUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_UPDATE_QUEUE, struct amdgpu_userq_update_queue_args)
>
>  /**
>   * DOC: memory domains
> @@ -343,6 +345,7 @@ union drm_amdgpu_ctx {
>  #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */
>  /* for queues that need access to protected content */
>  #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE  (1 << 2)
> +#define AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE      100
>
>  /*
>   * This structure is a container to pass input configuration
> @@ -440,6 +443,16 @@ struct amdgpu_userq_set_cu_mask_args {
>         __u64 cu_mask_ptr;
>  };
>
> +
> +/* IOCTL parameters used to set user queue updates */
> +struct amdgpu_userq_update_queue_args {
> +       __u64 ring_base_address;
> +       __u32 queue_id;
> +       __u32 ring_size;

Do we actually need to be able to change the ring base address and
size at runtime?  I know KFD can do it, but we should verify that we
really need that capability here.

> +       __u32 queue_percentage;
> +       __u32 queue_priority;

We should probably make these available at USERQ create time as well.
I would fold the CU mask in too.
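
E.g. something along these lines on the create/modify path (sketch; the
existing drm_amdgpu_userq_in members are elided and the new field names
are just suggestions):

  struct drm_amdgpu_userq_in {
          /* ... existing members ... */
          /* honored by OP_CREATE and a future OP_MODIFY */
          __u32 queue_priority;
          __u32 queue_percentage;
          __u32 cu_mask_count;
          __u32 pad;
          __u64 cu_mask_ptr;        /* optional, 0 = no CU mask */
  };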

Alex

> +};
> +
>  /* GFX V11 IP specific MQD parameters */
>  struct drm_amdgpu_userq_mqd_gfx11 {
>         /**
> --
> 2.49.0
>
