Add an update operation for user mode queues so that queue properties
(CU mask, queue percentage, HQD priority and PM4 target XCC) can be
adjusted dynamically at runtime, enabling better resource management
and performance tuning for compute workloads.
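
For illustration, user space would exercise the new op roughly as follows.
This is only a sketch: the ioctl plumbing (union drm_amdgpu_userq,
DRM_AMDGPU_USERQ, the queue_id field) is assumed from the existing user
queue UAPI and may differ.

  /* Hypothetical user space sketch, not part of this patch. */
  #include <stdint.h>
  #include <string.h>
  #include <xf86drm.h>
  #include "amdgpu_drm.h"

  static int userq_modify_example(int drm_fd, uint32_t queue_id,
                                  uint32_t *cu_mask, uint32_t cu_mask_count)
  {
          /*
           * Updated properties; a real caller would also set eop_va and any
           * other MQD fields it wants to keep, since the whole struct is
           * reparsed by the MODIFY op.
           */
          struct drm_amdgpu_userq_mqd_compute_gfx11 mqd = {
                  .cu_mask_ptr = (uintptr_t)cu_mask,
                  .cu_mask_count = cu_mask_count, /* in bits, multiple of 32 */
                  .queue_percentage = 100,
                  .hqd_queue_priority = 7,        /* 0..15 */
          };
          union drm_amdgpu_userq args;

          memset(&args, 0, sizeof(args));
          args.in.op = AMDGPU_USERQ_OP_MODIFY;
          args.in.queue_id = queue_id;            /* assumed field name */
          /* args.in.queue_va / queue_size would also carry the current ring */
          args.in.mqd = (uintptr_t)&mqd;
          args.in.mqd_size = sizeof(mqd);

          return drmCommandWriteRead(drm_fd, DRM_AMDGPU_USERQ,
                                     &args, sizeof(args));
  }
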
v2: Return an error for non-compute queues; drop the minfo parameter. (Alex)
v3: Put the new parameters in drm_amdgpu_userq_mqd_compute_gfx11. (Alex)

Suggested-by: Alex Deucher <[email protected]>
Signed-off-by: Jesse Zhang <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +
drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 167 ++++++++++++++++++++-
include/uapi/drm/amdgpu_drm.h | 29 ++++
3 files changed, 196 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a8f4f73fa0ce..ad136145316b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -819,6 +819,9 @@ struct amdgpu_mqd_prop {
uint32_t cu_mask_count;
uint32_t cu_flags;
bool is_user_cu_masked;
+ uint32_t queue_percentage;
+ /* used in gfx9 and gfx12.1 */
+ uint32_t pm4_target_xcc;
};
struct amdgpu_mqd {
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index f2309d72bbe6..ee91721f322b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -30,6 +30,26 @@
#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
+/* Map HQD queue priority (0-15) to pipe priority */
+static const int amdgpu_userq_pipe_priority_map[] = {
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_1,
+	AMDGPU_RING_PRIO_1,
+	AMDGPU_RING_PRIO_1,
+	AMDGPU_RING_PRIO_1,
+	AMDGPU_RING_PRIO_2,
+	AMDGPU_RING_PRIO_2,
+	AMDGPU_RING_PRIO_2,
+	AMDGPU_RING_PRIO_2,
+	AMDGPU_RING_PRIO_2
+};
+
static int
mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
{
@@ -272,6 +292,104 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
return r;
}
+/**
+ * amdgpu_userq_set_compute_mqd - Parse a compute MQD and update queue properties
+ * @queue: Target user mode queue
+ * @compute_mqd: User-provided compute MQD (gfx11 layout)
+ *
+ * Parses and validates the user-supplied MQD fields and updates the queue
+ * properties only; no hardware MQD is programmed here, that is handled in
+ * the MES layer.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+static int amdgpu_userq_set_compute_mqd(struct amdgpu_usermode_queue *queue,
+					struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd)
+{
+	struct amdgpu_mqd_prop *props;
+	struct amdgpu_device *adev;
+	const uint32_t max_num_cus = 1024;
+	size_t cu_mask_size;
+	uint32_t count;
+	uint32_t *cu_mask;
+
+	if (!queue || !compute_mqd)
+		return -EINVAL;
+
+	props = queue->userq_prop;
+	adev = queue->userq_mgr->adev;
+
+	if (compute_mqd->queue_percentage > AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE) {
+		DRM_ERROR("Queue percentage must be between 0 and %d\n",
+			  AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE);
+		return -EINVAL;
+	}
+
+	/* Validate priority */
+	if (compute_mqd->hqd_queue_priority > AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM) {
+		DRM_ERROR("Queue priority must be between 0 and %d\n",
+			  AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM);
+		return -EINVAL;
+	}
+
+	/* Validate and set the CU mask property */
+	if (compute_mqd->cu_mask_count) {
+		if (compute_mqd->cu_mask_count % 32 != 0) {
+			DRM_ERROR("CU mask count must be a multiple of 32\n");
+			return -EINVAL;
+		}
+		count = compute_mqd->cu_mask_count;
+
+		/* Limit the CU mask size to prevent excessive memory allocation */
+		if (count > max_num_cus) {
+			DRM_ERROR("CU mask cannot be greater than 1024 bits\n");
+			count = max_num_cus;
+		}
+		cu_mask_size = sizeof(uint32_t) * (count / 32);
+
+		/* Copy the CU mask from user space */
+		cu_mask = memdup_user(u64_to_user_ptr(compute_mqd->cu_mask_ptr),
+				      cu_mask_size);
+		if (IS_ERR(cu_mask))
+			return PTR_ERR(cu_mask);
+
+		/* Validate pairwise CU mask for WGP-based ASICs */
+		if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0)) {
+			for (uint32_t i = 0; i < count; i += 2) {
+				uint32_t cu_pair = (cu_mask[i / 32] >> (i % 32)) & 0x3;
+
+				if (cu_pair && cu_pair != 0x3) {
+					DRM_ERROR("CUs must be enabled in adjacent pairs\n");
+					kfree(cu_mask);
+					return -EINVAL;
+				}
+			}
+		}
+
+		/* Replace any previously set CU mask */
+		kfree(props->cu_mask);
+		props->cu_mask = cu_mask;
+		props->cu_mask_count = count;
+		props->is_user_cu_masked = true;
+	}
+
+	/* Set the HQD priority and other compute properties */
+	props->queue_percentage = compute_mqd->queue_percentage;
+	props->pm4_target_xcc = compute_mqd->pm4_target_xcc;
+	props->hqd_queue_priority = compute_mqd->hqd_queue_priority;
+	props->hqd_pipe_priority =
+		amdgpu_userq_pipe_priority_map[compute_mqd->hqd_queue_priority];
+	props->eop_gpu_addr = compute_mqd->eop_va;
+
+	return 0;
+}
+
static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
struct drm_amdgpu_userq_in *args_in)
{
@@ -326,9 +444,10 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
if (r)
goto free_mqd;
- userq_props->eop_gpu_addr = compute_mqd->eop_va;
- userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
- userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
+ r = amdgpu_userq_set_compute_mqd(queue, compute_mqd);
+ if (r)
+ goto free_mqd;
+
userq_props->hqd_active = false;
userq_props->tmz_queue =
	mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
@@ -432,11 +551,52 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
return r;
}
+static int mes_userq_mqd_update(struct amdgpu_usermode_queue *queue,
+				struct drm_amdgpu_userq_in *args_in)
+{
+	struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd_v11;
+	struct amdgpu_mqd_prop *userq_props;
+	struct amdgpu_mqd *mqd_hw_default;
+	struct amdgpu_device *adev;
+	int retval;
+
+	if (!queue || !queue->userq_prop)
+		return -EINVAL;
+
+	if (queue->queue_type != AMDGPU_HW_IP_COMPUTE)
+		return -EINVAL;
+
+	adev = queue->userq_mgr->adev;
+	userq_props = queue->userq_prop;
+	mqd_hw_default = &adev->mqds[queue->queue_type];
+
+	if (args_in->mqd_size != sizeof(*compute_mqd_v11)) {
+		DRM_ERROR("Invalid compute IP MQD size\n");
+		return -EINVAL;
+	}
+
+	compute_mqd_v11 = memdup_user(u64_to_user_ptr(args_in->mqd),
+				      args_in->mqd_size);
+	if (IS_ERR(compute_mqd_v11)) {
+		DRM_ERROR("Failed to read user MQD\n");
+		return PTR_ERR(compute_mqd_v11);
+	}
+
+	retval = amdgpu_userq_set_compute_mqd(queue, compute_mqd_v11);
+	if (retval)
+		goto free;
+
+	userq_props->queue_size = args_in->queue_size;
+	userq_props->hqd_base_gpu_addr = args_in->queue_va;
+
+	retval = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr,
+					  userq_props);
+
+free:
+	kfree(compute_mqd_v11);
+	return retval;
+}
+
static void mes_userq_mqd_destroy(struct amdgpu_usermode_queue *queue)
{
struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
+ kfree(queue->userq_prop->cu_mask);
kfree(queue->userq_prop);
amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
}
@@ -513,6 +673,7 @@ static int mes_userq_restore(struct amdgpu_usermode_queue *queue)
const struct amdgpu_userq_funcs userq_mes_funcs = {
.mqd_create = mes_userq_mqd_create,
+ .mqd_update = mes_userq_mqd_update,
.mqd_destroy = mes_userq_mqd_destroy,
.unmap = mes_userq_unmap,
.map = mes_userq_map,
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index ab2bf47553e1..aa9b31578c6b 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -330,6 +330,7 @@ union drm_amdgpu_ctx {
/* user queue IOCTL operations */
#define AMDGPU_USERQ_OP_CREATE 1
#define AMDGPU_USERQ_OP_FREE 2
+#define AMDGPU_USERQ_OP_MODIFY 3
/* queue priority levels */
/* low < normal low < normal high < high */
@@ -341,6 +342,7 @@ union drm_amdgpu_ctx {
#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */
/* for queues that need access to protected content */
#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2)
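+/* upper bound for the queue_percentage property of a user queue */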
+#define AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE 100
/*
* This structure is a container to pass input configuration
@@ -460,6 +462,33 @@ struct drm_amdgpu_userq_mqd_compute_gfx11 {
* to get the size.
*/
__u64 eop_va;
+ /**
+ * @cu_mask_ptr: User-space pointer to the CU (Compute Unit) mask array
+ * Points to an array of __u32 words; each bit enables (1) or disables (0)
+ * one CU for this queue
+ */
+ __u64 cu_mask_ptr;
+ /**
+ * @cu_mask_count: Number of valid bits in the CU mask
+ * Must be a multiple of 32; values above 1024 are clamped. Each __u32
+ * element of the array covers 32 CUs/WGPs
+ */
+ __u32 cu_mask_count;
+ /**
+ * @queue_percentage: Queue resource allocation percentage (0-100)
+ * Defines the percentage of GPU resources allocated to this queue
+ */
+ __u32 queue_percentage;
+ /**
+ * @hqd_queue_priority: HQD queue priority (0-15)
+ * Higher values indicate higher scheduling priority for the queue
+ */
+ __u32 hqd_queue_priority;
+ /**
+ * @pm4_target_xcc: PM4 target XCC identifier (for gfx9/gfx12.1)
+ * Specifies the target XCC (Cross Compute Complex) for PM4 commands
+ */
+ __u32 pm4_target_xcc;
};
/* userq signal/wait ioctl */
--
2.49.0