Add an update operation for user mode queues so that queue properties
(CU mask, queue percentage, HQD priority and PM4 target XCC) can be
adjusted dynamically at runtime, enabling better resource management
and performance tuning for compute workloads.
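
For illustration, user space would exercise the new op roughly as follows.
This is only a sketch: the ioctl plumbing (union drm_amdgpu_userq,
DRM_AMDGPU_USERQ, the queue_id field) is assumed from the existing user
queue UAPI and may differ.

  /* Hypothetical user space sketch, not part of this patch. */
  #include <stdint.h>
  #include <string.h>
  #include <xf86drm.h>
  #include "amdgpu_drm.h"

  static int userq_modify_example(int drm_fd, uint32_t queue_id,
                                  uint32_t *cu_mask, uint32_t cu_mask_count)
  {
          /*
           * Updated properties; a real caller would also set eop_va and any
           * other MQD fields it wants to keep, since the whole struct is
           * reparsed by the MODIFY op.
           */
          struct drm_amdgpu_userq_mqd_compute_gfx11 mqd = {
                  .cu_mask_ptr = (uintptr_t)cu_mask,
                  .cu_mask_count = cu_mask_count, /* in bits, multiple of 32 */
                  .queue_percentage = 100,
                  .hqd_queue_priority = 7,        /* 0..15 */
          };
          union drm_amdgpu_userq args;

          memset(&args, 0, sizeof(args));
          args.in.op = AMDGPU_USERQ_OP_MODIFY;
          args.in.queue_id = queue_id;            /* assumed field name */
          /* args.in.queue_va / queue_size would also carry the current ring */
          args.in.mqd = (uintptr_t)&mqd;
          args.in.mqd_size = sizeof(mqd);

          return drmCommandWriteRead(drm_fd, DRM_AMDGPU_USERQ,
                                     &args, sizeof(args));
  }
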
v2: Return an error for non-compute queues; drop the minfo parameter. (Alex)
v3: Put the new parameters in drm_amdgpu_userq_mqd_compute_gfx11. (Alex)

Suggested-by: Alex Deucher <[email protected]>
Signed-off-by: Jesse Zhang <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +
drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 167 ++++++++++++++++++++-
include/uapi/drm/amdgpu_drm.h | 29 ++++
3 files changed, 196 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a8f4f73fa0ce..ad136145316b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -819,6 +819,9 @@ struct amdgpu_mqd_prop {
uint32_t cu_mask_count;
uint32_t cu_flags;
bool is_user_cu_masked;
+ uint32_t queue_percentage;
+ /* used in gfx9 and gfx12.1 */
+ uint32_t pm4_target_xcc;
};
struct amdgpu_mqd {
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index f2309d72bbe6..ee91721f322b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -30,6 +30,26 @@
#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
+/* Map HQD queue priority (0-15) to pipe priority */
+static const int amdgpu_userq_pipe_priority_map[] = {
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_1,
+	AMDGPU_RING_PRIO_1,
+	AMDGPU_RING_PRIO_1,
+	AMDGPU_RING_PRIO_1,
+	AMDGPU_RING_PRIO_2,
+	AMDGPU_RING_PRIO_2,
+	AMDGPU_RING_PRIO_2,
+	AMDGPU_RING_PRIO_2,
+	AMDGPU_RING_PRIO_2
+};
+
static int
mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
{
@@ -272,6 +292,104 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
return r;
}
+/**
+ * amdgpu_userq_set_compute_mqd - Parse a compute MQD and update queue properties
+ * @queue: Target user mode queue
+ * @compute_mqd: User-provided compute MQD (gfx11 layout)
+ *
+ * Parses and validates the user-supplied MQD fields and updates the queue
+ * properties only; no hardware MQD is programmed here, that is handled in
+ * the MES layer.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+static int amdgpu_userq_set_compute_mqd(struct amdgpu_usermode_queue *queue,
+					struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd)
+{
+	struct amdgpu_mqd_prop *props;
+	struct amdgpu_device *adev;
+	const uint32_t max_num_cus = 1024;
+	size_t cu_mask_size;
+	uint32_t count;
+	uint32_t *cu_mask;
+
+	if (!queue || !compute_mqd)
+		return -EINVAL;
+
+	props = queue->userq_prop;
+	adev = queue->userq_mgr->adev;
+
+	if (compute_mqd->queue_percentage > AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE) {
+		DRM_ERROR("Queue percentage must be between 0 and %d\n",
+			  AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE);
+		return -EINVAL;
+	}
+
+	/* Validate priority */
+	if (compute_mqd->hqd_queue_priority > AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM) {
+		DRM_ERROR("Queue priority must be between 0 and %d\n",
+			  AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM);
+		return -EINVAL;
+	}
+
+	/* Validate and set the CU mask property */
+	if (compute_mqd->cu_mask_count) {
+		if (compute_mqd->cu_mask_count % 32 != 0) {
+			DRM_ERROR("CU mask count must be a multiple of 32\n");
+			return -EINVAL;
+		}
+		count = compute_mqd->cu_mask_count;
+
+		/* Limit the CU mask size to prevent excessive memory allocation */
+		if (count > max_num_cus) {
+			DRM_ERROR("CU mask cannot be greater than 1024 bits\n");
+			count = max_num_cus;
+		}
+		cu_mask_size = sizeof(uint32_t) * (count / 32);
+
+		/* Copy the CU mask from user space */
+		cu_mask = memdup_user(u64_to_user_ptr(compute_mqd->cu_mask_ptr),
+				      cu_mask_size);
+		if (IS_ERR(cu_mask))
+			return PTR_ERR(cu_mask);
+
+		/* Validate pairwise CU mask for WGP-based ASICs */
+		if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0)) {
+			for (uint32_t i = 0; i < count; i += 2) {
+				uint32_t cu_pair = (cu_mask[i / 32] >> (i % 32)) & 0x3;
+
+				if (cu_pair && cu_pair != 0x3) {
+					DRM_ERROR("CUs must be enabled in adjacent pairs\n");
+					kfree(cu_mask);
+					return -EINVAL;
+				}
+			}
+		}
+
+		/* Replace any previously set CU mask */
+		kfree(props->cu_mask);
+		props->cu_mask = cu_mask;
+		props->cu_mask_count = count;
+		props->is_user_cu_masked = true;
+	}
+
+	/* Set the HQD priority and other compute properties */
+	props->queue_percentage = compute_mqd->queue_percentage;
+	props->pm4_target_xcc = compute_mqd->pm4_target_xcc;
+	props->hqd_queue_priority = compute_mqd->hqd_queue_priority;
+	props->hqd_pipe_priority =
+		amdgpu_userq_pipe_priority_map[compute_mqd->hqd_queue_priority];
+	props->eop_gpu_addr = compute_mqd->eop_va;
+
+	return 0;
+}
+
static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
struct drm_amdgpu_userq_in *args_in)
{
@@ -326,9 +444,10 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
if (r)
goto free_mqd;
- userq_props->eop_gpu_addr = compute_mqd->eop_va;
- userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
- userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
+ r = amdgpu_userq_set_compute_mqd(queue, compute_mqd);
+ if (r)
+ goto free_mqd;
+
userq_props->hqd_active = false;
userq_props->tmz_queue =
	mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
@@ -432,11 +551,52 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
return r;
}
+static int mes_userq_mqd_update(struct amdgpu_usermode_queue *queue,
+				struct drm_amdgpu_userq_in *args_in)
+{
+	struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd_v11;
+	struct amdgpu_mqd_prop *userq_props;
+	struct amdgpu_mqd *mqd_hw_default;
+	struct amdgpu_device *adev;
+	int retval;
+
+	if (!queue || !queue->userq_prop)
+		return -EINVAL;
+
+	if (queue->queue_type != AMDGPU_HW_IP_COMPUTE)
+		return -EINVAL;
+
+	adev = queue->userq_mgr->adev;
+	userq_props = queue->userq_prop;
+	mqd_hw_default = &adev->mqds[queue->queue_type];
+
+	if (args_in->mqd_size != sizeof(*compute_mqd_v11)) {
+		DRM_ERROR("Invalid compute IP MQD size\n");
+		return -EINVAL;
+	}
+
+	compute_mqd_v11 = memdup_user(u64_to_user_ptr(args_in->mqd),
+				      args_in->mqd_size);
+	if (IS_ERR(compute_mqd_v11)) {
+		DRM_ERROR("Failed to read user MQD\n");
+		return PTR_ERR(compute_mqd_v11);
+	}
+
+	retval = amdgpu_userq_set_compute_mqd(queue, compute_mqd_v11);
+	if (retval)
+		goto free;
+
+	userq_props->queue_size = args_in->queue_size;
+	userq_props->hqd_base_gpu_addr = args_in->queue_va;
+
+	retval = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr,
+					  userq_props);
+
+free:
+	kfree(compute_mqd_v11);
+	return retval;
+}
+
static void mes_userq_mqd_destroy(struct amdgpu_usermode_queue *queue)
{
struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
+ kfree(queue->userq_prop->cu_mask);
kfree(queue->userq_prop);
amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
}
@@ -513,6 +673,7 @@ static int mes_userq_restore(struct amdgpu_usermode_queue *queue)
const struct amdgpu_userq_funcs userq_mes_funcs = {
.mqd_create = mes_userq_mqd_create,
+ .mqd_update = mes_userq_mqd_update,
.mqd_destroy = mes_userq_mqd_destroy,
.unmap = mes_userq_unmap,
.map = mes_userq_map,
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index ab2bf47553e1..aa9b31578c6b 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -330,6 +330,7 @@ union drm_amdgpu_ctx {
/* user queue IOCTL operations */
#define AMDGPU_USERQ_OP_CREATE 1
#define AMDGPU_USERQ_OP_FREE 2
+#define AMDGPU_USERQ_OP_MODIFY 3
/* queue priority levels */
/* low < normal low < normal high < high */
@@ -341,6 +342,7 @@ union drm_amdgpu_ctx {
#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */
/* for queues that need access to protected content */
#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2)
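+/* upper bound for the queue_percentage property of a user queue */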
+#define AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE 100
/*
* This structure is a container to pass input configuration
@@ -460,6 +462,33 @@ struct drm_amdgpu_userq_mqd_compute_gfx11 {
* to get the size.
*/
__u64 eop_va;
+ /**
+ * @cu_mask_ptr: User-space pointer to the CU (Compute Unit) mask array
+ * Points to an array of __u32 words; each bit enables (1) or disables (0)
+ * one CU for this queue
+ */
+ __u64 cu_mask_ptr;
+ /**
+ * @cu_mask_count: Number of valid bits in the CU mask
+ * Must be a multiple of 32; values above 1024 are clamped. Each __u32
+ * element of the array covers 32 CUs/WGPs
+ */
+ __u32 cu_mask_count;
+ /**
+ * @queue_percentage: Queue resource allocation percentage (0-100)
+ * Defines the percentage of GPU resources allocated to this queue
+ */
+ __u32 queue_percentage;
+ /**
+ * @hqd_queue_priority: HQD queue priority (0-15)
+ * Higher values indicate higher scheduling priority for the queue
+ */
+ __u32 hqd_queue_priority;
+ /**
+ * @pm4_target_xcc: PM4 target XCC identifier (for gfx9/gfx12.1)
+ * Specifies the target XCC (Cross Compute Complex) for PM4 commands
+ */
+ __u32 pm4_target_xcc;
};
/* userq signal/wait ioctl */
--
2.49.0