Add an AMDGPU_USERQ_UPDATE_QUEUE IOCTL to support updating user mode
queue properties after creation. This allows userspace to dynamically
modify queue attributes such as the ring buffer address, ring size,
queue priority, and queue percentage.
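Illustrative userspace usage (a minimal, hypothetical sketch, not part
of this patch; it assumes the updated uapi header, a queue id returned
by an earlier DRM_AMDGPU_USERQ create call, and a GPU-visible ring
buffer at ring_va):

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <drm/amdgpu_drm.h>

  /* Hypothetical helper: resize a queue's ring and raise its priority. */
  static int update_userq(int drm_fd, uint32_t queue_id, uint64_t ring_va)
  {
          struct amdgpu_userq_update_queue_args args = {
                  .ring_base_address = ring_va,
                  .queue_id = queue_id,
                  .ring_size = 65536,      /* must be a power of 2, or 0 */
                  /* bits 0-7: percentage, bits 8-15: pm4_target_xcc */
                  .queue_percentage = 100 | (0 << 8),
                  .queue_priority = 11,    /* maps to AMDGPU_RING_PRIO_2 */
          };

          return ioctl(drm_fd, DRM_IOCTL_AMDGPU_USERQ_UPDATE_QUEUE, &args);
  }

A ring size below AMDGPU_GPU_PAGE_SIZE is clamped up by the kernel, and
a queue whose size, address, and percentage are all non-zero is remapped
(restarted) as part of the update.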
Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c  | 106 +++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h  |   6 ++
 drivers/gpu/drm/amd/amdgpu/mes_userqueue.c |   1 +
 include/uapi/drm/amdgpu_drm.h              |  13 +++
 6 files changed, 122 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 246d74205b48..1679075f679b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -813,6 +813,9 @@ struct amdgpu_mqd_prop {
 	uint32_t cu_mask_count;
 	uint32_t cu_flags;
 	bool is_user_cu_masked;
+	uint32_t queue_percentage;
+	/* used in gfx9 and gfx12.1 */
+	uint32_t pm4_target_xcc;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 9c425169a4f9..bc6cc1517221 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -3075,6 +3075,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_UPDATE_QUEUE, amdgpu_update_queue_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SET_CU_MASK, amdgpu_userq_set_cu_mask_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 4cbf75723c08..922f73b92db3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -34,6 +34,26 @@
 #include "amdgpu_hmm.h"
 #include "amdgpu_userq_fence.h"
 
+/* Mapping of queue priority to pipe priority, indexed by queue priority */
+int amdgpu_userq_pipe_priority_map[] = {
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_0,
+	AMDGPU_RING_PRIO_1,
+	AMDGPU_RING_PRIO_1,
+	AMDGPU_RING_PRIO_1,
+	AMDGPU_RING_PRIO_1,
+	AMDGPU_RING_PRIO_2,
+	AMDGPU_RING_PRIO_2,
+	AMDGPU_RING_PRIO_2,
+	AMDGPU_RING_PRIO_2,
+	AMDGPU_RING_PRIO_2
+};
+
 u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
 {
 	int i;
@@ -907,7 +927,6 @@ static int amdgpu_userq_update_queue(struct amdgpu_usermode_queue *queue,
 	struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
 	struct amdgpu_device *adev = uq_mgr->adev;
 	const struct amdgpu_userq_funcs *uq_funcs;
-	bool unmap_queue = false;
 	int r;
 
 	uq_funcs = adev->userq_funcs[queue->queue_type];
@@ -923,23 +942,94 @@ static int amdgpu_userq_update_queue(struct amdgpu_usermode_queue *queue,
 		r = amdgpu_userq_unmap_helper(queue);
 		if (r)
 			return r;
-		unmap_queue = true;
 	}
 
 	r = uq_funcs->mqd_update(queue, minfo);
+	if (r)
+		return r;
 
-	if (unmap_queue) {
-		int map_r = amdgpu_userq_map_helper(queue);
-		if (map_r)
-			dev_err(adev->dev, "Failed to remap queue %llu after update\n",
+	/*
+	 * If the queue is considered active (has a valid size, address, and
+	 * percentage), attempt to map it. This effectively starts the queue,
+	 * or restarts it if it was previously running.
+	 */
+	if (AMDGPU_USERQ_IS_ACTIVE(queue)) {
+		r = amdgpu_userq_map_helper(queue);
+		if (r)
+			drm_file_err(uq_mgr->file, "Failed to remap queue %llu after update\n",
 				queue->doorbell_index);
-		if (!r)
-			r = map_r;
 	}
 
 	return r;
 }
 
+int amdgpu_update_queue_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *filp)
+{
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+	struct amdgpu_userq_update_queue_args *args = data;
+	struct amdgpu_usermode_queue *queue;
+	struct amdgpu_mqd_prop *props;
+	int r;
+
+	/*
+	 * Repurpose queue percentage to accommodate new features:
+	 * bit 0-7: queue percentage
+	 * bit 8-15: pm4_target_xcc
+	 */
+	if ((args->queue_percentage & 0xFF) > AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE) {
+		drm_file_err(uq_mgr->file, "Queue percentage must be between 0 and AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE\n");
+		return -EINVAL;
+	}
+
+	/* Validate priority */
+	if (args->queue_priority > AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM) {
+		drm_file_err(uq_mgr->file, "Queue priority must be between 0 and AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM\n");
+		return -EINVAL;
+	}
+
+	/* Validate ring size */
+	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
+		drm_file_err(uq_mgr->file, "Ring size must be a power of 2 or 0\n");
+		return -EINVAL;
+	}
+
+	if (args->ring_size > 0 && args->ring_size < AMDGPU_GPU_PAGE_SIZE) {
+		args->ring_size = AMDGPU_GPU_PAGE_SIZE;
+		drm_file_err(uq_mgr->file, "Ring size clamped to AMDGPU_GPU_PAGE_SIZE\n");
+	}
+
+	if ((args->ring_base_address) &&
+	    (!access_ok((const void __user *)args->ring_base_address,
+			sizeof(uint64_t)))) {
+		drm_file_err(uq_mgr->file, "Can't access ring base address\n");
+		return -EFAULT;
+	}
+
+	mutex_lock(&uq_mgr->userq_mutex);
+	queue = amdgpu_userq_find(uq_mgr, args->queue_id);
+	if (!queue) {
+		mutex_unlock(&uq_mgr->userq_mutex);
+		return -EINVAL;
+	}
+
+	props = queue->userq_prop;
+	props->queue_size = args->ring_size;
+	props->hqd_base_gpu_addr = args->ring_base_address;
+	props->queue_percentage = args->queue_percentage & 0xFF;
+	/* bits 8-15 are repurposed to be the PM4 target XCC */
+	props->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
+	props->hqd_pipe_priority = amdgpu_userq_pipe_priority_map[args->queue_priority];
+	props->hqd_queue_priority = args->queue_priority;
+
+	r = amdgpu_userq_update_queue(queue, NULL);
+
+	mutex_unlock(&uq_mgr->userq_mutex);
+
+	return r;
+}
+
 int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *filp)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 43bf104d2fb8..c8ea81c17c6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -31,6 +31,9 @@
 #define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base)
 #define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
 #define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name)
+#define AMDGPU_USERQ_IS_ACTIVE(q) ((q)->userq_prop->queue_size > 0 && \
+				   (q)->userq_prop->hqd_base_gpu_addr != 0 && \
+				   (q)->userq_prop->queue_percentage > 0)
 
 enum amdgpu_userq_state {
 	AMDGPU_USERQ_STATE_UNMAPPED = 0,
@@ -118,6 +121,9 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp
 int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *filp);
 
+int amdgpu_update_queue_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *filp);
+
 int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
 			  struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index ae221eaa5b82..cfa3cb46a983 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -304,6 +304,7 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
 	userq_props->use_doorbell = true;
 	userq_props->doorbell_index = queue->doorbell_index;
 	userq_props->fence_address = queue->fence_drv->gpu_addr;
+	userq_props->queue_percentage = 100;
 
 	if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
 		struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 41b6b3cea834..c42328453652 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -59,6 +59,7 @@ extern "C" {
 #define DRM_AMDGPU_USERQ_WAIT		0x18
 #define DRM_AMDGPU_GEM_LIST_HANDLES	0x19
 #define DRM_AMDGPU_USERQ_SET_CU_MASK	0x1a
+#define DRM_AMDGPU_USERQ_UPDATE_QUEUE	0x1b
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -81,6 +82,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_USERQ_WAIT		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
 #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
 #define DRM_IOCTL_AMDGPU_USERQ_SET_CU_MASK	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SET_CU_MASK, struct amdgpu_userq_set_cu_mask_args)
+#define DRM_IOCTL_AMDGPU_USERQ_UPDATE_QUEUE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_UPDATE_QUEUE, struct amdgpu_userq_update_queue_args)
 
 /**
  * DOC: memory domains
@@ -343,6 +345,7 @@ union drm_amdgpu_ctx {
 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH	3 /* admin only */
 /* for queues that need access to protected content */
 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE	(1 << 2)
+#define AMDGPU_USERQ_MAX_QUEUE_PERCENTAGE	100
 
 /*
  * This structure is a container to pass input configuration
@@ -440,6 +443,16 @@ struct amdgpu_userq_set_cu_mask_args {
 	__u64 cu_mask_ptr;
 };
 
+
+/* Input parameters for the AMDGPU_USERQ_UPDATE_QUEUE IOCTL */
+struct amdgpu_userq_update_queue_args {
+	__u64 ring_base_address;
+	__u32 queue_id;
+	__u32 ring_size;
+	__u32 queue_percentage;
+	__u32 queue_priority;
+};
+
 /* GFX V11 IP specific MQD parameters */
 struct drm_amdgpu_userq_mqd_gfx11 {
 	/**
-- 
2.49.0
