From: Ben Goz <ben....@amd.com> This patch adds support for SDMA user-mode queues to the QCM - the Queue management system that manages queues-per-device and queues-per-process.
Signed-off-by: Ben Goz <ben.goz at amd.com> --- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 167 +++++++++++++++++++-- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 5 + .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +- 3 files changed, 164 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 7b6df51..55ee2da 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -46,9 +46,24 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); + static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock); static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock); +static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd); + +static void deallocate_sdma_queue(struct device_queue_manager *dqm, + unsigned int sdma_queue_id); + +static inline +enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) +{ + if (type == KFD_QUEUE_TYPE_SDMA) + return KFD_MQD_TYPE_CIK_SDMA; + return KFD_MQD_TYPE_CIK_CP; +} static inline unsigned int get_pipes_num(struct device_queue_manager *dqm) { @@ -189,7 +204,10 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, *allocated_vmid = qpd->vmid; q->properties.vmid = qpd->vmid; - retval = create_compute_queue_nocpsch(dqm, q, qpd); + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) + retval = create_compute_queue_nocpsch(dqm, q, qpd); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + retval = create_sdma_queue_nocpsch(dqm, q, qpd); if (retval != 0) { if (list_empty(&qpd->queues_list)) { @@ -202,7 +220,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, list_add(&q->list, &qpd->queues_list); dqm->queue_count++; - + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count++; mutex_unlock(&dqm->lock); return 0; } @@ -279,8 +298,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q) { int retval; - struct mqd_manager *mqd; - + struct mqd_manager *mqd, *mqd_sdma; BUG_ON(!dqm || !q || !q->mqd || !qpd); retval = 0; @@ -294,6 +312,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, goto out; } + mqd_sdma = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_SDMA); + if (mqd_sdma == NULL) { + mutex_unlock(&dqm->lock); + return -ENOMEM; + } + retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, @@ -302,7 +326,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, if (retval != 0) goto out; - deallocate_hqd(dqm, q); + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) + deallocate_hqd(dqm, q); + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + dqm->sdma_queue_count--; + deallocate_sdma_queue(dqm, q->sdma_id); + } mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); @@ -324,7 +353,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) BUG_ON(!dqm || !q || !q->mqd); mutex_lock(&dqm->lock); - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); + mqd = dqm->get_mqd_manager(dqm, q->properties.type); if (mqd == NULL) { mutex_unlock(&dqm->lock); return -ENOMEM; @@ -536,6 +565,11 @@ static int init_pipelines(struct device_queue_manager *dqm, } +static int init_sdma_engines(struct device_queue_manager *dqm) +{ + return kfd2kgd->init_sdma_engines(dqm->dev->kgd); +} + static int init_scheduler(struct device_queue_manager *dqm) { int retval; @@ -549,7 +583,10 @@ static int init_scheduler(struct device_queue_manager *dqm) return retval; retval = init_memory(dqm); + if (retval != 0) + return retval; + retval = init_sdma_engines(dqm); return retval; } @@ -565,6 +602,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) mutex_init(&dqm->lock); INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->next_pipe_to_allocate = 0; + dqm->sdma_queue_count = 0; dqm->allocated_queues = kcalloc(get_pipes_num(dqm), sizeof(unsigned int), GFP_KERNEL); if (!dqm->allocated_queues) { @@ -576,6 +614,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1; dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1; + dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; init_scheduler(dqm); return 0; @@ -607,6 +646,77 @@ static int stop_nocpsch(struct device_queue_manager *dqm) return 0; } +static int allocate_sdma_queue(struct device_queue_manager *dqm, + unsigned int *sdma_queue_id) +{ + int bit; + + if (dqm->sdma_bitmap == 0) + return -ENOMEM; + + bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap, + CIK_SDMA_QUEUES); + + clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap); + *sdma_queue_id = bit; + + return 0; +} + +static void deallocate_sdma_queue(struct device_queue_manager *dqm, + unsigned int sdma_queue_id) +{ + if (sdma_queue_id < 0 || sdma_queue_id >= CIK_SDMA_QUEUES) + return; + set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap); +} + +static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + uint32_t value = ATC; + + if (q->process->is_32bit_user_mode) + value |= VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd)); + else + value |= VA_SHARED_BASE(get_sh_mem_bases_nybble_64( + qpd_to_pdd(qpd))); + q->properties.sdma_vm_addr = value; +} + +static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd) +{ + struct mqd_manager *mqd; + int retval; + + mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_SDMA); + if (!mqd) + return -ENOMEM; + + retval = allocate_sdma_queue(dqm, &q->sdma_id); + if (retval != 0) + return retval; + + q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM; + + pr_debug("kfd: sdma id is: %d\n", q->sdma_id); + pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id); + pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id); + + retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); + if (retval != 0) { + deallocate_sdma_queue(dqm, q->sdma_id); + return retval; + } + + init_sdma_vm(dqm, q, qpd); + return 0; +} + /* * Device Queue Manager implementation for cp scheduler */ @@ -648,6 +758,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) mutex_init(&dqm->lock); INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->processes_count = 0; + dqm->sdma_queue_count = 0; dqm->active_runlist = false; retval = init_pipelines(dqm, get_pipes_num(dqm), 0); if (retval != 0) @@ -692,6 +803,8 @@ static int start_cpsch(struct device_queue_manager *dqm) dqm->fence_addr = dqm->fence_mem->cpu_ptr; dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; + init_sdma_engines(dqm); + list_for_each_entry(node, &dqm->queues, list) if (node->qpd->pqm->process && dqm->dev) kfd_bind_process_to_device(dqm->dev, @@ -762,6 +875,14 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, mutex_unlock(&dqm->lock); } +static void select_sdma_engine_id(struct queue *q) +{ + static int sdma_id; + + q->sdma_id = sdma_id; + sdma_id = (sdma_id + 1) % 2; +} + static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd, int *allocate_vmid) { @@ -777,7 +898,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, mutex_lock(&dqm->lock); - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + select_sdma_engine_id(q); + + mqd = dqm->get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + if (mqd == NULL) { mutex_unlock(&dqm->lock); return -ENOMEM; @@ -794,6 +920,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = execute_queues_cpsch(dqm, false); } + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count++; + out: mutex_unlock(&dqm->lock); return retval; @@ -817,6 +946,14 @@ static int fence_wait_timeout(unsigned int *fence_addr, return 0; } +static int destroy_sdma_queues(struct device_queue_manager *dqm, + unsigned int sdma_engine) +{ + return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, + KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, + sdma_engine); +} + static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) { int retval; @@ -829,6 +966,15 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) mutex_lock(&dqm->lock); if (dqm->active_runlist == false) goto out; + + pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n", + dqm->sdma_queue_count); + + if (dqm->sdma_queue_count > 0) { + destroy_sdma_queues(dqm, 0); + destroy_sdma_queues(dqm, 1); + } + retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0); if (retval != 0) @@ -900,13 +1046,16 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, /* remove queue from list to prevent rescheduling after preemption */ mutex_lock(&dqm->lock); - - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP); + mqd = dqm->get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); if (!mqd) { retval = -ENOMEM; goto failed; } + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count--; + list_del(&q->list); dqm->queue_count--; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index c3f189e8..554c06e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -36,6 +36,9 @@ #define KFD_VMID_START_OFFSET (8) #define VMID_PER_DEVICE CIK_VMID_NUM #define KFD_DQM_FIRST_PIPE (0) +#define CIK_SDMA_QUEUES (4) +#define CIK_SDMA_QUEUES_PER_ENGINE (2) +#define CIK_SDMA_ENGINE_NUM (2) struct device_process_node { struct qcm_process_device *qpd; @@ -130,8 +133,10 @@ struct device_queue_manager { struct list_head queues; unsigned int processes_count; unsigned int queue_count; + unsigned int sdma_queue_count; unsigned int next_pipe_to_allocate; unsigned int *allocated_queues; + unsigned int sdma_bitmap; unsigned int vmid_bitmap; uint64_t pipelines_addr; struct kfd_mem_obj *pipeline_mem; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index d12f9d3..948b1ca 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -128,7 +128,6 @@ static int create_cp_queue(struct process_queue_manager *pqm, /* let DQM handle it*/ q_properties->vmid = 0; q_properties->queue_id = qid; - q_properties->type = KFD_QUEUE_TYPE_COMPUTE; retval = init_queue(q, *q_properties); if (retval != 0) @@ -189,6 +188,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, } switch (type) { + case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && -- 1.9.1