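Avoid hotspotting when allocating SDMA engines from the XGMI pool.
The first time a process allocates an XGMI SDMA queue on a device, it
records a starting engine: the engine after the one most recently
allocated on that device, wrapping around at the number of XGMI
engines. The process's XGMI engine allocations are then offset from
that starting engine, so that successive processes attempt to begin
on different engines.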

Signed-off-by: Joseph Greathouse <joseph.greatho...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 8 +++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                 | 1 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c              | 1 +
 4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 86bdb765f350..7f06ad6bdcd2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1096,9 +1096,12 @@ static int allocate_sdma_queue(struct 
device_queue_manager *dqm,
                        return -ENOMEM;
                }
        } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+               if (qpd->initial_xgmi_engine == -1)
+                       qpd->initial_xgmi_engine = dqm->next_xgmi_engine;
                num_engines = get_num_xgmi_sdma_engines(dqm);
                for_each_set_bit(engine, &(qpd->xgmi_sdma_engine_bitmap), 
num_engines) {
-                       available_queue_bitmap = sdma_engine_mask(engine, 
num_engines);
+                       available_queue_bitmap = sdma_engine_mask(
+                                       qpd->initial_xgmi_engine + engine, 
num_engines);
                        available_queue_bitmap &= dqm->xgmi_sdma_bitmap;
 
                        if (!available_queue_bitmap)
@@ -1109,6 +1112,9 @@ static int allocate_sdma_queue(struct 
device_queue_manager *dqm,
                        qpd->xgmi_sdma_engine_bitmap &= ~(1ULL << engine);
                        found_sdma = true;
 
+                       dqm->next_xgmi_engine = qpd->initial_xgmi_engine + 
engine + 1;
+                       dqm->next_xgmi_engine %= num_engines;
+
                        bit = __ffs64(available_queue_bitmap);
                        dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
                        q->sdma_id = bit;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index c8719682c4da..b5955e7401e5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -183,6 +183,8 @@ struct device_queue_manager {
        unsigned int            *allocated_queues;
        uint64_t                sdma_bitmap;
        uint64_t                xgmi_sdma_bitmap;
+       /* which XGMI engine the next process should attempt to start on */
+       unsigned int            next_xgmi_engine;
        /* the pasid mapping for each kfd vmid */
        uint16_t                vmid_pasid[VMID_NUM];
        uint64_t                pipelines_addr;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index c38eebc9db4d..bcf56280c7ed 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -578,6 +578,7 @@ struct qcm_process_device {
        unsigned int queue_count;
        unsigned long sdma_engine_bitmap;
        unsigned long xgmi_sdma_engine_bitmap;
+       int initial_xgmi_engine;
        unsigned int vmid;
        bool is_debug;
        unsigned int evicted; /* eviction counter, 0=active */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 13c85624bf7d..24ce1b52a1d5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1449,6 +1449,7 @@ struct kfd_process_device 
*kfd_create_process_device_data(struct kfd_dev *dev,
        pdd->qpd.mapped_gws_queue = false;
        pdd->qpd.sdma_engine_bitmap = BIT_ULL(dev_info->num_sdma_engines) - 1;
        pdd->qpd.xgmi_sdma_engine_bitmap = 
BIT_ULL(dev_info->num_xgmi_sdma_engines) - 1;
+       pdd->qpd.initial_xgmi_engine = -1;
        pdd->process = p;
        pdd->bound = PDD_UNBOUND;
        pdd->already_dequeued = false;
-- 
2.20.1

Reply via email to