This is prep work for updating each PSP FW in hive after
GPU reset.
Split into build topology SW state and update each PSP FW in the hive.
Save topology and count of XGMI devices for reuse.

Signed-off-by: Andrey Grodzovsky <andrey.grodzov...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  5 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 55 +++++++++++++++++++-------------
 2 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2c80453..3e5bede 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1226,6 +1226,11 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned 
int cmd,
 /*
  * functions used by amdgpu_xgmi.c
  */
+
+struct amdgpu_hive_info;
+
+struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
+int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive);
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 909216a..23e4e16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -34,12 +34,14 @@ static DEFINE_MUTEX(xgmi_mutex);
 struct amdgpu_hive_info {
        uint64_t                hive_id;
        struct list_head        device_list;
+       struct psp_xgmi_topology_info   topology_info;
+       int number_devices;
 };
 
 static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
 static unsigned hive_count = 0;
 
-static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device 
*adev)
+struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
 {
        int i;
        struct amdgpu_hive_info *tmp;
@@ -61,12 +63,33 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct 
amdgpu_device *adev)
        return tmp;
 }
 
+int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct 
amdgpu_device *adev)
+{
+       int ret = -EINVAL;
+
+       /* Each psp need to set the latest topology */
+               ret = psp_xgmi_set_topology_info(&adev->psp,
+                                                hive->number_devices,
+                                                &hive->topology_info);
+               if (ret)
+                       dev_err(adev->dev,
+                               "XGMI: Set topology failure on device %llx, 
hive %llx, ret %d",
+                               adev->gmc.xgmi.node_id,
+                               adev->gmc.xgmi.hive_id, ret);
+               else
+                       dev_info(adev->dev, "XGMI: Add node %d to hive 
0x%llx.\n",
+                                adev->gmc.xgmi.physical_node_id,
+                                adev->gmc.xgmi.hive_id);
+
+       return ret;
+}
+
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 {
-       struct psp_xgmi_topology_info *tmp_topology;
+       struct psp_xgmi_topology_info *hive_topology;
        struct amdgpu_hive_info *hive;
        struct amdgpu_xgmi      *entry;
-       struct amdgpu_device    *tmp_adev;
+       struct amdgpu_device *tmp_adev = NULL;
 
        int count = 0, ret = -EINVAL;
 
@@ -76,21 +99,21 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
        adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp);
        adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
 
-       tmp_topology = kzalloc(sizeof(struct psp_xgmi_topology_info), 
GFP_KERNEL);
-       if (!tmp_topology)
-               return -ENOMEM;
        mutex_lock(&xgmi_mutex);
        hive = amdgpu_get_xgmi_hive(adev);
        if (!hive)
                goto exit;
 
+       hive_topology = &hive->topology_info;
+
        list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
        list_for_each_entry(entry, &hive->device_list, head)
-               tmp_topology->nodes[count++].node_id = entry->node_id;
+               hive_topology->nodes[count++].node_id = entry->node_id;
+       hive->number_devices = count;
 
        /* Each psp need to get the latest topology */
        list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
-               ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, 
tmp_topology);
+               ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, 
hive_topology);
                if (ret) {
                        dev_err(tmp_adev->dev,
                                "XGMI: Get topology failure on device %llx, 
hive %llx, ret %d",
@@ -101,25 +124,13 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
                }
        }
 
-       /* Each psp need to set the latest topology */
        list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
-               ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, 
tmp_topology);
-               if (ret) {
-                       dev_err(tmp_adev->dev,
-                               "XGMI: Set topology failure on device %llx, 
hive %llx, ret %d",
-                               tmp_adev->gmc.xgmi.node_id,
-                               tmp_adev->gmc.xgmi.hive_id, ret);
-                       /* To do : continue with some  node failed or disable 
the  whole  hive */
+               ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
+               if (ret)
                        break;
-               }
        }
-       if (!ret)
-               dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
-                       adev->gmc.xgmi.physical_node_id,
-                       adev->gmc.xgmi.hive_id);
 
 exit:
        mutex_unlock(&xgmi_mutex);
-       kfree(tmp_topology);
        return ret;
 }
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to