Use the XGMI hive's task barrier to synchronize ASIC resets
across all devices in the hive.

v2: Return right away with a warning if there is no XGMI hive; update doc.
Signed-off-by: Andrey Grodzovsky <andrey.grodzov...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 37 +++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1d19edfa..2ae944c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -67,6 +67,7 @@
 #include "amdgpu_tmz.h"
 
 #include <linux/suspend.h>
+#include <drm/task_barrier.h>
 
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
@@ -2663,14 +2664,38 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
 {
        struct amdgpu_device *adev =
                container_of(__work, struct amdgpu_device, xgmi_reset_work);
+       struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
 
-       if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
-               adev->asic_reset_res = (adev->in_baco == false) ?
-                               amdgpu_device_baco_enter(adev->ddev) :
-                               amdgpu_device_baco_exit(adev->ddev);
-       else
-               adev->asic_reset_res = amdgpu_asic_reset(adev);
+       /* It's a bug to not have a hive within this function */
+       if (WARN_ON(!hive))
+               return;
+
+       /*
+        * Use task barrier to synchronize all xgmi reset works across the
+        * hive. task_barrier_enter and task_barrier_exit will block
+        * until all the threads running the xgmi reset works reach
+        * those points. task_barrier_full will do both blocks.
+        */
+       if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+
+               task_barrier_enter(&hive->tb);
+               adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
+
+               if (adev->asic_reset_res)
+                       goto fail;
+
+               task_barrier_exit(&hive->tb);
+               adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
+
+               if (adev->asic_reset_res)
+                       goto fail;
+       } else {
+
+               task_barrier_full(&hive->tb);
+               adev->asic_reset_res =  amdgpu_asic_reset(adev);
+       }
 
+fail:
        if (adev->asic_reset_res)
                DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
                         adev->asic_reset_res, adev->ddev->unique);
-- 
2.7.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to