Re: [PATCH] drm/amdgpu: fix MGPU fan boost enablement for XGMI reset

2019-06-28 Thread Deucher, Alexander
Reviewed-by: Alex Deucher 


From: amd-gfx  on behalf of Evan Quan 

Sent: Thursday, June 27, 2019 11:31 PM
To: amd-gfx@lists.freedesktop.org
Cc: Quan, Evan
Subject: [PATCH] drm/amdgpu: fix MGPU fan boost enablement for XGMI reset

The MGPU fan boost feature should not be enabled until all the
devices from the same hive are back from reset.

Change-Id: I03a69434ff28f4eac209bd91320dde8a238a33cf
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    |  4 ++--
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 7541e1b076b0..9efa0423c242 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1219,6 +1219,10 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev 
);
 static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { 
return 0; }
 #endif

+
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);
+
 #include "amdgpu_object.h"

 /* used by df_v3_6.c and amdgpu_pmu.c */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a2d234c07fc4..f39eb7b37c8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3558,6 +3558,12 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
 if (vram_lost)
 
amdgpu_device_fill_reset_magic(tmp_adev);

+   /*
+* Add this ASIC back as tracked, since its reset has already
+* completed successfully.
+*/
+   amdgpu_register_gpu_instance(tmp_adev);
+
 r = amdgpu_device_ip_late_init(tmp_adev);
 if (r)
 goto out;
@@ -3692,6 +3698,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 device_list_handle = &device_list;
 }

+   /*
+* Mark the ASICs that are about to be reset as untracked first,
+* and add them back after the reset has completed.
+*/
+   list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
+   amdgpu_unregister_gpu_instance(tmp_adev);
+
 /* block all schedulers and reset given job's ring */
 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index ed051fdb509f..e2c9d8d31ed8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -41,7 +41,7 @@
 #include "amdgpu_display.h"
 #include "amdgpu_ras.h"

-static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
 {
 struct amdgpu_gpu_instance *gpu_instance;
 int i;
@@ -102,7 +102,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
 dev->dev_private = NULL;
 }

-static void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
 {
 struct amdgpu_gpu_instance *gpu_instance;

--
2.21.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: fix MGPU fan boost enablement for XGMI reset

2019-06-27 Thread Evan Quan
The MGPU fan boost feature should not be enabled until all the
devices from the same hive are back from reset.

Change-Id: I03a69434ff28f4eac209bd91320dde8a238a33cf
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    |  4 ++--
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 7541e1b076b0..9efa0423c242 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1219,6 +1219,10 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev 
);
 static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { 
return 0; }
 #endif
 
+
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);
+
 #include "amdgpu_object.h"
 
 /* used by df_v3_6.c and amdgpu_pmu.c */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a2d234c07fc4..f39eb7b37c8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3558,6 +3558,12 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
if (vram_lost)

amdgpu_device_fill_reset_magic(tmp_adev);
 
+   /*
+* Add this ASIC back as tracked, since its reset has already
+* completed successfully.
+*/
+   amdgpu_register_gpu_instance(tmp_adev);
+
r = amdgpu_device_ip_late_init(tmp_adev);
if (r)
goto out;
@@ -3692,6 +3698,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
device_list_handle = &device_list;
}
 
+   /*
+* Mark the ASICs that are about to be reset as untracked first,
+* and add them back after the reset has completed.
+*/
+   list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
+   amdgpu_unregister_gpu_instance(tmp_adev);
+
/* block all schedulers and reset given job's ring */
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index ed051fdb509f..e2c9d8d31ed8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -41,7 +41,7 @@
 #include "amdgpu_display.h"
 #include "amdgpu_ras.h"
 
-static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
 {
struct amdgpu_gpu_instance *gpu_instance;
int i;
@@ -102,7 +102,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
dev->dev_private = NULL;
 }
 
-static void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
 {
struct amdgpu_gpu_instance *gpu_instance;
 
-- 
2.21.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx