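Enable mode-1 reset for RAS recovery in fatal error mode. When poison mode
is not supported, skip the soft reset check and set AMDGPU_NEED_FULL_RESET
so that recovery performs a full reset.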

Signed-off-by: YiPeng Chai <yipeng.c...@amd.com>
Reviewed-by: Hawking Zhang <hawking.zh...@amd.com>
Reviewed-by: Tao Zhou <tao.zh...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 7 ++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5b9f992e4607..ac824eb93285 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4582,6 +4582,10 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
        if (amdgpu_gpu_recovery == 0)
                goto disabled;
 
+       /* Skip soft reset check in fatal error mode */
+       if (!amdgpu_ras_is_poison_mode_supported(adev))
+               return true;
+
        if (!amdgpu_device_ip_check_soft_reset(adev)) {
                dev_info(adev->dev,"Timeout, but no hardware hang detected.\n");
                return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 693bce07eb46..8fca3cc273e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1948,7 +1948,12 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
 
                reset_context.method = AMD_RESET_METHOD_NONE;
                reset_context.reset_req_dev = adev;
-               clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+               /* Perform full reset in fatal error mode */
+               if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
+                       set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+               else
+                       clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 
                amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
        }
-- 
2.25.1

Reply via email to