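Add a poison mode check to the error conditions for umc v12_0: when
poison mode is supported, a valid deferred error (Val == 1 and
Deferred == 1) is reported as an uncorrectable error and is not
reported as a correctable error.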

Signed-off-by: YiPeng Chai <yipeng.c...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c        | 20 ++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.h        |  4 ++--
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  |  4 ++--
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 8d60c39ae1c5..8430888760ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -88,16 +88,26 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
                umc_v12_0_reset_error_count_per_channel, NULL);
 }
 
-bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status)
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
 {
+       if (amdgpu_ras_is_poison_mode_supported(adev) &&
+           (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+           (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1))
+               return true;
+
        return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
                (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
                REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
                REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1));
 }
 
-bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
 {
+       if (amdgpu_ras_is_poison_mode_supported(adev) &&
+           (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+           (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1))
+               return false;
+
        return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
                (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
                (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
@@ -105,7 +115,7 @@ bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
                /* Identify data parity error in replay mode */
                ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 ||
                REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) &&
-               !(umc_v12_0_is_uncorrectable_error(mc_umc_status)))));
+               !(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)))));
 }
 
 static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
@@ -124,7 +134,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
        mc_umc_status =
                RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
 
-       if (umc_v12_0_is_correctable_error(mc_umc_status))
+       if (umc_v12_0_is_correctable_error(adev, mc_umc_status))
                *error_count += 1;
 }
 
@@ -142,7 +152,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev
        mc_umc_status =
                RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
 
-       if (umc_v12_0_is_uncorrectable_error(mc_umc_status))
+       if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status))
                *error_count += 1;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
index b34b1e358f8b..17b4b52d6f13 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -117,8 +117,8 @@
                (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \
        } while (0)
 
-bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status);
-bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status);
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
 
 extern const uint32_t
        umc_v12_0_channel_idx_tbl[]
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index ddd782fbee7a..bb3e953bfed5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2524,9 +2524,9 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
                return 0;
        }
 
-       if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0))
+       if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(adev, status0))
                *count = 1;
-       else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0))
+       else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(adev, status0))
                *count = 1;
                *count = 1;
 
        return 0;
-- 
2.34.1

Reply via email to