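Add a check_ecc_err_status callback to the UMC RAS interface so that
MCA code can check whether a UMC status value indicates an
uncorrectable, correctable, or deferred error.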

Signed-off-by: YiPeng Chai <yipeng.c...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c       | 12 ++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h       |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h       |  4 +++-
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c        | 20 +++++++++++++++++++
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  |  6 +++---
 5 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index fde20857b3dd..65ed8bb5c120 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -27,6 +27,16 @@
 #include "umc/umc_6_7_0_offset.h"
 #include "umc/umc_6_7_0_sh_mask.h"
 
+static bool amdgpu_mca_is_deferred_error(struct amdgpu_device *adev,
+                                       uint64_t mc_status)
+{
+       /* Classify via the UMC RAS hook; umc.ras may be NULL (TODO confirm). */
+       if (adev->umc.ras && adev->umc.ras->check_ecc_err_status)
+               return adev->umc.ras->check_ecc_err_status(adev,
+                               AMDGPU_MCA_ERROR_TYPE_DE, &mc_status);
+       return false;
+}
+
 void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
                                              uint64_t mc_status_addr,
                                              unsigned long *error_count)
@@ -257,7 +267,7 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device 
*adev, enum amdgpu_ras_blo
                        amdgpu_ras_error_statistic_ue_count(err_data,
                                &mcm_info, &err_addr, (uint64_t)count);
                else {
-                       if 
(!!(MCA_REG__STATUS__DEFERRED(entry->regs[MCA_REG_IDX_STATUS])))
+                       if (amdgpu_mca_is_deferred_error(adev, 
entry->regs[MCA_REG_IDX_STATUS]))
                                amdgpu_ras_error_statistic_de_count(err_data,
                                        &mcm_info, &err_addr, (uint64_t)count);
                        else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index b399f1b62887..b964110ed1e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -65,6 +65,7 @@ enum amdgpu_mca_ip {
 enum amdgpu_mca_error_type {
        AMDGPU_MCA_ERROR_TYPE_UE = 0,
        AMDGPU_MCA_ERROR_TYPE_CE,
+       AMDGPU_MCA_ERROR_TYPE_DE,
 };
 
 struct amdgpu_mca_ras_block {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index de2dc1853636..83199296ed10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -21,7 +21,7 @@
 #ifndef __AMDGPU_UMC_H__
 #define __AMDGPU_UMC_H__
 #include "amdgpu_ras.h"
-
+#include "amdgpu_mca.h"
 /*
  * (addr / 256) * 4096, the higher 26 bits in ErrorAddr
  * is the index of 4KB block
@@ -64,6 +64,8 @@ struct amdgpu_umc_ras {
                                      void *ras_error_status);
        void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
                                        void *ras_error_status);
+       bool (*check_ecc_err_status)(struct amdgpu_device *adev,
+                       enum amdgpu_mca_error_type type, void 
*ras_error_status);
        /* support different eeprom table version for different asic */
        void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header 
*hdr);
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index fa2168f1d3bf..1e8e97d72f1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -425,6 +425,25 @@ static void 
umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
        }
 }
 
+static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev,
+                       enum amdgpu_mca_error_type type, void *ras_error_status)
+{
+       /* ras_error_status is read as a pointer to the 64-bit status value. */
+       uint64_t mc_umc_status = *(uint64_t *)ras_error_status;
+
+       switch (type) {
+       case AMDGPU_MCA_ERROR_TYPE_UE:
+               return umc_v12_0_is_uncorrectable_error(adev, mc_umc_status);
+       case AMDGPU_MCA_ERROR_TYPE_CE:
+               return umc_v12_0_is_correctable_error(adev, mc_umc_status);
+       case AMDGPU_MCA_ERROR_TYPE_DE:
+               return umc_v12_0_is_deferred_error(adev, mc_umc_status);
+       default:
+               /* Unrecognized error type: report no match. */
+               return false;
+       }
+}
+
 static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev)
 {
        amdgpu_umc_loop_channels(adev,
@@ -510,5 +529,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
        .query_ras_poison_mode = umc_v12_0_query_ras_poison_mode,
        .ecc_info_query_ras_error_count = 
umc_v12_0_ecc_info_query_ras_error_count,
        .ecc_info_query_ras_error_address = 
umc_v12_0_ecc_info_query_ras_error_address,
+       .check_ecc_err_status = umc_v12_0_check_ecc_err_status,
 };
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 952a983da49a..67fc01e0f9c6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2557,9 +2557,9 @@ static int mca_umc_mca_get_err_count(const struct 
mca_ras_info *mca_ras, struct
                return 0;
        }
 
-       if ((type == AMDGPU_MCA_ERROR_TYPE_UE && 
umc_v12_0_is_uncorrectable_error(adev, status0)) ||
-           (type == AMDGPU_MCA_ERROR_TYPE_CE && 
(umc_v12_0_is_correctable_error(adev, status0) ||
-            umc_v12_0_is_deferred_error(adev, status0))))
+       if (umc_v12_0_is_deferred_error(adev, status0) ||
+           umc_v12_0_is_uncorrectable_error(adev, status0) ||
+           umc_v12_0_is_correctable_error(adev, status0))
                *count = 1;
 
        return 0;
-- 
2.34.1

Reply via email to