Define page retirement functions for MCA platform.

v2: remove page retirement handling from the MCA poison handler and
    let the MCA notifier do page retirement.

v3: remove specific poison handler for MCA to simplify code.

Signed-off-by: Tao Zhou <tao.zh...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 53 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |  2 +
 2 files changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index aad3c8b4c810..3c83129f4090 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -22,6 +22,59 @@
  */
 
 #include "amdgpu.h"
+#include "umc_v6_7.h"
+
+static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev,
+                                   struct ras_err_data *err_data, uint64_t err_addr,
+                                   uint32_t ch_inst, uint32_t umc_inst)
+{
+       switch (adev->ip_versions[UMC_HWIP][0]) {
+       case IP_VERSION(6, 7, 0):
+               umc_v6_7_convert_error_address(adev,
+                               err_data, err_addr, ch_inst, umc_inst);
+               break;
+       default:
+               dev_warn(adev->dev,
+                        "UMC address to Physical address translation is not supported\n");
+               return AMDGPU_RAS_FAIL;
+       }
+
+       return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
+                       uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst)
+{
+       struct ras_err_data err_data = {0, 0, 0, NULL};
+       int ret = AMDGPU_RAS_FAIL;
+
+       err_data.err_addr =
+               kcalloc(adev->umc.max_ras_err_cnt_per_query,
+                       sizeof(struct eeprom_table_record), GFP_KERNEL);
+       if (!err_data.err_addr) {
+               dev_warn(adev->dev,
+                       "Failed to alloc memory for umc error record in MCA notifier!\n");
+               return AMDGPU_RAS_FAIL;
+       }
+
+       /*
+        * Translate UMC channel address to Physical address
+        */
+       ret = amdgpu_umc_convert_error_address(adev, &err_data, err_addr,
+                                       ch_inst, umc_inst);
+       if (ret)
+               goto out;
+
+       if (amdgpu_bad_page_threshold != 0) {
+               amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
+                                               err_data.err_addr_cnt);
+               amdgpu_ras_save_bad_pages(adev);
+       }
+
+out:
+       kfree(err_data.err_addr);
+       return ret;
+}
 
 static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
                void *ras_error_status,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 3629d8f292ef..659a10de29c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -98,4 +98,6 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
 int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
                void *ras_error_status,
                struct amdgpu_iv_entry *entry);
+int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
+                       uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);
 #endif
-- 
2.35.1

Reply via email to