Add a read function for the PMFW EEPROM, and adapt address conversion for bad pages loaded from the PMFW EEPROM.
Signed-off-by: Tao Zhou <[email protected]> Signed-off-by: Gangliang Xie <[email protected]> --- drivers/gpu/drm/amd/ras/rascore/ras.h | 1 + drivers/gpu/drm/amd/ras/rascore/ras_core.c | 5 +- .../gpu/drm/amd/ras/rascore/ras_eeprom_fw.c | 70 +++++++++++++++++++ .../gpu/drm/amd/ras/rascore/ras_eeprom_fw.h | 5 ++ drivers/gpu/drm/amd/ras/rascore/ras_umc.c | 27 +++++-- .../gpu/drm/amd/ras/rascore/ras_umc_v12_0.c | 2 +- 6 files changed, 101 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/ras/rascore/ras.h b/drivers/gpu/drm/amd/ras/rascore/ras.h index ae10d853c565..05c7923e8f0f 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras.h +++ b/drivers/gpu/drm/amd/ras/rascore/ras.h @@ -241,6 +241,7 @@ struct ras_bank_ecc { uint64_t status; uint64_t ipid; uint64_t addr; + uint64_t ts; }; struct ras_bank_ecc_node { diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c b/drivers/gpu/drm/amd/ras/rascore/ras_core.c index 1f2ce3749d43..fe188a5304d9 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_core.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c @@ -239,7 +239,10 @@ static int ras_core_eeprom_recovery(struct ras_core_context *ras_core) int count; int ret; - count = ras_eeprom_get_record_count(ras_core); + if (ras_fw_eeprom_supported(ras_core)) + count = ras_fw_eeprom_get_record_count(ras_core); + else + count = ras_eeprom_get_record_count(ras_core); if (!count) return 0; diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c index f7a6f2368530..69e1aef67ab9 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c @@ -259,3 +259,73 @@ int ras_fw_eeprom_append(struct ras_core_context *ras_core, mutex_unlock(&control->ras_tbl_mutex); return 0; } + +int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core, + struct eeprom_umc_record *record_umc, + struct ras_bank_ecc *ras_ecc, + u32 rec_idx, const u32 num) +{ + struct ras_fw_eeprom_control *control = 
&ras_core->ras_fw_eeprom; + int i, ret = 0, end_idx; + u64 mca, ipid, ts; + + if (!ras_core->ras_umc.ip_func || + !ras_core->ras_umc.ip_func->mca_ipid_parse) + return -EOPNOTSUPP; + + mutex_lock(&control->ras_tbl_mutex); + + end_idx = rec_idx + num; + for (i = rec_idx; i < end_idx; i++) { + ret = ras_fw_get_badpage_mca_addr(ras_core, i, &mca); + if (ret) + goto Out; + + ret = ras_fw_get_badpage_ipid(ras_core, i, &ipid); + if (ret) + goto Out; + + ret = ras_fw_get_timestamp(ras_core, i, &ts); + if (ret) + goto Out; + + if (record_umc) { + record_umc[i - rec_idx].address = mca; + /* retired_page (pa) is unused now */ + record_umc[i - rec_idx].retired_row_pfn = 0x1ULL; + record_umc[i - rec_idx].ts = ts; + record_umc[i - rec_idx].err_type = RAS_EEPROM_ERR_NON_RECOVERABLE; + + ras_core->ras_umc.ip_func->mca_ipid_parse(ras_core, ipid, + (uint32_t *)&(record_umc[i - rec_idx].cu), + (uint32_t *)&(record_umc[i - rec_idx].mem_channel), + (uint32_t *)&(record_umc[i - rec_idx].mcumc_id), NULL); + + /* update bad channel bitmap; output buffer is indexed relative to rec_idx */ + if ((record_umc[i - rec_idx].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) && + !(control->bad_channel_bitmap & (1 << record_umc[i - rec_idx].mem_channel))) { + control->bad_channel_bitmap |= 1 << record_umc[i - rec_idx].mem_channel; + control->update_channel_flag = true; + } + } + + if (ras_ecc) { + ras_ecc[i - rec_idx].addr = mca; + ras_ecc[i - rec_idx].ipid = ipid; + ras_ecc[i - rec_idx].ts = ts; + } + + } + +Out: + mutex_unlock(&control->ras_tbl_mutex); + return ret; +} + +uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context *ras_core) +{ + if (!ras_core) + return 0; + + return ras_core->ras_fw_eeprom.ras_num_recs; +} diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h index 27507bb38135..7daf903ad5aa 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h +++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h @@ -70,5 +70,10 @@ int ras_fw_eeprom_reset_table(struct ras_core_context *ras_core); 
bool ras_fw_eeprom_check_safety_watermark(struct ras_core_context *ras_core); int ras_fw_eeprom_append(struct ras_core_context *ras_core, struct eeprom_umc_record *record, const u32 num); +int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core, + struct eeprom_umc_record *record_umc, + struct ras_bank_ecc *ras_ecc, + u32 rec_idx, const u32 num); +uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context *ras_core); #endif diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c index fd427fd59ecf..eb5bb6df18f5 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c @@ -436,17 +436,27 @@ int ras_umc_load_bad_pages(struct ras_core_context *ras_core) uint32_t ras_num_recs; int ret; - ras_num_recs = ras_eeprom_get_record_count(ras_core); - /* no bad page record, skip eeprom access */ - if (!ras_num_recs || - ras_core->ras_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE) - return 0; + if (ras_fw_eeprom_supported(ras_core)) { + ras_num_recs = ras_fw_eeprom_get_record_count(ras_core); + /* no bad page record, skip eeprom access */ + if (!ras_num_recs || + ras_core->ras_fw_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE) + return 0; + } else { + ras_num_recs = ras_eeprom_get_record_count(ras_core); + if (!ras_num_recs || + ras_core->ras_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE) + return 0; + } bps = kcalloc(ras_num_recs, sizeof(*bps), GFP_KERNEL); if (!bps) return -ENOMEM; - ret = ras_eeprom_read(ras_core, bps, ras_num_recs); + if (ras_fw_eeprom_supported(ras_core)) + ret = ras_fw_eeprom_read_idx(ras_core, bps, 0, 0, ras_num_recs); + else + ret = ras_eeprom_read(ras_core, bps, ras_num_recs); if (ret) { RAS_DEV_ERR(ras_core->dev, "Failed to load EEPROM table records!"); } else { @@ -474,7 +484,10 @@ static int ras_umc_save_bad_pages(struct ras_core_context *ras_core) if (!data->bps) return 0; - eeprom_record_num = 
ras_eeprom_get_record_count(ras_core); + if (ras_fw_eeprom_supported(ras_core)) + eeprom_record_num = ras_fw_eeprom_get_record_count(ras_core); + else + eeprom_record_num = ras_eeprom_get_record_count(ras_core); mutex_lock(&ras_umc->umc_lock); save_count = data->count - eeprom_record_num; /* only new entries are saved */ diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c index e2792b239bea..53dc59e4de0c 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c @@ -413,7 +413,7 @@ static int umc_v12_0_eeprom_record_to_nps_record(struct ras_core_context *ras_co uint64_t pa = 0; int ret = 0; - if (nps == EEPROM_RECORD_UMC_NPS_MODE(record)) { + if (nps == EEPROM_RECORD_UMC_NPS_MODE(record) && !ras_fw_eeprom_supported(ras_core)) { record->cur_nps_retired_row_pfn = EEPROM_RECORD_UMC_ADDR_PFN(record); } else { ret = convert_eeprom_record_to_nps_addr(ras_core, -- 2.34.1
