[AMD Official Use Only - AMD Internal Distribution Only]
> -----Original Message-----
> From: Xie, Patrick <[email protected]>
> Sent: Monday, January 26, 2026 11:55 AM
> To: [email protected]
> Cc: Zhou1, Tao <[email protected]>; Chai, Thomas <[email protected]>;
> Xie, Patrick <[email protected]>
> Subject: [PATCH 10/14] drm/amd/ras: add read func for pmfw eeprom
>
> Add a read function for the PMFW EEPROM, and adapt the address conversion
> for bad pages loaded from the PMFW EEPROM.
>
> Signed-off-by: Tao Zhou <[email protected]>
> Signed-off-by: Gangliang Xie <[email protected]>
> ---
> drivers/gpu/drm/amd/ras/rascore/ras.h | 1 +
> drivers/gpu/drm/amd/ras/rascore/ras_core.c | 5 +-
> .../gpu/drm/amd/ras/rascore/ras_eeprom_fw.c | 70 +++++++++++++++++++
> .../gpu/drm/amd/ras/rascore/ras_eeprom_fw.h | 5 ++
> drivers/gpu/drm/amd/ras/rascore/ras_umc.c | 27 +++++--
> .../gpu/drm/amd/ras/rascore/ras_umc_v12_0.c | 2 +-
> 6 files changed, 101 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/ras/rascore/ras.h
> b/drivers/gpu/drm/amd/ras/rascore/ras.h
> index ae10d853c565..05c7923e8f0f 100644
> --- a/drivers/gpu/drm/amd/ras/rascore/ras.h
> +++ b/drivers/gpu/drm/amd/ras/rascore/ras.h
> @@ -241,6 +241,7 @@ struct ras_bank_ecc {
> uint64_t status;
> uint64_t ipid;
> uint64_t addr;
> + uint64_t ts;
> };
>
> struct ras_bank_ecc_node {
> diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c
> b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
> index 1f2ce3749d43..fe188a5304d9 100644
> --- a/drivers/gpu/drm/amd/ras/rascore/ras_core.c
> +++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
> @@ -239,7 +239,10 @@ static int ras_core_eeprom_recovery(struct
> ras_core_context *ras_core)
> int count;
> int ret;
>
> - count = ras_eeprom_get_record_count(ras_core);
> + if (ras_fw_eeprom_supported(ras_core))
> + count = ras_fw_eeprom_get_record_count(ras_core);
> + else
> + count = ras_eeprom_get_record_count(ras_core);
> if (!count)
> return 0;
>
> diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
> b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
> index f7a6f2368530..69e1aef67ab9 100644
> --- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
> +++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
> @@ -259,3 +259,73 @@ int ras_fw_eeprom_append(struct ras_core_context
> *ras_core,
> mutex_unlock(&control->ras_tbl_mutex);
> return 0;
> }
> +
> +int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core,
> + struct eeprom_umc_record *record_umc,
> + struct ras_bank_ecc *ras_ecc,
> + u32 rec_idx, const u32 num)
> +{
> + struct ras_fw_eeprom_control *control = &ras_core->ras_fw_eeprom;
> + int i, ret, end_idx;
> + u64 mca, ipid, ts;
> +
> + if (!ras_core->ras_umc.ip_func ||
> + !ras_core->ras_umc.ip_func->mca_ipid_parse)
> + return -EOPNOTSUPP;
> +
> + mutex_lock(&control->ras_tbl_mutex);
> +
> + end_idx = rec_idx + num;
> + for (i = rec_idx; i < end_idx; i++) {
> + ret = ras_fw_get_badpage_mca_addr(ras_core, i, &mca);
> + if (ret)
> + goto Out;
> +
> + ret = ras_fw_get_badpage_ipid(ras_core, i, &ipid);
> + if (ret)
> + goto Out;
> +
> + ret = ras_fw_get_timestamp(ras_core, i, &ts);
> + if (ret)
> + goto Out;
> +
> + if (record_umc) {
> + record_umc[i - rec_idx].address = mca;
> + /* retired_page (pa) is unused now */
> + record_umc[i - rec_idx].retired_row_pfn = 0x1ULL;
> + record_umc[i - rec_idx].ts = ts;
> + record_umc[i - rec_idx].err_type =
> RAS_EEPROM_ERR_NON_RECOVERABLE;
> +
> + ras_core->ras_umc.ip_func->mca_ipid_parse(ras_core,
> ipid,
> + (uint32_t *)&(record_umc[i - rec_idx].cu),
> + (uint32_t *)&(record_umc[i -
> rec_idx].mem_channel),
> + (uint32_t *)&(record_umc[i - rec_idx].mcumc_id),
> NULL);
> +
> + /* update bad channel bitmap */
> + if ((record_umc[i].mem_channel < BITS_PER_TYPE(control-
> >bad_channel_bitmap)) &&
> + !(control->bad_channel_bitmap & (1 <<
> record_umc[i].mem_channel))) {
> + control->bad_channel_bitmap |= 1 <<
> record_umc[i].mem_channel;
> + control->update_channel_flag = true;
> + }
> + }
> +
> + if (ras_ecc) {
> + ras_ecc[i - rec_idx].addr = mca;
> + ras_ecc[i - rec_idx].ipid = ipid;
> + ras_ecc[i - rec_idx].ts = ts;
> + }
> +
> + }
> +
> +Out:
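[Tao] Better to use a lowercase label name here, i.e. 'out' instead of 'Out' (the goto statements above need the same rename).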
> + mutex_unlock(&control->ras_tbl_mutex);
> + return ret;
> +}
> +
> +uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context
> +*ras_core) {
> + if (!ras_core)
> + return 0;
> +
> + return ras_core->ras_fw_eeprom.ras_num_recs;
> +}
> diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
> b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
> index 27507bb38135..7daf903ad5aa 100644
> --- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
> +++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
> @@ -70,5 +70,10 @@ int ras_fw_eeprom_reset_table(struct ras_core_context
> *ras_core); bool ras_fw_eeprom_check_safety_watermark(struct ras_core_context
> *ras_core); int ras_fw_eeprom_append(struct ras_core_context *ras_core,
> struct eeprom_umc_record *record, const u32 num);
> +int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core,
> + struct eeprom_umc_record *record_umc,
> + struct ras_bank_ecc *ras_ecc,
> + u32 rec_idx, const u32 num);
> +uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context
> +*ras_core);
>
> #endif
> diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
> b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
> index fd427fd59ecf..eb5bb6df18f5 100644
> --- a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
> +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
> @@ -436,17 +436,27 @@ int ras_umc_load_bad_pages(struct ras_core_context
> *ras_core)
> uint32_t ras_num_recs;
> int ret;
>
> - ras_num_recs = ras_eeprom_get_record_count(ras_core);
> - /* no bad page record, skip eeprom access */
> - if (!ras_num_recs ||
> - ras_core->ras_eeprom.record_threshold_config ==
> DISABLE_RETIRE_PAGE)
> - return 0;
> + if (ras_fw_eeprom_supported(ras_core)) {
> + ras_num_recs = ras_fw_eeprom_get_record_count(ras_core);
> + /* no bad page record, skip eeprom access */
> + if (!ras_num_recs ||
> + ras_core->ras_fw_eeprom.record_threshold_config ==
> DISABLE_RETIRE_PAGE)
> + return 0;
> + } else {
> + ras_num_recs = ras_eeprom_get_record_count(ras_core);
> + if (!ras_num_recs ||
> + ras_core->ras_eeprom.record_threshold_config ==
> DISABLE_RETIRE_PAGE)
> + return 0;
> + }
>
> bps = kcalloc(ras_num_recs, sizeof(*bps), GFP_KERNEL);
> if (!bps)
> return -ENOMEM;
>
> - ret = ras_eeprom_read(ras_core, bps, ras_num_recs);
> + if (ras_fw_eeprom_supported(ras_core))
> + ret = ras_fw_eeprom_read_idx(ras_core, bps, 0, 0, ras_num_recs);
> + else
> + ret = ras_eeprom_read(ras_core, bps, ras_num_recs);
> if (ret) {
> RAS_DEV_ERR(ras_core->dev, "Failed to load EEPROM table
> records!");
> } else {
> @@ -474,7 +484,10 @@ static int ras_umc_save_bad_pages(struct
> ras_core_context *ras_core)
> if (!data->bps)
> return 0;
>
> - eeprom_record_num = ras_eeprom_get_record_count(ras_core);
> + if (ras_fw_eeprom_supported(ras_core))
> + eeprom_record_num = ras_fw_eeprom_get_record_count(ras_core);
> + else
> + eeprom_record_num = ras_eeprom_get_record_count(ras_core);
> mutex_lock(&ras_umc->umc_lock);
> save_count = data->count - eeprom_record_num;
> /* only new entries are saved */
> diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
> b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
> index e2792b239bea..53dc59e4de0c 100644
> --- a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
> +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
> @@ -413,7 +413,7 @@ static int umc_v12_0_eeprom_record_to_nps_record(struct
> ras_core_context *ras_co
> uint64_t pa = 0;
> int ret = 0;
>
> - if (nps == EEPROM_RECORD_UMC_NPS_MODE(record)) {
> + if (nps == EEPROM_RECORD_UMC_NPS_MODE(record) &&
> +!ras_fw_eeprom_supported(ras_core)) {
> record->cur_nps_retired_row_pfn =
> EEPROM_RECORD_UMC_ADDR_PFN(record);
> } else {
> ret = convert_eeprom_record_to_nps_addr(ras_core,
> --
> 2.34.1