[AMD Official Use Only - AMD Internal Distribution Only] Thanks, will change to 'out'.
-----Original Message----- From: Zhou1, Tao <[email protected]> Sent: Tuesday, January 27, 2026 11:58 AM To: Xie, Patrick <[email protected]>; [email protected] Cc: Chai, Thomas <[email protected]> Subject: RE: [PATCH 10/14] drm/amd/ras: add read func for pmfw eeprom [AMD Official Use Only - AMD Internal Distribution Only] > -----Original Message----- > From: Xie, Patrick <[email protected]> > Sent: Monday, January 26, 2026 11:55 AM > To: [email protected] > Cc: Zhou1, Tao <[email protected]>; Chai, Thomas > <[email protected]>; Xie, Patrick <[email protected]> > Subject: [PATCH 10/14] drm/amd/ras: add read func for pmfw eeprom > > add read func for pmfw eeprom, and adapt address converting for bad > pages loaded from pmfw eeprom > > Signed-off-by: Tao Zhou <[email protected]> > Signed-off-by: Gangliang Xie <[email protected]> > --- > drivers/gpu/drm/amd/ras/rascore/ras.h | 1 + > drivers/gpu/drm/amd/ras/rascore/ras_core.c | 5 +- > .../gpu/drm/amd/ras/rascore/ras_eeprom_fw.c | 70 +++++++++++++++++++ > .../gpu/drm/amd/ras/rascore/ras_eeprom_fw.h | 5 ++ > drivers/gpu/drm/amd/ras/rascore/ras_umc.c | 27 +++++-- > .../gpu/drm/amd/ras/rascore/ras_umc_v12_0.c | 2 +- > 6 files changed, 101 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/amd/ras/rascore/ras.h > b/drivers/gpu/drm/amd/ras/rascore/ras.h > index ae10d853c565..05c7923e8f0f 100644 > --- a/drivers/gpu/drm/amd/ras/rascore/ras.h > +++ b/drivers/gpu/drm/amd/ras/rascore/ras.h > @@ -241,6 +241,7 @@ struct ras_bank_ecc { > uint64_t status; > uint64_t ipid; > uint64_t addr; > + uint64_t ts; > }; > > struct ras_bank_ecc_node { > diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c > b/drivers/gpu/drm/amd/ras/rascore/ras_core.c > index 1f2ce3749d43..fe188a5304d9 100644 > --- a/drivers/gpu/drm/amd/ras/rascore/ras_core.c > +++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c > @@ -239,7 +239,10 @@ static int ras_core_eeprom_recovery(struct > ras_core_context *ras_core) > int count; > int ret; > > - count = 
ras_eeprom_get_record_count(ras_core); > + if (ras_fw_eeprom_supported(ras_core)) > + count = ras_fw_eeprom_get_record_count(ras_core); > + else > + count = ras_eeprom_get_record_count(ras_core); > if (!count) > return 0; > > diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c > b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c > index f7a6f2368530..69e1aef67ab9 100644 > --- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c > +++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c > @@ -259,3 +259,73 @@ int ras_fw_eeprom_append(struct ras_core_context > *ras_core, > mutex_unlock(&control->ras_tbl_mutex); > return 0; > } > + > +int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core, > + struct eeprom_umc_record *record_umc, > + struct ras_bank_ecc *ras_ecc, > + u32 rec_idx, const u32 num) { > + struct ras_fw_eeprom_control *control = &ras_core->ras_fw_eeprom; > + int i, ret, end_idx; > + u64 mca, ipid, ts; > + > + if (!ras_core->ras_umc.ip_func || > + !ras_core->ras_umc.ip_func->mca_ipid_parse) > + return -EOPNOTSUPP; > + > + mutex_lock(&control->ras_tbl_mutex); > + > + end_idx = rec_idx + num; > + for (i = rec_idx; i < end_idx; i++) { > + ret = ras_fw_get_badpage_mca_addr(ras_core, i, &mca); > + if (ret) > + goto Out; > + > + ret = ras_fw_get_badpage_ipid(ras_core, i, &ipid); > + if (ret) > + goto Out; > + > + ret = ras_fw_get_timestamp(ras_core, i, &ts); > + if (ret) > + goto Out; > + > + if (record_umc) { > + record_umc[i - rec_idx].address = mca; > + /* retired_page (pa) is unused now */ > + record_umc[i - rec_idx].retired_row_pfn = 0x1ULL; > + record_umc[i - rec_idx].ts = ts; > + record_umc[i - rec_idx].err_type = > RAS_EEPROM_ERR_NON_RECOVERABLE; > + > + ras_core->ras_umc.ip_func->mca_ipid_parse(ras_core, > ipid, > + (uint32_t *)&(record_umc[i - rec_idx].cu), > + (uint32_t *)&(record_umc[i - > rec_idx].mem_channel), > + (uint32_t *)&(record_umc[i - > + rec_idx].mcumc_id), > NULL); > + > + /* update bad channel bitmap */ > + if 
((record_umc[i].mem_channel < > + BITS_PER_TYPE(control- > >bad_channel_bitmap)) && > + !(control->bad_channel_bitmap & (1 << > record_umc[i].mem_channel))) { > + control->bad_channel_bitmap |= 1 << > record_umc[i].mem_channel; > + control->update_channel_flag = true; > + } > + } > + > + if (ras_ecc) { > + ras_ecc[i - rec_idx].addr = mca; > + ras_ecc[i - rec_idx].ipid = ipid; > + ras_ecc[i - rec_idx].ts = ts; > + } > + > + } > + > +Out: [Tao] better to use 'out' > + mutex_unlock(&control->ras_tbl_mutex); > + return ret; > +} > + > +uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context > +*ras_core) { > + if (!ras_core) > + return 0; > + > + return ras_core->ras_fw_eeprom.ras_num_recs; > +} > diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h > b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h > index 27507bb38135..7daf903ad5aa 100644 > --- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h > +++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h > @@ -70,5 +70,10 @@ int ras_fw_eeprom_reset_table(struct > ras_core_context *ras_core); bool > ras_fw_eeprom_check_safety_watermark(struct ras_core_context *ras_core); int > ras_fw_eeprom_append(struct ras_core_context *ras_core, > struct eeprom_umc_record *record, const u32 > num); > +int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core, > + struct eeprom_umc_record *record_umc, > + struct ras_bank_ecc *ras_ecc, > + u32 rec_idx, const u32 num); uint32_t > +ras_fw_eeprom_get_record_count(struct ras_core_context *ras_core); > > #endif > diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c > b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c > index fd427fd59ecf..eb5bb6df18f5 100644 > --- a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c > +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c > @@ -436,17 +436,27 @@ int ras_umc_load_bad_pages(struct > ras_core_context > *ras_core) > uint32_t ras_num_recs; > int ret; > > - ras_num_recs = ras_eeprom_get_record_count(ras_core); > - /* no bad page record, skip eeprom 
access */ > - if (!ras_num_recs || > - ras_core->ras_eeprom.record_threshold_config == > DISABLE_RETIRE_PAGE) > - return 0; > + if (ras_fw_eeprom_supported(ras_core)) { > + ras_num_recs = ras_fw_eeprom_get_record_count(ras_core); > + /* no bad page record, skip eeprom access */ > + if (!ras_num_recs || > + ras_core->ras_fw_eeprom.record_threshold_config == > DISABLE_RETIRE_PAGE) > + return 0; > + } else { > + ras_num_recs = ras_eeprom_get_record_count(ras_core); > + if (!ras_num_recs || > + ras_core->ras_eeprom.record_threshold_config == > DISABLE_RETIRE_PAGE) > + return 0; > + } > > bps = kcalloc(ras_num_recs, sizeof(*bps), GFP_KERNEL); > if (!bps) > return -ENOMEM; > > - ret = ras_eeprom_read(ras_core, bps, ras_num_recs); > + if (ras_fw_eeprom_supported(ras_core)) > + ret = ras_fw_eeprom_read_idx(ras_core, bps, 0, 0, ras_num_recs); > + else > + ret = ras_eeprom_read(ras_core, bps, ras_num_recs); > if (ret) { > RAS_DEV_ERR(ras_core->dev, "Failed to load EEPROM table > records!"); > } else { > @@ -474,7 +484,10 @@ static int ras_umc_save_bad_pages(struct > ras_core_context *ras_core) > if (!data->bps) > return 0; > > - eeprom_record_num = ras_eeprom_get_record_count(ras_core); > + if (ras_fw_eeprom_supported(ras_core)) > + eeprom_record_num = ras_fw_eeprom_get_record_count(ras_core); > + else > + eeprom_record_num = > + ras_eeprom_get_record_count(ras_core); > mutex_lock(&ras_umc->umc_lock); > save_count = data->count - eeprom_record_num; > /* only new entries are saved */ diff --git > a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c > b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c > index e2792b239bea..53dc59e4de0c 100644 > --- a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c > +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c > @@ -413,7 +413,7 @@ static int > umc_v12_0_eeprom_record_to_nps_record(struct > ras_core_context *ras_co > uint64_t pa = 0; > int ret = 0; > > - if (nps == EEPROM_RECORD_UMC_NPS_MODE(record)) { > + if (nps == 
EEPROM_RECORD_UMC_NPS_MODE(record) && > +!ras_fw_eeprom_supported(ras_core)) { > record->cur_nps_retired_row_pfn = > EEPROM_RECORD_UMC_ADDR_PFN(record); > } else { > ret = convert_eeprom_record_to_nps_addr(ras_core, > -- > 2.34.1
