[AMD Official Use Only - AMD Internal Distribution Only]

> -----Original Message-----
> From: Xie, Patrick <[email protected]>
> Sent: Monday, January 26, 2026 11:55 AM
> To: [email protected]
> Cc: Zhou1, Tao <[email protected]>; Chai, Thomas <[email protected]>;
> Xie, Patrick <[email protected]>
> Subject: [PATCH 10/14] drm/amd/ras: add read func for pmfw eeprom
>
> Add a read function for the PMFW EEPROM, and adapt address conversion for
> bad pages loaded from the PMFW EEPROM.
>
> Signed-off-by: Tao Zhou <[email protected]>
> Signed-off-by: Gangliang Xie <[email protected]>
> ---
>  drivers/gpu/drm/amd/ras/rascore/ras.h         |  1 +
>  drivers/gpu/drm/amd/ras/rascore/ras_core.c    |  5 +-
>  .../gpu/drm/amd/ras/rascore/ras_eeprom_fw.c   | 70 +++++++++++++++++++
>  .../gpu/drm/amd/ras/rascore/ras_eeprom_fw.h   |  5 ++
>  drivers/gpu/drm/amd/ras/rascore/ras_umc.c     | 27 +++++--
>  .../gpu/drm/amd/ras/rascore/ras_umc_v12_0.c   |  2 +-
>  6 files changed, 101 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/ras/rascore/ras.h
> b/drivers/gpu/drm/amd/ras/rascore/ras.h
> index ae10d853c565..05c7923e8f0f 100644
> --- a/drivers/gpu/drm/amd/ras/rascore/ras.h
> +++ b/drivers/gpu/drm/amd/ras/rascore/ras.h
> @@ -241,6 +241,7 @@ struct ras_bank_ecc {
>       uint64_t status;
>       uint64_t ipid;
>       uint64_t addr;
> +     uint64_t ts;
>  };
>
>  struct ras_bank_ecc_node {
> diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c
> b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
> index 1f2ce3749d43..fe188a5304d9 100644
> --- a/drivers/gpu/drm/amd/ras/rascore/ras_core.c
> +++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
> @@ -239,7 +239,10 @@ static int ras_core_eeprom_recovery(struct
> ras_core_context *ras_core)
>       int count;
>       int ret;
>
> -     count = ras_eeprom_get_record_count(ras_core);
> +     if (ras_fw_eeprom_supported(ras_core))
> +             count = ras_fw_eeprom_get_record_count(ras_core);
> +     else
> +             count = ras_eeprom_get_record_count(ras_core);
>       if (!count)
>               return 0;
>
> diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
> b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
> index f7a6f2368530..69e1aef67ab9 100644
> --- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
> +++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
> @@ -259,3 +259,73 @@ int ras_fw_eeprom_append(struct ras_core_context
> *ras_core,
>       mutex_unlock(&control->ras_tbl_mutex);
>       return 0;
>  }
> +
> +int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core,
> +                      struct eeprom_umc_record *record_umc,
> +                      struct ras_bank_ecc *ras_ecc,
> +                      u32 rec_idx, const u32 num)
> +{
> +     struct ras_fw_eeprom_control *control = &ras_core->ras_fw_eeprom;
> +     int i, ret, end_idx;
> +     u64 mca, ipid, ts;
> +
> +     if (!ras_core->ras_umc.ip_func ||
> +         !ras_core->ras_umc.ip_func->mca_ipid_parse)
> +             return -EOPNOTSUPP;
> +
> +     mutex_lock(&control->ras_tbl_mutex);
> +
> +     end_idx = rec_idx + num;
> +     for (i = rec_idx; i < end_idx; i++) {
> +             ret = ras_fw_get_badpage_mca_addr(ras_core, i, &mca);
> +             if (ret)
> +                     goto Out;
> +
> +             ret = ras_fw_get_badpage_ipid(ras_core, i, &ipid);
> +             if (ret)
> +                     goto Out;
> +
> +             ret = ras_fw_get_timestamp(ras_core, i, &ts);
> +             if (ret)
> +                     goto Out;
> +
> +             if (record_umc) {
> +                     record_umc[i - rec_idx].address = mca;
> +                     /* retired_page (pa) is unused now */
> +                     record_umc[i - rec_idx].retired_row_pfn = 0x1ULL;
> +                     record_umc[i - rec_idx].ts = ts;
> +                     record_umc[i - rec_idx].err_type =
> RAS_EEPROM_ERR_NON_RECOVERABLE;
> +
> +                     ras_core->ras_umc.ip_func->mca_ipid_parse(ras_core, 
> ipid,
> +                             (uint32_t *)&(record_umc[i - rec_idx].cu),
> +                             (uint32_t *)&(record_umc[i - 
> rec_idx].mem_channel),
> +                             (uint32_t *)&(record_umc[i - rec_idx].mcumc_id),
> NULL);
> +
> +                     /* update bad channel bitmap */
> +                     if ((record_umc[i].mem_channel < BITS_PER_TYPE(control-
> >bad_channel_bitmap)) &&
> +                             !(control->bad_channel_bitmap & (1 <<
> record_umc[i].mem_channel))) {
> +                             control->bad_channel_bitmap |= 1 <<
> record_umc[i].mem_channel;
> +                             control->update_channel_flag = true;
> +                     }
> +             }
> +
> +             if (ras_ecc) {
> +                     ras_ecc[i - rec_idx].addr = mca;
> +                     ras_ecc[i - rec_idx].ipid = ipid;
> +                     ras_ecc[i - rec_idx].ts = ts;
> +             }
> +
> +     }
> +
> +Out:

[Tao] Better to name this label in lowercase, i.e. 'out' (and update the 'goto Out' statements above to match).

> +     mutex_unlock(&control->ras_tbl_mutex);
> +     return ret;
> +}
> +
> +uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context
> +*ras_core) {
> +     if (!ras_core)
> +             return 0;
> +
> +     return ras_core->ras_fw_eeprom.ras_num_recs;
> +}
> diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
> b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
> index 27507bb38135..7daf903ad5aa 100644
> --- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
> +++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
> @@ -70,5 +70,10 @@ int ras_fw_eeprom_reset_table(struct ras_core_context
> *ras_core);  bool ras_fw_eeprom_check_safety_watermark(struct ras_core_context
> *ras_core);  int ras_fw_eeprom_append(struct ras_core_context *ras_core,
>                          struct eeprom_umc_record *record, const u32 num);
> +int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core,
> +                      struct eeprom_umc_record *record_umc,
> +                      struct ras_bank_ecc *ras_ecc,
> +                      u32 rec_idx, const u32 num);
> +uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context
> +*ras_core);
>
>  #endif
> diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
> b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
> index fd427fd59ecf..eb5bb6df18f5 100644
> --- a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
> +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
> @@ -436,17 +436,27 @@ int ras_umc_load_bad_pages(struct ras_core_context
> *ras_core)
>       uint32_t ras_num_recs;
>       int ret;
>
> -     ras_num_recs = ras_eeprom_get_record_count(ras_core);
> -     /* no bad page record, skip eeprom access */
> -     if (!ras_num_recs ||
> -         ras_core->ras_eeprom.record_threshold_config ==
> DISABLE_RETIRE_PAGE)
> -             return 0;
> +     if (ras_fw_eeprom_supported(ras_core)) {
> +             ras_num_recs = ras_fw_eeprom_get_record_count(ras_core);
> +             /* no bad page record, skip eeprom access */
> +             if (!ras_num_recs ||
> +                 ras_core->ras_fw_eeprom.record_threshold_config ==
> DISABLE_RETIRE_PAGE)
> +                     return 0;
> +     } else {
> +             ras_num_recs = ras_eeprom_get_record_count(ras_core);
> +             if (!ras_num_recs ||
> +                 ras_core->ras_eeprom.record_threshold_config ==
> DISABLE_RETIRE_PAGE)
> +                     return 0;
> +     }
>
>       bps = kcalloc(ras_num_recs, sizeof(*bps), GFP_KERNEL);
>       if (!bps)
>               return -ENOMEM;
>
> -     ret = ras_eeprom_read(ras_core, bps, ras_num_recs);
> +     if (ras_fw_eeprom_supported(ras_core))
> +             ret = ras_fw_eeprom_read_idx(ras_core, bps, 0, 0, ras_num_recs);
> +     else
> +             ret = ras_eeprom_read(ras_core, bps, ras_num_recs);
>       if (ret) {
>               RAS_DEV_ERR(ras_core->dev, "Failed to load EEPROM table
> records!");
>       } else {
> @@ -474,7 +484,10 @@ static int ras_umc_save_bad_pages(struct
> ras_core_context *ras_core)
>       if (!data->bps)
>               return 0;
>
> -     eeprom_record_num = ras_eeprom_get_record_count(ras_core);
> +     if (ras_fw_eeprom_supported(ras_core))
> +             eeprom_record_num = ras_fw_eeprom_get_record_count(ras_core);
> +     else
> +             eeprom_record_num = ras_eeprom_get_record_count(ras_core);
>       mutex_lock(&ras_umc->umc_lock);
>       save_count = data->count - eeprom_record_num;
>       /* only new entries are saved */
> diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
> b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
> index e2792b239bea..53dc59e4de0c 100644
> --- a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
> +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
> @@ -413,7 +413,7 @@ static int umc_v12_0_eeprom_record_to_nps_record(struct
> ras_core_context *ras_co
>       uint64_t pa = 0;
>       int ret = 0;
>
> -     if (nps == EEPROM_RECORD_UMC_NPS_MODE(record)) {
> +     if (nps == EEPROM_RECORD_UMC_NPS_MODE(record) &&
> +!ras_fw_eeprom_supported(ras_core)) {
>               record->cur_nps_retired_row_pfn =
> EEPROM_RECORD_UMC_ADDR_PFN(record);
>       } else {
>               ret = convert_eeprom_record_to_nps_addr(ras_core,
> --
> 2.34.1

Reply via email to