[AMD Official Use Only - General]

Hi Tao,

> -----Original Message-----
> From: Zhou1, Tao <tao.zh...@amd.com>
> Sent: Friday, September 23, 2022 5:21 PM
> To: amd-gfx@lists.freedesktop.org; Zhang, Hawking
> <hawking.zh...@amd.com>; Yang, Stanley <stanley.y...@amd.com>
> Cc: Zhou1, Tao <tao.zh...@amd.com>
> Subject: [PATCH 1/4] drm/amdgpu: export umc error address translation
> interface
> 
> Make it global so we can convert a specific MCA address.
> 
> Signed-off-by: Tao Zhou <tao.zh...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |  6 ++++++
>  drivers/gpu/drm/amd/amdgpu/umc_v6_7.c   | 11 +++++------
>  2 files changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> index 3629d8f292ef..31fbefaaf676 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> @@ -22,6 +22,8 @@
>  #define __AMDGPU_UMC_H__
>  #include "amdgpu_ras.h"
> 
> +#define UMC_INVALID_ADDR 0x1ULL
> +
>  /*
>   * (addr / 256) * 4096, the higher 26 bits in ErrorAddr
>   * is the index of 4KB block
> @@ -51,6 +53,10 @@ struct amdgpu_umc_ras {
>       struct amdgpu_ras_block_object ras_block;
>       void (*err_cnt_init)(struct amdgpu_device *adev);
>       bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
> +     void (*query_error_address_per_channel)(struct amdgpu_device
> *adev,
> +                                              struct ras_err_data
> *err_data,
> +                                              uint32_t umc_reg_offset,
> uint32_t ch_inst,
> +                                              uint32_t umc_inst, uint64_t
> mca_addr);
>       void (*ecc_info_query_ras_error_count)(struct amdgpu_device
> *adev,
>                                     void *ras_error_status);
>       void (*ecc_info_query_ras_error_address)(struct amdgpu_device
> *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> index bf7524f16b66..0f1b215653f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> @@ -452,9 +452,8 @@ static void umc_v6_7_query_ras_error_count(struct
> amdgpu_device *adev,
> 
>  static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
>                                        struct ras_err_data *err_data,
> -                                      uint32_t umc_reg_offset,
> -                                      uint32_t ch_inst,
> -                                      uint32_t umc_inst)
> +                                      uint32_t umc_reg_offset, uint32_t
> ch_inst,
> +                                      uint32_t umc_inst, uint64_t
> mca_addr)
>  {
>       uint32_t mc_umc_status_addr;
>       uint32_t channel_index;
> @@ -540,9 +539,8 @@ static void
> umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
>                                                        ch_inst);
>               umc_v6_7_query_error_address(adev,
>                                            err_data,
> -                                          umc_reg_offset,
> -                                          ch_inst,
> -                                          umc_inst);
> +                                          umc_reg_offset, ch_inst,
> +                                          umc_inst, UMC_INVALID_ADDR);
>       }
>  }
> 
> @@ -583,4 +581,5 @@ struct amdgpu_umc_ras umc_v6_7_ras = {
>       .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
>       .ecc_info_query_ras_error_count =
> umc_v6_7_ecc_info_query_ras_error_count,
>       .ecc_info_query_ras_error_address =
> umc_v6_7_ecc_info_query_ras_error_address,
> +     .query_error_address_per_channel =
> umc_v6_7_query_error_address,

Stanley: According to patch #3, it would be better to rename
query_error_address_per_channel to
convert/query_error_address_at_specific_channel, because the channel instance
and UMC instance are obtained from the mce structure; using per_channel may
cause misunderstanding.

>  };
> --
> 2.35.1

Reply via email to