On 09.10.25 05:30, Ellen Pan wrote:
>     1. Introduced amdgpu_virt_init_critical_region during VF init.
>      - VFs use init_data_header_offset and init_data_header_size_kb
>             transmitted via PF2VF mailbox to fetch the offset of
>             critical regions' offsets/sizes in VRAM and save to
>             adev->virt.crit_region_offsets and 
> adev->virt.crit_region_sizes_kb.
> 
> Signed-off-by: Ellen Pan <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |   6 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 103 ++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |   7 ++
>  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  31 ++++++
>  4 files changed, 147 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 929936c8d87c..2a33b950d511 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2754,6 +2754,12 @@ static int amdgpu_device_ip_early_init(struct 
> amdgpu_device *adev)
>               r = amdgpu_virt_request_full_gpu(adev, true);
>               if (r)
>                       return r;
> +
> +             if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
> +                     r = amdgpu_virt_init_critical_region(adev);
> +                     if (r)
> +                             return r;
> +             }

Alex has the final word on this, but that looks a bit misplaced.

Why can't that be done in some early_init callback or in one of the 
amdgpu_virt_init* functions in amdgpu_virt.c?

>       }
>  
>       switch (adev->asic_type) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 3a6b0e1084d7..46c19e96086a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -843,6 +843,109 @@ static void amdgpu_virt_init_ras(struct amdgpu_device 
> *adev)
>       adev->virt.ras.cper_rptr = 0;
>  }
>  
> +static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, 
> uint8_t *buf_end)
> +{
> +     uint32_t sum = 0;
> +
> +     if (buf_start >= buf_end)
> +             return 0;
> +
> +     for (; buf_start < buf_end; buf_start++)
> +             sum += buf_start[0];
> +
> +     return 0xffffffff - sum;
> +}
> +
> +#define mmRCC_CONFIG_MEMSIZE 0xde3

Why is that not in a header?

> +int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
> +{
> +     struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
> +     uint32_t init_hdr_offset = adev->virt.init_data_header_offset;
> +     uint32_t init_hdr_size = adev->virt.init_data_header_size_kb << 10;
> +     uint64_t pos = 0;
> +     uint64_t vram_size;
> +     int r = 0;
> +     uint8_t checksum = 0;
> +
> +     if (init_hdr_offset < 0) {
> +             DRM_ERROR("Invalid init header offset\n");

The DRM_* logging functions are usually used for things related to the DRM 
interfaces, e.g. the display controller, render interfaces, etc.

This code, however, deals with the HW, or at least the VF->PF interface, so the 
dev_err(), dev_warn(), ... class of functions is usually more appropriate here.

> +             return -EINVAL;
> +     }
> +
> +     vram_size = RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
> +     if ((init_hdr_offset + init_hdr_size) > vram_size) {
> +             DRM_ERROR("init_data_header exceeds VRAM size, exiting\n");
> +             return -EINVAL;
> +     }
> +
> +     /* Allocate for init_data_hdr */
> +     init_data_hdr = kzalloc(sizeof(struct amd_sriov_msg_init_data_header), 
> GFP_KERNEL);
> +     if (!init_data_hdr)
> +             return -ENOMEM;
> +
> +     pos = (uint64_t)init_hdr_offset;
> +     amdgpu_device_vram_access(adev, pos, (uint32_t *)init_data_hdr,
> +                                     sizeof(struct 
> amd_sriov_msg_init_data_header), false);
> +
> +     switch (init_data_hdr->version) {
> +     case GPU_CRIT_REGION_V2:
> +             if (strncmp(init_data_hdr->signature, "INDA", 4) != 0) {
> +                     DRM_ERROR("Invalid init data signature: %.4s\n", 
> init_data_hdr->signature);
> +                     r = -EINVAL;
> +                     goto out;
> +             }
> +
> +             checksum =
> +                     amdgpu_virt_crit_region_calc_checksum((uint8_t 
> *)&init_data_hdr->initdata_offset,
> +                             (uint8_t *)init_data_hdr + sizeof(struct 
> amd_sriov_msg_init_data_header));

Please make sure that checkpatch.pl doesn't complain about the coding style 
here.

Regards,
Christian.

> +             if (checksum != init_data_hdr->checksum) {
> +                     DRM_ERROR("Found unmatching checksum from calculation 
> 0x%x and init_data 0x%x\n",
> +                                             checksum, 
> init_data_hdr->checksum);
> +                     r = -EINVAL;
> +                     goto out;
> +             }
> +
> +             /* Initialize critical region offsets */
> +             adev->virt.crit_region_base_offset = 
> init_data_hdr->initdata_offset;
> +             adev->virt.crit_region_offsets[AMD_SRIOV_MSG_IPD_TABLE_ID] =
> +                     init_data_hdr->ip_discovery_offset;
> +             
> adev->virt.crit_region_offsets[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID] =
> +                     init_data_hdr->vbios_img_offset;
> +             
> adev->virt.crit_region_offsets[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID] =
> +                     init_data_hdr->ras_tele_info_offset;
> +             
> adev->virt.crit_region_offsets[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID] =
> +                     init_data_hdr->dataexchange_offset;
> +             
> adev->virt.crit_region_offsets[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID] =
> +                     init_data_hdr->bad_page_info_offset;
> +
> +             /* Initialize critical region sizes */
> +             adev->virt.crit_region_size_in_kb = 
> init_data_hdr->initdata_size_in_kb;
> +             adev->virt.crit_region_sizes_kb[AMD_SRIOV_MSG_IPD_TABLE_ID] =
> +                     init_data_hdr->ip_discovery_size_in_kb;
> +             
> adev->virt.crit_region_sizes_kb[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID] =
> +                     init_data_hdr->vbios_img_size_in_kb;
> +             
> adev->virt.crit_region_sizes_kb[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID] =
> +                     init_data_hdr->ras_tele_info_size_in_kb;
> +             
> adev->virt.crit_region_sizes_kb[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID] =
> +                     init_data_hdr->dataexchange_size_in_kb;
> +             
> adev->virt.crit_region_sizes_kb[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID] =
> +                     init_data_hdr->bad_page_size_in_kb;
> +
> +             adev->virt.init_data_done = true;
> +             break;
> +     default:
> +             DRM_ERROR("Invalid init header version: %u\n", 
> init_data_hdr->version);
> +             r = -EINVAL;
> +             goto out;
> +     }
> +
> +out:
> +     kfree(init_data_hdr);
> +     init_data_hdr = NULL;
> +
> +     return r;
> +}
> +
>  void amdgpu_virt_init(struct amdgpu_device *adev)
>  {
>       bool is_sriov = false;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index 2a0627596bd2..5f6014b2f349 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -292,6 +292,11 @@ struct amdgpu_virt {
>       /* critical regions v2 */
>       uint32_t init_data_header_offset;
>       uint32_t init_data_header_size_kb;
> +     uint32_t crit_region_base_offset;
> +     uint32_t crit_region_size_in_kb;
> +     uint64_t crit_region_offsets[AMD_SRIOV_MSG_MAX_TABLE_ID];
> +     uint64_t crit_region_sizes_kb[AMD_SRIOV_MSG_MAX_TABLE_ID];
> +     bool init_data_done;
>  
>       /* vf2pf message */
>       struct delayed_work vf2pf_work;
> @@ -428,6 +433,8 @@ void amdgpu_virt_exchange_data(struct amdgpu_device 
> *adev);
>  void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
>  void amdgpu_virt_init(struct amdgpu_device *adev);
>  
> +int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
> +
>  bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
>  int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
>  void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> index b53caab5b706..d15c256f9abd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> @@ -70,6 +70,37 @@ enum amd_sriov_crit_region_version {
>       GPU_CRIT_REGION_V2 = 2,
>  };
>  
> +/* v2 layout offset enum (in order of allocation) */
> +enum amd_sriov_msg_table_id_enum {
> +     AMD_SRIOV_MSG_IPD_TABLE_ID = 0,
> +     AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
> +     AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID,
> +     AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID,
> +     AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID,
> +     AMD_SRIOV_MSG_INITD_H_TABLE_ID,
> +     AMD_SRIOV_MSG_MAX_TABLE_ID,
> +};
> +
> +struct amd_sriov_msg_init_data_header {
> +     char     signature[4];  /* "INDA"  */
> +     uint32_t version;
> +     uint32_t checksum;
> +     uint32_t initdata_offset; /* 0 */
> +     uint32_t initdata_size_in_kb; /* 5MB */
> +     uint32_t valid_tables;
> +     uint32_t vbios_img_offset;
> +     uint32_t vbios_img_size_in_kb;
> +     uint32_t dataexchange_offset;
> +     uint32_t dataexchange_size_in_kb;
> +     uint32_t ras_tele_info_offset;
> +     uint32_t ras_tele_info_size_in_kb;
> +     uint32_t ip_discovery_offset;
> +     uint32_t ip_discovery_size_in_kb;
> +     uint32_t bad_page_info_offset;
> +     uint32_t bad_page_size_in_kb;
> +     uint32_t reserved[8];
> +};
> +
>  /*
>   * PF2VF history log:
>   * v1 defined in amdgim

Reply via email to