On Wed, Oct 15, 2025 at 5:49 PM Ellen Pan <[email protected]> wrote: > > 1. Introduced amdgpu_virt_init_critical_region during VF init. > - VFs use init_data_header_offset and init_data_header_size_kb > transmitted via PF2VF mailbox to locate the init data header in VRAM, > read the critical regions' offsets/sizes from it, and save them to > adev->virt.crit_regn and > adev->virt.crit_regn_tbl. > > Signed-off-by: Ellen Pan <[email protected]>
Reviewed-by: Alex Deucher <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 + > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 165 ++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 11 ++ > drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 31 ++++ > 4 files changed, 211 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index a99185ed0642..3ffb9bb1ec0b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -2782,6 +2782,10 @@ static int amdgpu_device_ip_early_init(struct > amdgpu_device *adev) > r = amdgpu_virt_request_full_gpu(adev, true); > if (r) > return r; > + > + r = amdgpu_virt_init_critical_region(adev); > + if (r) > + return r; > } > > switch (adev->asic_type) { > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > index 56573fb27f63..805ecc69a8b5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > @@ -44,6 +44,18 @@ > vf2pf_info->ucode_info[ucode].version = ver; \ > } while (0) > > +#define mmRCC_CONFIG_MEMSIZE 0xde3 > + > +const char *amdgpu_virt_dynamic_crit_table_name[] = { > + "IP DISCOVERY", > + "VBIOS IMG", > + "RAS TELEMETRY", > + "DATA EXCHANGE", > + "BAD PAGE INFO", > + "INIT HEADER", > + "LAST", > +}; > + > bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) > { > /* By now all MMIO pages except mailbox are blocked */ > @@ -843,6 +855,159 @@ static void amdgpu_virt_init_ras(struct amdgpu_device > *adev) > adev->virt.ras.cper_rptr = 0; > } > > +static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, > uint8_t *buf_end) > +{ > + uint32_t sum = 0; > + > + if (buf_start >= buf_end) > + return 0; > + > + for (; buf_start < buf_end; buf_start++) > + sum += buf_start[0]; > + > + return 0xffffffff - sum; > +} > + > +int amdgpu_virt_init_critical_region(struct 
amdgpu_device *adev) > +{ > + struct amd_sriov_msg_init_data_header *init_data_hdr = NULL; > + uint32_t init_hdr_offset = adev->virt.init_data_header.offset; > + uint32_t init_hdr_size = adev->virt.init_data_header.size_kb << 10; > + uint64_t vram_size; > + int r = 0; > + uint8_t checksum = 0; > + > + /* Skip below init if critical region version != v2 */ > + if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2) > + return 0; > + > + if (init_hdr_offset < 0) { > + dev_err(adev->dev, "Invalid init header offset\n"); > + return -EINVAL; > + } > + > + vram_size = RREG32(mmRCC_CONFIG_MEMSIZE); > + if (!vram_size || vram_size == U32_MAX) > + return -EINVAL; > + vram_size <<= 20; > + > + if ((init_hdr_offset + init_hdr_size) > vram_size) { > + dev_err(adev->dev, "init_data_header exceeds VRAM size, > exiting\n"); > + return -EINVAL; > + } > + > + /* Allocate for init_data_hdr */ > + init_data_hdr = kzalloc(sizeof(struct > amd_sriov_msg_init_data_header), GFP_KERNEL); > + if (!init_data_hdr) > + return -ENOMEM; > + > + amdgpu_device_vram_access(adev, (uint64_t)init_hdr_offset, (uint32_t > *)init_data_hdr, > + sizeof(struct > amd_sriov_msg_init_data_header), false); > + > + /* Table validation */ > + if (strncmp(init_data_hdr->signature, > + AMDGPU_SRIOV_CRIT_DATA_SIGNATURE, > + AMDGPU_SRIOV_CRIT_DATA_SIG_LEN) != 0) { > + dev_err(adev->dev, "Invalid init data signature: %.4s\n", > + init_data_hdr->signature); > + r = -EINVAL; > + goto out; > + } > + > + checksum = amdgpu_virt_crit_region_calc_checksum( > + (uint8_t *)&init_data_hdr->initdata_offset, > + (uint8_t *)init_data_hdr + > + sizeof(struct amd_sriov_msg_init_data_header)); > + if (checksum != init_data_hdr->checksum) { > + dev_err(adev->dev, "Found unmatching checksum from > calculation 0x%x and init_data 0x%x\n", > + checksum, init_data_hdr->checksum); > + r = -EINVAL; > + goto out; > + } > + > + memset(&adev->virt.crit_regn, 0, sizeof(adev->virt.crit_regn)); > + memset(adev->virt.crit_regn_tbl, 0, 
sizeof(adev->virt.crit_regn_tbl)); > + > + adev->virt.crit_regn.offset = init_data_hdr->initdata_offset; > + adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb; > + > + /* Validation and initialization for each table entry */ > + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, > AMD_SRIOV_MSG_IPD_TABLE_ID)) { > + if (init_data_hdr->ip_discovery_size_in_kb > > DISCOVERY_TMR_SIZE) { > + dev_err(adev->dev, "Invalid IP discovery size: > 0x%x\n", > + > init_data_hdr->ip_discovery_size_in_kb); > + r = -EINVAL; > + goto out; > + } > + > + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset = > + init_data_hdr->ip_discovery_offset; > + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb = > + init_data_hdr->ip_discovery_size_in_kb; > + } else { > + dev_err(adev->dev, "Missing dynamic %s info from critical > region v2.\n", > + > amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_IPD_TABLE_ID]); > + r = -EINVAL; > + goto out; > + } > + > + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, > AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID)) { > + > adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset = > + init_data_hdr->vbios_img_offset; > + > adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb = > + init_data_hdr->vbios_img_size_in_kb; > + } else { > + dev_err(adev->dev, "Missing dynamic %s info from critical > region v2.\n", > + > amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID]); > + r = -EINVAL; > + goto out; > + } > + > + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, > AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID)) { > + > adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset = > + init_data_hdr->ras_tele_info_offset; > + > adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb = > + init_data_hdr->ras_tele_info_size_in_kb; > + } else { > + dev_err(adev->dev, "Missing dynamic %s info from critical > region v2.\n", > + > 
amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID]); > + r = -EINVAL; > + goto out; > + } > + > + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, > AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID)) { > + > adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset = > + init_data_hdr->dataexchange_offset; > + > adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb = > + init_data_hdr->dataexchange_size_in_kb; > + } else { > + dev_err(adev->dev, "Missing dynamic %s info from critical > region v2.\n", > + > amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID]); > + r = -EINVAL; > + goto out; > + } > + > + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, > AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID)) { > + > adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset = > + init_data_hdr->bad_page_info_offset; > + > adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb = > + init_data_hdr->bad_page_size_in_kb; > + } else { > + dev_err(adev->dev, "Missing dynamic %s info from critical > region v2.\n", > + > amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID]); > + r = -EINVAL; > + goto out; > + } > + > + adev->virt.is_dynamic_crit_regn_enabled = true; > + > +out: > + kfree(init_data_hdr); > + init_data_hdr = NULL; > + > + return r; > +} > + > void amdgpu_virt_init(struct amdgpu_device *adev) > { > bool is_sriov = false; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > index 36247a160aa6..8d03a8620de9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > @@ -54,6 +54,12 @@ > > #define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2 > > +/* Signature used to validate the SR-IOV dynamic critical region init data > header ("INDA") */ > +#define AMDGPU_SRIOV_CRIT_DATA_SIGNATURE "INDA" > +#define AMDGPU_SRIOV_CRIT_DATA_SIG_LEN 4 > + > +#define IS_SRIOV_CRIT_REGN_ENTRY_VALID(hdr, 
id) ((hdr)->valid_tables & (1 << > (id))) > + > enum amdgpu_sriov_vf_mode { > SRIOV_VF_MODE_BARE_METAL = 0, > SRIOV_VF_MODE_ONE_VF, > @@ -296,6 +302,9 @@ struct amdgpu_virt { > > /* dynamic(v2) critical regions */ > struct amdgpu_virt_region init_data_header; > + struct amdgpu_virt_region crit_regn; > + struct amdgpu_virt_region crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID]; > + bool is_dynamic_crit_regn_enabled; > > /* vf2pf message */ > struct delayed_work vf2pf_work; > @@ -432,6 +441,8 @@ void amdgpu_virt_exchange_data(struct amdgpu_device > *adev); > void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev); > void amdgpu_virt_init(struct amdgpu_device *adev); > > +int amdgpu_virt_init_critical_region(struct amdgpu_device *adev); > + > bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); > int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev); > void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h > b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h > index 9228fd2c6dfd..1cee083fb6bd 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h > @@ -71,6 +71,37 @@ enum amd_sriov_crit_region_version { > GPU_CRIT_REGION_V2 = 2, > }; > > +/* v2 layout offset enum (in order of allocation) */ > +enum amd_sriov_msg_table_id_enum { > + AMD_SRIOV_MSG_IPD_TABLE_ID = 0, > + AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, > + AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID, > + AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID, > + AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID, > + AMD_SRIOV_MSG_INITD_H_TABLE_ID, > + AMD_SRIOV_MSG_MAX_TABLE_ID, > +}; > + > +struct amd_sriov_msg_init_data_header { > + char signature[4]; /* "INDA" */ > + uint32_t version; > + uint32_t checksum; > + uint32_t initdata_offset; /* 0 */ > + uint32_t initdata_size_in_kb; /* 5MB */ > + uint32_t valid_tables; > + uint32_t vbios_img_offset; > + uint32_t vbios_img_size_in_kb; > + uint32_t 
dataexchange_offset; > + uint32_t dataexchange_size_in_kb; > + uint32_t ras_tele_info_offset; > + uint32_t ras_tele_info_size_in_kb; > + uint32_t ip_discovery_offset; > + uint32_t ip_discovery_size_in_kb; > + uint32_t bad_page_info_offset; > + uint32_t bad_page_size_in_kb; > + uint32_t reserved[8]; > +}; > + > /* > * PF2VF history log: > * v1 defined in amdgim > -- > 2.34.1 >
