On 10/15/2025 1:43 AM, Ellen Pan wrote:
1. Added VF logic in amdgpu_virt to init IP discovery using the offsets from
dynamic(v2) critical regions;
2. Added VF logic in amdgpu_virt to init bios image using the offsets from
dynamic(v2) critical regions;
Signed-off-by: Ellen Pan <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 36 ++++++++---
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 23 +++++--
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 63 +++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 +
4 files changed, 111 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 00e96419fcda..5960ab1be4d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -96,11 +96,12 @@ void amdgpu_bios_release(struct amdgpu_device *adev)
* part of the system bios. On boot, the system bios puts a
* copy of the igp rom at the start of vram if a discrete card is
* present.
- * For SR-IOV, the vbios image is also put in VRAM in the VF.
+ * For SR-IOV, if dynamic critical region is not enabled,
+ * the vbios image is also put at the start of VRAM in the VF.
*/
static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
{
- uint8_t __iomem *bios;
+ uint8_t __iomem *bios = NULL;
resource_size_t vram_base;
resource_size_t size = 256 * 1024; /* ??? */
@@ -114,18 +115,35 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
adev->bios = NULL;
vram_base = pci_resource_start(adev->pdev, 0);
- bios = ioremap_wc(vram_base, size);
- if (!bios)
- return false;
+
+ /* For SR-IOV, if dynamic critical region is enabled,
+ * the vbios image is put at a dynamic offset of VRAM in the VF.
+ * If dynamic critical region is disabled, follow the same seq as on
baremetal.
+ */
+ if (!(amdgpu_sriov_vf(adev) &&
adev->virt.is_dynamic_crit_regn_enabled)) {
+ bios = ioremap_wc(vram_base, size);
+ if (!bios)
+ return false;
+ }
adev->bios = kmalloc(size, GFP_KERNEL);
if (!adev->bios) {
- iounmap(bios);
- return false;
+ if (bios)
+ iounmap(bios);
+ return false;
}
+
+ if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+ if (amdgpu_virt_get_dynamic_data_info(adev,
+ AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
adev->bios, &size))
+ return false;
+ }
+
It may be cleaner to keep this code up - allocate, read the binary and
then use a label to jump to verification (check_atom_bios) for a
successful read.
adev->bios_size = size;
- memcpy_fromio(adev->bios, bios, size);
- iounmap(bios);
+ if (bios) {
+ memcpy_fromio(adev->bios, bios, size);
+ iounmap(bios);
+ }
if (!check_atom_bios(adev, size)) {
amdgpu_bios_release(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 73401f0aeb34..e035dba96790 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -283,7 +283,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct
amdgpu_device *adev,
* wait for this to complete. Once the C2PMSG is updated, we
can
* continue.
*/
-
+
for (i = 0; i < 2000; i++) {
msg = RREG32(mmMP0_SMN_C2PMSG_33);
if (msg & 0x80000000)
@@ -299,13 +299,28 @@ static int amdgpu_discovery_read_binary_from_mem(struct
amdgpu_device *adev,
vram_size <<= 20;
if (sz_valid) {
- uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
- amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
- adev->mman.discovery_tmr_size, false);
+ if (amdgpu_sriov_vf(adev) &&
adev->virt.is_dynamic_crit_regn_enabled) {
+ /* For SRIOV VFs with dynamic critical region enabled,
+ * we will get the IPD binary via below call.
+ * If dynamic critical is disabled, fall through to
normal seq.
+ */
+ valid_size = vram_size;
Only mman.discovery_tmr_size is the allocation size of binary.
+ if (amdgpu_virt_get_dynamic_data_info(adev,
+ AMD_SRIOV_MSG_IPD_TABLE_ID, binary,
&valid_size)) {
+ ret = -EINVAL;
+ goto exit;
+ }
+ } else {
+ uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+
+ amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
+ adev->mman.discovery_tmr_size, false);
+ }
} else {
ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
}
+exit:
if (ret)
dev_err(adev->dev,
"failed to read discovery info from memory, vram size read:
%llx",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 820dab538164..fef4ebb0f879 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -940,6 +940,14 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device
*adev)
adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
init_data_hdr->bad_page_size_in_kb;
+ /* Validation for critical region info */
+ if (adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb >
DISCOVERY_TMR_SIZE) {
+ dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n",
+
adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
/* reserved memory starts from crit region base offset with the size of
5MB */
adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
@@ -958,6 +966,61 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device
*adev)
return r;
}
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+ int data_id, uint8_t *binary, uint64_t *size)
+{
+ uint32_t data_offset = 0;
+ uint32_t data_size = 0;
+ enum amd_sriov_msg_table_id_enum data_table_id = data_id;
+ char *data_name;
+
+ if (data_table_id >= AMD_SRIOV_MSG_MAX_TABLE_ID)
+ return -EINVAL;
+
+ data_offset = adev->virt.crit_regn_tbl[data_table_id].offset;
+ data_size = adev->virt.crit_regn_tbl[data_table_id].size_kb << 10;
+
+ switch (data_id) {
+ case AMD_SRIOV_MSG_IPD_TABLE_ID:
+ data_name = "IPD";
+ if (!IS_ALIGNED(data_offset, 4) || !IS_ALIGNED(data_size, 4)) {
+ dev_err(adev->dev, "IP discovery data not aligned to 4
bytes\n");
+ return -EINVAL;
+ }
I think this should also be part of table verification.
+
+ amdgpu_device_vram_access(adev,
+ (uint64_t)data_offset, (uint32_t *)binary,
data_size, false);
+ if (!binary)
+ return -EINVAL;
This NULL check should be before reading any content to the binary.> +
+ if (((uint64_t)data_offset + (uint64_t)data_size) > *size)
+ return -EINVAL;
This doesn't look correct either.
You may generically do like -
// not valid binary or not enough size allocated
if (!binary || !size || *size < data_size)
return -EINVAL;
// otherwise, copy the content
amdgpu_device_vram_access(adev, (uint64_t)data_offset, (uint32_t
*)binary, data_size, false);
*size = (uint64_t)data_size; // update the size as out param.
+
+ *size = (uint64_t)data_size;
+
+ break;
+
+ case AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID:
+ data_name = "BIOS";
If this name is preferred for printing, probably return as
amdgpu_virt_get_dyn_table_name(tbl_id) so that it can be used at other
places as well.
+ if (data_size > *size) {
+ dev_err(adev->dev, "Invalid vbios size: 0x%x\n",
data_size);
+ return -EINVAL;
+ }
+
+ amdgpu_device_vram_access(adev,
+ (uint64_t)data_offset, (uint32_t *)binary,
data_size, false);
+
+ *size = (uint64_t)data_size;
+ break;
+ }
+
+ dev_info(adev->dev,
+ "Got %s info from dynamic crit_region_table at offset 0x%x with size
of 0x%x bytes.\n",
+ data_name, data_offset, data_size);
This could be kept as dev_dbg().
Thanks,
Lijo
+
+ return 0;
+}
+
void amdgpu_virt_init(struct amdgpu_device *adev)
{
bool is_sriov = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 5c1dce9731e1..a3ae1ff40e84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -440,6 +440,8 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device
*adev);
void amdgpu_virt_init(struct amdgpu_device *adev);
int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+ int data_id, uint8_t *binary, uint64_t *size);
bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);