On Tue, Oct 14, 2025 at 4:14 PM Ellen Pan <[email protected]> wrote:
>
> 1. Added VF logic to init the data exchange region using the offsets
> from the dynamic (v2) critical regions.
>
> Signed-off-by: Ellen Pan <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 104 ++++++++++++++++++-----
> 1 file changed, 85 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index fef4ebb0f879..35cb716ec594 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -208,12 +208,12 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
> &adev->virt.mm_table.gpu_addr,
> (void *)&adev->virt.mm_table.cpu_addr);
> if (r) {
> - DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
> + dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r);
> return r;
> }
>
> memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
> - DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
> + dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n",
> adev->virt.mm_table.gpu_addr,
> adev->virt.mm_table.cpu_addr);
> return 0;
> @@ -393,7 +393,9 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
> if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
> AMDGPU_GPU_PAGE_SIZE,
> &bo, NULL))
> - DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
> + dev_dbg(adev->dev,
> + "RAS WARN: reserve vram for retired page %llx fail\n",
> + bp);
> data->bps_bo[i] = bo;
> }
> data->last_reserved = i + 1;
> @@ -661,10 +663,34 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
> schedule_delayed_work(&(adev->virt.vf2pf_work),
> adev->virt.vf2pf_update_interval_ms);
> }
>
> +static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
> +{
> + uint32_t dataexchange_offset =
> + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
> + uint32_t dataexchange_size =
> + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
> + uint64_t pos = 0;
> +
> + dev_info(adev->dev,
> + "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
> + dataexchange_offset, dataexchange_size);
> +
> + if (!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) {
> + dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n");
> + return -EINVAL;
> + }
> +
> + pos = (uint64_t)dataexchange_offset;
> + amdgpu_device_vram_access(adev, pos, pfvf_data,
> + dataexchange_size, false);
> +
> + return 0;
> +}
> +
> void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
> {
> if (adev->virt.vf2pf_update_interval_ms != 0) {
> - DRM_INFO("clean up the vf2pf work item\n");
> + dev_info(adev->dev, "clean up the vf2pf work item\n");
> cancel_delayed_work_sync(&adev->virt.vf2pf_work);
> adev->virt.vf2pf_update_interval_ms = 0;
> }
> @@ -672,13 +698,15 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
>
> void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
> {
> + uint32_t *pfvf_data = NULL;
> +
> adev->virt.fw_reserve.p_pf2vf = NULL;
> adev->virt.fw_reserve.p_vf2pf = NULL;
> adev->virt.vf2pf_update_interval_ms = 0;
> adev->virt.vf2pf_update_retry_cnt = 0;
>
> if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
> - DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
> + dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!");
> } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
> /* go through this logic in ip_init and reset to init workqueue*/
> amdgpu_virt_exchange_data(adev);
> @@ -687,11 +715,34 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
> schedule_delayed_work(&(adev->virt.vf2pf_work),
> msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
> } else if (adev->bios != NULL) {
> /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
> - adev->virt.fw_reserve.p_pf2vf =
> - (struct amd_sriov_msg_pf2vf_info_header *)
> - (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> + if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
> + pfvf_data =
> + kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
> + GFP_KERNEL);
> + if (!pfvf_data) {
> + dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n");
> + return;
> + }
>
> - amdgpu_virt_read_pf2vf_data(adev);
> + if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
> + goto free_pfvf_data;
> +
> + adev->virt.fw_reserve.p_pf2vf =
> + (struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
> +
> + amdgpu_virt_read_pf2vf_data(adev);
> +
> +free_pfvf_data:
> + kfree(pfvf_data);
> + pfvf_data = NULL;
> + adev->virt.fw_reserve.p_pf2vf = NULL;
> + } else {
> + adev->virt.fw_reserve.p_pf2vf =
> + (struct amd_sriov_msg_pf2vf_info_header *)
> + (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> +
> + amdgpu_virt_read_pf2vf_data(adev);
> + }
> }
> }
>
> @@ -704,14 +755,29 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
>
> if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
> if (adev->mman.fw_vram_usage_va) {
> - adev->virt.fw_reserve.p_pf2vf =
> - (struct amd_sriov_msg_pf2vf_info_header *)
> - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> - adev->virt.fw_reserve.p_vf2pf =
> - (struct amd_sriov_msg_vf2pf_info_header *)
> - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
> - adev->virt.fw_reserve.ras_telemetry =
> - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
> + if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
> + adev->virt.fw_reserve.p_pf2vf =
> + (struct amd_sriov_msg_pf2vf_info_header *)
> + (adev->mman.fw_vram_usage_va +
> + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
> + adev->virt.fw_reserve.p_vf2pf =
> + (struct amd_sriov_msg_vf2pf_info_header *)
> + (adev->mman.fw_vram_usage_va +
> + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
> + (AMD_SRIOV_MSG_SIZE_KB_V1 << 10));
AMD_SRIOV_MSG_SIZE_KB_V1? Is this common for both V1 and V2?
Other than that, this patch looks good to me.
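
To make the question concrete, here is how I read the V2 placement from the
hunk above, as a standalone userspace sketch rather than kernel code. The
1 KiB value assumed for AMD_SRIOV_MSG_SIZE_KB_V1 and the example offset are
both made up, just to show where the V1 constant ends up in the V2 math:

/* Standalone sketch: in the V2 path, vf2pf sits one V1-sized message
 * block past the data exchange offset. All values are hypothetical. */
#include <stdint.h>
#include <stdio.h>

#define AMD_SRIOV_MSG_SIZE_KB_V1 1 /* assumption: 1 KiB per message block */

int main(void)
{
	/* hypothetical data exchange offset from a V2 crit_regn_tbl entry */
	uint32_t dataexchange_offset = 0x8000;
	uint32_t pf2vf_off = dataexchange_offset;
	uint32_t vf2pf_off = dataexchange_offset + (AMD_SRIOV_MSG_SIZE_KB_V1 << 10);

	printf("pf2vf at 0x%x, vf2pf at 0x%x\n",
	       (unsigned int)pf2vf_off, (unsigned int)vf2pf_off);
	return 0;
}
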
Alex
> + adev->virt.fw_reserve.ras_telemetry =
> + (adev->mman.fw_vram_usage_va +
> + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
> + } else {
> + adev->virt.fw_reserve.p_pf2vf =
> + (struct amd_sriov_msg_pf2vf_info_header *)
> + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> + adev->virt.fw_reserve.p_vf2pf =
> + (struct amd_sriov_msg_vf2pf_info_header *)
> + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
> + adev->virt.fw_reserve.ras_telemetry =
> + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
> + }
> } else if (adev->mman.drv_vram_usage_va) {
> adev->virt.fw_reserve.p_pf2vf =
> (struct amd_sriov_msg_pf2vf_info_header *)
> @@ -819,7 +885,7 @@ static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
> break;
> default: /* other chip doesn't support SRIOV */
> is_sriov = false;
> - DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
> + dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type);
> break;
> }
> }
> @@ -1468,7 +1534,7 @@ amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block bloc
> case AMDGPU_RAS_BLOCK__MPIO:
> return RAS_TELEMETRY_GPU_BLOCK_MPIO;
> default:
> - DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
> + dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n",
> block);
> return RAS_TELEMETRY_GPU_BLOCK_COUNT;
> }
> --
> 2.34.1
>