On Tue, Oct 14, 2025 at 4:14 PM Ellen Pan <[email protected]> wrote:
>
> 1. Added VF logic to init the data exchange region using the offsets
> from the dynamic (v2) critical regions.
>
> Signed-off-by: Ellen Pan <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 104 ++++++++++++++++++-----
> 1 file changed, 85 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index fef4ebb0f879..35cb716ec594 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -208,12 +208,12 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
> &adev->virt.mm_table.gpu_addr,
> (void *)&adev->virt.mm_table.cpu_addr);
> if (r) {
> - DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
> + dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r);
> return r;
> }
>
> memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
> - DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
> + dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n",
> adev->virt.mm_table.gpu_addr,
> adev->virt.mm_table.cpu_addr);
> return 0;
> @@ -393,7 +393,9 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
> if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
> AMDGPU_GPU_PAGE_SIZE,
> &bo, NULL))
> - DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
> + dev_dbg(adev->dev,
> + "RAS WARN: reserve vram for retired page %llx fail\n",
> + bp);
> data->bps_bo[i] = bo;
> }
> data->last_reserved = i + 1;
> @@ -661,10 +663,34 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
> schedule_delayed_work(&(adev->virt.vf2pf_work),
> adev->virt.vf2pf_update_interval_ms);
> }
>
> +static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
> +{
> + uint32_t dataexchange_offset =
> + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
> + uint32_t dataexchange_size =
> + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
> + uint64_t pos = 0;
> +
> + dev_info(adev->dev,
> + "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
> + dataexchange_offset, dataexchange_size);
> +
> + if (!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) {
> + dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n");
> + return -EINVAL;
> + }
> +
> + pos = (uint64_t)dataexchange_offset;
> + amdgpu_device_vram_access(adev, pos, pfvf_data,
> + dataexchange_size, false);
> +
> + return 0;
> +}
> +
> void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
> {
> if (adev->virt.vf2pf_update_interval_ms != 0) {
> - DRM_INFO("clean up the vf2pf work item\n");
> + dev_info(adev->dev, "clean up the vf2pf work item\n");
> cancel_delayed_work_sync(&adev->virt.vf2pf_work);
> adev->virt.vf2pf_update_interval_ms = 0;
> }
> @@ -672,13 +698,15 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
>
> void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
> {
> + uint32_t *pfvf_data = NULL;
> +
> adev->virt.fw_reserve.p_pf2vf = NULL;
> adev->virt.fw_reserve.p_vf2pf = NULL;
> adev->virt.vf2pf_update_interval_ms = 0;
> adev->virt.vf2pf_update_retry_cnt = 0;
>
> if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
> - DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
> + dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!");
> } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
> /* go through this logic in ip_init and reset to init workqueue*/
> amdgpu_virt_exchange_data(adev);
> @@ -687,11 +715,34 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
> schedule_delayed_work(&(adev->virt.vf2pf_work),
> msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
> } else if (adev->bios != NULL) {
> /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
> - adev->virt.fw_reserve.p_pf2vf =
> - (struct amd_sriov_msg_pf2vf_info_header *)
> - (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> + if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
> + pfvf_data =
> + kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
> + GFP_KERNEL);
> + if (!pfvf_data) {
> + dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n");
> + return;
> + }
>
> - amdgpu_virt_read_pf2vf_data(adev);
> + if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
> + goto free_pfvf_data;
> +
> + adev->virt.fw_reserve.p_pf2vf =
> + (struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
> +
> + amdgpu_virt_read_pf2vf_data(adev);
> +
> +free_pfvf_data:
> + kfree(pfvf_data);
> + pfvf_data = NULL;
> + adev->virt.fw_reserve.p_pf2vf = NULL;
> + } else {
> + adev->virt.fw_reserve.p_pf2vf =
> + (struct amd_sriov_msg_pf2vf_info_header *)
> + (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> +
> + amdgpu_virt_read_pf2vf_data(adev);
> + }
> }
> }
>
> @@ -704,14 +755,29 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
>
> if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
> if (adev->mman.fw_vram_usage_va) {
> - adev->virt.fw_reserve.p_pf2vf =
> - (struct amd_sriov_msg_pf2vf_info_header *)
> - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> - adev->virt.fw_reserve.p_vf2pf =
> - (struct amd_sriov_msg_vf2pf_info_header *)
> - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
> - adev->virt.fw_reserve.ras_telemetry =
> - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
> + if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
> + adev->virt.fw_reserve.p_pf2vf =
> + (struct amd_sriov_msg_pf2vf_info_header *)
> + (adev->mman.fw_vram_usage_va +
> + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
> + adev->virt.fw_reserve.p_vf2pf =
> + (struct amd_sriov_msg_vf2pf_info_header *)
> + (adev->mman.fw_vram_usage_va +
> + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
> + (AMD_SRIOV_MSG_SIZE_KB_V1 << 10));
AMD_SRIOV_MSG_SIZE_KB_V1? Is this common for both V1 and V2?
Other than that, this patch looks good to me.
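
To make the question concrete, here is how I read the V2 placement from the
hunk above, as a standalone userspace sketch rather than kernel code. The
1 KiB value assumed for AMD_SRIOV_MSG_SIZE_KB_V1 and the example offset are
both made up, just to show where the V1 constant ends up in the V2 math:

/* Standalone sketch: in the V2 path, vf2pf sits one V1-sized message
 * block past the data exchange offset. All values are hypothetical. */
#include <stdint.h>
#include <stdio.h>

#define AMD_SRIOV_MSG_SIZE_KB_V1 1 /* assumption: 1 KiB per message block */

int main(void)
{
	/* hypothetical data exchange offset from a V2 crit_regn_tbl entry */
	uint32_t dataexchange_offset = 0x8000;
	uint32_t pf2vf_off = dataexchange_offset;
	uint32_t vf2pf_off = dataexchange_offset + (AMD_SRIOV_MSG_SIZE_KB_V1 << 10);

	printf("pf2vf at 0x%x, vf2pf at 0x%x\n",
	       (unsigned int)pf2vf_off, (unsigned int)vf2pf_off);
	return 0;
}
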
Alex
> + adev->virt.fw_reserve.ras_telemetry =
> + (adev->mman.fw_vram_usage_va +
> + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
> + } else {
> + adev->virt.fw_reserve.p_pf2vf =
> + (struct amd_sriov_msg_pf2vf_info_header *)
> + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> + adev->virt.fw_reserve.p_vf2pf =
> + (struct amd_sriov_msg_vf2pf_info_header *)
> + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
> + adev->virt.fw_reserve.ras_telemetry =
> + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
> + }
> } else if (adev->mman.drv_vram_usage_va) {
> adev->virt.fw_reserve.p_pf2vf =
> (struct amd_sriov_msg_pf2vf_info_header *)
> @@ -819,7 +885,7 @@ static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
> break;
> default: /* other chip doesn't support SRIOV */
> is_sriov = false;
> - DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
> + dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type);
> break;
> }
> }
> @@ -1468,7 +1534,7 @@ amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block bloc
> case AMDGPU_RAS_BLOCK__MPIO:
> return RAS_TELEMETRY_GPU_BLOCK_MPIO;
> default:
> - DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
> + dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n",
> block);
> return RAS_TELEMETRY_GPU_BLOCK_COUNT;
> }
> --
> 2.34.1
>