On 10/28/25 23:06, Timur Kristóf wrote:
> Try to load the VCE firmware at early_init.
>
> When the correct firmware is not found, return -ENOENT.
> This way, the driver initialization will complete even
> without VCE, and the GPU will be functional, albeit
> without video encoding capabilities.
>
> This is necessary because we are planning to add support
> for the VCE1, and AMD hasn't yet publised the correct
> firmware for this version. So we need to anticipate that
> users will try to boot amdgpu on SI GPUs without the
> correct VCE1 firmware present on their system.
>
> Signed-off-by: Timur Kristóf <[email protected]>
Looks reasonable to me, but Leo and his team should probably take a look as
well.
Regards,
Christian.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 121 +++++++++++++++---------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 1 +
> drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 5 +
> drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 5 +
> drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 5 +
> 5 files changed, 91 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> index eaa06dbef5c4..b23a48a1efc1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -88,82 +88,87 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring
> *ring, uint32_t handle,
> bool direct, struct dma_fence **fence);
>
> /**
> - * amdgpu_vce_sw_init - allocate memory, load vce firmware
> + * amdgpu_vce_firmware_name() - determine the firmware file name for VCE
> *
> * @adev: amdgpu_device pointer
> - * @size: size for the new BO
> *
> - * First step to get VCE online, allocate memory and load the firmware
> + * Each chip that has VCE IP may need a different firmware.
> + * This function returns the name of the VCE firmware file
> + * appropriate for the current chip.
> */
> -int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
> +static const char *amdgpu_vce_firmware_name(struct amdgpu_device *adev)
> {
> - const char *fw_name;
> - const struct common_firmware_header *hdr;
> - unsigned int ucode_version, version_major, version_minor, binary_id;
> - int i, r;
> -
> switch (adev->asic_type) {
> #ifdef CONFIG_DRM_AMDGPU_CIK
> case CHIP_BONAIRE:
> - fw_name = FIRMWARE_BONAIRE;
> - break;
> + return FIRMWARE_BONAIRE;
> case CHIP_KAVERI:
> - fw_name = FIRMWARE_KAVERI;
> - break;
> + return FIRMWARE_KAVERI;
> case CHIP_KABINI:
> - fw_name = FIRMWARE_KABINI;
> - break;
> + return FIRMWARE_KABINI;
> case CHIP_HAWAII:
> - fw_name = FIRMWARE_HAWAII;
> - break;
> + return FIRMWARE_HAWAII;
> case CHIP_MULLINS:
> - fw_name = FIRMWARE_MULLINS;
> - break;
> + return FIRMWARE_MULLINS;
> #endif
> case CHIP_TONGA:
> - fw_name = FIRMWARE_TONGA;
> - break;
> + return FIRMWARE_TONGA;
> case CHIP_CARRIZO:
> - fw_name = FIRMWARE_CARRIZO;
> - break;
> + return FIRMWARE_CARRIZO;
> case CHIP_FIJI:
> - fw_name = FIRMWARE_FIJI;
> - break;
> + return FIRMWARE_FIJI;
> case CHIP_STONEY:
> - fw_name = FIRMWARE_STONEY;
> - break;
> + return FIRMWARE_STONEY;
> case CHIP_POLARIS10:
> - fw_name = FIRMWARE_POLARIS10;
> - break;
> + return FIRMWARE_POLARIS10;
> case CHIP_POLARIS11:
> - fw_name = FIRMWARE_POLARIS11;
> - break;
> + return FIRMWARE_POLARIS11;
> case CHIP_POLARIS12:
> - fw_name = FIRMWARE_POLARIS12;
> - break;
> + return FIRMWARE_POLARIS12;
> case CHIP_VEGAM:
> - fw_name = FIRMWARE_VEGAM;
> - break;
> + return FIRMWARE_VEGAM;
> case CHIP_VEGA10:
> - fw_name = FIRMWARE_VEGA10;
> - break;
> + return FIRMWARE_VEGA10;
> case CHIP_VEGA12:
> - fw_name = FIRMWARE_VEGA12;
> - break;
> + return FIRMWARE_VEGA12;
> case CHIP_VEGA20:
> - fw_name = FIRMWARE_VEGA20;
> - break;
> + return FIRMWARE_VEGA20;
>
> default:
> - return -EINVAL;
> + return NULL;
> }
> +}
> +
> +/**
> + * amdgpu_vce_early_init() - try to load VCE firmware
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Tries to load the VCE firmware.
> + *
> + * When not found, returns ENOENT so that the driver can
> + * still load and initialize the rest of the IP blocks.
> + * The GPU can function just fine without VCE, they will just
> + * not support video encoding.
> + */
> +int amdgpu_vce_early_init(struct amdgpu_device *adev)
> +{
> + const char *fw_name = amdgpu_vce_firmware_name(adev);
> + const struct common_firmware_header *hdr;
> + unsigned int ucode_version, version_major, version_minor, binary_id;
> + int r;
> +
> + if (!fw_name)
> + return -ENOENT;
>
> r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED,
> "%s", fw_name);
> if (r) {
> - dev_err(adev->dev, "amdgpu_vce: Can't validate firmware
> \"%s\"\n",
> - fw_name);
> + dev_err(adev->dev,
> + "amdgpu_vce: Firmware \"%s\" not found or failed to
> validate (%d)\n",
> + fw_name, r);
> +
> amdgpu_ucode_release(&adev->vce.fw);
> - return r;
> + return -ENOENT;
> }
>
> hdr = (const struct common_firmware_header *)adev->vce.fw->data;
> @@ -172,11 +177,35 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev,
> unsigned long size)
> version_major = (ucode_version >> 20) & 0xfff;
> version_minor = (ucode_version >> 8) & 0xfff;
> binary_id = ucode_version & 0xff;
> - DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n",
> + dev_info(adev->dev, "Found VCE firmware Version: %d.%d Binary ID: %d\n",
> version_major, version_minor, binary_id);
> adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
> (binary_id << 8));
>
> + return 0;
> +}
> +
> +/**
> + * amdgpu_vce_sw_init() - allocate memory for VCE BO
> + *
> + * @adev: amdgpu_device pointer
> + * @size: size for the new BO
> + *
> + * First step to get VCE online: allocate memory for VCE BO.
> + * The VCE firmware binary is copied into the VCE BO later,
> + * in amdgpu_vce_resume. The VCE executes its code from the
> + * VCE BO and also uses the space in this BO for its stack and data.
> + *
> + * Ideally this BO should be placed in VRAM for optimal performance,
> + * although technically it also runs from system RAM (albeit slowly).
> + */
> +int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
> +{
> + int i, r;
> +
> + if (!adev->vce.fw)
> + return -ENOENT;
> +
> r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
> AMDGPU_GEM_DOMAIN_VRAM |
> AMDGPU_GEM_DOMAIN_GTT,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
> index 6e53f872d084..22acd7b35945 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
> @@ -53,6 +53,7 @@ struct amdgpu_vce {
> unsigned num_rings;
> };
>
> +int amdgpu_vce_early_init(struct amdgpu_device *adev);
> int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size);
> int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
> int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring
> *ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> index bee3e904a6bc..8ea8a6193492 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> @@ -407,6 +407,11 @@ static void vce_v2_0_enable_mgcg(struct amdgpu_device
> *adev, bool enable,
> static int vce_v2_0_early_init(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
> + int r;
> +
> + r = amdgpu_vce_early_init(adev);
> + if (r)
> + return r;
>
> adev->vce.num_rings = 2;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> index 708123899c41..719e9643c43d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> @@ -399,6 +399,7 @@ static unsigned vce_v3_0_get_harvest_config(struct
> amdgpu_device *adev)
> static int vce_v3_0_early_init(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
> + int r;
>
> adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);
>
> @@ -407,6 +408,10 @@ static int vce_v3_0_early_init(struct amdgpu_ip_block
> *ip_block)
> (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
> return -ENOENT;
>
> + r = amdgpu_vce_early_init(adev);
> + if (r)
> + return r;
> +
> adev->vce.num_rings = 3;
>
> vce_v3_0_set_ring_funcs(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> index 335bda64ff5b..2d64002bed61 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> @@ -410,6 +410,11 @@ static int vce_v4_0_stop(struct amdgpu_device *adev)
> static int vce_v4_0_early_init(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
> + int r;
> +
> + r = amdgpu_vce_early_init(adev);
> + if (r)
> + return r;
>
> if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
> adev->vce.num_rings = 1;