On 11/7/25 10:53, Timur Kristóf wrote: > On Fri, 2025-11-07 at 10:49 +0100, Christian König wrote: >> On 11/6/25 19:44, Timur Kristóf wrote: >>> VCE uses the VCPU BO to keep track of currently active >>> encoding sessions. To make sure the VCE functions correctly >>> after suspend/resume, save the VCPU BO to system RAM on >>> suspend and restore it on resume. >>> >>> Additionally, make sure to keep the VCPU BO pinned. >>> The VCPU BO needs to stay at the same location before and after >>> sleep/resume because the FW code is not relocatable once it's >>> started. >>> >>> Unfortunately this is not enough to make VCE suspend work when >>> there are encoding sessions active, so don't allow that yet. >> >> The question if this is the right approach or not can only Leo and >> Ruijing answer. >> >> If we can get VCE1-3 to keep session working after suspend/resume we >> should make this change otherwise we should rather make all old >> sessions invalid after suspend/resume and only re-load the FW. >> >> Anyway I think you should make the removal of "amdgpu_bo_kmap(adev- >>> vce.vcpu_bo, &cpu_addr);" a separate patch, cause that seems to be a >> good cleanup no matter what. >> > > This change is necessary for the VCE1 code when it loads the firmware > signature. Without this patch, we would need to call reserve(), kmap(), > kunmap(), kunreserve() in vce_v1_0_load_fw_signature(). > > Let me know which approach you prefer.
In this case definitely make removal of amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr) a separate patch. I want to get initial VCE1 working and landed independent of what Leo/Ruijing say to suspend/resume. Can be that suspend/resume is then still broken, but that is also the case for VCE2-3 then. Regards, Christian. > >> >>> >>> Signed-off-by: Timur Kristóf <[email protected]> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 44 ++++++++------------- >>> drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 52 ++++----------------- >>> ---- >>> 2 files changed, 24 insertions(+), 72 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c >>> index 2297608c5191..4beec5b56c4f 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c >>> @@ -206,6 +206,10 @@ int amdgpu_vce_sw_init(struct amdgpu_device >>> *adev, unsigned long size) >>> if (!adev->vce.fw) >>> return -ENOENT; >>> >>> + adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL); >>> + if (!adev->vce.saved_bo) >>> + return -ENOMEM; >>> + >>> r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, >>> AMDGPU_GEM_DOMAIN_VRAM | >>> AMDGPU_GEM_DOMAIN_GTT, >>> @@ -254,6 +258,9 @@ int amdgpu_vce_sw_fini(struct amdgpu_device >>> *adev) >>> amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev- >>>> vce.gpu_addr, >>> (void **)&adev->vce.cpu_addr); >>> >>> + kvfree(adev->vce.saved_bo); >>> + adev->vce.saved_bo = NULL; >>> + >>> return 0; >>> } >>> >>> @@ -290,13 +297,19 @@ int amdgpu_vce_entity_init(struct >>> amdgpu_device *adev, struct amdgpu_ring *ring) >>> */ >>> int amdgpu_vce_suspend(struct amdgpu_device *adev) >>> { >>> - int i; >>> + int i, idx; >>> >>> cancel_delayed_work_sync(&adev->vce.idle_work); >>> >>> if (adev->vce.vcpu_bo == NULL) >>> return 0; >>> >>> + if (drm_dev_enter(adev_to_drm(adev), &idx)) { >>> + memcpy_fromio(adev->vce.saved_bo, adev- >>>> vce.cpu_addr, >>> + amdgpu_bo_size(adev->vce.vcpu_bo)); >>> + 
drm_dev_exit(idx); >>> + } >>> + >>> for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) >>> if (atomic_read(&adev->vce.handles[i])) >>> break; >>> @@ -316,40 +329,17 @@ int amdgpu_vce_suspend(struct amdgpu_device >>> *adev) >>> */ >>> int amdgpu_vce_resume(struct amdgpu_device *adev) >>> { >>> - void *cpu_addr; >>> - const struct common_firmware_header *hdr; >>> - unsigned int offset; >>> - int r, idx; >>> + int idx; >>> >>> if (adev->vce.vcpu_bo == NULL) >>> return -EINVAL; >>> >>> - r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false); >>> - if (r) { >>> - dev_err(adev->dev, "(%d) failed to reserve VCE >>> bo\n", r); >>> - return r; >>> - } >>> - >>> - r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr); >>> - if (r) { >>> - amdgpu_bo_unreserve(adev->vce.vcpu_bo); >>> - dev_err(adev->dev, "(%d) VCE map failed\n", r); >>> - return r; >>> - } >>> - >>> - hdr = (const struct common_firmware_header *)adev->vce.fw- >>>> data; >>> - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); >>> - >>> if (drm_dev_enter(adev_to_drm(adev), &idx)) { >>> - memcpy_toio(cpu_addr, adev->vce.fw->data + offset, >>> - adev->vce.fw->size - offset); >>> + memcpy_toio(adev->vce.cpu_addr, adev- >>>> vce.saved_bo, >>> + amdgpu_bo_size(adev->vce.vcpu_bo)); >>> drm_dev_exit(idx); >>> } >>> >>> - amdgpu_bo_kunmap(adev->vce.vcpu_bo); >>> - >>> - amdgpu_bo_unreserve(adev->vce.vcpu_bo); >>> - >>> return 0; >>> } >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c >>> b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c >>> index 2d64002bed61..21b6656b2f41 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c >>> @@ -448,14 +448,8 @@ static int vce_v4_0_sw_init(struct >>> amdgpu_ip_block *ip_block) >>> return r; >>> >>> if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { >>> - const struct common_firmware_header *hdr; >>> - unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); >>> - >>> - adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL); >>> - if (!adev->vce.saved_bo) 
>>> - return -ENOMEM; >>> - >>> - hdr = (const struct common_firmware_header *)adev- >>>> vce.fw->data; >>> + const struct common_firmware_header *hdr = >>> + (const struct common_firmware_header >>> *)adev->vce.fw->data; >>> adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id >>> = AMDGPU_UCODE_ID_VCE; >>> adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = >>> adev->vce.fw; >>> adev->firmware.fw_size += >>> @@ -506,11 +500,6 @@ static int vce_v4_0_sw_fini(struct >>> amdgpu_ip_block *ip_block) >>> /* free MM table */ >>> amdgpu_virt_free_mm_table(adev); >>> >>> - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { >>> - kvfree(adev->vce.saved_bo); >>> - adev->vce.saved_bo = NULL; >>> - } >>> - >>> r = amdgpu_vce_suspend(adev); >>> if (r) >>> return r; >>> @@ -561,20 +550,7 @@ static int vce_v4_0_hw_fini(struct >>> amdgpu_ip_block *ip_block) >>> static int vce_v4_0_suspend(struct amdgpu_ip_block *ip_block) >>> { >>> struct amdgpu_device *adev = ip_block->adev; >>> - int r, idx; >>> - >>> - if (adev->vce.vcpu_bo == NULL) >>> - return 0; >>> - >>> - if (drm_dev_enter(adev_to_drm(adev), &idx)) { >>> - if (adev->firmware.load_type == >>> AMDGPU_FW_LOAD_PSP) { >>> - unsigned size = amdgpu_bo_size(adev- >>>> vce.vcpu_bo); >>> - void *ptr = adev->vce.cpu_addr; >>> - >>> - memcpy_fromio(adev->vce.saved_bo, ptr, >>> size); >>> - } >>> - drm_dev_exit(idx); >>> - } >>> + int r; >>> >>> /* >>> * Proper cleanups before halting the HW engine: >>> @@ -609,25 +585,11 @@ static int vce_v4_0_suspend(struct >>> amdgpu_ip_block *ip_block) >>> static int vce_v4_0_resume(struct amdgpu_ip_block *ip_block) >>> { >>> struct amdgpu_device *adev = ip_block->adev; >>> - int r, idx; >>> - >>> - if (adev->vce.vcpu_bo == NULL) >>> - return -EINVAL; >>> - >>> - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { >>> - >>> - if (drm_dev_enter(adev_to_drm(adev), &idx)) { >>> - unsigned size = amdgpu_bo_size(adev- >>>> vce.vcpu_bo); >>> - void *ptr = adev->vce.cpu_addr; >>> + int r; >>> >>> - 
memcpy_toio(ptr, adev->vce.saved_bo, >>> size); >>> - drm_dev_exit(idx); >>> - } >>> - } else { >>> - r = amdgpu_vce_resume(adev); >>> - if (r) >>> - return r; >>> - } >>> + r = amdgpu_vce_resume(adev); >>> + if (r) >>> + return r; >>> >>> return vce_v4_0_hw_init(ip_block); >>> }
