On 11/7/25 11:47, Timur Kristóf wrote:
> On Fri, 2025-11-07 at 11:01 +0100, Christian König wrote:
>> On 11/7/25 10:53, Timur Kristóf wrote:
>>> On Fri, 2025-11-07 at 10:49 +0100, Christian König wrote:
>>>> On 11/6/25 19:44, Timur Kristóf wrote:
>>>>> VCE uses the VCPU BO to keep track of currently active
>>>>> encoding sessions. To make sure the VCE functions correctly
>>>>> after suspend/resume, save the VCPU BO to system RAM on
>>>>> suspend and restore it on resume.
>>>>>
>>>>> Additionally, make sure to keep the VCPU BO pinned.
>>>>> The VCPU BO needs to stay at the same location before and after
>>>>> sleep/resume because the FW code is not relocatable once it's
>>>>> started.
>>>>>
>>>>> Unfortunately this is not enough to make VCE suspend work when
>>>>> there are encoding sessions active, so don't allow that yet.
>>>>
>>>> Whether this is the right approach or not is a question only Leo and
>>>> Ruijing can answer.
>>>>
>>>> If we can get VCE1-3 to keep sessions working after suspend/resume,
>>>> we should make this change; otherwise we should rather invalidate all
>>>> old sessions after suspend/resume and only re-load the FW.
>>>>
>>>> Anyway, I think you should make the removal of
>>>> "amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);" a separate patch,
>>>> because that seems to be a good cleanup no matter what.
>>>>
>>>
>>> This change is necessary for the VCE1 code when it loads the firmware
>>> signature. Without this patch, we would need to call reserve(), kmap(),
>>> kunmap() and unreserve() in vce_v1_0_load_fw_signature().
>>>
>>> Let me know which approach you prefer.
>>
>> In this case definitely make the removal of
>> amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr) a separate patch.
>
> Sorry, can you clarify what you mean?
> Should I just go back to the way things were on the first version of
> the series, and then clean it up later?
Just add a patch (early in the series, probably as the first patch) to
remove amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr).

Then put the memset_io() into amdgpu_vce_resume() like you had in the
first version of the series, so that VCE1 behaves the same as VCE2-3.

And when the series has landed, we can clean everything up depending on
what Leo/Ruijing decide to do with suspend/resume on VCE1-3.
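To be clear, for the memset_io() I mean roughly something like the
following in amdgpu_vce_resume() (completely untested sketch, just to
illustrate the idea; the exact offset/size handling from your first
version may well have looked different):

int amdgpu_vce_resume(struct amdgpu_device *adev)
{
	const struct common_firmware_header *hdr;
	unsigned int offset, fw_size;
	int idx;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
	fw_size = adev->vce.fw->size - offset;

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		/* Re-load the FW image into the VCPU BO ... */
		memcpy_toio(adev->vce.cpu_addr, adev->vce.fw->data + offset,
			    fw_size);
		/* ... and clear the rest, so VCE1 starts from the same
		 * clean state as VCE2-3. */
		memset_io(adev->vce.cpu_addr + fw_size, 0,
			  amdgpu_bo_size(adev->vce.vcpu_bo) - fw_size);
		drm_dev_exit(idx);
	}

	return 0;
}

That way the FW is always re-loaded into a cleanly cleared VCPU BO,
independent of what we later decide about keeping sessions alive over
suspend/resume.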
Regards,
Christian.

>
>>
>> I want to get initial VCE1 working and landed independently of what
>> Leo/Ruijing say about suspend/resume.
>
> Agreed.
>
>>
>> It can be that suspend/resume is then still broken, but that is also
>> the case for VCE2-3.
>
> Yes, some extra work is going to be needed on top of this patch to make
> suspend/resume work (if it's possible at all).
>
>>
>>
>>>
>>>>
>>>>>
>>>>> Signed-off-by: Timur Kristóf <[email protected]>
>>>>> ---
>>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 44 ++++++++-------------
>>>>>  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c   | 52 ++++---------------------
>>>>>  2 files changed, 24 insertions(+), 72 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> index 2297608c5191..4beec5b56c4f 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> @@ -206,6 +206,10 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
>>>>>  	if (!adev->vce.fw)
>>>>>  		return -ENOENT;
>>>>>
>>>>> +	adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
>>>>> +	if (!adev->vce.saved_bo)
>>>>> +		return -ENOMEM;
>>>>> +
>>>>>  	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
>>>>>  				    AMDGPU_GEM_DOMAIN_VRAM |
>>>>>  				    AMDGPU_GEM_DOMAIN_GTT,
>>>>> @@ -254,6 +258,9 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
>>>>>  	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
>>>>>  			      (void **)&adev->vce.cpu_addr);
>>>>>
>>>>> +	kvfree(adev->vce.saved_bo);
>>>>> +	adev->vce.saved_bo = NULL;
>>>>> +
>>>>>  	return 0;
>>>>>  }
>>>>>
>>>>> @@ -290,13 +297,19 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
>>>>>   */
>>>>>  int amdgpu_vce_suspend(struct amdgpu_device *adev)
>>>>>  {
>>>>> -	int i;
>>>>> +	int i, idx;
>>>>>
>>>>>  	cancel_delayed_work_sync(&adev->vce.idle_work);
>>>>>
>>>>>  	if (adev->vce.vcpu_bo == NULL)
>>>>>  		return 0;
>>>>>
>>>>> +	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
>>>>> +		memcpy_fromio(adev->vce.saved_bo, adev->vce.cpu_addr,
>>>>> +			      amdgpu_bo_size(adev->vce.vcpu_bo));
>>>>> +		drm_dev_exit(idx);
>>>>> +	}
>>>>> +
>>>>>  	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
>>>>>  		if (atomic_read(&adev->vce.handles[i]))
>>>>>  			break;
>>>>> @@ -316,40 +329,17 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev)
>>>>>   */
>>>>>  int amdgpu_vce_resume(struct amdgpu_device *adev)
>>>>>  {
>>>>> -	void *cpu_addr;
>>>>> -	const struct common_firmware_header *hdr;
>>>>> -	unsigned int offset;
>>>>> -	int r, idx;
>>>>> +	int idx;
>>>>>
>>>>>  	if (adev->vce.vcpu_bo == NULL)
>>>>>  		return -EINVAL;
>>>>>
>>>>> -	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
>>>>> -	if (r) {
>>>>> -		dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
>>>>> -		return r;
>>>>> -	}
>>>>> -
>>>>> -	r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
>>>>> -	if (r) {
>>>>> -		amdgpu_bo_unreserve(adev->vce.vcpu_bo);
>>>>> -		dev_err(adev->dev, "(%d) VCE map failed\n", r);
>>>>> -		return r;
>>>>> -	}
>>>>> -
>>>>> -	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
>>>>> -	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
>>>>> -
>>>>>  	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
>>>>> -		memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
>>>>> -			    adev->vce.fw->size - offset);
>>>>> +		memcpy_toio(adev->vce.cpu_addr, adev->vce.saved_bo,
>>>>> +			    amdgpu_bo_size(adev->vce.vcpu_bo));
>>>>>  		drm_dev_exit(idx);
>>>>>  	}
>>>>>
>>>>> -	amdgpu_bo_kunmap(adev->vce.vcpu_bo);
>>>>> -
>>>>> -	amdgpu_bo_unreserve(adev->vce.vcpu_bo);
>>>>> -
>>>>>  	return 0;
>>>>>  }
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>>>>> index 2d64002bed61..21b6656b2f41 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>>>>> @@ -448,14 +448,8 @@ static int vce_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
>>>>>  		return r;
>>>>>
>>>>>  	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
>>>>> -		const struct common_firmware_header *hdr;
>>>>> -		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
>>>>> -
>>>>> -		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
>>>>> -		if (!adev->vce.saved_bo)
>>>>> -			return -ENOMEM;
>>>>> -
>>>>> -		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
>>>>> +		const struct common_firmware_header *hdr =
>>>>> +			(const struct common_firmware_header *)adev->vce.fw->data;
>>>>>  		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
>>>>>  		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
>>>>>  		adev->firmware.fw_size +=
>>>>> @@ -506,11 +500,6 @@ static int vce_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
>>>>>  	/* free MM table */
>>>>>  	amdgpu_virt_free_mm_table(adev);
>>>>>
>>>>> -	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
>>>>> -		kvfree(adev->vce.saved_bo);
>>>>> -		adev->vce.saved_bo = NULL;
>>>>> -	}
>>>>> -
>>>>>  	r = amdgpu_vce_suspend(adev);
>>>>>  	if (r)
>>>>>  		return r;
>>>>> @@ -561,20 +550,7 @@ static int vce_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
>>>>>  static int vce_v4_0_suspend(struct amdgpu_ip_block *ip_block)
>>>>>  {
>>>>>  	struct amdgpu_device *adev = ip_block->adev;
>>>>> -	int r, idx;
>>>>> -
>>>>> -	if (adev->vce.vcpu_bo == NULL)
>>>>> -		return 0;
>>>>> -
>>>>> -	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
>>>>> -		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
>>>>> -			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
>>>>> -			void *ptr = adev->vce.cpu_addr;
>>>>> -
>>>>> -			memcpy_fromio(adev->vce.saved_bo, ptr, size);
>>>>> -		}
>>>>> -		drm_dev_exit(idx);
>>>>> -	}
>>>>> +	int r;
>>>>>
>>>>>  	/*
>>>>>  	 * Proper cleanups before halting the HW engine:
>>>>> @@ -609,25 +585,11 @@ static int vce_v4_0_suspend(struct amdgpu_ip_block *ip_block)
>>>>>  static int vce_v4_0_resume(struct amdgpu_ip_block *ip_block)
>>>>>  {
>>>>>  	struct amdgpu_device *adev = ip_block->adev;
>>>>> -	int r, idx;
>>>>> -
>>>>> -	if (adev->vce.vcpu_bo == NULL)
>>>>> -		return -EINVAL;
>>>>> -
>>>>> -	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
>>>>> -
>>>>> -		if (drm_dev_enter(adev_to_drm(adev), &idx)) {
>>>>> -			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
>>>>> -			void *ptr = adev->vce.cpu_addr;
>>>>> +	int r;
>>>>>
>>>>> -			memcpy_toio(ptr, adev->vce.saved_bo, size);
>>>>> -			drm_dev_exit(idx);
>>>>> -		}
>>>>> -	} else {
>>>>> -		r = amdgpu_vce_resume(adev);
>>>>> -		if (r)
>>>>> -			return r;
>>>>> -	}
>>>>> +	r = amdgpu_vce_resume(adev);
>>>>> +	if (r)
>>>>> +		return r;
>>>>>
>>>>>  	return vce_v4_0_hw_init(ip_block);
>>>>>  }
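PS: regarding the reserve()/kmap()/kunmap()/unreserve() sequence
mentioned above for vce_v1_0_load_fw_signature(): without the kmap
removal that would look roughly like this (hypothetical sketch only;
the real function from your series has a different signature and
actually writes the signature data):

static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev)
{
	void *cpu_addr;
	int r;

	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
	if (r)
		return r;

	r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
	if (r) {
		amdgpu_bo_unreserve(adev->vce.vcpu_bo);
		return r;
	}

	/* ... copy the FW signature into the mapped VCPU BO here ... */

	amdgpu_bo_kunmap(adev->vce.vcpu_bo);
	amdgpu_bo_unreserve(adev->vce.vcpu_bo);

	return 0;
}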
