On 11/7/25 11:47, Timur Kristóf wrote:
> On Fri, 2025-11-07 at 11:01 +0100, Christian König wrote:
>> On 11/7/25 10:53, Timur Kristóf wrote:
>>> On Fri, 2025-11-07 at 10:49 +0100, Christian König wrote:
>>>> On 11/6/25 19:44, Timur Kristóf wrote:
>>>>> VCE uses the VCPU BO to keep track of currently active
>>>>> encoding sessions. To make sure the VCE functions correctly
>>>>> after suspend/resume, save the VCPU BO to system RAM on
>>>>> suspend and restore it on resume.
>>>>>
>>>>> Additionally, make sure to keep the VCPU BO pinned.
>>>>> The VCPU BO needs to stay at the same location before and after
>>>>> sleep/resume because the FW code is not relocatable once it's
>>>>> started.
>>>>>
>>>>> Unfortunately this is not enough to make VCE suspend work when
>>>>> there are encoding sessions active, so don't allow that yet.
>>>>
>>>> The question of whether this is the right approach or not can
>>>> only be answered by Leo and Ruijing.
>>>>
>>>> If we can get VCE1-3 to keep session working after suspend/resume
>>>> we
>>>> should make this change otherwise we should rather make all old
>>>> sessions invalid after suspend/resume and only re-load the FW.
>>>>
>>>> Anyway I think you should make the removal of
>>>> "amdgpu_bo_kmap(adev-
>>>>> vce.vcpu_bo, &cpu_addr);" a separate patch, cause that seems to
>>>>> be a
>>>> good cleanup no matter what.
>>>>
>>>
>>> This change is necessary for the VCE1 code when it loads the
>>> firmware
>>> signature. Without this patch, we would need to call reserve(),
>>> kmap(),
>>> kunmap(), kunreserve() in vce_v1_0_load_fw_signature().
>>>
>>> Let me know which approach you prefer.
>>
>> In this case definitely make removal of amdgpu_bo_kmap(adev-
>>> vce.vcpu_bo, &cpu_addr) a separate patch.
> 
> Sorry, can you clarify what you mean?
> Should I just go back to the way things were on the first version of
> the series, and then clean it up later?

Just add a patch (early in the series, probably as the first patch) to remove 
amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr).

Then put the memset_io() into amdgpu_vce_resume() like you had in the first 
series of the patch so that VCE1 behaves the same as VCE2-3.

And when the series has landed we can clean everything up depending on what 
Leo/Ruijing have decided to do with suspend/resume on VCE1-3.

Regards,
Christian.

> 
>>
>> I want to get initial VCE1 working and landed independent of what
>> Leo/Ruijing say to suspend/resume.
> 
> Agreed.
> 
>>
>> Can be that suspend/resume is then still broken, but that is also the
>> case for VCE2-3 then.
> 
> Yes, some extra work is going to be needed on top of this patch to make
> suspend/resume work (if it's possible at all).
> 
>>
>>
>>>
>>>>
>>>>>
>>>>> Signed-off-by: Timur Kristóf <[email protected]>
>>>>> ---
>>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 44 ++++++++---------
>>>>> ----
>>>>>  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c   | 52 ++++-------------
>>>>> ----
>>>>> ----
>>>>>  2 files changed, 24 insertions(+), 72 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> index 2297608c5191..4beec5b56c4f 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> @@ -206,6 +206,10 @@ int amdgpu_vce_sw_init(struct
>>>>> amdgpu_device
>>>>> *adev, unsigned long size)
>>>>>   if (!adev->vce.fw)
>>>>>           return -ENOENT;
>>>>>  
>>>>> + adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
>>>>> + if (!adev->vce.saved_bo)
>>>>> +         return -ENOMEM;
>>>>> +
>>>>>   r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
>>>>>                               AMDGPU_GEM_DOMAIN_VRAM |
>>>>>                               AMDGPU_GEM_DOMAIN_GTT,
>>>>> @@ -254,6 +258,9 @@ int amdgpu_vce_sw_fini(struct amdgpu_device
>>>>> *adev)
>>>>>   amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev-
>>>>>> vce.gpu_addr,
>>>>>           (void **)&adev->vce.cpu_addr);
>>>>>  
>>>>> + kvfree(adev->vce.saved_bo);
>>>>> + adev->vce.saved_bo = NULL;
>>>>> +
>>>>>   return 0;
>>>>>  }
>>>>>  
>>>>> @@ -290,13 +297,19 @@ int amdgpu_vce_entity_init(struct
>>>>> amdgpu_device *adev, struct amdgpu_ring *ring)
>>>>>   */
>>>>>  int amdgpu_vce_suspend(struct amdgpu_device *adev)
>>>>>  {
>>>>> - int i;
>>>>> + int i, idx;
>>>>>  
>>>>>   cancel_delayed_work_sync(&adev->vce.idle_work);
>>>>>  
>>>>>   if (adev->vce.vcpu_bo == NULL)
>>>>>           return 0;
>>>>>  
>>>>> + if (drm_dev_enter(adev_to_drm(adev), &idx)) {
>>>>> +         memcpy_fromio(adev->vce.saved_bo, adev-
>>>>>> vce.cpu_addr,
>>>>> +                       amdgpu_bo_size(adev-
>>>>>> vce.vcpu_bo));
>>>>> +         drm_dev_exit(idx);
>>>>> + }
>>>>> +
>>>>>   for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
>>>>>           if (atomic_read(&adev->vce.handles[i]))
>>>>>                   break;
>>>>> @@ -316,40 +329,17 @@ int amdgpu_vce_suspend(struct
>>>>> amdgpu_device
>>>>> *adev)
>>>>>   */
>>>>>  int amdgpu_vce_resume(struct amdgpu_device *adev)
>>>>>  {
>>>>> - void *cpu_addr;
>>>>> - const struct common_firmware_header *hdr;
>>>>> - unsigned int offset;
>>>>> - int r, idx;
>>>>> + int idx;
>>>>>  
>>>>>   if (adev->vce.vcpu_bo == NULL)
>>>>>           return -EINVAL;
>>>>>  
>>>>> - r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
>>>>> - if (r) {
>>>>> -         dev_err(adev->dev, "(%d) failed to reserve VCE
>>>>> bo\n", r);
>>>>> -         return r;
>>>>> - }
>>>>> -
>>>>> - r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
>>>>> - if (r) {
>>>>> -         amdgpu_bo_unreserve(adev->vce.vcpu_bo);
>>>>> -         dev_err(adev->dev, "(%d) VCE map failed\n",
>>>>> r);
>>>>> -         return r;
>>>>> - }
>>>>> -
>>>>> - hdr = (const struct common_firmware_header *)adev-
>>>>>> vce.fw-
>>>>>> data;
>>>>> - offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
>>>>> -
>>>>>   if (drm_dev_enter(adev_to_drm(adev), &idx)) {
>>>>> -         memcpy_toio(cpu_addr, adev->vce.fw->data +
>>>>> offset,
>>>>> -                     adev->vce.fw->size - offset);
>>>>> +         memcpy_toio(adev->vce.cpu_addr, adev-
>>>>>> vce.saved_bo,
>>>>> +                     amdgpu_bo_size(adev-
>>>>>> vce.vcpu_bo));
>>>>>           drm_dev_exit(idx);
>>>>>   }
>>>>>  
>>>>> - amdgpu_bo_kunmap(adev->vce.vcpu_bo);
>>>>> -
>>>>> - amdgpu_bo_unreserve(adev->vce.vcpu_bo);
>>>>> -
>>>>>   return 0;
>>>>>  }
>>>>>  
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>>>>> index 2d64002bed61..21b6656b2f41 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>>>>> @@ -448,14 +448,8 @@ static int vce_v4_0_sw_init(struct
>>>>> amdgpu_ip_block *ip_block)
>>>>>           return r;
>>>>>  
>>>>>   if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
>>>>> -         const struct common_firmware_header *hdr;
>>>>> -         unsigned size = amdgpu_bo_size(adev-
>>>>>> vce.vcpu_bo);
>>>>> -
>>>>> -         adev->vce.saved_bo = kvmalloc(size,
>>>>> GFP_KERNEL);
>>>>> -         if (!adev->vce.saved_bo)
>>>>> -                 return -ENOMEM;
>>>>> -
>>>>> -         hdr = (const struct common_firmware_header
>>>>> *)adev-
>>>>>> vce.fw->data;
>>>>> +         const struct common_firmware_header *hdr =
>>>>> +                 (const struct common_firmware_header
>>>>> *)adev->vce.fw->data;
>>>>>           adev-
>>>>>> firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id
>>>>> = AMDGPU_UCODE_ID_VCE;
>>>>>           adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw =
>>>>> adev->vce.fw;
>>>>>           adev->firmware.fw_size +=
>>>>> @@ -506,11 +500,6 @@ static int vce_v4_0_sw_fini(struct
>>>>> amdgpu_ip_block *ip_block)
>>>>>   /* free MM table */
>>>>>   amdgpu_virt_free_mm_table(adev);
>>>>>  
>>>>> - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
>>>>> -         kvfree(adev->vce.saved_bo);
>>>>> -         adev->vce.saved_bo = NULL;
>>>>> - }
>>>>> -
>>>>>   r = amdgpu_vce_suspend(adev);
>>>>>   if (r)
>>>>>           return r;
>>>>> @@ -561,20 +550,7 @@ static int vce_v4_0_hw_fini(struct
>>>>> amdgpu_ip_block *ip_block)
>>>>>  static int vce_v4_0_suspend(struct amdgpu_ip_block *ip_block)
>>>>>  {
>>>>>   struct amdgpu_device *adev = ip_block->adev;
>>>>> - int r, idx;
>>>>> -
>>>>> - if (adev->vce.vcpu_bo == NULL)
>>>>> -         return 0;
>>>>> -
>>>>> - if (drm_dev_enter(adev_to_drm(adev), &idx)) {
>>>>> -         if (adev->firmware.load_type ==
>>>>> AMDGPU_FW_LOAD_PSP) {
>>>>> -                 unsigned size = amdgpu_bo_size(adev-
>>>>>> vce.vcpu_bo);
>>>>> -                 void *ptr = adev->vce.cpu_addr;
>>>>> -
>>>>> -                 memcpy_fromio(adev->vce.saved_bo, ptr,
>>>>> size);
>>>>> -         }
>>>>> -         drm_dev_exit(idx);
>>>>> - }
>>>>> + int r;
>>>>>  
>>>>>   /*
>>>>>    * Proper cleanups before halting the HW engine:
>>>>> @@ -609,25 +585,11 @@ static int vce_v4_0_suspend(struct
>>>>> amdgpu_ip_block *ip_block)
>>>>>  static int vce_v4_0_resume(struct amdgpu_ip_block *ip_block)
>>>>>  {
>>>>>   struct amdgpu_device *adev = ip_block->adev;
>>>>> - int r, idx;
>>>>> -
>>>>> - if (adev->vce.vcpu_bo == NULL)
>>>>> -         return -EINVAL;
>>>>> -
>>>>> - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
>>>>> -
>>>>> -         if (drm_dev_enter(adev_to_drm(adev), &idx)) {
>>>>> -                 unsigned size = amdgpu_bo_size(adev-
>>>>>> vce.vcpu_bo);
>>>>> -                 void *ptr = adev->vce.cpu_addr;
>>>>> + int r;
>>>>>  
>>>>> -                 memcpy_toio(ptr, adev->vce.saved_bo,
>>>>> size);
>>>>> -                 drm_dev_exit(idx);
>>>>> -         }
>>>>> - } else {
>>>>> -         r = amdgpu_vce_resume(adev);
>>>>> -         if (r)
>>>>> -                 return r;
>>>>> - }
>>>>> + r = amdgpu_vce_resume(adev);
>>>>> + if (r)
>>>>> +         return r;
>>>>>  
>>>>>   return vce_v4_0_hw_init(ip_block);
>>>>>  }

Reply via email to