On 11/6/25 19:44, Timur Kristóf wrote:
> Based on research and ideas by Alexandre and Christian.
> 
> VCE1 actually executes its code from the VCPU BO.
> Due to various hardware limitations, the VCE1 requires
> the VCPU BO to be in the low 32 bit address range.
> However, VRAM is typically mapped at the high address range,
> which means the VCPU can't access VRAM through the FB aperture.
> 
> To solve this, we write a few page table entries to
> map the VCPU BO in the GART address range. And we make sure
> that the GART is located at the low address range.
> That way the VCE1 can access the VCPU BO.
> 
> v2:
> - Adjust to v2 of the GART helper commit.
> - Add empty line to multi-line comment.
> 
> v3:
> - Instead of relying on gmc_v6 to set the GART space before GTT,
>   add a new function amdgpu_vce_required_gart_pages() which is
>   called from amdgpu_gtt_mgr_init() directly.
> 
> Signed-off-by: Timur Kristóf <[email protected]>
> Co-developed-by: Alexandre Demers <[email protected]>
> Signed-off-by: Alexandre Demers <[email protected]>

Reviewed-by: Christian König <[email protected]>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c     | 18 +++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h     |  1 +
>  drivers/gpu/drm/amd/amdgpu/vce_v1_0.c       | 55 +++++++++++++++++++++
>  4 files changed, 75 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> index 0760e70402ec..895c1e4c6747 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> @@ -284,6 +284,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, 
> uint64_t gtt_size)
>       ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size);
>  
>       start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
> +     start += amdgpu_vce_required_gart_pages(adev);
>       size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
>       drm_mm_init(&mgr->mm, start, size);
>       spin_lock_init(&mgr->lock);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> index 2761c096c4cd..e825184f244c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -459,6 +459,24 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, 
> struct drm_file *filp)
>       }
>  }
>  
> +/**
> + * amdgpu_vce_required_gart_pages() - gets number of GART pages required by VCE
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Returns how many GART pages we need before GTT for the VCE IP block.
> + * For VCE1, see vce_v1_0_ensure_vcpu_bo_32bit_addr for details.
> + * For VCE2+, this is not needed so return zero.
> + */
> +u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev)
> +{
> +     /* VCE IP block not added yet, so can't use amdgpu_ip_version */
> +     if (adev->family == AMDGPU_FAMILY_SI)
> +             return 512;
> +
> +     return 0;
> +}
> +
>  /**
>   * amdgpu_vce_get_create_msg - generate a VCE create msg
>   *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
> index 050783802623..1c3464ce5037 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
> @@ -61,6 +61,7 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev, 
> struct amdgpu_ring *ring)
>  int amdgpu_vce_suspend(struct amdgpu_device *adev);
>  int amdgpu_vce_resume(struct amdgpu_device *adev);
>  void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file 
> *filp);
> +u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev);
>  int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job 
> *job,
>                            struct amdgpu_ib *ib);
>  int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c 
> b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
> index bf9f943852cb..9ae424618556 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
> @@ -34,6 +34,7 @@
>  
>  #include "amdgpu.h"
>  #include "amdgpu_vce.h"
> +#include "amdgpu_gart.h"
>  #include "sid.h"
>  #include "vce_v1_0.h"
>  #include "vce/vce_1_0_d.h"
> @@ -46,6 +47,11 @@
>  #define VCE_V1_0_DATA_SIZE   (7808 * (AMDGPU_MAX_VCE_HANDLES + 1))
>  #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK        0x02
>  
> +#define VCE_V1_0_GART_PAGE_START \
> +     (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS)
> +#define VCE_V1_0_GART_ADDR_START \
> +     (VCE_V1_0_GART_PAGE_START * AMDGPU_GPU_PAGE_SIZE)
> +
>  static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev);
>  static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev);
>  
> @@ -513,6 +519,49 @@ static int vce_v1_0_early_init(struct amdgpu_ip_block 
> *ip_block)
>       return 0;
>  }
>  
> +/**
> + * vce_v1_0_ensure_vcpu_bo_32bit_addr() - ensure the VCPU BO has a 32-bit address
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Due to various hardware limitations, the VCE1 requires
> + * the VCPU BO to be in the low 32 bit address range.
> + * Ensure that the VCPU BO has a 32-bit GPU address,
> + * or return an error code when that isn't possible.
> + *
> + * To accommodate that, we put GART to the LOW address range
> + * and reserve some GART pages where we map the VCPU BO,
> + * so that it gets a 32-bit address.
> + */
> +static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
> +{
> +     u64 gpu_addr = amdgpu_bo_gpu_offset(adev->vce.vcpu_bo);
> +     u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo);
> +     u64 max_vcpu_bo_addr = 0xffffffff - bo_size;
> +     u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE;
> +     u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo);
> +     u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID;
> +
> +     /*
> +      * Check if the VCPU BO already has a 32-bit address.
> +      * Eg. if MC is configured to put VRAM in the low address range.
> +      */
> +     if (gpu_addr <= max_vcpu_bo_addr)
> +             return 0;
> +
> +     /* Check if we can map the VCPU BO in GART to a 32-bit address. */
> +     if (adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START > max_vcpu_bo_addr)
> +             return -EINVAL;
> +
> +     amdgpu_gart_map_vram_range(adev, pa, VCE_V1_0_GART_PAGE_START,
> +                                num_pages, flags, adev->gart.ptr);
> +     adev->vce.gpu_addr = adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START;
> +     if (adev->vce.gpu_addr > max_vcpu_bo_addr)
> +             return -EINVAL;
> +
> +     return 0;
> +}
> +
>  static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
>  {
>       struct amdgpu_device *adev = ip_block->adev;
> @@ -532,6 +581,9 @@ static int vce_v1_0_sw_init(struct amdgpu_ip_block 
> *ip_block)
>       if (r)
>               return r;
>       r = vce_v1_0_load_fw_signature(adev);
> +     if (r)
> +             return r;
> +     r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
>       if (r)
>               return r;
>  
> @@ -647,6 +699,9 @@ static int vce_v1_0_resume(struct amdgpu_ip_block 
> *ip_block)
>       if (r)
>               return r;
>       r = vce_v1_0_load_fw_signature(adev);
> +     if (r)
> +             return r;
> +     r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
>       if (r)
>               return r;
>  

Reply via email to