On 11/6/25 19:44, Timur Kristóf wrote: > Based on research and ideas by Alexandre and Christian. > > VCE1 actually executes its code from the VCPU BO. > Due to various hardware limitations, the VCE1 requires > the VCPU BO to be in the low 32 bit address range. > However, VRAM is typically mapped at the high address range, > which means the VCPU can't access VRAM through the FB aperture. > > To solve this, we write a few page table entries to > map the VCPU BO in the GART address range. And we make sure > that the GART is located at the low address range. > That way the VCE1 can access the VCPU BO. > > v2: > - Adjust to v2 of the GART helper commit. > - Add empty line to multi-line comment. > > v3: > - Instead of relying on gmc_v6 to set the GART space before GTT, > add a new function amdgpu_vce_required_gart_pages() which is > called from amdgpu_gtt_mgr_init() directly. > > Signed-off-by: Timur Kristóf <[email protected]> > Co-developed-by: Alexandre Demers <[email protected]> > Signed-off-by: Alexandre Demers <[email protected]>
Reviewed-by: Christian König <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 18 +++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 1 + > drivers/gpu/drm/amd/amdgpu/vce_v1_0.c | 55 +++++++++++++++++++++ > 4 files changed, 75 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c > index 0760e70402ec..895c1e4c6747 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c > @@ -284,6 +284,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, > uint64_t gtt_size) > ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size); > > start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; > + start += amdgpu_vce_required_gart_pages(adev); > size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; > drm_mm_init(&mgr->mm, start, size); > spin_lock_init(&mgr->lock); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c > index 2761c096c4cd..e825184f244c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c > @@ -459,6 +459,24 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, > struct drm_file *filp) > } > } > > +/** > + * amdgpu_vce_required_gart_pages() - gets number of GART pages required by > VCE > + * > + * @adev: amdgpu_device pointer > + * > + * Returns how many GART pages we need before GTT for the VCE IP block. > + * For VCE1, see vce_v1_0_ensure_vcpu_bo_32bit_addr for details. > + * For VCE2+, this is not needed so return zero. 
> + */ > +u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev) > +{ > + /* VCE IP block not added yet, so can't use amdgpu_ip_version */ > + if (adev->family == AMDGPU_FAMILY_SI) > + return 512; > + > + return 0; > +} > + > /** > * amdgpu_vce_get_create_msg - generate a VCE create msg > * > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h > index 050783802623..1c3464ce5037 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h > @@ -61,6 +61,7 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev, > struct amdgpu_ring *ring) > int amdgpu_vce_suspend(struct amdgpu_device *adev); > int amdgpu_vce_resume(struct amdgpu_device *adev); > void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file > *filp); > +u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev); > int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job > *job, > struct amdgpu_ib *ib); > int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c > b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c > index bf9f943852cb..9ae424618556 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c > @@ -34,6 +34,7 @@ > > #include "amdgpu.h" > #include "amdgpu_vce.h" > +#include "amdgpu_gart.h" > #include "sid.h" > #include "vce_v1_0.h" > #include "vce/vce_1_0_d.h" > @@ -46,6 +47,11 @@ > #define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1)) > #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 > > +#define VCE_V1_0_GART_PAGE_START \ > + (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS) > +#define VCE_V1_0_GART_ADDR_START \ > + (VCE_V1_0_GART_PAGE_START * AMDGPU_GPU_PAGE_SIZE) > + > static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev); > static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev); > > @@ -513,6 +519,49 @@ static int 
vce_v1_0_early_init(struct amdgpu_ip_block > *ip_block) > return 0; > } > > +/** > + * vce_v1_0_ensure_vcpu_bo_32bit_addr() - ensure the VCPU BO has a 32-bit > address > + * > + * @adev: amdgpu_device pointer > + * > + * Due to various hardware limitations, the VCE1 requires > + * the VCPU BO to be in the low 32 bit address range. > + * Ensure that the VCPU BO has a 32-bit GPU address, > + * or return an error code when that isn't possible. > + * > + * To accommodate that, we put GART to the LOW address range > + * and reserve some GART pages where we map the VCPU BO, > + * so that it gets a 32-bit address. > + */ > +static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev) > +{ > + u64 gpu_addr = amdgpu_bo_gpu_offset(adev->vce.vcpu_bo); > + u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo); > + u64 max_vcpu_bo_addr = 0xffffffff - bo_size; > + u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / > AMDGPU_GPU_PAGE_SIZE; > + u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo); > + u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | > AMDGPU_PTE_VALID; > + > + /* > + * Check if the VCPU BO already has a 32-bit address. > + * E.g. if MC is configured to put VRAM in the low address range. > + */ > + if (gpu_addr <= max_vcpu_bo_addr) > + return 0; > + > + /* Check if we can map the VCPU BO in GART to a 32-bit address. 
*/ > + if (adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START > max_vcpu_bo_addr) > + return -EINVAL; > + > + amdgpu_gart_map_vram_range(adev, pa, VCE_V1_0_GART_PAGE_START, > + num_pages, flags, adev->gart.ptr); > + adev->vce.gpu_addr = adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START; > + if (adev->vce.gpu_addr > max_vcpu_bo_addr) > + return -EINVAL; > + > + return 0; > +} > + > static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block) > { > struct amdgpu_device *adev = ip_block->adev; > @@ -532,6 +581,9 @@ static int vce_v1_0_sw_init(struct amdgpu_ip_block > *ip_block) > if (r) > return r; > r = vce_v1_0_load_fw_signature(adev); > + if (r) > + return r; > + r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev); > if (r) > return r; > > @@ -647,6 +699,9 @@ static int vce_v1_0_resume(struct amdgpu_ip_block > *ip_block) > if (r) > return r; > r = vce_v1_0_load_fw_signature(adev); > + if (r) > + return r; > + r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev); > if (r) > return r; >
