From: Christian K?nig <christian.koe...@amd.com> This improves concurrent stream decoding.
Signed-off-by: Christian K?nig <christian.koenig at amd.com> --- drivers/gpu/drm/radeon/radeon.h | 3 ++- drivers/gpu/drm/radeon/radeon_object.c | 5 +++-- drivers/gpu/drm/radeon/radeon_uvd.c | 20 ++++++++++++++++++-- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index cae3adc..f2dba50 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1640,7 +1640,8 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence); int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence); -void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo); +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo, + uint32_t allowed_domains); void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp); int radeon_uvd_cs_parse(struct radeon_cs_parser *parser); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 0129c7e..c97a424 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -491,6 +491,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, bo = lobj->robj; if (!bo->pin_count) { u32 domain = lobj->prefered_domains; + u32 allowed = lobj->allowed_domains; u32 current_domain = radeon_mem_type_to_domain(bo->tbo.mem.mem_type); @@ -502,7 +503,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, * into account. We don't want to disallow buffer moves * completely. */ - if ((lobj->allowed_domains & current_domain) != 0 && + if ((allowed & current_domain) != 0 && (domain & current_domain) == 0 && /* will be moved */ bytes_moved > bytes_moved_threshold) { /* don't move it */ @@ -512,7 +513,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, retry: radeon_ttm_placement_from_domain(bo, domain); if (ring == R600_RING_TYPE_UVD_INDEX) - radeon_uvd_force_into_uvd_segment(bo); + radeon_uvd_force_into_uvd_segment(bo, allowed); initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 1168de7..d33dacc 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -231,7 +231,8 @@ int radeon_uvd_resume(struct radeon_device *rdev) return 0; } -void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo, + uint32_t allowed_domains) { int i; @@ -239,6 +240,21 @@ void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) rbo->placements[i].fpfn = 0 >> PAGE_SHIFT; rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; } + + /* If it must be in VRAM it must be in the first segment as well */ + if (allowed_domains == RADEON_GEM_DOMAIN_VRAM) + return; + + /* abort if we already have more than one placement */ + if (rbo->placement.num_placement > 1) + return; + + /* add another 256MB segment */ + rbo->placements[1] = rbo->placements[0]; + rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; + rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; + rbo->placement.num_placement++; + rbo->placement.num_busy_placement++; } void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) @@ -629,7 +645,7 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, return r; radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); - radeon_uvd_force_into_uvd_segment(bo); + radeon_uvd_force_into_uvd_segment(bo, RADEON_GEM_DOMAIN_VRAM); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); if (r) -- 1.9.1