Same for OA. Cheers,
Mike On Tue, 27 Nov 2018, 01:57 Marek Olšák, <mar...@gmail.com> wrote: > From: Marek Olšák <marek.ol...@amd.com> > > --- > src/gallium/drivers/radeon/radeon_winsys.h | 4 +- > src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 55 +++++++++++++--------- > 2 files changed, 36 insertions(+), 23 deletions(-) > > diff --git a/src/gallium/drivers/radeon/radeon_winsys.h > b/src/gallium/drivers/radeon/radeon_winsys.h > index 3d0bb75ef6e..a5dd3e6f9b1 100644 > --- a/src/gallium/drivers/radeon/radeon_winsys.h > +++ b/src/gallium/drivers/radeon/radeon_winsys.h > @@ -45,21 +45,23 @@ enum radeon_bo_layout { > RADEON_LAYOUT_LINEAR = 0, > RADEON_LAYOUT_TILED, > RADEON_LAYOUT_SQUARETILED, > > RADEON_LAYOUT_UNKNOWN > }; > > enum radeon_bo_domain { /* bitfield */ > RADEON_DOMAIN_GTT = 2, > RADEON_DOMAIN_VRAM = 4, > - RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT > + RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT, > + RADEON_DOMAIN_GDS = 8, > + RADEON_DOMAIN_OA = 16, > }; > > enum radeon_bo_flag { /* bitfield */ > RADEON_FLAG_GTT_WC = (1 << 0), > RADEON_FLAG_NO_CPU_ACCESS = (1 << 1), > RADEON_FLAG_NO_SUBALLOC = (1 << 2), > RADEON_FLAG_SPARSE = (1 << 3), > RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4), > RADEON_FLAG_READ_ONLY = (1 << 5), > RADEON_FLAG_32BIT = (1 << 6), > diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > index a9170a2bc69..1470c873a6a 100644 > --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > @@ -177,22 +177,24 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf) > simple_mtx_lock(&ws->global_bo_list_lock); > LIST_DEL(&bo->u.real.global_list_item); > ws->num_buffers--; > simple_mtx_unlock(&ws->global_bo_list_lock); > } > > simple_mtx_lock(&ws->bo_export_table_lock); > util_hash_table_remove(ws->bo_export_table, bo->bo); > simple_mtx_unlock(&ws->bo_export_table_lock); > > - amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, > 
AMDGPU_VA_OP_UNMAP); > - amdgpu_va_range_free(bo->u.real.va_handle); > + if (bo->initial_domain & RADEON_DOMAIN_VRAM_GTT) { > + amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, > AMDGPU_VA_OP_UNMAP); > + amdgpu_va_range_free(bo->u.real.va_handle); > + } > amdgpu_bo_free(bo->bo); > > amdgpu_bo_remove_fences(bo); > > if (bo->initial_domain & RADEON_DOMAIN_VRAM) > ws->allocated_vram -= align64(bo->base.size, > ws->info.gart_page_size); > else if (bo->initial_domain & RADEON_DOMAIN_GTT) > ws->allocated_gtt -= align64(bo->base.size, > ws->info.gart_page_size); > > if (bo->u.real.map_count >= 1) { > @@ -418,25 +420,26 @@ static struct amdgpu_winsys_bo > *amdgpu_create_bo(struct amdgpu_winsys *ws, > unsigned alignment, > enum radeon_bo_domain > initial_domain, > unsigned flags, > int heap) > { > struct amdgpu_bo_alloc_request request = {0}; > amdgpu_bo_handle buf_handle; > uint64_t va = 0; > struct amdgpu_winsys_bo *bo; > amdgpu_va_handle va_handle; > - unsigned va_gap_size; > int r; > > /* VRAM or GTT must be specified, but not both at the same time. */ > - assert(util_bitcount(initial_domain & RADEON_DOMAIN_VRAM_GTT) == 1); > + assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT | > + RADEON_DOMAIN_GDS | > + RADEON_DOMAIN_OA)) == 1); > > /* Gfx9: Overallocate the size to the next power of two for faster > address > * translation if we don't waste too much memory. > */ > if (ws->info.chip_class >= GFX9) { > uint64_t next_pot_size = util_next_power_of_two64(size); > > /* For slightly lower than 4 GB allocations, at most 32 MB are > wasted. > * For slightly lower than 256 MB allocations, at most 2 MB are > wasted. > * For slightly lower than 64 MB allocations, at most 512 KB are > wasted. 
> @@ -464,20 +467,24 @@ static struct amdgpu_winsys_bo > *amdgpu_create_bo(struct amdgpu_winsys *ws, > pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, > &bo->base, > heap); > } > request.alloc_size = size; > request.phys_alignment = alignment; > > if (initial_domain & RADEON_DOMAIN_VRAM) > request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM; > if (initial_domain & RADEON_DOMAIN_GTT) > request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; > + if (initial_domain & RADEON_DOMAIN_GDS) > + request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS; > + if (initial_domain & RADEON_DOMAIN_OA) > + request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA; > > /* Since VRAM and GTT have almost the same performance on APUs, we > could > * just set GTT. However, in order to decrease GTT(RAM) usage, which is > * shared with the OS, allow VRAM placements too. The idea is not to > use > * VRAM usefully, but to use it so that it's not unused and wasted. > */ > if (!ws->info.has_dedicated_vram) > request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; > > if (flags & RADEON_FLAG_NO_CPU_ACCESS) > @@ -493,41 +500,43 @@ static struct amdgpu_winsys_bo > *amdgpu_create_bo(struct amdgpu_winsys *ws, > > r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle); > if (r) { > fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n"); > fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size); > fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment); > fprintf(stderr, "amdgpu: domains : %u\n", initial_domain); > goto error_bo_alloc; > } > > - va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0; > + if (initial_domain & RADEON_DOMAIN_VRAM_GTT) { > + unsigned va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * > 1024) : 0; > > - r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, > - size + va_gap_size, > - amdgpu_get_optimal_vm_alignment(ws, size, > alignment), > - 0, &va, &va_handle, > - (flags & RADEON_FLAG_32BIT ? 
> AMDGPU_VA_RANGE_32_BIT : 0) | > - AMDGPU_VA_RANGE_HIGH); > - if (r) > - goto error_va_alloc; > + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, > + size + va_gap_size, > + amdgpu_get_optimal_vm_alignment(ws, size, > alignment), > + 0, &va, &va_handle, > + (flags & RADEON_FLAG_32BIT ? > AMDGPU_VA_RANGE_32_BIT : 0) | > + AMDGPU_VA_RANGE_HIGH); > + if (r) > + goto error_va_alloc; > > - unsigned vm_flags = AMDGPU_VM_PAGE_READABLE | > - AMDGPU_VM_PAGE_EXECUTABLE; > + unsigned vm_flags = AMDGPU_VM_PAGE_READABLE | > + AMDGPU_VM_PAGE_EXECUTABLE; > > - if (!(flags & RADEON_FLAG_READ_ONLY)) > - vm_flags |= AMDGPU_VM_PAGE_WRITEABLE; > + if (!(flags & RADEON_FLAG_READ_ONLY)) > + vm_flags |= AMDGPU_VM_PAGE_WRITEABLE; > > - r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags, > + r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags, > AMDGPU_VA_OP_MAP); > - if (r) > - goto error_va_map; > + if (r) > + goto error_va_map; > + } > > pipe_reference_init(&bo->base.reference, 1); > bo->base.alignment = alignment; > bo->base.usage = 0; > bo->base.size = size; > bo->base.vtbl = &amdgpu_winsys_bo_vtbl; > bo->ws = ws; > bo->bo = buf_handle; > bo->va = va; > bo->u.real.va_handle = va_handle; > @@ -1328,22 +1337,24 @@ no_slab: > return amdgpu_bo_sparse_create(ws, size, domain, flags); > } > > /* This flag is irrelevant for the cache. */ > flags &= ~RADEON_FLAG_NO_SUBALLOC; > > /* Align size to page size. This is the minimum alignment for normal > * BOs. Aligning this here helps the cached bufmgr. Especially small > BOs, > * like constant/uniform buffers, can benefit from better and more > reuse. 
> */ > - size = align64(size, ws->info.gart_page_size); > - alignment = align(alignment, ws->info.gart_page_size); > + if (domain & RADEON_DOMAIN_VRAM_GTT) { > + size = align64(size, ws->info.gart_page_size); > + alignment = align(alignment, ws->info.gart_page_size); > + } > > bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING; > > if (use_reusable_pool) { > heap = radeon_get_heap_index(domain, flags); > assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS); > > /* Get a buffer from the cache. */ > bo = (struct amdgpu_winsys_bo*) > pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, 0, > heap); > -- > 2.17.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev