From: Marek Olšák <marek.ol...@amd.com> This patch does not change behavior. It turns the single slab allocator into an array of NUM_SLAB_ALLOCATORS allocators (currently 1), which makes it easier to change the number of slab allocators later. --- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 60 +++++++++++++++---- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 40 +++++++++---- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 10 ++-- 3 files changed, 86 insertions(+), 24 deletions(-)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 49de30bb57c..9c5e5e1ebc1 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -213,21 +213,23 @@ static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf) assert(bo->bo); /* slab buffers have a separate vtbl */ if (bo->u.real.use_reusable_pool) pb_cache_add_buffer(&bo->u.real.cache_entry); else amdgpu_bo_destroy(_buf); } static void amdgpu_clean_up_buffer_managers(struct amdgpu_winsys *ws) { - pb_slabs_reclaim(&ws->bo_slabs); + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) + pb_slabs_reclaim(&ws->bo_slabs[i]); + pb_cache_release_all_buffers(&ws->bo_cache); } static void *amdgpu_bo_map(struct pb_buffer *buf, struct radeon_cmdbuf *rcs, enum pipe_transfer_usage usage) { struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; struct amdgpu_winsys_bo *real; struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs; @@ -521,56 +523,80 @@ bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf) } bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry) { struct amdgpu_winsys_bo *bo = NULL; /* fix container_of */ bo = container_of(entry, bo, u.slab.entry); return amdgpu_bo_can_reclaim(&bo->base); } +static struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size) +{ + /* Find the correct slab allocator for the given size. 
*/ + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + struct pb_slabs *slabs = &ws->bo_slabs[i]; + + if (size <= 1 << (slabs->min_order + slabs->num_orders - 1)) + return slabs; + } + + assert(0); + return NULL; +} + static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf) { struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); assert(!bo->bo); - pb_slab_free(&bo->ws->bo_slabs, &bo->u.slab.entry); + pb_slab_free(get_slabs(bo->ws, bo->base.size), &bo->u.slab.entry); } static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = { amdgpu_bo_slab_destroy /* other functions are never called */ }; struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index) { struct amdgpu_winsys *ws = priv; struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab); enum radeon_bo_domain domains = radeon_domain_from_heap(heap); enum radeon_bo_flag flags = radeon_flags_from_heap(heap); uint32_t base_id; + unsigned slab_size = 0; if (!slab) return NULL; - unsigned slab_size = 1 << AMDGPU_SLAB_BO_SIZE_LOG2; + /* Determine the slab buffer size. */ + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + struct pb_slabs *slabs = &ws->bo_slabs[i]; + unsigned max_entry_size = 1 << (slabs->min_order + slabs->num_orders - 1); + + if (entry_size <= max_entry_size) { + /* The slab size is twice the size of the largest possible entry. 
*/ + slab_size = max_entry_size * 2; + } + } + assert(slab_size != 0); + slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(&ws->base, slab_size, slab_size, domains, flags)); if (!slab->buffer) goto fail; - assert(slab->buffer->bo); - slab->base.num_entries = slab->buffer->base.size / entry_size; slab->base.num_free = slab->base.num_entries; slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries)); if (!slab->entries) goto fail_buffer; LIST_INITHEAD(&slab->base.free); base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab->base.num_entries); @@ -580,21 +606,29 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, bo->base.alignment = entry_size; bo->base.usage = slab->buffer->base.usage; bo->base.size = entry_size; bo->base.vtbl = &amdgpu_winsys_bo_slab_vtbl; bo->ws = ws; bo->va = slab->buffer->va + i * entry_size; bo->initial_domain = domains; bo->unique_id = base_id + i; bo->u.slab.entry.slab = &slab->base; bo->u.slab.entry.group_index = group_index; - bo->u.slab.real = slab->buffer; + + if (slab->buffer->bo) { + /* The slab is not suballocated. */ + bo->u.slab.real = slab->buffer; + } else { + /* The slab is allocated out of a bigger slab. */ + bo->u.slab.real = slab->buffer->u.slab.real; + assert(bo->u.slab.real->bo); + } LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free); } return &slab->base; fail_buffer: amdgpu_winsys_bo_reference(&slab->buffer, NULL); fail: FREE(slab); @@ -1192,36 +1226,42 @@ amdgpu_bo_create(struct radeon_winsys *rws, /* VRAM implies WC. This is not optional. */ assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC); /* NO_CPU_ACCESS is valid with VRAM only. */ assert(domain == RADEON_DOMAIN_VRAM || !(flags & RADEON_FLAG_NO_CPU_ACCESS)); /* Sparse buffers must have NO_CPU_ACCESS set. 
*/ assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS); + struct pb_slabs *last_slab = &ws->bo_slabs[NUM_SLAB_ALLOCATORS - 1]; + unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1); + /* Sub-allocate small buffers from slabs. */ if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) && - size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) && - alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) { + size <= max_slab_entry_size && + /* The alignment must be at most the size of the smallest slab entry or + * the next power of two. */ + alignment <= MAX2(1 << ws->bo_slabs[0].min_order, util_next_power_of_two(size))) { struct pb_slab_entry *entry; int heap = radeon_get_heap_index(domain, flags); if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS) goto no_slab; - entry = pb_slab_alloc(&ws->bo_slabs, size, heap); + struct pb_slabs *slabs = get_slabs(ws, size); + entry = pb_slab_alloc(slabs, size, heap); if (!entry) { /* Clean up buffer managers and try again. 
*/ amdgpu_clean_up_buffer_managers(ws); - entry = pb_slab_alloc(&ws->bo_slabs, size, heap); + entry = pb_slab_alloc(slabs, size, heap); } if (!entry) return NULL; bo = NULL; bo = container_of(entry, bo, u.slab.entry); pipe_reference_init(&bo->base.reference, 1); return &bo->base; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index b20d702670d..91120e3c474 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -88,21 +88,24 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws) { struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws; if (ws->reserve_vmid) amdgpu_vm_unreserve_vmid(ws->dev, 0); if (util_queue_is_initialized(&ws->cs_queue)) util_queue_destroy(&ws->cs_queue); simple_mtx_destroy(&ws->bo_fence_lock); - pb_slabs_deinit(&ws->bo_slabs); + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + if (ws->bo_slabs[i].groups) + pb_slabs_deinit(&ws->bo_slabs[i]); + } pb_cache_deinit(&ws->bo_cache); util_hash_table_destroy(ws->bo_export_table); simple_mtx_destroy(&ws->global_bo_list_lock); simple_mtx_destroy(&ws->bo_export_table_lock); do_winsys_deinit(ws); FREE(rws); } static void amdgpu_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info) @@ -300,30 +303,47 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, if (!do_winsys_init(ws, config, fd)) goto fail_alloc; /* Create managers. */ pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS, 500000, ws->check_vm ? 
1.0f : 2.0f, 0, (ws->info.vram_size + ws->info.gart_size) / 8, amdgpu_bo_destroy, amdgpu_bo_can_reclaim); - if (!pb_slabs_init(&ws->bo_slabs, - AMDGPU_SLAB_MIN_SIZE_LOG2, AMDGPU_SLAB_MAX_SIZE_LOG2, - RADEON_MAX_SLAB_HEAPS, - ws, - amdgpu_bo_can_reclaim_slab, - amdgpu_bo_slab_alloc, - amdgpu_bo_slab_free)) - goto fail_cache; + unsigned min_slab_order = 9; /* 512 bytes */ + unsigned max_slab_order = 16; /* 64 KB - higher numbers increase memory usage */ + unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) / + NUM_SLAB_ALLOCATORS; + + /* Divide the size order range among slab managers. */ + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + unsigned min_order = min_slab_order; + unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator, + max_slab_order); + + if (!pb_slabs_init(&ws->bo_slabs[i], + min_order, max_order, + RADEON_MAX_SLAB_HEAPS, + ws, + amdgpu_bo_can_reclaim_slab, + amdgpu_bo_slab_alloc, + amdgpu_bo_slab_free)) { + amdgpu_winsys_destroy(&ws->base); + simple_mtx_unlock(&dev_tab_mutex); + return NULL; + } + + min_slab_order = max_order + 1; + } - ws->info.min_alloc_size = 1 << AMDGPU_SLAB_MIN_SIZE_LOG2; + ws->info.min_alloc_size = 1 << ws->bo_slabs[0].min_order; /* init reference */ pipe_reference_init(&ws->reference, 1); /* Set functions. 
*/ ws->base.unref = amdgpu_winsys_unref; ws->base.destroy = amdgpu_winsys_destroy; ws->base.query_info = amdgpu_winsys_query_info; ws->base.cs_request_feature = amdgpu_cs_request_feature; ws->base.query_value = amdgpu_query_value; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h index c355eff5262..fc8f04544a9 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h @@ -31,29 +31,31 @@ #include "pipebuffer/pb_cache.h" #include "pipebuffer/pb_slab.h" #include "gallium/drivers/radeon/radeon_winsys.h" #include "addrlib/addrinterface.h" #include "util/simple_mtx.h" #include "util/u_queue.h" #include <amdgpu.h> struct amdgpu_cs; -#define AMDGPU_SLAB_MIN_SIZE_LOG2 9 /* 512 bytes */ -#define AMDGPU_SLAB_MAX_SIZE_LOG2 16 /* 64 KB */ -#define AMDGPU_SLAB_BO_SIZE_LOG2 17 /* 128 KB */ +#define NUM_SLAB_ALLOCATORS 1 struct amdgpu_winsys { struct radeon_winsys base; struct pipe_reference reference; struct pb_cache bo_cache; - struct pb_slabs bo_slabs; + + /* Each slab buffer can only contain suballocations of equal sizes, so we + * need to layer the allocators, so that we don't waste too much memory. + */ + struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS]; amdgpu_device_handle dev; simple_mtx_t bo_fence_lock; int num_cs; /* The number of command streams created. */ unsigned num_total_rejected_cs; uint32_t surf_index_color; uint32_t surf_index_fmask; uint32_t next_bo_unique_id; -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev