Module: Mesa
Branch: master
Commit: c9d4b571ad5dc3e622603a0f50d293192850d749
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c9d4b571ad5dc3e622603a0f50d293192850d749

Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
Date:   Sun Mar 12 22:43:51 2017 +0100

radv: Add suballocation for shaders.

This reduces the number of BOs that we need for the BO lists during
a submission.

The allocator currently uses a fairly simple linear search to find free
space. That could eventually be replaced by a binary tree which, with some
per-node info, would make checking for space O(1) and finding it O(log n)
in the number of buffers in that slab.

Signed-off-by: Bas Nieuwenhuizen <ba...@google.com>
Reviewed-by: Dave Airlie <airl...@redhat.com>

---

 src/amd/vulkan/radv_cmd_buffer.c     | 15 ++++----
 src/amd/vulkan/radv_device.c         |  5 +++
 src/amd/vulkan/radv_pipeline.c       | 66 +++++++++++++++++++++++++++++++-----
 src/amd/vulkan/radv_pipeline_cache.c |  7 ++--
 src/amd/vulkan/radv_private.h        | 21 ++++++++++++
 5 files changed, 93 insertions(+), 21 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 7b41e16e91..e7bff2a88c 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -546,7 +546,7 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
                struct ac_vs_output_info *outinfo)
 {
        struct radeon_winsys *ws = cmd_buffer->device->ws;
-       uint64_t va = ws->buffer_get_va(shader->bo);
+       uint64_t va = ws->buffer_get_va(shader->bo) + shader->bo_offset;
        unsigned export_count;
 
        ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
@@ -596,7 +596,7 @@ radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer,
                struct ac_es_output_info *outinfo)
 {
        struct radeon_winsys *ws = cmd_buffer->device->ws;
-       uint64_t va = ws->buffer_get_va(shader->bo);
+       uint64_t va = ws->buffer_get_va(shader->bo) + shader->bo_offset;
 
        ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
        radv_emit_prefetch(cmd_buffer, va, shader->code_size);
@@ -615,7 +615,7 @@ radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer,
                struct radv_shader_variant *shader)
 {
        struct radeon_winsys *ws = cmd_buffer->device->ws;
-       uint64_t va = ws->buffer_get_va(shader->bo);
+       uint64_t va = ws->buffer_get_va(shader->bo) + shader->bo_offset;
        uint32_t rsrc2 = shader->rsrc2;
 
        ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
@@ -640,7 +640,7 @@ radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer,
                struct radv_shader_variant *shader)
 {
        struct radeon_winsys *ws = cmd_buffer->device->ws;
-       uint64_t va = ws->buffer_get_va(shader->bo);
+       uint64_t va = ws->buffer_get_va(shader->bo) + shader->bo_offset;
 
        ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
        radv_emit_prefetch(cmd_buffer, va, shader->code_size);
@@ -775,7 +775,7 @@ radv_emit_geometry_shader(struct radv_cmd_buffer 
*cmd_buffer,
                               S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
                               S_028B90_ENABLE(gs_num_invocations > 0));
 
-       va = ws->buffer_get_va(gs->bo);
+       va = ws->buffer_get_va(gs->bo) + gs->bo_offset;
        ws->cs_add_buffer(cmd_buffer->cs, gs->bo, 8);
        radv_emit_prefetch(cmd_buffer, va, gs->code_size);
 
@@ -816,8 +816,7 @@ radv_emit_fragment_shader(struct radv_cmd_buffer 
*cmd_buffer,
        assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
 
        ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-
-       va = ws->buffer_get_va(ps->bo);
+       va = ws->buffer_get_va(ps->bo) + ps->bo_offset;
        ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8);
        radv_emit_prefetch(cmd_buffer, va, ps->code_size);
 
@@ -2256,7 +2255,7 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer 
*cmd_buffer)
        cmd_buffer->state.emitted_compute_pipeline = pipeline;
 
        compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
-       va = ws->buffer_get_va(compute_shader->bo);
+       va = ws->buffer_get_va(compute_shader->bo) + compute_shader->bo_offset;
 
        ws->cs_add_buffer(cmd_buffer->cs, compute_shader->bo, 8);
        radv_emit_prefetch(cmd_buffer, va, compute_shader->code_size);
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 19f1e10577..13fb19cf48 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1080,6 +1080,9 @@ VkResult radv_CreateDevice(
        else
                device->alloc = physical_device->instance->alloc;
 
+       mtx_init(&device->shader_slab_mutex, mtx_plain);
+       list_inithead(&device->shader_slabs);
+
        for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
                const VkDeviceQueueCreateInfo *queue_create = 
&pCreateInfo->pQueueCreateInfos[i];
                uint32_t qfi = queue_create->queueFamilyIndex;
@@ -1270,6 +1273,8 @@ void radv_DestroyDevice(
        VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
        radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
 
+       radv_destroy_shader_slabs(device);
+
        vk_free(&device->alloc, device);
 }
 
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 28389368ce..bd5eeb776c 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -381,7 +381,10 @@ void radv_shader_variant_destroy(struct radv_device 
*device,
        if (!p_atomic_dec_zero(&variant->ref_count))
                return;
 
-       device->ws->buffer_destroy(variant->bo);
+       mtx_lock(&device->shader_slab_mutex);
+       list_del(&variant->slab_list);
+       mtx_unlock(&device->shader_slab_mutex);
+
        free(variant);
 }
 
@@ -431,14 +434,8 @@ static void radv_fill_shader_variant(struct radv_device 
*device,
                S_00B848_DX10_CLAMP(1) |
                S_00B848_FLOAT_MODE(variant->config.float_mode);
 
-       variant->bo = device->ws->buffer_create(device->ws, binary->code_size, 
256,
-                                               RADEON_DOMAIN_VRAM, 
RADEON_FLAG_CPU_ACCESS);
-
-       void *ptr = device->ws->buffer_map(variant->bo);
+       void *ptr = radv_alloc_shader_memory(device, variant);
        memcpy(ptr, binary->code, binary->code_size);
-       device->ws->buffer_unmap(variant->bo);
-
-
 }
 
 static struct radv_shader_variant *radv_shader_variant_create(struct 
radv_device *device,
@@ -2426,3 +2423,94 @@ VkResult radv_CreateComputePipelines(
 
        return result;
 }
+
+/**
+ * Reserve space for a shader's machine code inside a shared slab BO.
+ *
+ * Every slab on device->shader_slabs is searched first-fit: the gaps between
+ * consecutive shaders (kept sorted by bo_offset) and then the tail of the slab
+ * are tested against shader->code_size, with candidate offsets aligned to 256
+ * bytes. If nothing fits, a new slab is created and the shader goes at offset 0.
+ * On success shader->bo and shader->bo_offset are set and the shader is linked
+ * into the slab's shader list.
+ *
+ * Returns the CPU address to copy the code to, or NULL if a new slab could not
+ * be allocated or mapped.
+ */
+void *radv_alloc_shader_memory(struct radv_device *device,
+                               struct radv_shader_variant *shader)
+{
+       mtx_lock(&device->shader_slab_mutex);
+       list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
+               uint64_t offset = 0;
+               list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list) {
+                       /* Hole between the end of the previous shader and s. */
+                       if (s->bo_offset - offset >= shader->code_size) {
+                               shader->bo = slab->bo;
+                               shader->bo_offset = offset;
+                               /* Insert before s so the list stays sorted. */
+                               list_addtail(&shader->slab_list, &s->slab_list);
+                               mtx_unlock(&device->shader_slab_mutex);
+                               return slab->ptr + offset;
+                       }
+                       offset = align_u64(s->bo_offset + s->code_size, 256);
+               }
+               /* Tail of the slab; the offset check guards the unsigned subtraction. */
+               if (offset <= slab->size &&
+                   slab->size - offset >= shader->code_size) {
+                       shader->bo = slab->bo;
+                       shader->bo_offset = offset;
+                       list_addtail(&shader->slab_list, &slab->shaders);
+                       mtx_unlock(&device->shader_slab_mutex);
+                       return slab->ptr + offset;
+               }
+       }
+
+       /* Nothing fits: build a new slab. The lock is not held while the BO is
+        * created, so concurrent callers may each add a slab of their own. */
+       mtx_unlock(&device->shader_slab_mutex);
+       struct radv_shader_slab *slab = calloc(1, sizeof(*slab));
+       if (!slab)
+               return NULL;
+
+       /* 256 KiB by default, but never smaller than the shader itself:
+        * a larger binary would otherwise be copied past the end of the BO. */
+       slab->size = 256 * 1024;
+       if (shader->code_size > slab->size)
+               slab->size = align_u64(shader->code_size, 256);
+       slab->bo = device->ws->buffer_create(device->ws, slab->size, 256,
+                                            RADEON_DOMAIN_VRAM, 0);
+       if (!slab->bo) {
+               free(slab);
+               return NULL;
+       }
+       slab->ptr = (char*)device->ws->buffer_map(slab->bo);
+       if (!slab->ptr) {
+               device->ws->buffer_destroy(slab->bo);
+               free(slab);
+               return NULL;
+       }
+       list_inithead(&slab->shaders);
+
+       mtx_lock(&device->shader_slab_mutex);
+       list_add(&slab->slabs, &device->shader_slabs);
+
+       shader->bo = slab->bo;
+       shader->bo_offset = 0;
+       list_add(&shader->slab_list, &slab->shaders);
+       mtx_unlock(&device->shader_slab_mutex);
+       return slab->ptr;
+}
+
+/**
+ * Destroy the BO of every shader slab, free the slab bookkeeping and destroy
+ * the slab mutex. Called from radv_DestroyDevice().
+ */
+void radv_destroy_shader_slabs(struct radv_device *device)
+{
+       list_for_each_entry_safe(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
+               device->ws->buffer_destroy(slab->bo);
+               free(slab);
+       }
+       mtx_destroy(&device->shader_slab_mutex);
+}
diff --git a/src/amd/vulkan/radv_pipeline_cache.c 
b/src/amd/vulkan/radv_pipeline_cache.c
index 86479f6623..3995434d90 100644
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -168,6 +168,7 @@ radv_create_shader_variant_from_pipeline_cache(struct 
radv_device *device,
                if (!variant)
                        return NULL;
 
+               variant->code_size = entry->code_size;
                variant->config = entry->config;
                variant->info = entry->variant_info;
                variant->rsrc1 = entry->rsrc1;
@@ -175,12 +176,8 @@ radv_create_shader_variant_from_pipeline_cache(struct 
radv_device *device,
                variant->code_size = entry->code_size;
                variant->ref_count = 1;
 
-               variant->bo = device->ws->buffer_create(device->ws, 
entry->code_size, 256,
-                                               RADEON_DOMAIN_VRAM, 
RADEON_FLAG_CPU_ACCESS);
-
-               void *ptr = device->ws->buffer_map(variant->bo);
+               void *ptr = radv_alloc_shader_memory(device, variant);
                memcpy(ptr, entry->code, entry->code_size);
-               device->ws->buffer_unmap(variant->bo);
 
                entry->variant = variant;
        }
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 25afd497da..8e86f5c1d5 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -549,6 +549,9 @@ struct radv_device {
        struct radv_pipeline_cache *                mem_cache;
 
        uint32_t image_mrt_offset_counter;
+
+       struct list_head shader_slabs;
+       mtx_t shader_slab_mutex;
 };
 
 struct radv_device_memory {
@@ -981,17 +984,35 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
             stage = __builtin_ffs(__tmp) - 1, __tmp;                   \
             __tmp &= ~(1 << (stage)))
 
+
+struct radv_shader_slab { /* one BO that several shaders are suballocated from */
+       struct list_head slabs; /* link in radv_device::shader_slabs */
+       struct list_head shaders; /* radv_shader_variant::slab_list entries placed in this slab */
+       struct radeon_winsys_bo *bo; /* backing buffer created by radv_alloc_shader_memory() */
+       uint64_t size; /* size in bytes requested for bo */
+       char *ptr; /* buffer_map() result for bo; shader code is written at ptr + bo_offset */
+};
+
 struct radv_shader_variant {
        uint32_t ref_count;
 
        struct radeon_winsys_bo *bo;
+       uint64_t bo_offset; /* byte offset of this shader's code inside bo; added to buffer_get_va(bo) */
        struct ac_shader_config config;
        struct ac_shader_variant_info info;
        unsigned rsrc1;
        unsigned rsrc2;
        uint32_t code_size;
+
+       struct list_head slab_list; /* link in radv_shader_slab::shaders */
 };
 
+
+void *radv_alloc_shader_memory(struct radv_device *device,
+                              struct radv_shader_variant *shader);
+
+void radv_destroy_shader_slabs(struct radv_device *device);
+
 struct radv_depth_stencil_state {
        uint32_t db_depth_control;
        uint32_t db_stencil_control;

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to