Module: Mesa
Branch: main
Commit: f1c909edd5c9159609ab27e7b6a7374796cceab3
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f1c909edd5c9159609ab27e7b6a7374796cceab3

Author: Faith Ekstrand <faith.ekstr...@collabora.com>
Date:   Fri Dec  8 15:15:48 2023 -0600

nvk/nir: Add cbuf analysis to nvk_nir_lower_descriptors()

This builds a list of the 15 most commonly used UBOs or descriptor sets
and places them in a new nvk_cbuf_map data structure.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26615>

---

 src/nouveau/vulkan/nvk_nir_lower_descriptors.c | 461 ++++++++++++++++++++++++-
 src/nouveau/vulkan/nvk_private.h               |   3 +
 src/nouveau/vulkan/nvk_shader.c                |   2 +-
 src/nouveau/vulkan/nvk_shader.h                |  23 +-
 4 files changed, 472 insertions(+), 17 deletions(-)
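
For illustration only, not part of the patch: a small standalone C sketch of the
ranking step described in the commit message. The nvk_cbuf struct is copied from
the nvk_shader.h hunk below; the candidate data and slot limits in the comments
are just examples.

/* Standalone sketch of the cbuf ranking: count uses, sort most-used first,
 * then hand out the remaining binding slots after the root descriptor,
 * which always lives in cbuf 0.  Sample data is invented.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

enum nvk_cbuf_type {
   NVK_CBUF_TYPE_INVALID = 0,
   NVK_CBUF_TYPE_ROOT_DESC,
   NVK_CBUF_TYPE_DESC_SET,
   NVK_CBUF_TYPE_DYNAMIC_UBO,
   NVK_CBUF_TYPE_UBO_DESC,
};

struct nvk_cbuf {
   enum nvk_cbuf_type type;
   uint8_t desc_set;
   uint8_t dynamic_idx;
   uint32_t desc_offset;
};

struct candidate {
   struct nvk_cbuf key;
   uint32_t use_count;
};

/* Most-used first; ties broken on key fields so the order is stable. */
static int
compar(const void *_a, const void *_b)
{
   const struct candidate *a = _a, *b = _b;
   if (a->use_count != b->use_count)
      return a->use_count > b->use_count ? -1 : 1;
   if (a->key.type != b->key.type)
      return a->key.type < b->key.type ? -1 : 1;
   if (a->key.desc_set != b->key.desc_set)
      return a->key.desc_set < b->key.desc_set ? -1 : 1;
   return 0;
}

int
main(void)
{
   struct candidate c[] = {
      { .key = { .type = NVK_CBUF_TYPE_UBO_DESC, .desc_set = 0, .desc_offset = 16 }, .use_count = 3 },
      { .key = { .type = NVK_CBUF_TYPE_DESC_SET, .desc_set = 1 }, .use_count = 12 },
      { .key = { .type = NVK_CBUF_TYPE_DYNAMIC_UBO, .dynamic_idx = 2 }, .use_count = 7 },
   };
   qsort(c, 3, sizeof(c[0]), compar);

   /* cbuf 0 is always the root descriptor table; the rest fill slots up to
    * the hardware limit (16 bindings for 3D, 8 for compute).
    */
   printf("cbuf 0: root descriptors\n");
   for (unsigned i = 0; i < 3; i++)
      printf("cbuf %u: type %d, %u uses\n", i + 1, c[i].key.type, (unsigned)c[i].use_count);
   return 0;
}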

diff --git a/src/nouveau/vulkan/nvk_nir_lower_descriptors.c b/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
index 52e88ef49c2..9ed3d4d9a1d 100644
--- a/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
+++ b/src/nouveau/vulkan/nvk_nir_lower_descriptors.c
@@ -13,23 +13,82 @@
 #include "nir_builder.h"
 #include "nir_deref.h"
 
+struct lower_desc_cbuf {
+   struct nvk_cbuf key;
+
+   uint32_t use_count;
+
+   uint64_t start;
+   uint64_t end;
+};
+
+static uint32_t
+hash_cbuf(const void *data)
+{
+   return _mesa_hash_data(data, sizeof(struct nvk_cbuf));
+}
+
+static bool
+cbufs_equal(const void *a, const void *b)
+{
+   return memcmp(a, b, sizeof(struct nvk_cbuf)) == 0;
+}
+
+static int
+compar_cbufs(const void *_a, const void *_b)
+{
+   const struct lower_desc_cbuf *a = _a;
+   const struct lower_desc_cbuf *b = _b;
+
+#define COMPAR(field, pos) \
+   if (a->field < b->field) return -(pos); \
+   if (a->field > b->field) return (pos);
+
+   /* Sort by most used first */
+   COMPAR(use_count, -1)
+
+   /* Keep the list stable by then sorting by key fields. */
+   COMPAR(key.type, 1)
+   COMPAR(key.desc_set, 1)
+   COMPAR(key.dynamic_idx, 1)
+   COMPAR(key.desc_offset, 1)
+
+#undef COMPAR
+
+   return 0;
+}
+
 struct lower_descriptors_ctx {
    const struct vk_pipeline_layout *layout;
    bool clamp_desc_array_bounds;
    nir_address_format ubo_addr_format;
    nir_address_format ssbo_addr_format;
+
+   struct hash_table *cbufs;
+   struct nvk_cbuf_map *cbuf_map;
 };
 
-static nir_def *
-load_descriptor_set_addr(nir_builder *b, uint32_t set,
-                         UNUSED const struct lower_descriptors_ctx *ctx)
+static void
+record_cbuf_use(const struct nvk_cbuf *key, uint64_t start, uint64_t end,
+                struct lower_descriptors_ctx *ctx)
 {
-   uint32_t set_addr_offset =
-      nvk_root_descriptor_offset(sets) + set * sizeof(uint64_t);
-
-   return nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
-                       nir_imm_int(b, set_addr_offset),
-                       .align_mul = 8, .align_offset = 0, .range = ~0);
+   struct hash_entry *entry = _mesa_hash_table_search(ctx->cbufs, key);
+   if (entry != NULL) {
+      struct lower_desc_cbuf *cbuf = entry->data;
+      cbuf->use_count++;
+      cbuf->start = MIN2(cbuf->start, start);
+      cbuf->end = MAX2(cbuf->end, end);
+   } else {
+      struct lower_desc_cbuf *cbuf =
+         ralloc(ctx->cbufs, struct lower_desc_cbuf);
+      *cbuf = (struct lower_desc_cbuf) {
+         .key = *key,
+         .use_count = 1,
+         .start = start,
+         .end = end,
+      };
+      _mesa_hash_table_insert(ctx->cbufs, &cbuf->key, cbuf);
+   }
 }
 
 static const struct nvk_descriptor_set_binding_layout *
@@ -46,6 +105,366 @@ get_binding_layout(uint32_t set, uint32_t binding,
    return &set_layout->binding[binding];
 }
 
+static void
+record_descriptor_cbuf_use(uint32_t set, uint32_t binding, nir_src *index,
+                           struct lower_descriptors_ctx *ctx)
+{
+   const struct nvk_descriptor_set_binding_layout *binding_layout =
+      get_binding_layout(set, binding, ctx);
+
+   const struct nvk_cbuf key = {
+      .type = NVK_CBUF_TYPE_DESC_SET,
+      .desc_set = set,
+   };
+
+   uint64_t start, end;
+   if (index == NULL) {
+      /* When we don't have an index, assume 0 */
+      start = binding_layout->offset;
+      end = start + binding_layout->stride;
+   } else if (nir_src_is_const(*index)) {
+      start = binding_layout->offset +
+              nir_src_as_uint(*index) * binding_layout->stride;
+      end = start + binding_layout->stride;
+   } else {
+      start = binding_layout->offset;
+      end = start + binding_layout->array_size * binding_layout->stride;
+   }
+
+   record_cbuf_use(&key, start, end, ctx);
+}
+
+static void
+record_vulkan_resource_cbuf_use(nir_intrinsic_instr *intrin,
+                                struct lower_descriptors_ctx *ctx)
+{
+   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
+   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+
+   /* These we'll handle later */
+   if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
+       desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)
+      return;
+
+   record_descriptor_cbuf_use(nir_intrinsic_desc_set(intrin),
+                              nir_intrinsic_binding(intrin),
+                              &intrin->src[0], ctx);
+}
+
+static void
+record_deref_descriptor_cbuf_use(nir_deref_instr *deref,
+                                 struct lower_descriptors_ctx *ctx)
+{
+   nir_src *index_src = NULL;
+   if (deref->deref_type == nir_deref_type_array) {
+      index_src = &deref->arr.index;
+      deref = nir_deref_instr_parent(deref);
+   }
+
+   assert(deref->deref_type == nir_deref_type_var);
+   nir_variable *var = deref->var;
+
+   record_descriptor_cbuf_use(var->data.descriptor_set,
+                              var->data.binding,
+                              index_src, ctx);
+}
+
+static void
+record_tex_descriptor_cbuf_use(nir_tex_instr *tex,
+                               struct lower_descriptors_ctx *ctx)
+{
+   const int texture_src_idx =
+      nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
+   const int sampler_src_idx =
+      nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
+
+   if (texture_src_idx >= 0) {
+      nir_deref_instr *deref = nir_src_as_deref(tex->src[texture_src_idx].src);
+      record_deref_descriptor_cbuf_use(deref, ctx);
+   }
+
+   if (sampler_src_idx >= 0) {
+      nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
+      record_deref_descriptor_cbuf_use(deref, ctx);
+   }
+}
+
+static struct nvk_cbuf
+ubo_deref_to_cbuf(nir_deref_instr *deref,
+                  nir_intrinsic_instr **resource_index_out,
+                  uint64_t *offset_out,
+                  uint64_t *start_out, uint64_t *end_out,
+                  const struct lower_descriptors_ctx *ctx)
+{
+   assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
+
+   /* In case we early return */
+   *offset_out = 0;
+   *start_out = 0;
+   *end_out = UINT64_MAX;
+   *resource_index_out = NULL;
+
+   const struct nvk_cbuf invalid = {
+      .type = NVK_CBUF_TYPE_INVALID,
+   };
+
+   uint64_t offset = 0;
+   uint64_t range = glsl_get_explicit_size(deref->type, false);
+   bool offset_valid = true;
+   while (true) {
+      nir_deref_instr *parent = nir_deref_instr_parent(deref);
+      if (parent == NULL) {
+         assert(deref->deref_type == nir_deref_type_cast);
+         break;
+      }
+
+      switch (deref->deref_type) {
+      case nir_deref_type_var:
+         unreachable("Buffers don't use variables in Vulkan");
+
+      case nir_deref_type_array:
+      case nir_deref_type_array_wildcard: {
+         uint32_t stride = nir_deref_instr_array_stride(deref);
+         if (range > stride)
+            offset_valid = false;
+
+         if (deref->deref_type == nir_deref_type_array &&
+             nir_src_is_const(deref->arr.index)) {
+            offset += nir_src_as_uint(deref->arr.index) * stride;
+         } else {
+            range = glsl_get_length(parent->type) * stride;
+         }
+         break;
+      }
+
+      case nir_deref_type_ptr_as_array:
+         /* All bets are off.  We shouldn't see these most of the time
+          * anyway, even with variable pointers.
+          */
+         offset_valid = false;
+         break;
+
+      case nir_deref_type_struct:
+         offset += glsl_get_struct_field_offset(parent->type,
+                                                deref->strct.index);
+         break;
+
+      case nir_deref_type_cast:
+         /* nir_explicit_io_address_from_deref() can't handle casts */
+         offset_valid = false;
+         break;
+
+      default:
+         unreachable("Unknown deref type");
+      }
+
+      deref = parent;
+   }
+
+   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
+   if (load_desc == NULL ||
+       load_desc->intrinsic != nir_intrinsic_load_vulkan_descriptor)
+      return invalid;
+
+   nir_intrinsic_instr *res_index = nir_src_as_intrinsic(load_desc->src[0]);
+   if (res_index == NULL ||
+       res_index->intrinsic != nir_intrinsic_vulkan_resource_index)
+      return invalid;
+
+   /* We try to early return as little as possible prior to this point so we
+    * can return the resource index intrinsic in as many cases as possible.
+    * After this point, though, early returns are fair game.
+    */
+   *resource_index_out = res_index;
+
+   if (!offset_valid || !nir_src_is_const(res_index->src[0]))
+      return invalid;
+
+   uint32_t set = nir_intrinsic_desc_set(res_index);
+   uint32_t binding = nir_intrinsic_binding(res_index);
+   uint32_t index = nir_src_as_uint(res_index->src[0]);
+
+   const struct nvk_descriptor_set_binding_layout *binding_layout =
+      get_binding_layout(set, binding, ctx);
+
+   switch (binding_layout->type) {
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
+      *offset_out = 0;
+      *start_out = offset;
+      *end_out = offset + range;
+      return (struct nvk_cbuf) {
+         .type = NVK_CBUF_TYPE_UBO_DESC,
+         .desc_set = set,
+         .desc_offset = binding_layout->offset +
+                        index * binding_layout->stride,
+      };
+   }
+
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
+      uint8_t dynamic_buffer_index =
+         nvk_descriptor_set_layout_dynbuf_start(ctx->layout, set) +
+         binding_layout->dynamic_buffer_index + index;
+
+      *offset_out = 0;
+      *start_out = offset;
+      *end_out = offset + range;
+
+      return (struct nvk_cbuf) {
+         .type = NVK_CBUF_TYPE_DYNAMIC_UBO,
+         .dynamic_idx = dynamic_buffer_index,
+      };
+   }
+
+   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
+      *offset_out = binding_layout->offset;
+      *start_out = binding_layout->offset + offset;
+      *end_out = *start_out + range;
+
+      return (struct nvk_cbuf) {
+         .type = NVK_CBUF_TYPE_DESC_SET,
+         .desc_set = set,
+      };
+   }
+
+   default:
+      return invalid;
+   }
+}
+
+static void
+record_load_ubo_cbuf_uses(nir_deref_instr *deref,
+                          struct lower_descriptors_ctx *ctx)
+{
+   assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
+
+   UNUSED uint64_t offset;
+   uint64_t start, end;
+   nir_intrinsic_instr *res_index;
+   struct nvk_cbuf cbuf =
+      ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);
+
+   if (cbuf.type != NVK_CBUF_TYPE_INVALID) {
+      record_cbuf_use(&cbuf, start, end, ctx);
+   } else if (res_index != NULL) {
+      record_vulkan_resource_cbuf_use(res_index, ctx);
+   }
+}
+
+static bool
+record_cbuf_uses_instr(UNUSED nir_builder *b, nir_instr *instr, void *_ctx)
+{
+   struct lower_descriptors_ctx *ctx = _ctx;
+
+   switch (instr->type) {
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_vulkan_resource_index:
+         record_vulkan_resource_cbuf_use(intrin, ctx);
+         return false;
+
+      case nir_intrinsic_load_deref: {
+         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+         if (nir_deref_mode_is(deref, nir_var_mem_ubo))
+            record_load_ubo_cbuf_uses(deref, ctx);
+         return false;
+      }
+
+      case nir_intrinsic_image_deref_load:
+      case nir_intrinsic_image_deref_store:
+      case nir_intrinsic_image_deref_atomic:
+      case nir_intrinsic_image_deref_atomic_swap:
+      case nir_intrinsic_image_deref_size:
+      case nir_intrinsic_image_deref_samples:
+      case nir_intrinsic_image_deref_load_param_intel:
+      case nir_intrinsic_image_deref_load_raw_intel:
+      case nir_intrinsic_image_deref_store_raw_intel: {
+         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+         record_deref_descriptor_cbuf_use(deref, ctx);
+         return false;
+      }
+
+      default:
+         return false;
+      }
+      unreachable("All cases return false");
+   }
+
+   case nir_instr_type_tex:
+      record_tex_descriptor_cbuf_use(nir_instr_as_tex(instr), ctx);
+      return false;
+
+   default:
+      return false;
+   }
+}
+
+static void
+build_cbuf_map(nir_shader *nir, struct lower_descriptors_ctx *ctx)
+{
+   ctx->cbufs = _mesa_hash_table_create(NULL, hash_cbuf, cbufs_equal);
+
+   nir_shader_instructions_pass(nir, record_cbuf_uses_instr,
+                                nir_metadata_all, (void *)ctx);
+
+   struct lower_desc_cbuf *cbufs =
+      ralloc_array(ctx->cbufs, struct lower_desc_cbuf,
+                   _mesa_hash_table_num_entries(ctx->cbufs));
+
+   uint32_t num_cbufs = 0;
+   hash_table_foreach(ctx->cbufs, entry) {
+      struct lower_desc_cbuf *cbuf = entry->data;
+
+      /* We currently only start cbufs at the beginning so if it starts after
+       * the max cbuf size, there's no point in including it in the list.
+       */
+      if (cbuf->start > NVK_MAX_CBUF_SIZE)
+         continue;
+
+      cbufs[num_cbufs++] = *cbuf;
+   }
+
+   qsort(cbufs, num_cbufs, sizeof(*cbufs), compar_cbufs);
+
+   uint32_t mapped_cbuf_count = 0;
+
+   /* Root descriptors always go in cbuf 0 */
+   ctx->cbuf_map->cbufs[mapped_cbuf_count++] = (struct nvk_cbuf) {
+      .type = NVK_CBUF_TYPE_ROOT_DESC,
+   };
+
+   uint8_t max_cbuf_bindings;
+   if (nir->info.stage == MESA_SHADER_COMPUTE ||
+       nir->info.stage == MESA_SHADER_KERNEL) {
+      max_cbuf_bindings = 8;
+   } else {
+      max_cbuf_bindings = 16;
+   }
+
+   for (uint32_t i = 0; i < num_cbufs; i++) {
+      if (mapped_cbuf_count >= max_cbuf_bindings)
+         break;
+
+      ctx->cbuf_map->cbufs[mapped_cbuf_count++] = cbufs[i].key;
+   }
+   ctx->cbuf_map->cbuf_count = mapped_cbuf_count;
+
+   ralloc_free(ctx->cbufs);
+   ctx->cbufs = NULL;
+}
+
+static nir_def *
+load_descriptor_set_addr(nir_builder *b, uint32_t set,
+                         UNUSED const struct lower_descriptors_ctx *ctx)
+{
+   uint32_t set_addr_offset =
+      nvk_root_descriptor_offset(sets) + set * sizeof(uint64_t);
+
+   return nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
+                       nir_imm_int(b, set_addr_offset),
+                       .align_mul = 8, .align_offset = 0, .range = ~0);
+}
+
 static nir_def *
 load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
                 uint32_t set, uint32_t binding, nir_def *index,
@@ -651,7 +1070,8 @@ lower_ssbo_descriptor_instr(nir_builder *b, nir_instr *instr,
 bool
 nvk_nir_lower_descriptors(nir_shader *nir,
                           const struct vk_pipeline_robustness_state *rs,
-                          const struct vk_pipeline_layout *layout)
+                          const struct vk_pipeline_layout *layout,
+                          struct nvk_cbuf_map *cbuf_map_out)
 {
    struct lower_descriptors_ctx ctx = {
       .layout = layout,
@@ -663,12 +1083,23 @@ nvk_nir_lower_descriptors(nir_shader *nir,
       .ubo_addr_format = nvk_buffer_addr_format(rs->uniform_buffers),
    };
 
-   /* We run in two passes.  The first attempts to lower everything it can.
-    * In the variable pointers case, some SSBO intrinsics may fail to lower
-    * but that's okay.  The second pass cleans up any SSBO intrinsics which
-    * are left and lowers them to slightly less efficient but variable-
-    * pointers-correct versions.
+   /* We run in three passes:
+    *
+    *  1. Find ranges of UBOs that we can promote to bound UBOs.  Nothing is
+    *     actually lowered in this pass.  It's just analysis.
+    *
+    *  2. Attempt to lower everything with direct descriptors.  This may fail
+    *     to lower some SSBO intrinsics when variable pointers are used.
+    *
+    *  3. Clean up any SSBO intrinsics which are left and lower them to
+    *     slightly less efficient but variable-pointers-correct versions.
     */
+
+   if (cbuf_map_out != NULL) {
+      ctx.cbuf_map = cbuf_map_out;
+      build_cbuf_map(nir, &ctx);
+   }
+
    bool pass_lower_descriptors =
       nir_shader_instructions_pass(nir, try_lower_descriptors_instr,
                                    nir_metadata_block_index |
diff --git a/src/nouveau/vulkan/nvk_private.h b/src/nouveau/vulkan/nvk_private.h
index 06924f081ea..89c464b8eb5 100644
--- a/src/nouveau/vulkan/nvk_private.h
+++ b/src/nouveau/vulkan/nvk_private.h
@@ -30,6 +30,9 @@
 #define NVK_SPARSE_ADDR_SPACE_SIZE (1ull << 39)
 #define NVK_MAX_BUFFER_SIZE (1ull << 31)
 
+/* Max size of a bound cbuf */
+#define NVK_MAX_CBUF_SIZE (1u << 16)
+
 struct nvk_addr_range {
    uint64_t addr;
    uint64_t range;
diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c
index 34245429432..ab6f98fbefd 100644
--- a/src/nouveau/vulkan/nvk_shader.c
+++ b/src/nouveau/vulkan/nvk_shader.c
@@ -348,7 +348,7 @@ nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
     */
    assert(dev->pdev->info.cls_eng3d >= MAXWELL_A || !nir_has_image_var(nir));
 
-   NIR_PASS(_, nir, nvk_nir_lower_descriptors, rs, layout);
+   NIR_PASS(_, nir, nvk_nir_lower_descriptors, rs, layout, NULL);
    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
             nir_address_format_64bit_global);
    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
diff --git a/src/nouveau/vulkan/nvk_shader.h b/src/nouveau/vulkan/nvk_shader.h
index 62e1879e161..a432a237336 100644
--- a/src/nouveau/vulkan/nvk_shader.h
+++ b/src/nouveau/vulkan/nvk_shader.h
@@ -25,6 +25,26 @@ struct vk_shader_module;
 #define TU102_SHADER_HEADER_SIZE (32 * 4)
 #define NVC0_MAX_SHADER_HEADER_SIZE TU102_SHADER_HEADER_SIZE
 
+enum PACKED nvk_cbuf_type {
+   NVK_CBUF_TYPE_INVALID = 0,
+   NVK_CBUF_TYPE_ROOT_DESC,
+   NVK_CBUF_TYPE_DESC_SET,
+   NVK_CBUF_TYPE_DYNAMIC_UBO,
+   NVK_CBUF_TYPE_UBO_DESC,
+};
+
+struct nvk_cbuf {
+   enum nvk_cbuf_type type;
+   uint8_t desc_set;
+   uint8_t dynamic_idx;
+   uint32_t desc_offset;
+};
+
+struct nvk_cbuf_map {
+   uint32_t cbuf_count;
+   struct nvk_cbuf cbufs[16];
+};
+
 struct nvk_shader {
    struct nak_shader_info info;
 
@@ -73,7 +93,8 @@ nvk_physical_device_spirv_options(const struct nvk_physical_device *pdev,
 bool
 nvk_nir_lower_descriptors(nir_shader *nir,
                           const struct vk_pipeline_robustness_state *rs,
-                          const struct vk_pipeline_layout *layout);
+                          const struct vk_pipeline_layout *layout,
+                          struct nvk_cbuf_map *cbuf_map_out);
 
 VkResult
 nvk_shader_stage_to_nir(struct nvk_device *dev,
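
For orientation only, not part of this patch: a sketch of how a later caller
might pass a real map instead of the NULL used in nvk_lower_nir() above and
consume the result. nvk_bind_cbuf() is a made-up placeholder, not an existing
NVK function.

static void
lower_and_collect_cbufs(nir_shader *nir,
                        const struct vk_pipeline_robustness_state *rs,
                        const struct vk_pipeline_layout *layout)
{
   struct nvk_cbuf_map cbuf_map = { .cbuf_count = 0 };
   NIR_PASS(_, nir, nvk_nir_lower_descriptors, rs, layout, &cbuf_map);

   /* cbufs[0] is the root descriptor table; the remaining entries are the
    * most frequently used UBO/descriptor-set ranges found by the analysis.
    */
   for (uint32_t i = 0; i < cbuf_map.cbuf_count; i++)
      nvk_bind_cbuf(i, &cbuf_map.cbufs[i]);   /* hypothetical helper */
}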
