Module: Mesa
Branch: main
Commit: 24527b3d28332a453ad7e8bdcb3d49cc08ee9747
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=24527b3d28332a453ad7e8bdcb3d49cc08ee9747

Author: Connor Abbott <[email protected]>
Date:   Tue Oct  3 19:21:49 2023 +0200

tu: Move shader linking to tu_shader.cc

This creates a function that can be used by both pipelines and shader objects.

Note that we cannot yet call tu_CreateShadersEXT directly inside the
pipeline due to things like pipeline feedback, multiview, and so on, but
further extensions will hopefully bring us closer to that ideal.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25679>
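
As a caller-side sketch (illustrative only, not part of the patch: the
local arrays and the key setup shown here are assumptions based on the
declarations this commit adds to tu_shader.h), both the graphics and
compute paths now reduce to building per-stage tu_shader_keys and calling
the shared helper:

   /* Per-stage keys; subgroup-size selection is a separate helper now. */
   struct tu_shader_key keys[MESA_SHADER_STAGES] = { };
   for (gl_shader_stage s = MESA_SHADER_VERTEX; s < MESA_SHADER_STAGES;
        s = (gl_shader_stage) (s + 1))
      tu_shader_key_subgroup_size(&keys[s], true /* allow varying size */,
                                  false /* require full subgroups */,
                                  NULL /* no required size */, device);

   /* SPIR-V -> NIR, cross-stage linking, and tu_shader creation in one
    * call.  Passing NULL for the disassembly and nir_out parameters skips
    * executable-info capture and post-link NIR retention.
    */
   VkResult result =
      tu_compile_shaders(device, stage_infos, nir, keys, layout,
                         pipeline_sha1, shaders,
                         NULL, NULL, NULL, stage_feedbacks);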

---

 src/freedreno/vulkan/tu_pipeline.cc | 440 ++++++++----------------------------
 src/freedreno/vulkan/tu_pipeline.h  |   2 -
 src/freedreno/vulkan/tu_shader.cc   | 268 ++++++++++++++++++++++
 src/freedreno/vulkan/tu_shader.h    |  20 ++
 4 files changed, 387 insertions(+), 343 deletions(-)

diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc
index 92a7efc2c55..b520b1d9195 100644
--- a/src/freedreno/vulkan/tu_pipeline.cc
+++ b/src/freedreno/vulkan/tu_pipeline.cc
@@ -1314,111 +1314,6 @@ tu_pipeline_allocate_cs(struct tu_device *dev,
    return VK_SUCCESS;
 }
 
-static void
-tu_pipeline_shader_key_init(struct ir3_shader_key *key,
-                            const struct tu_pipeline *pipeline,
-                            struct tu_pipeline_builder *builder,
-                            nir_shader **nir)
-{
-   /* We set this after we compile to NIR because we need the prim mode */
-   key->tessellation = IR3_TESS_NONE;
-
-   for (unsigned i = 0; i < builder->num_libraries; i++) {
-      if (!(builder->libraries[i]->state &
-            (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
-             VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)))
-         continue;
-
-      const struct ir3_shader_key *library_key =
-         &builder->libraries[i]->ir3_key;
-
-      if (library_key->tessellation != IR3_TESS_NONE)
-         key->tessellation = library_key->tessellation;
-      key->has_gs |= library_key->has_gs;
-      key->sample_shading |= library_key->sample_shading;
-   }
-
-   for (uint32_t i = 0; i < builder->create_info->stageCount; i++) {
-      if (builder->create_info->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT) {
-         key->has_gs = true;
-         break;
-      }
-   }
-
-   if (!(builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT))
-      return;
-
-   if (builder->rasterizer_discard)
-      return;
-
-   const VkPipelineMultisampleStateCreateInfo *msaa_info =
-      builder->create_info->pMultisampleState;
-
-   /* The 1.3.215 spec says:
-    *
-    *    Sample shading can be used to specify a minimum number of unique
-    *    samples to process for each fragment. If sample shading is enabled,
-    *    an implementation must provide a minimum of
-    *
-    *       max(ceil(minSampleShadingFactor * totalSamples), 1)
-    *
-    *    unique associated data for each fragment, where
-    *    minSampleShadingFactor is the minimum fraction of sample shading.
-    *
-    * The definition is pretty much the same as OpenGL's GL_SAMPLE_SHADING.
-    * They both require unique associated data.
-    *
-    * There are discussions to change the definition, such that
-    * sampleShadingEnable does not imply unique associated data.  Before the
-    * discussions are settled and before apps (i.e., ANGLE) are fixed to
-    * follow the new and incompatible definition, we should stick to the
-    * current definition.
-    *
-    * Note that ir3_shader_key::sample_shading is not actually used by ir3,
-    * just checked in tu6_emit_fs_inputs.  We will also copy the value to
-    * tu_shader_key::force_sample_interp in a bit.
-    */
-   if (msaa_info && msaa_info->sampleShadingEnable)
-      key->sample_shading = true;
-}
-
-static uint32_t
-tu6_get_tessmode(const struct nir_shader *shader)
-{
-   enum tess_primitive_mode primitive_mode = shader->info.tess._primitive_mode;
-   switch (primitive_mode) {
-   case TESS_PRIMITIVE_ISOLINES:
-      return IR3_TESS_ISOLINES;
-   case TESS_PRIMITIVE_TRIANGLES:
-      return IR3_TESS_TRIANGLES;
-   case TESS_PRIMITIVE_QUADS:
-      return IR3_TESS_QUADS;
-   case TESS_PRIMITIVE_UNSPECIFIED:
-      return IR3_TESS_NONE;
-   default:
-      unreachable("bad tessmode");
-   }
-}
-
-static uint64_t
-tu_upload_variant(struct tu_pipeline *pipeline,
-                  const struct ir3_shader_variant *variant)
-{
-   struct tu_cs_memory memory;
-
-   if (!variant)
-      return 0;
-
-   /* this expects to get enough alignment because shaders are allocated first
-    * and total size is always aligned correctly
-    * note: an assert in tu6_emit_xs_config validates the alignment
-    */
-   tu_cs_alloc(&pipeline->cs, variant->info.size / 4, 1, &memory);
-
-   memcpy(memory.map, variant->bin, variant->info.size);
-   return memory.iova;
-}
-
 static void
 tu_append_executable(struct tu_pipeline *pipeline,
                      const struct ir3_shader_variant *variant,
@@ -1436,106 +1331,6 @@ tu_append_executable(struct tu_pipeline *pipeline,
    util_dynarray_append(&pipeline->executables, struct tu_pipeline_executable, exe);
 }
 
-static void
-tu_link_shaders(struct tu_pipeline_builder *builder,
-                nir_shader **shaders, unsigned shaders_count)
-{
-   nir_shader *consumer = NULL;
-   for (gl_shader_stage stage = (gl_shader_stage) (shaders_count - 1);
-        stage >= MESA_SHADER_VERTEX; stage = (gl_shader_stage) (stage - 1)) {
-      if (!shaders[stage])
-         continue;
-
-      nir_shader *producer = shaders[stage];
-      if (!consumer) {
-         consumer = producer;
-         continue;
-      }
-
-      if (nir_link_opt_varyings(producer, consumer)) {
-         NIR_PASS_V(consumer, nir_opt_constant_folding);
-         NIR_PASS_V(consumer, nir_opt_algebraic);
-         NIR_PASS_V(consumer, nir_opt_dce);
-      }
-
-      const nir_remove_dead_variables_options out_var_opts = {
-         .can_remove_var = nir_vk_is_not_xfb_output,
-      };
-      NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, &out_var_opts);
-
-      NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
-
-      bool progress = nir_remove_unused_varyings(producer, consumer);
-
-      nir_compact_varyings(producer, consumer, true);
-      if (progress) {
-         if (nir_lower_global_vars_to_local(producer)) {
-            /* Remove dead writes, which can remove input loads */
-            NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
-            NIR_PASS_V(producer, nir_opt_dce);
-         }
-         nir_lower_global_vars_to_local(consumer);
-      }
-
-      consumer = producer;
-   }
-
-   /* Gather info after linking so that we can fill out the ir3 shader key.
-    */
-   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
-        stage <= MESA_SHADER_FRAGMENT; stage = (gl_shader_stage) (stage + 1)) {
-      if (shaders[stage])
-         nir_shader_gather_info(shaders[stage],
-                                nir_shader_get_entrypoint(shaders[stage]));
-   }
-}
-
-static void
-tu_shader_key_init(struct tu_shader_key *key,
-                   const VkPipelineShaderStageCreateInfo *stage_info,
-                   struct tu_device *dev)
-{
-   enum ir3_wavesize_option api_wavesize, real_wavesize;
-   if (!dev->physical_device->info->a6xx.supports_double_threadsize) {
-      api_wavesize = IR3_SINGLE_ONLY;
-      real_wavesize = IR3_SINGLE_ONLY;
-   } else if (stage_info) {
-      if (stage_info->flags &
-          VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT) {
-         api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
-      } else {
-         const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *size_info =
-            vk_find_struct_const(stage_info->pNext,
-                                 PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
-
-         if (size_info) {
-            if (size_info->requiredSubgroupSize == dev->compiler->threadsize_base) {
-               api_wavesize = IR3_SINGLE_ONLY;
-            } else {
-               assert(size_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2);
-               api_wavesize = IR3_DOUBLE_ONLY;
-            }
-         } else {
-            /* Match the exposed subgroupSize. */
-            api_wavesize = IR3_DOUBLE_ONLY;
-         }
-
-         if (stage_info->flags &
-             VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT)
-            real_wavesize = api_wavesize;
-         else if (api_wavesize == IR3_SINGLE_ONLY)
-            real_wavesize = IR3_SINGLE_ONLY;
-         else
-            real_wavesize = IR3_SINGLE_OR_DOUBLE;
-      }
-   } else {
-      api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
-   }
-
-   key->api_wavesize = api_wavesize;
-   key->real_wavesize = real_wavesize;
-}
-
 static void
 tu_hash_stage(struct mesa_sha1 *ctx,
               const VkPipelineShaderStageCreateInfo *stage,
@@ -1574,7 +1369,6 @@ tu_hash_shaders(unsigned char *hash,
                 nir_shader *const *nir,
                 const struct tu_pipeline_layout *layout,
                 const struct tu_shader_key *keys,
-                const struct ir3_shader_key *ir3_key,
                 VkGraphicsPipelineLibraryFlagsEXT state,
                 const struct ir3_compiler *compiler)
 {
@@ -1585,8 +1379,6 @@ tu_hash_shaders(unsigned char *hash,
    if (layout)
       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
 
-   _mesa_sha1_update(&ctx, ir3_key, sizeof(ir3_key));
-
    for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
       if (stages[i] || nir[i]) {
          tu_hash_stage(&ctx, stages[i], nir[i], &keys[i]);
@@ -1772,6 +1564,10 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
       builder->create_flags &
       VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
 
+   bool retain_nir =
+      builder->create_flags &
+      VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;
+
    int64_t pipeline_start = os_time_get_nano();
 
    const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
@@ -1793,11 +1589,26 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
    struct tu_shader *shaders[ARRAY_SIZE(stage_infos)] = { NULL };
    nir_shader *post_link_nir[ARRAY_SIZE(nir)] = { NULL };
    char *nir_initial_disasm[ARRAY_SIZE(stage_infos)] = { NULL };
+   bool cache_hit = false;
 
    struct tu_shader_key keys[ARRAY_SIZE(stage_infos)] = { };
    for (gl_shader_stage stage = MESA_SHADER_VERTEX;
         stage < ARRAY_SIZE(keys); stage = (gl_shader_stage) (stage+1)) {
-      tu_shader_key_init(&keys[stage], stage_infos[stage], builder->device);
+      const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_info = NULL;
+      if (stage_infos[stage])
+         subgroup_info = vk_find_struct_const(stage_infos[stage],
+                                              PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
+      bool allow_varying_subgroup_size =
+         !stage_infos[stage] ||
+         (stage_infos[stage]->flags &
+          VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT);
+      bool require_full_subgroups =
+         stage_infos[stage] &&
+         (stage_infos[stage]->flags &
+          VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+      tu_shader_key_subgroup_size(&keys[stage], allow_varying_subgroup_size,
+                                  require_full_subgroups, subgroup_info,
+                                  builder->device);
    }
 
    if (builder->create_flags &
@@ -1817,9 +1628,6 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
       }
    }
 
-   struct ir3_shader_key ir3_key = {};
-   tu_pipeline_shader_key_init(&ir3_key, pipeline, builder, nir);
-
    struct tu_nir_shaders *nir_shaders = NULL;
    if (!must_compile)
       goto done;
@@ -1833,23 +1641,52 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
    if (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
       keys[MESA_SHADER_FRAGMENT].multiview_mask =
          builder->graphics_state.rp->view_mask;
-      keys[MESA_SHADER_FRAGMENT].force_sample_interp = ir3_key.sample_shading;
       keys[MESA_SHADER_FRAGMENT].fragment_density_map =
          builder->fragment_density_map;
       keys[MESA_SHADER_FRAGMENT].unscaled_input_fragcoord =
          builder->unscaled_input_fragcoord;
+
+      const VkPipelineMultisampleStateCreateInfo *msaa_info =
+         builder->create_info->pMultisampleState;
+
+      /* The 1.3.215 spec says:
+       *
+       *    Sample shading can be used to specify a minimum number of unique
+       *    samples to process for each fragment. If sample shading is enabled,
+       *    an implementation must provide a minimum of
+       *
+       *       max(ceil(minSampleShadingFactor * totalSamples), 1)
+       *
+       *    unique associated data for each fragment, where
+       *    minSampleShadingFactor is the minimum fraction of sample shading.
+       *
+       * The definition is pretty much the same as OpenGL's GL_SAMPLE_SHADING.
+       * They both require unique associated data.
+       *
+       * There are discussions to change the definition, such that
+       * sampleShadingEnable does not imply unique associated data.  Before the
+       * discussions are settled and before apps (i.e., ANGLE) are fixed to
+       * follow the new and incompatible definition, we should stick to the
+       * current definition.
+       *
+       * Note that ir3_shader_key::sample_shading is not actually used by ir3,
+       * just checked in tu6_emit_fs_inputs.  We will also copy the value to
+       * tu_shader_key::force_sample_interp in a bit.
+       */
+      keys[MESA_SHADER_FRAGMENT].force_sample_interp =
+         !builder->rasterizer_discard && msaa_info && msaa_info->sampleShadingEnable;
    }
 
    unsigned char pipeline_sha1[20];
    tu_hash_shaders(pipeline_sha1, stage_infos, nir, &builder->layout, keys,
-                   &ir3_key, builder->state, compiler);
+                   builder->state, compiler);
 
    unsigned char nir_sha1[21];
    memcpy(nir_sha1, pipeline_sha1, sizeof(pipeline_sha1));
    nir_sha1[20] = 'N';
 
    if (!executable_info) {
-      bool cache_hit = true;
+      cache_hit = true;
       bool application_cache_hit = false;
 
       unsigned char shader_sha1[21];
@@ -1893,139 +1730,54 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
          pipeline_feedback.flags |=
             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
       }
-
-      if (cache_hit)
-         goto done;
-   }
-
-   if (builder->create_flags &
-       VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) {
-      return VK_PIPELINE_COMPILE_REQUIRED;
    }
 
-   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(nir);
-        stage = (gl_shader_stage) (stage + 1)) {
-      const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
-      if (!stage_info)
-         continue;
-
-      int64_t stage_start = os_time_get_nano();
+   if (!cache_hit) {
+      if (builder->create_flags &
+          VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) {
+         return VK_PIPELINE_COMPILE_REQUIRED;
+      }
 
-      nir[stage] = tu_spirv_to_nir(builder->device, builder->mem_ctx, stage_info, stage);
-      if (!nir[stage]) {
-         result = VK_ERROR_OUT_OF_HOST_MEMORY;
+      result = tu_compile_shaders(builder->device,
+                                  stage_infos,
+                                  nir,
+                                  keys,
+                                  &builder->layout,
+                                  pipeline_sha1,
+                                  shaders,
+                                  executable_info ? nir_initial_disasm : NULL,
+                                  pipeline->executables_mem_ctx,
+                                  retain_nir ? post_link_nir : NULL,
+                                  stage_feedbacks);
+
+      if (result != VK_SUCCESS)
          goto fail;
-      }
 
-      stage_feedbacks[stage].flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
-      stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
-   }
+      if (retain_nir) {
+         nir_shaders =
+            tu_nir_shaders_init(builder->device, &nir_sha1, sizeof(nir_sha1));
+         for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+              stage < ARRAY_SIZE(nir); stage = (gl_shader_stage) (stage + 1)) {
+            if (!post_link_nir[stage])
+               continue;
 
-   if (executable_info) {
-      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
-           stage < ARRAY_SIZE(nir);
-           stage = (gl_shader_stage) (stage + 1)) {
-      if (!nir[stage])
-         continue;
+            nir_shaders->nir[stage] = post_link_nir[stage];
+         }
 
-      nir_initial_disasm[stage] =
-         nir_shader_as_str(nir[stage], pipeline->executables_mem_ctx);
+         nir_shaders = tu_nir_cache_insert(builder->cache, nir_shaders);
       }
-   }
-
-   tu_link_shaders(builder, nir, ARRAY_SIZE(nir));
 
-   if (builder->create_flags &
-       VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT) {
-      nir_shaders =
-         tu_nir_shaders_init(builder->device, &nir_sha1, sizeof(nir_sha1));
-      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
-           stage < ARRAY_SIZE(nir); stage = (gl_shader_stage) (stage + 1)) {
+      for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(nir);
+           stage = (gl_shader_stage) (stage + 1)) {
          if (!nir[stage])
             continue;
 
-         nir_shaders->nir[stage] = nir_shader_clone(NULL, nir[stage]);
-      }
-
-      nir_shaders = tu_nir_cache_insert(builder->cache, nir_shaders);
-   }
-
-   /* With pipelines, tessellation modes can be set on either shader, for
-    * compatibility with HLSL and GLSL, and the driver is supposed to merge
-    * them. Shader objects requires modes to be set on at least the TES except
-    * for OutputVertices which has to be set at least on the TCS. Make sure
-    * all modes are set on the TES when compiling together multiple shaders,
-    * and then from this point on we will use the modes in the TES (and output
-    * vertices on the TCS).
-    */
-   if (nir[MESA_SHADER_TESS_EVAL]) {
-      nir_shader *tcs = nir[MESA_SHADER_TESS_CTRL];
-      nir_shader *tes = nir[MESA_SHADER_TESS_EVAL];
-
-      if (tes->info.tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED)
-         tes->info.tess._primitive_mode = tcs->info.tess._primitive_mode;
-
-      tes->info.tess.point_mode |= tcs->info.tess.point_mode;
-      tes->info.tess.ccw |= tcs->info.tess.ccw;
-
-      if (tes->info.tess.spacing == TESS_SPACING_UNSPECIFIED) {
-         tes->info.tess.spacing = tcs->info.tess.spacing;
-      }
-
-      if (tcs->info.tess.tcs_vertices_out == 0)
-         tcs->info.tess.tcs_vertices_out = tes->info.tess.tcs_vertices_out;
-
-      ir3_key.tessellation = tu6_get_tessmode(tes);
-   }
-
-   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(nir);
-        stage = (gl_shader_stage) (stage + 1)) {
-      if (!nir[stage])
-         continue;
-
-      if (stage > MESA_SHADER_TESS_CTRL) {
-         if (stage == MESA_SHADER_FRAGMENT) {
-            ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
-               (nir[stage]->info.inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID));
-         } else {
-            ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
-               BITSET_TEST(nir[stage]->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
-         }
-      }
-   }
-
-   /* In the the tess-but-not-FS case we don't know whether the FS will read
-    * PrimID so we need to unconditionally store it.
-    */
-   if (nir[MESA_SHADER_TESS_CTRL] && !nir[MESA_SHADER_FRAGMENT])
-      ir3_key.tcs_store_primid = true;
-
-
-   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(nir);
-        stage = (gl_shader_stage) (stage + 1)) {
-      if (!nir[stage] || shaders[stage])
-         continue;
-
-      int64_t stage_start = os_time_get_nano();
-
-      unsigned char shader_sha1[21];
-      memcpy(shader_sha1, pipeline_sha1, sizeof(pipeline_sha1));
-      shader_sha1[20] = (unsigned char) stage;
-
-      result = tu_shader_create(builder->device,
-                                &shaders[stage], nir[stage], &keys[stage],
-                                &ir3_key, shader_sha1, sizeof(shader_sha1),
-                                &builder->layout, executable_info);
-      if (result != VK_SUCCESS) {
-         goto fail;
+         shaders[stage] = tu_pipeline_cache_insert(builder->cache, shaders[stage]);
       }
-
-      shaders[stage] = tu_pipeline_cache_insert(builder->cache, shaders[stage]);
-
-      stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
    }
 
 done:
+
    /* Create empty shaders which contain the draw states to initialize
     * registers for unused shader stages.
     */
@@ -2063,6 +1815,9 @@ done:
       }
    }
 
+   /* We may have deduplicated a cache entry, in which case our original
+    * post_link_nir may be gone.
+    */
    if (nir_shaders) {
       for (gl_shader_stage stage = MESA_SHADER_VERTEX;
            stage < ARRAY_SIZE(nir); stage = (gl_shader_stage) (stage + 1)) {
@@ -2113,7 +1868,6 @@ done:
       struct tu_graphics_lib_pipeline *library =
          tu_pipeline_to_graphics_lib(pipeline);
       library->nir_shaders = nir_shaders;
-      library->ir3_key = ir3_key;
       for (gl_shader_stage stage = MESA_SHADER_VERTEX;
            stage < ARRAY_SIZE(library->shaders);
            stage = (gl_shader_stage) (stage + 1)) {
@@ -2143,13 +1897,6 @@ done:
    return VK_SUCCESS;
 
 fail:
-   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(nir);
-        stage = (gl_shader_stage) (stage + 1)) {
-      if (shaders[stage]) {
-         tu_shader_destroy(builder->device, shaders[stage]);
-      }
-   }
-
    if (nir_shaders)
       vk_pipeline_cache_object_unref(&builder->device->vk,
                                      &nir_shaders->base);
@@ -4287,7 +4034,18 @@ tu_compute_pipeline_create(VkDevice device,
    pipeline->base.active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
 
    struct tu_shader_key key = { };
-   tu_shader_key_init(&key, stage_info, dev);
+   bool allow_varying_subgroup_size =
+      (stage_info->flags &
+       VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT);
+   bool require_full_subgroups =
+      stage_info->flags &
+      VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT;
+   const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_info =
+      vk_find_struct_const(stage_info,
+                           PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
+   tu_shader_key_subgroup_size(&key, allow_varying_subgroup_size,
+                               require_full_subgroups, subgroup_info,
+                               dev);
 
    void *pipeline_mem_ctx = ralloc_context(NULL);
 
diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h
index 523d025e6fe..a99675ccd4c 100644
--- a/src/freedreno/vulkan/tu_pipeline.h
+++ b/src/freedreno/vulkan/tu_pipeline.h
@@ -187,8 +187,6 @@ struct tu_graphics_lib_pipeline {
       struct tu_shader_key key;
    } shaders[MESA_SHADER_FRAGMENT + 1];
 
-   struct ir3_shader_key ir3_key;
-
    /* Used to stitch together an overall layout for the final pipeline. */
    struct tu_descriptor_set_layout *layouts[MAX_SETS];
    unsigned num_sets;
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index fd4118a04a8..2740e7aa9ea 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -8,6 +8,7 @@
 #include "spirv/nir_spirv.h"
 #include "util/mesa-sha1.h"
 #include "nir/nir_xfb_info.h"
+#include "vk_nir.h"
 #include "vk_nir_convert_ycbcr.h"
 #include "vk_pipeline.h"
 #include "vk_util.h"
@@ -2449,6 +2450,273 @@ tu_shader_create(struct tu_device *dev,
    return VK_SUCCESS;
 }
 
+static void
+tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
+{
+   nir_shader *consumer = NULL;
+   for (gl_shader_stage stage = (gl_shader_stage) (shaders_count - 1);
+        stage >= MESA_SHADER_VERTEX; stage = (gl_shader_stage) (stage - 1)) {
+      if (!shaders[stage])
+         continue;
+
+      nir_shader *producer = shaders[stage];
+      if (!consumer) {
+         consumer = producer;
+         continue;
+      }
+
+      if (nir_link_opt_varyings(producer, consumer)) {
+         NIR_PASS_V(consumer, nir_opt_constant_folding);
+         NIR_PASS_V(consumer, nir_opt_algebraic);
+         NIR_PASS_V(consumer, nir_opt_dce);
+      }
+
+      const nir_remove_dead_variables_options out_var_opts = {
+         .can_remove_var = nir_vk_is_not_xfb_output,
+      };
+      NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, &out_var_opts);
+
+      NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
+
+      bool progress = nir_remove_unused_varyings(producer, consumer);
+
+      nir_compact_varyings(producer, consumer, true);
+      if (progress) {
+         if (nir_lower_global_vars_to_local(producer)) {
+            /* Remove dead writes, which can remove input loads */
+            NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
+            NIR_PASS_V(producer, nir_opt_dce);
+         }
+         nir_lower_global_vars_to_local(consumer);
+      }
+
+      consumer = producer;
+   }
+
+   /* Gather info after linking so that we can fill out the ir3 shader key.
+    */
+   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+        stage <= MESA_SHADER_FRAGMENT; stage = (gl_shader_stage) (stage + 1)) {
+      if (shaders[stage])
+         nir_shader_gather_info(shaders[stage],
+                                nir_shader_get_entrypoint(shaders[stage]));
+   }
+}
+
+static uint32_t
+tu6_get_tessmode(const struct nir_shader *shader)
+{
+   enum tess_primitive_mode primitive_mode = shader->info.tess._primitive_mode;
+   switch (primitive_mode) {
+   case TESS_PRIMITIVE_ISOLINES:
+      return IR3_TESS_ISOLINES;
+   case TESS_PRIMITIVE_TRIANGLES:
+      return IR3_TESS_TRIANGLES;
+   case TESS_PRIMITIVE_QUADS:
+      return IR3_TESS_QUADS;
+   case TESS_PRIMITIVE_UNSPECIFIED:
+      return IR3_TESS_NONE;
+   default:
+      unreachable("bad tessmode");
+   }
+}
+
+VkResult
+tu_compile_shaders(struct tu_device *device,
+                   const VkPipelineShaderStageCreateInfo **stage_infos,
+                   nir_shader **nir,
+                   const struct tu_shader_key *keys,
+                   struct tu_pipeline_layout *layout,
+                   const unsigned char *pipeline_sha1,
+                   struct tu_shader **shaders,
+                   char **nir_initial_disasm,
+                   void *nir_initial_disasm_mem_ctx,
+                   nir_shader **nir_out,
+                   VkPipelineCreationFeedback *stage_feedbacks)
+{
+   struct ir3_shader_key ir3_key = {};
+   VkResult result = VK_SUCCESS;
+   void *mem_ctx = ralloc_context(NULL);
+
+   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
+        stage = (gl_shader_stage) (stage + 1)) {
+      const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
+      if (!stage_info)
+         continue;
+
+      int64_t stage_start = os_time_get_nano();
+
+      nir[stage] = tu_spirv_to_nir(device, mem_ctx, stage_info, stage);
+      if (!nir[stage]) {
+         result = VK_ERROR_OUT_OF_HOST_MEMORY;
+         goto fail;
+      }
+
+      stage_feedbacks[stage].flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
+      stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
+   }
+
+   if (nir[MESA_SHADER_GEOMETRY])
+      ir3_key.has_gs = true;
+
+   ir3_key.sample_shading = keys[MESA_SHADER_FRAGMENT].force_sample_interp;
+
+   if (nir_initial_disasm) {
+      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+           stage < MESA_SHADER_STAGES;
+           stage = (gl_shader_stage) (stage + 1)) {
+      if (!nir[stage])
+         continue;
+
+      nir_initial_disasm[stage] =
+         nir_shader_as_str(nir[stage], nir_initial_disasm_mem_ctx);
+      }
+   }
+
+   tu_link_shaders(nir, MESA_SHADER_STAGES);
+
+   if (nir_out) {
+      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+           stage < MESA_SHADER_STAGES; stage = (gl_shader_stage) (stage + 1)) {
+         if (!nir[stage])
+            continue;
+
+         nir_out[stage] = nir_shader_clone(NULL, nir[stage]);
+      }
+   }
+
+   /* With pipelines, tessellation modes can be set on either shader, for
+    * compatibility with HLSL and GLSL, and the driver is supposed to merge
+    * them. Shader objects requires modes to be set on at least the TES except
+    * for OutputVertices which has to be set at least on the TCS. Make sure
+    * all modes are set on the TES when compiling together multiple shaders,
+    * and then from this point on we will use the modes in the TES (and output
+    * vertices on the TCS).
+    */
+   if (nir[MESA_SHADER_TESS_EVAL]) {
+      nir_shader *tcs = nir[MESA_SHADER_TESS_CTRL];
+      nir_shader *tes = nir[MESA_SHADER_TESS_EVAL];
+
+      if (tes->info.tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED)
+         tes->info.tess._primitive_mode = tcs->info.tess._primitive_mode;
+
+      tes->info.tess.point_mode |= tcs->info.tess.point_mode;
+      tes->info.tess.ccw |= tcs->info.tess.ccw;
+
+      if (tes->info.tess.spacing == TESS_SPACING_UNSPECIFIED) {
+         tes->info.tess.spacing = tcs->info.tess.spacing;
+      }
+
+      if (tcs->info.tess.tcs_vertices_out == 0)
+         tcs->info.tess.tcs_vertices_out = tes->info.tess.tcs_vertices_out;
+
+      ir3_key.tessellation = tu6_get_tessmode(tes);
+   }
+
+   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
+        stage = (gl_shader_stage) (stage + 1)) {
+      if (!nir[stage])
+         continue;
+
+      if (stage > MESA_SHADER_TESS_CTRL) {
+         if (stage == MESA_SHADER_FRAGMENT) {
+            ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
+               (nir[stage]->info.inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID));
+         } else {
+            ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
+               BITSET_TEST(nir[stage]->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
+         }
+      }
+   }
+
+   /* In the the tess-but-not-FS case we don't know whether the FS will read
+    * PrimID so we need to unconditionally store it.
+    */
+   if (nir[MESA_SHADER_TESS_CTRL] && !nir[MESA_SHADER_FRAGMENT])
+      ir3_key.tcs_store_primid = true;
+
+   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
+        stage = (gl_shader_stage) (stage + 1)) {
+      if (!nir[stage] || shaders[stage])
+         continue;
+
+      int64_t stage_start = os_time_get_nano();
+
+      unsigned char shader_sha1[21];
+      memcpy(shader_sha1, pipeline_sha1, 20);
+      shader_sha1[20] = (unsigned char) stage;
+
+      result = tu_shader_create(device,
+                                &shaders[stage], nir[stage], &keys[stage],
+                                &ir3_key, shader_sha1, sizeof(shader_sha1),
+                                layout, !!nir_initial_disasm);
+      if (result != VK_SUCCESS) {
+         goto fail;
+      }
+
+      stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
+   }
+
+   ralloc_free(mem_ctx);
+
+   return VK_SUCCESS;
+
+fail:
+   ralloc_free(mem_ctx);
+
+   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
+        stage = (gl_shader_stage) (stage + 1)) {
+      if (shaders[stage]) {
+         tu_shader_destroy(device, shaders[stage]);
+      }
+      if (nir_out && nir_out[stage]) {
+         ralloc_free(nir_out[stage]);
+      }
+   }
+
+   return result;
+}
+
+void
+tu_shader_key_subgroup_size(struct tu_shader_key *key,
+                            bool allow_varying_subgroup_size,
+                            bool require_full_subgroups,
+                            const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_info,
+                            struct tu_device *dev)
+{
+   enum ir3_wavesize_option api_wavesize, real_wavesize;
+   if (!dev->physical_device->info->a6xx.supports_double_threadsize) {
+      api_wavesize = IR3_SINGLE_ONLY;
+      real_wavesize = IR3_SINGLE_ONLY;
+   } else {
+      if (allow_varying_subgroup_size) {
+         api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
+      } else {
+         if (subgroup_info) {
+            if (subgroup_info->requiredSubgroupSize == dev->compiler->threadsize_base) {
+               api_wavesize = IR3_SINGLE_ONLY;
+            } else {
+               assert(subgroup_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2);
+               api_wavesize = IR3_DOUBLE_ONLY;
+            }
+         } else {
+            /* Match the exposed subgroupSize. */
+            api_wavesize = IR3_DOUBLE_ONLY;
+         }
+
+         if (require_full_subgroups)
+            real_wavesize = api_wavesize;
+         else if (api_wavesize == IR3_SINGLE_ONLY)
+            real_wavesize = IR3_SINGLE_ONLY;
+         else
+            real_wavesize = IR3_SINGLE_OR_DOUBLE;
+      }
+   }
+
+   key->api_wavesize = api_wavesize;
+   key->real_wavesize = real_wavesize;
+}
+
 static VkResult
 tu_empty_shader_create(struct tu_device *dev,
                        struct tu_shader **shader_out,
diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h
index eed38243e9d..495c42d962e 100644
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@@ -153,6 +153,26 @@ tu_shader_create(struct tu_device *dev,
                  struct tu_pipeline_layout *layout,
                  bool executable_info);
 
+void
+tu_shader_key_subgroup_size(struct tu_shader_key *key,
+                            bool allow_varying_subgroup_size,
+                            bool require_full_subgroups,
+                            const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_info,
+                            struct tu_device *dev);
+
+VkResult
+tu_compile_shaders(struct tu_device *device,
+                   const VkPipelineShaderStageCreateInfo **stage_infos,
+                   nir_shader **nir,
+                   const struct tu_shader_key *keys,
+                   struct tu_pipeline_layout *layout,
+                   const unsigned char *pipeline_sha1,
+                   struct tu_shader **shaders,
+                   char **nir_initial_disasm,
+                   void *nir_initial_disasm_mem_ctx,
+                   nir_shader **nir_out,
+                   VkPipelineCreationFeedback *stage_feedbacks);
+
 VkResult
 tu_init_empty_shaders(struct tu_device *device);
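
For a concrete feel for the new subgroup-size helper, here is a hedged
worked example (it assumes an a6xx part that supports double threadsize
and whose dev->compiler->threadsize_base is 64, i.e. subgroup sizes of
64/128; the values below are hypothetical):

   const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo req = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,
      .requiredSubgroupSize = 128,  /* == 2 * threadsize_base (assumed) */
   };
   struct tu_shader_key key = { };
   tu_shader_key_subgroup_size(&key, false /* allow varying size */,
                               true /* require full subgroups */, &req, dev);
   /* Per the logic above: a required size of 2 * threadsize_base selects
    * api_wavesize = IR3_DOUBLE_ONLY, and requiring full subgroups forces
    * real_wavesize to follow it, so both end up IR3_DOUBLE_ONLY.
    */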
 
