Module: Mesa
Branch: main
Commit: 24527b3d28332a453ad7e8bdcb3d49cc08ee9747
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=24527b3d28332a453ad7e8bdcb3d49cc08ee9747
Author: Connor Abbott <[email protected]>
Date:   Tue Oct 3 19:21:49 2023 +0200

tu: Move shader linking to tu_shader.cc

This creates a function that can be used by both pipelines and shaders.
Note that we cannot yet call tu_CreateShadersEXT directly inside the
pipeline due to things like pipeline feedback, multiview, and so on, but
further extensions will hopefully bring us closer to that ideal.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25679>

---
 src/freedreno/vulkan/tu_pipeline.cc | 440 ++++++++----------------------------
 src/freedreno/vulkan/tu_pipeline.h  |   2 -
 src/freedreno/vulkan/tu_shader.cc   | 268 ++++++++++++++++++++++
 src/freedreno/vulkan/tu_shader.h    |  20 ++
 4 files changed, 387 insertions(+), 343 deletions(-)

diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc
index 92a7efc2c55..b520b1d9195 100644
--- a/src/freedreno/vulkan/tu_pipeline.cc
+++ b/src/freedreno/vulkan/tu_pipeline.cc
@@ -1314,111 +1314,6 @@ tu_pipeline_allocate_cs(struct tu_device *dev,
    return VK_SUCCESS;
 }
 
-static void
-tu_pipeline_shader_key_init(struct ir3_shader_key *key,
-                            const struct tu_pipeline *pipeline,
-                            struct tu_pipeline_builder *builder,
-                            nir_shader **nir)
-{
-   /* We set this after we compile to NIR because we need the prim mode */
-   key->tessellation = IR3_TESS_NONE;
-
-   for (unsigned i = 0; i < builder->num_libraries; i++) {
-      if (!(builder->libraries[i]->state &
-            (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
-             VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)))
-         continue;
-
-      const struct ir3_shader_key *library_key =
-         &builder->libraries[i]->ir3_key;
-
-      if (library_key->tessellation != IR3_TESS_NONE)
-         key->tessellation = library_key->tessellation;
-      key->has_gs |= library_key->has_gs;
-      key->sample_shading |= library_key->sample_shading;
-   }
-
-   for (uint32_t i = 0; i < builder->create_info->stageCount; i++) {
-      if (builder->create_info->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT) {
-         key->has_gs = true;
-         break;
-      }
-   }
-
-   if (!(builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT))
-      return;
-
-   if (builder->rasterizer_discard)
-      return;
-
-   const VkPipelineMultisampleStateCreateInfo *msaa_info =
-      builder->create_info->pMultisampleState;
-
-   /* The 1.3.215 spec says:
-    *
-    *    Sample shading can be used to specify a minimum number of unique
-    *    samples to process for each fragment. If sample shading is enabled,
-    *    an implementation must provide a minimum of
-    *
-    *       max(ceil(minSampleShadingFactor * totalSamples), 1)
-    *
-    *    unique associated data for each fragment, where
-    *    minSampleShadingFactor is the minimum fraction of sample shading.
-    *
-    * The definition is pretty much the same as OpenGL's GL_SAMPLE_SHADING.
-    * They both require unique associated data.
-    *
-    * There are discussions to change the definition, such that
-    * sampleShadingEnable does not imply unique associated data. Before the
-    * discussions are settled and before apps (i.e., ANGLE) are fixed to
-    * follow the new and incompatible definition, we should stick to the
-    * current definition.
-    *
-    * Note that ir3_shader_key::sample_shading is not actually used by ir3,
-    * just checked in tu6_emit_fs_inputs. We will also copy the value to
-    * tu_shader_key::force_sample_interp in a bit.
-    */
-   if (msaa_info && msaa_info->sampleShadingEnable)
-      key->sample_shading = true;
-}
-
-static uint32_t
-tu6_get_tessmode(const struct nir_shader *shader)
-{
-   enum tess_primitive_mode primitive_mode = shader->info.tess._primitive_mode;
-   switch (primitive_mode) {
-   case TESS_PRIMITIVE_ISOLINES:
-      return IR3_TESS_ISOLINES;
-   case TESS_PRIMITIVE_TRIANGLES:
-      return IR3_TESS_TRIANGLES;
-   case TESS_PRIMITIVE_QUADS:
-      return IR3_TESS_QUADS;
-   case TESS_PRIMITIVE_UNSPECIFIED:
-      return IR3_TESS_NONE;
-   default:
-      unreachable("bad tessmode");
-   }
-}
-
-static uint64_t
-tu_upload_variant(struct tu_pipeline *pipeline,
-                  const struct ir3_shader_variant *variant)
-{
-   struct tu_cs_memory memory;
-
-   if (!variant)
-      return 0;
-
-   /* this expects to get enough alignment because shaders are allocated first
-    * and total size is always aligned correctly
-    * note: an assert in tu6_emit_xs_config validates the alignment
-    */
-   tu_cs_alloc(&pipeline->cs, variant->info.size / 4, 1, &memory);
-
-   memcpy(memory.map, variant->bin, variant->info.size);
-   return memory.iova;
-}
-
 static void
 tu_append_executable(struct tu_pipeline *pipeline,
                      const struct ir3_shader_variant *variant,
@@ -1436,106 +1331,6 @@ tu_append_executable(struct tu_pipeline *pipeline,
    util_dynarray_append(&pipeline->executables, struct tu_pipeline_executable, exe);
 }
 
-static void
-tu_link_shaders(struct tu_pipeline_builder *builder,
-                nir_shader **shaders, unsigned shaders_count)
-{
-   nir_shader *consumer = NULL;
-   for (gl_shader_stage stage = (gl_shader_stage) (shaders_count - 1);
-        stage >= MESA_SHADER_VERTEX; stage = (gl_shader_stage) (stage - 1)) {
-      if (!shaders[stage])
-         continue;
-
-      nir_shader *producer = shaders[stage];
-      if (!consumer) {
-         consumer = producer;
-         continue;
-      }
-
-      if (nir_link_opt_varyings(producer, consumer)) {
-         NIR_PASS_V(consumer, nir_opt_constant_folding);
-         NIR_PASS_V(consumer, nir_opt_algebraic);
-         NIR_PASS_V(consumer, nir_opt_dce);
-      }
-
-      const nir_remove_dead_variables_options out_var_opts = {
-         .can_remove_var = nir_vk_is_not_xfb_output,
-      };
-      NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, &out_var_opts);
-
-      NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
-
-      bool progress = nir_remove_unused_varyings(producer, consumer);
-
-      nir_compact_varyings(producer, consumer, true);
-      if (progress) {
-         if (nir_lower_global_vars_to_local(producer)) {
-            /* Remove dead writes, which can remove input loads */
-            NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
-            NIR_PASS_V(producer, nir_opt_dce);
-         }
-         nir_lower_global_vars_to_local(consumer);
-      }
-
-      consumer = producer;
-   }
-
-   /* Gather info after linking so that we can fill out the ir3 shader key.
-    */
-   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
-        stage <= MESA_SHADER_FRAGMENT; stage = (gl_shader_stage) (stage + 1)) {
-      if (shaders[stage])
-         nir_shader_gather_info(shaders[stage],
-                                nir_shader_get_entrypoint(shaders[stage]));
-   }
-}
-
-static void
-tu_shader_key_init(struct tu_shader_key *key,
-                   const VkPipelineShaderStageCreateInfo *stage_info,
-                   struct tu_device *dev)
-{
-   enum ir3_wavesize_option api_wavesize, real_wavesize;
-   if (!dev->physical_device->info->a6xx.supports_double_threadsize) {
-      api_wavesize = IR3_SINGLE_ONLY;
-      real_wavesize = IR3_SINGLE_ONLY;
-   } else if (stage_info) {
-      if (stage_info->flags &
-          VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT) {
-         api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
-      } else {
-         const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *size_info =
-            vk_find_struct_const(stage_info->pNext,
-                                 PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
-
-         if (size_info) {
-            if (size_info->requiredSubgroupSize == dev->compiler->threadsize_base) {
-               api_wavesize = IR3_SINGLE_ONLY;
-            } else {
-               assert(size_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2);
-               api_wavesize = IR3_DOUBLE_ONLY;
-            }
-         } else {
-            /* Match the exposed subgroupSize. */
-            api_wavesize = IR3_DOUBLE_ONLY;
-         }
-
-         if (stage_info->flags &
-             VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT)
-            real_wavesize = api_wavesize;
-         else if (api_wavesize == IR3_SINGLE_ONLY)
-            real_wavesize = IR3_SINGLE_ONLY;
-         else
-            real_wavesize = IR3_SINGLE_OR_DOUBLE;
-      }
-   } else {
-      api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
-   }
-
-   key->api_wavesize = api_wavesize;
-   key->real_wavesize = real_wavesize;
-}
-
 static void
 tu_hash_stage(struct mesa_sha1 *ctx,
               const VkPipelineShaderStageCreateInfo *stage,
@@ -1574,7 +1369,6 @@ tu_hash_shaders(unsigned char *hash,
                 nir_shader *const *nir,
                 const struct tu_pipeline_layout *layout,
                 const struct tu_shader_key *keys,
-                const struct ir3_shader_key *ir3_key,
                 VkGraphicsPipelineLibraryFlagsEXT state,
                 const struct ir3_compiler *compiler)
 {
@@ -1585,8 +1379,6 @@ tu_hash_shaders(unsigned char *hash,
    if (layout)
       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
 
-   _mesa_sha1_update(&ctx, ir3_key, sizeof(ir3_key));
-
    for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
       if (stages[i] || nir[i]) {
          tu_hash_stage(&ctx, stages[i], nir[i], &keys[i]);
      }
@@ -1772,6 +1564,10 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
       builder->create_flags &
       VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
 
+   bool retain_nir =
+      builder->create_flags &
+      VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;
+
    int64_t pipeline_start = os_time_get_nano();
 
    const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
@@ -1793,11 +1589,26 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
    struct tu_shader *shaders[ARRAY_SIZE(stage_infos)] = { NULL };
    nir_shader *post_link_nir[ARRAY_SIZE(nir)] = { NULL };
    char *nir_initial_disasm[ARRAY_SIZE(stage_infos)] = { NULL };
+   bool cache_hit = false;
 
    struct tu_shader_key keys[ARRAY_SIZE(stage_infos)] = { };
    for (gl_shader_stage stage = MESA_SHADER_VERTEX;
        stage < ARRAY_SIZE(keys); stage = (gl_shader_stage) (stage+1)) {
-      tu_shader_key_init(&keys[stage], stage_infos[stage], builder->device);
+      const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_info = NULL;
+      if (stage_infos[stage])
+         subgroup_info = vk_find_struct_const(stage_infos[stage],
+                                              PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
+      bool allow_varying_subgroup_size =
+         !stage_infos[stage] ||
+         (stage_infos[stage]->flags &
+          VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT);
+      bool require_full_subgroups =
+         stage_infos[stage] &&
+         (stage_infos[stage]->flags &
+          VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+      tu_shader_key_subgroup_size(&keys[stage], allow_varying_subgroup_size,
+                                  require_full_subgroups, subgroup_info,
+                                  builder->device);
    }
 
    if (builder->create_flags &
@@ -1817,9 +1628,6 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
       }
    }
 
-   struct ir3_shader_key ir3_key = {};
-   tu_pipeline_shader_key_init(&ir3_key, pipeline, builder, nir);
-
    struct tu_nir_shaders *nir_shaders = NULL;
    if (!must_compile)
       goto done;
@@ -1833,23 +1641,52 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
    if (builder->state &
       VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
       keys[MESA_SHADER_FRAGMENT].multiview_mask =
         builder->graphics_state.rp->view_mask;
-      keys[MESA_SHADER_FRAGMENT].force_sample_interp = ir3_key.sample_shading;
       keys[MESA_SHADER_FRAGMENT].fragment_density_map =
          builder->fragment_density_map;
       keys[MESA_SHADER_FRAGMENT].unscaled_input_fragcoord =
          builder->unscaled_input_fragcoord;
+
+      const VkPipelineMultisampleStateCreateInfo *msaa_info =
+         builder->create_info->pMultisampleState;
+
+      /* The 1.3.215 spec says:
+       *
+       *    Sample shading can be used to specify a minimum number of unique
+       *    samples to process for each fragment. If sample shading is enabled,
+       *    an implementation must provide a minimum of
+       *
+       *       max(ceil(minSampleShadingFactor * totalSamples), 1)
+       *
+       *    unique associated data for each fragment, where
+       *    minSampleShadingFactor is the minimum fraction of sample shading.
+       *
+       * The definition is pretty much the same as OpenGL's GL_SAMPLE_SHADING.
+       * They both require unique associated data.
+       *
+       * There are discussions to change the definition, such that
+       * sampleShadingEnable does not imply unique associated data. Before the
+       * discussions are settled and before apps (i.e., ANGLE) are fixed to
+       * follow the new and incompatible definition, we should stick to the
+       * current definition.
+       *
+       * Note that ir3_shader_key::sample_shading is not actually used by ir3,
+       * just checked in tu6_emit_fs_inputs. We will also copy the value to
+       * tu_shader_key::force_sample_interp in a bit.
+       */
+      keys[MESA_SHADER_FRAGMENT].force_sample_interp =
+         !builder->rasterizer_discard && msaa_info && msaa_info->sampleShadingEnable;
    }
 
    unsigned char pipeline_sha1[20];
    tu_hash_shaders(pipeline_sha1, stage_infos, nir, &builder->layout, keys,
-                   &ir3_key, builder->state, compiler);
+                   builder->state, compiler);
 
    unsigned char nir_sha1[21];
    memcpy(nir_sha1, pipeline_sha1, sizeof(pipeline_sha1));
    nir_sha1[20] = 'N';
 
    if (!executable_info) {
-      bool cache_hit = true;
+      cache_hit = true;
       bool application_cache_hit = false;
 
       unsigned char shader_sha1[21];
@@ -1893,139 +1730,54 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
          pipeline_feedback.flags |=
            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
       }
-
-      if (cache_hit)
-         goto done;
-   }
-
-   if (builder->create_flags &
-       VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) {
-      return VK_PIPELINE_COMPILE_REQUIRED;
    }
 
-   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(nir);
-        stage = (gl_shader_stage) (stage + 1)) {
-      const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
-      if (!stage_info)
-         continue;
-
-      int64_t stage_start = os_time_get_nano();
+   if (!cache_hit) {
+      if (builder->create_flags &
+          VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) {
+         return VK_PIPELINE_COMPILE_REQUIRED;
+      }
 
-      nir[stage] = tu_spirv_to_nir(builder->device, builder->mem_ctx, stage_info, stage);
-      if (!nir[stage]) {
-         result = VK_ERROR_OUT_OF_HOST_MEMORY;
+      result = tu_compile_shaders(builder->device,
+                                  stage_infos,
+                                  nir,
+                                  keys,
+                                  &builder->layout,
+                                  pipeline_sha1,
+                                  shaders,
+                                  executable_info ? nir_initial_disasm : NULL,
+                                  pipeline->executables_mem_ctx,
+                                  retain_nir ? post_link_nir : NULL,
+                                  stage_feedbacks);
+
+      if (result != VK_SUCCESS)
         goto fail;
-      }
 
-      stage_feedbacks[stage].flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
-      stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
-   }
+      if (retain_nir) {
+         nir_shaders =
+            tu_nir_shaders_init(builder->device, &nir_sha1, sizeof(nir_sha1));
+         for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+              stage < ARRAY_SIZE(nir); stage = (gl_shader_stage) (stage + 1)) {
+            if (!post_link_nir[stage])
+               continue;
 
-   if (executable_info) {
-      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
-           stage < ARRAY_SIZE(nir);
-           stage = (gl_shader_stage) (stage + 1)) {
-         if (!nir[stage])
-            continue;
+            nir_shaders->nir[stage] = post_link_nir[stage];
+         }
 
-         nir_initial_disasm[stage] =
-            nir_shader_as_str(nir[stage], pipeline->executables_mem_ctx);
+         nir_shaders = tu_nir_cache_insert(builder->cache, nir_shaders);
       }
-   }
-
-   tu_link_shaders(builder, nir, ARRAY_SIZE(nir));
 
-   if (builder->create_flags &
-       VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT) {
-      nir_shaders =
-         tu_nir_shaders_init(builder->device, &nir_sha1, sizeof(nir_sha1));
-      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
-           stage < ARRAY_SIZE(nir); stage = (gl_shader_stage) (stage + 1)) {
+      for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(nir);
+           stage = (gl_shader_stage) (stage + 1)) {
         if (!nir[stage])
            continue;
 
-         nir_shaders->nir[stage] = nir_shader_clone(NULL, nir[stage]);
-      }
-
-      nir_shaders = tu_nir_cache_insert(builder->cache, nir_shaders);
-   }
-
-   /* With pipelines, tessellation modes can be set on either shader, for
-    * compatibility with HLSL and GLSL, and the driver is supposed to merge
-    * them. Shader objects requires modes to be set on at least the TES except
-    * for OutputVertices which has to be set at least on the TCS. Make sure
-    * all modes are set on the TES when compiling together multiple shaders,
-    * and then from this point on we will use the modes in the TES (and output
-    * vertices on the TCS).
-    */
-   if (nir[MESA_SHADER_TESS_EVAL]) {
-      nir_shader *tcs = nir[MESA_SHADER_TESS_CTRL];
-      nir_shader *tes = nir[MESA_SHADER_TESS_EVAL];
-
-      if (tes->info.tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED)
-         tes->info.tess._primitive_mode = tcs->info.tess._primitive_mode;
-
-      tes->info.tess.point_mode |= tcs->info.tess.point_mode;
-      tes->info.tess.ccw |= tcs->info.tess.ccw;
-
-      if (tes->info.tess.spacing == TESS_SPACING_UNSPECIFIED) {
-         tes->info.tess.spacing = tcs->info.tess.spacing;
-      }
-
-      if (tcs->info.tess.tcs_vertices_out == 0)
-         tcs->info.tess.tcs_vertices_out = tes->info.tess.tcs_vertices_out;
-
-      ir3_key.tessellation = tu6_get_tessmode(tes);
-   }
-
-   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(nir);
-        stage = (gl_shader_stage) (stage + 1)) {
-      if (!nir[stage])
-         continue;
-
-      if (stage > MESA_SHADER_TESS_CTRL) {
-         if (stage == MESA_SHADER_FRAGMENT) {
-            ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
-               (nir[stage]->info.inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID));
-         } else {
-            ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
-               BITSET_TEST(nir[stage]->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
-         }
-      }
-   }
-
-   /* In the the tess-but-not-FS case we don't know whether the FS will read
-    * PrimID so we need to unconditionally store it.
-    */
-   if (nir[MESA_SHADER_TESS_CTRL] && !nir[MESA_SHADER_FRAGMENT])
-      ir3_key.tcs_store_primid = true;
-
-
-   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(nir);
-        stage = (gl_shader_stage) (stage + 1)) {
-      if (!nir[stage] || shaders[stage])
-         continue;
-
-      int64_t stage_start = os_time_get_nano();
-
-      unsigned char shader_sha1[21];
-      memcpy(shader_sha1, pipeline_sha1, sizeof(pipeline_sha1));
-      shader_sha1[20] = (unsigned char) stage;
-
-      result = tu_shader_create(builder->device,
-                                &shaders[stage], nir[stage], &keys[stage],
-                                &ir3_key, shader_sha1, sizeof(shader_sha1),
-                                &builder->layout, executable_info);
-      if (result != VK_SUCCESS) {
-         goto fail;
+         shaders[stage] = tu_pipeline_cache_insert(builder->cache, shaders[stage]);
       }
-
-      shaders[stage] = tu_pipeline_cache_insert(builder->cache, shaders[stage]);
-
-      stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
-   }
    }
 
 done:
+
    /* Create empty shaders which contain the draw states to initialize
    * registers for unused shader stages.
    */
@@ -2063,6 +1815,9 @@ done:
       }
    }
 
+   /* We may have deduplicated a cache entry, in which case our original
+    * post_link_nir may be gone.
+    */
    if (nir_shaders) {
       for (gl_shader_stage stage = MESA_SHADER_VERTEX;
            stage < ARRAY_SIZE(nir); stage = (gl_shader_stage) (stage + 1)) {
@@ -2113,7 +1868,6 @@ done:
       struct tu_graphics_lib_pipeline *library =
          tu_pipeline_to_graphics_lib(pipeline);
       library->nir_shaders = nir_shaders;
-      library->ir3_key = ir3_key;
 
       for (gl_shader_stage stage = MESA_SHADER_VERTEX;
            stage < ARRAY_SIZE(library->shaders);
            stage = (gl_shader_stage) (stage + 1)) {
@@ -2143,13 +1897,6 @@ done:
    return VK_SUCCESS;
 
 fail:
-   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(nir);
-        stage = (gl_shader_stage) (stage + 1)) {
-      if (shaders[stage]) {
-         tu_shader_destroy(builder->device, shaders[stage]);
-      }
-   }
-
    if (nir_shaders)
       vk_pipeline_cache_object_unref(&builder->device->vk,
                                      &nir_shaders->base);
@@ -4287,7 +4034,18 @@ tu_compute_pipeline_create(VkDevice device,
 
    pipeline->base.active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
 
    struct tu_shader_key key = { };
-   tu_shader_key_init(&key, stage_info, dev);
+   bool allow_varying_subgroup_size =
+      (stage_info->flags &
+       VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT);
+   bool require_full_subgroups =
+      stage_info->flags &
+      VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT;
+   const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_info =
+      vk_find_struct_const(stage_info,
+                           PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
+   tu_shader_key_subgroup_size(&key, allow_varying_subgroup_size,
+                               require_full_subgroups, subgroup_info,
+                               dev);
 
    void *pipeline_mem_ctx = ralloc_context(NULL);
 
diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h
index 523d025e6fe..a99675ccd4c 100644
--- a/src/freedreno/vulkan/tu_pipeline.h
+++ b/src/freedreno/vulkan/tu_pipeline.h
@@ -187,8 +187,6 @@ struct tu_graphics_lib_pipeline {
       struct tu_shader_key key;
    } shaders[MESA_SHADER_FRAGMENT + 1];
 
-   struct ir3_shader_key ir3_key;
-
    /* Used to stitch together an overall layout for the final pipeline.
    */
    struct tu_descriptor_set_layout *layouts[MAX_SETS];
    unsigned num_sets;
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index fd4118a04a8..2740e7aa9ea 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -8,6 +8,7 @@
 #include "spirv/nir_spirv.h"
 #include "util/mesa-sha1.h"
 #include "nir/nir_xfb_info.h"
+#include "vk_nir.h"
 #include "vk_nir_convert_ycbcr.h"
 #include "vk_pipeline.h"
 #include "vk_util.h"
@@ -2449,6 +2450,273 @@ tu_shader_create(struct tu_device *dev,
    return VK_SUCCESS;
 }
 
+static void
+tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
+{
+   nir_shader *consumer = NULL;
+   for (gl_shader_stage stage = (gl_shader_stage) (shaders_count - 1);
+        stage >= MESA_SHADER_VERTEX; stage = (gl_shader_stage) (stage - 1)) {
+      if (!shaders[stage])
+         continue;
+
+      nir_shader *producer = shaders[stage];
+      if (!consumer) {
+         consumer = producer;
+         continue;
+      }
+
+      if (nir_link_opt_varyings(producer, consumer)) {
+         NIR_PASS_V(consumer, nir_opt_constant_folding);
+         NIR_PASS_V(consumer, nir_opt_algebraic);
+         NIR_PASS_V(consumer, nir_opt_dce);
+      }
+
+      const nir_remove_dead_variables_options out_var_opts = {
+         .can_remove_var = nir_vk_is_not_xfb_output,
+      };
+      NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, &out_var_opts);
+
+      NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
+
+      bool progress = nir_remove_unused_varyings(producer, consumer);
+
+      nir_compact_varyings(producer, consumer, true);
+      if (progress) {
+         if (nir_lower_global_vars_to_local(producer)) {
+            /* Remove dead writes, which can remove input loads */
+            NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
+            NIR_PASS_V(producer, nir_opt_dce);
+         }
+         nir_lower_global_vars_to_local(consumer);
+      }
+
+      consumer = producer;
+   }
+
+   /* Gather info after linking so that we can fill out the ir3 shader key.
+    */
+   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+        stage <= MESA_SHADER_FRAGMENT; stage = (gl_shader_stage) (stage + 1)) {
+      if (shaders[stage])
+         nir_shader_gather_info(shaders[stage],
+                                nir_shader_get_entrypoint(shaders[stage]));
+   }
+}
+
+static uint32_t
+tu6_get_tessmode(const struct nir_shader *shader)
+{
+   enum tess_primitive_mode primitive_mode = shader->info.tess._primitive_mode;
+   switch (primitive_mode) {
+   case TESS_PRIMITIVE_ISOLINES:
+      return IR3_TESS_ISOLINES;
+   case TESS_PRIMITIVE_TRIANGLES:
+      return IR3_TESS_TRIANGLES;
+   case TESS_PRIMITIVE_QUADS:
+      return IR3_TESS_QUADS;
+   case TESS_PRIMITIVE_UNSPECIFIED:
+      return IR3_TESS_NONE;
+   default:
+      unreachable("bad tessmode");
+   }
+}
+
+VkResult
+tu_compile_shaders(struct tu_device *device,
+                   const VkPipelineShaderStageCreateInfo **stage_infos,
+                   nir_shader **nir,
+                   const struct tu_shader_key *keys,
+                   struct tu_pipeline_layout *layout,
+                   const unsigned char *pipeline_sha1,
+                   struct tu_shader **shaders,
+                   char **nir_initial_disasm,
+                   void *nir_initial_disasm_mem_ctx,
+                   nir_shader **nir_out,
+                   VkPipelineCreationFeedback *stage_feedbacks)
+{
+   struct ir3_shader_key ir3_key = {};
+   VkResult result = VK_SUCCESS;
+   void *mem_ctx = ralloc_context(NULL);
+
+   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
+        stage = (gl_shader_stage) (stage + 1)) {
+      const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
+      if (!stage_info)
+         continue;
+
+      int64_t stage_start = os_time_get_nano();
+
+      nir[stage] = tu_spirv_to_nir(device, mem_ctx, stage_info, stage);
+      if (!nir[stage]) {
+         result = VK_ERROR_OUT_OF_HOST_MEMORY;
+         goto fail;
+      }
+
+      stage_feedbacks[stage].flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
+      stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
+   }
+
+   if (nir[MESA_SHADER_GEOMETRY])
+      ir3_key.has_gs = true;
+
+   ir3_key.sample_shading = keys[MESA_SHADER_FRAGMENT].force_sample_interp;
+
+   if (nir_initial_disasm) {
+      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+           stage < MESA_SHADER_STAGES;
+           stage = (gl_shader_stage) (stage + 1)) {
+         if (!nir[stage])
+            continue;
+
+         nir_initial_disasm[stage] =
+            nir_shader_as_str(nir[stage], nir_initial_disasm_mem_ctx);
+      }
+   }
+
+   tu_link_shaders(nir, MESA_SHADER_STAGES);
+
+   if (nir_out) {
+      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+           stage < MESA_SHADER_STAGES; stage = (gl_shader_stage) (stage + 1)) {
+         if (!nir[stage])
+            continue;
+
+         nir_out[stage] = nir_shader_clone(NULL, nir[stage]);
+      }
+   }
+
+   /* With pipelines, tessellation modes can be set on either shader, for
+    * compatibility with HLSL and GLSL, and the driver is supposed to merge
+    * them. Shader objects requires modes to be set on at least the TES except
+    * for OutputVertices which has to be set at least on the TCS. Make sure
+    * all modes are set on the TES when compiling together multiple shaders,
+    * and then from this point on we will use the modes in the TES (and output
+    * vertices on the TCS).
+    */
+   if (nir[MESA_SHADER_TESS_EVAL]) {
+      nir_shader *tcs = nir[MESA_SHADER_TESS_CTRL];
+      nir_shader *tes = nir[MESA_SHADER_TESS_EVAL];
+
+      if (tes->info.tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED)
+         tes->info.tess._primitive_mode = tcs->info.tess._primitive_mode;
+
+      tes->info.tess.point_mode |= tcs->info.tess.point_mode;
+      tes->info.tess.ccw |= tcs->info.tess.ccw;
+
+      if (tes->info.tess.spacing == TESS_SPACING_UNSPECIFIED) {
+         tes->info.tess.spacing = tcs->info.tess.spacing;
+      }
+
+      if (tcs->info.tess.tcs_vertices_out == 0)
+         tcs->info.tess.tcs_vertices_out = tes->info.tess.tcs_vertices_out;
+
+      ir3_key.tessellation = tu6_get_tessmode(tes);
+   }
+
+   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
+        stage = (gl_shader_stage) (stage + 1)) {
+      if (!nir[stage])
+         continue;
+
+      if (stage > MESA_SHADER_TESS_CTRL) {
+         if (stage == MESA_SHADER_FRAGMENT) {
+            ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
+               (nir[stage]->info.inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID));
+         } else {
+            ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
+               BITSET_TEST(nir[stage]->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
+         }
+      }
+   }
+
+   /* In the the tess-but-not-FS case we don't know whether the FS will read
+    * PrimID so we need to unconditionally store it.
+    */
+   if (nir[MESA_SHADER_TESS_CTRL] && !nir[MESA_SHADER_FRAGMENT])
+      ir3_key.tcs_store_primid = true;
+
+   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
+        stage = (gl_shader_stage) (stage + 1)) {
+      if (!nir[stage] || shaders[stage])
+         continue;
+
+      int64_t stage_start = os_time_get_nano();
+
+      unsigned char shader_sha1[21];
+      memcpy(shader_sha1, pipeline_sha1, 20);
+      shader_sha1[20] = (unsigned char) stage;
+
+      result = tu_shader_create(device,
+                                &shaders[stage], nir[stage], &keys[stage],
+                                &ir3_key, shader_sha1, sizeof(shader_sha1),
+                                layout, !!nir_initial_disasm);
+      if (result != VK_SUCCESS) {
+         goto fail;
+      }
+
+      stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
+   }
+
+   ralloc_free(mem_ctx);
+
+   return VK_SUCCESS;
+
+fail:
+   ralloc_free(mem_ctx);
+
+   for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
+        stage = (gl_shader_stage) (stage + 1)) {
+      if (shaders[stage]) {
+         tu_shader_destroy(device, shaders[stage]);
+      }
+      if (nir_out && nir_out[stage]) {
+         ralloc_free(nir_out[stage]);
+      }
+   }
+
+   return result;
+}
+
+void
+tu_shader_key_subgroup_size(struct tu_shader_key *key,
+                            bool allow_varying_subgroup_size,
+                            bool require_full_subgroups,
+                            const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_info,
+                            struct tu_device *dev)
+{
+   enum ir3_wavesize_option api_wavesize, real_wavesize;
+   if (!dev->physical_device->info->a6xx.supports_double_threadsize) {
+      api_wavesize = IR3_SINGLE_ONLY;
+      real_wavesize = IR3_SINGLE_ONLY;
+   } else {
+      if (allow_varying_subgroup_size) {
+         api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
+      } else {
+         if (subgroup_info) {
+            if (subgroup_info->requiredSubgroupSize == dev->compiler->threadsize_base) {
+               api_wavesize = IR3_SINGLE_ONLY;
+            } else {
+               assert(subgroup_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2);
+               api_wavesize = IR3_DOUBLE_ONLY;
+            }
+         } else {
+            /* Match the exposed subgroupSize. */
+            api_wavesize = IR3_DOUBLE_ONLY;
+         }
+
+         if (require_full_subgroups)
+            real_wavesize = api_wavesize;
+         else if (api_wavesize == IR3_SINGLE_ONLY)
+            real_wavesize = IR3_SINGLE_ONLY;
+         else
+            real_wavesize = IR3_SINGLE_OR_DOUBLE;
+      }
+   }
+
+   key->api_wavesize = api_wavesize;
+   key->real_wavesize = real_wavesize;
+}
+
 static VkResult
 tu_empty_shader_create(struct tu_device *dev,
                        struct tu_shader **shader_out,
diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h
index eed38243e9d..495c42d962e 100644
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@@ -153,6 +153,26 @@ tu_shader_create(struct tu_device *dev,
                  struct tu_pipeline_layout *layout,
                  bool executable_info);
 
+void
+tu_shader_key_subgroup_size(struct tu_shader_key *key,
+                            bool allow_varying_subgroup_size,
+                            bool require_full_subgroups,
+                            const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_info,
+                            struct tu_device *dev);
+
+VkResult
+tu_compile_shaders(struct tu_device *device,
+                   const VkPipelineShaderStageCreateInfo **stage_infos,
+                   nir_shader **nir,
+                   const struct tu_shader_key *keys,
+                   struct tu_pipeline_layout *layout,
+                   const unsigned char *pipeline_sha1,
+                   struct tu_shader **shaders,
+                   char **nir_initial_disasm,
+                   void *nir_initial_disasm_mem_ctx,
+                   nir_shader **nir_out,
+                   VkPipelineCreationFeedback *stage_feedbacks);
+
 VkResult
 tu_init_empty_shaders(struct tu_device *device);
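
Taken together, the declarations added to tu_shader.h define a two-step contract: fill out a per-stage tu_shader_key with tu_shader_key_subgroup_size(), then hand every present stage to tu_compile_shaders(), which now owns SPIR-V-to-NIR translation, cross-stage linking (tu_link_shaders), and ir3 compilation. The sketch below is illustrative only and not part of the commit: it assumes just the two declarations above, elides the sha1 hashing and pipeline caching the real callers perform, and the helper name is hypothetical.

/* Hypothetical caller, for illustration only -- not part of this commit.
 * It mirrors the call sequence used by tu_pipeline_builder_compile_shaders()
 * and tu_compute_pipeline_create() in the diff above.
 */
static VkResult
example_compile_all_stages(struct tu_device *device,
                           const VkPipelineShaderStageCreateInfo **stage_infos,
                           struct tu_pipeline_layout *layout,
                           const unsigned char pipeline_sha1[20],
                           struct tu_shader **shaders,
                           VkPipelineCreationFeedback *stage_feedbacks)
{
   nir_shader *nir[MESA_SHADER_STAGES] = { NULL };
   struct tu_shader_key keys[MESA_SHADER_STAGES] = { };

   /* Step 1: pick the per-stage wavesize up front. Passing
    * allow_varying_subgroup_size = true with no subgroup_info matches the
    * "no stage info" default in the pipeline path; a real caller derives
    * both from the stage create flags and from any chained
    * VkPipelineShaderStageRequiredSubgroupSizeCreateInfo.
    */
   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
        stage < MESA_SHADER_STAGES; stage = (gl_shader_stage) (stage + 1)) {
      tu_shader_key_subgroup_size(&keys[stage],
                                  true /* allow_varying_subgroup_size */,
                                  false /* require_full_subgroups */,
                                  NULL /* subgroup_info */, device);
   }

   /* Step 2: one call now covers SPIR-V->NIR translation, cross-stage
    * linking, and ir3 compilation for every stage with a create info.
    * The NULL arguments opt out of initial-NIR disassembly and post-link
    * NIR retention.
    */
   return tu_compile_shaders(device, stage_infos, nir, keys, layout,
                             pipeline_sha1, shaders,
                             NULL /* nir_initial_disasm */,
                             NULL /* nir_initial_disasm_mem_ctx */,
                             NULL /* nir_out */,
                             stage_feedbacks);
}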
