Module: Mesa
Branch: main
Commit: 8810c89b44bd123b4f084f66c9890d46a3292f12
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8810c89b44bd123b4f084f66c9890d46a3292f12
Author: Connor Abbott <[email protected]> Date: Tue Sep 5 16:15:42 2023 +0200 tu: Move FS-specific pipeline information to the shader Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25276> --- src/freedreno/vulkan/tu_cmd_buffer.cc | 30 ++++++++++++------ src/freedreno/vulkan/tu_device.h | 2 +- src/freedreno/vulkan/tu_lrz.cc | 6 ++-- src/freedreno/vulkan/tu_pipeline.cc | 47 +++------------------------- src/freedreno/vulkan/tu_pipeline.h | 12 ------- src/freedreno/vulkan/tu_shader.cc | 59 +++++++++++++++++++++++++++++++++-- src/freedreno/vulkan/tu_shader.h | 12 +++++++ 7 files changed, 98 insertions(+), 70 deletions(-) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index e7168370368..bb8006d207e 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -2974,7 +2974,10 @@ tu_bind_gs(struct tu_cmd_buffer *cmd, struct tu_shader *gs) static void tu_bind_fs(struct tu_cmd_buffer *cmd, struct tu_shader *fs) { - cmd->state.shaders[MESA_SHADER_FRAGMENT] = fs; + if (cmd->state.shaders[MESA_SHADER_FRAGMENT] != fs) { + cmd->state.shaders[MESA_SHADER_FRAGMENT] = fs; + cmd->state.dirty |= TU_CMD_DIRTY_LRZ; + } } VKAPI_ATTR void VKAPI_CALL @@ -4457,8 +4460,11 @@ tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd, struct tu_cs *cs) bool depth_test_enable = cmd->vk.dynamic_graphics_state.ds.depth.test_enable; bool depth_write = tu6_writes_depth(cmd, depth_test_enable); bool stencil_write = tu6_writes_stencil(cmd); + const struct tu_shader *fs = cmd->state.shaders[MESA_SHADER_FRAGMENT]; + const struct tu_render_pass *pass = cmd->state.pass; + const struct tu_subpass *subpass = cmd->state.subpass; - if ((cmd->state.pipeline->base.lrz.fs.has_kill || + if ((fs->variant->has_kill || cmd->state.pipeline->feedback_loop_ds) && (depth_write || stencil_write)) { zmode = (cmd->state.lrz.valid && cmd->state.lrz.enabled) @@ -4466,15 +4472,19 @@ tu6_build_depth_plane_z_mode(struct 
tu_cmd_buffer *cmd, struct tu_cs *cs) : A6XX_LATE_Z; } - bool force_late_z = cmd->state.pipeline->base.lrz.force_late_z || + bool force_late_z = + (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED && + pass->attachments[subpass->depth_stencil_attachment.attachment].format + == VK_FORMAT_S8_UINT) || + fs->fs.lrz.force_late_z || /* alpha-to-coverage can behave like a discard. */ cmd->vk.dynamic_graphics_state.ms.alpha_to_coverage_enable; - if ((force_late_z && !cmd->state.pipeline->base.lrz.fs.force_early_z) || + if ((force_late_z && !fs->variant->fs.early_fragment_tests) || !depth_test_enable) zmode = A6XX_LATE_Z; /* User defined early tests take precedence above all else */ - if (cmd->state.pipeline->base.lrz.fs.early_fragment_tests) + if (fs->variant->fs.early_fragment_tests) zmode = A6XX_EARLY_Z; tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL, 1); @@ -4544,11 +4554,11 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd) return; } - struct tu_graphics_pipeline *pipeline = cmd->state.pipeline; + struct tu_shader *fs = cmd->state.shaders[MESA_SHADER_FRAGMENT]; unsigned num_units = fs_params_size(cmd); - if (pipeline->has_fdm) + if (fs->fs.has_fdm) tu_cs_set_writeable(&cmd->sub_cs, true); struct tu_cs cs; @@ -4569,7 +4579,7 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd) tu_cs_emit(&cs, 0); STATIC_ASSERT(IR3_DP_FS_FRAG_INVOCATION_COUNT == IR3_DP_FS_DYNAMIC); - tu_cs_emit(&cs, pipeline->base.fs.per_samp ? + tu_cs_emit(&cs, fs->fs.per_samp ? 
cmd->vk.dynamic_graphics_state.ms.rasterization_samples : 1); tu_cs_emit(&cs, 0); tu_cs_emit(&cs, 0); @@ -4578,7 +4588,7 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd) STATIC_ASSERT(IR3_DP_FS_FRAG_SIZE == IR3_DP_FS_DYNAMIC + 4); STATIC_ASSERT(IR3_DP_FS_FRAG_OFFSET == IR3_DP_FS_DYNAMIC + 6); if (num_units > 1) { - if (pipeline->has_fdm) { + if (fs->fs.has_fdm) { struct apply_fs_params_state state = { .num_consts = num_units - 1, }; @@ -4596,7 +4606,7 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd) cmd->state.fs_params = tu_cs_end_draw_state(&cmd->sub_cs, &cs); - if (pipeline->has_fdm) + if (fs->fs.has_fdm) tu_cs_set_writeable(&cmd->sub_cs, false); } diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index 286789af6e8..4878823fc47 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -287,7 +287,7 @@ struct tu_device struct ir3_shader *global_shaders[GLOBAL_SH_COUNT]; uint64_t global_shader_va[GLOBAL_SH_COUNT]; - struct tu_shader *empty_tcs, *empty_tes, *empty_gs, *empty_fs; + struct tu_shader *empty_tcs, *empty_tes, *empty_gs, *empty_fs, *empty_fs_fdm; uint32_t vsc_draw_strm_pitch; uint32_t vsc_prim_strm_pitch; diff --git a/src/freedreno/vulkan/tu_lrz.cc b/src/freedreno/vulkan/tu_lrz.cc index 2744e6bfdf5..3c8baf1e9d1 100644 --- a/src/freedreno/vulkan/tu_lrz.cc +++ b/src/freedreno/vulkan/tu_lrz.cc @@ -559,6 +559,7 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, const uint32_t a) { struct tu_pipeline *pipeline = &cmd->state.pipeline->base; + const struct tu_shader *fs = cmd->state.shaders[MESA_SHADER_FRAGMENT]; bool z_test_enable = cmd->vk.dynamic_graphics_state.ds.depth.test_enable; bool z_write_enable = cmd->vk.dynamic_graphics_state.ds.depth.write_enable; bool z_bounds_enable = cmd->vk.dynamic_graphics_state.ds.depth.bounds_test.enable; @@ -587,7 +588,8 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, gras_lrz_cntl.enable = true; gras_lrz_cntl.lrz_write = z_write_enable && - 
!(pipeline->lrz.lrz_status & TU_LRZ_FORCE_DISABLE_WRITE); + !(pipeline->lrz.lrz_status & TU_LRZ_FORCE_DISABLE_WRITE) && + !(fs->fs.lrz.status & TU_LRZ_FORCE_DISABLE_WRITE); gras_lrz_cntl.z_test_enable = z_write_enable; gras_lrz_cntl.z_bounds_enable = z_bounds_enable; gras_lrz_cntl.fc_enable = cmd->state.lrz.fast_clear; @@ -608,7 +610,7 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, * fragment tests. We have to skip LRZ testing and updating, but as long as * the depth direction stayed the same we can continue with LRZ testing later. */ - if (pipeline->lrz.lrz_status & TU_LRZ_FORCE_DISABLE_LRZ) { + if (fs->fs.lrz.status & TU_LRZ_FORCE_DISABLE_LRZ) { if (cmd->state.lrz.prev_direction != TU_LRZ_UNKNOWN || !cmd->state.lrz.gpu_dir_tracking) { perf_debug(cmd->device, "Skipping LRZ due to FS"); temporary_disable_lrz = true; diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index d1be0cf9b4f..86c5d7cc2a6 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -933,29 +933,6 @@ tu6_emit_vpc(struct tu_cs *cs, } TU_GENX(tu6_emit_vpc); -static void -tu_emit_fs_pipeline(const struct ir3_shader_variant *fs, - struct tu_pipeline *pipeline) -{ - if (fs->has_kill) { - pipeline->lrz.lrz_status |= TU_LRZ_FORCE_DISABLE_WRITE; - } - if (fs->no_earlyz || fs->writes_pos) { - pipeline->lrz.lrz_status = TU_LRZ_FORCE_DISABLE_LRZ; - } - pipeline->lrz.fs.has_kill = fs->has_kill; - pipeline->lrz.fs.early_fragment_tests = fs->fs.early_fragment_tests; - - if (!fs->fs.early_fragment_tests && - (fs->no_earlyz || fs->writes_pos || fs->writes_stencilref || fs->writes_smask)) { - pipeline->lrz.force_late_z = true; - } - - pipeline->lrz.fs.force_early_z = fs->fs.early_fragment_tests; - - pipeline->fs.per_samp = fs->per_samp || fs->key.sample_shading; -} - static void tu6_emit_vs_params(struct tu_cs *cs, const struct ir3_const_state *const_state, @@ -2024,7 +2001,9 @@ done: if (builder->state & 
VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) { if (!shaders[MESA_SHADER_FRAGMENT]) { - shaders[MESA_SHADER_FRAGMENT] = builder->device->empty_fs; + shaders[MESA_SHADER_FRAGMENT] = + builder->fragment_density_map ? + builder->device->empty_fs_fdm : builder->device->empty_fs; vk_pipeline_cache_object_ref(&shaders[MESA_SHADER_FRAGMENT]->base); } } @@ -2177,10 +2156,7 @@ tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder, if (library->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) { pipeline->ds = library->base.ds; - pipeline->fs = library->base.fs; - pipeline->lrz.fs = library->base.lrz.fs; pipeline->lrz.lrz_status |= library->base.lrz.lrz_status; - pipeline->lrz.force_late_z |= library->base.lrz.force_late_z; pipeline->shared_consts = library->base.shared_consts; } @@ -2188,7 +2164,6 @@ tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder, VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) { pipeline->output = library->base.output; pipeline->lrz.lrz_status |= library->base.lrz.lrz_status; - pipeline->lrz.force_late_z |= library->base.lrz.force_late_z; pipeline->prim_order = library->base.prim_order; } @@ -2422,8 +2397,6 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, !last_shader->writes_viewport && builder->fragment_density_map && builder->device->physical_device->info->a6xx.has_per_view_viewport; - - tu_emit_fs_pipeline(fs, pipeline); } static const enum mesa_vk_dynamic_graphics_state tu_vertex_input_state[] = { @@ -3608,7 +3581,7 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd) #define DRAW_STATE_FDM(name, id, ...) 
\ if ((EMIT_STATE(name) || (cmd->state.dirty & TU_CMD_DIRTY_FDM)) && \ !(cmd->state.pipeline->base.set_state_mask & (1u << id))) { \ - if (cmd->state.pipeline->has_fdm) { \ + if (cmd->state.shaders[MESA_SHADER_FRAGMENT]->fs.has_fdm) { \ tu_cs_set_writeable(&cmd->sub_cs, true); \ tu6_emit_##name##_fdm(&cs, cmd, __VA_ARGS__); \ cmd->state.dynamic_state[id] = \ @@ -3727,14 +3700,6 @@ tu_pipeline_builder_parse_depth_stencil( (VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM | VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_ARM); } - - /* FDM isn't compatible with LRZ, because the LRZ image uses the original - * resolution and we would need to use the low resolution. - * - * TODO: Use a patchpoint to only disable LRZ for scaled bins. - */ - if (builder->fragment_density_map) - pipeline->lrz.lrz_status = TU_LRZ_FORCE_DISABLE_LRZ; } static void @@ -3768,9 +3733,6 @@ tu_pipeline_builder_parse_multisample_and_color_blend( VK_IMAGE_ASPECT_COLOR_BIT) ? 
builder->create_info->pColorBlendState : &dummy_blend_info; - pipeline->lrz.force_late_z |= - builder->graphics_state.rp->depth_attachment_format == VK_FORMAT_S8_UINT; - if (builder->graphics_state.rp->attachment_aspects & VK_IMAGE_ASPECT_COLOR_BIT) { pipeline->output.raster_order_attachment_access = blend_info->flags & @@ -4017,7 +3979,6 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder, (gfx_pipeline->feedback_loop_color || gfx_pipeline->feedback_loop_ds) && !builder->graphics_state.rp->feedback_loop_input_only; - gfx_pipeline->has_fdm = builder->fragment_density_map; } return VK_SUCCESS; diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h index 59b1c106722..f5b57704c92 100644 --- a/src/freedreno/vulkan/tu_pipeline.h +++ b/src/freedreno/vulkan/tu_pipeline.h @@ -40,13 +40,6 @@ struct tu_lrz_pipeline { uint32_t lrz_status; - struct { - bool has_kill; - bool force_early_z; - bool early_fragment_tests; - } fs; - - bool force_late_z; bool blend_valid; }; @@ -150,10 +143,6 @@ struct tu_pipeline struct tu_shader *shaders[MESA_SHADER_STAGES]; - struct { - bool per_samp; - } fs; - struct { struct tu_draw_state config_state; @@ -222,7 +211,6 @@ struct tu_graphics_pipeline { bool feedback_loop_color, feedback_loop_ds; bool feedback_loop_may_involve_textures; - bool has_fdm; }; struct tu_compute_pipeline { diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index b3f9b3163ac..dc62737b264 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -18,6 +18,7 @@ #include "tu_device.h" #include "tu_descriptor_set.h" #include "tu_pipeline.h" +#include "tu_lrz.h" nir_shader * tu_spirv_to_nir(struct tu_device *dev, @@ -2095,6 +2096,14 @@ tu_shader_serialize(struct vk_pipeline_cache_object *object, blob_write_uint8(blob, 0); } + switch (shader->variant->type) { + case MESA_SHADER_FRAGMENT: + blob_write_bytes(blob, &shader->fs, sizeof(shader->fs)); + break; + default: + 
break; + } + return true; } @@ -2122,6 +2131,14 @@ tu_shader_deserialize(struct vk_pipeline_cache *cache, if (has_safe_const) shader->safe_const_variant = ir3_retrieve_variant(blob, dev->compiler, NULL); + switch (shader->variant->type) { + case MESA_SHADER_FRAGMENT: + blob_copy_bytes(blob, &shader->fs, sizeof(shader->fs)); + break; + default: + break; + } + VkResult result = tu_upload_shader(dev, shader); if (result != VK_SUCCESS) { vk_free(&dev->vk.alloc, shader); @@ -2279,6 +2296,32 @@ tu_shader_create(struct tu_device *dev, shader->view_mask = key->multiview_mask; + switch (shader->variant->type) { + case MESA_SHADER_FRAGMENT: { + const struct ir3_shader_variant *fs = shader->variant; + shader->fs.per_samp = fs->per_samp || ir3_key->sample_shading; + shader->fs.has_fdm = key->fragment_density_map; + if (fs->has_kill) + shader->fs.lrz.status |= TU_LRZ_FORCE_DISABLE_WRITE; + if (fs->no_earlyz || fs->writes_pos) + shader->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ; + /* FDM isn't compatible with LRZ, because the LRZ image uses the original + * resolution and we would need to use the low resolution. + * + * TODO: Use a patchpoint to only disable LRZ for scaled bins. 
+ */ + if (key->fragment_density_map) + shader->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ; + if (!fs->fs.early_fragment_tests && + (fs->no_earlyz || fs->writes_pos || fs->writes_stencilref || fs->writes_smask)) { + shader->fs.lrz.force_late_z = true; + } + break; + } + default: + break; + } + VkResult result = tu_upload_shader(dev, shader); if (result != VK_SUCCESS) { vk_free(&dev->vk.alloc, shader); @@ -2323,7 +2366,8 @@ tu_empty_shader_create(struct tu_device *dev, } static VkResult -tu_empty_fs_create(struct tu_device *dev, struct tu_shader **shader) +tu_empty_fs_create(struct tu_device *dev, struct tu_shader **shader, + bool fragment_density_map) { struct ir3_shader_key key = {}; const struct ir3_shader_options options = {}; @@ -2339,6 +2383,10 @@ tu_empty_fs_create(struct tu_device *dev, struct tu_shader **shader) if (!*shader) return VK_ERROR_OUT_OF_HOST_MEMORY; + (*shader)->fs.has_fdm = fragment_density_map; + if (fragment_density_map) + (*shader)->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ; + struct ir3_shader *ir3_shader = ir3_shader_from_nir(dev->compiler, fs_b.shader, &options, &so_info); (*shader)->variant = ir3_shader_create_variant(ir3_shader, &key, false); @@ -2363,7 +2411,11 @@ tu_init_empty_shaders(struct tu_device *dev) if (result != VK_SUCCESS) goto out; - result = tu_empty_fs_create(dev, &dev->empty_fs); + result = tu_empty_fs_create(dev, &dev->empty_fs, false); + if (result != VK_SUCCESS) + goto out; + + result = tu_empty_fs_create(dev, &dev->empty_fs_fdm, true); if (result != VK_SUCCESS) goto out; @@ -2378,6 +2430,8 @@ out: vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_gs->base); if (dev->empty_fs) vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_fs->base); + if (dev->empty_fs_fdm) + vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_fs_fdm->base); return result; } @@ -2388,6 +2442,7 @@ tu_destroy_empty_shaders(struct tu_device *dev) vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_tes->base); 
vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_gs->base); vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_fs->base); + vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_fs_fdm->base); } void diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h index a89ebe07d9d..15e5e9df858 100644 --- a/src/freedreno/vulkan/tu_shader.h +++ b/src/freedreno/vulkan/tu_shader.h @@ -62,6 +62,18 @@ struct tu_shader struct tu_const_state const_state; uint32_t view_mask; uint8_t active_desc_sets; + + union { + struct { + bool per_samp; + bool has_fdm; + + struct { + uint32_t status; + bool force_late_z; + } lrz; + } fs; + }; }; struct tu_shader_key {
