From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d2b9b73e039..a0a00d722cb 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6147,30 +6147,38 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
                    (ctx->type == PIPE_SHADER_TESS_EVAL ||
                     (ctx->type == PIPE_SHADER_VERTEX &&
                      !si_vs_needs_prolog(sel, &shader->key.part.vs.prolog)))) {
                        si_init_exec_from_input(ctx,
                                                ctx->param_merged_wave_info, 0);
                } else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
                           ctx->type == PIPE_SHADER_GEOMETRY) {
                        if (!is_monolithic)
                                ac_init_exec_full_mask(&ctx->ac);
 
-                       /* The barrier must execute for all shaders in a
-                        * threadgroup.
-                        */
-                       si_llvm_emit_barrier(NULL, bld_base, NULL);
-
                        LLVMValueRef num_threads = si_unpack_param(ctx, 
ctx->param_merged_wave_info, 8, 8);
                        LLVMValueRef ena =
                                LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
                                            ac_get_thread_id(&ctx->ac), 
num_threads, "");
                        lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, 
ena);
+
+                       /* The barrier must execute for all shaders in a
+                        * threadgroup.
+                        *
+                        * Execute the barrier inside the conditional block,
+                        * so that empty waves can jump directly to s_endpgm,
+                        * which will also signal the barrier.
+                        *
+                        * If the shader is TCS and the TCS epilog is present
+                        * and contains a barrier, it will wait there and then
+                        * reach s_endpgm.
+                        */
+                       si_llvm_emit_barrier(NULL, bld_base, NULL);
                }
        }
 
        if (ctx->type == PIPE_SHADER_TESS_CTRL &&
            sel->tcs_info.tessfactors_are_def_in_all_invocs) {
                for (unsigned i = 0; i < 6; i++) {
                        ctx->invoc0_tess_factors[i] =
                                lp_build_alloca_undef(&ctx->gallivm, ctx->i32, 
"");
                }
        }
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to