From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index d2b9b73e039..a0a00d722cb 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -6147,30 +6147,38 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, (ctx->type == PIPE_SHADER_TESS_EVAL || (ctx->type == PIPE_SHADER_VERTEX && !si_vs_needs_prolog(sel, &shader->key.part.vs.prolog)))) { si_init_exec_from_input(ctx, ctx->param_merged_wave_info, 0); } else if (ctx->type == PIPE_SHADER_TESS_CTRL || ctx->type == PIPE_SHADER_GEOMETRY) { if (!is_monolithic) ac_init_exec_full_mask(&ctx->ac); - /* The barrier must execute for all shaders in a - * threadgroup. - */ - si_llvm_emit_barrier(NULL, bld_base, NULL); - LLVMValueRef num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8); LLVMValueRef ena = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num_threads, ""); lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena); + + /* The barrier must execute for all shaders in a + * threadgroup. + * + * Execute the barrier inside the conditional block, + * so that empty waves can jump directly to s_endpgm, + * which will also signal the barrier. + * + * If the shader is TCS and the TCS epilog is present + * and contains a barrier, it will wait there and then + * reach s_endpgm. + */ + si_llvm_emit_barrier(NULL, bld_base, NULL); } } if (ctx->type == PIPE_SHADER_TESS_CTRL && sel->tcs_info.tessfactors_are_def_in_all_invocs) { for (unsigned i = 0; i < 6; i++) { ctx->invoc0_tess_factors[i] = lp_build_alloca_undef(&ctx->gallivm, ctx->i32, ""); } } -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev