Module: Mesa Branch: master Commit: 5f1b3544729178715a1ed0714bd1029737089824 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5f1b3544729178715a1ed0714bd1029737089824
Author: Rhys Perry <[email protected]> Date: Thu Feb 25 15:37:17 2021 +0000 aco: calculate all p_as_uniform and v_readfirstlane_b32 sources in WQM We should avoid a situation where a v_readfirstlane_b32 is in WQM but its source is calculated in Exact. Fixes hang when running Assassin's Creed: Valhalla benchmark. fossil-db (GFX10.3): Totals from 1021 (0.70% of 146267) affected shaders: CodeSize: 7835228 -> 7842992 (+0.10%); split: -0.00%, +0.10% Instrs: 1519208 -> 1521149 (+0.13%); split: -0.00%, +0.13% SClause: 78921 -> 78920 (-0.00%) Copies: 44456 -> 45421 (+2.17%); split: -0.05%, +2.22% Branches: 12987 -> 13933 (+7.28%) PreSGPRs: 47599 -> 47813 (+0.45%) Cycles: 10037540 -> 10045304 (+0.08%); split: -0.00%, +0.08% VMEM: 538381 -> 538777 (+0.07%); split: +0.11%, -0.03% SMEM: 84553 -> 84554 (+0.00%); split: +0.01%, -0.01% Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Daniel Schürmann <[email protected]> Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9288> --- src/amd/compiler/aco_insert_exec_mask.cpp | 3 ++- src/amd/compiler/aco_instruction_selection.cpp | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index ea6e1218a90..49367e39830 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -143,7 +143,8 @@ void get_block_needs(wqm_ctx &ctx, exec_ctx &exec_ctx, Block* block) aco_ptr<Instruction>& instr = block->instructions[i]; WQMState needs = needs_exact(instr) ? 
Exact : Unspecified; - bool propagate_wqm = instr->opcode == aco_opcode::p_wqm; + bool propagate_wqm = instr->opcode == aco_opcode::p_wqm || + instr->opcode == aco_opcode::p_as_uniform; bool preserve_wqm = instr->opcode == aco_opcode::p_discard_if; bool pred_by_exec = needs_exec_mask(instr.get()); for (const Definition& definition : instr->definitions) { diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index c2c4ed868d4..2422d46177f 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -736,8 +736,10 @@ Temp convert_pointer_to_64_bit(isel_context *ctx, Temp ptr, bool non_uniform=fal if (ptr.size() == 2) return ptr; Builder bld(ctx->program, ctx->block); - if (ptr.type() == RegType::vgpr && !non_uniform) + if (ptr.type() == RegType::vgpr && !non_uniform) { ptr = bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), ptr); + ptr = emit_wqm(bld, ptr); + } return bld.pseudo(aco_opcode::p_create_vector, bld.def(RegClass(ptr.type(), 2)), ptr, Operand((unsigned)ctx->options->address32_hi)); } @@ -5696,8 +5698,10 @@ Temp get_sampler_desc(isel_context *ctx, nir_deref_instr *deref_instr, constant_index += array_size * const_value->u32; } else { Temp indirect = get_ssa_temp(ctx, deref_instr->arr.index.ssa); - if (indirect.type() == RegType::vgpr) + if (indirect.type() == RegType::vgpr) { indirect = bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), indirect); + indirect = emit_wqm(bld, indirect); + } if (array_size != 1) indirect = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand(array_size), indirect); _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
