From: Connor Abbott <cwabbo...@gmail.com> These optimizations happened to work with derivatives, but they won't with upcoming shader_ballot and group_vote instructions.
Signed-off-by: Connor Abbott <cwabbo...@gmail.com> --- src/compiler/nir/nir_instr_set.c | 22 ++++++++++++++++++++++ src/compiler/nir/nir_opt_peephole_select.c | 11 +++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c index 9cb9ed4..4bd0717 100644 --- a/src/compiler/nir/nir_instr_set.c +++ b/src/compiler/nir/nir_instr_set.c @@ -178,6 +178,14 @@ hash_instr(const void *data) const nir_instr *instr = data; uint32_t hash = _mesa_fnv32_1a_offset_bias; + /* + * In nir_instrs_equal(), we compare the instruction's basic blocks in this + * case. See the comment there for the explanation. + */ + if (nir_instr_is_cross_thread(instr) && !nir_instr_is_convergent(instr)) { + HASH(hash, instr->block); + } + switch (instr->type) { case nir_instr_type_alu: hash = hash_alu(hash, nir_instr_as_alu(instr)); @@ -256,6 +264,20 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) if (instr1->type != instr2->type) return false; + /* + * If the instructions are cross-thread, then they must have the same + * execution mask. If they are convergent, then we can always replace one + * invocation with another since every invocation is guaranteed convergent. + * But not so for non-convergent instructions, since different invocations + * may be called with different execution maskes and therefore have + * different results. Conservatively enforce that the instructions are in + * the same basic block. + */ + if (nir_instr_is_cross_thread(instr1) && !nir_instr_is_convergent(instr1)) { + if (instr1->block != instr2->block) + return false; + } + switch (instr1->type) { case nir_instr_type_alu: { nir_alu_instr *alu1 = nir_instr_as_alu(instr1); diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c index 4ca4f80..ce41781 100644 --- a/src/compiler/nir/nir_opt_peephole_select.c +++ b/src/compiler/nir/nir_opt_peephole_select.c @@ -61,6 +61,17 @@ static bool block_check_for_allowed_instrs(nir_block *block, unsigned *count, bool alu_ok) { nir_foreach_instr(instr, block) { + if (nir_instr_is_cross_thread(instr) && !nir_instr_is_convergent(instr)) { + /* If the instruction is cross-thread, then we can't execute it + * conditionally when we would've executed it unconditionally before, + * except when the condition is uniform. If the instruction is + * convergent, though, we're already guaranteed that the entire + * region is convergent (including the condition) so we can go ahead. + * + * TODO: allow when the if-condition is uniform + */ + return false; + } switch (instr->type) { case nir_instr_type_intrinsic: { nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); -- 2.9.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev