Module: Mesa
Branch: main
Commit: 5e20fbd424543d2c919c8baae247f0d909659640
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5e20fbd424543d2c919c8baae247f0d909659640

Author: Rhys Perry <[email protected]>
Date:   Fri Feb 11 19:19:45 2022 +0000

aco: don't move exec reads around exec writes

Fixes flickering and blocky plants in Jedi: Fallen Order.

Also fixes flickering squares in The Last of Us Part 1.

fossil-db (navi21):
Totals from 92 (0.07% of 135636) affected shaders:
Instrs: 35324 -> 35354 (+0.08%); split: -0.03%, +0.11%
CodeSize: 189568 -> 189668 (+0.05%); split: -0.03%, +0.08%
Latency: 345305 -> 346529 (+0.35%); split: -0.02%, +0.37%
InvThroughput: 78632 -> 78625 (-0.01%)
SClause: 1955 -> 1972 (+0.87%); split: -0.61%, +1.48%
Copies: 1311 -> 1304 (-0.53%); split: -0.69%, +0.15%

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Timur Kristóf <[email protected]>
Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8883
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8878
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22696>

---

 src/amd/compiler/aco_scheduler.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index 94429b34393..690773a863e 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -459,6 +459,7 @@ struct hazard_query {
    bool contains_spill;
    bool contains_sendmsg;
    bool uses_exec;
+   bool writes_exec;
    memory_event_set mem_events;
    unsigned aliasing_storage;      /* storage classes which are accessed (non-SMEM) */
    unsigned aliasing_storage_smem; /* storage classes which are accessed (SMEM) */
@@ -471,6 +472,7 @@ init_hazard_query(const sched_ctx& ctx, hazard_query* query)
    query->contains_spill = false;
    query->contains_sendmsg = false;
    query->uses_exec = false;
+   query->writes_exec = false;
    memset(&query->mem_events, 0, sizeof(query->mem_events));
    query->aliasing_storage = 0;
    query->aliasing_storage_smem = 0;
@@ -515,6 +517,10 @@ add_to_hazard_query(hazard_query* query, Instruction* instr)
       query->contains_spill = true;
    query->contains_sendmsg |= instr->opcode == aco_opcode::s_sendmsg;
    query->uses_exec |= needs_exec_mask(instr);
+   for (const Definition& def : instr->definitions) {
+      if (def.isFixed() && def.physReg() == exec)
+         query->writes_exec = true;
+   }
 
    memory_sync_info sync = get_sync_info_with_hack(instr);
 
@@ -560,6 +566,8 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
             return hazard_fail_exec;
       }
    }
+   if (query->writes_exec && needs_exec_mask(instr))
+      return hazard_fail_exec;
 
    /* don't move exports so that they stay closer together */
    if (instr->isEXP())

Reply via email to