Module: Mesa
Branch: main
Commit: 484df5b30e815ffb8b8664580ef1b79d91a4d970
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=484df5b30e815ffb8b8664580ef1b79d91a4d970

Author: Rhys Perry <[email protected]>
Date:   Thu Aug 25 17:32:49 2022 +0100

aco: add search_backwards helper

This will be useful for VALUPartialForwardingHazard.

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18273>

---

 src/amd/compiler/aco_insert_NOPs.cpp | 122 ++++++++++++++++++++++-------------
 1 file changed, 77 insertions(+), 45 deletions(-)

diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index a932efe2105..f4eb8165483 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -210,16 +210,72 @@ regs_intersect(PhysReg a_reg, unsigned a_size, PhysReg b_reg, unsigned b_size)
    return a_reg > b_reg ? (a_reg - b_reg < b_size) : (b_reg - a_reg < a_size);
 }
 
+template <typename GlobalState, typename BlockState,
+          bool (*block_cb)(GlobalState&, BlockState&, Block*),
+          bool (*instr_cb)(GlobalState&, BlockState&, aco_ptr<Instruction>&)>
+void
+search_backwards_internal(State& state, GlobalState& global_state, BlockState 
block_state,
+                          Block* block, bool start_at_end)
+{
+   if (block == state.block && start_at_end) {
+      /* If it's the current block, block->instructions is incomplete. */
+      for (int pred_idx = state.old_instructions.size() - 1; pred_idx >= 0; 
pred_idx--) {
+         aco_ptr<Instruction>& instr = state.old_instructions[pred_idx];
+         if (!instr)
+            break; /* Instruction has been moved to block->instructions. */
+         if (instr_cb(global_state, block_state, instr))
+            return;
+      }
+   }
+
+   for (int pred_idx = block->instructions.size() - 1; pred_idx >= 0; 
pred_idx--) {
+      if (instr_cb(global_state, block_state, block->instructions[pred_idx]))
+         return;
+   }
+
+PRAGMA_DIAGNOSTIC_PUSH
+PRAGMA_DIAGNOSTIC_IGNORED(-Waddress)
+   if (block_cb != nullptr && !block_cb(global_state, block_state, block))
+      return;
+PRAGMA_DIAGNOSTIC_POP
+
+   for (unsigned lin_pred : block->linear_preds) {
+      search_backwards_internal<GlobalState, BlockState, block_cb, instr_cb>(
+         state, global_state, block_state, &state.program->blocks[lin_pred], 
true);
+   }
+}
+
+template <typename GlobalState, typename BlockState,
+          bool (*block_cb)(GlobalState&, BlockState&, Block*),
+          bool (*instr_cb)(GlobalState&, BlockState&, aco_ptr<Instruction>&)>
+void
+search_backwards(State& state, GlobalState& global_state, BlockState& 
block_state)
+{
+   search_backwards_internal<GlobalState, BlockState, block_cb, instr_cb>(
+      state, global_state, block_state, state.block, false);
+}
+
+struct HandleRawHazardGlobalState {
+   PhysReg reg;
+   int nops_needed;
+};
+
+struct HandleRawHazardBlockState {
+   uint32_t mask;
+   int nops_needed;
+};
+
 template <bool Valu, bool Vintrp, bool Salu>
 bool
-handle_raw_hazard_instr(aco_ptr<Instruction>& pred, PhysReg reg, int* 
nops_needed, uint32_t* mask)
+handle_raw_hazard_instr(HandleRawHazardGlobalState& global_state,
+                        HandleRawHazardBlockState& block_state, 
aco_ptr<Instruction>& pred)
 {
-   unsigned mask_size = util_last_bit(*mask);
+   unsigned mask_size = util_last_bit(block_state.mask);
 
    uint32_t writemask = 0;
    for (Definition& def : pred->definitions) {
-      if (regs_intersect(reg, mask_size, def.physReg(), def.size())) {
-         unsigned start = def.physReg() > reg ? def.physReg() - reg : 0;
+      if (regs_intersect(global_state.reg, mask_size, def.physReg(), 
def.size())) {
+         unsigned start = def.physReg() > global_state.reg ? def.physReg() - 
global_state.reg : 0;
          unsigned end = MIN2(mask_size, start + def.size());
          writemask |= u_bit_consecutive(start, end - start);
       }
@@ -227,61 +283,37 @@ handle_raw_hazard_instr(aco_ptr<Instruction>& pred, PhysReg reg, int* nops_neede
 
    bool is_hazard = writemask != 0 && ((pred->isVALU() && Valu) || 
(pred->isVINTRP() && Vintrp) ||
                                        (pred->isSALU() && Salu));
-   if (is_hazard)
+   if (is_hazard) {
+      global_state.nops_needed = MAX2(global_state.nops_needed, 
block_state.nops_needed);
       return true;
+   }
 
-   *mask &= ~writemask;
-   *nops_needed = MAX2(*nops_needed - get_wait_states(pred), 0);
+   block_state.mask &= ~writemask;
+   block_state.nops_needed = MAX2(block_state.nops_needed - 
get_wait_states(pred), 0);
 
-   if (*mask == 0)
-      *nops_needed = 0;
+   if (block_state.mask == 0)
+      block_state.nops_needed = 0;
 
-   return *nops_needed == 0;
+   return block_state.nops_needed == 0;
 }
 
 template <bool Valu, bool Vintrp, bool Salu>
-int
-handle_raw_hazard_internal(State& state, Block* block, int nops_needed, 
PhysReg reg, uint32_t mask,
-                           bool start_at_end)
+void
+handle_raw_hazard(State& state, int* NOPs, int min_states, Operand op)
 {
-   if (block == state.block && start_at_end) {
-      /* If it's the current block, block->instructions is incomplete. */
-      for (int pred_idx = state.old_instructions.size() - 1; pred_idx >= 0; 
pred_idx--) {
-         aco_ptr<Instruction>& instr = state.old_instructions[pred_idx];
-         if (!instr)
-            break; /* Instruction has been moved to block->instructions. */
-         if (handle_raw_hazard_instr<Valu, Vintrp, Salu>(instr, reg, 
&nops_needed, &mask))
-            return nops_needed;
-      }
-   }
-   for (int pred_idx = block->instructions.size() - 1; pred_idx >= 0; 
pred_idx--) {
-      if (handle_raw_hazard_instr<Valu, Vintrp, 
Salu>(block->instructions[pred_idx], reg,
-                                                      &nops_needed, &mask))
-         return nops_needed;
-   }
+   if (*NOPs >= min_states)
+      return;
 
-   int res = 0;
+   HandleRawHazardGlobalState global = {op.physReg(), 0};
+   HandleRawHazardBlockState block = {u_bit_consecutive(0, op.size()), 
min_states};
 
    /* Loops require branch instructions, which count towards the wait
     * states. So even with loops this should finish unless nops_needed is some
     * huge value. */
-   for (unsigned lin_pred : block->linear_preds) {
-      res =
-         std::max(res, handle_raw_hazard_internal<Valu, Vintrp, Salu>(
-                          state, &state.program->blocks[lin_pred], 
nops_needed, reg, mask, true));
-   }
-   return res;
-}
+   search_backwards<HandleRawHazardGlobalState, HandleRawHazardBlockState, 
nullptr,
+                    handle_raw_hazard_instr<Valu, Vintrp, Salu>>(state, 
global, block);
 
-template <bool Valu, bool Vintrp, bool Salu>
-void
-handle_raw_hazard(State& state, int* NOPs, int min_states, Operand op)
-{
-   if (*NOPs >= min_states)
-      return;
-   int res = handle_raw_hazard_internal<Valu, Vintrp, Salu>(
-      state, state.block, min_states, op.physReg(), u_bit_consecutive(0, 
op.size()), false);
-   *NOPs = MAX2(*NOPs, res);
+   *NOPs = MAX2(*NOPs, global.nops_needed);
 }
 
 static auto handle_valu_then_read_hazard = handle_raw_hazard<true, true, 
false>;

Reply via email to