Module: Mesa
Branch: main
Commit: 484df5b30e815ffb8b8664580ef1b79d91a4d970
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=484df5b30e815ffb8b8664580ef1b79d91a4d970
Author: Rhys Perry <[email protected]> Date: Thu Aug 25 17:32:49 2022 +0100 aco: add search_backwards helper This will be useful for VALUPartialForwardingHazard. Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Daniel Schürmann <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18273> --- src/amd/compiler/aco_insert_NOPs.cpp | 122 ++++++++++++++++++++++------------- 1 file changed, 77 insertions(+), 45 deletions(-) diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index a932efe2105..f4eb8165483 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -210,16 +210,72 @@ regs_intersect(PhysReg a_reg, unsigned a_size, PhysReg b_reg, unsigned b_size) return a_reg > b_reg ? (a_reg - b_reg < b_size) : (b_reg - a_reg < a_size); } +template <typename GlobalState, typename BlockState, + bool (*block_cb)(GlobalState&, BlockState&, Block*), + bool (*instr_cb)(GlobalState&, BlockState&, aco_ptr<Instruction>&)> +void +search_backwards_internal(State& state, GlobalState& global_state, BlockState block_state, + Block* block, bool start_at_end) +{ + if (block == state.block && start_at_end) { + /* If it's the current block, block->instructions is incomplete. */ + for (int pred_idx = state.old_instructions.size() - 1; pred_idx >= 0; pred_idx--) { + aco_ptr<Instruction>& instr = state.old_instructions[pred_idx]; + if (!instr) + break; /* Instruction has been moved to block->instructions. 
*/ + if (instr_cb(global_state, block_state, instr)) + return; + } + } + + for (int pred_idx = block->instructions.size() - 1; pred_idx >= 0; pred_idx--) { + if (instr_cb(global_state, block_state, block->instructions[pred_idx])) + return; + } + +PRAGMA_DIAGNOSTIC_PUSH +PRAGMA_DIAGNOSTIC_IGNORED(-Waddress) + if (block_cb != nullptr && !block_cb(global_state, block_state, block)) + return; +PRAGMA_DIAGNOSTIC_POP + + for (unsigned lin_pred : block->linear_preds) { + search_backwards_internal<GlobalState, BlockState, block_cb, instr_cb>( + state, global_state, block_state, &state.program->blocks[lin_pred], true); + } +} + +template <typename GlobalState, typename BlockState, + bool (*block_cb)(GlobalState&, BlockState&, Block*), + bool (*instr_cb)(GlobalState&, BlockState&, aco_ptr<Instruction>&)> +void +search_backwards(State& state, GlobalState& global_state, BlockState& block_state) +{ + search_backwards_internal<GlobalState, BlockState, block_cb, instr_cb>( + state, global_state, block_state, state.block, false); +} + +struct HandleRawHazardGlobalState { + PhysReg reg; + int nops_needed; +}; + +struct HandleRawHazardBlockState { + uint32_t mask; + int nops_needed; +}; + template <bool Valu, bool Vintrp, bool Salu> bool -handle_raw_hazard_instr(aco_ptr<Instruction>& pred, PhysReg reg, int* nops_needed, uint32_t* mask) +handle_raw_hazard_instr(HandleRawHazardGlobalState& global_state, + HandleRawHazardBlockState& block_state, aco_ptr<Instruction>& pred) { - unsigned mask_size = util_last_bit(*mask); + unsigned mask_size = util_last_bit(block_state.mask); uint32_t writemask = 0; for (Definition& def : pred->definitions) { - if (regs_intersect(reg, mask_size, def.physReg(), def.size())) { - unsigned start = def.physReg() > reg ? def.physReg() - reg : 0; + if (regs_intersect(global_state.reg, mask_size, def.physReg(), def.size())) { + unsigned start = def.physReg() > global_state.reg ? 
def.physReg() - global_state.reg : 0; unsigned end = MIN2(mask_size, start + def.size()); writemask |= u_bit_consecutive(start, end - start); } @@ -227,61 +283,37 @@ handle_raw_hazard_instr(aco_ptr<Instruction>& pred, PhysReg reg, int* nops_neede bool is_hazard = writemask != 0 && ((pred->isVALU() && Valu) || (pred->isVINTRP() && Vintrp) || (pred->isSALU() && Salu)); - if (is_hazard) + if (is_hazard) { + global_state.nops_needed = MAX2(global_state.nops_needed, block_state.nops_needed); return true; + } - *mask &= ~writemask; - *nops_needed = MAX2(*nops_needed - get_wait_states(pred), 0); + block_state.mask &= ~writemask; + block_state.nops_needed = MAX2(block_state.nops_needed - get_wait_states(pred), 0); - if (*mask == 0) - *nops_needed = 0; + if (block_state.mask == 0) + block_state.nops_needed = 0; - return *nops_needed == 0; + return block_state.nops_needed == 0; } template <bool Valu, bool Vintrp, bool Salu> -int -handle_raw_hazard_internal(State& state, Block* block, int nops_needed, PhysReg reg, uint32_t mask, - bool start_at_end) +void +handle_raw_hazard(State& state, int* NOPs, int min_states, Operand op) { - if (block == state.block && start_at_end) { - /* If it's the current block, block->instructions is incomplete. */ - for (int pred_idx = state.old_instructions.size() - 1; pred_idx >= 0; pred_idx--) { - aco_ptr<Instruction>& instr = state.old_instructions[pred_idx]; - if (!instr) - break; /* Instruction has been moved to block->instructions. 
*/ - if (handle_raw_hazard_instr<Valu, Vintrp, Salu>(instr, reg, &nops_needed, &mask)) - return nops_needed; - } - } - for (int pred_idx = block->instructions.size() - 1; pred_idx >= 0; pred_idx--) { - if (handle_raw_hazard_instr<Valu, Vintrp, Salu>(block->instructions[pred_idx], reg, - &nops_needed, &mask)) - return nops_needed; - } + if (*NOPs >= min_states) + return; - int res = 0; + HandleRawHazardGlobalState global = {op.physReg(), 0}; + HandleRawHazardBlockState block = {u_bit_consecutive(0, op.size()), min_states}; /* Loops require branch instructions, which count towards the wait * states. So even with loops this should finish unless nops_needed is some * huge value. */ - for (unsigned lin_pred : block->linear_preds) { - res = - std::max(res, handle_raw_hazard_internal<Valu, Vintrp, Salu>( - state, &state.program->blocks[lin_pred], nops_needed, reg, mask, true)); - } - return res; -} + search_backwards<HandleRawHazardGlobalState, HandleRawHazardBlockState, nullptr, + handle_raw_hazard_instr<Valu, Vintrp, Salu>>(state, global, block); -template <bool Valu, bool Vintrp, bool Salu> -void -handle_raw_hazard(State& state, int* NOPs, int min_states, Operand op) -{ - if (*NOPs >= min_states) - return; - int res = handle_raw_hazard_internal<Valu, Vintrp, Salu>( - state, state.block, min_states, op.physReg(), u_bit_consecutive(0, op.size()), false); - *NOPs = MAX2(*NOPs, res); + *NOPs = MAX2(*NOPs, global.nops_needed); } static auto handle_valu_then_read_hazard = handle_raw_hazard<true, true, false>;
