Module: Mesa
Branch: main
Commit: e11b23f7cd3d848ea278d09f285a186371c70b44
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e11b23f7cd3d848ea278d09f285a186371c70b44

Author: Daniel Schürmann <[email protected]>
Date: Wed Aug 18 18:56:59 2021 +0200

aco: add instr_is_16bit() helper function to indicate whether some instruction writes partial registers, only.

Reviewed-by: Rhys Perry <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12364>

---

 src/amd/compiler/aco_ir.cpp | 59 +++++++++++++++++++++++++++++++++++++++++++++
 src/amd/compiler/aco_ir.h   |  1 +
 2 files changed, 60 insertions(+)

diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index 61cf3c2c43c..a276c2e7527 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -419,6 +419,65 @@ can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high)
    }
 }
 
+bool
+instr_is_16bit(chip_class chip, aco_opcode op)
+{
+   /* partial register writes are GFX9+, only */
+   if (chip < GFX9)
+      return false;
+
+   switch (op) {
+   /* VOP3 */
+   case aco_opcode::v_mad_f16:
+   case aco_opcode::v_mad_u16:
+   case aco_opcode::v_mad_i16:
+   case aco_opcode::v_fma_f16:
+   case aco_opcode::v_div_fixup_f16:
+   case aco_opcode::v_interp_p2_f16:
+   case aco_opcode::v_fma_mixlo_f16:
+   /* VOP2 */
+   case aco_opcode::v_mac_f16:
+   case aco_opcode::v_madak_f16:
+   case aco_opcode::v_madmk_f16: return chip >= GFX9;
+   case aco_opcode::v_add_f16:
+   case aco_opcode::v_sub_f16:
+   case aco_opcode::v_subrev_f16:
+   case aco_opcode::v_mul_f16:
+   case aco_opcode::v_max_f16:
+   case aco_opcode::v_min_f16:
+   case aco_opcode::v_ldexp_f16:
+   case aco_opcode::v_fmac_f16:
+   case aco_opcode::v_fmamk_f16:
+   case aco_opcode::v_fmaak_f16:
+   /* VOP1 */
+   case aco_opcode::v_cvt_f16_f32:
+   case aco_opcode::v_cvt_f16_u16:
+   case aco_opcode::v_cvt_f16_i16:
+   case aco_opcode::v_rcp_f16:
+   case aco_opcode::v_sqrt_f16:
+   case aco_opcode::v_rsq_f16:
+   case aco_opcode::v_log_f16:
+   case aco_opcode::v_exp_f16:
+   case aco_opcode::v_frexp_mant_f16:
+   case aco_opcode::v_frexp_exp_i16_f16:
+   case aco_opcode::v_floor_f16:
+   case aco_opcode::v_ceil_f16:
+   case aco_opcode::v_trunc_f16:
+   case aco_opcode::v_rndne_f16:
+   case aco_opcode::v_fract_f16:
+   case aco_opcode::v_sin_f16:
+   case aco_opcode::v_cos_f16: return chip >= GFX10;
+   // TODO: confirm whether these write 16 or 32 bit on GFX10+
+   // case aco_opcode::v_cvt_u16_f16:
+   // case aco_opcode::v_cvt_i16_f16:
+   // case aco_opcode::p_cvt_f16_f32_rtne:
+   // case aco_opcode::v_cvt_norm_i16_f16:
+   // case aco_opcode::v_cvt_norm_u16_f16:
+   /* on GFX10, all opsel instructions preserve the high bits */
+   default: return chip >= GFX10 && can_use_opsel(chip, op, -1, false);
+   }
+}
+
 uint32_t
 get_reduction_identity(ReduceOp op, unsigned idx)
 {
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 0c5c8b767aa..0fa4fc824d5 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1729,6 +1729,7 @@ memory_sync_info get_sync_info(const Instruction* instr);
 bool is_dead(const std::vector<uint16_t>& uses, Instruction* instr);
 
 bool can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high);
+bool instr_is_16bit(chip_class chip, aco_opcode op);
 bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra);
 bool can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra);
 /* updates "instr" and returns the old instruction (or NULL if no update was needed) */
