Module: Mesa
Branch: main
Commit: 3c25edfdb7456648d4226cf08bf62c57c998e894
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c25edfdb7456648d4226cf08bf62c57c998e894
Author: Georg Lehmann <[email protected]>
Date: Tue Jan 3 22:54:10 2023 +0100

aco: Improve wave64 cycle estimates.

Reviewed-By: Tatsuyuki Ishi <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20507>

---

 src/amd/compiler/aco_statistics.cpp | 50 ++++++++++++++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_statistics.cpp b/src/amd/compiler/aco_statistics.cpp
index 800d9a91c3a..5662bc7af4c 100644
--- a/src/amd/compiler/aco_statistics.cpp
+++ b/src/amd/compiler/aco_statistics.cpp
@@ -107,13 +107,11 @@ struct perf_info {
 static bool
 is_dual_issue_capable(const Program& program, const Instruction& instruction)
 {
-   if (program.gfx_level < GFX11 || !instruction.isVALU())
+   if (program.gfx_level < GFX11 || !instruction.isVALU() || instruction.isDPP())
       return false;
 
-   /* Currently assumed to be just the instructions that are allowed as both
-    * VOPD X and VOPD Y operation.
-    */
    switch (instruction.opcode) {
+   case aco_opcode::v_fma_f32:
    case aco_opcode::v_fmac_f32:
    case aco_opcode::v_fmaak_f32:
    case aco_opcode::v_fmamk_f32:
@@ -122,10 +120,54 @@ is_dual_issue_capable(const Program& program, const Instruction& instruction)
    case aco_opcode::v_sub_f32:
    case aco_opcode::v_subrev_f32:
    case aco_opcode::v_mul_legacy_f32:
+   case aco_opcode::v_fma_legacy_f32:
+   case aco_opcode::v_fmac_legacy_f32:
+   case aco_opcode::v_fma_mix_f32:
+   case aco_opcode::v_fma_mixlo_f16:
+   case aco_opcode::v_fma_mixhi_f16:
+   case aco_opcode::v_fma_f16:
+   case aco_opcode::v_fmac_f16:
+   case aco_opcode::v_fmaak_f16:
+   case aco_opcode::v_fmamk_f16:
+   case aco_opcode::v_mul_f16:
+   case aco_opcode::v_add_f16:
+   case aco_opcode::v_sub_f16:
+   case aco_opcode::v_subrev_f16:
    case aco_opcode::v_mov_b32:
+   case aco_opcode::v_movreld_b32:
+   case aco_opcode::v_movrels_b32:
+   case aco_opcode::v_movrelsd_b32:
+   case aco_opcode::v_movrelsd_2_b32:
    case aco_opcode::v_cndmask_b32:
+   case aco_opcode::v_writelane_b32_e64:
+   case aco_opcode::v_mov_b16:
+   case aco_opcode::v_cndmask_b16:
    case aco_opcode::v_max_f32:
    case aco_opcode::v_min_f32:
+   case aco_opcode::v_max_f16:
+   case aco_opcode::v_min_f16:
+   case aco_opcode::v_max_i16_e64:
+   case aco_opcode::v_min_i16_e64:
+   case aco_opcode::v_max_u16_e64:
+   case aco_opcode::v_min_u16_e64:
+   case aco_opcode::v_add_i16:
+   case aco_opcode::v_sub_i16:
+   case aco_opcode::v_mad_i16:
+   case aco_opcode::v_add_u16_e64:
+   case aco_opcode::v_sub_u16_e64:
+   case aco_opcode::v_mad_u16:
+   case aco_opcode::v_mul_lo_u16_e64:
+   case aco_opcode::v_not_b16:
+   case aco_opcode::v_and_b16:
+   case aco_opcode::v_or_b16:
+   case aco_opcode::v_xor_b16:
+   case aco_opcode::v_lshrrev_b16_e64:
+   case aco_opcode::v_ashrrev_i16_e64:
+   case aco_opcode::v_lshlrev_b16_e64:
+   case aco_opcode::v_dot2_bf16_bf16:
+   case aco_opcode::v_dot2_f32_bf16:
+   case aco_opcode::v_dot2_f16_f16:
+   case aco_opcode::v_dot2_f32_f16:
    case aco_opcode::v_dot2c_f32_f16: return true;
    default: return false;
    }
