Module: Mesa
Branch: master
Commit: 441ead5fb35f84dcbaf4724a771ff3475257d400
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=441ead5fb35f84dcbaf4724a771ff3475257d400
Author: Rhys Perry <pendingchao...@gmail.com> Date: Wed Jan 20 13:50:45 2021 +0000 aco: remove Format::{VOP3A,VOP3B} These are really the same as Format::VOP3. Signed-off-by: Rhys Perry <pendingchao...@gmail.com> Reviewed-by: Daniel Schürmann <dan...@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8595> --- src/amd/compiler/aco_assembler.cpp | 4 +- src/amd/compiler/aco_builder_h.py | 10 ++--- src/amd/compiler/aco_instruction_selection.cpp | 18 ++++---- src/amd/compiler/aco_ir.cpp | 4 +- src/amd/compiler/aco_ir.h | 16 +++---- src/amd/compiler/aco_lower_to_hw_instr.cpp | 10 ++--- src/amd/compiler/aco_opcodes.py | 5 +-- src/amd/compiler/aco_opt_value_numbering.cpp | 6 +-- src/amd/compiler/aco_optimizer.cpp | 60 +++++++++++++------------- src/amd/compiler/aco_print_ir.cpp | 6 +-- src/amd/compiler/aco_register_allocation.cpp | 6 +-- src/amd/compiler/aco_validate.cpp | 4 +- src/amd/compiler/tests/test_assembler.cpp | 4 +- src/amd/compiler/tests/test_optimizer.cpp | 4 +- 14 files changed, 76 insertions(+), 81 deletions(-) diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index 05ec485a2cf..3efdf663e76 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -563,8 +563,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* unreachable("Pseudo instructions should be lowered before assembly."); break; default: - if ((uint16_t) instr->format & (uint16_t) Format::VOP3A) { - VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr); + if ((uint16_t) instr->format & (uint16_t) Format::VOP3) { + VOP3_instruction* vop3 = static_cast<VOP3_instruction*>(instr); if ((uint16_t) instr->format & (uint16_t) Format::VOP2) { opcode = opcode + 0x100; diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py index af0eacdb7ac..502ffc7cd34 100644 --- a/src/amd/compiler/aco_builder_h.py +++ b/src/amd/compiler/aco_builder_h.py 
@@ -484,7 +484,7 @@ public: int num_defs = carry_out ? 2 : 1; aco_ptr<Instruction> sub; if (vop3) - sub.reset(create_instruction<VOP3A_instruction>(op, Format::VOP3B, num_ops, num_defs)); + sub.reset(create_instruction<VOP3_instruction>(op, Format::VOP3, num_ops, num_defs)); else sub.reset(create_instruction<VOP2_instruction>(op, Format::VOP2, num_ops, num_defs)); sub->operands[0] = a.op; @@ -534,15 +534,15 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod ("vop2", [Format.VOP2], 'VOP2_instruction', itertools.product([1, 2], [2, 3])), ("vop2_sdwa", [Format.VOP2, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2, 3])), ("vopc", [Format.VOPC], 'VOPC_instruction', itertools.product([1, 2], [2])), - ("vop3", [Format.VOP3A], 'VOP3A_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]), + ("vop3", [Format.VOP3], 'VOP3_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]), ("vop3p", [Format.VOP3P], 'VOP3P_instruction', [(1, 2), (1, 3)]), ("vintrp", [Format.VINTRP], 'Interp_instruction', [(1, 2), (1, 3)]), ("vop1_dpp", [Format.VOP1, Format.DPP], 'DPP_instruction', [(1, 1)]), ("vop2_dpp", [Format.VOP2, Format.DPP], 'DPP_instruction', itertools.product([1, 2], [2, 3])), ("vopc_dpp", [Format.VOPC, Format.DPP], 'DPP_instruction', itertools.product([1, 2], [2])), - ("vop1_e64", [Format.VOP1, Format.VOP3A], 'VOP3A_instruction', itertools.product([1], [1])), - ("vop2_e64", [Format.VOP2, Format.VOP3A], 'VOP3A_instruction', itertools.product([1, 2], [2, 3])), - ("vopc_e64", [Format.VOPC, Format.VOP3A], 'VOP3A_instruction', itertools.product([1, 2], [2])), + ("vop1_e64", [Format.VOP1, Format.VOP3], 'VOP3_instruction', itertools.product([1], [1])), + ("vop2_e64", [Format.VOP2, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2, 3])), + ("vopc_e64", [Format.VOPC, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2])), ("flat", [Format.FLAT], 'FLAT_instruction', [(0, 3), (1, 2)]), ("global", [Format.GLOBAL], 'FLAT_instruction', 
[(0, 3), (1, 2)])] formats = [(f if len(f) == 5 else f + ('',)) for f in formats] diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 7e614071bb0..3aa890fe6b2 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -1224,7 +1224,7 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val) Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1); Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, v); - static_cast<VOP3A_instruction*>(add)->neg[1] = true; + static_cast<VOP3_instruction*>(add)->neg[1] = true; return add->definitions[0].getTemp(); } @@ -1692,10 +1692,10 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) std::swap(src0, src1); add_instr = bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr; } - static_cast<VOP3A_instruction*>(add_instr)->clamp = 1; + static_cast<VOP3_instruction*>(add_instr)->clamp = 1; } else if (dst.regClass() == v1) { if (ctx->options->chip_class >= GFX9) { - aco_ptr<VOP3A_instruction> add{create_instruction<VOP3A_instruction>(aco_opcode::v_add_u32, asVOP3(Format::VOP2), 2, 1)}; + aco_ptr<VOP3_instruction> add{create_instruction<VOP3_instruction>(aco_opcode::v_add_u32, asVOP3(Format::VOP2), 2, 1)}; add->operands[0] = Operand(src0); add->operands[1] = Operand(src1); add->definitions[0] = Definition(dst); @@ -1965,7 +1965,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) } else if (dst.regClass() == v2) { Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), as_vgpr(ctx, src0), as_vgpr(ctx, src1)); - VOP3A_instruction* sub = static_cast<VOP3A_instruction*>(add); + VOP3_instruction* sub = static_cast<VOP3_instruction*>(add); sub->neg[1] = true; } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); @@ -2115,7 +2115,7 @@ void visit_alu_instr(isel_context *ctx, 
nir_alu_instr *instr) // TODO: confirm that this holds under any circumstances } else if (dst.regClass() == v2) { Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand(0u)); - VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(add); + VOP3_instruction* vop3 = static_cast<VOP3_instruction*>(add); vop3->clamp = true; } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); @@ -2255,12 +2255,12 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) Temp bfi = bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask, bld.copy(bld.def(v1), Operand(0x43300000u)), as_vgpr(ctx, src0_hi)); Temp tmp = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), src0, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi)); Instruction *sub = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi)); - static_cast<VOP3A_instruction*>(sub)->neg[1] = true; + static_cast<VOP3_instruction*>(sub)->neg[1] = true; tmp = sub->definitions[0].getTemp(); Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(-1u), Operand(0x432fffffu)); Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.hint_vcc(bld.def(bld.lm)), src0, v); - static_cast<VOP3A_instruction*>(vop3)->abs[0] = true; + static_cast<VOP3_instruction*>(vop3)->abs[0] = true; Temp cond = vop3->definitions[0].getTemp(); Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1); @@ -2926,7 +2926,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16); Temp smallest = bld.copy(bld.def(s1), Operand(0x38800000u)); Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), f32, smallest); - static_cast<VOP3A_instruction*>(vop3)->abs[0] = true; + static_cast<VOP3_instruction*>(vop3)->abs[0] = true; cmp_res = vop3->definitions[0].getTemp(); } @@ -8847,7 +8847,7 @@ void prepare_cube_coords(isel_context 
*ctx, std::vector<Temp>& coords, Temp* ddx ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coords[0], coords[1], coords[2]); - aco_ptr<VOP3A_instruction> vop3a{create_instruction<VOP3A_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)}; + aco_ptr<VOP3_instruction> vop3a{create_instruction<VOP3_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)}; vop3a->operands[0] = Operand(ma); vop3a->abs[0] = true; Temp invma = bld.tmp(v1); diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 676a047c8b4..5b46e8a67ee 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -170,7 +170,7 @@ bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr) return true; if (instr->isVOP3()) { - VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(instr.get()); + VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(instr.get()); if (instr->format == Format::VOP3) return false; if (vop3->clamp && instr->format == asVOP3(Format::VOPC) && chip != GFX8) @@ -235,7 +235,7 @@ aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& inst SDWA_instruction *sdwa = static_cast<SDWA_instruction*>(instr.get()); if (tmp->isVOP3()) { - VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(tmp.get()); + VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(tmp.get()); memcpy(sdwa->neg, vop3->neg, sizeof(sdwa->neg)); memcpy(sdwa->abs, vop3->abs, sizeof(sdwa->abs)); sdwa->omod = vop3->omod; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 5beca44ff2b..798d9cc80a6 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -60,7 +60,7 @@ enum { /** * Representation of the instruction's microcode encoding format * Note: Some Vector ALU Formats can be combined, such that: - * - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding + * - VOP2* | VOP3 represents a VOP2 instruction in VOP3 encoding * - VOP2* | DPP represents a VOP2 instruction with 
data parallel primitive. * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing. * @@ -101,8 +101,6 @@ enum class Format : std::uint16_t { VOP2 = 1 << 9, VOPC = 1 << 10, VOP3 = 1 << 11, - VOP3A = 1 << 11, - VOP3B = 1 << 11, /* Vector Parameter Interpolation Format */ VINTRP = 1 << 12, DPP = 1 << 13, @@ -1001,8 +999,7 @@ struct Instruction { return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1 || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2 || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC - || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A - || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B + || ((uint16_t) format & (uint16_t) Format::VOP3) == (uint16_t) Format::VOP3 || format == Format::VOP3P; } @@ -1029,8 +1026,7 @@ struct Instruction { constexpr bool isVOP3() const noexcept { - return ((uint16_t) format & (uint16_t) Format::VOP3A) || - ((uint16_t) format & (uint16_t) Format::VOP3B); + return (uint16_t) format & (uint16_t) Format::VOP3; } constexpr bool isSDWA() const noexcept @@ -1114,7 +1110,7 @@ struct VOPC_instruction : public Instruction { }; static_assert(sizeof(VOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); -struct VOP3A_instruction : public Instruction { +struct VOP3_instruction : public Instruction { bool abs[3]; bool neg[3]; uint8_t opsel : 4; @@ -1123,7 +1119,7 @@ struct VOP3A_instruction : public Instruction { uint8_t padding0 : 1; uint8_t padding1; }; -static_assert(sizeof(VOP3A_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); +static_assert(sizeof(VOP3_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); struct VOP3P_instruction : public Instruction { bool neg_lo[3]; @@ -1450,7 +1446,7 @@ constexpr bool Instruction::usesModifiers() const noexcept } return vop3p->opsel_lo || vop3p->clamp; } else if (isVOP3()) { - const VOP3A_instruction *vop3 = 
static_cast<const VOP3A_instruction*>(this); + const VOP3_instruction *vop3 = static_cast<const VOP3_instruction*>(this); for (unsigned i = 0; i < operands.size(); i++) { if (vop3->abs[i] || vop3->neg[i]) return true; diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 56da58bd333..f9eb9323f11 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -646,7 +646,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig Definition(PhysReg{vtmp+i}, v1), Operand(PhysReg{tmp+i}, v1), Operand(0xffffffffu), Operand(0xffffffffu)).instr; - static_cast<VOP3A_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */ + static_cast<VOP3_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */ } bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX)); @@ -757,7 +757,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig Definition(PhysReg{vtmp+i}, v1), Operand(PhysReg{tmp+i}, v1), Operand(0xffffffffu), Operand(0xffffffffu)).instr; - static_cast<VOP3A_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */ + static_cast<VOP3_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */ } emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size()); @@ -1052,12 +1052,12 @@ void copy_constant(lower_context *ctx, Builder& bld, Definition dst, Operand op) if (dst.physReg().byte() == 2) { Operand def_lo(dst.physReg().advance(-2), v2b); Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op); - static_cast<VOP3A_instruction*>(instr)->opsel = 0; + static_cast<VOP3_instruction*>(instr)->opsel = 0; } else { assert(dst.physReg().byte() == 0); Operand def_hi(dst.physReg().advance(2), v2b); Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi); - static_cast<VOP3A_instruction*>(instr)->opsel = 2; + static_cast<VOP3_instruction*>(instr)->opsel = 2; } } else { uint32_t 
offset = dst.physReg().byte() * 8u; @@ -1251,7 +1251,7 @@ void do_pack_2x16(lower_context *ctx, Builder& bld, Definition def, Operand lo, if (can_use_pack) { Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, lo, hi); /* opsel: 0 = select low half, 1 = select high half. [0] = src0, [1] = src1 */ - static_cast<VOP3A_instruction*>(instr)->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1); + static_cast<VOP3_instruction*>(instr)->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1); return; } diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index 2a8bc8c4cdc..20f5006c186 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -53,8 +53,7 @@ class Format(Enum): VOP1 = 1 << 8 VOP2 = 1 << 9 VOPC = 1 << 10 - VOP3A = 1 << 11 - VOP3B = 1 << 11 + VOP3 = 1 << 11 VINTRP = 1 << 12 DPP = 1 << 13 SDWA = 1 << 14 @@ -1082,7 +1081,7 @@ VOP3 = { ( -1, -1, -1, -1, 0x140, "v_fma_legacy_f32", True, True), #GFX10.3+ } for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod) in VOP3: - opcode(name, gfx7, gfx9, gfx10, Format.VOP3A, in_mod, out_mod) + opcode(name, gfx7, gfx9, gfx10, Format.VOP3, in_mod, out_mod) # DS instructions: 3 inputs (1 addr, 2 data), 1 output diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index 2d1a69b1492..a5a3a8c9cca 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -81,7 +81,7 @@ struct InstrHash { std::size_t operator()(Instruction* instr) const { if (instr->isVOP3()) - return hash_murmur_32<VOP3A_instruction>(instr); + return hash_murmur_32<VOP3_instruction>(instr); if (instr->isDPP()) return hash_murmur_32<DPP_instruction>(instr); @@ -178,8 +178,8 @@ struct InstrPred { return false; if (a->isVOP3()) { - VOP3A_instruction* a3 = static_cast<VOP3A_instruction*>(a); - VOP3A_instruction* b3 = static_cast<VOP3A_instruction*>(b); + VOP3_instruction* a3 = 
static_cast<VOP3_instruction*>(a); + VOP3_instruction* b3 = static_cast<VOP3_instruction*>(b); for (unsigned i = 0; i < 3; i++) { if (a3->abs[i] != b3->abs[i] || a3->neg[i] != b3->neg[i]) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index b1e786408a1..7806c37a3a6 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -706,7 +706,7 @@ void to_VOP3(opt_ctx& ctx, aco_ptr<Instruction>& instr) aco_ptr<Instruction> tmp = std::move(instr); Format format = asVOP3(tmp->format); - instr.reset(create_instruction<VOP3A_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size())); + instr.reset(create_instruction<VOP3_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size())); std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin()); for (unsigned i = 0; i < instr->definitions.size(); i++) { instr->definitions[i] = tmp->definitions[i]; @@ -953,7 +953,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) else if (instr->isSDWA()) static_cast<SDWA_instruction*>(instr.get())->abs[i] = true; else - static_cast<VOP3A_instruction*>(instr.get())->abs[i] = true; + static_cast<VOP3_instruction*>(instr.get())->abs[i] = true; } if (info.is_neg() && instr->opcode == aco_opcode::v_add_f32) { instr->opcode = i ? 
aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32; @@ -972,7 +972,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) else if (instr->isSDWA()) static_cast<SDWA_instruction*>(instr.get())->neg[i] = true; else - static_cast<VOP3A_instruction*>(instr.get())->neg[i] = true; + static_cast<VOP3_instruction*>(instr.get())->neg[i] = true; continue; } unsigned bits = get_operand_size(instr, i); @@ -1365,7 +1365,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) } case aco_opcode::v_med3_f16: case aco_opcode::v_med3_f32: { /* clamp */ - VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr.get()); + VOP3_instruction* vop3 = static_cast<VOP3_instruction*>(instr.get()); if (vop3->abs[0] || vop3->abs[1] || vop3->abs[2] || vop3->neg[0] || vop3->neg[1] || vop3->neg[2] || vop3->omod != 0 || vop3->opsel != 0) @@ -1682,7 +1682,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr) return false; if (op_instr[i]->isVOP3()) { - VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(op_instr[i]); + VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(op_instr[i]); if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2) return false; neg[i] = vop3->neg[0]; @@ -1726,7 +1726,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr) } Instruction *new_instr; if (neg[0] || neg[1] || abs[0] || abs[1] || opsel || num_sgprs > 1) { - VOP3A_instruction *vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1); + VOP3_instruction *vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1); for (unsigned i = 0; i < 2; i++) { vop3->neg[i] = neg[i]; vop3->abs[i] = abs[i]; @@ -1797,8 +1797,8 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr) aco_opcode new_op = is_or ? 
get_unordered(cmp->opcode) : get_ordered(cmp->opcode); Instruction *new_instr; if (cmp->isVOP3()) { - VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1); - VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp); + VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1); + VOP3_instruction *cmp_vop3 = static_cast<VOP3_instruction*>(cmp); memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs)); memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg)); new_vop3->clamp = cmp_vop3->clamp; @@ -1885,7 +1885,7 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in return false; if (nan_test->isVOP3()) { - VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(nan_test); + VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(nan_test); if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2) return false; } @@ -1916,8 +1916,8 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode); Instruction *new_instr; if (cmp->isVOP3()) { - VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1); - VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp); + VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1); + VOP3_instruction *cmp_vop3 = static_cast<VOP3_instruction*>(cmp); memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs)); memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg)); new_vop3->clamp = cmp_vop3->clamp; @@ -1965,8 +1965,8 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr<Instruction>& instr) * comparison so that the comparison is done with the correct exec mask. 
*/ Instruction *new_instr; if (cmp->isVOP3()) { - VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1); - VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp); + VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1); + VOP3_instruction *cmp_vop3 = static_cast<VOP3_instruction*>(cmp); memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs)); memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg)); new_vop3->clamp = cmp_vop3->clamp; @@ -2019,8 +2019,8 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2, if (fixed_to_exec(op2_instr->operands[0]) || fixed_to_exec(op2_instr->operands[1])) return false; - VOP3A_instruction *op1_vop3 = op1_instr->isVOP3() ? static_cast<VOP3A_instruction *>(op1_instr) : NULL; - VOP3A_instruction *op2_vop3 = op2_instr->isVOP3() ? static_cast<VOP3A_instruction *>(op2_instr) : NULL; + VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? static_cast<VOP3_instruction *>(op1_instr) : NULL; + VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? 
static_cast<VOP3_instruction *>(op2_instr) : NULL; if (op1_instr->isSDWA() || op2_instr->isSDWA()) return false; @@ -2081,7 +2081,7 @@ void create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>& Operand operands[3], bool neg[3], bool abs[3], uint8_t opsel, bool clamp, unsigned omod) { - VOP3A_instruction *new_instr = create_instruction<VOP3A_instruction>(opcode, Format::VOP3A, 3, 1); + VOP3_instruction *new_instr = create_instruction<VOP3_instruction>(opcode, Format::VOP3, 3, 1); memcpy(new_instr->abs, abs, sizeof(bool[3])); memcpy(new_instr->neg, neg, sizeof(bool[3])); new_instr->clamp = clamp; @@ -2306,7 +2306,7 @@ bool combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode n new_instr.reset(create_instruction<VOP2_instruction>(new_op, Format::VOP2, 3, 2)); } else if (ctx.program->chip_class >= GFX10 || (instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) { - new_instr.reset(create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOP2), 3, 2)); + new_instr.reset(create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOP2), 3, 2)); } else { return false; } @@ -2347,7 +2347,7 @@ bool combine_add_bcnt(opt_ctx& ctx, aco_ptr<Instruction>& instr) op_instr->operands[0].isTemp() && op_instr->operands[0].getTemp().type() == RegType::vgpr && op_instr->operands[1].constantEquals(0)) { - aco_ptr<Instruction> new_instr{create_instruction<VOP3A_instruction>(aco_opcode::v_bcnt_u32_b32, Format::VOP3, 2, 1)}; + aco_ptr<Instruction> new_instr{create_instruction<VOP3_instruction>(aco_opcode::v_bcnt_u32_b32, Format::VOP3, 2, 1)}; ctx.uses[instr->operands[i].tempId()]--; new_instr->operands[0] = op_instr->operands[0]; new_instr->operands[1] = instr->operands[!i]; @@ -2645,7 +2645,7 @@ bool apply_omod_clamp(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) return false; } else { to_VOP3(ctx, instr); - if (!apply_omod_clamp_helper(ctx, static_cast<VOP3A_instruction *>(instr.get()), def_info)) + if 
(!apply_omod_clamp_helper(ctx, static_cast<VOP3_instruction *>(instr.get()), def_info)) return false; } @@ -2675,7 +2675,7 @@ bool combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr) new_instr.reset(create_instruction<VOP2_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1)); } else if (ctx.program->chip_class >= GFX10 || (instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) { - new_instr.reset(create_instruction<VOP3A_instruction>(aco_opcode::v_cndmask_b32, asVOP3(Format::VOP2), 3, 1)); + new_instr.reset(create_instruction<VOP3_instruction>(aco_opcode::v_cndmask_b32, asVOP3(Format::VOP2), 3, 1)); } else { return false; } @@ -2729,7 +2729,7 @@ bool combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr) ctx.uses[instr->operands[i].tempId()]--; - aco_ptr<VOP3A_instruction> new_instr{create_instruction<VOP3A_instruction>(aco_opcode::v_mad_u32_u24, Format::VOP3A, 3, 1)}; + aco_ptr<VOP3_instruction> new_instr{create_instruction<VOP3_instruction>(aco_opcode::v_mad_u32_u24, Format::VOP3, 3, 1)}; new_instr->operands[0] = op_instr->operands[!shift_op_idx]; new_instr->operands[1] = Operand(multiplier); new_instr->operands[2] = instr->operands[!i]; @@ -2944,7 +2944,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr if (mul_instr->operands[0].isLiteral()) return; - if (mul_instr->isVOP3() && static_cast<VOP3A_instruction*>(mul_instr)->clamp) + if (mul_instr->isVOP3() && static_cast<VOP3_instruction*>(mul_instr)->clamp) return; if (mul_instr->isSDWA()) return; @@ -2954,13 +2954,13 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr Definition def = instr->definitions[0]; /* neg(abs(mul(a, b))) -> mul(neg(abs(a)), abs(b)) */ bool is_abs = ctx.info[instr->definitions[0].tempId()].is_abs(); - instr.reset(create_instruction<VOP3A_instruction>(mul_instr->opcode, asVOP3(Format::VOP2), 2, 1)); + instr.reset(create_instruction<VOP3_instruction>(mul_instr->opcode, 
asVOP3(Format::VOP2), 2, 1)); instr->operands[0] = mul_instr->operands[0]; instr->operands[1] = mul_instr->operands[1]; instr->definitions[0] = def; - VOP3A_instruction* new_mul = static_cast<VOP3A_instruction*>(instr.get()); + VOP3_instruction* new_mul = static_cast<VOP3_instruction*>(instr.get()); if (mul_instr->isVOP3()) { - VOP3A_instruction* mul = static_cast<VOP3A_instruction*>(mul_instr); + VOP3_instruction* mul = static_cast<VOP3_instruction*>(mul_instr); new_mul->neg[0] = mul->neg[0] && !is_abs; new_mul->neg[1] = mul->neg[1] && !is_abs; new_mul->abs[0] = mul->abs[0] || is_abs; @@ -3003,8 +3003,8 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr /* no clamp/omod allowed between mul and add */ if (info.instr->isVOP3() && - (static_cast<VOP3A_instruction*>(info.instr)->clamp || - static_cast<VOP3A_instruction*>(info.instr)->omod)) + (static_cast<VOP3_instruction*>(info.instr)->clamp || + static_cast<VOP3_instruction*>(info.instr)->omod)) continue; Operand op[3] = {info.instr->operands[0], info.instr->operands[1], instr->operands[1 - i]}; @@ -3035,7 +3035,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr bool clamp = false; if (mul_instr->isVOP3()) { - VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*> (mul_instr); + VOP3_instruction* vop3 = static_cast<VOP3_instruction*> (mul_instr); neg[0] = vop3->neg[0]; neg[1] = vop3->neg[1]; abs[0] = vop3->abs[0]; @@ -3043,7 +3043,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr } if (instr->isVOP3()) { - VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*> (instr.get()); + VOP3_instruction* vop3 = static_cast<VOP3_instruction*> (instr.get()); neg[2] = vop3->neg[add_op_idx]; abs[2] = vop3->abs[add_op_idx]; omod = vop3->omod; @@ -3068,7 +3068,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr mad_op = need_fma ? (ctx.program->chip_class == GFX8 ? 
aco_opcode::v_fma_legacy_f16 : aco_opcode::v_fma_f16) : (ctx.program->chip_class == GFX8 ? aco_opcode::v_mad_legacy_f16 : aco_opcode::v_mad_f16); - aco_ptr<VOP3A_instruction> mad{create_instruction<VOP3A_instruction>(mad_op, Format::VOP3A, 3, 1)}; + aco_ptr<VOP3_instruction> mad{create_instruction<VOP3_instruction>(mad_op, Format::VOP3, 3, 1)}; for (unsigned i = 0; i < 3; i++) { mad->operands[i] = op[i]; mad->neg[i] = neg[i]; diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index f99046da007..6324355cb92 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -588,7 +588,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) } } if (instr->isVOP3()) { - const VOP3A_instruction* vop3 = static_cast<const VOP3A_instruction*>(instr); + const VOP3_instruction* vop3 = static_cast<const VOP3_instruction*>(instr); switch (vop3->omod) { case 1: fprintf(output, " *2"); @@ -693,8 +693,8 @@ void aco_print_instr(const Instruction *instr, FILE *output) bool *const neg = (bool *)alloca(instr->operands.size() * sizeof(bool)); bool *const opsel = (bool *)alloca(instr->operands.size() * sizeof(bool)); uint8_t *const sel = (uint8_t *)alloca(instr->operands.size() * sizeof(uint8_t)); - if ((int)instr->format & (int)Format::VOP3A) { - const VOP3A_instruction* vop3 = static_cast<const VOP3A_instruction*>(instr); + if ((int)instr->format & (int)Format::VOP3) { + const VOP3_instruction* vop3 = static_cast<const VOP3_instruction*>(instr); for (unsigned i = 0; i < instr->operands.size(); ++i) { abs[i] = vop3->abs[i]; neg[i] = vop3->neg[i]; diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 12d8cb354cf..4b083ee6013 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -503,7 +503,7 @@ void add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx 
update_phi_map(ctx, tmp.get(), instr.get()); return; } else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, byte / 2)) { - VOP3A_instruction* vop3 = static_cast<VOP3A_instruction *>(instr.get()); + VOP3_instruction* vop3 = static_cast<VOP3_instruction *>(instr.get()); vop3->opsel |= (byte / 2) << idx; return; } else if (instr->format == Format::VOP3P && byte == 2) { @@ -614,7 +614,7 @@ void add_subdword_definition(Program *program, aco_ptr<Instruction>& instr, unsi convert_to_SDWA(chip, instr); return; } else if (reg.byte() && rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, -1, reg.byte() / 2)) { - VOP3A_instruction *vop3 = static_cast<VOP3A_instruction *>(instr.get()); + VOP3_instruction *vop3 = static_cast<VOP3_instruction *>(instr.get()); if (reg.byte() == 2) vop3->opsel |= (1 << 3); /* dst in high half */ return; @@ -2478,7 +2478,7 @@ void register_allocation(Program *program, std::vector<IDSet>& live_out_per_bloc /* change the instruction to VOP3 to enable an arbitrary register pair as dst */ aco_ptr<Instruction> tmp = std::move(instr); Format format = asVOP3(tmp->format); - instr.reset(create_instruction<VOP3A_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size())); + instr.reset(create_instruction<VOP3_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size())); std::copy(tmp->operands.begin(), tmp->operands.end(), instr->operands.begin()); std::copy(tmp->definitions.begin(), tmp->definitions.end(), instr->definitions.begin()); update_phi_map(ctx, tmp.get(), instr.get()); diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index b7f9fef2b36..5b6aa53d6ea 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -136,7 +136,7 @@ bool validate_ir(Program* program) base_format == Format::VOP1 || base_format == Format::VOPC || base_format == Format::VINTRP, - "Format cannot have VOP3A/VOP3B applied", instr.get()); + "Format cannot 
have VOP3/VOP3B applied", instr.get()); } /* check SDWA */ @@ -188,7 +188,7 @@ bool validate_ir(Program* program) /* check opsel */ if (instr->isVOP3()) { - VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(instr.get()); + VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(instr.get()); check(vop3->opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get()); for (unsigned i = 0; i < 3; i++) { diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp index 23f02dd3926..bd6055cc20a 100644 --- a/src/amd/compiler/tests/test_assembler.cpp +++ b/src/amd/compiler/tests/test_assembler.cpp @@ -235,7 +235,7 @@ BEGIN_TEST(assembler.v_add3) //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080 //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080 - aco_ptr<VOP3A_instruction> add3{create_instruction<VOP3A_instruction>(aco_opcode::v_add3_u32, Format::VOP3A, 3, 1)}; + aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)}; add3->operands[0] = Operand(0u); add3->operands[1] = Operand(0u); add3->operands[2] = Operand(0u); @@ -253,7 +253,7 @@ BEGIN_TEST(assembler.v_add3_clamp) //~gfx9>> integer addition + clamp ; d1ff8000 02010080 //~gfx10>> integer addition + clamp ; d76d8000 02010080 - aco_ptr<VOP3A_instruction> add3{create_instruction<VOP3A_instruction>(aco_opcode::v_add3_u32, Format::VOP3A, 3, 1)}; + aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)}; add3->operands[0] = Operand(0u); add3->operands[1] = Operand(0u); add3->operands[2] = Operand(0u); diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp index 679812faac8..f914bce922f 100644 --- a/src/amd/compiler/tests/test_optimizer.cpp +++ b/src/amd/compiler/tests/test_optimizer.cpp @@ -735,7 +735,7 @@ BEGIN_TEST(optimize.add3) //! v1: %res1 = v_add_u32 %a, %tmp1 //! 
p_unit_test 1, %res1 tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); - static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true; + static_cast<VOP3_instruction *>(tmp.instr)->clamp = true; writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp)); //! v1: %tmp2 = v_add_u32 %b, %c @@ -743,7 +743,7 @@ BEGIN_TEST(optimize.add3) //! p_unit_test 2, %res2 tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp); - static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true; + static_cast<VOP3_instruction *>(tmp.instr)->clamp = true; writeout(2, tmp); finish_opt_test(); _______________________________________________ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit