https://github.com/piotrAMD updated https://github.com/llvm/llvm-project/pull/68778
>From 6b5ada294d999ba1412020806ce8fab8e34a408e Mon Sep 17 00:00:00 2001 From: Piotr Sobczak <piotr.sobc...@amd.com> Date: Wed, 11 Oct 2023 10:32:57 +0200 Subject: [PATCH 1/5] [AMDGPU] Rematerialize scalar loads Extend the list of instructions that can be rematerialized in SIInstrInfo::isReallyTriviallyReMaterializable() to support scalar loads. Try shrinking instructions to remat only the part needed for current context. Add SIInstrInfo::reMaterialize target hook, and handle shrinking of S_LOAD_DWORDX16_IMM to S_LOAD_DWORDX8_IMM as a proof of concept. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 110 +++++++++++++++++ llvm/lib/Target/AMDGPU/SIInstrInfo.h | 5 + .../hsa-metadata-kernel-code-props-v3.ll | 4 +- llvm/test/CodeGen/AMDGPU/remat-smrd.mir | 116 +++++------------- .../AMDGPU/snippet-copy-bundle-regression.mir | 42 +------ 5 files changed, 151 insertions(+), 126 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 51397cbb791469d..31a086cd2a61820 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -108,7 +108,18 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { bool SIInstrInfo::isReallyTriviallyReMaterializable( const MachineInstr &MI) const { + + bool CanRemat = false; if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI) || isSALU(MI)) { + CanRemat = true; + } else if (isSMRD(MI)) { + CanRemat = !MI.memoperands_empty() && + llvm::all_of(MI.memoperands(), [](const MachineMemOperand *MMO) { + return MMO->isLoad() && MMO->isInvariant(); + }); + } + + if (CanRemat) { // Normally VALU use of exec would block the rematerialization, but that // is OK in this case to have an implicit exec read as all VALU do. // We really want all of the generic logic for this except for this. @@ -2434,6 +2445,105 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } +void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, Register DestReg, + unsigned SubIdx, const MachineInstr &Orig, + const TargetRegisterInfo &RI) const { + + // Try shrinking the instruction to remat only the part needed for current + // context. + // TODO: Handle more cases. + unsigned Opcode = Orig.getOpcode(); + switch (Opcode) { + case AMDGPU::S_LOAD_DWORDX16_IMM: + case AMDGPU::S_LOAD_DWORDX8_IMM: { + if (SubIdx != 0) + break; + + if (I == MBB.end()) + break; + + if (I->isBundled()) + break; + + // Look for a single use of the register that is also a subreg. + Register RegToFind = Orig.getOperand(0).getReg(); + int SingleUseIdx = -1; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + const MachineOperand &CandMO = I->getOperand(i); + if (!CandMO.isReg()) + continue; + Register CandReg = CandMO.getReg(); + if (!CandReg) + continue; + + if (CandReg == RegToFind || RI.regsOverlap(CandReg, RegToFind)) { + if (SingleUseIdx == -1 && CandMO.isUse()) { + SingleUseIdx = i; + } else { + SingleUseIdx = -1; + break; + } + } + } + if (SingleUseIdx == -1) + break; + MachineOperand *UseMO = &I->getOperand(SingleUseIdx); + if (UseMO->getSubReg() == AMDGPU::NoSubRegister) + break; + + unsigned Offset = RI.getSubRegIdxOffset(UseMO->getSubReg()); + unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg()); + + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + assert(MRI.hasAtMostUserInstrs(DestReg, 0) && + "DestReg should have no users yet."); + + unsigned NewOpcode = -1; + if (SubregSize == 256) + NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM; + else if (SubregSize == 128) + NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM; + else + break; + + const MCInstrDesc &TID = get(NewOpcode); + const TargetRegisterClass *NewRC = + RI.getAllocatableClass(getRegClass(TID, 0, &RI, *MF)); + MRI.setRegClass(DestReg, NewRC); + + UseMO->setReg(DestReg); + UseMO->setSubReg(AMDGPU::NoSubRegister); + + // Use a smaller load with the desired size, possibly with updated offset. + MachineInstr *MI = MF->CloneMachineInstr(&Orig); + MI->setDesc(TID); + MI->getOperand(0).setReg(DestReg); + MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister); + if (Offset) { + MachineOperand *OffsetMO = getNamedOperand(*MI, AMDGPU::OpName::offset); + int64_t FinalOffset = OffsetMO->getImm() + Offset / 8; + OffsetMO->setImm(FinalOffset); + } + SmallVector<MachineMemOperand *> NewMMOs; + for (const MachineMemOperand *MemOp : Orig.memoperands()) + NewMMOs.push_back(MF->getMachineMemOperand(MemOp, MemOp->getPointerInfo(), + SubregSize / 8)); + MI->setMemRefs(*MF, NewMMOs); + + MBB.insert(I, MI); + return; + } + + default: + break; + } + MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); + MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, RI); + MBB.insert(I, MI); +} + std::pair<MachineInstr*, MachineInstr*> SIInstrInfo::expandMovDPP64(MachineInstr &MI) const { assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 5ef17c44f7de389..a64cf0244e4c08c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -275,6 +275,11 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { bool expandPostRAPseudo(MachineInstr &MI) const override; + void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + Register DestReg, unsigned SubIdx, + const MachineInstr &Orig, + const TargetRegisterInfo &TRI) const override; + // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp // instructions. Returns a pair of generated instructions. // Can split either post-RA with physical registers or pre-RA with diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll index d6f7a92af9dcb6f..1999c7b065e6854 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll @@ -47,8 +47,8 @@ entry: } ; CHECK: .name: num_spilled_sgprs -; GFX700: .sgpr_spill_count: 38 -; GFX803: .sgpr_spill_count: 22 +; GFX700: .sgpr_spill_count: 12 +; GFX803: .sgpr_spill_count: 12 ; GFX900: .sgpr_spill_count: 48 ; GFX1010: .sgpr_spill_count: 48 ; CHECK: .symbol: num_spilled_sgprs.kd diff --git a/llvm/test/CodeGen/AMDGPU/remat-smrd.mir b/llvm/test/CodeGen/AMDGPU/remat-smrd.mir index 753cb135fdae9c0..95eac12a65389ef 100644 --- a/llvm/test/CodeGen/AMDGPU/remat-smrd.mir +++ b/llvm/test/CodeGen/AMDGPU/remat-smrd.mir @@ -12,15 +12,11 @@ body: | ; GCN: liveins: $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 4, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -44,16 +40,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s64), addrspace 4) - ; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr2_sgpr3, %stack.1, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.1, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s64), addrspace 4) - ; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s64), addrspace 4) - ; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.2, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.1, align 4, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3 - ; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s64), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3 - ; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s64), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -77,16 +67,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s128), addrspace 4) - ; GCN-NEXT: SI_SPILL_S128_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sp_reg :: (store (s128) into %stack.1, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s128), addrspace 4) - ; GCN-NEXT: SI_SPILL_S128_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, %stack.0, implicit $exec, implicit $sp_reg :: (store (s128) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s128), addrspace 4) - ; GCN-NEXT: SI_SPILL_S128_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, %stack.2, implicit $exec, implicit $sp_reg :: (store (s128) into %stack.2, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s128) from %stack.1, align 4, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s128) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s128), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s128) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s128), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -110,16 +94,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s256), addrspace 4) - ; GCN-NEXT: SI_SPILL_S256_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, %stack.1, implicit $exec, implicit $sp_reg :: (store (s256) into %stack.1, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s256), addrspace 4) - ; GCN-NEXT: SI_SPILL_S256_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, %stack.0, implicit $exec, implicit $sp_reg :: (store (s256) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s256), addrspace 4) - ; GCN-NEXT: SI_SPILL_S256_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, %stack.2, implicit $exec, implicit $sp_reg :: (store (s256) into %stack.2, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S256_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s256) from %stack.1, align 4, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S256_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s256) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s256), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S256_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s256) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s256), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -143,16 +121,10 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s512), addrspace 4) - ; GCN-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.1, implicit $exec, implicit $sp_reg :: (store (s512) into %stack.1, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s512), addrspace 4) - ; GCN-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sp_reg :: (store (s512) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s512), addrspace 4) - ; GCN-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.2, implicit $exec, implicit $sp_reg :: (store (s512) into %stack.2, align 4, addrspace 5) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.1, align 4, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s512), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s512), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -175,13 +147,12 @@ body: | ; GCN: liveins: $sgpr8_sgpr9, $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sp_reg :: (store (s512) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 32, 0 :: (dereferenceable invariant load (s256), align 4, addrspace 4) + ; GCN-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = COPY killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s256), align 4, addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -206,14 +177,12 @@ body: | ; GCN: liveins: $sgpr8_sgpr9, $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sp_reg :: (store (s512) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) ; GCN-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = KILL killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s256), align 4, addrspace 4) ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = KILL killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit renamable $sgpr0_sgpr1 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 @@ -238,10 +207,8 @@ body: | ; GCN: liveins: $sgpr8_sgpr9, $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sp_reg :: (store (s512) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -265,13 +232,12 @@ body: | ; GCN: liveins: $sgpr8_sgpr9, $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s256), align 4, addrspace 4) - ; GCN-NEXT: SI_SPILL_S256_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, %stack.0, implicit $exec, implicit $sp_reg :: (store (s256) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s256), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S256_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s256) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 16, 0 :: (dereferenceable invariant load (s128), align 4, addrspace 4) + ; GCN-NEXT: renamable $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11 ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s256), align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S256_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s256) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s128), align 4, addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -331,16 +297,10 @@ body: | ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr0 = COPY $sgpr10 ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR renamable $sgpr2_sgpr3, renamable $sgpr0, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR renamable $sgpr2_sgpr3, renamable $sgpr0, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR renamable $sgpr2_sgpr3, renamable $sgpr0, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.2, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.2, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR renamable $sgpr2_sgpr3, renamable $sgpr0, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5) + ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR renamable $sgpr2_sgpr3, renamable $sgpr0, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3, implicit killed renamable $sgpr0 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -366,16 +326,10 @@ body: | ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr0 = COPY $sgpr10 ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR_IMM renamable $sgpr2_sgpr3, renamable $sgpr0, 0, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR_IMM renamable $sgpr2_sgpr3, renamable $sgpr0, 4, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR_IMM renamable $sgpr2_sgpr3, renamable $sgpr0, 8, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.2, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.2, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR_IMM renamable $sgpr2_sgpr3, renamable $sgpr0, 4, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5) + ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_SGPR_IMM renamable $sgpr2_sgpr3, renamable $sgpr0, 8, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3, implicit killed renamable $sgpr0 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -430,15 +384,11 @@ body: | ; GCN: liveins: $sgpr8_sgpr9_sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 - ; GCN-NEXT: renamable $sgpr0 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: renamable $sgpr1 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr4_sgpr5_sgpr6_sgpr7, 4, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: renamable $sgpr0 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr4_sgpr5_sgpr6_sgpr7, 8, 0 :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr4_sgpr5_sgpr6_sgpr7, 8, 0 :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 %0:sgpr_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 @@ -461,15 +411,11 @@ body: | ; GCN: liveins: $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr0 = S_SCRATCH_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0, implicit $flat_scr :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: renamable $sgpr1 = S_SCRATCH_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 4, 0, implicit $flat_scr :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: renamable $sgpr0 = S_SCRATCH_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0, implicit $flat_scr :: (dereferenceable invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_SCRATCH_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0, implicit $flat_scr :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_SCRATCH_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0, implicit $flat_scr :: (dereferenceable invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 @@ -492,15 +438,11 @@ body: | ; GCN: liveins: $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0 :: (invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 4, 0 :: (invariant load (s32), addrspace 4) - ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0 :: (invariant load (s32), addrspace 4) - ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0 :: (invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0 :: (invariant load (s32), addrspace 4) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 diff --git a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir index 55639a27fd5c746..355829825146dfe 100644 --- a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir +++ b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir @@ -33,7 +33,6 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $vgpr1 = IMPLICIT_DEF ; CHECK-NEXT: renamable $sgpr34_sgpr35 = IMPLICIT_DEF ; CHECK-NEXT: dead renamable $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: renamable $sgpr41 = IMPLICIT_DEF @@ -41,16 +40,6 @@ body: | ; CHECK-NEXT: renamable $sgpr36_sgpr37 = IMPLICIT_DEF ; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 0, 0 :: (dereferenceable invariant load (s256), align 16, addrspace 4) ; CHECK-NEXT: dead renamable $sgpr4 = S_LOAD_DWORD_IMM renamable $sgpr38_sgpr39, 48, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr1, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr5, 1, killed $vgpr1 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr6, 2, killed $vgpr1 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr7, 3, killed $vgpr1 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr8, 4, killed $vgpr1 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr9, 5, killed $vgpr1 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 $sgpr10, 6, killed $vgpr1 - ; CHECK-NEXT: renamable $vgpr1 = V_WRITELANE_B32 killed $sgpr11, 7, killed $vgpr1, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; CHECK-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr1, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr44_sgpr45, 0, 0 :: (invariant load (s64), align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: $vgpr1 = COPY renamable $sgpr51 @@ -63,50 +52,30 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; CHECK-NEXT: $sgpr5 = V_READLANE_B32 $vgpr1, 1 - ; CHECK-NEXT: $sgpr6 = V_READLANE_B32 $vgpr1, 2 - ; CHECK-NEXT: $sgpr7 = V_READLANE_B32 $vgpr1, 3 - ; CHECK-NEXT: $sgpr8 = V_READLANE_B32 $vgpr1, 4 - ; CHECK-NEXT: $sgpr9 = V_READLANE_B32 $vgpr1, 5 - ; CHECK-NEXT: $sgpr10 = V_READLANE_B32 $vgpr1, 6 - ; CHECK-NEXT: $sgpr11 = V_READLANE_B32 $vgpr1, 7 - ; CHECK-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $exec = S_MOV_B64 killed $noreg + ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; CHECK-NEXT: $sgpr5 = V_READLANE_B32 $vgpr1, 1 - ; CHECK-NEXT: $sgpr6 = V_READLANE_B32 $vgpr1, 2 - ; CHECK-NEXT: $sgpr7 = V_READLANE_B32 $vgpr1, 3 - ; CHECK-NEXT: $sgpr8 = V_READLANE_B32 $vgpr1, 4 - ; CHECK-NEXT: $sgpr9 = V_READLANE_B32 $vgpr1, 5 - ; CHECK-NEXT: $sgpr10 = V_READLANE_B32 $vgpr1, 6 - ; CHECK-NEXT: $sgpr11 = V_READLANE_B32 $vgpr1, 7 + ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) ; CHECK-NEXT: S_CMP_LG_U64 renamable $sgpr4_sgpr5, 0, implicit-def $scc - ; CHECK-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $exec = S_MOV_B64 killed $noreg ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) - ; CHECK-NEXT: liveins: $vgpr1, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 + ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.5, implicit undef $vcc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) - ; CHECK-NEXT: liveins: $vgpr1, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 + ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CMP_EQ_U32 renamable $sgpr8, 0, implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: liveins: $vgpr1, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 + ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr38_sgpr39, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4) ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef renamable $vgpr0, undef renamable $vgpr0, killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s32), addrspace 1) @@ -116,7 +85,6 @@ body: | ; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr36_sgpr37 ; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr34_sgpr35 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: KILL killed renamable $vgpr1 ; CHECK-NEXT: S_ENDPGM 0 bb.0: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16 >From b684eb7e72157a24660cce36b206af0ac4659869 Mon Sep 17 00:00:00 2001 From: Piotr Sobczak <piotr.sobc...@amd.com> Date: Fri, 13 Oct 2023 12:09:53 +0200 Subject: [PATCH 2/5] Fixup - addressing review comments --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 31a086cd2a61820..93380ae56be1220 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2488,17 +2488,16 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB, } if (SingleUseIdx == -1) break; - MachineOperand *UseMO = &I->getOperand(SingleUseIdx); - if (UseMO->getSubReg() == AMDGPU::NoSubRegister) + MachineOperand &UseMO = I->getOperand(SingleUseIdx); + if (UseMO.getSubReg() == AMDGPU::NoSubRegister) break; - unsigned Offset = RI.getSubRegIdxOffset(UseMO->getSubReg()); - unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg()); + unsigned Offset = RI.getSubRegIdxOffset(UseMO.getSubReg()); + unsigned SubregSize = RI.getSubRegIdxSize(UseMO.getSubReg()); MachineFunction *MF = MBB.getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - assert(MRI.hasAtMostUserInstrs(DestReg, 0) && - "DestReg should have no users yet."); + assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet."); unsigned NewOpcode = -1; if (SubregSize == 256) @@ -2513,8 +2512,8 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB, RI.getAllocatableClass(getRegClass(TID, 0, &RI, *MF)); MRI.setRegClass(DestReg, NewRC); - UseMO->setReg(DestReg); - UseMO->setSubReg(AMDGPU::NoSubRegister); + UseMO.setReg(DestReg); + UseMO.setSubReg(AMDGPU::NoSubRegister); // Use a smaller load with the desired size, possibly with updated offset. MachineInstr *MI = MF->CloneMachineInstr(&Orig); @@ -2539,9 +2538,8 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB, default: break; } - MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); - MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, RI); - MBB.insert(I, MI); + + TargetInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, RI); } std::pair<MachineInstr*, MachineInstr*> >From 3b3f987ad234aa6da9f8b3b03e0ddfdd78566278 Mon Sep 17 00:00:00 2001 From: Piotr Sobczak <piotr.sobc...@amd.com> Date: Mon, 16 Oct 2023 16:33:51 +0200 Subject: [PATCH 3/5] Fixup - addressing review comments --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 93380ae56be1220..d91dbf578a7fef4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2477,7 +2477,7 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB, if (!CandReg) continue; - if (CandReg == RegToFind || RI.regsOverlap(CandReg, RegToFind)) { + if (CandReg == RegToFind) { if (SingleUseIdx == -1 && CandMO.isUse()) { SingleUseIdx = i; } else { >From 4f66c207d004c1e3483922c19f71fdd812996541 Mon Sep 17 00:00:00 2001 From: Piotr Sobczak <piotr.sobc...@amd.com> Date: Mon, 16 Oct 2023 17:36:09 +0200 Subject: [PATCH 4/5] Fixup - addressing review comments --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 35 +++++++++----------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d91dbf578a7fef4..a742eccb15e2bd2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2468,32 +2468,21 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB, // Look for a single use of the register that is also a subreg. Register RegToFind = Orig.getOperand(0).getReg(); - int SingleUseIdx = -1; - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - const MachineOperand &CandMO = I->getOperand(i); - if (!CandMO.isReg()) + MachineOperand *UseMO = nullptr; + for (auto &CandMO : I->operands()) { + if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef()) continue; - Register CandReg = CandMO.getReg(); - if (!CandReg) - continue; - - if (CandReg == RegToFind) { - if (SingleUseIdx == -1 && CandMO.isUse()) { - SingleUseIdx = i; - } else { - SingleUseIdx = -1; - break; - } + if (UseMO) { + UseMO = nullptr; + break; } + UseMO = &CandMO; } - if (SingleUseIdx == -1) - break; - MachineOperand &UseMO = I->getOperand(SingleUseIdx); - if (UseMO.getSubReg() == AMDGPU::NoSubRegister) + if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister) break; - unsigned Offset = RI.getSubRegIdxOffset(UseMO.getSubReg()); - unsigned SubregSize = RI.getSubRegIdxSize(UseMO.getSubReg()); + unsigned Offset = RI.getSubRegIdxOffset(UseMO->getSubReg()); + unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg()); MachineFunction *MF = MBB.getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -2512,8 +2501,8 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB, RI.getAllocatableClass(getRegClass(TID, 0, &RI, *MF)); MRI.setRegClass(DestReg, NewRC); - UseMO.setReg(DestReg); - UseMO.setSubReg(AMDGPU::NoSubRegister); + UseMO->setReg(DestReg); + UseMO->setSubReg(AMDGPU::NoSubRegister); // Use a smaller load with the desired size, possibly with updated offset. MachineInstr *MI = MF->CloneMachineInstr(&Orig); >From 51415cd507c2f08cbd9e03bb35e8af858f0bd3e0 Mon Sep 17 00:00:00 2001 From: Piotr Sobczak <piotr.sobc...@amd.com> Date: Wed, 25 Oct 2023 12:32:57 +0200 Subject: [PATCH 5/5] Fixup - addressing review comments --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 29 ++++++++++++++++---------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index a742eccb15e2bd2..54fde2bd8f57b8f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -106,20 +106,27 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); } -bool SIInstrInfo::isReallyTriviallyReMaterializable( - const MachineInstr &MI) const { +static bool canRemat(const MachineInstr &MI) { - bool CanRemat = false; - if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI) || isSALU(MI)) { - CanRemat = true; - } else if (isSMRD(MI)) { - CanRemat = !MI.memoperands_empty() && - llvm::all_of(MI.memoperands(), [](const MachineMemOperand *MMO) { - return MMO->isLoad() && MMO->isInvariant(); - }); + if (SIInstrInfo::isVOP1(MI) || SIInstrInfo::isVOP2(MI) || + SIInstrInfo::isVOP3(MI) || SIInstrInfo::isSDWA(MI) || + SIInstrInfo::isSALU(MI)) + return true; + + if (SIInstrInfo::isSMRD(MI)) { + return !MI.memoperands_empty() && + llvm::all_of(MI.memoperands(), [](const MachineMemOperand *MMO) { + return MMO->isLoad() && MMO->isInvariant(); + }); } - if (CanRemat) { + return false; +} + +bool SIInstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI) const { + + if (canRemat(MI)) { // Normally VALU use of exec would block the rematerialization, but that // is OK in this case to have an implicit exec read as all VALU do. // We really want all of the generic logic for this except for this. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits