https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/191368
>From 3a58e769c59f87fa00b123cb80e778a047f99f8a Mon Sep 17 00:00:00 2001 From: Phoebe Wang <[email protected]> Date: Fri, 10 Apr 2026 17:10:06 +0800 Subject: [PATCH 1/3] [X86][APX] Return CopyMI when added in foldMemoryOperandImpl Fixes: #190962 #191165 --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 11 +++--- llvm/lib/CodeGen/InlineSpiller.cpp | 17 +++++++-- llvm/lib/CodeGen/LiveRangeEdit.cpp | 14 ++++++-- llvm/lib/CodeGen/PeepholeOptimizer.cpp | 7 ++-- llvm/lib/CodeGen/TargetInstrInfo.cpp | 12 ++++--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 2 +- llvm/lib/Target/AArch64/AArch64InstrInfo.h | 12 +++---- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 +-- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 +- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 6 ++-- llvm/lib/Target/RISCV/RISCVInstrInfo.h | 4 +-- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 4 +-- llvm/lib/Target/SystemZ/SystemZInstrInfo.h | 14 ++++---- llvm/lib/Target/X86/X86FastISel.cpp | 3 +- llvm/lib/Target/X86/X86InstrInfo.cpp | 35 ++++++++++-------- llvm/lib/Target/X86/X86InstrInfo.h | 17 ++++----- llvm/test/CodeGen/X86/apx/or.ll | 38 ++++++++++++++++++++ 17 files changed, 138 insertions(+), 64 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index cd5561e57d033..e69f377fd0bd4 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1257,14 +1257,14 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo { /// If VRM is passed, the assigned physregs can be inspected by target to /// decide on using an opcode (note that those assignments can still change). MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops, - int FI, + int FI, MachineInstr *&CopyMI, LiveIntervals *LIS = nullptr, VirtRegMap *VRM = nullptr) const; /// Same as the previous version except it allows folding of any load and /// store from / to any address, not just from a specific stack slot. MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops, - MachineInstr &LoadMI, + MachineInstr &LoadMI, MachineInstr *&CopyMI, LiveIntervals *LIS = nullptr) const; /// This function defines the logic to lower COPY instruction to @@ -1446,7 +1446,7 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo { foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr, + MachineInstr *&CopyMI, LiveIntervals *LIS = nullptr, VirtRegMap *VRM = nullptr) const { return nullptr; } @@ -1459,7 +1459,7 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo { virtual MachineInstr *foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, - LiveIntervals *LIS = nullptr) const { + MachineInstr *&CopyMI, LiveIntervals *LIS = nullptr) const { return nullptr; } @@ -1845,7 +1845,8 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo { virtual MachineInstr *optimizeLoadInstr(MachineInstr &MI, const MachineRegisterInfo *MRI, Register &FoldAsLoadDefReg, - MachineInstr *&DefMI) const; + MachineInstr *&DefMI, + MachineInstr *&CopyMI) const; /// 'Reg' is known to be defined by a move immediate instruction, /// try to fold the immediate into the use instruction. diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 668c7c0a78098..fab04c44371e9 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -1016,9 +1016,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, MI->untieRegOperand(Idx); } + MachineInstr *CopyMI = nullptr; MachineInstr *FoldMI = - LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS) - : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM); + LoadMI + ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, CopyMI, &LIS) + : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, CopyMI, &LIS, &VRM); if (!FoldMI) { // Re-tie operands. for (auto Tied : TiedOps) @@ -1050,7 +1052,16 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, if (TII.isStoreToStackSlot(*MI, FI) && HSpiller.rmFromMergeableSpills(*MI, FI)) --NumSpills; - LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI); + SlotIndex FoldIdx = LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI); + if (CopyMI) { + LIS.InsertMachineInstrInMaps(*CopyMI); + if (!MRI.isSSA()) { + SlotIndex CopyIdx = LIS.InsertMachineInstrInMaps(*CopyMI).getRegSlot(); + LiveInterval &LI = LIS.getInterval(CopyMI->getOperand(0).getReg()); + VNInfo *VNI = LI.getNextValue(CopyIdx, LIS.getVNInfoAllocator()); + LI.addSegment(LiveRange::Segment(CopyIdx, FoldIdx.getRegSlot(), VNI)); + } + } // Update the call info. if (MI->isCandidateForAdditionalCallInfo()) MI->getMF()->moveAdditionalCallInfo(MI, FoldMI); diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index 4e1b0c0c66e69..638adf18997ba 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -151,11 +151,21 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, if (UseMI->readsWritesVirtualRegister(LI->reg(), &Ops).second) return false; - MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, &LIS); + MachineInstr *CopyMI = nullptr; + MachineInstr *FoldMI = + TII.foldMemoryOperand(*UseMI, Ops, *DefMI, CopyMI, &LIS); if (!FoldMI) return false; LLVM_DEBUG(dbgs() << " folded: " << *FoldMI); - LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI); + SlotIndex FoldIdx = LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI); + if (CopyMI) { + SlotIndex CopyIdx = LIS.InsertMachineInstrInMaps(*CopyMI).getRegSlot(); + if (!MRI.isSSA()) { + LiveInterval &LI = LIS.getInterval(CopyMI->getOperand(0).getReg()); + VNInfo *VNI = LI.getNextValue(CopyIdx, LIS.getVNInfoAllocator()); + LI.addSegment(LiveRange::Segment(CopyIdx, FoldIdx.getRegSlot(), VNI)); + } + } // Update the call info. if (UseMI->shouldUpdateAdditionalCallInfo()) UseMI->getMF()->moveAdditionalCallInfo(UseMI, FoldMI); diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 73aecda4e522c..9365ea883eec9 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -1865,8 +1865,9 @@ bool PeepholeOptimizer::run(MachineFunction &MF) { // we need it for markUsesInDebugValueAsUndef(). Register FoldedReg = FoldAsLoadDefReg; MachineInstr *DefMI = nullptr; - if (MachineInstr *FoldMI = - TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) { + MachineInstr *CopyMI = nullptr; + if (MachineInstr *FoldMI = TII->optimizeLoadInstr( + *MI, MRI, FoldAsLoadDefReg, DefMI, CopyMI)) { // Update LocalMIs since we replaced MI with FoldMI and deleted // DefMI. LLVM_DEBUG(dbgs() << "Replacing: " << *MI); @@ -1874,6 +1875,8 @@ bool PeepholeOptimizer::run(MachineFunction &MF) { LocalMIs.erase(MI); LocalMIs.erase(DefMI); LocalMIs.insert(FoldMI); + if (CopyMI) + LocalMIs.insert(CopyMI); // Update the call info. if (MI->shouldUpdateAdditionalCallInfo()) MI->getMF()->moveAdditionalCallInfo(MI, FoldMI); diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 3e3b935135350..ba836df02048c 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -518,7 +518,8 @@ MCInst TargetInstrInfo::getNop() const { llvm_unreachable("Not implemented"); } MachineInstr *TargetInstrInfo::optimizeLoadInstr(MachineInstr &MI, const MachineRegisterInfo *MRI, Register &FoldAsLoadDefReg, - MachineInstr *&DefMI) const { + MachineInstr *&DefMI, + MachineInstr *&CopyMI) const { // Check whether we can move DefMI here. DefMI = MRI->getVRegDef(FoldAsLoadDefReg); assert(DefMI); @@ -544,7 +545,8 @@ MachineInstr *TargetInstrInfo::optimizeLoadInstr(MachineInstr &MI, return nullptr; // Check whether we can fold the def into SrcOperandId. - if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) { + if (MachineInstr *FoldMI = + foldMemoryOperand(MI, SrcOperandIds, *DefMI, CopyMI)) { FoldAsLoadDefReg = 0; return FoldMI; } @@ -703,6 +705,7 @@ static MachineInstr *foldInlineAsmMemOperand(MachineInstr &MI, MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops, int FI, + MachineInstr *&CopyMI, LiveIntervals *LIS, VirtRegMap *VRM) const { auto Flags = MachineMemOperand::MONone; @@ -751,7 +754,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, return foldInlineAsmMemOperand(MI, Ops, FI, *this); } else { // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM); + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, CopyMI, LIS, VRM); } if (NewMI) { @@ -804,6 +807,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops, MachineInstr &LoadMI, + MachineInstr *&CopyMI, LiveIntervals *LIS) const { assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!"); #ifndef NDEBUG @@ -830,7 +834,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, return foldInlineAsmMemOperand(MI, Ops, FrameIndex, *this); } else { // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS); + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, CopyMI, LIS); } if (!NewMI) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 4094526574d7a..988fe7014ddcf 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6807,7 +6807,7 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, int FrameIndex, + MachineBasicBlock::iterator InsertPt, int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS, VirtRegMap *VRM) const { // This is a bit of a hack. Consider this instruction: // diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index a1ec4cbffdf02..fa63c56739d62 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -382,12 +382,12 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { bool isSubregFoldable() const override { return true; } using TargetInstrInfo::foldMemoryOperandImpl; - MachineInstr * - foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, - ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr, - VirtRegMap *VRM = nullptr) const override; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, + ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, + int FrameIndex, MachineInstr *&CopyMI, + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; /// \returns true if a branch from an instruction with opcode \p BranchOpc /// bytes is capable of jumping to a position \p BrOffset bytes away. diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 3179c65340d18..bc3052b139d18 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -10656,8 +10656,8 @@ bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); } MachineInstr *SIInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, - VirtRegMap *VRM) const { + MachineBasicBlock::iterator InsertPt, int FrameIndex, MachineInstr *&CopyMI, + LiveIntervals *LIS, VirtRegMap *VRM) const { // This is a bit of a hack (copied from AArch64). Consider this instruction: // // %0:sreg_32 = COPY $m0 diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index a58e281de17a1..3c1232ac098a0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1682,7 +1682,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineBasicBlock::iterator InsertPt, - int FrameIndex, + int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS = nullptr, VirtRegMap *VRM = nullptr) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index db559f4949904..ee10d71727c39 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -905,8 +905,8 @@ std::optional<unsigned> getFoldedOpcode(MachineFunction &MF, MachineInstr &MI, // This is the version used during InlineSpiller::spillAroundUses MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, - VirtRegMap *VRM) const { + MachineBasicBlock::iterator InsertPt, int FrameIndex, MachineInstr *&CopyMI, + LiveIntervals *LIS, VirtRegMap *VRM) const { std::optional<unsigned> LoadOpc = getFoldedOpcode(MF, MI, Ops, STI); if (!LoadOpc) @@ -952,7 +952,7 @@ static unsigned getLoadPredicatedOpcode(unsigned Opcode) { MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, - LiveIntervals *LIS) const { + MachineInstr *&CopyMI, LiveIntervals *LIS) const { // For now, only handle RISCV::PseudoCCMOVGPR. if (MI.getOpcode() != RISCV::PseudoCCMOVGPR) return nullptr; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 5c36ff7525200..119b2a7eae6bf 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -130,14 +130,14 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineBasicBlock::iterator InsertPt, - int FrameIndex, + int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS = nullptr, VirtRegMap *VRM = nullptr) const override; MachineInstr *foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, - LiveIntervals *LIS = nullptr) const override; + MachineInstr *&CopyMI, LiveIntervals *LIS = nullptr) const override; // Materializes the given integer Val into DstReg. void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index a76424eff1e49..5d85a64844592 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -1225,7 +1225,7 @@ SystemZInstrInfo::getInverseOpcode(unsigned Opcode) const { MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, int FrameIndex, + MachineBasicBlock::iterator InsertPt, int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS, VirtRegMap *VRM) const { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1558,7 +1558,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, - LiveIntervals *LIS) const { + MachineInstr *&CopyMI, LiveIntervals *LIS) const { MachineRegisterInfo *MRI = &MF.getRegInfo(); MachineBasicBlock *MBB = MI.getParent(); diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 029fe93d5b15c..9fbd8e9a28d1d 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -292,16 +292,16 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { bool Invert) const override; std::optional<unsigned> getInverseOpcode(unsigned Opcode) const override; - MachineInstr * - foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, - ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr, - VirtRegMap *VRM = nullptr) const override; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, + ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, + int FrameIndex, MachineInstr *&CopyMI, + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; MachineInstr *foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, - LiveIntervals *LIS = nullptr) const override; + MachineInstr *&CopyMI, LiveIntervals *LIS = nullptr) const override; bool expandPostRAPseudo(MachineInstr &MBBI) const override; bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 3dbe6d14c610e..2017897166f3c 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3992,9 +3992,10 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, SmallVector<MachineOperand, 8> AddrOps; AM.getFullAddress(AddrOps); + MachineInstr *CopyMI = nullptr; MachineInstr *Result = XII.foldMemoryOperandImpl( *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, LI->getAlign(), - /*AllowCommute=*/true); + /*AllowCommute=*/true, CopyMI); if (!Result) return false; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 5b6858f59e6d6..50e0bfcac9321 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -7485,7 +7485,8 @@ static void printFailMsgforFold(const MachineInstr &MI, unsigned Idx) { MachineInstr *X86InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, unsigned OpNum, ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt, - unsigned Size, Align Alignment, bool AllowCommute) const { + unsigned Size, Align Alignment, bool AllowCommute, + MachineInstr *&CopyMI) const { bool isSlowTwoMemOps = Subtarget.slowTwoMemOps(); unsigned Opc = MI.getOpcode(); @@ -7598,11 +7599,13 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( return NewMI; const TargetRegisterClass &RC = *MF.getRegInfo().getRegClass(SrcReg); - Register NewSrc = MF.getRegInfo().createVirtualRegister(&RC); - BuildMI(*NewMI->getParent(), *NewMI, MI.getDebugLoc(), - get(TargetOpcode::COPY)) - .addReg(NewSrc, RegState::Define) - .addReg(SrcReg); + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register NewSrc = MRI.isSSA() ? MRI.createVirtualRegister(&RC) + : MI.getOperand(0).getReg(); + CopyMI = BuildMI(*NewMI->getParent(), *NewMI, MI.getDebugLoc(), + get(TargetOpcode::COPY)) + .addReg(NewSrc, RegState::Define) + .addReg(SrcReg); NewMI->getOperand(1).setReg(NewSrc); } return NewMI; @@ -7618,7 +7621,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( } // Attempt to fold with the commuted version of the instruction. NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt, Size, - Alignment, /*AllowCommute=*/false); + Alignment, /*AllowCommute=*/false, CopyMI); if (NewMI) return NewMI; // Folding failed again - undo the commute before returning. @@ -7631,8 +7634,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( MachineInstr *X86InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, - VirtRegMap *VRM) const { + MachineBasicBlock::iterator InsertPt, int FrameIndex, MachineInstr *&CopyMI, + LiveIntervals *LIS, VirtRegMap *VRM) const { // Check switch flag if (NoFusing) return nullptr; @@ -7665,9 +7668,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( std::min(Alignment, Subtarget.getFrameLowering()->getStackAlign()); auto Impl = [&]() { - return foldMemoryOperandImpl(MF, MI, Ops[0], - MachineOperand::CreateFI(FrameIndex), InsertPt, - Size, Alignment, /*AllowCommute=*/true); + return foldMemoryOperandImpl( + MF, MI, Ops[0], MachineOperand::CreateFI(FrameIndex), InsertPt, Size, + Alignment, /*AllowCommute=*/true, CopyMI); }; if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; @@ -8147,7 +8150,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, MachineInstr *X86InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, - LiveIntervals *LIS) const { + MachineInstr *&CopyMI, LiveIntervals *LIS) const { // If LoadMI is a masked load, check MI having the same mask. const MCInstrDesc &MCID = get(LoadMI.getOpcode()); @@ -8199,7 +8202,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( if (isLoadFromStackSlot(LoadMI, FrameIndex)) { if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF)) return nullptr; - return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex, LIS); + return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex, CopyMI, + LIS); } // Check switch flag @@ -8444,7 +8448,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( } } return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt, - /*Size=*/0, Alignment, /*AllowCommute=*/true); + /*Size=*/0, Alignment, /*AllowCommute=*/true, + CopyMI); } MachineInstr * diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 9695d9a79ec96..e9c5d7ccb32bf 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -492,19 +492,19 @@ class X86InstrInfo final : public X86GenInstrInfo { /// is likely that the referenced instruction has been changed. /// /// \returns true on success. - MachineInstr * - foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, - ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr, - VirtRegMap *VRM = nullptr) const override; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, + ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, + int FrameIndex, MachineInstr *&CopyMI, + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; /// Same as the previous version except it allows folding of any load and /// store from / to any address, not just from a specific stack slot. MachineInstr *foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, - LiveIntervals *LIS = nullptr) const override; + MachineInstr *&CopyMI, LiveIntervals *LIS = nullptr) const override; bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr &MI, Register Reg, @@ -582,7 +582,8 @@ class X86InstrInfo final : public X86GenInstrInfo { ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt, unsigned Size, Align Alignment, - bool AllowCommute) const; + bool AllowCommute, + MachineInstr *&CopyMI) const; bool isHighLatencyDef(int opc) const override; diff --git a/llvm/test/CodeGen/X86/apx/or.ll b/llvm/test/CodeGen/X86/apx/or.ll index 594ed7b29216b..3bb2c4041308f 100644 --- a/llvm/test/CodeGen/X86/apx/or.ll +++ b/llvm/test/CodeGen/X86/apx/or.ll @@ -1240,3 +1240,41 @@ entry: store i64 %or, ptr %a ret void } + +define i64 @pr191165(i32 %0, ptr %1) { +; NDD-LABEL: pr191165: +; NDD: # %bb.0: +; NDD-NEXT: movl (%rsi), %eax # encoding: [0x8b,0x06] +; NDD-NEXT: orl %edi, %eax # encoding: [0x09,0xf8] +; NDD-NEXT: movl %eax, 0 # encoding: [0x89,0x04,0x25,0x00,0x00,0x00,0x00] +; NDD-NEXT: movslq %edi, %rax # encoding: [0x48,0x63,0xc7] +; NDD-NEXT: retq # encoding: [0xc3] +; +; IMMONLY-LABEL: pr191165: +; IMMONLY: # %bb.0: +; IMMONLY-NEXT: movl (%rsi), %eax # encoding: [0x8b,0x06] +; IMMONLY-NEXT: orl %edi, %eax # encoding: [0x09,0xf8] +; IMMONLY-NEXT: movl %eax, 0 # encoding: [0x89,0x04,0x25,0x00,0x00,0x00,0x00] +; IMMONLY-NEXT: movslq %edi, %rax # encoding: [0x48,0x63,0xc7] +; IMMONLY-NEXT: retq # encoding: [0xc3] +; +; MEM-LABEL: pr191165: +; MEM: # %bb.0: +; MEM-NEXT: orl (%rsi), %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x0b,0x3e] +; MEM-NEXT: movl %eax, 0 # encoding: [0x89,0x04,0x25,0x00,0x00,0x00,0x00] +; MEM-NEXT: movslq %edi, %rax # encoding: [0x48,0x63,0xc7] +; MEM-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: pr191165: +; NF: # %bb.0: +; NF-NEXT: movl (%rsi), %eax # encoding: [0x8b,0x06] +; NF-NEXT: orl %edi, %eax # encoding: [0x09,0xf8] +; NF-NEXT: movl %eax, 0 # encoding: [0x89,0x04,0x25,0x00,0x00,0x00,0x00] +; NF-NEXT: movslq %edi, %rax # encoding: [0x48,0x63,0xc7] +; NF-NEXT: retq # encoding: [0xc3] + %3 = load i32, ptr %1, align 4 + %4 = or i32 %3, %0 + store volatile i32 %4, ptr null, align 4 + %5 = sext i32 %0 to i64 + ret i64 %5 +} >From 789470e6a9a1b91cf541ebf1950eb84ca935b8f0 Mon Sep 17 00:00:00 2001 From: Phoebe Wang <[email protected]> Date: Fri, 10 Apr 2026 17:16:17 +0800 Subject: [PATCH 2/3] Add missing test --- clang/test/CodeGen/X86/pr190962.ll | 65 ++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 clang/test/CodeGen/X86/pr190962.ll diff --git a/clang/test/CodeGen/X86/pr190962.ll b/clang/test/CodeGen/X86/pr190962.ll new file mode 100644 index 0000000000000..acd59b2ee3c46 --- /dev/null +++ b/clang/test/CodeGen/X86/pr190962.ll @@ -0,0 +1,65 @@ +; REQUIRES: x86-registered-target +; RUN: %clang -O1 -mapx-features=ndd --target=x86_64-pc-windows-gnu -S %s -mllvm -verify-machineinstrs -o /dev/null + +;; Check no crash when building below IR with Clang. + +define i32 @foo(ptr %0, ptr %1, ptr %2, i64 %3, i64 %4, i64 %5) { + %7 = call i64 @"_ZZN3jxl15PatchDictionary6DecodeEP22JxlMemoryManagerStructPNS_9BitReaderEyyyPbENK3$_0clEy"() + %8 = mul i64 %3, %4 + %9 = icmp ugt i64 1, %8 + br i1 %9, label %common.ret1, label %10 + +common.ret1: ; preds = %26, %23, %16, %6 + %common.ret1.op = phi i32 [ 0, %23 ], [ 0, %16 ], [ 0, %26 ], [ 0, %6 ] + ret i32 %common.ret1.op + +10: ; preds = %6 + %11 = load volatile i64, ptr null, align 8 + %12 = call i64 @"_ZZN3jxl15PatchDictionary6DecodeEP22JxlMemoryManagerStructPNS_9BitReaderEyyyPbENK3$_0clEy"() + %13 = load volatile i64, ptr null, align 8 + %14 = or i64 %11, %5 + %15 = icmp ugt i64 %14, 0 + br i1 %15, label %16, label %19 + +16: ; preds = %10 + call void @_ZN3jxl6StatusC2ENS_10StatusCodeE() + %17 = load i32, ptr null, align 4 + %18 = call i32 (i32, ptr, ...) @_ZN3jxl13StatusMessageENS_6StatusEPKcz(i32 %17, ptr null, ptr null, i32 0) + call void @_ZN3jxl6StatusC2ENS_10StatusCodeE() + br label %common.ret1 + +19: ; preds = %10 + %20 = call i64 @_ZNK3jxl11ImageBundle5ysizeEv(ptr %1) + %21 = or i64 %12, %13 + %22 = icmp ugt i64 %21, 0 + br i1 %22, label %23, label %26 + +23: ; preds = %19 + call void @_ZN3jxl6StatusC2ENS_10StatusCodeE() + %24 = load i32, ptr null, align 4 + %25 = call i32 (i32, ptr, ...) @_ZN3jxl13StatusMessageENS_6StatusEPKcz(i32 %24, ptr null, ptr null, i32 1) + call void @_ZN3jxl6StatusC2ENS_10StatusCodeE() + br label %common.ret1 + +26: ; preds = %19 + %27 = icmp ugt i64 1, %3 + br i1 %27, label %common.ret1, label %28 + +28: ; preds = %26 + store i32 0, ptr %0, align 4 + %29 = call i32 (i32, ptr, ...) @_ZN3jxl13StatusMessageENS_6StatusEPKcz(i32 0, ptr null, ptr null, i32 0, i64 0, i64 0, i64 %4) + unreachable +} + +declare i32 @_ZN3jxl13StatusMessageENS_6StatusEPKcz(i32, ptr, ...) + +declare i64 @"_ZZN3jxl15PatchDictionary6DecodeEP22JxlMemoryManagerStructPNS_9BitReaderEyyyPbENK3$_0clEy"() + +declare void @_ZN3jxl6StatusC2ENS_10StatusCodeE() + +declare i64 @_ZNK3jxl11ImageBundle5ysizeEv(ptr) + +; uselistorder directives +uselistorder ptr @_ZN3jxl13StatusMessageENS_6StatusEPKcz, { 2, 1, 0 } +uselistorder ptr @"_ZZN3jxl15PatchDictionary6DecodeEP22JxlMemoryManagerStructPNS_9BitReaderEyyyPbENK3$_0clEy", { 1, 0 } +uselistorder ptr @_ZN3jxl6StatusC2ENS_10StatusCodeE, { 3, 2, 1, 0 } >From d701f200442de49d1661edbd14345b4d1500747f Mon Sep 17 00:00:00 2001 From: Phoebe Wang <[email protected]> Date: Fri, 10 Apr 2026 21:02:55 +0800 Subject: [PATCH 3/3] Address comments and new failure --- clang/test/CodeGen/X86/pr190962.ll | 65 ---------------- llvm/lib/CodeGen/InlineSpiller.cpp | 5 +- llvm/lib/CodeGen/LiveRangeEdit.cpp | 8 +- llvm/lib/Target/X86/X86InstrInfo.cpp | 15 +++- llvm/test/CodeGen/X86/apx/or.ll | 12 +-- llvm/test/CodeGen/X86/apx/pr191368.ll | 104 ++++++++++++++++++++++++++ 6 files changed, 127 insertions(+), 82 deletions(-) delete mode 100644 clang/test/CodeGen/X86/pr190962.ll create mode 100644 llvm/test/CodeGen/X86/apx/pr191368.ll diff --git a/clang/test/CodeGen/X86/pr190962.ll b/clang/test/CodeGen/X86/pr190962.ll deleted file mode 100644 index acd59b2ee3c46..0000000000000 --- a/clang/test/CodeGen/X86/pr190962.ll +++ /dev/null @@ -1,65 +0,0 @@ -; REQUIRES: x86-registered-target -; RUN: %clang -O1 -mapx-features=ndd --target=x86_64-pc-windows-gnu -S %s -mllvm -verify-machineinstrs -o /dev/null - -;; Check no crash when building below IR with Clang. - -define i32 @foo(ptr %0, ptr %1, ptr %2, i64 %3, i64 %4, i64 %5) { - %7 = call i64 @"_ZZN3jxl15PatchDictionary6DecodeEP22JxlMemoryManagerStructPNS_9BitReaderEyyyPbENK3$_0clEy"() - %8 = mul i64 %3, %4 - %9 = icmp ugt i64 1, %8 - br i1 %9, label %common.ret1, label %10 - -common.ret1: ; preds = %26, %23, %16, %6 - %common.ret1.op = phi i32 [ 0, %23 ], [ 0, %16 ], [ 0, %26 ], [ 0, %6 ] - ret i32 %common.ret1.op - -10: ; preds = %6 - %11 = load volatile i64, ptr null, align 8 - %12 = call i64 @"_ZZN3jxl15PatchDictionary6DecodeEP22JxlMemoryManagerStructPNS_9BitReaderEyyyPbENK3$_0clEy"() - %13 = load volatile i64, ptr null, align 8 - %14 = or i64 %11, %5 - %15 = icmp ugt i64 %14, 0 - br i1 %15, label %16, label %19 - -16: ; preds = %10 - call void @_ZN3jxl6StatusC2ENS_10StatusCodeE() - %17 = load i32, ptr null, align 4 - %18 = call i32 (i32, ptr, ...) @_ZN3jxl13StatusMessageENS_6StatusEPKcz(i32 %17, ptr null, ptr null, i32 0) - call void @_ZN3jxl6StatusC2ENS_10StatusCodeE() - br label %common.ret1 - -19: ; preds = %10 - %20 = call i64 @_ZNK3jxl11ImageBundle5ysizeEv(ptr %1) - %21 = or i64 %12, %13 - %22 = icmp ugt i64 %21, 0 - br i1 %22, label %23, label %26 - -23: ; preds = %19 - call void @_ZN3jxl6StatusC2ENS_10StatusCodeE() - %24 = load i32, ptr null, align 4 - %25 = call i32 (i32, ptr, ...) @_ZN3jxl13StatusMessageENS_6StatusEPKcz(i32 %24, ptr null, ptr null, i32 1) - call void @_ZN3jxl6StatusC2ENS_10StatusCodeE() - br label %common.ret1 - -26: ; preds = %19 - %27 = icmp ugt i64 1, %3 - br i1 %27, label %common.ret1, label %28 - -28: ; preds = %26 - store i32 0, ptr %0, align 4 - %29 = call i32 (i32, ptr, ...) @_ZN3jxl13StatusMessageENS_6StatusEPKcz(i32 0, ptr null, ptr null, i32 0, i64 0, i64 0, i64 %4) - unreachable -} - -declare i32 @_ZN3jxl13StatusMessageENS_6StatusEPKcz(i32, ptr, ...) - -declare i64 @"_ZZN3jxl15PatchDictionary6DecodeEP22JxlMemoryManagerStructPNS_9BitReaderEyyyPbENK3$_0clEy"() - -declare void @_ZN3jxl6StatusC2ENS_10StatusCodeE() - -declare i64 @_ZNK3jxl11ImageBundle5ysizeEv(ptr) - -; uselistorder directives -uselistorder ptr @_ZN3jxl13StatusMessageENS_6StatusEPKcz, { 2, 1, 0 } -uselistorder ptr @"_ZZN3jxl15PatchDictionary6DecodeEP22JxlMemoryManagerStructPNS_9BitReaderEyyyPbENK3$_0clEy", { 1, 0 } -uselistorder ptr @_ZN3jxl6StatusC2ENS_10StatusCodeE, { 3, 2, 1, 0 } diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index fab04c44371e9..768c4a733639f 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -1054,9 +1054,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, --NumSpills; SlotIndex FoldIdx = LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI); if (CopyMI) { - LIS.InsertMachineInstrInMaps(*CopyMI); + SlotIndex CopyIdx = LIS.InsertMachineInstrInMaps(*CopyMI).getRegSlot(); if (!MRI.isSSA()) { - SlotIndex CopyIdx = LIS.InsertMachineInstrInMaps(*CopyMI).getRegSlot(); LiveInterval &LI = LIS.getInterval(CopyMI->getOperand(0).getReg()); VNInfo *VNI = LI.getNextValue(CopyIdx, LIS.getVNInfoAllocator()); LI.addSegment(LiveRange::Segment(CopyIdx, FoldIdx.getRegSlot(), VNI)); @@ -1103,7 +1102,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, // Insert any new instructions other than FoldMI into the LIS maps. assert(!MIS.empty() && "Unexpected empty span of instructions!"); for (MachineInstr &MI : MIS) - if (&MI != FoldMI) + if (&MI != FoldMI && &MI != CopyMI) LIS.InsertMachineInstrInMaps(MI); // TII.foldMemoryOperand may have left some implicit operands on the diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index 638adf18997ba..22ea4a08f3957 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -160,11 +160,9 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, SlotIndex FoldIdx = LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI); if (CopyMI) { SlotIndex CopyIdx = LIS.InsertMachineInstrInMaps(*CopyMI).getRegSlot(); - if (!MRI.isSSA()) { - LiveInterval &LI = LIS.getInterval(CopyMI->getOperand(0).getReg()); - VNInfo *VNI = LI.getNextValue(CopyIdx, LIS.getVNInfoAllocator()); - LI.addSegment(LiveRange::Segment(CopyIdx, FoldIdx.getRegSlot(), VNI)); - } + LiveInterval &LI = LIS.getInterval(CopyMI->getOperand(0).getReg()); + VNInfo *VNI = LI.getNextValue(CopyIdx, LIS.getVNInfoAllocator()); + LI.addSegment(LiveRange::Segment(CopyIdx, FoldIdx.getRegSlot(), VNI)); } // Update the call info. if (UseMI->shouldUpdateAdditionalCallInfo()) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 50e0bfcac9321..2f6f2eafa1005 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -7600,13 +7600,22 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( const TargetRegisterClass &RC = *MF.getRegInfo().getRegClass(SrcReg); MachineRegisterInfo &MRI = MF.getRegInfo(); - Register NewSrc = MRI.isSSA() ? MRI.createVirtualRegister(&RC) - : MI.getOperand(0).getReg(); + Register SubReg = X86::NoSubRegister; + Register NewSrc = X86::NoSubRegister; + + if (MRI.isSSA()) { + NewSrc = MRI.createVirtualRegister(&RC); + } else { + NewSrc = MI.getOperand(0).getReg(); + SubReg = MI.getOperand(0).getSubReg(); + } + CopyMI = BuildMI(*NewMI->getParent(), *NewMI, MI.getDebugLoc(), get(TargetOpcode::COPY)) - .addReg(NewSrc, RegState::Define) + .addReg(NewSrc, RegState::Define, SubReg) .addReg(SrcReg); NewMI->getOperand(1).setReg(NewSrc); + NewMI->getOperand(1).setSubReg(SubReg); } return NewMI; } diff --git a/llvm/test/CodeGen/X86/apx/or.ll b/llvm/test/CodeGen/X86/apx/or.ll index 3bb2c4041308f..0a6e98e89f3fc 100644 --- a/llvm/test/CodeGen/X86/apx/or.ll +++ b/llvm/test/CodeGen/X86/apx/or.ll @@ -1241,7 +1241,7 @@ entry: ret void } -define i64 @pr191165(i32 %0, ptr %1) { +define i64 @pr191165(i32 %a, ptr %b) { ; NDD-LABEL: pr191165: ; NDD: # %bb.0: ; NDD-NEXT: movl (%rsi), %eax # encoding: [0x8b,0x06] @@ -1272,9 +1272,9 @@ define i64 @pr191165(i32 %0, ptr %1) { ; NF-NEXT: movl %eax, 0 # encoding: [0x89,0x04,0x25,0x00,0x00,0x00,0x00] ; NF-NEXT: movslq %edi, %rax # encoding: [0x48,0x63,0xc7] ; NF-NEXT: retq # encoding: [0xc3] - %3 = load i32, ptr %1, align 4 - %4 = or i32 %3, %0 - store volatile i32 %4, ptr null, align 4 - %5 = sext i32 %0 to i64 - ret i64 %5 + %x = load i32, ptr %b, align 4 + %y = or i32 %x, %a + store volatile i32 %y, ptr null, align 4 + %z = sext i32 %a to i64 + ret i64 %z } diff --git a/llvm/test/CodeGen/X86/apx/pr191368.ll b/llvm/test/CodeGen/X86/apx/pr191368.ll new file mode 100644 index 0000000000000..fe3964ee22fd9 --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/pr191368.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -mattr=ndd | FileCheck %s + +define ptr @foo(ptr %a, i32 %b, ptr %c, ptr %d, i32 %e, i32 %f, i32 %g) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %l0 +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rsi +; CHECK-NEXT: pushq %rdi +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: movq %r9, %r14 +; CHECK-NEXT: movq %r8, %rbx +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movq %rcx, %rdi +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r15d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r13d +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: callq bitstob +; CHECK-NEXT: cmpl $-1, %r15d +; CHECK-NEXT: movl $-1, %r12d +; CHECK-NEXT: cmovll %r15d, %r12d +; CHECK-NEXT: cmpl $5, %r13d +; CHECK-NEXT: ja .LBB0_5 +; CHECK-NEXT: # %bb.1: # %l0 +; CHECK-NEXT: movl $3, %eax +; CHECK-NEXT: btl %r13d, %eax +; CHECK-NEXT: jb .LBB0_4 +; CHECK-NEXT: # %bb.2: # %l0 +; CHECK-NEXT: movl $20, %eax +; CHECK-NEXT: btl %r13d, %eax +; CHECK-NEXT: jb .LBB0_6 +; CHECK-NEXT: # %bb.3: # %l1 +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, %ebp +; CHECK-NEXT: .LBB0_4: # %.sink.split +; CHECK-NEXT: movl %ebp, (%r14) +; CHECK-NEXT: .LBB0_5: # %l2 +; CHECK-NEXT: callq __rv_alloc_D2A +; CHECK-NEXT: movl $0, (%rbx) +; CHECK-NEXT: movl %r12d, %eax +; CHECK-NEXT: subl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq 0, %rcx +; CHECK-NEXT: leal 1(%rax,%r15), %eax +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: callq __Bfree_D2A +; CHECK-NEXT: movl %esi, (%rdi) +; CHECK-NEXT: .LBB0_6: # %common.ret1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: popq %rdi +; CHECK-NEXT: popq %rsi +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: retq +l0: + %x = tail call ptr @bitstob(ptr null) + %y = tail call i32 @llvm.smin.i32(i32 %g, i32 -1) + switch i32 %e, label %l2 [ + i32 0, label %.sink.split + i32 1, label %.sink.split + i32 2, label %common.ret1 + i32 4, label %common.ret1 + i32 3, label %l1 + i32 5, label %l1 + ] + +common.ret1: ; preds = %l2, %7, %7 + ret ptr null + +l1: ; preds = %7, %7 + br label %.sink.split + +.sink.split: ; preds = %7, %7, %l1 + %.sink = phi i32 [ %f, %l1 ], [ 0, %l0 ], [ 0, %l0 ] + store i32 %.sink, ptr %d, align 4 + br label %l2 + +l2: ; preds = %.sink.split, %7 + %b2 = tail call ptr @__rv_alloc_D2A() + store i32 0, ptr %c, align 4 + %reass.sub = sub i32 %y, %f + %b3 = add i32 %reass.sub, 1 + %b4 = load volatile ptr, ptr null, align 4294967296 + %b5 = add i32 %b3, %g + store i32 %b5, ptr %a, align 4 + tail call void @__Bfree_D2A() + store i32 %b, ptr %a, align 4 + br label %common.ret1 +} + +declare ptr @bitstob(ptr) +declare void @__Bfree_D2A() +declare ptr @__rv_alloc_D2A() +declare i32 @llvm.smin.i32(i32, i32) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
