Changes in directory llvm/lib/Target/PowerPC:
PPCISelDAGToDAG.cpp updated: 1.164 -> 1.165 PPCInstrInfo.td updated: 1.181 -> 1.182 PPCRegisterInfo.cpp updated: 1.43 -> 1.44 PPCRegisterInfo.td updated: 1.29 -> 1.30 --- Log message: For functions that use vector registers, save VRSAVE, mark used registers, and update it on entry to each function, then restore it on exit. This compiles: void func(vfloat *a, vfloat *b, vfloat *c) { *a = *b * *c + *c; } to this: _func: mfspr r2, 256 oris r6, r2, 49152 mtspr 256, r6 lvx v0, 0, r5 lvx v1, 0, r4 vmaddfp v0, v1, v0, v0 stvx v0, 0, r3 mtspr 256, r2 blr GCC produces this (which has additional stack accesses): _func: mfspr r0,256 stw r0,-4(r1) oris r0,r0,0xc000 mtspr 256,r0 lvx v0,0,r5 lvx v1,0,r4 lwz r12,-4(r1) vmaddfp v0,v0,v1,v0 stvx v0,0,r3 mtspr 256,r12 blr --- Diffs of the changes: (+130 -10) PPCISelDAGToDAG.cpp | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++- PPCInstrInfo.td | 26 ++++++++++++++++------ PPCRegisterInfo.cpp | 51 ++++++++++++++++++++++++++++++++++++++++++++ PPCRegisterInfo.td | 4 +-- 4 files changed, 130 insertions(+), 10 deletions(-) Index: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp diff -u llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.164 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.165 --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.164 Sun Mar 12 03:13:49 2006 +++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Mon Mar 13 15:52:10 2006 @@ -196,8 +196,65 @@ CodeGenMap.clear(); DAG.RemoveDeadNodes(); - // Emit machine code to BB. + // Emit machine code to BB. ScheduleAndEmitDAG(DAG); + + // Check to see if this function uses vector registers, which means we have to + // save and restore the VRSAVE register and update it with the regs we use. + // + // In this case, there will be virtual registers of vector type type created + // by the scheduler. Detect them now. 
+ SSARegMap *RegMap = DAG.getMachineFunction().getSSARegMap(); + bool HasVectorVReg = false; + for (unsigned i = MRegisterInfo::FirstVirtualRegister, + e = RegMap->getLastVirtReg(); i != e; ++i) + if (RegMap->getRegClass(i) == &PPC::VRRCRegClass) { + HasVectorVReg = true; + break; + } + + // If we have a vector register, we want to emit code into the entry and exit + // blocks to save and restore the VRSAVE register. We do this here (instead + // of marking all vector instructions as clobbering VRSAVE) for two reasons: + // + // 1. This (trivially) reduces the load on the register allocator, by not + // having to represent the live range of the VRSAVE register. + // 2. This (more significantly) allows us to create a temporary virtual + // register to hold the saved VRSAVE value, allowing this temporary to be + // register allocated, instead of forcing it to be spilled to the stack. + if (HasVectorVReg) { + // Create two vregs - one to hold the VRSAVE register that is live-in to the + // function and one for the value after having bits or'd into it. + unsigned InVRSAVE = RegMap->createVirtualRegister(&PPC::GPRCRegClass); + unsigned UpdatedVRSAVE = RegMap->createVirtualRegister(&PPC::GPRCRegClass); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineBasicBlock &EntryBB = *MF.begin(); + // Emit the following code into the entry block: + // InVRSAVE = MFVRSAVE + // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE + // MTVRSAVE UpdatedVRSAVE + MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point + BuildMI(EntryBB, IP, PPC::MFVRSAVE, 0, InVRSAVE); + BuildMI(EntryBB, IP, PPC::UPDATE_VRSAVE, 1, UpdatedVRSAVE).addReg(InVRSAVE); + BuildMI(EntryBB, IP, PPC::MTVRSAVE, 1).addReg(UpdatedVRSAVE); + + // Find all return blocks, outputting a restore in each epilog. 
+ const TargetInstrInfo &TII = *DAG.getTarget().getInstrInfo(); + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) + if (!BB->empty() && TII.isReturn(BB->back().getOpcode())) { + IP = BB->end(); --IP; + + // Skip over all terminator instructions, which are part of the return + // sequence. + MachineBasicBlock::iterator I2 = IP; + while (I2 != BB->begin() && TII.isTerminatorInstr((--I2)->getOpcode())) + IP = I2; + + // Emit: MTVRSAVE InVRSave + BuildMI(*BB, IP, PPC::MTVRSAVE, 1).addReg(InVRSAVE); + } + } } /// getGlobalBaseReg - Output the instructions required to put the Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td diff -u llvm/lib/Target/PowerPC/PPCInstrInfo.td:1.181 llvm/lib/Target/PowerPC/PPCInstrInfo.td:1.182 --- llvm/lib/Target/PowerPC/PPCInstrInfo.td:1.181 Sun Mar 12 23:15:10 2006 +++ llvm/lib/Target/PowerPC/PPCInstrInfo.td Mon Mar 13 15:52:10 2006 @@ -210,6 +210,9 @@ def ADJCALLSTACKUP : Pseudo<(ops u16imm:$amt), "; ADJCALLSTACKUP", [(callseq_end imm:$amt)]>; + +def UPDATE_VRSAVE : Pseudo<(ops GPRC:$rD, GPRC:$rS), + "UPDATE_VRSAVE $rD, $rS", []>; } def IMPLICIT_DEF_GPR : Pseudo<(ops GPRC:$rD), "; $rD = IMPLICIT_DEF_GPRC", [(set GPRC:$rD, (undef))]>; @@ -694,8 +697,24 @@ // def MFCTR : XFXForm_1_ext<31, 339, 9, (ops GPRC:$rT), "mfctr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; +def MTCTR : XFXForm_7_ext<31, 467, 9, (ops GPRC:$rS), "mtctr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; + +def MTLR : XFXForm_7_ext<31, 467, 8, (ops GPRC:$rS), "mtlr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; def MFLR : XFXForm_1_ext<31, 339, 8, (ops GPRC:$rT), "mflr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; + +// Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed like +// a GPR on the PPC970. As such, copies in and out have the same performance +// characteristics as an OR instruction. 
+def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (ops GPRC:$rS), + "mtspr 256, $rS", IntGeneral>, + PPC970_Unit_FXU; +def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (ops GPRC:$rT), + "mfspr $rT, 256", IntGeneral>, + PPC970_Unit_FXU; + def MFCR : XFXForm_3<31, 19, (ops GPRC:$rT), "mfcr $rT", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; def MTCRF : XFXForm_5<31, 144, (ops crbitm:$FXM, GPRC:$rS), @@ -704,13 +723,6 @@ def MFOCRF: XFXForm_5a<31, 19, (ops GPRC:$rT, crbitm:$FXM), "mfcr $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; -def MTCTR : XFXForm_7_ext<31, 467, 9, (ops GPRC:$rS), "mtctr $rS", SprMTSPR>, - PPC970_DGroup_First, PPC970_Unit_FXU; -def MTLR : XFXForm_7_ext<31, 467, 8, (ops GPRC:$rS), "mtlr $rS", SprMTSPR>, - PPC970_DGroup_First, PPC970_Unit_FXU; -def MTSPR : XFXForm_7<31, 467, (ops GPRC:$rS, u16imm:$UIMM), "mtspr $UIMM, $rS", - SprMTSPR>, - PPC970_DGroup_Single, PPC970_Unit_FXU; // XS-Form instructions. Just 'sradi' // Index: llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp diff -u llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp:1.43 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp:1.44 --- llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp:1.43 Thu Feb 2 14:12:32 2006 +++ llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp Mon Mar 13 15:52:10 2006 @@ -266,12 +266,63 @@ } } +// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the +// instruction selector. Based on the vector registers that have been used, +// transform this into the appropriate ORI instruction. 
+static void HandleVRSaveUpdate(MachineInstr *MI, const bool *UsedRegs) { + unsigned UsedRegMask = 0; +#define HANDLEREG(N) if (UsedRegs[PPC::V##N]) UsedRegMask |= 1 << (31-N) + HANDLEREG( 0); HANDLEREG( 1); HANDLEREG( 2); HANDLEREG( 3); + HANDLEREG( 4); HANDLEREG( 5); HANDLEREG( 6); HANDLEREG( 7); + HANDLEREG( 8); HANDLEREG( 9); HANDLEREG(10); HANDLEREG(11); + HANDLEREG(12); HANDLEREG(13); HANDLEREG(14); HANDLEREG(15); + HANDLEREG(16); HANDLEREG(17); HANDLEREG(18); HANDLEREG(19); + HANDLEREG(20); HANDLEREG(21); HANDLEREG(22); HANDLEREG(23); + HANDLEREG(24); HANDLEREG(25); HANDLEREG(26); HANDLEREG(27); + HANDLEREG(28); HANDLEREG(29); HANDLEREG(30); HANDLEREG(31); +#undef HANDLEREG + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + // If no registers are used, turn this into a copy. + if (UsedRegMask == 0) { + if (SrcReg != DstReg) + BuildMI(*MI->getParent(), MI, PPC::OR4, 2, DstReg) + .addReg(SrcReg).addReg(SrcReg); + } else if ((UsedRegMask & 0xFFFF) == UsedRegMask) { + BuildMI(*MI->getParent(), MI, PPC::ORI, 2, DstReg) + .addReg(SrcReg).addImm(UsedRegMask); + } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { + BuildMI(*MI->getParent(), MI, PPC::ORIS, 2, DstReg) + .addReg(SrcReg).addImm(UsedRegMask >> 16); + } else { + BuildMI(*MI->getParent(), MI, PPC::ORIS, 2, DstReg) + .addReg(SrcReg).addImm(UsedRegMask >> 16); + BuildMI(*MI->getParent(), MI, PPC::ORI, 2, DstReg) + .addReg(DstReg).addImm(UsedRegMask & 0xFFFF); + } + + // Remove the old UPDATE_VRSAVE instruction. + MI->getParent()->erase(MI); +} + void PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); + // Scan the first few instructions of the prolog, looking for an UPDATE_VRSAVE + // instruction. If we find it, process it. 
+ for (unsigned i = 0; MBBI != MBB.end() && i < 5; ++i, ++MBBI) { + if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { + HandleVRSaveUpdate(MBBI, MF.getUsedPhysregs()); + break; + } + } + + // Move MBBI back to the beginning of the function. + MBBI = MBB.begin(); + // Get the number of bytes to allocate from the FrameInfo unsigned NumBytes = MFI->getStackSize(); Index: llvm/lib/Target/PowerPC/PPCRegisterInfo.td diff -u llvm/lib/Target/PowerPC/PPCRegisterInfo.td:1.29 llvm/lib/Target/PowerPC/PPCRegisterInfo.td:1.30 --- llvm/lib/Target/PowerPC/PPCRegisterInfo.td:1.29 Wed Dec 21 20:26:21 2005 +++ llvm/lib/Target/PowerPC/PPCRegisterInfo.td Mon Mar 13 15:52:10 2006 @@ -152,9 +152,9 @@ GPRCClass::iterator GPRCClass::allocation_order_end(MachineFunction &MF) const { if (hasFP(MF)) - return end()-4; + return end()-4; // don't allocate R31, R0, R1, LR else - return end()-3; + return end()-3; // don't allocate R0, R1, LR } }]; } _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits