Changes in directory llvm/lib/Target/PowerPC:
PPCHazardRecognizers.cpp updated: 1.11 -> 1.12
PPCISelLowering.cpp updated: 1.107 -> 1.108
PPCISelLowering.h updated: 1.30 -> 1.31
PPCInstrInfo.td updated: 1.202 -> 1.203
PPCRegisterInfo.cpp updated: 1.48 -> 1.49
---
Log message:

When possible, custom lower 32-bit SINT_TO_FP to this:

_foo2:
        extsw r2, r3
        std r2, -8(r1)
        lfd f0, -8(r1)
        fcfid f0, f0
        frsp f1, f0
        blr

instead of this:

_foo2:
        lis r2, ha16(LCPI2_0)
        lis r4, 17200
        xoris r3, r3, 32768
        stw r3, -4(r1)
        stw r4, -8(r1)
        lfs f0, lo16(LCPI2_0)(r2)
        lfd f1, -8(r1)
        fsub f0, f1, f0
        frsp f1, f0
        blr

This speeds up Misc/pi from 2.44s->2.09s with LLC and from 3.01s->2.18s with
llcbeta (16.7% and 38.1% respectively).

---
Diffs of the changes:  (+92 -32)

 PPCHazardRecognizers.cpp |    3 +
 PPCISelLowering.cpp      |   86 ++++++++++++++++++++++++++++++++---------------
 PPCISelLowering.h        |    7 +++
 PPCInstrInfo.td          |   26 ++++++++++++--
 PPCRegisterInfo.cpp      |    2 -
 5 files changed, 92 insertions(+), 32 deletions(-)


Index: llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
diff -u llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp:1.11 llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp:1.12
--- llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp:1.11	Sun Mar 12 23:23:59 2006
+++ llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp	Tue Mar 21 23:30:33 2006
@@ -245,8 +245,9 @@
     case PPC::STFIWX:
       ThisStoreSize = 4;
       break;
+    case PPC::STD_32:
+    case PPC::STDX_32:
     case PPC::STD:
-    case PPC::STDU:
     case PPC::STFD:
     case PPC::STFDX:
     case PPC::STDX:


Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
diff -u llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.107 llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.108
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.107	Tue Mar 21 14:51:05 2006
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp	Tue Mar 21 23:30:33 2006
@@ -140,6 +140,7 @@
     // They also have instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); } else { @@ -222,6 +223,8 @@ case PPCISD::SRL: return "PPCISD::SRL"; case PPCISD::SRA: return "PPCISD::SRA"; case PPCISD::SHL: return "PPCISD::SHL"; + case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; + case PPCISD::STD_32: return "PPCISD::STD_32"; case PPCISD::CALL: return "PPCISD::CALL"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; } @@ -302,15 +305,41 @@ Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits); return Bits; } - case ISD::SINT_TO_FP: { - assert(MVT::i64 == Op.getOperand(0).getValueType() && - "Unhandled SINT_TO_FP type in custom expander!"); - SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0)); - SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits); - if (MVT::f32 == Op.getValueType()) - FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP); - return FP; - } + case ISD::SINT_TO_FP: + if (Op.getOperand(0).getValueType() == MVT::i64) { + SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0)); + SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits); + if (Op.getValueType() == MVT::f32) + FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP); + return FP; + } else { + assert(Op.getOperand(0).getValueType() == MVT::i32 && + "Unhandled SINT_TO_FP type in custom expander!"); + // Since we only generate this in 64-bit mode, we can take advantage of + // 64-bit registers. In particular, sign extend the input value into the + // 64-bit register with extsw, store the WHOLE 64-bit value into the stack + // then lfd it and fcfid it. 
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(8, 8); + SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32); + + SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32, + Op.getOperand(0)); + + // STD the extended value into the stack slot. + SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other, + DAG.getEntryNode(), Ext64, FIdx, + DAG.getSrcValue(NULL)); + // Load the value as a double. + SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL)); + + // FCFID it and return it. + SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld); + if (Op.getValueType() == MVT::f32) + FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP); + return FP; + } + case ISD::SELECT_CC: { // Turn FP only select_cc's into fsel instructions. if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) || @@ -1106,27 +1135,30 @@ default: break; case ISD::SINT_TO_FP: if (TM.getSubtarget<PPCSubtarget>().is64Bit()) { - // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores. - // We allow the src/dst to be either f32/f64, but force the intermediate - // type to be i64. - if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT && - N->getOperand(0).getValueType() == MVT::i64) { - - SDOperand Val = N->getOperand(0).getOperand(0); - if (Val.getValueType() == MVT::f32) { - Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); + if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { + // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores. + // We allow the src/dst to be either f32/f64, but the intermediate + // type must be i64. 
+ if (N->getOperand(0).getValueType() == MVT::i64) { + SDOperand Val = N->getOperand(0).getOperand(0); + if (Val.getValueType() == MVT::f32) { + Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); + DCI.AddToWorklist(Val.Val); + } + + Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val); DCI.AddToWorklist(Val.Val); - } - - Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val); - DCI.AddToWorklist(Val.Val); - Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val); - DCI.AddToWorklist(Val.Val); - if (N->getValueType(0) == MVT::f32) { - Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val); + Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val); DCI.AddToWorklist(Val.Val); + if (N->getValueType(0) == MVT::f32) { + Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val); + DCI.AddToWorklist(Val.Val); + } + return Val; + } else if (N->getOperand(0).getValueType() == MVT::i32) { + // If the intermediate type is i32, we can avoid the load/store here + // too. } - return Val; } } break; Index: llvm/lib/Target/PowerPC/PPCISelLowering.h diff -u llvm/lib/Target/PowerPC/PPCISelLowering.h:1.30 llvm/lib/Target/PowerPC/PPCISelLowering.h:1.31 --- llvm/lib/Target/PowerPC/PPCISelLowering.h:1.30 Mon Mar 20 00:33:01 2006 +++ llvm/lib/Target/PowerPC/PPCISelLowering.h Tue Mar 21 23:30:33 2006 @@ -75,7 +75,14 @@ /// shift amounts. These nodes are generated by the multi-precision shift /// code. SRL, SRA, SHL, + + /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit" + /// registers. + EXTSW_32, + /// STD_32 - This is the STD instruction for use with "32-bit" registers. + STD_32, + /// CALL - A function call. 
CALL, Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td diff -u llvm/lib/Target/PowerPC/PPCInstrInfo.td:1.202 llvm/lib/Target/PowerPC/PPCInstrInfo.td:1.203 --- llvm/lib/Target/PowerPC/PPCInstrInfo.td:1.202 Tue Mar 21 19:44:36 2006 +++ llvm/lib/Target/PowerPC/PPCInstrInfo.td Tue Mar 21 23:30:33 2006 @@ -58,6 +58,9 @@ def PPCsra : SDNode<"PPCISD::SRA" , SDT_PPCShiftOp>; def PPCshl : SDNode<"PPCISD::SHL" , SDT_PPCShiftOp>; +def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>; +def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore, [SDNPHasChain]>; + // These are target-independent nodes, but have target-specific formats. def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeq,[SDNPHasChain]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeq,[SDNPHasChain]>; @@ -194,11 +197,17 @@ let NumMIOperands = 2; let MIOperandInfo = (ops GPRC, GPRC); } +def memrix : Operand<i32> { // memri where the imm is shifted 2 bits. + let PrintMethod = "printMemRegImmShifted"; + let NumMIOperands = 2; + let MIOperandInfo = (ops i32imm, GPRC); +} // Define PowerPC specific addressing mode. def iaddr : ComplexPattern<i32, 2, "SelectAddrImm", []>; def xaddr : ComplexPattern<i32, 2, "SelectAddrIdx", []>; def xoaddr : ComplexPattern<i32, 2, "SelectAddrIdxOnly",[]>; +def ixaddr : ComplexPattern<i32, 2, "SelectAddrImmShift", []>; // "std" //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. @@ -428,9 +437,15 @@ def STD : DSForm_2<62, 0, (ops GPRC:$rT, s16immX4:$DS, GPRC:$rA), "std $rT, $DS($rA)", LdStSTD, []>, isPPC64; -def STDU : DSForm_2<62, 1, (ops GPRC:$rT, s16immX4:$DS, GPRC:$rA), - "stdu $rT, $DS($rA)", LdStSTD, - []>, isPPC64; + +// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register. 
+def STD_32 : DSForm_2<62, 0, (ops GPRC:$rT, memrix:$dst), + "std $rT, $dst", LdStSTD, + [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64; +def STDX_32 : XForm_8<31, 149, (ops GPRC:$rT, memrr:$dst), + "stdx $rT, $dst", LdStSTD, + [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64, + PPC970_DGroup_Cracked; } // X-Form instructions. Most instructions that perform an operation on a @@ -586,6 +601,11 @@ def EXTSW : XForm_11<31, 986, (ops G8RC:$rA, G8RC:$rS), "extsw $rA, $rS", IntGeneral, [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64; +/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers. +def EXTSW_32 : XForm_11<31, 986, (ops GPRC:$rA, GPRC:$rS), + "extsw $rA, $rS", IntGeneral, + [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64; + def CMP : XForm_16<31, 0, (ops CRRC:$crD, i1imm:$long, GPRC:$rA, GPRC:$rB), "cmp $crD, $long, $rA, $rB", IntCompare>; def CMPL : XForm_16<31, 32, (ops CRRC:$crD, i1imm:$long, GPRC:$rA, GPRC:$rB), Index: llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp diff -u llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp:1.48 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp:1.49 --- llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp:1.48 Thu Mar 16 17:52:08 2006 +++ llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp Tue Mar 21 23:30:33 2006 @@ -276,7 +276,7 @@ case PPC::LWA: case PPC::LD: case PPC::STD: - case PPC::STDU: + case PPC::STD_32: assert((Offset & 3) == 0 && "Invalid frame offset!"); Offset >>= 2; // The actual encoded value has the low two bits zero. break; _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits