Changes in directory llvm/lib/Target/X86:
X86ISelLowering.cpp updated: 1.129 -> 1.130 X86ISelLowering.h updated: 1.41 -> 1.42 X86InstrInfo.cpp updated: 1.46 -> 1.47 X86InstrSSE.td updated: 1.26 -> 1.27 --- Log message: Support for scalar to vector with zero extension. --- Diffs of the changes: (+119 -52) X86ISelLowering.cpp | 70 ++++++++++++++++++++++++++------------------ X86ISelLowering.h | 16 +++++----- X86InstrInfo.cpp | 3 + X86InstrSSE.td | 82 +++++++++++++++++++++++++++++++++++++++++----------- 4 files changed, 119 insertions(+), 52 deletions(-) Index: llvm/lib/Target/X86/X86ISelLowering.cpp diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.129 llvm/lib/Target/X86/X86ISelLowering.cpp:1.130 --- llvm/lib/Target/X86/X86ISelLowering.cpp:1.129 Fri Mar 24 01:29:27 2006 +++ llvm/lib/Target/X86/X86ISelLowering.cpp Fri Mar 24 17:15:12 2006 @@ -1398,8 +1398,8 @@ bool X86::isSHUFPMask(SDNode *N) { assert(N->getOpcode() == ISD::BUILD_VECTOR); - unsigned NumOperands = N->getNumOperands(); - if (NumOperands == 2) { + unsigned NumElems = N->getNumOperands(); + if (NumElems == 2) { // The only case that ought be handled by SHUFPD is // Dest { 2, 1 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } // Expect bit 0 == 1, bit1 == 2 @@ -1411,21 +1411,21 @@ cast<ConstantSDNode>(Bit1)->getValue() == 2); } - if (NumOperands != 4) return false; + if (NumElems != 4) return false; // Each half must refer to only one of the vector. 
SDOperand Elt = N->getOperand(0); assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); - for (unsigned i = 1; i != NumOperands / 2; ++i) { + for (unsigned i = 1; i != NumElems / 2; ++i) { assert(isa<ConstantSDNode>(N->getOperand(i)) && "Invalid VECTOR_SHUFFLE mask!"); if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != cast<ConstantSDNode>(Elt)->getValue()) return false; } - Elt = N->getOperand(NumOperands / 2); + Elt = N->getOperand(NumElems / 2); assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); - for (unsigned i = NumOperands / 2; i != NumOperands; ++i) { + for (unsigned i = NumElems / 2; i != NumElems; ++i) { assert(isa<ConstantSDNode>(N->getOperand(i)) && "Invalid VECTOR_SHUFFLE mask!"); if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != @@ -1530,20 +1530,23 @@ return Mask; } -/// isZeroVector - Return true if all elements of BUILD_VECTOR are 0 or +0.0. +/// isZeroVector - Return true if this build_vector is an all-zero vector. +/// bool X86::isZeroVector(SDNode *N) { - for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); - I != E; ++I) { - if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(*I)) { - if (!FPC->isExactlyValue(+0.0)) + if (MVT::isInteger(N->getOperand(0).getValueType())) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (!isa<ConstantSDNode>(N->getOperand(i)) || + cast<ConstantSDNode>(N->getOperand(i))->getValue() != 0) return false; - } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*I)) { - if (!C->isNullValue()) + } else { + assert(MVT::isFloatingPoint(N->getOperand(0).getValueType()) && + "Vector of non-int, non-float values?"); + // See if this is all zeros. 
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (!isa<ConstantFPSDNode>(N->getOperand(i)) || + !cast<ConstantFPSDNode>(N->getOperand(i))->isExactlyValue(0.0)) return false; - } else - return false; } - return true; } @@ -2318,7 +2321,7 @@ } case ISD::SCALAR_TO_VECTOR: { SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); - return DAG.getNode(X86ISD::SCALAR_TO_VECTOR, Op.getValueType(), AnyExt); + return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); } case ISD::VECTOR_SHUFFLE: { SDOperand V1 = Op.getOperand(0); @@ -2338,6 +2341,9 @@ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask); + } else if (NumElems == 2) { + // All v2f64 cases are handled. + return SDOperand(); } else if (X86::isPSHUFDMask(PermMask.Val)) { if (V2.getOpcode() == ISD::UNDEF) // Leave the VECTOR_SHUFFLE alone. It matches PSHUFD. @@ -2347,9 +2353,6 @@ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask); - } else if (NumElems == 2) { - // All v2f64 cases are handled. 
- return SDOperand(); } else if (X86::isSHUFPMask(PermMask.Val)) { SDOperand Elt = PermMask.getOperand(0); if (cast<ConstantSDNode>(Elt)->getValue() >= NumElems) { @@ -2370,22 +2373,32 @@ abort(); } case ISD::BUILD_VECTOR: { - bool isZero = true; + SDOperand Elt0 = Op.getOperand(0); + bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) && + cast<ConstantSDNode>(Elt0)->getValue() == 0) || + (isa<ConstantFPSDNode>(Elt0) && + cast<ConstantFPSDNode>(Elt0)->isExactlyValue(0.0)); + bool RestAreZero = true; unsigned NumElems = Op.getNumOperands(); - for (unsigned i = 0; i < NumElems; ++i) { + for (unsigned i = 1; i < NumElems; ++i) { SDOperand V = Op.getOperand(i); if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(V)) { if (!FPC->isExactlyValue(+0.0)) - isZero = false; + RestAreZero = false; } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V)) { if (!C->isNullValue()) - isZero = false; + RestAreZero = false; } else - isZero = false; + RestAreZero = false; + } + + if (RestAreZero) { + if (Elt0IsZero) return Op; + + // Zero extend a scalar to a vector. + return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0); } - if (isZero) - return Op; return SDOperand(); } } @@ -2421,7 +2434,8 @@ case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; case X86ISD::Wrapper: return "X86ISD::Wrapper"; - case X86ISD::SCALAR_TO_VECTOR: return "X86ISD::SCALAR_TO_VECTOR"; + case X86ISD::S2VEC: return "X86ISD::S2VEC"; + case X86ISD::ZEXT_S2VEC: return "X86ISD::ZEXT_S2VEC"; } } Index: llvm/lib/Target/X86/X86ISelLowering.h diff -u llvm/lib/Target/X86/X86ISelLowering.h:1.41 llvm/lib/Target/X86/X86ISelLowering.h:1.42 --- llvm/lib/Target/X86/X86ISelLowering.h:1.41 Fri Mar 24 01:29:27 2006 +++ llvm/lib/Target/X86/X86ISelLowering.h Fri Mar 24 17:15:12 2006 @@ -146,12 +146,13 @@ /// TargetExternalSymbol, and TargetGlobalAddress. Wrapper, - /// SCALAR_TO_VECTOR - X86 version of SCALAR_TO_VECTOR. 
The destination base - /// type does not have to match the operand type. - SCALAR_TO_VECTOR, - - /// UNPCKLP - X86 unpack and interleave low instructions. - UNPCKLP, + /// S2VEC - X86 version of SCALAR_TO_VECTOR. The destination base type does not + /// have to match the operand type. + S2VEC, + + /// ZEXT_S2VEC - SCALAR_TO_VECTOR with zero extension. The destination base type + /// does not have to match the operand type. + ZEXT_S2VEC, }; // X86 specific condition code. These correspond to X86_*_COND in @@ -209,7 +210,8 @@ /// instructions. unsigned getShuffleSHUFImmediate(SDNode *N); - /// isZeroVector - Return true if all elements of BUILD_VECTOR are 0 or +0.0. + /// isZeroVector - Return true if this build_vector is an all-zero vector. + /// bool isZeroVector(SDNode *N); } Index: llvm/lib/Target/X86/X86InstrInfo.cpp diff -u llvm/lib/Target/X86/X86InstrInfo.cpp:1.46 llvm/lib/Target/X86/X86InstrInfo.cpp:1.47 --- llvm/lib/Target/X86/X86InstrInfo.cpp:1.46 Tue Mar 21 01:09:35 2006 +++ llvm/lib/Target/X86/X86InstrInfo.cpp Fri Mar 24 17:15:12 2006 @@ -31,7 +31,8 @@ oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr || oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr || oc == X86::MOVAPSrr || oc == X86::MOVAPDrr || - oc == X86::FR32ToV4F32 || oc == X86::FR64ToV2F64) { + oc == X86::MOVSS128rr || oc == X86::MOVSD128rr || + oc == X86::MOVD128rr || oc == X86::MOVQ128rr) { assert(MI.getNumOperands() == 2 && MI.getOperand(0).isRegister() && MI.getOperand(1).isRegister() && Index: llvm/lib/Target/X86/X86InstrSSE.td diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.26 llvm/lib/Target/X86/X86InstrSSE.td:1.27 --- llvm/lib/Target/X86/X86InstrSSE.td:1.26 Fri Mar 24 16:28:37 2006 +++ llvm/lib/Target/X86/X86InstrSSE.td Fri Mar 24 17:15:12 2006 @@ -17,19 +17,16 @@ // SSE specific DAG Nodes. 
//===----------------------------------------------------------------------===// -def SDTX86Unpcklp : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; - def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad, [SDNPHasChain]>; def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; -def X86s2vec : SDNode<"X86ISD::SCALAR_TO_VECTOR", +def X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>; -def X86unpcklp : SDNode<"X86ISD::UNPCKLP", - SDTX86Unpcklp, []>; +def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC", + SDTypeProfile<1, 1, []>, []>; //===----------------------------------------------------------------------===// // SSE pattern fragments @@ -156,6 +153,25 @@ "movsd {$src, $dst|$dst, $src}", [(store FR64:$src, addr:$dst)]>; +// FR32 / FR64 to 128-bit vector conversion. +def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src), + "movss {$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4f32 (scalar_to_vector FR32:$src)))]>; +def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src), + "movss {$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>; +def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src), + "movsd {$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2f64 (scalar_to_vector FR64:$src)))]>; +def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src), + "movsd {$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4f32 (scalar_to_vector (loadf64 addr:$src))))]>; + + // Conversion instructions def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src), "cvttss2si {$src, $dst|$dst, $src}", @@ -788,7 +804,10 @@ [(set VR128:$dst, (v4i32 (scalar_to_vector R32:$src)))]>; def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src), - "movd {$src, $dst|$dst, $src}", []>; + "movd {$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4i32 
(scalar_to_vector (loadi32 addr:$src))))]>; + def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src), "movd {$src, $dst|$dst, $src}", []>; @@ -808,8 +827,8 @@ (v2i64 (scalar_to_vector VR64:$src)))]>, XS, Requires<[HasSSE2]>; def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src), - "movq {$src, $dst|$dst, $src}", []>, XS; - + "movq {$src, $dst|$dst, $src}", []>, XS, + Requires<[HasSSE2]>; def MOVQ128mr : PDI<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src), "movq {$src, $dst|$dst, $src}", []>; @@ -870,15 +889,32 @@ def VZEROv2f64 : PDI<0x57, MRMInitReg, (ops VR128:$dst), "xorpd $dst, $dst", [(set VR128:$dst, (v2f64 vecimm0))]>; -def FR32ToV4F32 : PSI<0x28, MRMSrcReg, (ops VR128:$dst, FR32:$src), - "movaps {$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v4f32 (scalar_to_vector FR32:$src)))]>; +// Scalar to 128-bit vector with zero extension. +// Three operand (but two address) aliases. +let isTwoAddress = 1 in { +def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2), + "movss {$src2, $dst|$dst, $src2}", []>; +def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2), + "movsd {$src2, $dst|$dst, $src2}", []>; +def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2), + "movd {$src2, $dst|$dst, $src2}", []>; +def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2), + "movq {$src2, $dst|$dst, $src2}", []>; +} -def FR64ToV2F64 : PDI<0x28, MRMSrcReg, (ops VR128:$dst, FR64:$src), - "movapd {$src, $dst|$dst, $src}", +// Loading from memory automatically zeroing upper bits. 
+def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src), + "movss {$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (scalar_to_vector FR64:$src)))]>; + (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>; +def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src), + "movsd {$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>; +def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src), + "movd {$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -922,6 +958,20 @@ def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>; +// Zero a VR128, then do a MOVS* to the lower bits. +def : Pat<(v2f64 (X86zexts2vec FR64:$src)), + (MOVZSD128rr (VZEROv2f64), FR64:$src)>; +def : Pat<(v4f32 (X86zexts2vec FR32:$src)), + (MOVZSS128rr (VZEROv4f32), FR32:$src)>; +def : Pat<(v2i64 (X86zexts2vec VR64:$src)), + (MOVZQ128rr (VZEROv2i64), VR64:$src)>, Requires<[HasSSE2]>; +def : Pat<(v4i32 (X86zexts2vec R32:$src)), + (MOVZD128rr (VZEROv4i32), R32:$src)>; +def : Pat<(v8i16 (X86zexts2vec R16:$src)), + (MOVZD128rr (VZEROv8i16), (MOVZX32rr16 R16:$src))>; +def : Pat<(v16i8 (X86zexts2vec R8:$src)), + (MOVZD128rr (VZEROv16i8), (MOVZX32rr8 R8:$src))>; + // Splat v4f32 / v4i32 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm), (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>, _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits