Changes in directory llvm/lib/Target/X86:
X86ISelDAGToDAG.cpp updated: 1.110 -> 1.111 X86InstrSSE.td updated: 1.161 -> 1.162 --- Log message: completely disable folding of loads into scalar sse instructions and provide a framework for doing it right. This fixes CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll. Once X86DAGToDAGISel::SelectScalarSSELoad is implemented right, this task will be done. --- Diffs of the changes: (+61 -14) X86ISelDAGToDAG.cpp | 25 +++++++++++++++++++++++++ X86InstrSSE.td | 50 ++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 61 insertions(+), 14 deletions(-) Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp diff -u llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.110 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.111 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.110 Fri Sep 29 17:05:10 2006 +++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Sat Oct 7 16:55:32 2006 @@ -147,6 +147,8 @@ SDOperand &Index, SDOperand &Disp); bool SelectLEAAddr(SDOperand N, SDOperand &Base, SDOperand &Scale, SDOperand &Index, SDOperand &Disp); + bool SelectScalarSSELoad(SDOperand N, SDOperand &Base, SDOperand &Scale, + SDOperand &Index, SDOperand &Disp); bool TryFoldLoad(SDOperand P, SDOperand N, SDOperand &Base, SDOperand &Scale, SDOperand &Index, SDOperand &Disp); @@ -724,6 +726,29 @@ return true; } +/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to +/// match a load whose top elements are either undef or zeros. The load flavor +/// is derived from the type of N, which is either v4f32 or v2f64. +bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand N, SDOperand &Base, + SDOperand &Scale, + SDOperand &Index, SDOperand &Disp) { +#if 0 + if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { + if (N.getOperand(0).getOpcode() == ISD::LOAD) { + SDOperand LoadAddr = N.getOperand(0).getOperand(0); + if (!SelectAddr(LoadAddr, Base, Scale, Index, Disp)) + return false; + return true; + } + } + // TODO: Also handle the case where we explicitly require zeros in the top + // elements. This is a vector shuffle from the zero vector. +#endif + + return false; +} + + /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing /// mode it matches can be cost effectively emitted as an LEA instruction. bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base, Index: llvm/lib/Target/X86/X86InstrSSE.td diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.161 llvm/lib/Target/X86/X86InstrSSE.td:1.162 --- llvm/lib/Target/X86/X86InstrSSE.td:1.161 Sat Oct 7 16:17:13 2006 +++ llvm/lib/Target/X86/X86InstrSSE.td Sat Oct 7 16:55:32 2006 @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// + //===----------------------------------------------------------------------===// // SSE specific DAG Nodes. //===----------------------------------------------------------------------===// @@ -32,6 +33,27 @@ def X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>; //===----------------------------------------------------------------------===// +// SSE Complex Patterns +//===----------------------------------------------------------------------===// + +// These are 'extloads' from a scalar to the low element of a vector, zeroing +// the top elements. These are used for the SSE 'ss' and 'sd' instruction +// forms. +def sse_load_f32 : ComplexPattern<v4f32, 4, "SelectScalarSSELoad", []>; +def sse_load_f64 : ComplexPattern<v2f64, 4, "SelectScalarSSELoad", []>; + +def ssmem : Operand<v4f32> { + let PrintMethod = "printf32mem"; + let NumMIOperands = 4; + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm); +} +def sdmem : Operand<v2f64> { + let PrintMethod = "printf64mem"; + let NumMIOperands = 4; + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm); +} + +//===----------------------------------------------------------------------===// // SSE pattern fragments //===----------------------------------------------------------------------===// @@ -185,18 +207,18 @@ def r : SSI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src), !strconcat(OpcodeStr, " {$src, $dst|$dst, $src"), [(set VR128:$dst, (v4f32 (IntId VR128:$src)))]>; - def m : SSI<o, MRMSrcMem, (ops VR128:$dst, f32mem:$src), + def m : SSI<o, MRMSrcMem, (ops VR128:$dst, ssmem:$src), !strconcat(OpcodeStr, " {$src, $dst|$dst, $src"), - [(set VR128:$dst, (v4f32 (IntId (load addr:$src))))]>; + [(set VR128:$dst, (v4f32 (IntId sse_load_f32:$src)))]>; } multiclass SD_IntUnary<bits<8> o, string OpcodeStr, Intrinsic IntId> { def r : SDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src), !strconcat(OpcodeStr, " {$src, $dst|$dst, $src"), [(set VR128:$dst, (v2f64 (IntId VR128:$src)))]>; - def m : SDI<o, MRMSrcMem, (ops VR128:$dst, f64mem:$src), + def m : SDI<o, MRMSrcMem, (ops VR128:$dst, sdmem:$src), !strconcat(OpcodeStr, " {$src, $dst|$dst, $src"), - [(set VR128:$dst, (v2f64 (IntId (load addr:$src))))]>; + [(set VR128:$dst, (v2f64 (IntId sse_load_f64:$src)))]>; } class PS_Intr<bits<8> o, string OpcodeStr, Intrinsic IntId> @@ -315,10 +337,10 @@ // Scalar operation, reg+mem. def SSrm : SSI<opc, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2), !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2"), - [(set FR32:$dst, (OpNode FR32:$src1, (loadf32 addr:$src2)))]>; + [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>; def SDrm : SDI<opc, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2), !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2"), - [(set FR64:$dst, (OpNode FR64:$src1, (loadf64 addr:$src2)))]>; + [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>; // Vector intrinsic operation, reg+reg. def SSrr_Int : SSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), @@ -332,14 +354,14 @@ let isCommutable = Commutable; } // Vector intrinsic operation, reg+mem. - def SSrm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2), + def SSrm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2"), [(set VR128:$dst, (F32Int VR128:$src1, - (load addr:$src2)))]>; - def SDrm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2), + sse_load_f32:$src2))]>; + def SDrm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, sdmem:$src2), !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2"), [(set VR128:$dst, (F64Int VR128:$src1, - (load addr:$src2)))]>; + sse_load_f64:$src2))]>; } } @@ -373,17 +395,17 @@ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>; class SS_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId> - : SSI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2), + : SSI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (load addr:$src2))))]>; + [(set VR128:$dst, (v4f32 (IntId VR128:$src1, sse_load_f32:$src2)))]>; class SD_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId> : SDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>; class SD_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId> - : SDI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2), + : SDI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, sdmem:$src2), !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (load addr:$src2))))]>; + [(set VR128:$dst, (v2f64 (IntId VR128:$src1, sse_load_f64:$src2)))]>; // Aliases to match intrinsics which expect XMM operand(s). _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits