On Sat, Feb 02, 2013 at 07:03:02PM +0100, Vincent Lejeune wrote: > --- > lib/Target/R600/R600ISelLowering.cpp | 111 > ++++++++++++++++------------------- > lib/Target/R600/R600Instructions.td | 20 ++----- > lib/Target/R600/R600Intrinsics.td | 3 - > 3 files changed, 55 insertions(+), 79 deletions(-)
Reviewed-by: Tom Stellard <thomas.stell...@amd.com> > > diff --git a/lib/Target/R600/R600ISelLowering.cpp > b/lib/Target/R600/R600ISelLowering.cpp > index 4dc6729..f796738 100644 > --- a/lib/Target/R600/R600ISelLowering.cpp > +++ b/lib/Target/R600/R600ISelLowering.cpp > @@ -279,57 +279,6 @@ MachineBasicBlock * > R600TargetLowering::EmitInstrWithCustomInserter( > using namespace llvm::Intrinsic; > using namespace llvm::AMDGPUIntrinsic; > > -static SDValue > -InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode > **ExportMap, > - unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type, > - SDValue Scalar, SDValue Chain) { > - if (!ExportMap[Slot]) { > - SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, > - DL, MVT::v4f32, > - DAG.getUNDEF(MVT::v4f32), > - Scalar, > - DAG.getConstant(Channel, MVT::i32)); > - > - unsigned Mask = 1 << Channel; > - > - const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32), > - DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32), > - DAG.getConstant(Mask, MVT::i32)}; > - > - SDValue Res = DAG.getNode( > - AMDGPUISD::EXPORT, > - DL, > - MVT::Other, > - Ops, 6); > - ExportMap[Slot] = Res.getNode(); > - return Res; > - } > - > - SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ; > - SDValue PreviousVector = ExportInstruction->getOperand(1); > - SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, > - DL, MVT::v4f32, > - PreviousVector, > - Scalar, > - DAG.getConstant(Channel, MVT::i32)); > - > - unsigned Mask = dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5)) > - ->getZExtValue(); > - Mask |= (1 << Channel); > - > - const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector, > - DAG.getConstant(Inst, MVT::i32), > - DAG.getConstant(Type, MVT::i32), > - DAG.getConstant(Slot, MVT::i32), > - DAG.getConstant(Mask, MVT::i32)}; > - > - DAG.UpdateNodeOperands(ExportInstruction, > - Ops, 6); > - > - return Chain; > - > -} > - > SDValue R600TargetLowering::LowerOperation(SDValue 
Op, SelectionDAG &DAG) > const { > switch (Op.getOpcode()) { > default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); > @@ -356,16 +305,19 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, > SelectionDAG &DAG) const > } > return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, > Op.getOperand(2)); > } > - case AMDGPUIntrinsic::R600_store_pixel_color: { > - MachineFunction &MF = DAG.getMachineFunction(); > - R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); > - int64_t RegIndex = > cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); > - > - SDNode **OutputsMap = MFI->Outputs; > - return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, > - RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2), > - Chain); > - > + case AMDGPUIntrinsic::R600_store_swizzle: { > + const SDValue Args[8] = { > + Chain, > + Op.getOperand(2), // Export Value > + Op.getOperand(3), // ArrayBase > + Op.getOperand(4), // Type > + DAG.getConstant(0, MVT::i32), // SWZ_X > + DAG.getConstant(1, MVT::i32), // SWZ_Y > + DAG.getConstant(2, MVT::i32), // SWZ_Z > + DAG.getConstant(3, MVT::i32) // SWZ_W > + }; > + return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), > Op.getValueType(), > + Args, 8); > } > > // default for switch(IntrinsicID) > @@ -962,6 +914,43 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, > } > } > } > + case AMDGPUISD::EXPORT: { > + SDValue Arg = N->getOperand(1); > + if (Arg.getOpcode() != ISD::BUILD_VECTOR) > + break; > + SDValue NewBldVec[4] = { > + DAG.getUNDEF(MVT::f32), > + DAG.getUNDEF(MVT::f32), > + DAG.getUNDEF(MVT::f32), > + DAG.getUNDEF(MVT::f32) > + }; > + SDValue NewArgs[8] = { > + N->getOperand(0), // Chain > + SDValue(), > + N->getOperand(2), // ArrayBase > + N->getOperand(3), // Type > + N->getOperand(4), // SWZ_X > + N->getOperand(5), // SWZ_Y > + N->getOperand(6), // SWZ_Z > + N->getOperand(7) // SWZ_W > + }; > + for (unsigned i = 0; i < Arg.getNumOperands(); i++) { > + if (ConstantFPSDNode *C = > 
dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) { > + if (C->isZero()) { > + NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0 > + } else if (C->isExactlyValue(1.0)) { > + NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_1 > + } else { > + NewBldVec[i] = Arg.getOperand(i); > + } > + } else { > + NewBldVec[i] = Arg.getOperand(i); > + } > + } > + DebugLoc DL = N->getDebugLoc(); > + NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, > 4); > + return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8); > + } > } > return SDValue(); > } > diff --git a/lib/Target/R600/R600Instructions.td > b/lib/Target/R600/R600Instructions.td > index d3cee56..3c043aa 100644 > --- a/lib/Target/R600/R600Instructions.td > +++ b/lib/Target/R600/R600Instructions.td > @@ -521,7 +521,7 @@ def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", > []>; > // Export Instructions > > //===----------------------------------------------------------------------===// > > -def ExportType : SDTypeProfile<0, 5, [SDTCisFP<0>, SDTCisInt<1>]>; > +def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; > > def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, > [SDNPHasChain, SDNPSideEffect]>; > @@ -593,22 +593,12 @@ multiclass ExportPattern<Instruction ExportInst, > bits<8> cf_inst> { > (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0) > >; > > - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0), > - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)), > - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, > - 0, 1, 2, 3, cf_inst, 0) > - >; > - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1), > - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)), > - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, > - 0, 1, 2, 3, cf_inst, 0) > + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 > imm:$type), > + (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)), > + (ExportInst R600_Reg128:$src, imm:$type, 
imm:$base, > + imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0) > >; > > - def : Pat<(int_R600_store_swizzle (v4f32 R600_Reg128:$src), imm:$arraybase, > - imm:$type), > - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, > - 0, 1, 2, 3, cf_inst, 0) > - >; > } > > multiclass SteamOutputExportPattern<Instruction ExportInst, > diff --git a/lib/Target/R600/R600Intrinsics.td > b/lib/Target/R600/R600Intrinsics.td > index 284d4d8..b5e4f1e 100644 > --- a/lib/Target/R600/R600Intrinsics.td > +++ b/lib/Target/R600/R600Intrinsics.td > @@ -18,11 +18,8 @@ let TargetPrefix = "R600", isTarget = 1 in { > Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; > def int_R600_store_swizzle : > Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; > - > def int_R600_store_stream_output : > Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], > []>; > - def int_R600_store_pixel_color : > - Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; > def int_R600_store_pixel_depth : > Intrinsic<[], [llvm_float_ty], []>; > def int_R600_store_pixel_stencil : > -- > 1.8.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev