Re: [Mesa-dev] [PATCH] R600/SI: expose TBUFFER_STORE_FORMAT_* for OpenGL transform feedback
No, we use 11 user data SGPRs for the vertex shader, but there are also 6 additional SGPRs loaded by the hw based on the VGT state (4 streamout offsets, streamout_enable, and streamout_write_index). The 6 SGPRs can be enabled by setting SPI_SHADER_PGM_RSRC2_VS.SO_* = 1. Marek On Thu, Sep 5, 2013 at 5:44 PM, Tom Stellard wrote: > On Mon, Sep 02, 2013 at 09:07:18PM +0200, Marek Olšák wrote: >> For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist. >> >> The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take >> a resource descriptor might be nicer. >> >> The maximum number of input SGPRs is bumped to 17. >> >> Signed-off-by: Marek Olšák >> --- >> lib/Target/R600/AMDGPUCallingConv.td | 3 ++- >> lib/Target/R600/AMDGPUISelLowering.cpp | 1 + >> lib/Target/R600/AMDGPUISelLowering.h | 1 + >> lib/Target/R600/SIISelLowering.cpp | 39 >> ++ >> lib/Target/R600/SIInstrInfo.td | 27 +++ >> lib/Target/R600/SIInstructions.td | 29 + >> lib/Target/R600/SIIntrinsics.td| 18 >> 7 files changed, 113 insertions(+), 5 deletions(-) >> >> diff --git a/lib/Target/R600/AMDGPUCallingConv.td >> b/lib/Target/R600/AMDGPUCallingConv.td >> index 84d3118..d26be32 100644 >> --- a/lib/Target/R600/AMDGPUCallingConv.td >> +++ b/lib/Target/R600/AMDGPUCallingConv.td >> @@ -19,7 +19,8 @@ def CC_SI : CallingConv<[ >> >>CCIfInReg<CCIfType<[f32, i32], CCAssignToReg<[ >> SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, >> -SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15 >> +SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, >> +SGPR16 > > Why is this necessary? Are we using all 16 user sgprs now? 
> >>]>>>, >> >>CCIfInReg<CCIfType<[i64], CCAssignToReg<[ >> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp >> b/lib/Target/R600/AMDGPUISelLowering.cpp >> index 1237323..30d9503 100644 >> --- a/lib/Target/R600/AMDGPUISelLowering.cpp >> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp >> @@ -718,5 +718,6 @@ const char* >> AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { >>NODE_NAME_CASE(SAMPLED) >>NODE_NAME_CASE(SAMPLEL) >>NODE_NAME_CASE(STORE_MSKOR) >> + NODE_NAME_CASE(TBUFFER_STORE_FORMAT) >>} >> } >> diff --git a/lib/Target/R600/AMDGPUISelLowering.h >> b/lib/Target/R600/AMDGPUISelLowering.h >> index 75ac4c2..8a68356 100644 >> --- a/lib/Target/R600/AMDGPUISelLowering.h >> +++ b/lib/Target/R600/AMDGPUISelLowering.h >> @@ -160,6 +160,7 @@ enum { >>FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, >>STORE_MSKOR, >>LOAD_CONSTANT, >> + TBUFFER_STORE_FORMAT, >>LAST_AMDGPU_ISD_NUMBER >> }; >> >> diff --git a/lib/Target/R600/SIISelLowering.cpp >> b/lib/Target/R600/SIISelLowering.cpp >> index f196059..6fa0c85 100644 >> --- a/lib/Target/R600/SIISelLowering.cpp >> +++ b/lib/Target/R600/SIISelLowering.cpp >> @@ -86,6 +86,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : >>setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom); >>setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); >> >> + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); >> + >>setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); >> >>setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); >> @@ -462,6 +464,43 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, >> SelectionDAG &DAG) const { >> Op.getOperand(3)); >> } >>} >> + >> + case ISD::INTRINSIC_VOID: >> +SDValue Chain = Op.getOperand(0); >> +unsigned IntrinsicID = >> cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); >> + >> +switch (IntrinsicID) { >> + case AMDGPUIntrinsic::SI_tbuffer_store: { >> +SDLoc DL(Op); >> +SDValue Ops [] = { >> + Chain, >> + ResourceDescriptorToi128(Op.getOperand(2), DAG), >> + Op.getOperand(3), >> + 
Op.getOperand(4), >> + Op.getOperand(5), >> + Op.getOperand(6), >> + Op.getOperand(7), >> + Op.getOperand(8), >> + Op.getOperand(9), >> + Op.getOperand(10), >> + Op.getOperand(11), >> + Op.getOperand(12), >> + Op.getOperand(13), >> + Op.getOperand(14) >> +}; >> +EVT VT = Op.getOperand(3).getValueType(); >> + >> +MachineMemOperand *MMO = MF.getMachineMemOperand( >> +MachinePointerInfo(), >> +MachineMemOperand::MOStore, >> +VT.getSizeInBits() / 8, 4); >> +return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, >> + Op->getVTList(), Ops, >> + sizeof(Ops)/sizeof(Ops[0]), VT, MMO); >> + } >> + default: >> +break; >> +} >>} >>return SDValue(); >> } >> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td >> index ecc4718..c902feb 100644 >> --- a/lib/Target/R600/SIInstrInfo.td
Re: [Mesa-dev] [PATCH] R600/SI: expose TBUFFER_STORE_FORMAT_* for OpenGL transform feedback
On Mon, Sep 02, 2013 at 09:07:18PM +0200, Marek Olšák wrote: > For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist. > > The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take > a resource descriptor might be nicer. > > The maximum number of input SGPRs is bumped to 17. > > Signed-off-by: Marek Olšák > --- > lib/Target/R600/AMDGPUCallingConv.td | 3 ++- > lib/Target/R600/AMDGPUISelLowering.cpp | 1 + > lib/Target/R600/AMDGPUISelLowering.h | 1 + > lib/Target/R600/SIISelLowering.cpp | 39 > ++ > lib/Target/R600/SIInstrInfo.td | 27 +++ > lib/Target/R600/SIInstructions.td | 29 + > lib/Target/R600/SIIntrinsics.td| 18 > 7 files changed, 113 insertions(+), 5 deletions(-) > > diff --git a/lib/Target/R600/AMDGPUCallingConv.td > b/lib/Target/R600/AMDGPUCallingConv.td > index 84d3118..d26be32 100644 > --- a/lib/Target/R600/AMDGPUCallingConv.td > +++ b/lib/Target/R600/AMDGPUCallingConv.td > @@ -19,7 +19,8 @@ def CC_SI : CallingConv<[ > >CCIfInReg<CCIfType<[f32, i32], CCAssignToReg<[ > SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, > -SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15 > +SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, > +SGPR16 Why is this necessary? Are we using all 16 user sgprs now? 
>]>>>, > >CCIfInReg<CCIfType<[i64], CCAssignToReg<[ > diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp > b/lib/Target/R600/AMDGPUISelLowering.cpp > index 1237323..30d9503 100644 > --- a/lib/Target/R600/AMDGPUISelLowering.cpp > +++ b/lib/Target/R600/AMDGPUISelLowering.cpp > @@ -718,5 +718,6 @@ const char* > AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { >NODE_NAME_CASE(SAMPLED) >NODE_NAME_CASE(SAMPLEL) >NODE_NAME_CASE(STORE_MSKOR) > + NODE_NAME_CASE(TBUFFER_STORE_FORMAT) >} > } > diff --git a/lib/Target/R600/AMDGPUISelLowering.h > b/lib/Target/R600/AMDGPUISelLowering.h > index 75ac4c2..8a68356 100644 > --- a/lib/Target/R600/AMDGPUISelLowering.h > +++ b/lib/Target/R600/AMDGPUISelLowering.h > @@ -160,6 +160,7 @@ enum { >FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, >STORE_MSKOR, >LOAD_CONSTANT, > + TBUFFER_STORE_FORMAT, >LAST_AMDGPU_ISD_NUMBER > }; > > diff --git a/lib/Target/R600/SIISelLowering.cpp > b/lib/Target/R600/SIISelLowering.cpp > index f196059..6fa0c85 100644 > --- a/lib/Target/R600/SIISelLowering.cpp > +++ b/lib/Target/R600/SIISelLowering.cpp > @@ -86,6 +86,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : >setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom); >setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); > > + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); > + >setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); > >setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); > @@ -462,6 +464,43 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, > SelectionDAG &DAG) const { > Op.getOperand(3)); > } >} > + > + case ISD::INTRINSIC_VOID: > +SDValue Chain = Op.getOperand(0); > +unsigned IntrinsicID = > cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); > + > +switch (IntrinsicID) { > + case AMDGPUIntrinsic::SI_tbuffer_store: { > +SDLoc DL(Op); > +SDValue Ops [] = { > + Chain, > + ResourceDescriptorToi128(Op.getOperand(2), DAG), > + Op.getOperand(3), > + Op.getOperand(4), > + Op.getOperand(5), > + Op.getOperand(6), > + 
Op.getOperand(7), > + Op.getOperand(8), > + Op.getOperand(9), > + Op.getOperand(10), > + Op.getOperand(11), > + Op.getOperand(12), > + Op.getOperand(13), > + Op.getOperand(14) > +}; > +EVT VT = Op.getOperand(3).getValueType(); > + > +MachineMemOperand *MMO = MF.getMachineMemOperand( > +MachinePointerInfo(), > +MachineMemOperand::MOStore, > +VT.getSizeInBits() / 8, 4); > +return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, > + Op->getVTList(), Ops, > + sizeof(Ops)/sizeof(Ops[0]), VT, MMO); > + } > + default: > +break; > +} >} >return SDValue(); > } > diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td > index ecc4718..c902feb 100644 > --- a/lib/Target/R600/SIInstrInfo.td > +++ b/lib/Target/R600/SIInstrInfo.td > @@ -21,6 +21,25 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT", >[SDNPMayLoad, SDNPMemOperand] > >; > > +def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", > + SDTypeProfile<0, 13, > +[SDTCisVT<0, i128>, // rsrc(SGPR) > + SDTCisVT<1, iAny>, // vdata(VGPR) > + SDTCisVT<2, i32>,// num_channels(imm) > + SDTCisVT<3, i32>,// vad