Re: [Mesa-dev] [PATCH] R600/SI: expose TBUFFER_STORE_FORMAT_* for OpenGL transform feedback
On Mon, Sep 02, 2013 at 09:07:18PM +0200, Marek Olšák wrote: For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist. The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take a resource descriptor might be nicer. The maximum number of input SGPRs is bumped to 17. Signed-off-by: Marek Olšák marek.ol...@amd.com --- lib/Target/R600/AMDGPUCallingConv.td | 3 ++- lib/Target/R600/AMDGPUISelLowering.cpp | 1 + lib/Target/R600/AMDGPUISelLowering.h | 1 + lib/Target/R600/SIISelLowering.cpp | 39 ++ lib/Target/R600/SIInstrInfo.td | 27 +++ lib/Target/R600/SIInstructions.td | 29 + lib/Target/R600/SIIntrinsics.td| 18 7 files changed, 113 insertions(+), 5 deletions(-) diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index 84d3118..d26be32 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -19,7 +19,8 @@ def CC_SI : CallingConv[ CCIfInRegCCIfType[f32, i32] , CCAssignToReg[ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, -SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15 +SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, +SGPR16 Why is this necessary? Are we using all 16 user sgprs now? 
]>>>, CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow< diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 1237323..30d9503 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -718,5 +718,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(SAMPLED) NODE_NAME_CASE(SAMPLEL) NODE_NAME_CASE(STORE_MSKOR) + NODE_NAME_CASE(TBUFFER_STORE_FORMAT) } } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 75ac4c2..8a68356 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -160,6 +160,7 @@ enum { FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, + TBUFFER_STORE_FORMAT, LAST_AMDGPU_ISD_NUMBER }; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index f196059..6fa0c85 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -86,6 +86,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); @@ -462,6 +464,43 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { Op.getOperand(3)); } } + + case ISD::INTRINSIC_VOID: +SDValue Chain = Op.getOperand(0); +unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + +switch (IntrinsicID) { + case AMDGPUIntrinsic::SI_tbuffer_store: { +SDLoc DL(Op); +SDValue Ops [] = { + Chain, + ResourceDescriptorToi128(Op.getOperand(2), DAG), + Op.getOperand(3), + Op.getOperand(4), + Op.getOperand(5), + Op.getOperand(6), + Op.getOperand(7), + Op.getOperand(8), + Op.getOperand(9), + 
Op.getOperand(10), + Op.getOperand(11), + Op.getOperand(12), + Op.getOperand(13), + Op.getOperand(14) +}; +EVT VT = Op.getOperand(3).getValueType(); + +MachineMemOperand *MMO = MF.getMachineMemOperand( +MachinePointerInfo(), +MachineMemOperand::MOStore, +VT.getSizeInBits() / 8, 4); +return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, + Op->getVTList(), Ops, + sizeof(Ops)/sizeof(Ops[0]), VT, MMO); + } + default: +break; +} } return SDValue(); } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index ecc4718..c902feb 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -21,6 +21,25 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT", [SDNPMayLoad, SDNPMemOperand] >; +def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", + SDTypeProfile<0, 13, +[SDTCisVT<0, i128>, // rsrc(SGPR) + SDTCisVT<1, iAny>, // vdata(VGPR) + SDTCisVT<2, i32>, // num_channels(imm) + SDTCisVT<3, i32>, // vaddr(VGPR) + SDTCisVT<4, i32>, // 
Re: [Mesa-dev] [PATCH] R600/SI: expose TBUFFER_STORE_FORMAT_* for OpenGL transform feedback
No, we use 11 user data SGPRs for the vertex shader, but there are also 6 additional SGPRs loaded by the hw based on the VGT state (4 streamout offsets, streamout_enable, and streamout_write_index). The 6 SGPRs can be enabled by setting SPI_SHADER_PGM_RSRC2_VS.SO_* = 1. Marek On Thu, Sep 5, 2013 at 5:44 PM, Tom Stellard t...@stellard.net wrote: On Mon, Sep 02, 2013 at 09:07:18PM +0200, Marek Olšák wrote: For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist. The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take a resource descriptor might be nicer. The maximum number of input SGPRs is bumped to 17. Signed-off-by: Marek Olšák marek.ol...@amd.com --- lib/Target/R600/AMDGPUCallingConv.td | 3 ++- lib/Target/R600/AMDGPUISelLowering.cpp | 1 + lib/Target/R600/AMDGPUISelLowering.h | 1 + lib/Target/R600/SIISelLowering.cpp | 39 ++ lib/Target/R600/SIInstrInfo.td | 27 +++ lib/Target/R600/SIInstructions.td | 29 + lib/Target/R600/SIIntrinsics.td| 18 7 files changed, 113 insertions(+), 5 deletions(-) diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index 84d3118..d26be32 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -19,7 +19,8 @@ def CC_SI : CallingConv[ CCIfInRegCCIfType[f32, i32] , CCAssignToReg[ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, -SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15 +SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, +SGPR16 Why is this necessary? Are we using all 16 user sgprs now? 
]>>>, CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow< diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 1237323..30d9503 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -718,5 +718,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(SAMPLED) NODE_NAME_CASE(SAMPLEL) NODE_NAME_CASE(STORE_MSKOR) + NODE_NAME_CASE(TBUFFER_STORE_FORMAT) } } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 75ac4c2..8a68356 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -160,6 +160,7 @@ enum { FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, + TBUFFER_STORE_FORMAT, LAST_AMDGPU_ISD_NUMBER }; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index f196059..6fa0c85 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -86,6 +86,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); @@ -462,6 +464,43 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { Op.getOperand(3)); } } + + case ISD::INTRINSIC_VOID: +SDValue Chain = Op.getOperand(0); +unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + +switch (IntrinsicID) { + case AMDGPUIntrinsic::SI_tbuffer_store: { +SDLoc DL(Op); +SDValue Ops [] = { + Chain, + ResourceDescriptorToi128(Op.getOperand(2), DAG), + Op.getOperand(3), + Op.getOperand(4), + Op.getOperand(5), + Op.getOperand(6), + Op.getOperand(7), + Op.getOperand(8), + Op.getOperand(9), + 
Op.getOperand(10), + Op.getOperand(11), + Op.getOperand(12), + Op.getOperand(13), + Op.getOperand(14) +}; +EVT VT = Op.getOperand(3).getValueType(); + +MachineMemOperand *MMO = MF.getMachineMemOperand( +MachinePointerInfo(), +MachineMemOperand::MOStore, +VT.getSizeInBits() / 8, 4); +return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, + Op->getVTList(), Ops, + sizeof(Ops)/sizeof(Ops[0]), VT, MMO); + } + default: +break; +} } return SDValue(); } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index ecc4718..c902feb 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -21,6 +21,25 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
[Mesa-dev] [PATCH] R600/SI: expose TBUFFER_STORE_FORMAT_* for OpenGL transform feedback
For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist. The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take a resource descriptor might be nicer. The maximum number of input SGPRs is bumped to 17. Signed-off-by: Marek Olšák marek.ol...@amd.com --- lib/Target/R600/AMDGPUCallingConv.td | 3 ++- lib/Target/R600/AMDGPUISelLowering.cpp | 1 + lib/Target/R600/AMDGPUISelLowering.h | 1 + lib/Target/R600/SIISelLowering.cpp | 39 ++ lib/Target/R600/SIInstrInfo.td | 27 +++ lib/Target/R600/SIInstructions.td | 29 + lib/Target/R600/SIIntrinsics.td| 18 7 files changed, 113 insertions(+), 5 deletions(-) diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index 84d3118..d26be32 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -19,7 +19,8 @@ def CC_SI : CallingConv[ CCIfInRegCCIfType[f32, i32] , CCAssignToReg[ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, -SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15 +SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, +SGPR16 ], CCIfInRegCCIfType[i64] , CCAssignToRegWithShadow diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 1237323..30d9503 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -718,5 +718,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(SAMPLED) NODE_NAME_CASE(SAMPLEL) NODE_NAME_CASE(STORE_MSKOR) + NODE_NAME_CASE(TBUFFER_STORE_FORMAT) } } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 75ac4c2..8a68356 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -160,6 +160,7 @@ enum { FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, + TBUFFER_STORE_FORMAT, LAST_AMDGPU_ISD_NUMBER }; diff --git a/lib/Target/R600/SIISelLowering.cpp 
b/lib/Target/R600/SIISelLowering.cpp index f196059..6fa0c85 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -86,6 +86,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); @@ -462,6 +464,43 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { Op.getOperand(3)); } } + + case ISD::INTRINSIC_VOID: +SDValue Chain = Op.getOperand(0); +unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + +switch (IntrinsicID) { + case AMDGPUIntrinsic::SI_tbuffer_store: { +SDLoc DL(Op); +SDValue Ops [] = { + Chain, + ResourceDescriptorToi128(Op.getOperand(2), DAG), + Op.getOperand(3), + Op.getOperand(4), + Op.getOperand(5), + Op.getOperand(6), + Op.getOperand(7), + Op.getOperand(8), + Op.getOperand(9), + Op.getOperand(10), + Op.getOperand(11), + Op.getOperand(12), + Op.getOperand(13), + Op.getOperand(14) +}; +EVT VT = Op.getOperand(3).getValueType(); + +MachineMemOperand *MMO = MF.getMachineMemOperand( +MachinePointerInfo(), +MachineMemOperand::MOStore, +VT.getSizeInBits() / 8, 4); +return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, + Op->getVTList(), Ops, + sizeof(Ops)/sizeof(Ops[0]), VT, MMO); + } + default: +break; +} } return SDValue(); } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index ecc4718..c902feb 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -21,6 +21,25 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT", [SDNPMayLoad, SDNPMemOperand] >; +def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", + SDTypeProfile<0, 13, +[SDTCisVT<0, i128>, // rsrc(SGPR) + SDTCisVT<1, iAny>, // vdata(VGPR) + 
SDTCisVT<2, i32>, // num_channels(imm) + SDTCisVT<3, i32>, // vaddr(VGPR) + SDTCisVT<4, i32>, // soffset(SGPR) + SDTCisVT<5, i32>, // inst_offset(imm) + SDTCisVT<6, i32>, // dfmt(imm) + SDTCisVT<7, i32>, // nfmt(imm) + SDTCisVT<8, i32>, // offen(imm) + SDTCisVT<9, i32>, // idxen(imm) + SDTCisVT<10, i32>, // glc(imm) + SDTCisVT<11,