Re: [Mesa-dev] [PATCH] R600/SI: expose TBUFFER_STORE_FORMAT_* for OpenGL transform feedback

2013-09-05 Thread Marek Olšák
No, we use 11 user data SGPRs for the vertex shader, but there are
also 6 additional SGPRs loaded by the hardware based on the VGT state (4
streamout offsets, streamout_enable, and streamout_write_index), which
brings the total to 17 input SGPRs (SGPR0-SGPR16). The 6 SGPRs can be
enabled by setting SPI_SHADER_PGM_RSRC2_VS.SO_* = 1.

Marek

On Thu, Sep 5, 2013 at 5:44 PM, Tom Stellard wrote:
> On Mon, Sep 02, 2013 at 09:07:18PM +0200, Marek Olšák wrote:
>> For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist.
>>
>> The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take
>> a resource descriptor might be nicer.
>>
>> The maximum number of input SGPRs is bumped to 17.
>>
>> Signed-off-by: Marek Olšák 
>> ---
>>  lib/Target/R600/AMDGPUCallingConv.td   |  3 ++-
>>  lib/Target/R600/AMDGPUISelLowering.cpp |  1 +
>>  lib/Target/R600/AMDGPUISelLowering.h   |  1 +
>>  lib/Target/R600/SIISelLowering.cpp | 39 
>> ++
>>  lib/Target/R600/SIInstrInfo.td | 27 +++
>>  lib/Target/R600/SIInstructions.td  | 29 +
>>  lib/Target/R600/SIIntrinsics.td| 18 
>>  7 files changed, 113 insertions(+), 5 deletions(-)
>>
>> diff --git a/lib/Target/R600/AMDGPUCallingConv.td 
>> b/lib/Target/R600/AMDGPUCallingConv.td
>> index 84d3118..d26be32 100644
>> --- a/lib/Target/R600/AMDGPUCallingConv.td
>> +++ b/lib/Target/R600/AMDGPUCallingConv.td
>> @@ -19,7 +19,8 @@ def CC_SI : CallingConv<[
>>
>>CCIfInReg<CCIfType<[f32, i32], CCAssignToReg<[
>>  SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
>> -SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
>> +SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
>> +SGPR16
>
> Why is this necessary?  Are we using all 16 user sgprs now?
>
>>]>>>,
>>
>>CCIfInReg<CCIfType<[i64], CCAssignToRegWithShadow<
>> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp 
>> b/lib/Target/R600/AMDGPUISelLowering.cpp
>> index 1237323..30d9503 100644
>> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
>> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
>> @@ -718,5 +718,6 @@ const char* 
>> AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
>>NODE_NAME_CASE(SAMPLED)
>>NODE_NAME_CASE(SAMPLEL)
>>NODE_NAME_CASE(STORE_MSKOR)
>> +  NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
>>}
>>  }
>> diff --git a/lib/Target/R600/AMDGPUISelLowering.h 
>> b/lib/Target/R600/AMDGPUISelLowering.h
>> index 75ac4c2..8a68356 100644
>> --- a/lib/Target/R600/AMDGPUISelLowering.h
>> +++ b/lib/Target/R600/AMDGPUISelLowering.h
>> @@ -160,6 +160,7 @@ enum {
>>FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
>>STORE_MSKOR,
>>LOAD_CONSTANT,
>> +  TBUFFER_STORE_FORMAT,
>>LAST_AMDGPU_ISD_NUMBER
>>  };
>>
>> diff --git a/lib/Target/R600/SIISelLowering.cpp 
>> b/lib/Target/R600/SIISelLowering.cpp
>> index f196059..6fa0c85 100644
>> --- a/lib/Target/R600/SIISelLowering.cpp
>> +++ b/lib/Target/R600/SIISelLowering.cpp
>> @@ -86,6 +86,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
>>setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
>>setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
>>
>> +  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
>> +
>>setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
>>
>>setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
>> @@ -462,6 +464,43 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, 
>> SelectionDAG &DAG) const {
>>   Op.getOperand(3));
>>  }
>>}
>> +
>> +  case ISD::INTRINSIC_VOID:
>> +SDValue Chain = Op.getOperand(0);
>> +unsigned IntrinsicID = 
>> cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
>> +
>> +switch (IntrinsicID) {
>> +  case AMDGPUIntrinsic::SI_tbuffer_store: {
>> +SDLoc DL(Op);
>> +SDValue Ops [] = {
>> +  Chain,
>> +  ResourceDescriptorToi128(Op.getOperand(2), DAG),
>> +  Op.getOperand(3),
>> +  Op.getOperand(4),
>> +  Op.getOperand(5),
>> +  Op.getOperand(6),
>> +  Op.getOperand(7),
>> +  Op.getOperand(8),
>> +  Op.getOperand(9),
>> +  Op.getOperand(10),
>> +  Op.getOperand(11),
>> +  Op.getOperand(12),
>> +  Op.getOperand(13),
>> +  Op.getOperand(14)
>> +};
>> +EVT VT = Op.getOperand(3).getValueType();
>> +
>> +MachineMemOperand *MMO = MF.getMachineMemOperand(
>> +MachinePointerInfo(),
>> +MachineMemOperand::MOStore,
>> +VT.getSizeInBits() / 8, 4);
>> +return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
>> +   Op->getVTList(), Ops,
>> +   sizeof(Ops)/sizeof(Ops[0]), VT, MMO);
>> +  }
>> +  default:
>> +break;
>> +}
>>}
>>return SDValue();
>>  }
>> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
>> index ecc4718..c902feb 100644
>> --- a/lib/Target/R600/SIInstrInfo.td

Re: [Mesa-dev] [PATCH] R600/SI: expose TBUFFER_STORE_FORMAT_* for OpenGL transform feedback

2013-09-05 Thread Tom Stellard
On Mon, Sep 02, 2013 at 09:07:18PM +0200, Marek Olšák wrote:
> For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist.
> 
> The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take
> a resource descriptor might be nicer.
> 
> The maximum number of input SGPRs is bumped to 17.
> 
> Signed-off-by: Marek Olšák 
> ---
>  lib/Target/R600/AMDGPUCallingConv.td   |  3 ++-
>  lib/Target/R600/AMDGPUISelLowering.cpp |  1 +
>  lib/Target/R600/AMDGPUISelLowering.h   |  1 +
>  lib/Target/R600/SIISelLowering.cpp | 39 
> ++
>  lib/Target/R600/SIInstrInfo.td | 27 +++
>  lib/Target/R600/SIInstructions.td  | 29 +
>  lib/Target/R600/SIIntrinsics.td| 18 
>  7 files changed, 113 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/Target/R600/AMDGPUCallingConv.td 
> b/lib/Target/R600/AMDGPUCallingConv.td
> index 84d3118..d26be32 100644
> --- a/lib/Target/R600/AMDGPUCallingConv.td
> +++ b/lib/Target/R600/AMDGPUCallingConv.td
> @@ -19,7 +19,8 @@ def CC_SI : CallingConv<[
>  
>CCIfInReg<CCIfType<[f32, i32], CCAssignToReg<[
>  SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
> -SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
> +SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
> +SGPR16

Why is this necessary?  Are we using all 16 user sgprs now?

>]>>>,
>  
>CCIfInReg<CCIfType<[i64], CCAssignToRegWithShadow<
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp 
> b/lib/Target/R600/AMDGPUISelLowering.cpp
> index 1237323..30d9503 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -718,5 +718,6 @@ const char* 
> AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
>NODE_NAME_CASE(SAMPLED)
>NODE_NAME_CASE(SAMPLEL)
>NODE_NAME_CASE(STORE_MSKOR)
> +  NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
>}
>  }
> diff --git a/lib/Target/R600/AMDGPUISelLowering.h 
> b/lib/Target/R600/AMDGPUISelLowering.h
> index 75ac4c2..8a68356 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.h
> +++ b/lib/Target/R600/AMDGPUISelLowering.h
> @@ -160,6 +160,7 @@ enum {
>FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
>STORE_MSKOR,
>LOAD_CONSTANT,
> +  TBUFFER_STORE_FORMAT,
>LAST_AMDGPU_ISD_NUMBER
>  };
>  
> diff --git a/lib/Target/R600/SIISelLowering.cpp 
> b/lib/Target/R600/SIISelLowering.cpp
> index f196059..6fa0c85 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -86,6 +86,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
>setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
>setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
>  
> +  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
> +
>setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
>  
>setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
> @@ -462,6 +464,43 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, 
> SelectionDAG &DAG) const {
>   Op.getOperand(3));
>  }
>}
> +
> +  case ISD::INTRINSIC_VOID:
> +SDValue Chain = Op.getOperand(0);
> +unsigned IntrinsicID = 
> cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
> +
> +switch (IntrinsicID) {
> +  case AMDGPUIntrinsic::SI_tbuffer_store: {
> +SDLoc DL(Op);
> +SDValue Ops [] = {
> +  Chain,
> +  ResourceDescriptorToi128(Op.getOperand(2), DAG),
> +  Op.getOperand(3),
> +  Op.getOperand(4),
> +  Op.getOperand(5),
> +  Op.getOperand(6),
> +  Op.getOperand(7),
> +  Op.getOperand(8),
> +  Op.getOperand(9),
> +  Op.getOperand(10),
> +  Op.getOperand(11),
> +  Op.getOperand(12),
> +  Op.getOperand(13),
> +  Op.getOperand(14)
> +};
> +EVT VT = Op.getOperand(3).getValueType();
> +
> +MachineMemOperand *MMO = MF.getMachineMemOperand(
> +MachinePointerInfo(),
> +MachineMemOperand::MOStore,
> +VT.getSizeInBits() / 8, 4);
> +return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
> +   Op->getVTList(), Ops,
> +   sizeof(Ops)/sizeof(Ops[0]), VT, MMO);
> +  }
> +  default:
> +break;
> +}
>}
>return SDValue();
>  }
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index ecc4718..c902feb 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -21,6 +21,25 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
>[SDNPMayLoad, SDNPMemOperand]
>  >;
>  
> +def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
> +  SDTypeProfile<0, 13,
> +[SDTCisVT<0, i128>,   // rsrc(SGPR)
> + SDTCisVT<1, iAny>,   // vdata(VGPR)
> + SDTCisVT<2, i32>,// num_channels(imm)
> + SDTCisVT<3, i32>,// vad