Hi, The attached patches add support for 64-bit loads and stores as well as 64-bit kernel arguments.
-Tom
>From c4c7d934e951e73f72d998a9f1af0a523d83bbed Mon Sep 17 00:00:00 2001 From: Tom Stellard <thomas.stell...@amd.com> Date: Tue, 23 Jul 2013 07:52:50 -0700 Subject: [PATCH 1/2] R600: Use 64-bit alignment for 64-bit kernel arguments --- lib/Target/R600/AMDGPUCallingConv.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index 5c9a3e4..3865c62 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -39,7 +39,7 @@ def CC_SI : CallingConv<[ // Calling convention for compute kernels def CC_AMDGPU_Kernel : CallingConv<[ CCIfType<[v4i32, v4f32], CCAssignToStack <16, 16>>, - CCIfType<[i64, f64], CCAssignToStack < 8, 4>>, + CCIfType<[i64, f64], CCAssignToStack < 8, 8>>, CCIfType<[i32, f32], CCAssignToStack < 4, 4>>, CCIfType<[i16], CCAssignToStack < 2, 4>>, CCIfType<[i8], CCAssignToStack < 1, 4>> -- 1.7.11.4
>From 041f7609662bbf41d29983f475e8ce1258a9fe7c Mon Sep 17 00:00:00 2001 From: Tom Stellard <thomas.stell...@amd.com> Date: Tue, 16 Jul 2013 14:55:27 -0700 Subject: [PATCH 2/2] R600: Add 64-bit float load/store support * Added R600_Reg64 class * Added T#Index#.XY registers definition * Added v2i32 register reads from parameter and global space * Added f32 and i32 elements extraction from v2f32 and v2i32 * Added v2i32 -> v2f32 conversions Tom Stellard: - Mark vec2 operations as expand. The addition of a vec2 register class made them all legal. Signed-off-by: Dmitry Cherkassov <dcherkas...@gmail.com> --- lib/Target/R600/AMDGPUCallingConv.td | 10 ++-- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 9 +++- lib/Target/R600/AMDGPUISelLowering.cpp | 3 ++ lib/Target/R600/R600ControlFlowFinalizer.cpp | 4 +- lib/Target/R600/R600ISelLowering.cpp | 25 +++++++-- lib/Target/R600/R600InstrInfo.cpp | 19 ++++--- lib/Target/R600/R600Instructions.td | 80 ++++++++++++++++++++++++++-- lib/Target/R600/R600RegisterInfo.td | 16 ++++++ test/CodeGen/R600/64bit-kernel-args.ll | 2 +- test/CodeGen/R600/build_vector.ll | 34 ++++++++++++ test/CodeGen/R600/fadd.ll | 10 ++++ test/CodeGen/R600/fdiv.ll | 43 ++++++++++----- test/CodeGen/R600/fmul.ll | 10 ++++ test/CodeGen/R600/fp_to_sint.ll | 10 ++++ test/CodeGen/R600/fp_to_uint.ll | 10 ++++ test/CodeGen/R600/fsub.ll | 19 +++++-- test/CodeGen/R600/load.ll | 3 +- test/CodeGen/R600/load.vec.ll | 3 +- test/CodeGen/R600/setcc.ll | 23 ++++---- test/CodeGen/R600/sint_to_fp.ll | 10 ++++ test/CodeGen/R600/store.ll | 8 +-- test/CodeGen/R600/sub.ll | 2 +- test/CodeGen/R600/uint_to_fp.ll | 10 ++++ 23 files changed, 298 insertions(+), 65 deletions(-) create mode 100644 test/CodeGen/R600/build_vector.ll diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index 3865c62..fc95d58 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -38,11 +38,11 @@ def CC_SI : CallingConv<[ // Calling convention for compute kernels def CC_AMDGPU_Kernel : CallingConv<[ - CCIfType<[v4i32, v4f32], CCAssignToStack <16, 16>>, - CCIfType<[i64, f64], CCAssignToStack < 8, 8>>, - CCIfType<[i32, f32], CCAssignToStack < 4, 4>>, - CCIfType<[i16], CCAssignToStack < 2, 4>>, - CCIfType<[i8], CCAssignToStack < 1, 4>> + CCIfType<[v4i32, v4f32], CCAssignToStack <16, 16>>, + CCIfType<[i64, f64, v2f32, v2i32], CCAssignToStack < 8, 8>>, + CCIfType<[i32, f32], CCAssignToStack < 4, 4>>, + CCIfType<[i16], CCAssignToStack < 2, 4>>, + CCIfType<[i8], CCAssignToStack < 1, 4>> ]>; def CC_AMDGPU : CallingConv<[ diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 307b804..38a5f24 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -260,12 +260,19 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { break; } + + unsigned RegClassID; + switch(N->getValueType(0).getVectorNumElements()) { + case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; + case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break; + default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); + } // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG // that adds a 128 bits reg copy when going through TwoAddressInstructions // pass. We want to avoid 128 bits copies as much as possible because they // can't be bundled by our scheduler. SDValue RegSeqArgs[9] = { - CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32), + CurDAG->getTargetConstant(RegClassID, MVT::i32), SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32), SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32), SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32), diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 779d97f..4bf7a85 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -79,6 +79,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::f64, Promote); AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Expand); + setOperationAction(ISD::FNEG, MVT::v2f32, Expand); setOperationAction(ISD::FNEG, MVT::v4f32, Expand); diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index b69d38b..bc1743a 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -378,8 +378,10 @@ public: case AMDGPU::R600_ExportBuf: case AMDGPU::R600_ExportSwz: case AMDGPU::RAT_WRITE_CACHELESS_32_eg: + case AMDGPU::RAT_WRITE_CACHELESS_64_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: - case AMDGPU::RAT_STORE_DWORD_cm: + case AMDGPU::RAT_STORE_DWORD32_cm: + case AMDGPU::RAT_STORE_DWORD64_cm: DEBUG(dbgs() << CfCount << ":"; MI->dump();); CfCount++; break; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index a2bc2c3..4be057a 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -33,21 +33,33 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); + addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass); + addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass); + computeRegisterProperties(); setOperationAction(ISD::FADD, MVT::v4f32, Expand); + setOperationAction(ISD::FADD, MVT::v2f32, Expand); setOperationAction(ISD::FMUL, MVT::v4f32, Expand); + setOperationAction(ISD::FMUL, MVT::v2f32, Expand); setOperationAction(ISD::FDIV, MVT::v4f32, Expand); + setOperationAction(ISD::FDIV, MVT::v2f32, Expand); setOperationAction(ISD::FSUB, MVT::v4f32, Expand); + setOperationAction(ISD::FSUB, MVT::v2f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Custom); setOperationAction(ISD::FSIN, MVT::f32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand); setOperationAction(ISD::SETCC, MVT::v4i32, Expand); + setOperationAction(ISD::SETCC, MVT::v2i32, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); @@ -70,7 +82,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : // Legalize loads and stores to the private address space. setOperationAction(ISD::LOAD, MVT::i32, Custom); - setOperationAction(ISD::LOAD, MVT::v2i32, Expand); + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom); @@ -78,7 +90,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom); setOperationAction(ISD::STORE, MVT::i8, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); - setOperationAction(ISD::STORE, MVT::v2i32, Expand); + setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); setOperationAction(ISD::LOAD, MVT::i32, Custom); @@ -173,6 +185,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( } case AMDGPU::RAT_WRITE_CACHELESS_32_eg: + case AMDGPU::RAT_WRITE_CACHELESS_64_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; @@ -1132,7 +1145,13 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32)); Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr); } - Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4); + EVT NewVT = MVT::v4i32; + unsigned NumElements = 4; + if (VT.isVector()) { + NewVT = VT; + NumElements = VT.getVectorNumElements(); + } + Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements); } else { // non constant ptr cant be folded, keeps it as a v4f32 load Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 3bc170f..925cbd2 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -51,9 +51,17 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - if (AMDGPU::R600_Reg128RegClass.contains(DestReg) - && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { - for (unsigned I = 0; I < 4; I++) { + unsigned VectorComponents = 0; + if (AMDGPU::R600_Reg128RegClass.contains(DestReg) && + AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { + VectorComponents = 4; + } else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) && + AMDGPU::R600_Reg64RegClass.contains(SrcReg)) { + VectorComponents = 2; + } + + if (VectorComponents > 0) { + for (unsigned I = 0; I < VectorComponents; I++) { unsigned SubRegIndex = RI.getSubRegFromChannel(I); buildDefaultInstruction(MBB, MI, AMDGPU::MOV, RI.getSubReg(DestReg, SubRegIndex), @@ -62,11 +70,6 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, RegState::Define | RegState::Implicit); } } else { - - // We can't copy vec4 registers - assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg) - && !AMDGPU::R600_Reg128RegClass.contains(SrcReg)); - MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV, DestReg, SrcReg); NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0)) diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 9ff3897..3a74b5b 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1290,6 +1290,13 @@ def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < [(global_store i32:$rw_gpr, i32:$index_gpr)] >; +// 64-bit store +def RAT_WRITE_CACHELESS_64_eg : RAT_WRITE_CACHELESS_eg < + (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), + 0x3, "RAT_WRITE_CACHELESS_64_eg $rw_gpr.XY, $index_gpr, $eop", + [(global_store v2i32:$rw_gpr, i32:$index_gpr)] +>; + //128-bit store def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), @@ -1358,6 +1365,18 @@ class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> let Constraints = "$src_gpr.ptr = $dst_gpr"; } +class VTX_READ_64_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id, + (outs R600_Reg64:$dst_gpr), pattern> { + + let MEGA_FETCH_COUNT = 8; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 7; + let DST_SEL_W = 7; + let DATA_FORMAT = 0x1D; // COLOR_32_32 +} + class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id, (outs R600_Reg128:$dst_gpr), pattern> { @@ -1391,6 +1410,10 @@ def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; +def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0, + [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] +>; + def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; @@ -1413,6 +1436,11 @@ def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] >; +// 64-bit reads +def VTX_READ_GLOBAL_64_eg : VTX_READ_64_eg <1, + [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] +>; + // 128-bit reads def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] @@ -1744,15 +1772,23 @@ def : Pat < def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; -def RAT_STORE_DWORD_cm : EG_CF_RAT < - 0x57, 0x14, 0x1, (outs), - (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr), - "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr", - [(global_store i32:$rw_gpr, i32:$index_gpr)] +class RAT_STORE_DWORD_cm <bits<4> mask, dag ins, list<dag> pat> : EG_CF_RAT < + 0x57, 0x14, mask, (outs), ins, + "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr", pat > { let eop = 0; // This bit is not used on Cayman. } +def RAT_STORE_DWORD32_cm : RAT_STORE_DWORD_cm <0x1, + (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr), + [(global_store i32:$rw_gpr, i32:$index_gpr)] +>; + +def RAT_STORE_DWORD64_cm : RAT_STORE_DWORD_cm <0x3, + (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr), + [(global_store v2i32:$rw_gpr, i32:$index_gpr)] +>; + class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern> : VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> { @@ -1815,6 +1851,17 @@ class VTX_READ_32_cm <bits<8> buffer_id, list<dag> pattern> let Constraints = "$src_gpr.ptr = $dst_gpr"; } +class VTX_READ_64_cm <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_cm <"VTX_READ_64 $dst_gpr, $src_gpr", buffer_id, + (outs R600_Reg64:$dst_gpr), pattern> { + + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 7; + let DST_SEL_W = 7; + let DATA_FORMAT = 0x1D; // COLOR_32_32 +} + class VTX_READ_128_cm <bits<8> buffer_id, list<dag> pattern> : VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id, (outs R600_Reg128:$dst_gpr), pattern> { @@ -1846,6 +1893,10 @@ def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0, [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; +def VTX_READ_PARAM_64_cm : VTX_READ_64_cm <0, + [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] +>; + def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0, [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; @@ -1868,6 +1919,11 @@ def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1, [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] >; +// 64-bit reads +def VTX_READ_GLOBAL_64_cm : VTX_READ_64_cm <1, + [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] +>; + // 128-bit reads def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1, [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] @@ -2300,10 +2356,24 @@ def : Insert_Element <i32, v4i32, 3, sub3>; def : Vector4_Build <v4f32, f32>; def : Vector4_Build <v4i32, i32>; +def : Extract_Element <f32, v2f32, 0, sub0>; +def : Extract_Element <f32, v2f32, 1, sub1>; + +def : Insert_Element <f32, v2f32, 0, sub0>; +def : Insert_Element <f32, v2f32, 1, sub1>; + +def : Extract_Element <i32, v2i32, 0, sub0>; +def : Extract_Element <i32, v2i32, 1, sub1>; + +def : Insert_Element <i32, v2i32, 0, sub0>; +def : Insert_Element <i32, v2i32, 1, sub1>; + // bitconvert patterns def : BitConvert <i32, f32, R600_Reg32>; def : BitConvert <f32, i32, R600_Reg32>; +def : BitConvert <v2f32, v2i32, R600_Reg64>; +def : BitConvert <v2i32, v2f32, R600_Reg64>; def : BitConvert <v4f32, v4i32, R600_Reg128>; def : BitConvert <v4i32, v4f32, R600_Reg128>; diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index 1eabccb..fa987cf 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -23,6 +23,14 @@ class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> : let HWEncoding = encoding; } +class R600Reg_64<string n, list<Register> subregs, bits<16> encoding> : + RegisterWithSubRegs<n, subregs> { + let Namespace = "AMDGPU"; + let SubRegIndices = [sub0, sub1]; + let HWEncoding = encoding; +} + + foreach Index = 0-127 in { foreach Chan = [ "X", "Y", "Z", "W" ] in { // 32-bit Temporary Registers @@ -41,6 +49,11 @@ foreach Index = 0-127 in { !cast<Register>("T"#Index#"_Z"), !cast<Register>("T"#Index#"_W")], Index>; + + def T#Index#_XY : R600Reg_64 <"T"#Index#"", + [!cast<Register>("T"#Index#"_X"), + !cast<Register>("T"#Index#"_Y")], + Index>; } // KCACHE_BANK0 @@ -186,6 +199,9 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, let CopyCost = -1; } +def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, + (add (sequence "T%u_XY", 0, 63))>; + //===----------------------------------------------------------------------===// // Register classes for indirect addressing //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/R600/64bit-kernel-args.ll b/test/CodeGen/R600/64bit-kernel-args.ll index 2020e65..52016dd 100644 --- a/test/CodeGen/R600/64bit-kernel-args.ll +++ b/test/CodeGen/R600/64bit-kernel-args.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s --check-prefix=SI-CHECK ; SI-CHECK: @f64_kernel_arg ; SI-CHECK: BUFFER_STORE_DWORDX2 diff --git a/test/CodeGen/R600/build_vector.ll b/test/CodeGen/R600/build_vector.ll new file mode 100644 index 0000000..9b738a2 --- /dev/null +++ b/test/CodeGen/R600/build_vector.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK + +; R600-CHECK: @build_vector2 +; R600-CHECK: MOV +; R600-CHECK: MOV +; R600-CHECK-NOT: MOV +; SI-CHECK: @build_vector2 +; SI-CHECK-DAG: V_MOV_B32_e32 [[X:VGPR[0-9]]], 5 +; SI-CHECK-DAG: V_MOV_B32_e32 [[Y:VGPR[0-9]]], 6 +; SI-CHECK: BUFFER_STORE_DWORDX2 [[X]]_[[Y]] +define void @build_vector2 (<2 x i32> addrspace(1)* %out) { +entry: + store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK: @build_vector4 +; R600-CHECK: MOV +; R600-CHECK: MOV +; R600-CHECK: MOV +; R600-CHECK: MOV +; R600-CHECK-NOT: MOV +; SI-CHECK: @build_vector4 +; SI-CHECK-DAG: V_MOV_B32_e32 [[X:VGPR[0-9]]], 5 +; SI-CHECK-DAG: V_MOV_B32_e32 [[Y:VGPR[0-9]]], 6 +; SI-CHECK-DAG: V_MOV_B32_e32 [[Z:VGPR[0-9]]], 7 +; SI-CHECK-DAG: V_MOV_B32_e32 [[W:VGPR[0-9]]], 8 +; SI-CHECK: BUFFER_STORE_DWORDX4 [[X]]_[[Y]]_[[Z]]_[[W]] +define void @build_vector4 (<4 x i32> addrspace(1)* %out) { +entry: + store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll index 9a67232..97dbe44 100644 --- a/test/CodeGen/R600/fadd.ll +++ b/test/CodeGen/R600/fadd.ll @@ -15,6 +15,16 @@ declare float @llvm.R600.load.input(i32) readnone declare void @llvm.AMDGPU.store.output(float, i32) +; CHECK: @fadd_v2f32 +; CHECK-DAG: ADD * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z +; CHECK-DAG: ADD * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y +define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { +entry: + %0 = fadd <2 x float> %a, %b + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + ; CHECK: @fadd_v4f32 ; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll index c581ec9..6798eac 100644 --- a/test/CodeGen/R600/fdiv.ll +++ b/test/CodeGen/R600/fdiv.ll @@ -1,17 +1,36 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}} -;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}} -;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}} -;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}} -;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}} -;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}} -;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}} -;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}} +; These tests check that fdiv is expanded correctly and also test that the +; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate +; instruction groups. -define void @test(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) { +; CHECK: @fdiv_v2f32 +; CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z +; CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y +; CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS +; CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS +define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { entry: - %0 = fdiv <4 x float> %a, %b - store <4 x float> %0, <4 x float> addrspace(1)* %out + %0 = fdiv <2 x float> %a, %b + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + +; CHECK: @fdiv_v4f32 +; CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS +; CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS +; CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS +; CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS + +define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fdiv <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out ret void } diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll index a40e818..6ef3a11 100644 --- a/test/CodeGen/R600/fmul.ll +++ b/test/CodeGen/R600/fmul.ll @@ -15,6 +15,16 @@ declare float @llvm.R600.load.input(i32) readnone declare void @llvm.AMDGPU.store.output(float, i32) +; CHECK: @fmul_v2f32 +; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW]}} +; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW]}} +define void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { +entry: + %0 = fmul <2 x float> %a, %b + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + ; CHECK: @fmul_v4f32 ; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll index 77ab328..09a6967 100644 --- a/test/CodeGen/R600/fp_to_sint.ll +++ b/test/CodeGen/R600/fp_to_sint.ll @@ -1,5 +1,15 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; CHECK: @fp_to_sint_v2i32 +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} + +define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { + %result = fptosi <2 x float> %in to <2 x i32> + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + ; CHECK: @fp_to_sint_v4i32 ; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll index b07e286..2a365f9 100644 --- a/test/CodeGen/R600/fp_to_uint.ll +++ b/test/CodeGen/R600/fp_to_uint.ll @@ -1,5 +1,15 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; CHECK: @fp_to_uint_v2i32 +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} + +define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { + %result = fptoui <2 x float> %in to <2 x i32> + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + ; CHECK: @fp_to_uint_v4i32 ; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll index f784cde..0fc5860 100644 --- a/test/CodeGen/R600/fsub.ll +++ b/test/CodeGen/R600/fsub.ll @@ -15,12 +15,21 @@ declare float @llvm.R600.load.input(i32) readnone declare void @llvm.AMDGPU.store.output(float, i32) -; CHECK: @fsub_v4f32 -; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: @fsub_v2f32 +; CHECK-DAG: ADD * T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z +; CHECK-DAG: ADD * T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y +define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { +entry: + %0 = fsub <2 x float> %a, %b + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} +; CHECK: @fsub_v4f32 +; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 %a = load <4 x float> addrspace(1) * %in diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index 6eef7c7..f478ef5 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -92,8 +92,7 @@ entry: ; load a v2f32 value from the global address space ; R600-CHECK: @load_v2f32 -; R600-CHECK: VTX_READ_32 -; R600-CHECK: VTX_READ_32 +; R600-CHECK: VTX_READ_64 ; SI-CHECK: @load_v2f32 ; SI-CHECK: BUFFER_LOAD_DWORDX2 diff --git a/test/CodeGen/R600/load.vec.ll b/test/CodeGen/R600/load.vec.ll index b3d6349..8cba0b6 100644 --- a/test/CodeGen/R600/load.vec.ll +++ b/test/CodeGen/R600/load.vec.ll @@ -3,8 +3,7 @@ ; load a v2i32 value from the global address space. ; EG-CHECK: @load_v2i32 -; EG-CHECK-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4 -; EG-CHECK-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 +; EG-CHECK: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0 ; SI-CHECK: @load_v2i32 ; SI-CHECK: BUFFER_LOAD_DWORDX2 VGPR{{[0-9]+}} define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll index e3f77b1..992de70 100644 --- a/test/CodeGen/R600/setcc.ll +++ b/test/CodeGen/R600/setcc.ll @@ -1,26 +1,23 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;EG-CHECK: @test2 -;EG-CHECK: SETE_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SETE_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: @setcc_v2i32 +; EG-CHECK-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z +; EG-CHECK-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[2].W, KC0[3].Y -define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { - %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 - %a = load <2 x i32> addrspace(1) * %in - %b = load <2 x i32> addrspace(1) * %b_ptr +define void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) { %result = icmp eq <2 x i32> %a, %b %sext = sext <2 x i1> %result to <2 x i32> store <2 x i32> %sext, <2 x i32> addrspace(1)* %out ret void } -;EG-CHECK: @test4 -;EG-CHECK: SETE_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SETE_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SETE_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SETE_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: @setcc_v4i32 +; EG-CHECK-DAG: SETE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG-CHECK-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG-CHECK-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG-CHECK-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { +define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 %a = load <4 x i32> addrspace(1) * %in %b = load <4 x i32> addrspace(1) * %b_ptr diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll index 91a8eb7..d1e560e 100644 --- a/test/CodeGen/R600/sint_to_fp.ll +++ b/test/CodeGen/R600/sint_to_fp.ll @@ -1,5 +1,15 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; CHECK: @sint_to_fp_v2i32 +; CHECK-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W +; CHECK-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X + +define void @sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) { + %result = sitofp <2 x i32> %in to <2 x float> + store <2 x float> %result, <2 x float> addrspace(1)* %out + ret void +} + ; CHECK: @sint_to_fp_v4i32 ; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll index d233c73..1bda5e6 100644 --- a/test/CodeGen/R600/store.ll +++ b/test/CodeGen/R600/store.ll @@ -17,11 +17,9 @@ define void @store_f32(float addrspace(1)* %out, float %in) { ; vec2 floating-point stores ; EG-CHECK: @store_v2f32 -; EG-CHECK: RAT_WRITE_CACHELESS_32_eg -; EG-CHECK-NEXT: RAT_WRITE_CACHELESS_32_eg +; EG-CHECK: RAT_WRITE_CACHELESS_64_eg ; CM-CHECK: @store_v2f32 ; CM-CHECK: EXPORT_RAT_INST_STORE_DWORD -; CM-CHECK-NEXT: EXPORT_RAT_INST_STORE_DWORD ; SI-CHECK: @store_v2f32 ; SI-CHECK: BUFFER_STORE_DWORDX2 @@ -41,11 +39,9 @@ entry: ; be two 32-bit stores. ; EG-CHECK: @vecload2 -; EG-CHECK: RAT_WRITE_CACHELESS_32_eg -; EG-CHECK: RAT_WRITE_CACHELESS_32_eg +; EG-CHECK: RAT_WRITE_CACHELESS_64_eg ; CM-CHECK: @vecload2 ; CM-CHECK: EXPORT_RAT_INST_STORE_DWORD -; CM-CHECK: EXPORT_RAT_INST_STORE_DWORD ; SI-CHECK: @vecload2 ; SI-CHECK: BUFFER_STORE_DWORDX2 define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 { diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll index 10fce6c..5d20da0 100644 --- a/test/CodeGen/R600/sub.ll +++ b/test/CodeGen/R600/sub.ll @@ -2,8 +2,8 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s ;EG-CHECK: @test2 -;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: SUB_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test2 ;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll index 9054fc4..ea73bb0 100644 --- a/test/CodeGen/R600/uint_to_fp.ll +++ b/test/CodeGen/R600/uint_to_fp.ll @@ -1,5 +1,15 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; CHECK: @uint_to_fp_v2i32 +; CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W +; CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X + +define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) { + %result = uitofp <2 x i32> %in to <2 x float> + store <2 x float> %result, <2 x float> addrspace(1)* %out + ret void +} + ; CHECK: @uint_to_fp_v4i32 ; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -- 1.7.11.4
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev