On Sun, May 12, 2013 at 07:41:21AM -0700, Vincent Lejeune wrote: > Hi, > Patches 2 and 3 factor out some code from the backend. Patch 3 should avoid > some recomputation too, which shouldn't hurt. > Patches 4 and 5 rework how textures are handled in our backend. They replace > TGSI-like intrinsics (i.e. intrinsics that take the TextureTarget as their last argument, > which makes no sense > from a hw pov) with intrinsics closer to the hw. The pass could be done in mesa, but I > would rather have it in llvm for now to ensure backward compatibility with llvm 3.3. >
Hi Vincent, Just some small comments on patches 4 and 5. With those changes, this series is: Reviewed-by: Tom Stellard <thomas.stell...@amd.com> > From 3974315f153e67913f8cc4b4d52550bf6ab33e59 Mon Sep 17 00:00:00 2001 > From: Vincent Lejeune <v...@ovi.com> > Date: Sun, 12 May 2013 16:29:50 +0200 > Subject: [PATCH 4/5] R600: Rename 128 bit registers. > > --- > lib/Target/R600/R600Instructions.td | 17 ++++++++--------- > lib/Target/R600/R600RegisterInfo.td | 2 +- > 2 files changed, 9 insertions(+), 10 deletions(-) > What is the reason for renaming these registers? Could you add an explanation to the commit message? > diff --git a/lib/Target/R600/R600Instructions.td > b/lib/Target/R600/R600Instructions.td > index 86e4b4a..abaa94b 100644 > --- a/lib/Target/R600/R600Instructions.td > +++ b/lib/Target/R600/R600Instructions.td > @@ -1750,8 +1750,7 @@ let usesCustomInserter = 1 in { > > class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name, > list<dag> pattern> > - : EG_CF_RAT <0x57, 0x2, 0, (outs), ins, > - !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> { > + : EG_CF_RAT <0x57, 0x2, 0, (outs), ins, name, pattern> { > let RIM = 0; > // XXX: Have a separate instruction for non-indexed writes. 
> let TYPE = 1; > @@ -1771,19 +1770,19 @@ class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> > comp_mask, string name, > // 32-bit store > def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < > (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), > - 0x1, "RAT_WRITE_CACHELESS_32_eg", > + 0x1, "RAT_WRITE_CACHELESS_32_eg $rw_gpr, $index_gpr, $eop", > [(global_store i32:$rw_gpr, i32:$index_gpr)] > >; > > //128-bit store > def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < > (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), > - 0xf, "RAT_WRITE_CACHELESS_128", > + 0xf, "RAT_WRITE_CACHELESS_128 $rw_gpr.XYZW, $index_gpr, $eop", > [(global_store v4i32:$rw_gpr, i32:$index_gpr)] > >; > > class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> > pattern> > - : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>, > + : InstR600ISA <outs, (ins MEMxi:$ptr), name, pattern>, > VTX_WORD1_GPR, VTX_WORD0 { > > // Static fields > @@ -1838,7 +1837,7 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag > outs, list<dag> pattern> > } > > class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> > - : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst), > + : VTX_READ_eg <"VTX_READ_8 $dst, $ptr", buffer_id, (outs > R600_TReg32_X:$dst), > pattern> { > > let MEGA_FETCH_COUNT = 1; > @@ -1850,7 +1849,7 @@ class VTX_READ_8_eg <bits<8> buffer_id, list<dag> > pattern> > } > > class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern> > - : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst), > + : VTX_READ_eg <"VTX_READ_16 $dst, $ptr", buffer_id, (outs > R600_TReg32_X:$dst), > pattern> { > let MEGA_FETCH_COUNT = 2; > let DST_SEL_X = 0; > @@ -1862,7 +1861,7 @@ class VTX_READ_16_eg <bits<8> buffer_id, list<dag> > pattern> > } > > class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> > - : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst), > + : VTX_READ_eg <"VTX_READ_32 $dst, 
$ptr", buffer_id, (outs > R600_TReg32_X:$dst), > pattern> { > > let MEGA_FETCH_COUNT = 4; > @@ -1883,7 +1882,7 @@ class VTX_READ_32_eg <bits<8> buffer_id, list<dag> > pattern> > } > > class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> > - : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst), > + : VTX_READ_eg <"VTX_READ_128 $dst.XYZW, $ptr", buffer_id, (outs > R600_Reg128:$dst), > pattern> { > > let MEGA_FETCH_COUNT = 16; > diff --git a/lib/Target/R600/R600RegisterInfo.td > b/lib/Target/R600/R600RegisterInfo.td > index bfc546b..df6004b 100644 > --- a/lib/Target/R600/R600RegisterInfo.td > +++ b/lib/Target/R600/R600RegisterInfo.td > @@ -35,7 +35,7 @@ foreach Index = 0-127 in { > Chan>; > } > // 128-bit Temporary Registers > - def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW", > + def T#Index#_XYZW : R600Reg_128 <"T"#Index#"", > [!cast<Register>("T"#Index#"_X"), > !cast<Register>("T"#Index#"_Y"), > !cast<Register>("T"#Index#"_Z"), > -- > 1.8.2.1 > > From 6840d3e3995283e98cd535db36ba24364f690072 Mon Sep 17 00:00:00 2001 > From: Vincent Lejeune <v...@ovi.com> > Date: Mon, 6 May 2013 20:05:16 +0200 > Subject: [PATCH 5/5] R600: Improve texture handling > > --- > lib/Target/R600/AMDGPU.h | 1 + > lib/Target/R600/AMDGPUISelLowering.h | 1 + > lib/Target/R600/AMDGPUTargetMachine.cpp | 2 + > lib/Target/R600/CMakeLists.txt | 1 + > lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 45 ++++ > lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 2 + > lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 95 ++----- > lib/Target/R600/R600ISelLowering.cpp | 260 +++++++++++++++++-- > lib/Target/R600/R600Instructions.td | 196 +++++++------- > lib/Target/R600/R600Intrinsics.td | 130 ++++++++++ > lib/Target/R600/R600TextureIntrinsicsReplacer.cpp | 286 > +++++++++++++++++++++ > test/CodeGen/R600/llvm.AMDGPU.tex.ll | 32 +-- > 12 files changed, 834 insertions(+), 217 deletions(-) > create mode 100644 lib/Target/R600/R600TextureIntrinsicsReplacer.cpp > > diff --git 
a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h > index 9792bd8..f9d70c9 100644 > --- a/lib/Target/R600/AMDGPU.h > +++ b/lib/Target/R600/AMDGPU.h > @@ -21,6 +21,7 @@ class FunctionPass; > class AMDGPUTargetMachine; > > // R600 Passes > +FunctionPass* createR600TextureIntrinsicsReplacer(); > FunctionPass* createR600KernelParametersPass(const DataLayout *TD); > FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); > FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm); > diff --git a/lib/Target/R600/AMDGPUISelLowering.h > b/lib/Target/R600/AMDGPUISelLowering.h > index c2a79ea..f108fbc 100644 > --- a/lib/Target/R600/AMDGPUISelLowering.h > +++ b/lib/Target/R600/AMDGPUISelLowering.h > @@ -126,6 +126,7 @@ enum { > SMIN, > UMIN, > URECIP, > + TEXTURE_FETCH, > EXPORT, > CONST_ADDRESS, > REGISTER_LOAD, > diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp > b/lib/Target/R600/AMDGPUTargetMachine.cpp > index 0ec67ce..9e0edfb 100644 > --- a/lib/Target/R600/AMDGPUTargetMachine.cpp > +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp > @@ -110,6 +110,8 @@ AMDGPUPassConfig::addPreISel() { > if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { > addPass(createAMDGPUStructurizeCFGPass()); > addPass(createSIAnnotateControlFlowPass()); > + } else { > + addPass(createR600TextureIntrinsicsReplacer()); > } > return false; > } > diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt > index 2ad2047..a4f281a 100644 > --- a/lib/Target/R600/CMakeLists.txt > +++ b/lib/Target/R600/CMakeLists.txt > @@ -44,6 +44,7 @@ add_llvm_target(R600CodeGen > R600MachineScheduler.cpp > R600Packetizer.cpp > R600RegisterInfo.cpp > + R600TextureIntrinsicsReplacer.cpp > SIAnnotateControlFlow.cpp > SIInsertWaits.cpp > SIInstrInfo.cpp > diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp > b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp > index bbc6cc5..8de644b 100644 > --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp > +++ 
b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp > @@ -198,6 +198,51 @@ void AMDGPUInstPrinter::printBankSwizzle(const MCInst > *MI, unsigned OpNo, > return; > } > > +void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo, > + raw_ostream &O) { > + unsigned Sel = MI->getOperand(OpNo).getImm(); > + switch (Sel) { > + case 0: > + O << "X"; > + break; > + case 1: > + O << "Y"; > + break; > + case 2: > + O << "Z"; > + break; > + case 3: > + O << "W"; > + break; > + case 4: > + O << "0"; > + break; > + case 5: > + O << "1"; > + break; > + case 7: > + O << "_"; > + break; > + default: > + break; > + } > +} > + > +void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo, > + raw_ostream &O) { > + unsigned CT = MI->getOperand(OpNo).getImm(); > + switch (CT) { > + case 0: > + O << "U"; > + break; > + case 1: > + O << "N"; > + break; > + default: > + break; > + } > +} > + > void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo, > raw_ostream &O) { > int KCacheMode = MI->getOperand(OpNo).getImm(); > diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h > b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h > index c6fd053..4c1dfa6 100644 > --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h > +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h > @@ -49,6 +49,8 @@ private: > void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O); > void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); > void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O); > + void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); > + void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O); > void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O); > }; > > diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp > b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp > index 271a974..7bc200a 100644 > --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp > +++ 
b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp > @@ -82,21 +82,6 @@ enum FCInstr { > FC_CONTINUE > }; > > -enum TextureTypes { > - TEXTURE_1D = 1, > - TEXTURE_2D, > - TEXTURE_3D, > - TEXTURE_CUBE, > - TEXTURE_RECT, > - TEXTURE_SHADOW1D, > - TEXTURE_SHADOW2D, > - TEXTURE_SHADOWRECT, > - TEXTURE_1D_ARRAY, > - TEXTURE_2D_ARRAY, > - TEXTURE_SHADOW1D_ARRAY, > - TEXTURE_SHADOW2D_ARRAY > -}; > - > MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, > const MCRegisterInfo &MRI, > const MCSubtargetInfo &STI, > @@ -122,63 +107,29 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst > &MI, raw_ostream &OS, > Emit(InstWord2, OS); > Emit((u_int32_t) 0, OS); > } else if (IS_TEX(Desc)) { > - unsigned Opcode = MI.getOpcode(); > - bool HasOffsets = (Opcode == AMDGPU::TEX_LD); > - unsigned OpOffset = HasOffsets ? 3 : 0; > - int64_t Sampler = MI.getOperand(OpOffset + 3).getImm(); > - int64_t TextureType = MI.getOperand(OpOffset + 4).getImm(); > - > - uint32_t SrcSelect[4] = {0, 1, 2, 3}; > - uint32_t Offsets[3] = {0, 0, 0}; > - uint64_t CoordType[4] = {1, 1, 1, 1}; > - > - if (HasOffsets) > - for (unsigned i = 0; i < 3; i++) { > - int SignedOffset = MI.getOperand(i + 2).getImm(); > - Offsets[i] = (SignedOffset & 0x1F); > - } > - > - if (TextureType == TEXTURE_RECT || > - TextureType == TEXTURE_SHADOWRECT) { > - CoordType[ELEMENT_X] = 0; > - CoordType[ELEMENT_Y] = 0; > - } > - > - if (TextureType == TEXTURE_1D_ARRAY || > - TextureType == TEXTURE_SHADOW1D_ARRAY) { > - if (Opcode == AMDGPU::TEX_SAMPLE_C_L || > - Opcode == AMDGPU::TEX_SAMPLE_C_LB) { > - CoordType[ELEMENT_Y] = 0; > - } else { > - CoordType[ELEMENT_Z] = 0; > - SrcSelect[ELEMENT_Z] = ELEMENT_Y; > - } > - } else if (TextureType == TEXTURE_2D_ARRAY || > - TextureType == TEXTURE_SHADOW2D_ARRAY) { > - CoordType[ELEMENT_Z] = 0; > - } > - > - > - if ((TextureType == TEXTURE_SHADOW1D || > - TextureType == TEXTURE_SHADOW2D || > - TextureType == TEXTURE_SHADOWRECT || > - TextureType == 
TEXTURE_SHADOW1D_ARRAY) && > - Opcode != AMDGPU::TEX_SAMPLE_C_L && > - Opcode != AMDGPU::TEX_SAMPLE_C_LB) { > - SrcSelect[ELEMENT_W] = ELEMENT_Z; > - } > - > - uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) | > - CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 | > - CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63; > - uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 | > - SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 | > - SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 | > - Offsets[2] << 10; > - > - Emit(Word01, OS); > - Emit(Word2, OS); > - Emit((u_int32_t) 0, OS); > + int64_t Sampler = MI.getOperand(14).getImm(); > + > + uint32_t SrcSelect[4] = { > + MI.getOperand(2).getImm(), > + MI.getOperand(3).getImm(), > + MI.getOperand(4).getImm(), > + MI.getOperand(5).getImm() > + }; > + uint32_t Offsets[3] = { > + MI.getOperand(6).getImm() & 0x1F, > + MI.getOperand(7).getImm() & 0x1F, > + MI.getOperand(8).getImm() & 0x1F > + }; > + > + uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups); > + uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 | > + SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 | > + SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 | > + Offsets[2] << 10; > + > + Emit(Word01, OS); > + Emit(Word2, OS); > + Emit((u_int32_t) 0, OS); > } else { > uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); > Emit(Inst, OS); > diff --git a/lib/Target/R600/R600ISelLowering.cpp > b/lib/Target/R600/R600ISelLowering.cpp > index c6e2136..d232022 100644 > --- a/lib/Target/R600/R600ISelLowering.cpp > +++ b/lib/Target/R600/R600ISelLowering.cpp > @@ -179,23 +179,99 @@ MachineBasicBlock * > R600TargetLowering::EmitInstrWithCustomInserter( > case AMDGPU::TXD: { > unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); > unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); > - > + MachineOperand &RID = MI->getOperand(4); > + MachineOperand &SID = 
MI->getOperand(5); > + unsigned TextureId = MI->getOperand(6).getImm(); > + unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3; > + unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1; > + > + switch (TextureId) { > + case 5: // Rect > + CTX = CTY = 0; > + break; > + case 6: // Shadow1D > + SrcW = SrcZ; > + break; > + case 7: // Shadow2D > + SrcW = SrcZ; > + break; > + case 8: // ShadowRect > + CTX = CTY = 0; > + SrcW = SrcZ; > + break; > + case 9: // 1DArray > + SrcZ = SrcY; > + CTZ = 0; > + break; > + case 10: // 2DArray > + CTZ = 0; > + break; > + case 11: // Shadow1DArray > + SrcZ = SrcY; > + CTZ = 0; > + break; > + case 12: // Shadow2DArray > + CTZ = 0; > + break; > + } > BuildMI(*BB, I, BB->findDebugLoc(I), > TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) > .addOperand(MI->getOperand(3)) > - .addOperand(MI->getOperand(4)) > - .addOperand(MI->getOperand(5)) > - .addOperand(MI->getOperand(6)); > + .addImm(SrcX) > + .addImm(SrcY) > + .addImm(SrcZ) > + .addImm(SrcW) > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(1) > + .addImm(2) > + .addImm(3) > + .addOperand(RID) > + .addOperand(SID) > + .addImm(CTX) > + .addImm(CTY) > + .addImm(CTZ) > + .addImm(CTW); > BuildMI(*BB, I, BB->findDebugLoc(I), > TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) > .addOperand(MI->getOperand(2)) > - .addOperand(MI->getOperand(4)) > - .addOperand(MI->getOperand(5)) > - .addOperand(MI->getOperand(6)); > + .addImm(SrcX) > + .addImm(SrcY) > + .addImm(SrcZ) > + .addImm(SrcW) > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(1) > + .addImm(2) > + .addImm(3) > + .addOperand(RID) > + .addOperand(SID) > + .addImm(CTX) > + .addImm(CTY) > + .addImm(CTZ) > + .addImm(CTW); > BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G)) > .addOperand(MI->getOperand(0)) > .addOperand(MI->getOperand(1)) > - .addOperand(MI->getOperand(4)) > - .addOperand(MI->getOperand(5)) > - .addOperand(MI->getOperand(6)) > + .addImm(SrcX) > + .addImm(SrcY) > + .addImm(SrcZ) > + 
.addImm(SrcW) > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(1) > + .addImm(2) > + .addImm(3) > + .addOperand(RID) > + .addOperand(SID) > + .addImm(CTX) > + .addImm(CTY) > + .addImm(CTZ) > + .addImm(CTW) > .addReg(T0, RegState::Implicit) > .addReg(T1, RegState::Implicit); > break; > @@ -204,23 +280,100 @@ MachineBasicBlock * > R600TargetLowering::EmitInstrWithCustomInserter( > case AMDGPU::TXD_SHADOW: { > unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); > unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); > + MachineOperand &RID = MI->getOperand(4); > + MachineOperand &SID = MI->getOperand(5); > + unsigned TextureId = MI->getOperand(6).getImm(); > + unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3; > + unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1; > + > + switch (TextureId) { > + case 5: // Rect > + CTX = CTY = 0; > + break; > + case 6: // Shadow1D > + SrcW = SrcZ; > + break; > + case 7: // Shadow2D > + SrcW = SrcZ; > + break; > + case 8: // ShadowRect > + CTX = CTY = 0; > + SrcW = SrcZ; > + break; > + case 9: // 1DArray > + SrcZ = SrcY; > + CTZ = 0; > + break; > + case 10: // 2DArray > + CTZ = 0; > + break; > + case 11: // Shadow1DArray > + SrcZ = SrcY; > + CTZ = 0; > + break; > + case 12: // Shadow2DArray > + CTZ = 0; > + break; > + } > > BuildMI(*BB, I, BB->findDebugLoc(I), > TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) > .addOperand(MI->getOperand(3)) > - .addOperand(MI->getOperand(4)) > - .addOperand(MI->getOperand(5)) > - .addOperand(MI->getOperand(6)); > + .addImm(SrcX) > + .addImm(SrcY) > + .addImm(SrcZ) > + .addImm(SrcW) > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(1) > + .addImm(2) > + .addImm(3) > + .addOperand(RID) > + .addOperand(SID) > + .addImm(CTX) > + .addImm(CTY) > + .addImm(CTZ) > + .addImm(CTW); > BuildMI(*BB, I, BB->findDebugLoc(I), > TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) > .addOperand(MI->getOperand(2)) > - .addOperand(MI->getOperand(4)) > - 
.addOperand(MI->getOperand(5)) > - .addOperand(MI->getOperand(6)); > + .addImm(SrcX) > + .addImm(SrcY) > + .addImm(SrcZ) > + .addImm(SrcW) > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(1) > + .addImm(2) > + .addImm(3) > + .addOperand(RID) > + .addOperand(SID) > + .addImm(CTX) > + .addImm(CTY) > + .addImm(CTZ) > + .addImm(CTW); > BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G)) > .addOperand(MI->getOperand(0)) > .addOperand(MI->getOperand(1)) > - .addOperand(MI->getOperand(4)) > - .addOperand(MI->getOperand(5)) > - .addOperand(MI->getOperand(6)) > + .addImm(SrcX) > + .addImm(SrcY) > + .addImm(SrcZ) > + .addImm(SrcW) > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(0) > + .addImm(1) > + .addImm(2) > + .addImm(3) > + .addOperand(RID) > + .addOperand(SID) > + .addImm(CTX) > + .addImm(CTY) > + .addImm(CTZ) > + .addImm(CTW) > .addReg(T0, RegState::Implicit) > .addReg(T1, RegState::Implicit); > break; > @@ -400,6 +553,75 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, > SelectionDAG &DAG) const > > return SDValue(interp, slot % 2); > } > + case AMDGPUIntrinsic::R600_tex: > + case AMDGPUIntrinsic::R600_texc: > + case AMDGPUIntrinsic::R600_txl: > + case AMDGPUIntrinsic::R600_txlc: > + case AMDGPUIntrinsic::R600_txb: > + case AMDGPUIntrinsic::R600_txbc: > + case AMDGPUIntrinsic::R600_txf: > + case AMDGPUIntrinsic::R600_txq: > + case AMDGPUIntrinsic::R600_ddx: > + case AMDGPUIntrinsic::R600_ddy: { > + unsigned TextureOp; > + switch (IntrinsicID) { > + case AMDGPUIntrinsic::R600_tex: > + TextureOp = 0; > + break; > + case AMDGPUIntrinsic::R600_texc: > + TextureOp = 1; > + break; > + case AMDGPUIntrinsic::R600_txl: > + TextureOp = 2; > + break; > + case AMDGPUIntrinsic::R600_txlc: > + TextureOp = 3; > + break; > + case AMDGPUIntrinsic::R600_txb: > + TextureOp = 4; > + break; > + case AMDGPUIntrinsic::R600_txbc: > + TextureOp = 5; > + break; > + case AMDGPUIntrinsic::R600_txf: > + TextureOp = 6; > + break; > + 
case AMDGPUIntrinsic::R600_txq: > + TextureOp = 7; > + break; > + case AMDGPUIntrinsic::R600_ddx: > + TextureOp = 8; > + break; > + case AMDGPUIntrinsic::R600_ddy: > + TextureOp = 9; > + break; > + default: > + llvm_unreachable("Unknow Texture Operation"); > + } > + > + SDValue TexArgs[19] = { > + DAG.getConstant(TextureOp, MVT::i32), > + Op.getOperand(1), > + DAG.getConstant(0, MVT::i32), > + DAG.getConstant(1, MVT::i32), > + DAG.getConstant(2, MVT::i32), > + DAG.getConstant(3, MVT::i32), > + Op.getOperand(2), > + Op.getOperand(3), > + Op.getOperand(4), > + DAG.getConstant(0, MVT::i32), > + DAG.getConstant(1, MVT::i32), > + DAG.getConstant(2, MVT::i32), > + DAG.getConstant(3, MVT::i32), > + Op.getOperand(5), > + Op.getOperand(6), > + Op.getOperand(7), > + Op.getOperand(8), > + Op.getOperand(9), > + Op.getOperand(10) > + }; > + return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, > 19); > + } > > case r600_read_ngroups_x: > return LowerImplicitParameter(DAG, VT, DL, 0); > diff --git a/lib/Target/R600/R600Instructions.td > b/lib/Target/R600/R600Instructions.td > index abaa94b..83bbab1 100644 > --- a/lib/Target/R600/R600Instructions.td > +++ b/lib/Target/R600/R600Instructions.td > @@ -96,6 +96,12 @@ def UP : InstFlag <"printUpdatePred">; > // Once we start using the packetizer in this backend we should have this > // default to 0. 
> def LAST : InstFlag<"printLast", 1>; > +def RSel : Operand<i32> { > + let PrintMethod = "printRSel"; > +} > +def CT: Operand<i32> { > + let PrintMethod = "printCT"; > +} > > def FRAMEri : Operand<iPTR> { > let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index); > @@ -463,38 +469,7 @@ class R600_REDUCTION <bits<11> inst, dag ins, string > asm, list<dag> pattern, > pattern, > itin>; > > -class R600_TEX <bits<11> inst, string opName, list<dag> pattern, > - InstrItinClass itin = AnyALU> : > - InstR600 <(outs R600_Reg128:$DST_GPR), > - (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, > i32imm:$SAMPLER_ID, i32imm:$textureTarget), > - !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, > $textureTarget"), > - pattern, > - itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 { > - let Inst{31-0} = Word0; > - let Inst{63-32} = Word1; > - > - let TEX_INST = inst{4-0}; > - let SRC_REL = 0; > - let DST_REL = 0; > - let DST_SEL_X = 0; > - let DST_SEL_Y = 1; > - let DST_SEL_Z = 2; > - let DST_SEL_W = 3; > - let LOD_BIAS = 0; > - > - let INST_MOD = 0; > - let FETCH_WHOLE_QUAD = 0; > - let ALT_CONST = 0; > - let SAMPLER_INDEX_MODE = 0; > - let RESOURCE_INDEX_MODE = 0; > - > - let COORD_TYPE_X = 0; > - let COORD_TYPE_Y = 0; > - let COORD_TYPE_Z = 0; > - let COORD_TYPE_W = 0; > - > - let TEXInst = 1; > - } > + > > } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 > > @@ -618,6 +593,29 @@ def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", > [SDNPVariadic] > >; > > +def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>; > + > +def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, > []>; > + > +multiclass TexPattern<bits<32> TextureOp, Instruction inst, ValueType vt = > v4f32> { > +def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR, > + (i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw), > + (i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz), > + (i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z), > + (i32 
imm:$DST_SEL_W), > + (i32 imm:$RESOURCE_ID), (i32 imm:$SAMPLER_ID), > + (i32 imm:$COORD_TYPE_X), (i32 imm:$COORD_TYPE_Y), (i32 > imm:$COORD_TYPE_Z), > + (i32 imm:$COORD_TYPE_W)), > + (inst R600_Reg128:$SRC_GPR, > + imm:$srcx, imm:$srcy, imm:$srcz, imm:$srcw, > + imm:$offsetx, imm:$offsety, imm:$offsetz, > + imm:$DST_SEL_X, imm:$DST_SEL_Y, imm:$DST_SEL_Z, > + imm:$DST_SEL_W, > + imm:$RESOURCE_ID, imm:$SAMPLER_ID, > + imm:$COORD_TYPE_X, imm:$COORD_TYPE_Y, imm:$COORD_TYPE_Z, > + imm:$COORD_TYPE_W)>; > +} > + > > //===----------------------------------------------------------------------===// > // Interpolation Instructions > > //===----------------------------------------------------------------------===// > @@ -1132,92 +1130,70 @@ def CNDGT_INT : R600_3OP < > // Texture instructions > > //===----------------------------------------------------------------------===// > > -def TEX_LD : R600_TEX < > - 0x03, "TEX_LD", > - [(set v4f32:$DST_GPR, (int_AMDGPU_txf v4f32:$SRC_GPR, > - imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID, > - imm:$SAMPLER_ID, imm:$textureTarget))] > -> { > -let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z," > - "$RESOURCE_ID, $SAMPLER_ID, $textureTarget"; > -let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X, > - i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, > i32imm:$SAMPLER_ID, > - i32imm:$textureTarget); > -} > - > -def TEX_GET_TEXTURE_RESINFO : R600_TEX < > - 0x04, "TEX_GET_TEXTURE_RESINFO", > - [(set v4f32:$DST_GPR, (int_AMDGPU_txq v4f32:$SRC_GPR, > - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] > ->; > - > -def TEX_GET_GRADIENTS_H : R600_TEX < > - 0x07, "TEX_GET_GRADIENTS_H", > - [(set v4f32:$DST_GPR, (int_AMDGPU_ddx v4f32:$SRC_GPR, > - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] > ->; > - > -def TEX_GET_GRADIENTS_V : R600_TEX < > - 0x08, "TEX_GET_GRADIENTS_V", > - [(set v4f32:$DST_GPR, (int_AMDGPU_ddy v4f32:$SRC_GPR, > - imm:$RESOURCE_ID, 
imm:$SAMPLER_ID, imm:$textureTarget))] > ->; > - > -def TEX_SET_GRADIENTS_H : R600_TEX < > - 0x0B, "TEX_SET_GRADIENTS_H", > - [] > ->; > - > -def TEX_SET_GRADIENTS_V : R600_TEX < > - 0x0C, "TEX_SET_GRADIENTS_V", > - [] > ->; > +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { > > -def TEX_SAMPLE : R600_TEX < > - 0x10, "TEX_SAMPLE", > - [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR, > - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] > ->; > +class R600_TEX <bits<11> inst, string opName> : > + InstR600 <(outs R600_Reg128:$DST_GPR), > + (ins R600_Reg128:$SRC_GPR, > + RSel:$srcx, RSel:$srcy, RSel:$srcz, RSel:$srcw, > + i32imm:$offsetx, i32imm:$offsety, i32imm:$offsetz, > + RSel:$DST_SEL_X, RSel:$DST_SEL_Y, RSel:$DST_SEL_Z, RSel:$DST_SEL_W, > + i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, > + CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z, > + CT:$COORD_TYPE_W), > + !strconcat(opName, > + " $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, " > + "$SRC_GPR.$srcx$srcy$srcz$srcw " > + "RID:$RESOURCE_ID SID:$SAMPLER_ID " > + "CT:$COORD_TYPE_X$COORD_TYPE_Y$COORD_TYPE_Z$COORD_TYPE_W"), > + [], > + NullALU>, TEX_WORD0, TEX_WORD1, TEX_WORD2 { > + let Inst{31-0} = Word0; > + let Inst{63-32} = Word1; > > -def TEX_SAMPLE_C : R600_TEX < > - 0x18, "TEX_SAMPLE_C", > - [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR, > - imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] > ->; > + let TEX_INST = inst{4-0}; > + let SRC_REL = 0; > + let DST_REL = 0; > + let LOD_BIAS = 0; > > -def TEX_SAMPLE_L : R600_TEX < > - 0x11, "TEX_SAMPLE_L", > - [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR, > - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] > ->; > + let INST_MOD = 0; > + let FETCH_WHOLE_QUAD = 0; > + let ALT_CONST = 0; > + let SAMPLER_INDEX_MODE = 0; > + let RESOURCE_INDEX_MODE = 0; > > -def TEX_SAMPLE_C_L : R600_TEX < > - 0x19, "TEX_SAMPLE_C_L", > - [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR, > - imm:$RESOURCE_ID, 
imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] > ->; > + let TEXInst = 1; > +} > > -def TEX_SAMPLE_LB : R600_TEX < > - 0x12, "TEX_SAMPLE_LB", > - [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR, > - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] > ->; > +} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0 > > -def TEX_SAMPLE_C_LB : R600_TEX < > - 0x1A, "TEX_SAMPLE_C_LB", > - [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR, > - imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] > ->; > > -def TEX_SAMPLE_G : R600_TEX < > - 0x14, "TEX_SAMPLE_G", > - [] > ->; > > -def TEX_SAMPLE_C_G : R600_TEX < > - 0x1C, "TEX_SAMPLE_C_G", > - [] > ->; > +def TEX_SAMPLE : R600_TEX <0x10, "TEX_SAMPLE">; > +def TEX_SAMPLE_C : R600_TEX <0x18, "TEX_SAMPLE_C">; > +def TEX_SAMPLE_L : R600_TEX <0x11, "TEX_SAMPLE_L">; > +def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">; > +def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">; > +def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">; > +def TEX_LD : R600_TEX <0x03, "TEX_LD">; > +def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">; > +def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">; > +def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">; > +def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H">; > +def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V">; > +def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G">; > +def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G">; > + > +defm : TexPattern<0, TEX_SAMPLE>; > +defm : TexPattern<1, TEX_SAMPLE_C>; > +defm : TexPattern<2, TEX_SAMPLE_L>; > +defm : TexPattern<3, TEX_SAMPLE_C_L>; > +defm : TexPattern<4, TEX_SAMPLE_LB>; > +defm : TexPattern<5, TEX_SAMPLE_C_LB>; > +defm : TexPattern<6, TEX_LD, v4i32>; > +defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>; > +defm : TexPattern<8, TEX_GET_GRADIENTS_H>; > +defm : TexPattern<9, TEX_GET_GRADIENTS_V>; > > > 
//===----------------------------------------------------------------------===// > // Helper classes for common instructions > diff --git a/lib/Target/R600/R600Intrinsics.td > b/lib/Target/R600/R600Intrinsics.td > index dc8980a..7c434bc 100644 > --- a/lib/Target/R600/R600Intrinsics.td > +++ b/lib/Target/R600/R600Intrinsics.td > @@ -18,6 +18,136 @@ let TargetPrefix = "R600", isTarget = 1 in { > Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; > def int_R600_load_texbuf : > Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; > + def int_R600_tex : > + Intrinsic<[llvm_v4f32_ty], [ > + llvm_v4f32_ty, // Coord > + llvm_i32_ty, // offset_x > + llvm_i32_ty, // offset_y, > + llvm_i32_ty, // offset_z, > + llvm_i32_ty, // resource_id > + llvm_i32_ty, // samplerid > + llvm_i32_ty, // coord_type_x > + llvm_i32_ty, // coord_type_y > + llvm_i32_ty, // coord_type_z > + llvm_i32_ty // coord_type_w > + ], [IntrNoMem]>; > + def int_R600_texc : > + Intrinsic<[llvm_v4f32_ty], [ > + llvm_v4f32_ty, // Coord > + llvm_i32_ty, // offset_x > + llvm_i32_ty, // offset_y, > + llvm_i32_ty, // offset_z, > + llvm_i32_ty, // resource_id > + llvm_i32_ty, // samplerid > + llvm_i32_ty, // coord_type_x > + llvm_i32_ty, // coord_type_y > + llvm_i32_ty, // coord_type_z > + llvm_i32_ty // coord_type_w > + ], [IntrNoMem]>; > + def int_R600_txl : > + Intrinsic<[llvm_v4f32_ty], [ > + llvm_v4f32_ty, // Coord > + llvm_i32_ty, // offset_x > + llvm_i32_ty, // offset_y, > + llvm_i32_ty, // offset_z, > + llvm_i32_ty, // resource_id > + llvm_i32_ty, // samplerid > + llvm_i32_ty, // coord_type_x > + llvm_i32_ty, // coord_type_y > + llvm_i32_ty, // coord_type_z > + llvm_i32_ty // coord_type_w > + ], [IntrNoMem]>; > + def int_R600_txlc : > + Intrinsic<[llvm_v4f32_ty], [ > + llvm_v4f32_ty, // Coord > + llvm_i32_ty, // offset_x > + llvm_i32_ty, // offset_y, > + llvm_i32_ty, // offset_z, > + llvm_i32_ty, // resource_id > + llvm_i32_ty, // samplerid > + llvm_i32_ty, // coord_type_x > 
+ llvm_i32_ty, // coord_type_y > + llvm_i32_ty, // coord_type_z > + llvm_i32_ty // coord_type_w > + ], [IntrNoMem]>; > + def int_R600_txb : > + Intrinsic<[llvm_v4f32_ty], [ > + llvm_v4f32_ty, // Coord > + llvm_i32_ty, // offset_x > + llvm_i32_ty, // offset_y, > + llvm_i32_ty, // offset_z, > + llvm_i32_ty, // resource_id > + llvm_i32_ty, // samplerid > + llvm_i32_ty, // coord_type_x > + llvm_i32_ty, // coord_type_y > + llvm_i32_ty, // coord_type_z > + llvm_i32_ty // coord_type_w > + ], [IntrNoMem]>; > + def int_R600_txbc : > + Intrinsic<[llvm_v4f32_ty], [ > + llvm_v4f32_ty, // Coord > + llvm_i32_ty, // offset_x > + llvm_i32_ty, // offset_y, > + llvm_i32_ty, // offset_z, > + llvm_i32_ty, // resource_id > + llvm_i32_ty, // samplerid > + llvm_i32_ty, // coord_type_x > + llvm_i32_ty, // coord_type_y > + llvm_i32_ty, // coord_type_z > + llvm_i32_ty // coord_type_w > + ], [IntrNoMem]>; > + def int_R600_txf : > + Intrinsic<[llvm_v4i32_ty], [ > + llvm_v4f32_ty, // Coord > + llvm_i32_ty, // offset_x > + llvm_i32_ty, // offset_y, > + llvm_i32_ty, // offset_z, > + llvm_i32_ty, // resource_id > + llvm_i32_ty, // samplerid > + llvm_i32_ty, // coord_type_x > + llvm_i32_ty, // coord_type_y > + llvm_i32_ty, // coord_type_z > + llvm_i32_ty // coord_type_w > + ], [IntrNoMem]>; > + def int_R600_txq : > + Intrinsic<[llvm_v4i32_ty], [ > + llvm_v4f32_ty, // Coord > + llvm_i32_ty, // offset_x > + llvm_i32_ty, // offset_y, > + llvm_i32_ty, // offset_z, > + llvm_i32_ty, // resource_id > + llvm_i32_ty, // samplerid > + llvm_i32_ty, // coord_type_x > + llvm_i32_ty, // coord_type_y > + llvm_i32_ty, // coord_type_z > + llvm_i32_ty // coord_type_w > + ], [IntrNoMem]>; > + def int_R600_ddx : > + Intrinsic<[llvm_v4f32_ty], [ > + llvm_v4f32_ty, // Coord > + llvm_i32_ty, // offset_x > + llvm_i32_ty, // offset_y, > + llvm_i32_ty, // offset_z, > + llvm_i32_ty, // resource_id > + llvm_i32_ty, // samplerid > + llvm_i32_ty, // coord_type_x > + llvm_i32_ty, // coord_type_y > + llvm_i32_ty, // coord_type_z 
> + llvm_i32_ty // coord_type_w > + ], [IntrNoMem]>; > + def int_R600_ddy : > + Intrinsic<[llvm_v4f32_ty], [ > + llvm_v4f32_ty, // Coord > + llvm_i32_ty, // offset_x > + llvm_i32_ty, // offset_y, > + llvm_i32_ty, // offset_z, > + llvm_i32_ty, // resource_id > + llvm_i32_ty, // samplerid > + llvm_i32_ty, // coord_type_x > + llvm_i32_ty, // coord_type_y > + llvm_i32_ty, // coord_type_z > + llvm_i32_ty // coord_type_w > + ], [IntrNoMem]>; You can create an intrinsic class for textures and factor out a lot of this duplicate code. > def int_R600_store_swizzle : > Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; > def int_R600_store_stream_output : > diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp > b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp > new file mode 100644 > index 0000000..938bd51 > --- /dev/null > +++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp > @@ -0,0 +1,286 @@ > +//===-- R600TextureIntrinsicsReplacer.cpp > ---------------------------------===// > +// > +// The LLVM Compiler Infrastructure > +// > +// This file is distributed under the University of Illinois Open Source > +// License. See LICENSE.TXT for details. > +// > +//===----------------------------------------------------------------------===// > +// > +/// \file > +/// This pass translates tgsi-like texture intrinsics into R600 texture > +/// closer to hardware intrinsics. 
> +//===----------------------------------------------------------------------===// > + > +#include "AMDGPU.h" > +#include "llvm/Analysis/Passes.h" > +#include "llvm/ADT/Statistic.h" > +#include "llvm/IR/Function.h" > +#include "llvm/InstVisitor.h" > +#include "llvm/IR/IRBuilder.h" > +#include "llvm/IR/GlobalValue.h" > + > +using namespace llvm; > + > +namespace { > +class R600TextureIntrinsicsReplacer : > + public FunctionPass, public InstVisitor<R600TextureIntrinsicsReplacer> { > + static char ID; > + > + Module *Mod; > + Type *FloatType; > + Type *Int32Type; > + Type *V4f32Type; > + Type *V4i32Type; > + FunctionType *TexSign; > + FunctionType *TexQSign; > + > + void getAdjustementFromTextureTarget(unsigned TextureType, bool hasLOD, > + unsigned SrcSelect[4], unsigned CT[4], > + bool &useShadowVariant) { > + enum TextureTypes { > + TEXTURE_1D = 1, > + TEXTURE_2D, > + TEXTURE_3D, > + TEXTURE_CUBE, > + TEXTURE_RECT, > + TEXTURE_SHADOW1D, > + TEXTURE_SHADOW2D, > + TEXTURE_SHADOWRECT, > + TEXTURE_1D_ARRAY, > + TEXTURE_2D_ARRAY, > + TEXTURE_SHADOW1D_ARRAY, > + TEXTURE_SHADOW2D_ARRAY, > + TEXTURE_SHADOWCUBE, > + TEXTURE_2D_MSAA, > + TEXTURE_2D_ARRAY_MSAA, > + TEXTURE_CUBE_ARRAY, > + TEXTURE_SHADOWCUBE_ARRAY > + }; > + > + switch (TextureType) { > + case 0: > + return; > + case TEXTURE_RECT: > + case TEXTURE_1D: > + case TEXTURE_2D: > + case TEXTURE_3D: > + case TEXTURE_CUBE: > + case TEXTURE_1D_ARRAY: > + case TEXTURE_2D_ARRAY: > + case TEXTURE_CUBE_ARRAY: > + case TEXTURE_2D_MSAA: > + case TEXTURE_2D_ARRAY_MSAA: > + useShadowVariant = false; > + break; > + case TEXTURE_SHADOW1D: > + case TEXTURE_SHADOW2D: > + case TEXTURE_SHADOWRECT: > + case TEXTURE_SHADOW1D_ARRAY: > + case TEXTURE_SHADOW2D_ARRAY: > + case TEXTURE_SHADOWCUBE: > + case TEXTURE_SHADOWCUBE_ARRAY: > + useShadowVariant = true; > + break; > + default: > + llvm_unreachable("Unknow Texture Type"); > + } > + > + if (TextureType == TEXTURE_RECT || > + TextureType == TEXTURE_SHADOWRECT) { > + CT[0] = 0; > + 
CT[1] = 0; > + } > + > + if (TextureType == TEXTURE_CUBE_ARRAY || > + TextureType == TEXTURE_SHADOWCUBE_ARRAY) { > + CT[2] = 0; > + } > + > + if (TextureType == TEXTURE_1D_ARRAY || > + TextureType == TEXTURE_SHADOW1D_ARRAY) { > + if (hasLOD && useShadowVariant) { > + CT[1] = 0; > + } else { > + CT[2] = 0; > + SrcSelect[2] = 1; > + } > + } else if (TextureType == TEXTURE_2D_ARRAY || > + TextureType == TEXTURE_SHADOW2D_ARRAY) { > + CT[2] = 0; > + } > + > + if ((TextureType == TEXTURE_SHADOW1D || > + TextureType == TEXTURE_SHADOW2D || > + TextureType == TEXTURE_SHADOWRECT || > + TextureType == TEXTURE_SHADOW1D_ARRAY) && > + !(hasLOD && useShadowVariant)) { > + SrcSelect[3] = 2; > + } > + } > + > + void ReplaceCallInst(CallInst &I, FunctionType *FT, const char *Name, > + unsigned SrcSelect[4], Value *Offset[3], Value > *Resource, > + Value *Sampler, unsigned CT[4], Value *Coord) { > + IRBuilder<> Builder(&I); > + Constant *Mask[] = { > + ConstantInt::get(Int32Type, SrcSelect[0]), > + ConstantInt::get(Int32Type, SrcSelect[1]), > + ConstantInt::get(Int32Type, SrcSelect[2]), > + ConstantInt::get(Int32Type, SrcSelect[3]) > + }; > + Value *SwizzleMask = ConstantVector::get(Mask); > + Value *SwizzledCoord = > + Builder.CreateShuffleVector(Coord, Coord, SwizzleMask); > + > + Value *Args[] = { > + SwizzledCoord, > + Offset[0], > + Offset[1], > + Offset[2], > + Resource, > + Sampler, > + ConstantInt::get(Int32Type, CT[0]), > + ConstantInt::get(Int32Type, CT[1]), > + ConstantInt::get(Int32Type, CT[2]), > + ConstantInt::get(Int32Type, CT[3]) > + }; > + > + Function *F = Mod->getFunction(Name); > + if (!F) { > + F = Function::Create(FT, GlobalValue::ExternalLinkage, Name, Mod); > + F->addFnAttr(Attribute::ReadNone); > + } > + I.replaceAllUsesWith(Builder.CreateCall(F, Args)); > + I.eraseFromParent(); > + } > + > + void ReplaceTexIntrinsic(CallInst &I, bool hasLOD, FunctionType *FT, > + const char *VanillaInt, > + const char *ShadowInt) { > + Value *Coord = I.getArgOperand(0); > + 
Value *ResourceId = I.getArgOperand(1); > + Value *SamplerId = I.getArgOperand(2); > + > + unsigned TextureType = > + dyn_cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); > + > + unsigned SrcSelect[4] = { 0, 1, 2, 3 }; > + unsigned CT[4] = {1, 1, 1, 1}; > + Value *Offset[3] = { > + ConstantInt::get(Int32Type, 0), > + ConstantInt::get(Int32Type, 0), > + ConstantInt::get(Int32Type, 0) > + }; > + bool useShadowVariant; > + > + getAdjustementFromTextureTarget(TextureType, hasLOD, SrcSelect, CT, > + useShadowVariant); > + > + ReplaceCallInst(I, FT, useShadowVariant?ShadowInt:VanillaInt, SrcSelect, > + Offset, ResourceId, SamplerId, CT, Coord); > + } > + > + void ReplaceTXF(CallInst &I) { > + Value *Coord = I.getArgOperand(0); > + Value *ResourceId = I.getArgOperand(4); > + Value *SamplerId = I.getArgOperand(5); > + > + unsigned TextureType = > + dyn_cast<ConstantInt>(I.getArgOperand(6))->getZExtValue(); > + > + unsigned SrcSelect[4] = { 0, 1, 2, 3 }; > + unsigned CT[4] = {1, 1, 1, 1}; > + Value *Offset[3] = { > + I.getArgOperand(1), > + I.getArgOperand(2), > + I.getArgOperand(3), > + }; > + bool useShadowVariant; > + > + getAdjustementFromTextureTarget(TextureType, false, SrcSelect, CT, > + useShadowVariant); > + > + ReplaceCallInst(I, TexQSign, "llvm.R600.txf", SrcSelect, > + Offset, ResourceId, SamplerId, CT, Coord); > + } > + > +public: > + R600TextureIntrinsicsReplacer(): > + FunctionPass(ID) { > + } > + > + virtual bool doInitialization(Module &M) { > + LLVMContext &Ctx = M.getContext(); > + Mod = &M; > + FloatType = Type::getFloatTy(Ctx); > + Int32Type = Type::getInt32Ty(Ctx); > + V4f32Type = VectorType::get(FloatType, 4); > + V4i32Type = VectorType::get(Int32Type, 4); > + Type *ArgsType[] = { > + V4f32Type, > + Int32Type, > + Int32Type, > + Int32Type, > + Int32Type, > + Int32Type, > + Int32Type, > + Int32Type, > + Int32Type, > + Int32Type, > + }; > + TexSign = FunctionType::get(V4f32Type, ArgsType); > + Type *ArgsQType[] = { > + V4i32Type, > + Int32Type, > 
+ Int32Type, > + Int32Type, > + Int32Type, > + Int32Type, > + Int32Type, > + Int32Type, > + Int32Type, > + Int32Type, > + }; > + TexQSign = FunctionType::get(V4f32Type, ArgsQType); > + return false; > + } > + > + virtual bool runOnFunction(Function &F) { > + visit(F); > + return false; > + } > + > + virtual const char *getPassName() const { > + return "R600 Texture Intrinsics Replacer"; > + } > + > + void getAnalysisUsage(AnalysisUsage &AU) const { > + } > + > + void visitCallInst(CallInst &I) { > + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.tex") > + ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.tex", > "llvm.R600.texc"); > + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txl") > + ReplaceTexIntrinsic(I, true, TexSign, "llvm.R600.txl", > "llvm.R600.txlc"); > + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txb") > + ReplaceTexIntrinsic(I, true, TexSign, "llvm.R600.txb", > "llvm.R600.txbc"); > + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txf") > + ReplaceTXF(I); > + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txq") > + ReplaceTexIntrinsic(I, false, TexQSign, "llvm.R600.txq", > "llvm.R600.txq"); > + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.ddx") > + ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.ddx", > "llvm.R600.ddx"); > + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.ddy") > + ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.ddy", > "llvm.R600.ddy"); > + } > + > +}; > + > +char R600TextureIntrinsicsReplacer::ID = 0; > + > +} > + > +FunctionPass *llvm::createR600TextureIntrinsicsReplacer() { > + return new R600TextureIntrinsicsReplacer(); > +} > diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll > b/test/CodeGen/R600/llvm.AMDGPU.tex.ll > index 74331fa..4ea82bb 100644 > --- a/test/CodeGen/R600/llvm.AMDGPU.tex.ll > +++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll > @@ -1,21 +1,21 @@ > ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s > > -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 1 > 
-;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 2 > -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 3 > -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 4 > -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 5 > -;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 6 > -;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 7 > -;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 8 > -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 9 > -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 10 > -;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 11 > -;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 12 > -;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 13 > -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 14 > -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 15 > -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 16 > +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN > +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN > +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN > +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN > +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:UUNN > +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN > +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN > +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:UUNN > +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN > +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN > +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN > +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN > +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN > +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, 
T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN > +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN > +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN > > define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* > %in) { > %addr = load <4 x float> addrspace(1)* %in > -- > 1.8.2.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev