From: Michel Dänzer <michel.daen...@amd.com> Fixes 21 more little piglit tests with radeonsi.
Signed-off-by: Michel Dänzer <michel.daen...@amd.com> --- v3: Use a constant for, and add comments about, the scaling multiplications lib/Target/R600/AMDGPUInstructions.td | 1 + lib/Target/R600/R600Instructions.td | 3 ++- lib/Target/R600/SIInstructions.td | 12 ++++++++++-- test/CodeGen/R600/urecip.ll | 12 ++++++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/R600/urecip.ll diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index e740348..fa890c1 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -94,6 +94,7 @@ class Constants { int TWO_PI = 0x40c90fdb; int PI = 0x40490fdb; int TWO_PI_INV = 0x3e22f983; +int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding } def CONST : Constants; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index b4c45e1..8ede6cc 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1923,10 +1923,11 @@ def : COS_PAT <COS_cm>; defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; // RECIP_UINT emulation for Cayman +// The multiplication scales from [0,1] to the unsigned integer range def : Pat < (AMDGPUurecip R600_Reg32:$src0), (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)), - (MOV_IMM_I32 0x4f800000))) + (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) >; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index e2a08fc..0226d5a 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -602,8 +602,8 @@ defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>; defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32", [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))] >; -//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>; -//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; +defm V_CVT_F32_U32 : VOP1_32 <0x00000006, 
"V_CVT_F32_U32", []>; +defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))] >; @@ -1514,6 +1514,14 @@ def : Pat < (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0) >; +// The multiplication scales from [0,1] to the unsigned integer range +def : Pat < + (AMDGPUurecip VSrc_32:$src0), + (V_CVT_U32_F32_e32 + (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1, + (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 VSrc_32:$src0)))) +>; + /********** ================== **********/ /********** VOP3 Patterns **********/ /********** ================== **********/ diff --git a/test/CodeGen/R600/urecip.ll b/test/CodeGen/R600/urecip.ll new file mode 100644 index 0000000..dad02dd --- /dev/null +++ b/test/CodeGen/R600/urecip.ll @@ -0,0 +1,12 @@ +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s + +;CHECK: V_RCP_IFLAG_F32_e32 + +define void @test(i32 %p, i32 %q) { + %i = udiv i32 %p, %q + %r = bitcast i32 %i to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) -- 1.8.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev