Am 10.04.2013 11:46, schrieb Michel Dänzer:
From: Michel Dänzer <michel.daen...@amd.com>

21 more little piglits with radeonsi.

Signed-off-by: Michel Dänzer <michel.daen...@amd.com>
You figured it out, great! But why the heck does multiplying by 0x4f800000 fix the result?

Cheers,
Christian.

---

v2: Now with lit test.

  lib/Target/R600/SIInstructions.td | 11 +++++++++--
  test/CodeGen/R600/urecip.ll       | 12 ++++++++++++
  2 files changed, 21 insertions(+), 2 deletions(-)
  create mode 100644 test/CodeGen/R600/urecip.ll

diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index e2a08fc..7865939 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -602,8 +602,8 @@ defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
  defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
    [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))]
  >;
-//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
-//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
+defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
  defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
    [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))]
  >;
@@ -1514,6 +1514,13 @@ def : Pat <
    (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
  >;
+def : Pat <
+  (AMDGPUurecip VSrc_32:$src0),
+  (V_CVT_U32_F32_e32
+    (V_MUL_F32_e32 (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 VSrc_32:$src0)),
+                   (V_MOV_B32_e32 0x4f800000)))
+>;
+
  /********** ================== **********/
  /**********   VOP3 Patterns    **********/
  /********** ================== **********/
diff --git a/test/CodeGen/R600/urecip.ll b/test/CodeGen/R600/urecip.ll
new file mode 100644
index 0000000..dad02dd
--- /dev/null
+++ b/test/CodeGen/R600/urecip.ll
@@ -0,0 +1,12 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_RCP_IFLAG_F32_e32
+
+define void @test(i32 %p, i32 %q) {
+   %i = udiv i32 %p, %q
+   %r = bitcast i32 %i to float
+   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+   ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to