Re: [Mesa-dev] R600/SI: Initial double precision support for Radeon SI
Hi Tom, "All these patches look good to me, but #2 and #6 should have a test case with them. If you resubmit these patches with test cases, I will push the entire series." I have attached an updated patchset. I have added a test case to each of patches #2 and #6. I have also replaced the scalar move in patch #2 with a vector move, since there is probably no point in having a floating-point value in a scalar register. Kind regards, Ole From 4224b314cf2d97cdf2ac99564d6155fa04fbb971 Mon Sep 17 00:00:00 2001 From: Niels Ole Salscheider niels_...@salscheider-online.de Date: Sat, 1 Jun 2013 16:48:56 +0200 Subject: [PATCH 1/6] R600/SI: Add initial double precision support for SI --- lib/Target/R600/AMDGPUISelLowering.cpp | 6 ++ lib/Target/R600/SIISelLowering.cpp | 1 + lib/Target/R600/SIInstructions.td | 30 +- test/CodeGen/R600/fadd64.ll| 13 + test/CodeGen/R600/fdiv64.ll| 14 ++ test/CodeGen/R600/fmul64.ll| 13 + test/CodeGen/R600/load64.ll| 20 7 Dateien geändert, 96 Zeilen hinzugefügt(+), 1 Zeile entfernt(-) create mode 100644 test/CodeGen/R600/fadd64.ll create mode 100644 test/CodeGen/R600/fdiv64.ll create mode 100644 test/CodeGen/R600/fmul64.ll create mode 100644 test/CodeGen/R600/load64.ll diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 4019a1f..5f3d496 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -60,12 +60,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine TM) : setOperationAction(ISD::STORE, MVT::v4f32, Promote); AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); + setOperationAction(ISD::STORE, MVT::f64, Promote); + AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); + setOperationAction(ISD::LOAD, MVT::f32, Promote); AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); setOperationAction(ISD::LOAD, MVT::v4f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); + setOperationAction(ISD::LOAD, MVT::f64, Promote); + AddPromotedToType(ISD::LOAD, MVT::f64, 
MVT::i64); + setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 9d4cfef..0d17a12 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -45,6 +45,7 @@ SITargetLowering::SITargetLowering(TargetMachine TM) : addRegisterClass(MVT::v2i32, AMDGPU::VReg_64RegClass); addRegisterClass(MVT::v2f32, AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::f64, AMDGPU::VReg_64RegClass); addRegisterClass(MVT::v4i32, AMDGPU::VReg_128RegClass); addRegisterClass(MVT::v4f32, AMDGPU::VReg_128RegClass); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 9c96c08..b956387 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -660,7 +660,9 @@ defm V_RSQ_LEGACY_F32 : VOP1_32 [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))] ; defm V_RSQ_F32 : VOP1_32 0x002e, V_RSQ_F32, []; -defm V_RCP_F64 : VOP1_64 0x002f, V_RCP_F64, []; +defm V_RCP_F64 : VOP1_64 0x002f, V_RCP_F64, + [(set f64:$dst, (fdiv FP_ONE, f64:$src0))] +; defm V_RCP_CLAMP_F64 : VOP1_64 0x0030, V_RCP_CLAMP_F64, []; defm V_RSQ_F64 : VOP1_64 0x0031, V_RSQ_F64, []; defm V_RSQ_CLAMP_F64 : VOP1_64 0x0032, V_RSQ_CLAMP_F64, []; @@ -996,10 +998,25 @@ def V_LSHR_B64 : VOP3_64_Shift 0x0162, V_LSHR_B64, ; def V_ASHR_I64 : VOP3_64_Shift 0x0163, V_ASHR_I64, []; +let isCommutable = 1 in { + def V_ADD_F64 : VOP3_64 0x0164, V_ADD_F64, []; def V_MUL_F64 : VOP3_64 0x0165, V_MUL_F64, []; def V_MIN_F64 : VOP3_64 0x0166, V_MIN_F64, []; def V_MAX_F64 : VOP3_64 0x0167, V_MAX_F64, []; + +} // isCommutable = 1 + +def : Pat + (fadd f64:$src0, f64:$src1), + (V_ADD_F64 $src0, $src1, (i64 0)) +; + +def : Pat + (fmul f64:$src0, f64:$src1), + (V_MUL_F64 $src0, $src1, (i64 0)) +; + def V_LDEXP_F64 : VOP3_64 0x0168, V_LDEXP_F64, []; let isCommutable = 1 in { @@ -1417,6 +1434,10 @@ def : BitConvert i32, f32, VReg_32; def : BitConvert 
f32, i32, SReg_32; def : BitConvert f32, i32, VReg_32; +def : BitConvert i64, f64, VReg_64; + +def : BitConvert f64, i64, VReg_64; + /** === **/ /** Src Dst modifiers **/ /** === **/ @@ -1505,6 +1526,11 @@ def : Pat (V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1)) ; +def : Pat + (fdiv f64:$src0, f64:$src1), + (V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0)) +; + def : Pat (fcos f32:$src0), (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) @@ -1634,6 +1660,8 @@ multiclass MUBUFLoad_Pattern MUBUF Instr_ADDR64, ValueType vt, ; } +defm : MUBUFLoad_Pattern
Re: [Mesa-dev] R600/SI: Initial double precision support for Radeon SI
On Tue, Jul 02, 2013 at 10:44:10AM +0200, Niels Ole Salscheider wrote: "Hi, the attached patches add initial support for double precision operations on Southern Islands cards. Some expressions containing multiple double precision kernel arguments cause llvm to run until all memory is used - but I do not (yet) know why. It works fine as long as I pass pointers to double values." I may have an idea about why this is happening. Could you file a bug report and attach an LLVM IR test case? All these patches look good to me, but #2 and #6 should have a test case with them. If you resubmit these patches with test cases, I will push the entire series. Nice work! -Tom "Regards, Ole" From 4224b314cf2d97cdf2ac99564d6155fa04fbb971 Mon Sep 17 00:00:00 2001 From: Niels Ole Salscheider niels_...@salscheider-online.de Date: Sat, 1 Jun 2013 16:48:56 +0200 Subject: [PATCH 1/6] R600/SI: Add initial double precision support for SI --- lib/Target/R600/AMDGPUISelLowering.cpp | 6 ++ lib/Target/R600/SIISelLowering.cpp | 1 + lib/Target/R600/SIInstructions.td | 30 +- test/CodeGen/R600/fadd64.ll| 13 + test/CodeGen/R600/fdiv64.ll| 14 ++ test/CodeGen/R600/fmul64.ll| 13 + test/CodeGen/R600/load64.ll| 20 7 Dateien geändert, 96 Zeilen hinzugefügt(+), 1 Zeile entfernt(-) create mode 100644 test/CodeGen/R600/fadd64.ll create mode 100644 test/CodeGen/R600/fdiv64.ll create mode 100644 test/CodeGen/R600/fmul64.ll create mode 100644 test/CodeGen/R600/load64.ll diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 4019a1f..5f3d496 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -60,12 +60,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine TM) : setOperationAction(ISD::STORE, MVT::v4f32, Promote); AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); + setOperationAction(ISD::STORE, MVT::f64, Promote); + AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); + setOperationAction(ISD::LOAD, MVT::f32, 
Promote); AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); setOperationAction(ISD::LOAD, MVT::v4f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); + setOperationAction(ISD::LOAD, MVT::f64, Promote); + AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); + setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 9d4cfef..0d17a12 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -45,6 +45,7 @@ SITargetLowering::SITargetLowering(TargetMachine TM) : addRegisterClass(MVT::v2i32, AMDGPU::VReg_64RegClass); addRegisterClass(MVT::v2f32, AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::f64, AMDGPU::VReg_64RegClass); addRegisterClass(MVT::v4i32, AMDGPU::VReg_128RegClass); addRegisterClass(MVT::v4f32, AMDGPU::VReg_128RegClass); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 9c96c08..b956387 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -660,7 +660,9 @@ defm V_RSQ_LEGACY_F32 : VOP1_32 [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))] ; defm V_RSQ_F32 : VOP1_32 0x002e, V_RSQ_F32, []; -defm V_RCP_F64 : VOP1_64 0x002f, V_RCP_F64, []; +defm V_RCP_F64 : VOP1_64 0x002f, V_RCP_F64, + [(set f64:$dst, (fdiv FP_ONE, f64:$src0))] +; defm V_RCP_CLAMP_F64 : VOP1_64 0x0030, V_RCP_CLAMP_F64, []; defm V_RSQ_F64 : VOP1_64 0x0031, V_RSQ_F64, []; defm V_RSQ_CLAMP_F64 : VOP1_64 0x0032, V_RSQ_CLAMP_F64, []; @@ -996,10 +998,25 @@ def V_LSHR_B64 : VOP3_64_Shift 0x0162, V_LSHR_B64, ; def V_ASHR_I64 : VOP3_64_Shift 0x0163, V_ASHR_I64, []; +let isCommutable = 1 in { + def V_ADD_F64 : VOP3_64 0x0164, V_ADD_F64, []; def V_MUL_F64 : VOP3_64 0x0165, V_MUL_F64, []; def V_MIN_F64 : VOP3_64 0x0166, V_MIN_F64, []; def V_MAX_F64 : VOP3_64 0x0167, V_MAX_F64, []; + +} // isCommutable = 1 + +def : Pat + (fadd f64:$src0, f64:$src1), + (V_ADD_F64 
$src0, $src1, (i64 0)) +; + +def : Pat + (fmul f64:$src0, f64:$src1), + (V_MUL_F64 $src0, $src1, (i64 0)) +; + def V_LDEXP_F64 : VOP3_64 0x0168, V_LDEXP_F64, []; let isCommutable = 1 in { @@ -1417,6 +1434,10 @@ def : BitConvert i32, f32, VReg_32; def : BitConvert f32, i32, SReg_32; def : BitConvert f32, i32, VReg_32; +def : BitConvert i64, f64, VReg_64; + +def : BitConvert f64, i64, VReg_64; + /** === **/ /** Src Dst modifiers **/ /** === **/ @@ -1505,6 +1526,11 @@ def : Pat
[Mesa-dev] R600/SI: Initial double precision support for Radeon SI
Hi, the attached patches add initial support for double precision operations on Southern Islands cards. Some expressions containing multiple double precision kernel arguments cause llvm to run until all memory is used - but I do not (yet) know why. It works fine as long as I pass pointers to double values. Regards, Ole From 4224b314cf2d97cdf2ac99564d6155fa04fbb971 Mon Sep 17 00:00:00 2001 From: Niels Ole Salscheider niels_...@salscheider-online.de Date: Sat, 1 Jun 2013 16:48:56 +0200 Subject: [PATCH 1/6] R600/SI: Add initial double precision support for SI --- lib/Target/R600/AMDGPUISelLowering.cpp | 6 ++ lib/Target/R600/SIISelLowering.cpp | 1 + lib/Target/R600/SIInstructions.td | 30 +- test/CodeGen/R600/fadd64.ll| 13 + test/CodeGen/R600/fdiv64.ll| 14 ++ test/CodeGen/R600/fmul64.ll| 13 + test/CodeGen/R600/load64.ll| 20 7 Dateien geändert, 96 Zeilen hinzugefügt(+), 1 Zeile entfernt(-) create mode 100644 test/CodeGen/R600/fadd64.ll create mode 100644 test/CodeGen/R600/fdiv64.ll create mode 100644 test/CodeGen/R600/fmul64.ll create mode 100644 test/CodeGen/R600/load64.ll diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 4019a1f..5f3d496 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -60,12 +60,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine TM) : setOperationAction(ISD::STORE, MVT::v4f32, Promote); AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); + setOperationAction(ISD::STORE, MVT::f64, Promote); + AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); + setOperationAction(ISD::LOAD, MVT::f32, Promote); AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); setOperationAction(ISD::LOAD, MVT::v4f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); + setOperationAction(ISD::LOAD, MVT::f64, Promote); + AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); + setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::UDIV, MVT::i32, 
Expand); diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 9d4cfef..0d17a12 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -45,6 +45,7 @@ SITargetLowering::SITargetLowering(TargetMachine TM) : addRegisterClass(MVT::v2i32, AMDGPU::VReg_64RegClass); addRegisterClass(MVT::v2f32, AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::f64, AMDGPU::VReg_64RegClass); addRegisterClass(MVT::v4i32, AMDGPU::VReg_128RegClass); addRegisterClass(MVT::v4f32, AMDGPU::VReg_128RegClass); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 9c96c08..b956387 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -660,7 +660,9 @@ defm V_RSQ_LEGACY_F32 : VOP1_32 [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))] ; defm V_RSQ_F32 : VOP1_32 0x002e, V_RSQ_F32, []; -defm V_RCP_F64 : VOP1_64 0x002f, V_RCP_F64, []; +defm V_RCP_F64 : VOP1_64 0x002f, V_RCP_F64, + [(set f64:$dst, (fdiv FP_ONE, f64:$src0))] +; defm V_RCP_CLAMP_F64 : VOP1_64 0x0030, V_RCP_CLAMP_F64, []; defm V_RSQ_F64 : VOP1_64 0x0031, V_RSQ_F64, []; defm V_RSQ_CLAMP_F64 : VOP1_64 0x0032, V_RSQ_CLAMP_F64, []; @@ -996,10 +998,25 @@ def V_LSHR_B64 : VOP3_64_Shift 0x0162, V_LSHR_B64, ; def V_ASHR_I64 : VOP3_64_Shift 0x0163, V_ASHR_I64, []; +let isCommutable = 1 in { + def V_ADD_F64 : VOP3_64 0x0164, V_ADD_F64, []; def V_MUL_F64 : VOP3_64 0x0165, V_MUL_F64, []; def V_MIN_F64 : VOP3_64 0x0166, V_MIN_F64, []; def V_MAX_F64 : VOP3_64 0x0167, V_MAX_F64, []; + +} // isCommutable = 1 + +def : Pat + (fadd f64:$src0, f64:$src1), + (V_ADD_F64 $src0, $src1, (i64 0)) +; + +def : Pat + (fmul f64:$src0, f64:$src1), + (V_MUL_F64 $src0, $src1, (i64 0)) +; + def V_LDEXP_F64 : VOP3_64 0x0168, V_LDEXP_F64, []; let isCommutable = 1 in { @@ -1417,6 +1434,10 @@ def : BitConvert i32, f32, VReg_32; def : BitConvert f32, i32, SReg_32; def : BitConvert f32, i32, VReg_32; +def : BitConvert i64, f64, VReg_64; + +def : 
BitConvert f64, i64, VReg_64; + /** === **/ /** Src Dst modifiers **/ /** === **/ @@ -1505,6 +1526,11 @@ def : Pat (V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1)) ; +def : Pat + (fdiv f64:$src0, f64:$src1), + (V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0)) +; + def : Pat (fcos f32:$src0), (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) @@ -1634,6 +1660,8 @@ multiclass MUBUFLoad_Pattern MUBUF Instr_ADDR64, ValueType vt, ; } +defm : MUBUFLoad_Pattern BUFFER_LOAD_DWORDX2_ADDR64, i64, + global_load, constant_load; defm : MUBUFLoad_Pattern