[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
arsenm wrote: ### Merge activity * **Jul 10, 8:37 AM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/96444). https://github.com/llvm/llvm-project/pull/96444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From 4590c05051bbf57f0b269b2561aa1a7c74f06fbc Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index bea233bfb27bd..94e8e77b3c052 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFAddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFAddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1932,11 +1941,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 4bf8f20269a15..16dc019ede810 100644 ---
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From 308e31175185edc0d1aba78653b137c6a6f53a0e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index bea233bfb27bd..94e8e77b3c052 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFAddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFAddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1932,11 +1941,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 4bf8f20269a15..16dc019ede810 100644 ---
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From 5945915a9a9f0caf3ed890ce450a25cff58ef608 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index bea233bfb27bd..94e8e77b3c052 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFAddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFAddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1932,11 +1941,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 4bf8f20269a15..16dc019ede810 100644 ---
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From ece323988a3a47cfa5a1332620100d77a9cbd56a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index bea233bfb27bd..94e8e77b3c052 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFAddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFAddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1932,11 +1941,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 4bf8f20269a15..16dc019ede810 100644 ---
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From 20d2b3f20bfd4a9a919ae6281d436b070d87c289 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 34c6f6ff19bff..84ea040477763 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFAddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFAddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1932,11 +1941,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 4bf8f20269a15..16dc019ede810 100644 ---
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From 234b772ad9a5f5a430da538474edcc968233f2ad Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 34c6f6ff19bff..84ea040477763 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFAddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFAddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1932,11 +1941,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 98054dde398b3..89946a4719557 100644 ---
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From 0381e27b091f0cb6558fb9b4bf3e5359655acab0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 34c6f6ff19bff..84ea040477763 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFAddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFAddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1932,11 +1941,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 98054dde398b3..89946a4719557 100644 ---
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From 36cbbdfaa31c6313c96a9c908bade1e6f7debc5b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 34c6f6ff19bff..84ea040477763 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFAddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFAddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1932,11 +1941,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 98054dde398b3..89946a4719557 100644 ---
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From db519863301bd95fe0d50b56d74584b0f7f2fbf6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 34c6f6ff19bff..84ea040477763 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFAddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFAddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1932,11 +1941,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 98054dde398b3..89946a4719557 100644 ---
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From 7648917e1a8f14940f31add840201d3413abbbdb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 39a1d629a4aea..97dd45df91da2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFaddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasAtomicMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFaddF32DenormalSupport + FeatureMemoryAtomicFaddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1932,11 +1941,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 98054dde398b3..89946a4719557 100644 ---
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From e948fe9cd65450f109ff02a7f1a033afeb04 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 3d0d18e59a8c3..c6d2645e48b2b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFaddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasAtomicMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFaddF32DenormalSupport + FeatureMemoryAtomicFaddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1930,11 +1939,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 98054dde398b3..89946a4719557 100644 ---
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/96444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From baaf96125e8f913a161f1c13216618a3de128182 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 0ec65f759bc35..9aaeaf73287d5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFaddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasAtomicMemoryAtomicFaddF32DenormalSupport", @@ -1388,7 +1395,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1433,7 +1441,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFaddF32DenormalSupport + FeatureMemoryAtomicFaddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1928,11 +1937,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 98054dde398b3..89946a4719557 100644 ---
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
rampitec wrote: Use it in a predicate when defining pseudos? https://github.com/llvm/llvm-project/pull/96444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From 80c3f71f03d3b2ccbcd418d76d417f2a243fdbe4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH 1/2] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 10 +- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 0ec65f759bc35..028c54d8d94d2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFaddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasAtomicMemoryAtomicFaddF32DenormalSupport", @@ -1388,7 +1395,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 674d84422538f..922435c5efaa6 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicGlobalPkAddBF16Inst = false; bool HasAtomicBufferPkAddBF16Inst = false; bool HasFlatAtomicFaddF32Inst = false; + bool HasFlatBufferGlobalAtomicFaddF64Inst = false; bool HasDefaultComponentZero = false; bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false; bool HasDefaultComponentBroadcast = false; @@ -873,6 +874,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + /// \return true if the target has flat, global, and buffer atomic fadd for + /// double. + bool hasFlatBufferGlobalAtomicFaddF64Inst() const { +return HasFlatBufferGlobalAtomicFaddF64Inst; + } + /// \return true if the target's flat, global, and buffer atomic fadd for /// float supports denormal handling. bool hasMemoryAtomicFaddF32DenormalSupport() const { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index eec750e5b8251..6b5ba160d6402 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16028,7 +16028,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { return AtomicExpansionKind::CmpXChg; // global and flat atomic fadd f64: gfx90a, gfx940. -if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy()) +if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy()) return ReportUnsafeHWInst(AtomicExpansionKind::None); if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) { >From c1354032fc55234ffddf9136f17f5ee400c01c16 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 24 Jun 2024 15:42:17 +0200 Subject: [PATCH 2/2] Add to gfx940 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 028c54d8d94d2..3ed68a259ca15 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1441,7 +1441,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFaddF32DenormalSupport + FeatureMemoryAtomicFaddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/96444.diff 3 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+9-1) - (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+7) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+1-1) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5f798b4391704..fe3cd75d81009 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFaddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasAtomicMemoryAtomicFaddF32DenormalSupport", @@ -1388,7 +1395,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 674d84422538f..922435c5efaa6 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicGlobalPkAddBF16Inst = false; bool HasAtomicBufferPkAddBF16Inst = false; bool HasFlatAtomicFaddF32Inst = false; + bool HasFlatBufferGlobalAtomicFaddF64Inst = false; bool HasDefaultComponentZero = false; bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false; bool HasDefaultComponentBroadcast = false; @@ -873,6 +874,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + /// \return true if the target has flat, global, and buffer atomic fadd for + /// double. + bool hasFlatBufferGlobalAtomicFaddF64Inst() const { +return HasFlatBufferGlobalAtomicFaddF64Inst; + } + /// \return true if the target's flat, global, and buffer atomic fadd for /// float supports denormal handling. bool hasMemoryAtomicFaddF32DenormalSupport() const { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index eec750e5b8251..6b5ba160d6402 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16028,7 +16028,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { return AtomicExpansionKind::CmpXChg; // global and flat atomic fadd f64: gfx90a, gfx940. -if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy()) +if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy()) return ReportUnsafeHWInst(AtomicExpansionKind::None); if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) { `` https://github.com/llvm/llvm-project/pull/96444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/96444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/96444?utm_source=stack-comment-downstack-mergeability-warning; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests;>Learn more * **#96444** https://app.graphite.dev/github/pr/llvm/llvm-project/96444?utm_source=stack-comment-icon; target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" width="10px" height="10px"/> * **#96443** https://app.graphite.dev/github/pr/llvm/llvm-project/96443?utm_source=stack-comment-icon; target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" width="10px" height="10px"/> * **#96442** https://app.graphite.dev/github/pr/llvm/llvm-project/96442?utm_source=stack-comment-icon; target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" width="10px" height="10px"/> * **#95930** https://app.graphite.dev/github/pr/llvm/llvm-project/95930?utm_source=stack-comment-icon; target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" width="10px" height="10px"/> * **#95929** https://app.graphite.dev/github/pr/llvm/llvm-project/95929?utm_source=stack-comment-icon; target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment;>Learn more about stacking. Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment;>https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/96444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/96444 None >From a663c429ebe7a05754c771d67856a7cdb20cc11d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 10 +- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5f798b4391704..fe3cd75d81009 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFaddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasAtomicMemoryAtomicFaddF32DenormalSupport", @@ -1388,7 +1395,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 674d84422538f..922435c5efaa6 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicGlobalPkAddBF16Inst = false; bool HasAtomicBufferPkAddBF16Inst = false; bool HasFlatAtomicFaddF32Inst = false; + bool HasFlatBufferGlobalAtomicFaddF64Inst = false; bool HasDefaultComponentZero = false; bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false; bool HasDefaultComponentBroadcast = false; @@ -873,6 +874,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + /// \return true if the target has flat, global, and buffer atomic fadd for + /// double. + bool hasFlatBufferGlobalAtomicFaddF64Inst() const { +return HasFlatBufferGlobalAtomicFaddF64Inst; + } + /// \return true if the target's flat, global, and buffer atomic fadd for /// float supports denormal handling. bool hasMemoryAtomicFaddF32DenormalSupport() const { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index eec750e5b8251..6b5ba160d6402 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16028,7 +16028,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { return AtomicExpansionKind::CmpXChg; // global and flat atomic fadd f64: gfx90a, gfx940. -if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy()) +if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy()) return ReportUnsafeHWInst(AtomicExpansionKind::None); if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits