https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96442
>From 1a441c05eb510f3310604594b2687ddf90e884fe Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Sun, 23 Jun 2024 22:02:36 +0200 Subject: [PATCH] AMDGPU: Add a subtarget feature for fine-grained remote memory support Atomic access to fine-grained remote memory does not work on all subtargets. Add a feature for targets where this is expected to work. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 16 ++++++++++++++-- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 8 ++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5bc0fe8bba608..4d2faacaa915b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,16 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureAgentScopeFineGrainedRemoteMemoryAtomics + : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics", + "HasAgentScopeFineGrainedRemoteMemoryAtomics", + "true", + "Agent (device) scoped atomic operations, excluding those directly " + "supported by PCIe (i.e. integer atomic add, exchange, and " + "compare-and-swap), are functional for allocations in host or peer " + "device memory." +>; + def FeatureDefaultComponentZero : SubtargetFeature<"default-component-zero", "HasDefaultComponentZero", "true", @@ -1207,7 +1217,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12", FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32, - FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts + FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, + FeatureAgentScopeFineGrainedRemoteMemoryAtomics ] >; @@ -1415,7 +1426,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureAgentScopeFineGrainedRemoteMemoryAtomics ]>; def FeatureISAVersion9_4_0 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 966708db4f37c..c40efbdcf7f0b 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicBufferPkAddBF16Inst = false; bool HasFlatAtomicFaddF32Inst = false; bool HasDefaultComponentZero = false; + bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false; bool HasDefaultComponentBroadcast = false; /// The maximum number of instructions that may be placed within an S_CLAUSE, /// which is one greater than the maximum argument to S_CLAUSE. A value of 0 @@ -871,6 +872,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + /// \return true if atomic operations targeting fine-grained memory work + /// correctly at device scope, in allocations in host or peer PCIe device + /// memory. + bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const { + return HasAgentScopeFineGrainedRemoteMemoryAtomics; + } + bool hasDefaultComponentZero() const { return HasDefaultComponentZero; } bool hasDefaultComponentBroadcast() const { _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits