[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
@@ -788,6 +788,14 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureAgentScopeFineGrainedRemoteMemoryAtomics + : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics", + "HasAgentScopeFineGrainedRemoteMemoryAtomics", + "true", + "Agent (device) scoped atomic operations not directly supported by " arsenm wrote: I dropped the last PCIe reference since XGMI should also work https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96442 >From 1a441c05eb510f3310604594b2687ddf90e884fe Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 22:02:36 +0200 Subject: [PATCH] AMDGPU: Add a subtarget feature for fine-grained remote memory support Atomic access to fine-grained remote memory does not work on all subtargets. Add a feature for targets where this is expected to work. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 16 ++-- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 8 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5bc0fe8bba608..4d2faacaa915b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,16 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureAgentScopeFineGrainedRemoteMemoryAtomics + : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics", + "HasAgentScopeFineGrainedRemoteMemoryAtomics", + "true", + "Agent (device) scoped atomic operations, excluding those directly " + "supported by PCIe (i.e. integer atomic add, exchange, and " + "compare-and-swap), are functional for allocations in host or peer " + "device memory." +>; + def FeatureDefaultComponentZero : SubtargetFeature<"default-component-zero", "HasDefaultComponentZero", "true", @@ -1207,7 +1217,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12", FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32, - FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts + FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, + FeatureAgentScopeFineGrainedRemoteMemoryAtomics ] >; @@ -1415,7 +1426,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureAgentScopeFineGrainedRemoteMemoryAtomics ]>; def FeatureISAVersion9_4_0 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 966708db4f37c..c40efbdcf7f0b 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicBufferPkAddBF16Inst = false; bool HasFlatAtomicFaddF32Inst = false; bool HasDefaultComponentZero = false; + bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false; bool HasDefaultComponentBroadcast = false; /// The maximum number of instructions that may be placed within an S_CLAUSE, /// which is one greater than the maximum argument to S_CLAUSE. A value of 0 @@ -871,6 +872,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + /// \return true if atomic operations targeting fine-grained memory work + /// correctly at device scope, in allocations in host or peer PCIe device + /// memory. + bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const { +return HasAgentScopeFineGrainedRemoteMemoryAtomics; + } + bool hasDefaultComponentZero() const { return HasDefaultComponentZero; } bool hasDefaultComponentBroadcast() const { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
@@ -788,6 +788,14 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureAgentScopeFineGrainedRemoteMemoryAtomics + : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics", + "HasAgentScopeFineGrainedRemoteMemoryAtomics", + "true", + "Agent (device) scoped atomic operations not directly supported by " yxsamliu wrote: I feel the description is a little bit confusing, at least for me. how about "Agent (device) scoped atomic operations, excluding those directly supported by PCIe (i.e., integer atomic add, exchange, and compare-and-swap), are functional for allocations in host or peer PCIe device memory." https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
arsenm wrote: > need some tests This does nothing as it is. The real use patches have thousands of tests https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
yxsamliu wrote: need some tests https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
arsenm wrote: No, the string description mentions this is for the non-PCIe supported cases https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
yxsamliu wrote: If a sub target does not have this feature, does none of the atomic instructions work for fine-grained remote memory, including integer atomic add/xchg/cmpxchg? https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
arsenm wrote: > Actually not, we do not know the bus. Moreover, we know this is opposite. On gfx940/gfx12, we don't need to know the bus to handle the agent scope case correctly. The instructions still function, just always at device scope. We do need to know the bus and/or location to handle the system scope correctly, which will come from the new metadata. https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
rampitec wrote: > We do statically know for some of the targets (mostly gfx12 and gfx940) that > it's supposed to work. This is the "scope downgrade" vs. "nop" cases in the > atomic support table Actually not, we do not know the bus. Moreover, we know this is opposite. https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
arsenm wrote: > Hmm, I might need to reparse this but I'm not entirely sure how this is > expected to work in general since PCI-E attachment is not always / entirely > knowable at compile time. Have I missed some preamble discussion here? We do statically know for some of the targets (mostly gfx12 and gfx940) that it's supposed to work. This is the "scope downgrade" vs. "nop" cases in the atomic support table https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
AlexVlx wrote: Hmm, I might need to reparse this but I'm not entirely sure how this is expected to work in general since PCI-E attachment is not always / entirely knowable at compile time. Have I missed some preamble discussion here? https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes Atomic access to fine-grained remote memory does not work on all subtargets. Add a feature for targets where this is expected to work. --- Full diff: https://github.com/llvm/llvm-project/pull/96442.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+12-2) - (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+8) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5bc0fe8bba608..7ff861f5b144d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,14 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureAgentScopeFineGrainedRemoteMemoryAtomics + : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics", + "HasAgentScopeFineGrainedRemoteMemoryAtomics", + "true", + "Agent (device) scoped atomic operations not directly supported by " + "PCIe work for allocations in host or peer PCIe device memory" +>; + def FeatureDefaultComponentZero : SubtargetFeature<"default-component-zero", "HasDefaultComponentZero", "true", @@ -1207,7 +1215,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12", FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32, - FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts + FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, + FeatureAgentScopeFineGrainedRemoteMemoryAtomics ] >; @@ -1415,7 +1424,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureAgentScopeFineGrainedRemoteMemoryAtomics ]>; def FeatureISAVersion9_4_0 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 966708db4f37c..c40efbdcf7f0b 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicBufferPkAddBF16Inst = false; bool HasFlatAtomicFaddF32Inst = false; bool HasDefaultComponentZero = false; + bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false; bool HasDefaultComponentBroadcast = false; /// The maximum number of instructions that may be placed within an S_CLAUSE, /// which is one greater than the maximum argument to S_CLAUSE. A value of 0 @@ -871,6 +872,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + /// \return true if atomic operations targeting fine-grained memory work + /// correctly at device scope, in allocations in host or peer PCIe device + /// memory. + bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const { +return HasAgentScopeFineGrainedRemoteMemoryAtomics; + } + bool hasDefaultComponentZero() const { return HasDefaultComponentZero; } bool hasDefaultComponentBroadcast() const { `` https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/96442?utm_source=stack-comment-downstack-mergeability-warning; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests;>Learn more * **#96443** https://app.graphite.dev/github/pr/llvm/llvm-project/96443?utm_source=stack-comment-icon; target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" width="10px" height="10px"/> * **#96442** https://app.graphite.dev/github/pr/llvm/llvm-project/96442?utm_source=stack-comment-icon; target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" width="10px" height="10px"/> * **#95930** https://app.graphite.dev/github/pr/llvm/llvm-project/95930?utm_source=stack-comment-icon; target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" width="10px" height="10px"/> * **#95929** https://app.graphite.dev/github/pr/llvm/llvm-project/95929?utm_source=stack-comment-icon; target="_blank">https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment;>Learn more about stacking. Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment;>https://static.graphite.dev/graphite-32x32-black.png; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/96442 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/96442 Atomic access to fine-grained remote memory does not work on all subtargets. Add a feature for targets where this is expected to work. >From 2da0565165d97cded02896b18a9d7f8083474da9 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 22:02:36 +0200 Subject: [PATCH] AMDGPU: Add a subtarget feature for fine-grained remote memory support Atomic access to fine-grained remote memory does not work on all subtargets. Add a feature for targets where this is expected to work. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 14 -- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 8 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5bc0fe8bba608..7ff861f5b144d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,14 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureAgentScopeFineGrainedRemoteMemoryAtomics + : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics", + "HasAgentScopeFineGrainedRemoteMemoryAtomics", + "true", + "Agent (device) scoped atomic operations not directly supported by " + "PCIe work for allocations in host or peer PCIe device memory" +>; + def FeatureDefaultComponentZero : SubtargetFeature<"default-component-zero", "HasDefaultComponentZero", "true", @@ -1207,7 +1215,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12", FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32, - FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts + FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, + FeatureAgentScopeFineGrainedRemoteMemoryAtomics ] >; @@ -1415,7 +1424,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureAgentScopeFineGrainedRemoteMemoryAtomics ]>; def FeatureISAVersion9_4_0 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 966708db4f37c..c40efbdcf7f0b 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicBufferPkAddBF16Inst = false; bool HasFlatAtomicFaddF32Inst = false; bool HasDefaultComponentZero = false; + bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false; bool HasDefaultComponentBroadcast = false; /// The maximum number of instructions that may be placed within an S_CLAUSE, /// which is one greater than the maximum argument to S_CLAUSE. A value of 0 @@ -871,6 +872,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + /// \return true if atomic operations targeting fine-grained memory work + /// correctly at device scope, in allocations in host or peer PCIe device + /// memory. + bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const { +return HasAgentScopeFineGrainedRemoteMemoryAtomics; + } + bool hasDefaultComponentZero() const { return HasDefaultComponentZero; } bool hasDefaultComponentBroadcast() const { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits