https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/189556
>From 2f1c7597bb67112f67ac33031c0a5d00b64aa422 Mon Sep 17 00:00:00 2001 From: pvanhout <[email protected]> Date: Tue, 31 Mar 2026 10:00:17 +0200 Subject: [PATCH 1/4] [AMDGPU][SIMemoryLegalizer] Consider scratch operations as NV=1 if GAS is disabled - Clarify that `thread-private` MMO flag is still useful. - If GAS is not enabled (which is the default as of last patch), consider an op as `NV=1` if it's a `scratch_` opcode, or if the MMO is in the private AS. - Add tests for the new cases. - Update AMDGPUUsage GFX12.5 memory model --- llvm/docs/AMDGPUUsage.rst | 19 +- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 4 +- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 17 +- .../AMDGPU/memory-legalizer-non-volatile.ll | 111 +++++++---- .../AMDGPU/memory-legalizer-non-volatile.mir | 181 ++++++++++++++++++ 5 files changed, 286 insertions(+), 46 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 2a350edcb786f..79c3c30f2bcc9 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -17434,8 +17434,7 @@ For GFX125x: This section is currently incomplete as work on the compiler is still ongoing. The following is a non-exhaustive list of unimplemented/undocumented features: - non-volatile bit code sequences, multicast loads, barriers (including split barriers) - and cooperative atomics. + multicast loads, barriers (including split barriers) and cooperative atomics. Scalar operations memory model needs more elaboration as well. * Vector memory operations are performed as wavefront wide operations, with the @@ -17513,6 +17512,10 @@ For GFX125x: * When ``nv=0`` reads hit dirty ``$nv=1`` data in cache, the hardware will writeback the data to the next level in the hierarchy and then subsequently read it again, updating the cache line with a clean ``$nv=0`` copy of the data. 
+ * ``nv=1`` is set on operations that are known to access read-only memory, or memory + that can only be modified by the current thread. For example, all scratch/private memory + (if ``globally-addressable-scratch`` is disabled), scratch memory used for spill/reloads, + loads marked as invariant, etc. * ``global_inv``, ``global_wb`` and ``global_wbinv`` are cache control instructions. The affected cache(s) are controlled by the ``SCOPE`` of the instruction. @@ -17557,10 +17560,14 @@ may change between kernel dispatch executions. See Atomics in the scratch address space are handled as follows: -* Data types <= 32 bits: The instruction is converted into an atomic in the - generic (``flat``) address space. All properties of the atomic - (atomic ordering, volatility, alignment, etc.) are preserved. - Refer to the generic address space code sequences for further information. +* Data types <= 32 bits: + + * If ``globally-addressable-scratch`` is used, the instruction is converted into + an atomic in the generic (``flat``) address space. All properties of the atomic + (atomic ordering, volatility, alignment, etc.) are preserved. + Refer to the generic address space code sequences for further information. + * Otherwise, the operation is considered as non-atomic. + * Data types >32 bits: unsupported and an error is emitted. The code sequences used to implement the memory model for GFX125x are defined in diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 20ab23df208f8..2fafee48a85d8 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -59,7 +59,9 @@ struct V2PhysSCopyInfo { SmallVector<Register> SGPRs; }; /// Mark the MMO of accesses to memory locations that are -/// never written to by other threads. +/// known to never be written to by other threads, no matter the +/// target and target features enabled +/// (e.g. even with globally-addressable scratch enabled). 
static const MachineMemOperand::Flags MOThreadPrivate = MachineMemOperand::MOTargetFlag4; diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index bfa90e86fe4f0..decdb8282ae61 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -976,11 +976,22 @@ SIMemOpAccess::getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const { /// being marked as non-volatile. This means that either they are accessing the /// constant address space, are accessing a known invariant memory location, or /// that they are marked with the non-volatile metadata/MMO flag. -static bool isNonVolatileMemoryAccess(const MachineInstr &MI) { +static bool isNonVolatileMemoryAccess(const GCNSubtarget &ST, + const MachineInstr &MI) { if (MI.getNumMemOperands() == 0) return false; + // If globally addressable scratch is not in use, we can assume any scratch + // opcode accesses thread-local memory, thus is NV=1. + bool GASEnabled = ST.isGloballyAddressableScratchEnabled(); + if (!GASEnabled && ST.getInstrInfo()->isFLATScratch(MI.getOpcode())) + return true; return all_of(MI.memoperands(), [&](const MachineMemOperand *MMO) { - return MMO->getFlags() & (MOThreadPrivate | MachineMemOperand::MOInvariant); + // If globally addressable scratch is enabled, we can only set NV=1 by + // checking for the thread-private or invariant memory. If it is disabled, + // we can additionally consider private memory. 
+ return (!GASEnabled && MMO->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS) || + (MMO->getFlags() & + (MOThreadPrivate | MachineMemOperand::MOInvariant)); }); } @@ -2573,7 +2584,7 @@ bool SIMemoryLegalizer::run(MachineFunction &MF) { Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI); } - if (isNonVolatileMemoryAccess(*MI)) + if (isNonVolatileMemoryAccess(ST, *MI)) Changed |= CC->handleNonVolatile(*MI); } } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.ll index eab3dd197b13e..754b42ba5c7bf 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.ll @@ -2,8 +2,10 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU,GFX12-CU-DAGISEL %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU,GFX12-CU-GISEL %s -; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-DAGISEL %s -; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mattr=+globally-addressable-scratch -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-GAS,GFX1250-GAS-DAGISEL %s +; RUN: llc -global-isel=0 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-NOGAS,GFX1250-NOGAS-DAGISEL %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mattr=+globally-addressable-scratch -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-GAS,GFX1250-GAS-GISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck 
--check-prefixes=GFX1250,GFX1250-NOGAS,GFX1250-NOGAS-GISEL %s define void @flat_i32_nonatomic(ptr addrspace(0) %in, ptr addrspace(0) %out) { ; GFX12-CU-LABEL: flat_i32_nonatomic: @@ -174,14 +176,23 @@ define void @scratch_i32_nonatomic(ptr addrspace(5) %in, ptr addrspace(5) %out) ; GFX12-CU-NEXT: scratch_store_b32 v1, v0, off ; GFX12-CU-NEXT: s_setpc_b64 s[30:31] ; -; GFX1250-LABEL: scratch_i32_nonatomic: -; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: scratch_load_b32 v0, v0, off -; GFX1250-NEXT: s_wait_loadcnt 0x0 -; GFX1250-NEXT: scratch_store_b32 v1, v0, off -; GFX1250-NEXT: s_set_pc_i64 s[30:31] +; GFX1250-GAS-LABEL: scratch_i32_nonatomic: +; GFX1250-GAS: ; %bb.0: ; %entry +; GFX1250-GAS-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GAS-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GAS-NEXT: scratch_load_b32 v0, v0, off +; GFX1250-GAS-NEXT: s_wait_loadcnt 0x0 +; GFX1250-GAS-NEXT: scratch_store_b32 v1, v0, off +; GFX1250-GAS-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-NOGAS-LABEL: scratch_i32_nonatomic: +; GFX1250-NOGAS: ; %bb.0: ; %entry +; GFX1250-NOGAS-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOGAS-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NOGAS-NEXT: scratch_load_b32 v0, v0, off nv +; GFX1250-NOGAS-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NOGAS-NEXT: scratch_store_b32 v1, v0, off nv +; GFX1250-NOGAS-NEXT: s_set_pc_i64 s[30:31] entry: %val = load i32, ptr addrspace(5) %in store i32 %val, ptr addrspace(5) %out @@ -303,33 +314,61 @@ define void @buffer_i32_nonatomic(ptr addrspace(7) inreg %in, ptr addrspace(7) i ; GFX12-CU-GISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen ; GFX12-CU-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX1250-DAGISEL-LABEL: buffer_i32_nonatomic: -; GFX1250-DAGISEL: ; %bb.0: ; %entry -; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1250-DAGISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21 -; GFX1250-DAGISEL-NEXT: s_mov_b32 s7, s20 -; 
GFX1250-DAGISEL-NEXT: s_mov_b32 s6, s19 -; GFX1250-DAGISEL-NEXT: s_mov_b32 s5, s18 -; GFX1250-DAGISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen -; GFX1250-DAGISEL-NEXT: s_mov_b32 s4, s17 -; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-DAGISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen -; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31] +; GFX1250-GAS-DAGISEL-LABEL: buffer_i32_nonatomic: +; GFX1250-GAS-DAGISEL: ; %bb.0: ; %entry +; GFX1250-GAS-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GAS-DAGISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GAS-DAGISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21 +; GFX1250-GAS-DAGISEL-NEXT: s_mov_b32 s7, s20 +; GFX1250-GAS-DAGISEL-NEXT: s_mov_b32 s6, s19 +; GFX1250-GAS-DAGISEL-NEXT: s_mov_b32 s5, s18 +; GFX1250-GAS-DAGISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen +; GFX1250-GAS-DAGISEL-NEXT: s_mov_b32 s4, s17 +; GFX1250-GAS-DAGISEL-NEXT: s_wait_loadcnt 0x0 +; GFX1250-GAS-DAGISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen +; GFX1250-GAS-DAGISEL-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-NOGAS-DAGISEL-LABEL: buffer_i32_nonatomic: +; GFX1250-NOGAS-DAGISEL: ; %bb.0: ; %entry +; GFX1250-NOGAS-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOGAS-DAGISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NOGAS-DAGISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21 +; GFX1250-NOGAS-DAGISEL-NEXT: s_mov_b32 s7, s20 +; GFX1250-NOGAS-DAGISEL-NEXT: s_mov_b32 s6, s19 +; GFX1250-NOGAS-DAGISEL-NEXT: s_mov_b32 s5, s18 +; GFX1250-NOGAS-DAGISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen +; GFX1250-NOGAS-DAGISEL-NEXT: s_mov_b32 s4, s17 +; GFX1250-NOGAS-DAGISEL-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NOGAS-DAGISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen +; GFX1250-NOGAS-DAGISEL-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GAS-GISEL-LABEL: buffer_i32_nonatomic: +; GFX1250-GAS-GISEL: ; %bb.0: ; %entry +; GFX1250-GAS-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GAS-GISEL-NEXT: s_wait_kmcnt 0x0 +; 
GFX1250-GAS-GISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21 +; GFX1250-GAS-GISEL-NEXT: s_mov_b32 s4, s17 +; GFX1250-GAS-GISEL-NEXT: s_mov_b32 s5, s18 +; GFX1250-GAS-GISEL-NEXT: s_mov_b32 s6, s19 +; GFX1250-GAS-GISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen +; GFX1250-GAS-GISEL-NEXT: s_mov_b32 s7, s20 +; GFX1250-GAS-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX1250-GAS-GISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen +; GFX1250-GAS-GISEL-NEXT: s_set_pc_i64 s[30:31] ; -; GFX1250-GISEL-LABEL: buffer_i32_nonatomic: -; GFX1250-GISEL: ; %bb.0: ; %entry -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21 -; GFX1250-GISEL-NEXT: s_mov_b32 s4, s17 -; GFX1250-GISEL-NEXT: s_mov_b32 s5, s18 -; GFX1250-GISEL-NEXT: s_mov_b32 s6, s19 -; GFX1250-GISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen -; GFX1250-GISEL-NEXT: s_mov_b32 s7, s20 -; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen -; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] +; GFX1250-NOGAS-GISEL-LABEL: buffer_i32_nonatomic: +; GFX1250-NOGAS-GISEL: ; %bb.0: ; %entry +; GFX1250-NOGAS-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOGAS-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NOGAS-GISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21 +; GFX1250-NOGAS-GISEL-NEXT: s_mov_b32 s4, s17 +; GFX1250-NOGAS-GISEL-NEXT: s_mov_b32 s5, s18 +; GFX1250-NOGAS-GISEL-NEXT: s_mov_b32 s6, s19 +; GFX1250-NOGAS-GISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen +; GFX1250-NOGAS-GISEL-NEXT: s_mov_b32 s7, s20 +; GFX1250-NOGAS-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NOGAS-GISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen +; GFX1250-NOGAS-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %val = load i32, ptr addrspace(7) %in store i32 %val, ptr addrspace(7) %out diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir 
b/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir new file mode 100644 index 0000000000000..ddd234f7e30dc --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir @@ -0,0 +1,181 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass si-memory-legalizer %s -o - | FileCheck --check-prefixes=GFX1200 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+globally-addressable-scratch -run-pass si-memory-legalizer %s -o - | FileCheck --check-prefixes=GFX1250,GFX1250-GAS +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass si-memory-legalizer %s -o - | FileCheck --check-prefixes=GFX1250,GFX1250-NOGAS + +# Additional test cases to accompany the LLVM IR tests. +# NOTE: NV bit = 5th bit set on CPol operand (values >=32). + +--- + +name: private_through_flat_inst +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + + ; GFX1200-LABEL: name: private_through_flat_inst + ; GFX1200: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5) + ; GFX1200-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) poison`, addrspace 5) + ; GFX1200-NEXT: S_ENDPGM 0 + ; + ; GFX1250-GAS-LABEL: name: private_through_flat_inst + ; GFX1250-GAS: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1250-GAS-NEXT: {{ $}} + ; GFX1250-GAS-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5) + ; GFX1250-GAS-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into 
`ptr addrspace(5) poison`, addrspace 5) + ; GFX1250-GAS-NEXT: S_ENDPGM 0 + ; + ; GFX1250-NOGAS-LABEL: name: private_through_flat_inst + ; GFX1250-NOGAS: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1250-NOGAS-NEXT: {{ $}} + ; GFX1250-NOGAS-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 32, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5) + ; GFX1250-NOGAS-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 32, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) poison`, addrspace 5) + ; GFX1250-NOGAS-NEXT: S_ENDPGM 0 + renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) poison`) + S_ENDPGM 0 +... + +--- + +name: private_and_generic_mmo +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + + ; GFX1200-LABEL: name: private_and_generic_mmo + ; GFX1200: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5), (load (s32) from `ptr poison`) + ; GFX1200-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) poison`, addrspace 5), (store (s32) into `ptr poison`) + ; GFX1200-NEXT: S_ENDPGM 0 + ; + ; GFX1250-LABEL: name: private_and_generic_mmo + ; GFX1250: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5), (load 
(s32) from `ptr poison`) + ; GFX1250-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) poison`, addrspace 5), (store (s32) into `ptr poison`) + ; GFX1250-NEXT: S_ENDPGM 0 + renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`), (load (s32) from `ptr poison`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) poison`), (store (s32) into `ptr poison`) + S_ENDPGM 0 +... + +--- + +name: multiple_private_mmos +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + + ; GFX1200-LABEL: name: multiple_private_mmos + ; GFX1200: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5), (load (s32) from `ptr addrspace(5) poison`, addrspace 5) + ; GFX1200-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) poison`, addrspace 5), (store (s32) into `ptr addrspace(5) poison`, addrspace 5) + ; GFX1200-NEXT: S_ENDPGM 0 + ; + ; GFX1250-GAS-LABEL: name: multiple_private_mmos + ; GFX1250-GAS: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1250-GAS-NEXT: {{ $}} + ; GFX1250-GAS-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5), (load (s32) from `ptr addrspace(5) poison`, addrspace 5) + ; GFX1250-GAS-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) poison`, 
addrspace 5), (store (s32) into `ptr addrspace(5) poison`, addrspace 5) + ; GFX1250-GAS-NEXT: S_ENDPGM 0 + ; + ; GFX1250-NOGAS-LABEL: name: multiple_private_mmos + ; GFX1250-NOGAS: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1250-NOGAS-NEXT: {{ $}} + ; GFX1250-NOGAS-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 32, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5), (load (s32) from `ptr addrspace(5) poison`, addrspace 5) + ; GFX1250-NOGAS-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 32, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) poison`, addrspace 5), (store (s32) into `ptr addrspace(5) poison`, addrspace 5) + ; GFX1250-NOGAS-NEXT: S_ENDPGM 0 + renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`), (load (s32) from `ptr addrspace(5) poison`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) poison`), (store (s32) into `ptr addrspace(5) poison`) + S_ENDPGM 0 +... 
+ +--- + +name: private_and_generic_invariant_mmo +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + + ; GFX1200-LABEL: name: private_and_generic_invariant_mmo + ; GFX1200: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5), (invariant load (s32) from `ptr poison`) + ; GFX1200-NEXT: S_ENDPGM 0 + ; + ; GFX1250-GAS-LABEL: name: private_and_generic_invariant_mmo + ; GFX1250-GAS: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1250-GAS-NEXT: {{ $}} + ; GFX1250-GAS-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5), (invariant load (s32) from `ptr poison`) + ; GFX1250-GAS-NEXT: S_ENDPGM 0 + ; + ; GFX1250-NOGAS-LABEL: name: private_and_generic_invariant_mmo + ; GFX1250-NOGAS: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1250-NOGAS-NEXT: {{ $}} + ; GFX1250-NOGAS-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 32, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5), (invariant load (s32) from `ptr poison`) + ; GFX1250-NOGAS-NEXT: S_ENDPGM 0 + renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`), (invariant load (s32) from `ptr poison`) + S_ENDPGM 0 +... 
+ +--- + +name: private_and_generic_threadprivate_mmo +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + + ; GFX1200-LABEL: name: private_and_generic_threadprivate_mmo + ; GFX1200: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5), ("amdgpu-thread-private" load (s32) from `ptr poison`) + ; GFX1200-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) poison`, addrspace 5), ("amdgpu-thread-private" store (s32) into `ptr poison`) + ; GFX1200-NEXT: S_ENDPGM 0 + ; + ; GFX1250-GAS-LABEL: name: private_and_generic_threadprivate_mmo + ; GFX1250-GAS: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1250-GAS-NEXT: {{ $}} + ; GFX1250-GAS-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5), ("amdgpu-thread-private" load (s32) from `ptr poison`) + ; GFX1250-GAS-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) poison`, addrspace 5), ("amdgpu-thread-private" store (s32) into `ptr poison`) + ; GFX1250-GAS-NEXT: S_ENDPGM 0 + ; + ; GFX1250-NOGAS-LABEL: name: private_and_generic_threadprivate_mmo + ; GFX1250-NOGAS: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1250-NOGAS-NEXT: {{ $}} + ; GFX1250-NOGAS-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 32, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`, addrspace 5), ("amdgpu-thread-private" load (s32) from `ptr poison`) + ; GFX1250-NOGAS-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 32, implicit $exec, implicit $flat_scr 
:: (store (s32) into `ptr addrspace(5) poison`, addrspace 5), ("amdgpu-thread-private" store (s32) into `ptr poison`) + ; GFX1250-NOGAS-NEXT: S_ENDPGM 0 + renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(5) poison`), ("amdgpu-thread-private" load (s32) from `ptr poison`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) poison`), ("amdgpu-thread-private" store (s32) into `ptr poison`) + S_ENDPGM 0 +... + +--- + +name: threadprivate_and_invariant_mmo +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + + ; GFX1200-LABEL: name: threadprivate_and_invariant_mmo + ; GFX1200: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from `ptr poison`), (invariant load (s32) from `ptr poison`) + ; GFX1200-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into `ptr poison`) + ; GFX1200-NEXT: S_ENDPGM 0 + ; + ; GFX1250-LABEL: name: threadprivate_and_invariant_mmo + ; GFX1250: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; GFX1250-NEXT: {{ $}} + ; GFX1250-NEXT: renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 32, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from `ptr poison`), (invariant load (s32) from `ptr poison`) + ; GFX1250-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 32, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into `ptr poison`) + ; GFX1250-NEXT: S_ENDPGM 0 + renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: 
("amdgpu-thread-private" load (s32) from `ptr poison`), (invariant load (s32) from `ptr poison`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into `ptr poison`) + S_ENDPGM 0 +... >From b44c2a4e444e455fa5c1593bac4d5c19651ce6b0 Mon Sep 17 00:00:00 2001 From: pvanhout <[email protected]> Date: Tue, 31 Mar 2026 10:52:59 +0200 Subject: [PATCH 2/4] Fix MIR test --- llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir index ddd234f7e30dc..4e380e8f9fd80 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass si-memory-legalizer %s -o - | FileCheck --check-prefixes=GFX1200 -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+globally-addressable-scratch -run-pass si-memory-legalizer %s -o - | FileCheck --check-prefixes=GFX1250,GFX1250-GAS -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass si-memory-legalizer %s -o - | FileCheck --check-prefixes=GFX1250,GFX1250-NOGAS +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass si-memory-legalizer %s -o - | FileCheck %s --check-prefixes=GFX1200 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+globally-addressable-scratch -run-pass si-memory-legalizer %s -o - | FileCheck %s --check-prefixes=GFX1250,GFX1250-GAS +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass si-memory-legalizer %s -o - | FileCheck %s --check-prefixes=GFX1250,GFX1250-NOGAS # Additional test cases to accompany the LLVM IR tests. 
# NOTE: NV bit = 5th bit set on CPol operand (values >=32). >From b47d4bc90c0def78d0131b9b0c3ed766a54e945c Mon Sep 17 00:00:00 2001 From: pvanhout <[email protected]> Date: Wed, 1 Apr 2026 14:06:45 +0200 Subject: [PATCH 3/4] Restack + comments --- llvm/docs/AMDGPUUsage.rst | 16 ++-------------- .../AMDGPU/memory-legalizer-non-volatile.ll | 8 ++++---- .../AMDGPU/memory-legalizer-non-volatile.mir | 2 +- 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 79c3c30f2bcc9..14d57a90b13f8 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -17514,8 +17514,8 @@ For GFX125x: it again, updating the cache line with a clean ``$nv=0`` copy of the data. * ``nv=1`` is set on operations that are known to access read-only memory, or memory that can only be modified by the current thread. For example, all scratch/private memory - (if ``globally-addressable-scratch`` is disabled), scratch memory used for spill/reloads, - loads marked as invariant, etc. + (unless globally addressable scratch is supported by the target **and** enabled), + scratch memory used for spill/reloads, loads marked as invariant, etc. * ``global_inv``, ``global_wb`` and ``global_wbinv`` are cache control instructions. The affected cache(s) are controlled by the ``SCOPE`` of the instruction. @@ -17558,18 +17558,6 @@ invalidated between kernel dispatches by CP since constant address space data may change between kernel dispatch executions. See :ref:`amdgpu-amdhsa-memory-spaces`. -Atomics in the scratch address space are handled as follows: - -* Data types <= 32 bits: - - * If ``globally-addressable-scratch`` is used, the instruction is converted into - an atomic in the generic (``flat``) address space. All properties of the atomic - (atomic ordering, volatility, alignment, etc.) are preserved. - Refer to the generic address space code sequences for further information. 
- * Otherwise, the operation is considered as non-atomic. - -* Data types >32 bits: unsupported and an error is emitted. - The code sequences used to implement the memory model for GFX125x are defined in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx125x-table`. diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.ll index 754b42ba5c7bf..aab75e1e0025b 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.ll @@ -2,10 +2,10 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU,GFX12-CU-DAGISEL %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU,GFX12-CU-GISEL %s -; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mattr=+globally-addressable-scratch -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-GAS,GFX1250-GAS-DAGISEL %s -; RUN: llc -global-isel=0 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-NOGAS,GFX1250-NOGAS-DAGISEL %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mattr=+globally-addressable-scratch -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-GAS,GFX1250-GAS-GISEL %s -; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-NOGAS,GFX1250-NOGAS-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -amdgpu-globally-addressable-scratch -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-GAS,GFX1250-GAS-DAGISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-NOGAS,GFX1250-NOGAS-DAGISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select 
-mtriple=amdgcn-amd-amdhsa -amdgpu-globally-addressable-scratch -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-GAS,GFX1250-GAS-GISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-NOGAS,GFX1250-NOGAS-GISEL %s define void @flat_i32_nonatomic(ptr addrspace(0) %in, ptr addrspace(0) %out) { ; GFX12-CU-LABEL: flat_i32_nonatomic: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir index 4e380e8f9fd80..bead28045957f 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass si-memory-legalizer %s -o - | FileCheck %s --check-prefixes=GFX1200 -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+globally-addressable-scratch -run-pass si-memory-legalizer %s -o - | FileCheck %s --check-prefixes=GFX1250,GFX1250-GAS +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -amdgpu-globally-addressable-scratch -run-pass si-memory-legalizer %s -o - | FileCheck %s --check-prefixes=GFX1250,GFX1250-GAS # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass si-memory-legalizer %s -o - | FileCheck %s --check-prefixes=GFX1250,GFX1250-NOGAS # Additional test cases to accompany the LLVM IR tests. 
>From 06a0c0614322531774407858ef9aa96c653e6136 Mon Sep 17 00:00:00 2001 From: pvanhout <[email protected]> Date: Thu, 4 Jun 2026 09:43:00 +0200 Subject: [PATCH 4/4] Comments --- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 2 +- llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index decdb8282ae61..bbe76ccc9b32b 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -985,7 +985,7 @@ static bool isNonVolatileMemoryAccess(const GCNSubtarget &ST, bool GASEnabled = ST.isGloballyAddressableScratchEnabled(); if (!GASEnabled && ST.getInstrInfo()->isFLATScratch(MI.getOpcode())) return true; - return all_of(MI.memoperands(), [&](const MachineMemOperand *MMO) { + return all_of(MI.memoperands(), [GASEnabled](const MachineMemOperand *MMO) { // If globally addressable scratch is enabled, we can only set NV=1 by // checking for the thread-private or invariant memory. If it is disabled, // we can additionally consider private memory. 
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir index bead28045957f..f0d153e0d0fb1 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-non-volatile.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass si-memory-legalizer %s -o - | FileCheck %s --check-prefixes=GFX1200 -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -amdgpu-globally-addressable-scratch -run-pass si-memory-legalizer %s -o - | FileCheck %s --check-prefixes=GFX1250,GFX1250-GAS -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass si-memory-legalizer %s -o - | FileCheck %s --check-prefixes=GFX1250,GFX1250-NOGAS +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=si-memory-legalizer %s -o - | FileCheck %s --check-prefixes=GFX1200 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -amdgpu-globally-addressable-scratch -run-pass=si-memory-legalizer %s -o - | FileCheck %s --check-prefixes=GFX1250,GFX1250-GAS +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=si-memory-legalizer %s -o - | FileCheck %s --check-prefixes=GFX1250,GFX1250-NOGAS # Additional test cases to accompany the LLVM IR tests. # NOTE: NV bit = 5th bit set on CPol operand (values >=32). _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
