https://github.com/jmmartinez created https://github.com/llvm/llvm-project/pull/185696
Still missing: * [ ] Understand why spirv-val rejects the atomic load of a vector but not the atomic store of a vector. * [ ] `volatile` is currently dropped when lowering atomic loads/stores. * [ ] The alignment is currently dropped when lowering atomic loads/stores. * [ ] Try to put it all together in the addMemoryOperands function. Closes #185629 From 5bf3949b165f210e73725e7658849e3e382327bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Tue, 10 Mar 2026 17:45:57 +0100 Subject: [PATCH] [SPIRV] Lower load/store atomic to OpAtomicLoad/OpAtomicStore --- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 54 +++++++++++++++++-- .../CodeGen/SPIRV/transcoding/load-atomic.ll | 45 +++++++++++----- .../CodeGen/SPIRV/transcoding/store-atomic.ll | 44 ++++++++++----- 3 files changed, 113 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 65f4856aeee68..3097a70ccb946 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -1635,6 +1635,7 @@ static void addMemoryOperands(uint64_t Flags, MachineInstrBuilder &MIB) { bool SPIRVInstructionSelector::selectLoad(Register ResVReg, SPIRVTypeInst ResType, MachineInstr &I) const { + LLVMContext &Context = I.getMF()->getFunction().getContext(); unsigned OpOffset = isa<GIntrinsic>(I) ? 
1 : 0; Register Ptr = I.getOperand(1 + OpOffset).getReg(); @@ -1658,7 +1659,31 @@ bool SPIRVInstructionSelector::selectLoad(Register ResVReg, } } - auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpLoad)) + MachineIRBuilder MIRBuilder(I); + + if (I.getNumMemOperands()) { + const MachineMemOperand *MemOp = *I.memoperands_begin(); + if (MemOp->isAtomic()) { + uint32_t Scope = static_cast<uint32_t>(getMemScope( + Context, MemOp->getSyncScopeID())); + Register ScopeReg = buildI32Constant(Scope, I); + + AtomicOrdering AO = MemOp->getSuccessOrdering(); + uint32_t MemSem = static_cast<uint32_t>(getMemSemantics(AO)); + Register MemSemReg = buildI32Constant(MemSem, I); + + auto Load = MIRBuilder.buildInstr(SPIRV::OpAtomicLoad) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(Ptr) + .addUse(ScopeReg) + .addUse(MemSemReg); + Load.constrainAllUses(TII, TRI, RBI); + return true; + } + } + + auto MIB = MIRBuilder.buildInstr(SPIRV::OpLoad) .addDef(ResVReg) .addUse(GR.getSPIRVTypeID(ResType)) .addUse(Ptr); @@ -1676,6 +1701,7 @@ bool SPIRVInstructionSelector::selectLoad(Register ResVReg, } bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const { + LLVMContext &Context = I.getMF()->getFunction().getContext(); unsigned OpOffset = isa<GIntrinsic>(I) ? 
1 : 0; Register StoreVal = I.getOperand(0 + OpOffset).getReg(); Register Ptr = I.getOperand(1 + OpOffset).getReg(); @@ -1710,8 +1736,30 @@ bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const { } } - MachineBasicBlock &BB = *I.getParent(); - auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpStore)) + MachineIRBuilder MIRBuilder(I); + + if (I.getNumMemOperands()) { + const MachineMemOperand *MemOp = *I.memoperands_begin(); + if (MemOp->isAtomic()) { + uint32_t Scope = static_cast<uint32_t>(getMemScope( + Context, MemOp->getSyncScopeID())); + Register ScopeReg = buildI32Constant(Scope, I); + + AtomicOrdering AO = MemOp->getSuccessOrdering(); + uint32_t MemSem = static_cast<uint32_t>(getMemSemantics(AO)); + Register MemSemReg = buildI32Constant(MemSem, I); + + auto Store = MIRBuilder.buildInstr(SPIRV::OpAtomicStore) + .addUse(Ptr) + .addUse(ScopeReg) + .addUse(MemSemReg) + .addUse(StoreVal); + Store.constrainAllUses(TII, TRI, RBI); + return true; + } + } + + auto MIB = MIRBuilder.buildInstr(SPIRV::OpStore) .addUse(Ptr) .addUse(StoreVal); if (!I.getNumMemOperands()) { diff --git a/llvm/test/CodeGen/SPIRV/transcoding/load-atomic.ll b/llvm/test/CodeGen/SPIRV/transcoding/load-atomic.ll index 0ebd3a5ec20ae..1e2568b05b251 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/load-atomic.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/load-atomic.ll @@ -4,19 +4,25 @@ ; RUN: llc -O0 -mtriple=spirv32-- %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-- %s -o - -filetype=obj | spirv-val %} -;; Check that 'load atomic' LLVM IR instructions are lowered. -;; NOTE: The current lowering is incorrect: 'load atomic' should produce -;; OpAtomicLoad but currently produces OpLoad, silently dropping the atomic -;; ordering. This test documents the broken behaviour so it can be fixed. +; Check that 'load atomic' LLVM IR instructions are lowered correctly to +; OpAtomicLoad with the right Scope and Memory Semantics operands. 
+; +; unordered and monotonic are currently mapped to Memory Semantics `None (Relaxed)` 0x0 ; CHECK-DAG: %[[#Int32:]] = OpTypeInt 32 0 ; CHECK-DAG: %[[#Float:]] = OpTypeFloat 32 ; CHECK-DAG: %[[#Int32Vec:]] = OpTypeVector %[[#Int32]] 2 +; CHECK-DAG: %[[#Const0:]] = OpConstantNull %[[#Int32]] +; CHECK-DAG: %[[#Const1:]] = OpConstant %[[#Int32]] 1{{$}} +; CHECK-DAG: %[[#Const2:]] = OpConstant %[[#Int32]] 2{{$}} +; CHECK-DAG: %[[#Const3:]] = OpConstant %[[#Int32]] 3{{$}} +; CHECK-DAG: %[[#Const4:]] = OpConstant %[[#Int32]] 4{{$}} +; CHECK-DAG: %[[#Const16:]] = OpConstant %[[#Int32]] 16{{$}} define i32 @load_i32_unordered(ptr addrspace(1) %ptr) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] -; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4 +; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const0]] %[[#Const0]] ; CHECK: OpReturnValue %val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4 ret i32 %val @@ -25,7 +31,7 @@ define i32 @load_i32_unordered(ptr addrspace(1) %ptr) { define i32 @load_i32_monotonic(ptr addrspace(1) %ptr) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] -; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4 +; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const0]] %[[#Const0]] ; CHECK: OpReturnValue %val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4 ret i32 %val @@ -34,7 +40,7 @@ define i32 @load_i32_monotonic(ptr addrspace(1) %ptr) { define i32 @load_i32_acquire(ptr addrspace(1) %ptr) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] -; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4 +; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const0]] %[[#Const2]] ; CHECK: OpReturnValue %val = load atomic i32, ptr addrspace(1) %ptr acquire, align 4 ret i32 %val @@ -43,7 +49,7 @@ define i32 @load_i32_acquire(ptr addrspace(1) %ptr) { define i32 @load_i32_seq_cst(ptr addrspace(1) %ptr) { ; 
CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] -; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4 +; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const0]] %[[#Const16]] ; CHECK: OpReturnValue %val = load atomic i32, ptr addrspace(1) %ptr seq_cst, align 4 ret i32 %val @@ -54,7 +60,7 @@ define i32 @load_i32_seq_cst(ptr addrspace(1) %ptr) { define i32 @load_i32_acquire_singlethread(ptr addrspace(1) %ptr) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] -; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4 +; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const4]] %[[#Const2]] ; CHECK: OpReturnValue %val = load atomic i32, ptr addrspace(1) %ptr syncscope("singlethread") acquire, align 4 ret i32 %val @@ -63,7 +69,7 @@ define i32 @load_i32_acquire_singlethread(ptr addrspace(1) %ptr) { define i32 @load_i32_acquire_subgroup(ptr addrspace(1) %ptr) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] -; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4 +; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const3]] %[[#Const2]] ; CHECK: OpReturnValue %val = load atomic i32, ptr addrspace(1) %ptr syncscope("subgroup") acquire, align 4 ret i32 %val @@ -72,7 +78,7 @@ define i32 @load_i32_acquire_subgroup(ptr addrspace(1) %ptr) { define i32 @load_i32_acquire_workgroup(ptr addrspace(1) %ptr) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] -; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4 +; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const2]] %[[#Const2]] ; CHECK: OpReturnValue %val = load atomic i32, ptr addrspace(1) %ptr syncscope("workgroup") acquire, align 4 ret i32 %val @@ -81,7 +87,7 @@ define i32 @load_i32_acquire_workgroup(ptr addrspace(1) %ptr) { define i32 @load_i32_acquire_device(ptr addrspace(1) %ptr) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter 
%[[#]] -; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4 +; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const1]] %[[#Const2]] ; CHECK: OpReturnValue %val = load atomic i32, ptr addrspace(1) %ptr syncscope("device") acquire, align 4 ret i32 %val @@ -92,7 +98,7 @@ define i32 @load_i32_acquire_device(ptr addrspace(1) %ptr) { define float @load_float_acquire(ptr addrspace(1) %ptr) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] -; CHECK: %[[#load:]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 8 +; CHECK: %[[#load:]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const0]] %[[#Const2]] ; CHECK: %[[#val:]] = OpBitcast %[[#Float]] %[[#load]] ; CHECK: OpReturnValue %[[#val]] %val = load atomic float, ptr addrspace(1) %ptr acquire, align 8 @@ -104,8 +110,19 @@ define float @load_float_acquire(ptr addrspace(1) %ptr) { define <2 x i32> @load_vector_acquire(ptr addrspace(1) %ptr) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] -; CHECK: %[[#]] = OpLoad %[[#Int32Vec]] %[[#ptr]] Aligned 8 +; CHECK: %[[#]] = OpAtomicLoad %[[#Int32Vec]] %[[#ptr]] %[[#Const0]] %[[#Const2]] ; CHECK: OpReturnValue %val = load atomic <2 x i32>, ptr addrspace(1) %ptr acquire, align 8 ret <2 x i32> %val } + +; -- test with volatile + +define i32 @load_i32_acquire_device_volatile(ptr addrspace(1) %ptr) { +; CHECK-LABEL: OpFunction %[[#]] +; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] +; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const1]] %[[#Const2]] +; CHECK: OpReturnValue + %val = load atomic volatile i32, ptr addrspace(1) %ptr syncscope("device") acquire, align 4 + ret i32 %val +} diff --git a/llvm/test/CodeGen/SPIRV/transcoding/store-atomic.ll b/llvm/test/CodeGen/SPIRV/transcoding/store-atomic.ll index b11b26451d086..b0f685d5c9e29 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/store-atomic.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/store-atomic.ll @@ -4,20 +4,26 @@ ; RUN: llc -O0 -mtriple=spirv32-- %s -o 
- | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-- %s -o - -filetype=obj | spirv-val %} -;; Check that 'store atomic' LLVM IR instructions are lowered. -;; NOTE: The current lowering is incorrect: 'store atomic' should produce -;; OpAtomicStore but currently produces OpStore, silently dropping the atomic -;; ordering. This test documents the broken behaviour so it can be fixed. +; Check that 'store atomic' LLVM IR instructions are lowered correctly to +; OpAtomicStore with the right Scope and Memory Semantics operands. +; +; unordered and monotonic are currently mapped to Memory Semantics `None (Relaxed)` 0x0 ; CHECK-DAG: %[[#Int32:]] = OpTypeInt 32 0 ; CHECK-DAG: %[[#Float:]] = OpTypeFloat 32 ; CHECK-DAG: %[[#Int32Vec:]] = OpTypeVector %[[#Int32]] 2 +; CHECK-DAG: %[[#Const0:]] = OpConstantNull %[[#Int32]] +; CHECK-DAG: %[[#Const1:]] = OpConstant %[[#Int32]] 1{{$}} +; CHECK-DAG: %[[#Const2:]] = OpConstant %[[#Int32]] 2{{$}} +; CHECK-DAG: %[[#Const3:]] = OpConstant %[[#Int32]] 3{{$}} +; CHECK-DAG: %[[#Const4:]] = OpConstant %[[#Int32]] 4{{$}} +; CHECK-DAG: %[[#Const16:]] = OpConstant %[[#Int32]] 16{{$}} define void @store_i32_unordered(ptr addrspace(1) %ptr, i32 %val) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] ; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]] -; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4 +; CHECK: OpAtomicStore %[[#ptr]] %[[#Const0]] %[[#Const0]] %[[#val]] ; CHECK: OpReturn store atomic i32 %val, ptr addrspace(1) %ptr unordered, align 4 ret void @@ -27,7 +33,7 @@ define void @store_i32_monotonic(ptr addrspace(1) %ptr, i32 %val) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] ; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]] -; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4 +; CHECK: OpAtomicStore %[[#ptr]] %[[#Const0]] %[[#Const0]] %[[#val]] ; CHECK: OpReturn store atomic i32 %val, ptr addrspace(1) %ptr monotonic, align 4 ret void @@ -37,7 +43,7 
@@ define void @store_i32_release(ptr addrspace(1) %ptr, i32 %val) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] ; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]] -; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4 +; CHECK: OpAtomicStore %[[#ptr]] %[[#Const0]] %[[#Const4]] %[[#val]] ; CHECK: OpReturn store atomic i32 %val, ptr addrspace(1) %ptr release, align 4 ret void @@ -47,7 +53,7 @@ define void @store_i32_seq_cst(ptr addrspace(1) %ptr, i32 %val) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] ; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]] -; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4 +; CHECK: OpAtomicStore %[[#ptr]] %[[#Const0]] %[[#Const16]] %[[#val]] ; CHECK: OpReturn store atomic i32 %val, ptr addrspace(1) %ptr seq_cst, align 4 ret void @@ -59,7 +65,7 @@ define void @store_i32_release_singlethread(ptr addrspace(1) %ptr, i32 %val) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] ; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]] -; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4 +; CHECK: OpAtomicStore %[[#ptr]] %[[#Const4]] %[[#Const4]] %[[#val]] ; CHECK: OpReturn store atomic i32 %val, ptr addrspace(1) %ptr syncscope("singlethread") release, align 4 ret void @@ -69,7 +75,7 @@ define void @store_i32_release_subgroup(ptr addrspace(1) %ptr, i32 %val) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] ; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]] -; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4 +; CHECK: OpAtomicStore %[[#ptr]] %[[#Const3]] %[[#Const4]] %[[#val]] ; CHECK: OpReturn store atomic i32 %val, ptr addrspace(1) %ptr syncscope("subgroup") release, align 4 ret void @@ -79,7 +85,7 @@ define void @store_i32_release_workgroup(ptr addrspace(1) %ptr, i32 %val) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] ; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]] -; 
CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4 +; CHECK: OpAtomicStore %[[#ptr]] %[[#Const2]] %[[#Const4]] %[[#val]] ; CHECK: OpReturn store atomic i32 %val, ptr addrspace(1) %ptr syncscope("workgroup") release, align 4 ret void @@ -89,7 +95,7 @@ define void @store_i32_release_device(ptr addrspace(1) %ptr, i32 %val) { ; CHECK-LABEL: OpFunction %[[#]] ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] ; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]] -; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4 +; CHECK: OpAtomicStore %[[#ptr]] %[[#Const1]] %[[#Const4]] %[[#val]] ; CHECK: OpReturn store atomic i32 %val, ptr addrspace(1) %ptr syncscope("device") release, align 4 ret void @@ -102,7 +108,7 @@ define void @store_float_release(ptr addrspace(1) %ptr, float %val) { ; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] ; CHECK: %[[#val:]] = OpFunctionParameter %[[#Float]] ; CHECK: %[[#cast:]] = OpBitcast %[[#Int32]] %[[#val]] -; CHECK: OpStore %[[#ptr]] %[[#cast]] Aligned 8 +; CHECK: OpAtomicStore %[[#ptr]] %[[#Const0]] %[[#Const4]] %[[#cast]] ; CHECK: OpReturn store atomic float %val, ptr addrspace(1) %ptr release, align 8 ret void @@ -119,3 +125,15 @@ define void @store_vector_release(ptr addrspace(1) %ptr, <2 x i32> %val) { store atomic <2 x i32> %val, ptr addrspace(1) %ptr release, align 8 ret void } + +; -- test with volatile + +define void @store_i32_release_device_volatile(ptr addrspace(1) %ptr, i32 %val) { +; CHECK-LABEL: OpFunction %[[#]] +; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]] +; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]] +; CHECK: OpAtomicStore %[[#ptr]] %[[#Const1]] %[[#Const4]] %[[#val]] +; CHECK: OpReturn + store atomic volatile i32 %val, ptr addrspace(1) %ptr syncscope("device") release, align 4 + ret void +} _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
