================
@@ -2326,6 +2326,20 @@ bool
SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
}
#endif
+ if (ST->isPreciseMemoryEnabled() && Inst.mayLoadOrStore()) {
+ AMDGPU::Waitcnt Wait;
+ if (ST->hasExtendedWaitCounts())
+ Wait = AMDGPU::Waitcnt(0, 0, 0, 0, 0, 0, 0);
+ else
+ Wait = AMDGPU::Waitcnt(0, 0, 0, 0);
+
+ if (!Inst.mayStore())
+ Wait.StoreCnt = ~0u;
----------------
jwanggit86 wrote:
The code has been updated as suggested. The test file includes cases for both
atomic-with-ret and atomic-no-ret. However, in the following case, even though
`ds_add_u32` is atomic-no-ret, the StoreCnt field of the Waitcnt ends up as ~0u
after the call to `ScoreBrackets.simplifyWaitcnt(Wait)`. Therefore, no s_waitcnt
for StoreCnt is generated after the `ds_add_u32`.
```
define amdgpu_kernel void @atomic_add_local(ptr addrspace(3) %local) {
%unused = atomicrmw volatile add ptr addrspace(3) %local, i32 5 seq_cst
ret void
}
```
The generated code for GFX1100 is:
```
; GFX11: ds_add_u32 v0, v1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: buffer_gl0_inv
```
Please let me know if this looks correct.
https://github.com/llvm/llvm-project/pull/79236
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits