llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Joseph Huber (jhuber6) <details> <summary>Changes</summary> Summary: All the infrastructure for this is here, it's just no one's turned it on. --- Patch is 213.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/184737.diff 2 Files Affected: - (modified) clang/lib/CodeGen/Targets/NVPTX.cpp (+33) - (modified) clang/test/CodeGen/scoped-atomic-ops.c (+1419-130) ``````````diff diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index ba2acd821c704..50e116765b892 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -8,6 +8,7 @@ #include "ABIInfoImpl.h" #include "TargetInfo.h" +#include "clang/Basic/SyncScope.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/CallingConv.h" @@ -50,6 +51,9 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { CodeGen::CodeGenModule &M) const override; bool shouldEmitStaticExternCAliases() const override; + StringRef getLLVMSyncScopeStr(const LangOptions &LangOpts, SyncScope Scope, + llvm::AtomicOrdering Ordering) const override; + llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const override; @@ -299,6 +303,35 @@ bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { return false; } +StringRef NVPTXTargetCodeGenInfo::getLLVMSyncScopeStr( + const LangOptions &LangOpts, SyncScope Scope, + llvm::AtomicOrdering Ordering) const { + switch (Scope) { + case SyncScope::HIPSingleThread: + case SyncScope::SingleScope: + return "singlethread"; + case SyncScope::HIPWavefront: + case SyncScope::OpenCLSubGroup: + case SyncScope::WavefrontScope: + case SyncScope::HIPWorkgroup: + case SyncScope::OpenCLWorkGroup: + case SyncScope::WorkgroupScope: + return "block"; + case SyncScope::HIPCluster: + case SyncScope::ClusterScope: + return "cluster"; + case 
SyncScope::HIPAgent: + case SyncScope::OpenCLDevice: + case SyncScope::DeviceScope: + return "device"; + case SyncScope::SystemScope: + case SyncScope::HIPSystem: + case SyncScope::OpenCLAllSVMDevices: + return ""; + } + llvm_unreachable("Unknown SyncScope enum"); +} + llvm::Constant * NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT, diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c index 3fbaf75cf98e6..6df0d439d6b88 100644 --- a/clang/test/CodeGen/scoped-atomic-ops.c +++ b/clang/test/CodeGen/scoped-atomic-ops.c @@ -3,6 +3,8 @@ // RUN: -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN_CL_DEF %s // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \ // RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN_CL_20 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=nvptx64-nvidia-cuda -ffreestanding \ +// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefixes=NVPTX %s // RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \ // RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s @@ -63,6 +65,33 @@ // AMDGCN_CL_20-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(5) [[V]], align 4 // AMDGCN_CL_20-NEXT: ret i32 [[TMP12]] // +// NVPTX-LABEL: define hidden i32 @fi1a( +// NVPTX-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] { +// NVPTX-NEXT: [[ENTRY:.*:]] +// NVPTX-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// NVPTX-NEXT: [[V:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4 +// NVPTX-NEXT: store i32 [[TMP1]], ptr [[V]], align 4 +// NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("device") monotonic, align 4 +// 
NVPTX-NEXT: store i32 [[TMP3]], ptr [[V]], align 4 +// NVPTX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("block") monotonic, align 4 +// NVPTX-NEXT: store i32 [[TMP5]], ptr [[V]], align 4 +// NVPTX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("cluster") monotonic, align 4 +// NVPTX-NEXT: store i32 [[TMP7]], ptr [[V]], align 4 +// NVPTX-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("block") monotonic, align 4 +// NVPTX-NEXT: store i32 [[TMP9]], ptr [[V]], align 4 +// NVPTX-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4 +// NVPTX-NEXT: store i32 [[TMP11]], ptr [[V]], align 4 +// NVPTX-NEXT: [[TMP12:%.*]] = load i32, ptr [[V]], align 4 +// NVPTX-NEXT: ret i32 [[TMP12]] +// // SPIRV-LABEL: define hidden spir_func i32 @fi1a( // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] { // SPIRV-NEXT: [[ENTRY:.*:]] @@ -159,6 +188,57 @@ int fi1a(int *i) { // AMDGCN-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 // AMDGCN-NEXT: ret i32 [[TMP25]] // +// NVPTX-LABEL: define hidden i32 @fi1b( +// NVPTX-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { +// NVPTX-NEXT: [[ENTRY:.*:]] +// NVPTX-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// NVPTX-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: [[ATOMIC_TEMP5:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP1:%.*]] 
= load atomic i32, ptr [[TMP0]] monotonic, align 4 +// NVPTX-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP]], align 4 +// NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 +// NVPTX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("device") monotonic, align 4 +// NVPTX-NEXT: store i32 [[TMP5]], ptr [[ATOMIC_TEMP1]], align 4 +// NVPTX-NEXT: [[TMP6:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4 +// NVPTX-NEXT: [[TMP7:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4 +// NVPTX-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("block") monotonic, align 4 +// NVPTX-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP2]], align 4 +// NVPTX-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4 +// NVPTX-NEXT: [[TMP11:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: store i32 [[TMP10]], ptr [[TMP11]], align 4 +// NVPTX-NEXT: [[TMP12:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP13:%.*]] = load atomic i32, ptr [[TMP12]] syncscope("cluster") monotonic, align 4 +// NVPTX-NEXT: store i32 [[TMP13]], ptr [[ATOMIC_TEMP3]], align 4 +// NVPTX-NEXT: [[TMP14:%.*]] = load i32, ptr [[ATOMIC_TEMP3]], align 4 +// NVPTX-NEXT: [[TMP15:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: store i32 [[TMP14]], ptr [[TMP15]], align 4 +// NVPTX-NEXT: [[TMP16:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP17:%.*]] = load atomic i32, ptr [[TMP16]] syncscope("block") monotonic, align 4 +// NVPTX-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP4]], align 4 +// NVPTX-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP4]], align 4 +// NVPTX-NEXT: [[TMP19:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: store i32 
[[TMP18]], ptr [[TMP19]], align 4 +// NVPTX-NEXT: [[TMP20:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP21:%.*]] = load atomic i32, ptr [[TMP20]] syncscope("singlethread") monotonic, align 4 +// NVPTX-NEXT: store i32 [[TMP21]], ptr [[ATOMIC_TEMP5]], align 4 +// NVPTX-NEXT: [[TMP22:%.*]] = load i32, ptr [[ATOMIC_TEMP5]], align 4 +// NVPTX-NEXT: [[TMP23:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: store i32 [[TMP22]], ptr [[TMP23]], align 4 +// NVPTX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// NVPTX-NEXT: ret i32 [[TMP25]] +// // SPIRV-LABEL: define hidden spir_func i32 @fi1b( // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { // SPIRV-NEXT: [[ENTRY:.*:]] @@ -283,6 +363,33 @@ int fi1b(int *i) { // AMDGCN_CL_20-NEXT: store atomic i32 [[TMP17]], ptr [[TMP15]] syncscope("singlethread") monotonic, align 4 // AMDGCN_CL_20-NEXT: ret void // +// NVPTX-LABEL: define hidden void @fi2a( +// NVPTX-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { +// NVPTX-NEXT: [[ENTRY:.*:]] +// NVPTX-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// NVPTX-NEXT: [[V:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: store i32 1, ptr [[V]], align 4 +// NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[V]], align 4 +// NVPTX-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] monotonic, align 4 +// NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP3:%.*]] = load i32, ptr [[V]], align 4 +// NVPTX-NEXT: store atomic i32 [[TMP3]], ptr [[TMP2]] syncscope("device") monotonic, align 4 +// NVPTX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP5:%.*]] = load i32, ptr [[V]], align 4 +// NVPTX-NEXT: store atomic i32 [[TMP5]], ptr [[TMP4]] syncscope("block") monotonic, align 4 +// NVPTX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// 
NVPTX-NEXT: [[TMP7:%.*]] = load i32, ptr [[V]], align 4 +// NVPTX-NEXT: store atomic i32 [[TMP7]], ptr [[TMP6]] syncscope("cluster") monotonic, align 4 +// NVPTX-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP9:%.*]] = load i32, ptr [[V]], align 4 +// NVPTX-NEXT: store atomic i32 [[TMP9]], ptr [[TMP8]] syncscope("block") monotonic, align 4 +// NVPTX-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP11:%.*]] = load i32, ptr [[V]], align 4 +// NVPTX-NEXT: store atomic i32 [[TMP11]], ptr [[TMP10]] syncscope("singlethread") monotonic, align 4 +// NVPTX-NEXT: ret void +// // SPIRV-LABEL: define hidden spir_func void @fi2a( // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { // SPIRV-NEXT: [[ENTRY:.*:]] @@ -364,6 +471,43 @@ void fi2a(int *i) { // AMDGCN-NEXT: store atomic i32 [[TMP11]], ptr [[TMP10]] syncscope("singlethread") monotonic, align 4 // AMDGCN-NEXT: ret void // +// NVPTX-LABEL: define hidden void @fi2b( +// NVPTX-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { +// NVPTX-NEXT: [[ENTRY:.*:]] +// NVPTX-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// NVPTX-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: [[DOTATOMICTMP2:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: [[DOTATOMICTMP4:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4 +// NVPTX-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4 +// NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4 +// NVPTX-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] monotonic, align 4 +// NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: store i32 1, ptr [[DOTATOMICTMP1]], align 4 +// NVPTX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTATOMICTMP1]], align 4 +// 
NVPTX-NEXT: store atomic i32 [[TMP3]], ptr [[TMP2]] syncscope("device") monotonic, align 4 +// NVPTX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: store i32 1, ptr [[DOTATOMICTMP2]], align 4 +// NVPTX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTATOMICTMP2]], align 4 +// NVPTX-NEXT: store atomic i32 [[TMP5]], ptr [[TMP4]] syncscope("block") monotonic, align 4 +// NVPTX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: store i32 1, ptr [[DOTATOMICTMP3]], align 4 +// NVPTX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTATOMICTMP3]], align 4 +// NVPTX-NEXT: store atomic i32 [[TMP7]], ptr [[TMP6]] syncscope("cluster") monotonic, align 4 +// NVPTX-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: store i32 1, ptr [[DOTATOMICTMP4]], align 4 +// NVPTX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTATOMICTMP4]], align 4 +// NVPTX-NEXT: store atomic i32 [[TMP9]], ptr [[TMP8]] syncscope("block") monotonic, align 4 +// NVPTX-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// NVPTX-NEXT: store i32 1, ptr [[DOTATOMICTMP5]], align 4 +// NVPTX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP5]], align 4 +// NVPTX-NEXT: store atomic i32 [[TMP11]], ptr [[TMP10]] syncscope("singlethread") monotonic, align 4 +// NVPTX-NEXT: ret void +// // SPIRV-LABEL: define hidden spir_func void @fi2b( // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { // SPIRV-NEXT: [[ENTRY:.*:]] @@ -472,7 +616,7 @@ void fi2b(int *i) { // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.no.remote.memory [[META3]] +// AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] monotonic, align 4, 
!amdgpu.no.fine.grained.memory [[META2:![0-9]+]], !amdgpu.no.remote.memory [[META2]] // AMDGCN_CL_DEF-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 @@ -480,7 +624,7 @@ void fi2b(int *i) { // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 -// AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] +// AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META2]], !amdgpu.no.remote.memory [[META2]] // AMDGCN_CL_DEF-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 @@ -488,7 +632,7 @@ void fi2b(int *i) { // AMDGCN_CL_DEF-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 -// AMDGCN_CL_DEF-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] +// AMDGCN_CL_DEF-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META2]], !amdgpu.no.remote.memory [[META2]] // AMDGCN_CL_DEF-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[ATOMIC_TEMP4_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 @@ -496,7 +640,7 @@ void fi2b(int *i) { // AMDGCN_CL_DEF-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 -// AMDGCN_CL_DEF-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] +// AMDGCN_CL_DEF-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META2]], !amdgpu.no.remote.memory [[META2]] // AMDGCN_CL_DEF-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 @@ -504,7 +648,7 @@ void fi2b(int *i) { // AMDGCN_CL_DEF-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 -// AMDGCN_CL_DEF-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] +// AMDGCN_CL_DEF-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META2]], !amdgpu.no.remote.memory [[META2]] // AMDGCN_CL_DEF-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 @@ -512,7 +656,7 @@ void fi2b(int *i) { // AMDGCN_CL_DEF-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], 
align 8 // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP9_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9_ASCAST]], align 4 -// AMDGCN_CL_DEF-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] +// AMDGCN_CL_DEF-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META2]], !amdgpu.no.remote.memory [[META2]] // AMDGCN_CL_DEF-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 @@ -520,7 +664,7 @@ void fi2b(int *i) { // AMDGCN_CL_DEF-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP11_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4 -// AMDGCN_CL_DEF-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] +// AMDGCN_CL_DEF-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META2]], !amdgpu.no.remote.memory [[META2]] // AMDGCN_CL_DEF-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 @@ -528,7 +672,7 @@ void fi2b(int *i) { // AMDGCN_CL_DEF-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP13_ASCAST]], align 4 // AMDGCN_CL_DEF-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4 -// AMDGCN_CL_DEF-NEXT: [[TMP37:%.*]] = 
atomicrmw max ptr [[TMP35]], i32 [[TMP36]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/184737 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
