[clang] 508b066 - [Remarks] [AMDGPU] Emit optimization remarks for atomics generating hardware instructions

Anshil Gandhi via cfe-commits Thu, 19 Aug 2021 19:51:39 -0700

Author: Anshil Gandhi
Date: 2021-08-19T20:51:19-06:00
New Revision: 508b06699a396cc6f2f2602dab350860cb69f087


URL: 
https://github.com/llvm/llvm-project/commit/508b06699a396cc6f2f2602dab350860cb69f087
DIFF: 
https://github.com/llvm/llvm-project/commit/508b06699a396cc6f2f2602dab350860cb69f087.diff

LOG: [Remarks] [AMDGPU] Emit optimization remarks for atomics generating 
hardware instructions

Produce remarks when atomic instructions are expanded into hardware instructions
in SIISelLowering.cpp. Currently, these remarks are only emitted for atomic fadd
instructions.

Differential Revision: https://reviews.llvm.org/D108150

Added: 
    clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl
    clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl
    llvm/test/CodeGen/AMDGPU/atomics-cas-remarks-gfx90a.ll
    llvm/test/CodeGen/AMDGPU/atomics-hw-remarks-gfx90a.ll

Modified: 
    llvm/lib/CodeGen/AtomicExpandPass.cpp
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Removed: 
    clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl
    llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll


################################################################################
diff  --git a/clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl 
b/clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl
similarity index 100%
rename from clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl
rename to clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl

diff  --git a/clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl 
b/clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl
new file mode 100644
index 0000000000000..ea3324126c209
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu 
gfx90a \
+// RUN:     -Rpass=si-lower -munsafe-fp-atomics %s -S -emit-llvm -o - 2>&1 | \
+// RUN:     FileCheck %s --check-prefix=GFX90A-HW
+
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu 
gfx90a \
+// RUN:     -Rpass=si-lower -munsafe-fp-atomics %s -S -o - 2>&1 | \
+// RUN:     FileCheck %s --check-prefix=GFX90A-HW-REMARK
+
+
+// REQUIRES: amdgpu-registered-target
+
+typedef enum memory_order {
+  memory_order_relaxed = __ATOMIC_RELAXED,
+  memory_order_acquire = __ATOMIC_ACQUIRE,
+  memory_order_release = __ATOMIC_RELEASE,
+  memory_order_acq_rel = __ATOMIC_ACQ_REL,
+  memory_order_seq_cst = __ATOMIC_SEQ_CST
+} memory_order;
+
+typedef enum memory_scope {
+  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
+  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
+  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
+  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
+  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
+#endif
+} memory_scope;
+
+// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation 
at memory scope workgroup-one-as due to an unsafe request. [-Rpass=si-lower]
+// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation 
at memory scope agent-one-as due to an unsafe request. [-Rpass=si-lower]
+// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation 
at memory scope wavefront-one-as due to an unsafe request. [-Rpass=si-lower]
+// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc
+// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc
+// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc
+// GFX90A-HW-LABEL: @atomic_unsafe_hw
+// GFX90A-HW:   atomicrmw fadd float addrspace(1)* %{{.*}}, float %{{.*}} 
syncscope("workgroup-one-as") monotonic, align 4
+// GFX90A-HW:   atomicrmw fadd float addrspace(1)* %{{.*}}, float %{{.*}} 
syncscope("agent-one-as") monotonic, align 4
+// GFX90A-HW:   atomicrmw fadd float addrspace(1)* %{{.*}}, float %{{.*}} 
syncscope("wavefront-one-as") monotonic, align 4
+void atomic_unsafe_hw(__global atomic_float *d, float a) {
+  float ret1 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, 
memory_scope_work_group);
+  float ret2 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, 
memory_scope_device);
+  float ret3 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, 
memory_scope_sub_group);
+}

diff  --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp 
b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 47cdd222702f2..1297f99698d8b 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -610,7 +610,7 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
                           : SSNs[AI->getSyncScopeID()];
       OptimizationRemarkEmitter ORE(AI->getFunction());
       ORE.emit([&]() {
-        return OptimizationRemark(DEBUG_TYPE, "Passed", AI->getFunction())
+        return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
                << "A compare and swap loop was generated for an atomic "
                << AI->getOperationName(AI->getOperation()) << " operation at "
                << MemScope << " memory scope";

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9a81ada5830d9..faf38bb0b914b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -19,6 +19,7 @@
 #include "SIRegisterInfo.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -12129,6 +12130,25 @@ static bool fpModeMatchesGlobalFPAtomicMode(const 
AtomicRMWInst *RMW) {
 
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+
+  auto ReportUnsafeHWInst = [&](TargetLowering::AtomicExpansionKind Kind) {
+    OptimizationRemarkEmitter ORE(RMW->getFunction());
+    LLVMContext &Ctx = RMW->getFunction()->getContext();
+    SmallVector<StringRef> SSNs;
+    Ctx.getSyncScopeNames(SSNs);
+    auto MemScope = SSNs[RMW->getSyncScopeID()].empty()
+                        ? "system"
+                        : SSNs[RMW->getSyncScopeID()];
+    ORE.emit([&]() {
+      return OptimizationRemark(DEBUG_TYPE, "Passed", RMW)
+             << "Hardware instruction generated for atomic "
+             << RMW->getOperationName(RMW->getOperation())
+             << " operation at memory scope " << MemScope
+             << " due to an unsafe request.";
+    });
+    return Kind;
+  };
+
   switch (RMW->getOperation()) {
   case AtomicRMWInst::FAdd: {
     Type *Ty = RMW->getType();
@@ -12163,13 +12183,13 @@ 
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
             SSID == RMW->getContext().getOrInsertSyncScopeID("one-as"))
           return AtomicExpansionKind::CmpXChg;
 
-        return AtomicExpansionKind::None;
+        return ReportUnsafeHWInst(AtomicExpansionKind::None);
       }
 
       if (AS == AMDGPUAS::FLAT_ADDRESS)
         return AtomicExpansionKind::CmpXChg;
 
-      return RMW->use_empty() ? AtomicExpansionKind::None
+      return RMW->use_empty() ? ReportUnsafeHWInst(AtomicExpansionKind::None)
                               : AtomicExpansionKind::CmpXChg;
     }
 
@@ -12180,11 +12200,13 @@ 
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
       if (!Ty->isDoubleTy())
         return AtomicExpansionKind::None;
 
-      return (fpModeMatchesGlobalFPAtomicMode(RMW) ||
-              RMW->getFunction()
-                      ->getFnAttribute("amdgpu-unsafe-fp-atomics")
-                      .getValueAsString() == "true")
-                 ? AtomicExpansionKind::None
+      if (fpModeMatchesGlobalFPAtomicMode(RMW))
+        return AtomicExpansionKind::None;
+
+      return RMW->getFunction()
+                         ->getFnAttribute("amdgpu-unsafe-fp-atomics")
+                         .getValueAsString() == "true"
+                 ? ReportUnsafeHWInst(AtomicExpansionKind::None)
                  : AtomicExpansionKind::CmpXChg;
     }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll 
b/llvm/test/CodeGen/AMDGPU/atomics-cas-remarks-gfx90a.ll
similarity index 100%
rename from llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll
rename to llvm/test/CodeGen/AMDGPU/atomics-cas-remarks-gfx90a.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/atomics-hw-remarks-gfx90a.ll 
b/llvm/test/CodeGen/AMDGPU/atomics-hw-remarks-gfx90a.ll
new file mode 100644
index 0000000000000..35637cd9882a1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/atomics-hw-remarks-gfx90a.ll
@@ -0,0 +1,95 @@
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs 
--pass-remarks=si-lower \
+; RUN:      %s -o - 2>&1 | FileCheck %s --check-prefix=GFX90A-HW
+
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at 
memory scope system due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at 
memory scope agent due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at 
memory scope workgroup due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at 
memory scope wavefront due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at 
memory scope singlethread due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at 
memory scope agent-one-as due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at 
memory scope workgroup-one-as due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at 
memory scope wavefront-one-as due to an unsafe request.
+; GFX90A-HW: Hardware instruction generated for atomic fadd operation at 
memory scope singlethread-one-as due to an unsafe request.
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw:
+; GFX90A-HW:    ds_add_f64 v2, v[0:1]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw(double addrspace(3)* %ptr) #0 {
+main_body:
+  %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_agent:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_agent(float addrspace(1)* 
%ptr, float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val 
syncscope("agent") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wg:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_wg(float addrspace(1)* %ptr, 
float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val 
syncscope("workgroup") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wavefront:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_wavefront(float addrspace(1)* 
%ptr, float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val 
syncscope("wavefront") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_single_thread:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_single_thread(float 
addrspace(1)* %ptr, float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val 
syncscope("singlethread") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_aoa:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_aoa(float addrspace(1)* %ptr, 
float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val 
syncscope("agent-one-as") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wgoa:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_wgoa(float addrspace(1)* %ptr, 
float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val 
syncscope("workgroup-one-as") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wfoa:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_wfoa(float addrspace(1)* %ptr, 
float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val 
syncscope("wavefront-one-as") monotonic, align 4
+  ret void
+}
+
+; GFX90A-HW-LABEL: atomic_add_unsafe_hw_stoa:
+; GFX90A-HW:    global_atomic_add_f32 v0, v1, s[2:3]
+; GFX90A-HW:    s_endpgm
+define amdgpu_kernel void @atomic_add_unsafe_hw_stoa(float addrspace(1)* %ptr, 
float %val) #0 {
+main_body:
+  %ret = atomicrmw fadd float addrspace(1)* %ptr, float %val 
syncscope("singlethread-one-as") monotonic, align 4
+  ret void
+}
+
+attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" 
"amdgpu-unsafe-fp-atomics"="true" }


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 508b066 - [Remarks] [AMDGPU] Emit optimization remarks for atomics generating hardware instructions

Reply via email to