arsenm updated this revision to Diff 117855.
arsenm added a comment.

Check noduplicate


https://reviews.llvm.org/D38113

Files:
  include/clang/Basic/LangOptions.h
  lib/CodeGen/CGCall.cpp
  test/CodeGenOpenCL/amdgpu-attrs.cl
  test/CodeGenOpenCL/convergent.cl

Index: test/CodeGenOpenCL/convergent.cl
===================================================================
--- test/CodeGenOpenCL/convergent.cl
+++ test/CodeGenOpenCL/convergent.cl
@@ -1,9 +1,19 @@
-// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck %s
+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck -enable-var-scope %s
+
+// This function is initially assumed convergent; the attribute can then be
+// removed once it is deduced that convergence is not required.
+
+// CHECK-LABEL: define spir_func void @non_convfun() local_unnamed_addr #0
+// CHECK: ret void
+__attribute__((noinline))
+void non_convfun(void) {
+  volatile int* p;
+  *p = 0;
+}
 
 void convfun(void) __attribute__((convergent));
-void non_convfun(void);
 void nodupfun(void) __attribute__((noduplicate));
 
+// External functions should be assumed convergent.
 void f(void);
 void g(void);
 
@@ -17,19 +27,23 @@
 //      non_convfun();
 //    }
 //
-// CHECK: define spir_func void @test_merge_if(i32 %[[a:.+]])
-// CHECK: %[[tobool:.+]] = icmp eq i32 %[[a]], 0
+// CHECK-LABEL: define spir_func void @test_merge_if(i32 %a) local_unnamed_addr #1 {
+// CHECK: %[[tobool:.+]] = icmp eq i32 %a, 0
 // CHECK: br i1 %[[tobool]], label %[[if_end3_critedge:.+]], label %[[if_then:.+]]
+
 // CHECK: [[if_then]]:
 // CHECK: tail call spir_func void @f()
 // CHECK: tail call spir_func void @non_convfun()
 // CHECK: tail call spir_func void @g()
+
 // CHECK: br label %[[if_end3:.+]]
+
 // CHECK: [[if_end3_critedge]]:
 // CHECK: tail call spir_func void @non_convfun()
 // CHECK: br label %[[if_end3]]
+
 // CHECK: [[if_end3]]:
-// CHECK-LABEL: ret void
+// CHECK: ret void
 
 void test_merge_if(int a) {
   if (a) {
@@ -41,22 +55,22 @@
   }
 }
 
-// CHECK-DAG: declare spir_func void @f()
-// CHECK-DAG: declare spir_func void @non_convfun()
-// CHECK-DAG: declare spir_func void @g()
+// CHECK-DAG: declare spir_func void @f() local_unnamed_addr #2
+// CHECK-DAG: declare spir_func void @g() local_unnamed_addr #2
+
 
 // Test two if's are not merged.
-// CHECK: define spir_func void @test_no_merge_if(i32 %[[a:.+]])
-// CHECK:  %[[tobool:.+]] = icmp eq i32 %[[a]], 0
+// CHECK-LABEL: define spir_func void @test_no_merge_if(i32 %a) local_unnamed_addr #1
+// CHECK:  %[[tobool:.+]] = icmp eq i32 %a, 0
 // CHECK: br i1 %[[tobool]], label %[[if_end:.+]], label %[[if_then:.+]]
 // CHECK: [[if_then]]:
 // CHECK: tail call spir_func void @f()
 // CHECK-NOT: call spir_func void @convfun()
 // CHECK-NOT: call spir_func void @g()
 // CHECK: br label %[[if_end]]
 // CHECK: [[if_end]]:
 // CHECK:  %[[tobool_pr:.+]] = phi i1 [ true, %[[if_then]] ], [ false, %{{.+}} ]
-// CHECK:  tail call spir_func void @convfun() #[[attr5:.+]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4:.+]]
 // CHECK:  br i1 %[[tobool_pr]], label %[[if_then2:.+]], label %[[if_end3:.+]]
 // CHECK: [[if_then2]]:
 // CHECK: tail call spir_func void @g()
@@ -74,20 +88,20 @@
   }
 }
 
-// CHECK: declare spir_func void @convfun(){{[^#]*}} #[[attr2:[0-9]+]]
+// CHECK: declare spir_func void @convfun(){{[^#]*}} #2
 
 // Test loop is unrolled for convergent function.
-// CHECK-LABEL: define spir_func void @test_unroll()
-// CHECK:  tail call spir_func void @convfun() #[[attr5:[0-9]+]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
-// CHECK:  tail call spir_func void @convfun() #[[attr5]]
+// CHECK-LABEL: define spir_func void @test_unroll() local_unnamed_addr #1
+// CHECK:  tail call spir_func void @convfun() #[[attr4:[0-9]+]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
+// CHECK:  tail call spir_func void @convfun() #[[attr4]]
 // CHECK-LABEL:  ret void
 
 void test_unroll() {
@@ -101,7 +115,7 @@
 // CHECK: [[for_cond_cleanup:.+]]:
 // CHECK:  ret void
 // CHECK: [[for_body]]:
-// CHECK:  tail call spir_func void @nodupfun() #[[attr6:[0-9]+]]
+// CHECK:  tail call spir_func void @nodupfun() #[[attr5:[0-9]+]]
 // CHECK-NOT: call spir_func void @nodupfun()
 // CHECK:  br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
 
@@ -112,7 +126,9 @@
 
 // CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
 
-// CHECK-DAG: attributes #[[attr2]] = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK-DAG: attributes #[[attr3]] = { {{[^}]*}}noduplicate{{[^}]*}} }
-// CHECK-DAG: attributes #[[attr5]] = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK-DAG: attributes #[[attr6]] = { {{[^}]*}}noduplicate{{[^}]*}} }
+// CHECK: attributes #0 = { noinline norecurse nounwind "
+// CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
+// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #5 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
Index: test/CodeGenOpenCL/amdgpu-attrs.cl
===================================================================
--- test/CodeGenOpenCL/amdgpu-attrs.cl
+++ test/CodeGenOpenCL/amdgpu-attrs.cl
@@ -151,28 +151,28 @@
 // CHECK-NOT: "amdgpu-num-sgpr"="0"
 // CHECK-NOT: "amdgpu-num-vgpr"="0"
 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32"
-// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64"
-
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
-
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -1750,13 +1750,16 @@
       FuncAttrs.addAttribute("backchain");
   }
 
-  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
-    // Conservatively, mark all functions and calls in CUDA as convergent
-    // (meaning, they may call an intrinsically convergent op, such as
-    // __syncthreads(), and so can't have certain optimizations applied around
-    // them).  LLVM will remove this attribute where it safely can.
+  if (getLangOpts().assumeFunctionsAreConvergent()) {
+    // Conservatively, mark all functions and calls in CUDA and OpenCL as
+    // convergent (meaning, they may call an intrinsically convergent op, such
+    // as __syncthreads() / barrier(), and so can't have certain optimizations
+    // applied around them).  LLVM will remove this attribute where it safely
+    // can.
     FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+  }
 
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
     // Exceptions aren't supported in CUDA device code.
     FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
 
Index: include/clang/Basic/LangOptions.h
===================================================================
--- include/clang/Basic/LangOptions.h
+++ include/clang/Basic/LangOptions.h
@@ -197,6 +197,10 @@
   bool allowsNonTrivialObjCLifetimeQualifiers() const {
     return ObjCAutoRefCount || ObjCWeak;
   }
+
+  bool assumeFunctionsAreConvergent() const {
+    return (CUDA && CUDAIsDevice) || OpenCL;
+  }
 };
 
 /// \brief Floating point control options
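
As a minimal standalone sketch of the gate this patch adds (the LangOpts
struct and main() below are illustrative scaffolding, not code from the
patch; only the predicate body mirrors the new
LangOptions::assumeFunctionsAreConvergent()):

#include <iostream>

// Toy model of the gating logic: with this patch the frontend seeds the
// `convergent` attribute for OpenCL as well as for CUDA device
// compilations; LLVM later drops the attribute wherever it can prove a
// function is not convergent.
struct LangOpts {
  bool CUDA;
  bool CUDAIsDevice;
  bool OpenCL;

  // Same body as the LangOptions::assumeFunctionsAreConvergent() added by
  // this patch.
  bool assumeFunctionsAreConvergent() const {
    return (CUDA && CUDAIsDevice) || OpenCL;
  }
};

int main() {
  const LangOpts CUDADevice{true, true, false};
  const LangOpts CUDAHost{true, false, false};
  const LangOpts OpenCLOpts{false, false, true};
  const LangOpts PlainCXX{false, false, false};

  std::cout << std::boolalpha
            << "CUDA device: " << CUDADevice.assumeFunctionsAreConvergent() << '\n'  // true
            << "CUDA host:   " << CUDAHost.assumeFunctionsAreConvergent() << '\n'    // false
            << "OpenCL:      " << OpenCLOpts.assumeFunctionsAreConvergent() << '\n'  // true
            << "C++:         " << PlainCXX.assumeFunctionsAreConvergent() << '\n';   // false
}

The test updates above follow from this table: OpenCL translation units now
start with every function marked convergent, and the updated convergent.cl
checks that LLVM removes the attribute from non_convfun, where it can prove
convergence is irrelevant.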