[PATCH] D17056: Mark all CUDA device-side function defs and decls as convergent.

Justin Lebar via cfe-commits Tue, 09 Feb 2016 16:19:46 -0800

jlebar created this revision.
jlebar added a reviewer: majnemer.
jlebar added subscribers: tra, echristo, jhen, cfe-commits.


This is important for e.g. the following case:

  void sync() { __syncthreads(); }
  void foo() {
    do_something();
    sync();
    do_something_else():
  }

Without this change, if the optimizer does not inline sync() (which it
won't because __syncthreads is also marked as noduplicate, for now
anyway), it is free to perform optimizations on sync() that it would not
be able to perform on __syncthreads(), because sync() is not marked as
convergent.

This chagne is conservative; the optimizer will remove these attrs where
it can, see r260318, r260319.

http://reviews.llvm.org/D17056

Files:
  lib/CodeGen/CodeGenModule.cpp
  test/CodeGenCUDA/convergent.cu

Index: test/CodeGenCUDA/convergent.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/convergent.cu
@@ -0,0 +1,34 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
+// RUN:   -o - %s | FileCheck -check-prefix DEVICE %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | \
+// RUN:  FileCheck -check-prefix HOST %s
+
+#include "Inputs/cuda.h"
+
+// DEVICE: Function Attrs:
+// DEVICE-SAME: convergent
+// DEVICE-NEXT: define void @_Z3foov
+__device__ void foo() {}
+
+// HOST: Function Attrs:
+// HOST-NOT: convergent
+// HOST-NEXT: define void @_Z3barv
+// DEVICE: Function Attrs:
+// DEVICE-SAME: convergent
+// DEVICE-NEXT: define void @_Z3barv
+__host__ __device__ void baz();
+__host__ __device__ void bar() { baz(); }
+
+// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// DEVICE: attributes [[BAZ_ATTR]] = {
+// DEVICE-SAME: convergent
+// DEVICE-SAME: }
+
+// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// HOST: attributes [[BAZ_ATTR]] = {
+// HOST-NOT: convergent
+// NOST-SAME: }
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -1875,6 +1875,14 @@
                                              B));
   }
 
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+    // Conservatively, mark all functions in CUDA as convergent (meaning, they
+    // may call an intrinsicly convergent op, such as __syncthreads(), and so
+    // can't have certain optimizations applied around them).  LLVM will remove
+    // this attribute where it safely can.
+    F->addFnAttr(llvm::Attribute::Convergent);
+  }
+
   if (!DontDefer) {
     // All MSVC dtors other than the base dtor are linkonce_odr and delegate to
     // each other bottoming out with the base dtor.  Therefore we emit non-base

Index: test/CodeGenCUDA/convergent.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/convergent.cu
@@ -0,0 +1,34 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
+// RUN:   -o - %s | FileCheck -check-prefix DEVICE %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | \
+// RUN:  FileCheck -check-prefix HOST %s
+
+#include "Inputs/cuda.h"
+
+// DEVICE: Function Attrs:
+// DEVICE-SAME: convergent
+// DEVICE-NEXT: define void @_Z3foov
+__device__ void foo() {}
+
+// HOST: Function Attrs:
+// HOST-NOT: convergent
+// HOST-NEXT: define void @_Z3barv
+// DEVICE: Function Attrs:
+// DEVICE-SAME: convergent
+// DEVICE-NEXT: define void @_Z3barv
+__host__ __device__ void baz();
+__host__ __device__ void bar() { baz(); }
+
+// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// DEVICE: attributes [[BAZ_ATTR]] = {
+// DEVICE-SAME: convergent
+// DEVICE-SAME: }
+
+// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// HOST: attributes [[BAZ_ATTR]] = {
+// HOST-NOT: convergent
+// NOST-SAME: }
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -1875,6 +1875,14 @@
                                              B));
   }
 
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+    // Conservatively, mark all functions in CUDA as convergent (meaning, they
+    // may call an intrinsicly convergent op, such as __syncthreads(), and so
+    // can't have certain optimizations applied around them).  LLVM will remove
+    // this attribute where it safely can.
+    F->addFnAttr(llvm::Attribute::Convergent);
+  }
+
   if (!DontDefer) {
     // All MSVC dtors other than the base dtor are linkonce_odr and delegate to
     // each other bottoming out with the base dtor.  Therefore we emit non-base

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D17056: Mark all CUDA device-side function defs and decls as convergent.

Reply via email to