jlebar created this revision. jlebar added a reviewer: majnemer. jlebar added subscribers: tra, echristo, jhen, cfe-commits.
This is important for e.g. the following case: void sync() { __syncthreads(); } void foo() { do_something(); sync(); do_something_else(): } Without this change, if the optimizer does not inline sync() (which it won't because __syncthreads is also marked as noduplicate, for now anyway), it is free to perform optimizations on sync() that it would not be able to perform on __syncthreads(), because sync() is not marked as convergent. This chagne is conservative; the optimizer will remove these attrs where it can, see r260318, r260319. http://reviews.llvm.org/D17056 Files: lib/CodeGen/CodeGenModule.cpp test/CodeGenCUDA/convergent.cu Index: test/CodeGenCUDA/convergent.cu =================================================================== --- /dev/null +++ test/CodeGenCUDA/convergent.cu @@ -0,0 +1,34 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \ +// RUN: -o - %s | FileCheck -check-prefix DEVICE %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | \ +// RUN: FileCheck -check-prefix HOST %s + +#include "Inputs/cuda.h" + +// DEVICE: Function Attrs: +// DEVICE-SAME: convergent +// DEVICE-NEXT: define void @_Z3foov +__device__ void foo() {} + +// HOST: Function Attrs: +// HOST-NOT: convergent +// HOST-NEXT: define void @_Z3barv +// DEVICE: Function Attrs: +// DEVICE-SAME: convergent +// DEVICE-NEXT: define void @_Z3barv +__host__ __device__ void baz(); +__host__ __device__ void bar() { baz(); } + +// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] +// DEVICE: attributes [[BAZ_ATTR]] = { +// DEVICE-SAME: convergent +// DEVICE-SAME: } + +// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] +// HOST: attributes [[BAZ_ATTR]] = { +// HOST-NOT: convergent +// NOST-SAME: } Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -1875,6 +1875,14 @@ B)); } + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + // Conservatively, mark all functions in CUDA as convergent (meaning, they + // may call an intrinsicly convergent op, such as __syncthreads(), and so + // can't have certain optimizations applied around them). LLVM will remove + // this attribute where it safely can. + F->addFnAttr(llvm::Attribute::Convergent); + } + if (!DontDefer) { // All MSVC dtors other than the base dtor are linkonce_odr and delegate to // each other bottoming out with the base dtor. Therefore we emit non-base
Index: test/CodeGenCUDA/convergent.cu =================================================================== --- /dev/null +++ test/CodeGenCUDA/convergent.cu @@ -0,0 +1,34 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \ +// RUN: -o - %s | FileCheck -check-prefix DEVICE %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | \ +// RUN: FileCheck -check-prefix HOST %s + +#include "Inputs/cuda.h" + +// DEVICE: Function Attrs: +// DEVICE-SAME: convergent +// DEVICE-NEXT: define void @_Z3foov +__device__ void foo() {} + +// HOST: Function Attrs: +// HOST-NOT: convergent +// HOST-NEXT: define void @_Z3barv +// DEVICE: Function Attrs: +// DEVICE-SAME: convergent +// DEVICE-NEXT: define void @_Z3barv +__host__ __device__ void baz(); +__host__ __device__ void bar() { baz(); } + +// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] +// DEVICE: attributes [[BAZ_ATTR]] = { +// DEVICE-SAME: convergent +// DEVICE-SAME: } + +// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] +// HOST: attributes [[BAZ_ATTR]] = { +// HOST-NOT: convergent +// NOST-SAME: } Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -1875,6 +1875,14 @@ B)); } + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + // Conservatively, mark all functions in CUDA as convergent (meaning, they + // may call an intrinsicly convergent op, such as __syncthreads(), and so + // can't have certain optimizations applied around them). LLVM will remove + // this attribute where it safely can. + F->addFnAttr(llvm::Attribute::Convergent); + } + if (!DontDefer) { // All MSVC dtors other than the base dtor are linkonce_odr and delegate to // each other bottoming out with the base dtor. Therefore we emit non-base
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits