This revision was automatically updated to reflect the committed changes. Closed by commit rL261779: [CUDA] Mark all CUDA device-side function defs, decls, and calls as convergent. (authored by jlebar).
Changed prior to commit: http://reviews.llvm.org/D17056?vs=48261&id=48979#toc Repository: rL LLVM http://reviews.llvm.org/D17056 Files: cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/test/CodeGenCUDA/convergent.cu cfe/trunk/test/CodeGenCUDA/device-var-init.cu Index: cfe/trunk/lib/CodeGen/CGCall.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGCall.cpp +++ cfe/trunk/lib/CodeGen/CGCall.cpp @@ -1595,6 +1595,14 @@ } } + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + // Conservatively, mark all functions and calls in CUDA as convergent + // (meaning, they may call an intrinsically convergent op, such as + // __syncthreads(), and so can't have certain optimizations applied around + // them). LLVM will remove this attribute where it safely can. + FuncAttrs.addAttribute(llvm::Attribute::Convergent); + } + ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); QualType RetTy = FI.getReturnType(); Index: cfe/trunk/test/CodeGenCUDA/convergent.cu =================================================================== --- cfe/trunk/test/CodeGenCUDA/convergent.cu +++ cfe/trunk/test/CodeGenCUDA/convergent.cu @@ -0,0 +1,39 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \ +// RUN: -disable-llvm-passes -o - %s | FileCheck -check-prefix DEVICE %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ +// RUN: -disable-llvm-passes -o - %s | \ +// RUN: FileCheck -check-prefix HOST %s + +#include "Inputs/cuda.h" + +// DEVICE: Function Attrs: +// DEVICE-SAME: convergent +// DEVICE-NEXT: define void @_Z3foov +__device__ void foo() {} + +// HOST: Function Attrs: +// HOST-NOT: convergent +// HOST-NEXT: define void @_Z3barv +// DEVICE: Function Attrs: +// DEVICE-SAME: convergent +// DEVICE-NEXT: define void @_Z3barv +__host__ __device__ void baz(); +__host__ __device__ void bar() { + // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]] + baz(); +} + +// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] +// DEVICE: attributes [[BAZ_ATTR]] = { +// DEVICE-SAME: convergent +// DEVICE-SAME: } +// DEVICE: attributes [[CALL_ATTR]] = { convergent } + +// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] +// HOST: attributes [[BAZ_ATTR]] = { +// HOST-NOT: convergent +// NOST-SAME: } Index: cfe/trunk/test/CodeGenCUDA/device-var-init.cu =================================================================== --- cfe/trunk/test/CodeGenCUDA/device-var-init.cu +++ cfe/trunk/test/CodeGenCUDA/device-var-init.cu @@ -382,7 +382,7 @@ // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc) // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec) // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1) -// CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) #3 +// CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec) // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec) // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
Index: cfe/trunk/lib/CodeGen/CGCall.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGCall.cpp +++ cfe/trunk/lib/CodeGen/CGCall.cpp @@ -1595,6 +1595,14 @@ } } + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + // Conservatively, mark all functions and calls in CUDA as convergent + // (meaning, they may call an intrinsically convergent op, such as + // __syncthreads(), and so can't have certain optimizations applied around + // them). LLVM will remove this attribute where it safely can. + FuncAttrs.addAttribute(llvm::Attribute::Convergent); + } + ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); QualType RetTy = FI.getReturnType(); Index: cfe/trunk/test/CodeGenCUDA/convergent.cu =================================================================== --- cfe/trunk/test/CodeGenCUDA/convergent.cu +++ cfe/trunk/test/CodeGenCUDA/convergent.cu @@ -0,0 +1,39 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \ +// RUN: -disable-llvm-passes -o - %s | FileCheck -check-prefix DEVICE %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ +// RUN: -disable-llvm-passes -o - %s | \ +// RUN: FileCheck -check-prefix HOST %s + +#include "Inputs/cuda.h" + +// DEVICE: Function Attrs: +// DEVICE-SAME: convergent +// DEVICE-NEXT: define void @_Z3foov +__device__ void foo() {} + +// HOST: Function Attrs: +// HOST-NOT: convergent +// HOST-NEXT: define void @_Z3barv +// DEVICE: Function Attrs: +// DEVICE-SAME: convergent +// DEVICE-NEXT: define void @_Z3barv +__host__ __device__ void baz(); +__host__ __device__ void bar() { + // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]] + baz(); +} + +// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] +// DEVICE: attributes [[BAZ_ATTR]] = { +// DEVICE-SAME: convergent +// DEVICE-SAME: } +// DEVICE: attributes [[CALL_ATTR]] = { convergent } + +// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] +// HOST: attributes [[BAZ_ATTR]] = { +// HOST-NOT: convergent +// NOST-SAME: } Index: cfe/trunk/test/CodeGenCUDA/device-var-init.cu =================================================================== --- cfe/trunk/test/CodeGenCUDA/device-var-init.cu +++ cfe/trunk/test/CodeGenCUDA/device-var-init.cu @@ -382,7 +382,7 @@ // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc) // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec) // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1) -// CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) #3 +// CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec) // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec) // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits