Couldn't you just add an implicit UsedAttr when processing the CUDAGlobalAttr and LangOpts.CUDAIsDevice was set to true?
On Thu, Jul 30, 2015 at 3:48 PM, Artem Belevich <t...@google.com> wrote: > tra created this revision. > tra added reviewers: echristo, eliben. > tra added a subscriber: cfe-commits. > > Templated kernels that were instantiated from the host code would normally > be eliminated because they were never referenced on device side. > The patch adds __global__ functions to @llvm.used which prevents their > elimination. > > > http://reviews.llvm.org/D11666 > > Files: > lib/AST/ASTContext.cpp > lib/CodeGen/CodeGenModule.cpp > test/CodeGenCUDA/ptx-kernels.cu > > Index: test/CodeGenCUDA/ptx-kernels.cu > =================================================================== > --- test/CodeGenCUDA/ptx-kernels.cu > +++ test/CodeGenCUDA/ptx-kernels.cu > @@ -1,7 +1,16 @@ > +// Make sure that __global__ functions are emitted along with correct > +// annotations and are added to @llvm.used to prevent their elimination. > +// REQUIRES: nvptx-registered-target > +// > // RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device > -emit-llvm -o - | FileCheck %s > > #include "Inputs/cuda.h" > > +// Make sure that all __global__ functiona are added to @llvm.used > +// CHECK: @llvm.used = appending global > +// CHECK-SAME: @global_function > +// CHECK-SAME: @_Z16templated_kernelIiEvT_ > + > // CHECK-LABEL: define void @device_function > extern "C" > __device__ void device_function() {} > @@ -13,4 +22,10 @@ > device_function(); > } > > +// Make sure host-instantiated kernels are preserved on device side. > +template <typename T> __global__ void templated_kernel(T param) {} > +// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_ > +void host_function() { templated_kernel<<<0,0>>>(0); } > + > // CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1} > +// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, > !"kernel", i32 1} > Index: lib/CodeGen/CodeGenModule.cpp > =================================================================== > --- lib/CodeGen/CodeGenModule.cpp > +++ lib/CodeGen/CodeGenModule.cpp > @@ -813,6 +813,13 @@ > > if (D->hasAttr<UsedAttr>()) > addUsedGlobal(GV); > + > + // Treat CUDA kernels as if they have attribute((used)) applied so we > don't > + // eliminate them (which would have happened otherwise because the code > that > + // call them is on the host side of the compilation and nothing else > + // references the kernels). > + if (LangOpts.CUDA && LangOpts.CUDAIsDevice && > D->hasAttr<CUDAGlobalAttr>()) > + addUsedGlobal(GV); > } > > void CodeGenModule::setAliasAttributes(const Decl *D, > Index: lib/AST/ASTContext.cpp > =================================================================== > --- lib/AST/ASTContext.cpp > +++ lib/AST/ASTContext.cpp > @@ -8328,6 +8328,9 @@ > if (D->hasAttr<AliasAttr>() || D->hasAttr<UsedAttr>()) > return true; > > + if (LangOpts.CUDA && LangOpts.CUDAIsDevice && > D->hasAttr<CUDAGlobalAttr>()) > + return true; > + > if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { > // Forward declarations aren't required. > if (!FD->doesThisDeclarationHaveABody()) > > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits > >
_______________________________________________ cfe-commits mailing list cfe-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits