Author: yaxunl Date: Mon Jun 11 17:16:33 2018 New Revision: 334457 URL: http://llvm.org/viewvc/llvm-project?rev=334457&view=rev Log: [CUDA][HIP] Set kernel calling convention before arrange function
Currently clang set kernel calling convention for CUDA/HIP after arranging function, which causes incorrect kernel function type since it depends on calling convention. This patch moves setting kernel convention before arranging function. Differential Revision: https://reviews.llvm.org/D47733 Added: cfe/trunk/test/CodeGenCUDA/kernel-args.cu Modified: cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/lib/CodeGen/TargetInfo.cpp cfe/trunk/lib/CodeGen/TargetInfo.h Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=334457&r1=334456&r2=334457&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Mon Jun 11 17:16:33 2018 @@ -255,6 +255,16 @@ CodeGenTypes::arrangeCXXMethodType(const FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>(), MD); } +/// Set calling convention for CUDA/HIP kernel. +static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM, + const FunctionDecl *FD) { + if (FD->hasAttr<CUDAGlobalAttr>()) { + const FunctionType *FT = FTy->getAs<FunctionType>(); + CGM.getTargetCodeGenInfo().setCUDAKernelCallingConvention(FT); + FTy = FT->getCanonicalTypeUnqualified(); + } +} + /// Arrange the argument and result information for a declaration or /// definition of the given C++ non-static member function. The /// member function must be an ordinary function, i.e. not a @@ -264,7 +274,9 @@ CodeGenTypes::arrangeCXXMethodDeclaratio assert(!isa<CXXConstructorDecl>(MD) && "wrong method for constructors!"); assert(!isa<CXXDestructorDecl>(MD) && "wrong method for destructors!"); - CanQual<FunctionProtoType> prototype = GetFormalType(MD); + CanQualType FT = GetFormalType(MD).getAs<Type>(); + setCUDAKernelCallingConvention(FT, CGM, MD); + auto prototype = FT.getAs<FunctionProtoType>(); if (MD->isInstance()) { // The abstract case is perfectly fine. @@ -424,6 +436,7 @@ CodeGenTypes::arrangeFunctionDeclaration CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified(); assert(isa<FunctionType>(FTy)); + setCUDAKernelCallingConvention(FTy, CGM, FD); // When declaring a function without a prototype, always use a // non-variadic type. Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=334457&r1=334456&r2=334457&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Mon Jun 11 17:16:33 2018 @@ -3671,8 +3671,6 @@ void CodeGenModule::EmitGlobalFunctionDe MaybeHandleStaticInExternC(D, Fn); - if (D->hasAttr<CUDAGlobalAttr>()) - getTargetCodeGenInfo().setCUDAKernelCallingConvention(Fn); maybeSetTrivialComdat(*D, *Fn); Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=334457&r1=334456&r2=334457&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Mon Jun 11 17:16:33 2018 @@ -7646,7 +7646,7 @@ public: llvm::Function *BlockInvokeFunc, llvm::Value *BlockLiteral) const override; bool shouldEmitStaticExternCAliases() const override; - void setCUDAKernelCallingConvention(llvm::Function *F) const override; + void setCUDAKernelCallingConvention(const FunctionType *&FT) const override; }; } @@ -7783,8 +7783,9 @@ bool AMDGPUTargetCodeGenInfo::shouldEmit } void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention( - llvm::Function *F) const { - F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); + const FunctionType *&FT) const { + FT = getABIInfo().getContext().adjustFunctionType( + FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); } //===----------------------------------------------------------------------===// Modified: cfe/trunk/lib/CodeGen/TargetInfo.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.h?rev=334457&r1=334456&r2=334457&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/TargetInfo.h (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.h Mon Jun 11 17:16:33 2018 @@ -302,7 +302,7 @@ public: /// as 'used', and having internal linkage. virtual bool shouldEmitStaticExternCAliases() const { return true; } - virtual void setCUDAKernelCallingConvention(llvm::Function *F) const {} + virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {} }; } // namespace CodeGen Added: cfe/trunk/test/CodeGenCUDA/kernel-args.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/kernel-args.cu?rev=334457&view=auto ============================================================================== --- cfe/trunk/test/CodeGenCUDA/kernel-args.cu (added) +++ cfe/trunk/test/CodeGenCUDA/kernel-args.cu Mon Jun 11 17:16:33 2018 @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -fcuda-is-device \ +// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s +#include "Inputs/cuda.h" + +struct A { + int a[32]; +}; + +// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A %x.coerce) +// NVPTX: define void @_Z6kernel1A(%struct.A* byval align 4 %x) +__global__ void kernel(A x) { +} + +class Kernel { +public: + // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %x.coerce) + // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval align 4 %x) + static __global__ void memberKernel(A x){} + template<typename T> static __global__ void templateMemberKernel(T x) {} +}; + + +template <typename T> +__global__ void templateKernel(T x) {} + +void launch(void*); + +void test() { + Kernel K; + // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A %x.coerce) + // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval align 4 %x) + launch((void*)templateKernel<A>); + + // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %x.coerce) + // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval align 4 %x) + launch((void*)Kernel::templateMemberKernel<A>); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits