Re: [PATCH] D21367: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels.
This revision was automatically updated to reflect the committed changes. Closed by commit rL274220: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels. (authored by nhaustov). Changed prior to commit: http://reviews.llvm.org/D21367?vs=62198=62343#toc Repository: rL LLVM http://reviews.llvm.org/D21367 Files: cfe/trunk/include/clang/Basic/Specifiers.h cfe/trunk/lib/AST/ItaniumMangle.cpp cfe/trunk/lib/AST/Type.cpp cfe/trunk/lib/AST/TypePrinter.cpp cfe/trunk/lib/Basic/Targets.cpp cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/lib/CodeGen/CGDebugInfo.cpp cfe/trunk/lib/CodeGen/CodeGenTypes.h cfe/trunk/lib/CodeGen/TargetInfo.cpp cfe/trunk/lib/CodeGen/TargetInfo.h cfe/trunk/lib/Sema/SemaType.cpp cfe/trunk/test/CodeGenOpenCL/amdgpu-call-kernel.cl cfe/trunk/test/CodeGenOpenCL/amdgpu-calling-conv.cl cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl cfe/trunk/tools/libclang/CXType.cpp Index: cfe/trunk/include/clang/Basic/Specifiers.h === --- cfe/trunk/include/clang/Basic/Specifiers.h +++ cfe/trunk/include/clang/Basic/Specifiers.h @@ -241,7 +241,7 @@ CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) CC_SpirFunction, // default for OpenCL functions on SPIR target -CC_SpirKernel, // inferred for OpenCL kernels on SPIR target +CC_OpenCLKernel, // inferred for OpenCL kernels CC_Swift,// __attribute__((swiftcall)) CC_PreserveMost, // __attribute__((preserve_most)) CC_PreserveAll, // __attribute__((preserve_all)) @@ -257,7 +257,7 @@ case CC_X86Pascal: case CC_X86VectorCall: case CC_SpirFunction: -case CC_SpirKernel: +case CC_OpenCLKernel: case CC_Swift: return false; default: Index: cfe/trunk/test/CodeGenOpenCL/amdgpu-calling-conv.cl === --- cfe/trunk/test/CodeGenOpenCL/amdgpu-calling-conv.cl +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-calling-conv.cl @@ -0,0 +1,12 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s + +// CHECK: define amdgpu_kernel void @calling_conv_amdgpu_kernel() +kernel void calling_conv_amdgpu_kernel() +{ +} + +// CHECK: define void @calling_conv_none() +void calling_conv_none() +{ +} Index: cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl === --- cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl @@ -5,23 +5,23 @@ __attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics kernel void test_num_vgpr64() { -// CHECK: define void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]] } __attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics kernel void test_num_sgpr32() { -// CHECK: define void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]] } __attribute__((amdgpu_num_vgpr(64), amdgpu_num_sgpr(32))) // expected-no-diagnostics kernel void test_num_vgpr64_sgpr32() { -// CHECK: define void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]] } __attribute__((amdgpu_num_sgpr(20), amdgpu_num_vgpr(40))) // expected-no-diagnostics kernel void test_num_sgpr20_vgpr40() { -// CHECK: define void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]] } __attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics Index: cfe/trunk/test/CodeGenOpenCL/amdgpu-call-kernel.cl === --- cfe/trunk/test/CodeGenOpenCL/amdgpu-call-kernel.cl +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-call-kernel.cl @@ -0,0 +1,14 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s +// CHECK: define amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture %out) +// CHECK: store i32 4, i32 addrspace(1)* %out, align 4 + +kernel void test_kernel(global int *out) +{ + out[0] = 4; +} + +__kernel void test_call_kernel(__global int *out) +{ + test_kernel(out); +} Index: cfe/trunk/lib/AST/Type.cpp === --- cfe/trunk/lib/AST/Type.cpp +++ cfe/trunk/lib/AST/Type.cpp @@ -2642,7 +2642,7 @@ case CC_AAPCS_VFP: return "aapcs-vfp"; case CC_IntelOclBicc: return "intel_ocl_bicc"; case CC_SpirFunction: return "spir_function"; - case CC_SpirKernel: return "spir_kernel"; + case CC_OpenCLKernel: return "opencl_kernel"; case CC_Swift: return "swiftcall"; case
Re: [PATCH] D21367: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels.
nhaustov updated this revision to Diff 62198. nhaustov added a comment. Update after discussion with Sam and review from Anastasia. http://reviews.llvm.org/D21367 Files: include/clang/Basic/Specifiers.h lib/AST/ItaniumMangle.cpp lib/AST/Type.cpp lib/AST/TypePrinter.cpp lib/Basic/Targets.cpp lib/CodeGen/CGCall.cpp lib/CodeGen/CGDebugInfo.cpp lib/CodeGen/CodeGenTypes.h lib/CodeGen/TargetInfo.cpp lib/CodeGen/TargetInfo.h lib/Sema/SemaType.cpp test/CodeGenOpenCL/amdgpu-call-kernel.cl test/CodeGenOpenCL/amdgpu-calling-conv.cl test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl tools/libclang/CXType.cpp Index: tools/libclang/CXType.cpp === --- tools/libclang/CXType.cpp +++ tools/libclang/CXType.cpp @@ -541,7 +541,7 @@ TCALLINGCONV(PreserveMost); TCALLINGCONV(PreserveAll); case CC_SpirFunction: return CXCallingConv_Unexposed; -case CC_SpirKernel: return CXCallingConv_Unexposed; +case CC_OpenCLKernel: return CXCallingConv_Unexposed; break; } #undef TCALLINGCONV Index: test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl === --- test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl +++ test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl @@ -5,23 +5,23 @@ __attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics kernel void test_num_vgpr64() { -// CHECK: define void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]] } __attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics kernel void test_num_sgpr32() { -// CHECK: define void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]] } __attribute__((amdgpu_num_vgpr(64), amdgpu_num_sgpr(32))) // expected-no-diagnostics kernel void test_num_vgpr64_sgpr32() { -// CHECK: define void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]] } __attribute__((amdgpu_num_sgpr(20), amdgpu_num_vgpr(40))) // expected-no-diagnostics kernel void test_num_sgpr20_vgpr40() { -// CHECK: define void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]] } __attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics Index: test/CodeGenOpenCL/amdgpu-calling-conv.cl === --- /dev/null +++ test/CodeGenOpenCL/amdgpu-calling-conv.cl @@ -0,0 +1,12 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s + +// CHECK: define amdgpu_kernel void @calling_conv_amdgpu_kernel() +kernel void calling_conv_amdgpu_kernel() +{ +} + +// CHECK: define void @calling_conv_none() +void calling_conv_none() +{ +} Index: test/CodeGenOpenCL/amdgpu-call-kernel.cl === --- /dev/null +++ test/CodeGenOpenCL/amdgpu-call-kernel.cl @@ -0,0 +1,14 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s +// CHECK: define amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture %out) +// CHECK: store i32 4, i32 addrspace(1)* %out, align 4 + +kernel void test_kernel(global int *out) +{ + out[0] = 4; +} + +__kernel void test_call_kernel(__global int *out) +{ + test_kernel(out); +} Index: lib/Sema/SemaType.cpp === --- lib/Sema/SemaType.cpp +++ lib/Sema/SemaType.cpp @@ -3182,15 +3182,19 @@ CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic, IsCXXInstanceMethod); - // Attribute AT_OpenCLKernel affects the calling convention only on - // the SPIR target, hence it cannot be treated as a calling + // Attribute AT_OpenCLKernel affects the calling convention for SPIR + // and AMDGPU targets, hence it cannot be treated as a calling // convention attribute. This is the simplest place to infer - // "spir_kernel" for OpenCL kernels on SPIR. - if (CC == CC_SpirFunction) { + // calling convention for OpenCL kernels. + if (S.getLangOpts().OpenCL) { for (const AttributeList *Attr = D.getDeclSpec().getAttributes().getList(); Attr; Attr = Attr->getNext()) { if (Attr->getKind() == AttributeList::AT_OpenCLKernel) { -CC = CC_SpirKernel; +llvm::Triple::ArchType arch = S.Context.getTargetInfo().getTriple().getArch(); +if (arch == llvm::Triple::spir || arch == llvm::Triple::spir64 || +arch == llvm::Triple::amdgcn) { + CC = CC_OpenCLKernel; +} break; } } Index: lib/CodeGen/TargetInfo.h
Re: [PATCH] D21367: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels.
yaxunl added a comment. > Maybe we can use the fact that calling conventions are separate in AST and in > LLVM IR. So in AST maybe we can have opencl_kernel and in LLVM it can map to > spir_kernel for SPIR and amdgpu_kernel for AMDGPU. This approach looks good to me. Thanks. http://reviews.llvm.org/D21367 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21367: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels.
Anastasia added inline comments. Comment at: lib/Basic/Targets.cpp:2122 @@ +2121,3 @@ + + + CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { One extra line? http://reviews.llvm.org/D21367 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21367: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels.
nhaustov added a comment. In http://reviews.llvm.org/D21367#467894, @yaxunl wrote: > I am wondering what's the difference between this calling convention and > spir_kernel. spir_kernel has only effect and was created only for SPIR target. In theory it could perhaps be reused for AMDGPU target, however it seems not very clear to me. In http://reviews.llvm.org/D21367#468057, @yaxunl wrote: > In http://reviews.llvm.org/D21367#467982, @nhaustov wrote: > > > In http://reviews.llvm.org/D21367#467894, @yaxunl wrote: > > > > > I am wondering what's the difference between this calling convention and > > > spir_kernel. > > > > > > spir_kernel has only effect and was created only for SPIR target. In theory > > it could perhaps be reused for AMDGPU target, however it seems not very > > clear to me. > > > If the new calling convention is to indicate a function is an OpenCL kernel, > then it has exactly the same meaning as spir_kernel. Can we just use it in > AMDGPU target? This looks like a hack to me. Calling conventions in LLVM IR are really target specific. spir_kernel really doesn't make sense for anything other than SPIR. Maybe we can use the fact that calling conventions are separate in AST and in LLVM IR. So in AST maybe we can have opencl_kernel and in LLVM it can map to spir_kernel for SPIR and amdgpu_kernel for AMDGPU. http://reviews.llvm.org/D21367 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21367: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels.
yaxunl added a comment. In http://reviews.llvm.org/D21367#467982, @nhaustov wrote: > In http://reviews.llvm.org/D21367#467894, @yaxunl wrote: > > > I am wondering what's the difference between this calling convention and > > spir_kernel. > > > spir_kernel has only effect and was created only for SPIR target. In theory > it could perhaps be reused for AMDGPU target, however it seems not very clear > to me. If the new calling convention is to indicate a function is an OpenCL kernel, then it has exactly the same meaning as spir_kernel. Can we just use it in AMDGPU target? http://reviews.llvm.org/D21367 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21367: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels.
nhaustov added a comment. In http://reviews.llvm.org/D21367#467894, @yaxunl wrote: > I am wondering what's the difference between this calling convention and > spir_kernel. spir_kernel has only effect and was created only for SPIR target. In theory it could perhaps be reused for AMDGPU target, however it seems not very clear to me. http://reviews.llvm.org/D21367 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21367: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels.
yaxunl added a comment. I am wondering what's the difference between this calling convention and spir_kernel. http://reviews.llvm.org/D21367 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21367: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels.
tstellarAMD added a comment. This seems OK to me. http://reviews.llvm.org/D21367 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21367: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels.
nhaustov updated this revision to Diff 61247. nhaustov added a comment. Add test for calling OpenCL kernel from kernel. http://reviews.llvm.org/D21367 Files: include/clang/AST/Type.h include/clang/Basic/Specifiers.h lib/AST/ItaniumMangle.cpp lib/AST/Type.cpp lib/AST/TypePrinter.cpp lib/Basic/Targets.cpp lib/CodeGen/CGCall.cpp lib/CodeGen/CGDebugInfo.cpp lib/Sema/SemaType.cpp test/CodeGenOpenCL/amdgpu-call-kernel.cl test/CodeGenOpenCL/amdgpu-calling-conv.cl test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl tools/libclang/CXType.cpp Index: tools/libclang/CXType.cpp === --- tools/libclang/CXType.cpp +++ tools/libclang/CXType.cpp @@ -542,6 +542,7 @@ TCALLINGCONV(PreserveAll); case CC_SpirFunction: return CXCallingConv_Unexposed; case CC_SpirKernel: return CXCallingConv_Unexposed; +case CC_AMDGPUKernel: return CXCallingConv_Unexposed; break; } #undef TCALLINGCONV Index: test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl === --- test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl +++ test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl @@ -5,23 +5,23 @@ __attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics kernel void test_num_vgpr64() { -// CHECK: define void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]] } __attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics kernel void test_num_sgpr32() { -// CHECK: define void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]] } __attribute__((amdgpu_num_vgpr(64), amdgpu_num_sgpr(32))) // expected-no-diagnostics kernel void test_num_vgpr64_sgpr32() { -// CHECK: define void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]] } __attribute__((amdgpu_num_sgpr(20), amdgpu_num_vgpr(40))) // expected-no-diagnostics kernel void test_num_sgpr20_vgpr40() { -// CHECK: define void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]] } __attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics Index: test/CodeGenOpenCL/amdgpu-calling-conv.cl === --- /dev/null +++ test/CodeGenOpenCL/amdgpu-calling-conv.cl @@ -0,0 +1,12 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s + +// CHECK: define amdgpu_kernel void @calling_conv_amdgpu_kernel() +kernel void calling_conv_amdgpu_kernel() +{ +} + +// CHECK: define void @calling_conv_none() +void calling_conv_none() +{ +} Index: test/CodeGenOpenCL/amdgpu-call-kernel.cl === --- /dev/null +++ test/CodeGenOpenCL/amdgpu-call-kernel.cl @@ -0,0 +1,14 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s +// CHECK: define amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture %out) +// CHECK: store i32 4, i32 addrspace(1)* %out, align 4 + +kernel void test_kernel(global int *out) +{ + out[0] = 4; +} + +__kernel void test_call_kernel(__global int *out) +{ + test_kernel(out); +} Index: lib/Sema/SemaType.cpp === --- lib/Sema/SemaType.cpp +++ lib/Sema/SemaType.cpp @@ -3182,15 +3182,20 @@ CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic, IsCXXInstanceMethod); - // Attribute AT_OpenCLKernel affects the calling convention only on - // the SPIR target, hence it cannot be treated as a calling + // Attribute AT_OpenCLKernel affects the calling convention for SPIR + // and AMDGPU targets, hence it cannot be treated as a calling // convention attribute. This is the simplest place to infer - // "spir_kernel" for OpenCL kernels on SPIR. - if (CC == CC_SpirFunction) { + // calling convention for OpenCL kernels. + if (S.getLangOpts().OpenCL) { for (const AttributeList *Attr = D.getDeclSpec().getAttributes().getList(); Attr; Attr = Attr->getNext()) { if (Attr->getKind() == AttributeList::AT_OpenCLKernel) { -CC = CC_SpirKernel; +llvm::Triple::ArchType arch = S.Context.getTargetInfo().getTriple().getArch(); +if (arch == llvm::Triple::spir || arch == llvm::Triple::spir64) { + CC = CC_SpirKernel; +} else if (arch == llvm::Triple::amdgcn) { + CC = CC_AMDGPUKernel; +} break; } } Index: lib/CodeGen/CGDebugInfo.cpp === ---
Re: [PATCH] D21367: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels.
nhaustov added a comment. In http://reviews.llvm.org/D21367#461480, @tstellarAMD wrote: > Does this new patch fix the OpenCL regression? Yes, it fixes the problem with calling kernel from kernel. I'll add a test too. http://reviews.llvm.org/D21367 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21367: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels.
tstellarAMD added a comment. Does this new patch fix the OpenCL regression? http://reviews.llvm.org/D21367 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21367: AMDGPU: Set amdgpu_kernel calling convention for OpenCL kernels.
nhaustov created this revision. nhaustov added reviewers: tstellarAMD, rsmith. nhaustov added a subscriber: cfe-commits. Herald added a subscriber: kzhuravl. Extend CC field of FunctionType to 5 bits and ExtInfo to 10 bits to make space for new calling convention. Add new Clang calling convention CC_AMDGPUKernel. Set calling convention CC_AMDGPUKernel in same place as SPIR target (see comment). Update tests. http://reviews.llvm.org/D21367 Files: include/clang/AST/Type.h include/clang/Basic/Specifiers.h lib/AST/ItaniumMangle.cpp lib/AST/Type.cpp lib/AST/TypePrinter.cpp lib/Basic/Targets.cpp lib/CodeGen/CGCall.cpp lib/CodeGen/CGDebugInfo.cpp lib/Sema/SemaType.cpp test/CodeGenOpenCL/amdgpu-calling-conv.cl test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl tools/libclang/CXType.cpp Index: tools/libclang/CXType.cpp === --- tools/libclang/CXType.cpp +++ tools/libclang/CXType.cpp @@ -542,6 +542,7 @@ TCALLINGCONV(PreserveAll); case CC_SpirFunction: return CXCallingConv_Unexposed; case CC_SpirKernel: return CXCallingConv_Unexposed; +case CC_AMDGPUKernel: return CXCallingConv_Unexposed; break; } #undef TCALLINGCONV Index: test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl === --- test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl +++ test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl @@ -5,23 +5,23 @@ __attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics kernel void test_num_vgpr64() { -// CHECK: define void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]] } __attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics kernel void test_num_sgpr32() { -// CHECK: define void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]] } __attribute__((amdgpu_num_vgpr(64), amdgpu_num_sgpr(32))) // expected-no-diagnostics kernel void test_num_vgpr64_sgpr32() { -// CHECK: define void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]] } __attribute__((amdgpu_num_sgpr(20), amdgpu_num_vgpr(40))) // expected-no-diagnostics kernel void test_num_sgpr20_vgpr40() { -// CHECK: define void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]] +// CHECK: define amdgpu_kernel void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]] } __attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics Index: test/CodeGenOpenCL/amdgpu-calling-conv.cl === --- /dev/null +++ test/CodeGenOpenCL/amdgpu-calling-conv.cl @@ -0,0 +1,12 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s + +// CHECK: define amdgpu_kernel void @calling_conv_amdgpu_kernel() +kernel void calling_conv_amdgpu_kernel() +{ +} + +// CHECK: define void @calling_conv_none() +void calling_conv_none() +{ +} Index: lib/Sema/SemaType.cpp === --- lib/Sema/SemaType.cpp +++ lib/Sema/SemaType.cpp @@ -3182,15 +3182,20 @@ CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic, IsCXXInstanceMethod); - // Attribute AT_OpenCLKernel affects the calling convention only on - // the SPIR target, hence it cannot be treated as a calling + // Attribute AT_OpenCLKernel affects the calling convention for SPIR + // and AMDGPU targets, hence it cannot be treated as a calling // convention attribute. This is the simplest place to infer - // "spir_kernel" for OpenCL kernels on SPIR. - if (CC == CC_SpirFunction) { + // calling convention for OpenCL kernels. + if (S.getLangOpts().OpenCL) { for (const AttributeList *Attr = D.getDeclSpec().getAttributes().getList(); Attr; Attr = Attr->getNext()) { if (Attr->getKind() == AttributeList::AT_OpenCLKernel) { -CC = CC_SpirKernel; +llvm::Triple::ArchType arch = S.Context.getTargetInfo().getTriple().getArch(); +if (arch == llvm::Triple::spir || arch == llvm::Triple::spir64) { + CC = CC_SpirKernel; +} else if (arch == llvm::Triple::amdgcn) { + CC = CC_AMDGPUKernel; +} break; } } Index: lib/CodeGen/CGDebugInfo.cpp === --- lib/CodeGen/CGDebugInfo.cpp +++ lib/CodeGen/CGDebugInfo.cpp @@ -859,6 +859,7 @@ case CC_Swift: case CC_PreserveMost: case CC_PreserveAll: + case CC_AMDGPUKernel: return 0; } return 0; Index: lib/CodeGen/CGCall.cpp === --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -61,6