Author: Jon Chesterfield Date: 2022-05-20T08:50:37+01:00 New Revision: 83c431fb9e72abbd2eddf26388245eb4963370e2
URL: https://github.com/llvm/llvm-project/commit/83c431fb9e72abbd2eddf26388245eb4963370e2 DIFF: https://github.com/llvm/llvm-project/commit/83c431fb9e72abbd2eddf26388245eb4963370e2.diff LOG: [amdgpu] Add amdgpu_kernel calling conv attribute to clang Allows emitting define amdgpu_kernel void @func() IR from C or C++. This replaces the current workflow which is to write a stub in opencl that calls an external C function implemented in C++ combined through llvm-link. Calling the resulting function still requires a manual implementation of the ABI from the host side. The primary application is for more rapid debugging of the amdgpu backend by permuting a C or C++ test file instead of manually updating an IR file. Implementation closely follows D54425. Non-amd reviewers from there. Reviewed By: yaxunl Differential Revision: https://reviews.llvm.org/D125970 Added: clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp Modified: clang/include/clang/Basic/Attr.td clang/include/clang/Basic/Specifiers.h clang/lib/AST/ItaniumMangle.cpp clang/lib/AST/Type.cpp clang/lib/AST/TypePrinter.cpp clang/lib/Basic/Targets/AMDGPU.h clang/lib/CodeGen/CGCall.cpp clang/lib/CodeGen/CGDebugInfo.cpp clang/lib/Sema/SemaDeclAttr.cpp clang/lib/Sema/SemaType.cpp clang/test/Sema/callingconv.c clang/tools/libclang/CXType.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 0c95adfa237d7..fed29b03a8b14 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1857,6 +1857,11 @@ def AMDGPUNumVGPR : InheritableAttr { let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; } +def AMDGPUKernelCall : DeclOrTypeAttr { + let Spellings = [Clang<"amdgpu_kernel">]; + let Documentation = [Undocumented]; +} + def BPFPreserveAccessIndex : InheritableAttr, TargetSpecificAttr<TargetBPF> { let Spellings = [Clang<"preserve_access_index">]; diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h index 7a727e7088deb..7657ae36d21bb 100644 --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -281,6 +281,7 @@ namespace clang { CC_PreserveAll, // __attribute__((preserve_all)) CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs)) CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs)) + CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel)) }; /// Checks whether the given calling convention supports variadic diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 1be70487c1b4e..b380e02fc8f7d 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3150,6 +3150,7 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) { case CC_AAPCS_VFP: case CC_AArch64VectorCall: case CC_AArch64SVEPCS: + case CC_AMDGPUKernelCall: case CC_IntelOclBicc: case CC_SpirFunction: case CC_OpenCLKernel: diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 200a129437ed5..ece4165c51f53 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3186,6 +3186,7 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) { case CC_AAPCS_VFP: return "aapcs-vfp"; case CC_AArch64VectorCall: return "aarch64_vector_pcs"; case CC_AArch64SVEPCS: return "aarch64_sve_pcs"; + case CC_AMDGPUKernelCall: return "amdgpu_kernel"; case CC_IntelOclBicc: return "intel_ocl_bicc"; case CC_SpirFunction: return "spir_function"; case CC_OpenCLKernel: return "opencl_kernel"; @@ -3622,6 +3623,7 @@ bool AttributedType::isCallingConv() const { case attr::VectorCall: case attr::AArch64VectorPcs: case attr::AArch64SVEPcs: + case attr::AMDGPUKernelCall: case attr::Pascal: case attr::MSABI: case attr::SysVABI: diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 7bca45b5f5601..b5286de5b1cab 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -964,6 +964,9 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info, case CC_AArch64SVEPCS: OS << "__attribute__((aarch64_sve_pcs))"; break; + case CC_AMDGPUKernelCall: + OS << "__attribute__((amdgpu_kernel))"; + break; case CC_IntelOclBicc: OS << " __attribute__((intel_ocl_bicc))"; break; @@ -1754,6 +1757,7 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, } case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break; case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break; + case attr::AMDGPUKernelCall: OS << "amdgpu_kernel"; break; case attr::IntelOclBicc: OS << "inteloclbicc"; break; case attr::PreserveMost: OS << "preserve_most"; diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 9749221914888..4e3db7a181f58 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -411,6 +411,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { return CCCR_Warning; case CC_C: case CC_OpenCLKernel: + case CC_AMDGPUKernelCall: return CCCR_OK; } } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index a35e3d811862d..0d838b9aeb529 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -63,6 +63,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall; case CC_AArch64VectorCall: return llvm::CallingConv::AArch64_VectorCall; case CC_AArch64SVEPCS: return llvm::CallingConv::AArch64_SVE_VectorCall; + case CC_AMDGPUKernelCall: return llvm::CallingConv::AMDGPU_KERNEL; case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC; case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); case CC_PreserveMost: return llvm::CallingConv::PreserveMost; @@ -232,6 +233,9 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D, if (D->hasAttr<AArch64SVEPcsAttr>()) return CC_AArch64SVEPCS; + if (D->hasAttr<AMDGPUKernelCallAttr>()) + return CC_AMDGPUKernelCall; + if (D->hasAttr<IntelOclBiccAttr>()) return CC_IntelOclBicc; diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 7534270294415..1f528fe4065c2 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1335,6 +1335,7 @@ static unsigned getDwarfCC(CallingConv CC) { case CC_SpirFunction: return llvm::dwarf::DW_CC_LLVM_SpirFunction; case CC_OpenCLKernel: + case CC_AMDGPUKernelCall: return llvm::dwarf::DW_CC_LLVM_OpenCLKernel; case CC_Swift: return llvm::dwarf::DW_CC_LLVM_Swift; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 1b055543dc485..f39e1af10e3d7 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5011,6 +5011,9 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { case ParsedAttr::AT_AArch64SVEPcs: D->addAttr(::new (S.Context) AArch64SVEPcsAttr(S.Context, AL)); return; + case ParsedAttr::AT_AMDGPUKernelCall: + D->addAttr(::new (S.Context) AMDGPUKernelCallAttr(S.Context, AL)); + return; case ParsedAttr::AT_IntelOclBicc: D->addAttr(::new (S.Context) IntelOclBiccAttr(S.Context, AL)); return; @@ -5171,6 +5174,9 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, case ParsedAttr::AT_AArch64SVEPcs: CC = CC_AArch64SVEPCS; break; + case ParsedAttr::AT_AMDGPUKernelCall: + CC = CC_AMDGPUKernelCall; + break; case ParsedAttr::AT_RegCall: CC = CC_X86RegCall; break; @@ -8784,6 +8790,7 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, case ParsedAttr::AT_PreserveAll: case ParsedAttr::AT_AArch64VectorPcs: case ParsedAttr::AT_AArch64SVEPcs: + case ParsedAttr::AT_AMDGPUKernelCall: handleCallConvAttr(S, D, AL); break; case ParsedAttr::AT_Suppress: diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 314110c926828..5c4d3b9c2a293 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -122,6 +122,7 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr, case ParsedAttr::AT_VectorCall: \ case ParsedAttr::AT_AArch64VectorPcs: \ case ParsedAttr::AT_AArch64SVEPcs: \ + case ParsedAttr::AT_AMDGPUKernelCall: \ case ParsedAttr::AT_MSABI: \ case ParsedAttr::AT_SysVABI: \ case ParsedAttr::AT_Pcs: \ @@ -7482,6 +7483,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) { return createSimpleAttr<AArch64VectorPcsAttr>(Ctx, Attr); case ParsedAttr::AT_AArch64SVEPcs: return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr); + case ParsedAttr::AT_AMDGPUKernelCall: + return createSimpleAttr<AMDGPUKernelCallAttr>(Ctx, Attr); case ParsedAttr::AT_Pcs: { // The attribute may have had a fixit applied where we treated an // identifier as a string literal. The contents of the string are valid, diff --git a/clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp b/clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp new file mode 100644 index 0000000000000..90a9366ac9a20 --- /dev/null +++ b/clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp @@ -0,0 +1,83 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s + +// Derived from CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu by deleting references to HOST +// The original test passes the result through opt O2, but that seems to introduce invalid +// addrspace casts which are not being fixed as part of the present change. + +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(i32* {{.*}} %x) +// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* +__attribute__((amdgpu_kernel)) void kernel1(int *x) { + x[0]++; +} + +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(i32* {{.*}} nonnull align 4 dereferenceable(4) %x) +// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* +__attribute__((amdgpu_kernel)) void kernel2(int &x) { + x++; +} + +// CHECK-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y) +// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* +__attribute__((amdgpu_kernel)) void kernel3(__attribute__((address_space(2))) int *x, + __attribute__((address_space(1))) int *y) { + y[0] = x[0]; +} + +// COMMON-LABEL: define{{.*}} void @_Z4funcPi(i32*{{.*}} %x) +// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* +__attribute__((amdgpu_kernel)) void func(int *x) { + x[0]++; +} + +struct S { + int *x; + float *y; +}; +// `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect +// by-val). However, the enhanced address inferring pass should be able to +// assume they are global pointers. +// + +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel41S(%struct.S addrspace(4)*{{.*}} byref(%struct.S) align 8 %0) +__attribute__((amdgpu_kernel)) void kernel4(struct S s) { + s.x[0]++; + s.y[0] += 1.f; +} + +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel5P1S(%struct.S* {{.*}} %s) +__attribute__((amdgpu_kernel)) void kernel5(struct S *s) { + s->x[0]++; + s->y[0] += 1.f; +} + +struct T { + float *x[2]; +}; +// `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect +// by-val). However, the enhanced address inferring pass should be able to +// assume they are global pointers. +// +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel61T(%struct.T addrspace(4)*{{.*}} byref(%struct.T) align 8 %0) +__attribute__((amdgpu_kernel)) void kernel6(struct T t) { + t.x[0][0] += 1.f; + t.x[1][0] += 2.f; +} + +// Check that coerced pointers retain the noalias attribute when qualified with __restrict. +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel7Pi(i32* noalias{{.*}} %x) +__attribute__((amdgpu_kernel)) void kernel7(int *__restrict x) { + x[0]++; +} + +// Single element struct. +struct SS { + float *x; +}; +// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(float* %a.coerce) +// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* +__attribute__((amdgpu_kernel)) void kernel8(struct SS a) { + *a.x += 3.f; +} + diff --git a/clang/test/Sema/callingconv.c b/clang/test/Sema/callingconv.c index 675200cc9cfc7..1555a993218f7 100644 --- a/clang/test/Sema/callingconv.c +++ b/clang/test/Sema/callingconv.c @@ -54,6 +54,8 @@ int __attribute__((pcs("foo"))) pcs7(void); // expected-error {{invalid PCS type int __attribute__((aarch64_vector_pcs)) aavpcs(void); // expected-warning {{'aarch64_vector_pcs' calling convention is not supported for this target}} int __attribute__((aarch64_sve_pcs)) aasvepcs(void); // expected-warning {{'aarch64_sve_pcs' calling convention is not supported for this target}} +int __attribute__((amdgpu_kernel)) amdgpu_kernel(void); // expected-warning {{'amdgpu_kernel' calling convention is not supported for this target}} + // PR6361 void ctest3(); void __attribute__((cdecl)) ctest3() {} diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp index 4aee32210df1f..cac5d3d4fa90d 100644 --- a/clang/tools/libclang/CXType.cpp +++ b/clang/tools/libclang/CXType.cpp @@ -677,6 +677,7 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) { TCALLINGCONV(PreserveMost); TCALLINGCONV(PreserveAll); case CC_SpirFunction: return CXCallingConv_Unexposed; + case CC_AMDGPUKernelCall: return CXCallingConv_Unexposed; case CC_OpenCLKernel: return CXCallingConv_Unexposed; break; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits