Author: David Rivera Date: 2026-05-09T02:41:43-04:00 New Revision: 66d4162d99dd0592c33e23293deb187df5ad13d4
URL: https://github.com/llvm/llvm-project/commit/66d4162d99dd0592c33e23293deb187df5ad13d4 DIFF: https://github.com/llvm/llvm-project/commit/66d4162d99dd0592c33e23293deb187df5ad13d4.diff LOG: [CIR][CUDA][NVPTX] Set ptx_kernel calling convention on CUDA kernels (#195382) Related: https://github.com/llvm/llvm-project/issues/179278, https://github.com/llvm/llvm-project/issues/175871 More target attributes like: NoInline on kernels, CUDALaunchBoundsAttr, CUDAGridConstantAttr param attrs, nvvm.annotations for surface/texture VarDecls to be deferred for later patches. Added: clang/test/CIR/CodeGenCUDA/ptx-kernels.cu Modified: clang/include/clang/CIR/MissingFeatures.h clang/lib/CIR/CodeGen/TargetInfo.cpp clang/test/CIR/CodeGenCUDA/address-spaces.cu Removed: ################################################################################ diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index b285d93ac007d..ba5c2bf786a99 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -38,6 +38,7 @@ struct MissingFeatures { static bool opGlobalPragmaClangSection() { return false; } static bool opGlobalAnnotations() { return false; } static bool opGlobalCtorPriority() { return false; } + static bool emitNVVMMetadata() { return false; } static bool setDSOLocal() { return false; } static bool supportIFuncAttr() { return false; } @@ -88,6 +89,7 @@ struct MissingFeatures { static bool opFuncUnwindTablesAttr() { return false; } static bool opFuncWillReturn() { return false; } static bool opFuncNoReturn() { return false; } + static bool handleCUDALaunchBoundsAttr() { return false; } static bool setLLVMFunctionFEnvAttributes() { return false; } // CallOp handling diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index fc939cd9605ab..71ccb6e24a8aa 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -6,6 +6,7 @@ #include "clang/Basic/AddressSpaces.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/MissingFeatures.h" using namespace clang; using namespace clang::CIRGen; @@ -132,6 +133,46 @@ class NVPTXTargetCIRGenInfo : public TargetCIRGenInfo { public: NVPTXTargetCIRGenInfo(CIRGenTypes &cgt) : TargetCIRGenInfo(std::make_unique<NVPTXABIInfo>(cgt)) {} + + void setTargetAttributes(const clang::Decl *decl, mlir::Operation *global, + CIRGenModule &cgm) const override { + auto globalValue = mlir::dyn_cast<cir::CIRGlobalValueInterface>(global); + if (globalValue && globalValue.isDeclaration()) + return; + + const auto *vd = dyn_cast_or_null<VarDecl>(decl); + if (vd) { + if (cgm.getLangOpts().CUDA) { + if (vd->getType()->isCUDADeviceBuiltinSurfaceType() || + vd->getType()->isCUDADeviceBuiltinTextureType()) + assert(!cir::MissingFeatures::emitNVVMMetadata()); + return; + } + } + + const auto *fd = dyn_cast_or_null<FunctionDecl>(decl); + if (!fd) + return; + + auto func = mlir::cast<cir::FuncOp>(global); + + // Perform special handling in OpenCL/CUDA mode. + if (cgm.getLangOpts().OpenCL || cgm.getLangOpts().CUDA) { + // Use function attributes to check for kernel functions. By default, all + // functions are device functions. + if (fd->hasAttr<DeviceKernelAttr>() || fd->hasAttr<CUDAGlobalAttr>()) { + // OpenCL/CUDA kernel functions get kernel metadata. Kernel functions + // are also not subject to inlining. + func.setInlineKind(cir::InlineKind::NoInline); + if (fd->hasAttr<CUDAGlobalAttr>()) { + func.setCallingConv(cir::CallingConv::PTXKernel); + assert(!cir::MissingFeatures::opFuncParameterAttributes()); + } + if (fd->hasAttr<CUDALaunchBoundsAttr>()) + assert(!cir::MissingFeatures::handleCUDALaunchBoundsAttr()); + } + } + } }; } // namespace diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu b/clang/test/CIR/CodeGenCUDA/address-spaces.cu index cc1791a8f2244..2f235c8702899 100644 --- a/clang/test/CIR/CodeGenCUDA/address-spaces.cu +++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu @@ -86,7 +86,7 @@ __global__ void fn() { // CIR-DEVICE: cir.store {{.*}}%[[VAL]], %[[J]] : !s32i, !cir.ptr<!s32i> // CIR-DEVICE: cir.return -// LLVM-DEVICE: define dso_local void @_Z2fnv() +// LLVM-DEVICE: define dso_local ptx_kernel void @_Z2fnv() // LLVM-DEVICE: %[[ALLOCA:.*]] = alloca i32, i64 1, align 4 // LLVM-DEVICE: store i32 0, ptr %[[ALLOCA]], align 4 // LLVM-DEVICE: %[[VAL:.*]] = load i32, ptr %[[ALLOCA]], align 4 diff --git a/clang/test/CIR/CodeGenCUDA/ptx-kernels.cu b/clang/test/CIR/CodeGenCUDA/ptx-kernels.cu new file mode 100644 index 0000000000000..155e59638eac7 --- /dev/null +++ b/clang/test/CIR/CodeGenCUDA/ptx-kernels.cu @@ -0,0 +1,42 @@ +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda -fclangir \ +// RUN: -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR %s --input-file=%t.cir + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda -fclangir \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ll + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t.ogcg.ll +// RUN: FileCheck --check-prefix=OGCG %s --input-file=%t.ogcg.ll + +#include "Inputs/cuda.h" + +// CIR: cir.func {{.*}} @device_function() +// LLVM: define{{.*}} void @device_function +// OGCG: define{{.*}} void @device_function +extern "C" +__device__ void device_function() {} + +// CIR: cir.func {{.*}} @global_function() cc(ptx_kernel) +// LLVM: define{{.*}} ptx_kernel void @global_function +// OGCG: define{{.*}} ptx_kernel void @global_function +extern "C" +__global__ void global_function() { + device_function(); +} + +template <typename T> __global__ void templated_kernel(T param) {} +template __global__ void templated_kernel<int>(int); +// CIR-DAG: cir.func {{.*}} @_Z16templated_kernelIiEvT_({{.*}}) cc(ptx_kernel) +// LLVM-DAG: define{{.*}} ptx_kernel void @_Z16templated_kernelIiEvT_( +// OGCG-DAG: define{{.*}} ptx_kernel void @_Z16templated_kernelIiEvT_( + +namespace { +__global__ void anonymous_ns_kernel() {} +// CIR-DAG: cir.func {{.*}} @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv() cc(ptx_kernel) +// LLVM-DAG: define{{.*}} ptx_kernel void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv( +// OGCG-DAG: define{{.*}} ptx_kernel void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv( +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
