Author: David Rivera
Date: 2026-05-09T02:41:43-04:00
New Revision: 66d4162d99dd0592c33e23293deb187df5ad13d4

URL: 
https://github.com/llvm/llvm-project/commit/66d4162d99dd0592c33e23293deb187df5ad13d4
DIFF: 
https://github.com/llvm/llvm-project/commit/66d4162d99dd0592c33e23293deb187df5ad13d4.diff

LOG: [CIR][CUDA][NVPTX] Set ptx_kernel calling convention on CUDA kernels 
(#195382)

Related: https://github.com/llvm/llvm-project/issues/179278,
https://github.com/llvm/llvm-project/issues/175871

More target attributes like: NoInline on kernels, CUDALaunchBoundsAttr,
CUDAGridConstantAttr param attrs, nvvm.annotations for surface/texture
VarDecls to be deferred for later patches.

Added: 
    clang/test/CIR/CodeGenCUDA/ptx-kernels.cu

Modified: 
    clang/include/clang/CIR/MissingFeatures.h
    clang/lib/CIR/CodeGen/TargetInfo.cpp
    clang/test/CIR/CodeGenCUDA/address-spaces.cu

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/CIR/MissingFeatures.h 
b/clang/include/clang/CIR/MissingFeatures.h
index b285d93ac007d..ba5c2bf786a99 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -38,6 +38,7 @@ struct MissingFeatures {
   static bool opGlobalPragmaClangSection() { return false; }
   static bool opGlobalAnnotations() { return false; }
   static bool opGlobalCtorPriority() { return false; }
+  static bool emitNVVMMetadata() { return false; }
   static bool setDSOLocal() { return false; }
 
   static bool supportIFuncAttr() { return false; }
@@ -88,6 +89,7 @@ struct MissingFeatures {
   static bool opFuncUnwindTablesAttr() { return false; }
   static bool opFuncWillReturn() { return false; }
   static bool opFuncNoReturn() { return false; }
+  static bool handleCUDALaunchBoundsAttr() { return false; }
   static bool setLLVMFunctionFEnvAttributes() { return false; }
 
   // CallOp handling

diff  --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp 
b/clang/lib/CIR/CodeGen/TargetInfo.cpp
index fc939cd9605ab..71ccb6e24a8aa 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp
@@ -6,6 +6,7 @@
 #include "clang/Basic/AddressSpaces.h"
 #include "clang/CIR/Dialect/IR/CIRAttrs.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
+#include "clang/CIR/MissingFeatures.h"
 
 using namespace clang;
 using namespace clang::CIRGen;
@@ -132,6 +133,46 @@ class NVPTXTargetCIRGenInfo : public TargetCIRGenInfo {
 public:
   NVPTXTargetCIRGenInfo(CIRGenTypes &cgt)
       : TargetCIRGenInfo(std::make_unique<NVPTXABIInfo>(cgt)) {}
+
+  void setTargetAttributes(const clang::Decl *decl, mlir::Operation *global,
+                           CIRGenModule &cgm) const override {
+    auto globalValue = mlir::dyn_cast<cir::CIRGlobalValueInterface>(global);
+    if (globalValue && globalValue.isDeclaration())
+      return;
+
+    const auto *vd = dyn_cast_or_null<VarDecl>(decl);
+    if (vd) {
+      if (cgm.getLangOpts().CUDA) {
+        if (vd->getType()->isCUDADeviceBuiltinSurfaceType() ||
+            vd->getType()->isCUDADeviceBuiltinTextureType())
+          assert(!cir::MissingFeatures::emitNVVMMetadata());
+        return;
+      }
+    }
+
+    const auto *fd = dyn_cast_or_null<FunctionDecl>(decl);
+    if (!fd)
+      return;
+
+    auto func = mlir::cast<cir::FuncOp>(global);
+
+    // Perform special handling in OpenCL/CUDA mode.
+    if (cgm.getLangOpts().OpenCL || cgm.getLangOpts().CUDA) {
+      // Use function attributes to check for kernel functions. By default, all
+      // functions are device functions.
+      if (fd->hasAttr<DeviceKernelAttr>() || fd->hasAttr<CUDAGlobalAttr>()) {
+        // OpenCL/CUDA kernel functions get kernel metadata. Kernel functions
+        // are also not subject to inlining.
+        func.setInlineKind(cir::InlineKind::NoInline);
+        if (fd->hasAttr<CUDAGlobalAttr>()) {
+          func.setCallingConv(cir::CallingConv::PTXKernel);
+          assert(!cir::MissingFeatures::opFuncParameterAttributes());
+        }
+        if (fd->hasAttr<CUDALaunchBoundsAttr>())
+          assert(!cir::MissingFeatures::handleCUDALaunchBoundsAttr());
+      }
+    }
+  }
 };
 } // namespace
 

diff  --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu 
b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
index cc1791a8f2244..2f235c8702899 100644
--- a/clang/test/CIR/CodeGenCUDA/address-spaces.cu
+++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
@@ -86,7 +86,7 @@ __global__ void fn() {
 // CIR-DEVICE:   cir.store {{.*}}%[[VAL]], %[[J]] : !s32i, !cir.ptr<!s32i>
 // CIR-DEVICE:   cir.return
 
-// LLVM-DEVICE: define dso_local void @_Z2fnv()
+// LLVM-DEVICE: define dso_local ptx_kernel void @_Z2fnv()
 // LLVM-DEVICE:   %[[ALLOCA:.*]] = alloca i32, i64 1, align 4
 // LLVM-DEVICE:   store i32 0, ptr %[[ALLOCA]], align 4
 // LLVM-DEVICE:   %[[VAL:.*]] = load i32, ptr %[[ALLOCA]], align 4

diff  --git a/clang/test/CIR/CodeGenCUDA/ptx-kernels.cu 
b/clang/test/CIR/CodeGenCUDA/ptx-kernels.cu
new file mode 100644
index 0000000000000..155e59638eac7
--- /dev/null
+++ b/clang/test/CIR/CodeGenCUDA/ptx-kernels.cu
@@ -0,0 +1,42 @@
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR %s --input-file=%t.cir
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ll
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \
+// RUN:            -fcuda-is-device -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=OGCG %s --input-file=%t.ogcg.ll
+
+#include "Inputs/cuda.h"
+
+// CIR: cir.func {{.*}} @device_function()
+// LLVM: define{{.*}} void @device_function
+// OGCG: define{{.*}} void @device_function
+extern "C"
+__device__ void device_function() {}
+
+// CIR: cir.func {{.*}} @global_function() cc(ptx_kernel)
+// LLVM: define{{.*}} ptx_kernel void @global_function
+// OGCG: define{{.*}} ptx_kernel void @global_function
+extern "C"
+__global__ void global_function() {
+  device_function();
+}
+
+template <typename T> __global__ void templated_kernel(T param) {}
+template __global__ void templated_kernel<int>(int);
+// CIR-DAG: cir.func {{.*}} @_Z16templated_kernelIiEvT_({{.*}}) cc(ptx_kernel)
+// LLVM-DAG: define{{.*}} ptx_kernel void @_Z16templated_kernelIiEvT_(
+// OGCG-DAG: define{{.*}} ptx_kernel void @_Z16templated_kernelIiEvT_(
+
+namespace {
+__global__ void anonymous_ns_kernel() {}
+// CIR-DAG: cir.func {{.*}} @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv() 
cc(ptx_kernel)
+// LLVM-DAG: define{{.*}} ptx_kernel void 
@_ZN12_GLOBAL__N_119anonymous_ns_kernelEv(
+// OGCG-DAG: define{{.*}} ptx_kernel void 
@_ZN12_GLOBAL__N_119anonymous_ns_kernelEv(
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to