https://github.com/RiverDave updated https://github.com/llvm/llvm-project/pull/177827
>From 3d2b769f35cb76062f55164720da2d0c176bd04f Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Sat, 24 Jan 2026 23:56:28 -0500 Subject: [PATCH 1/3] [CIR][CUDA] Add NVPTX target info and CUDA/HIP global emission filtering --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 66 ++++++++++++++++++++++ clang/lib/CIR/CodeGen/CIRGenModule.h | 4 ++ clang/lib/CIR/CodeGen/TargetInfo.cpp | 19 +++++++ clang/lib/CIR/CodeGen/TargetInfo.h | 2 + clang/test/CIR/CodeGen/CUDA/filter-decl.cu | 37 ++++++++++++ clang/test/CIR/CodeGen/CUDA/nvptx-basic.cu | 30 ++++++++++ 6 files changed, 158 insertions(+) create mode 100644 clang/test/CIR/CodeGen/CUDA/filter-decl.cu create mode 100644 clang/test/CIR/CodeGen/CUDA/nvptx-basic.cu diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index b535eab913a5d..64aee615e39eb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -17,6 +17,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DeclBase.h" +#include "clang/AST/ASTLambda.h" #include "clang/AST/DeclOpenACC.h" #include "clang/AST/GlobalDecl.h" #include "clang/AST/RecordLayout.h" @@ -28,6 +29,7 @@ #include "clang/CIR/MissingFeatures.h" #include "CIRGenFunctionInfo.h" +#include "TargetInfo.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Location.h" #include "mlir/IR/MLIRContext.h" @@ -242,6 +244,10 @@ const TargetCIRGenInfo &CIRGenModule::getTargetCIRGenInfo() { return *theTargetCIRGenInfo; } } + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + theTargetCIRGenInfo = createNVPTXTargetCIRGenInfo(genTypes); + return *theTargetCIRGenInfo; } } @@ -358,6 +364,35 @@ void CIRGenModule::emitDeferred() { } } +template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *decl) { + if (!decl) + return false; + if (auto *attr = decl->getAttr<AttrT>()) + return attr->isImplicit(); + return decl->isImplicit(); +} + +// This function returns true if M is a specialization, a template, +// or a non-generic lambda call operator. +inline bool isLambdaCallOperator(const CXXMethodDecl *MD) { + const CXXRecordDecl *LambdaClass = MD->getParent(); + if (!LambdaClass || !LambdaClass->isLambda()) return false; + return MD->getOverloadedOperator() == OO_Call; +} + +bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl *global) const { + assert(langOpts.CUDA && "Should not be called by non-CUDA languages"); + // We need to emit host-side 'shadows' for all global + // device-side variables because the CUDA runtime needs their + // size and host-side address in order to provide access to + // their device-side incarnations. + return !langOpts.CUDAIsDevice || global->hasAttr<CUDADeviceAttr>() || + global->hasAttr<CUDAConstantAttr>() || + global->hasAttr<CUDASharedAttr>() || + global->getType()->isCUDADeviceBuiltinSurfaceType() || + global->getType()->isCUDADeviceBuiltinTextureType(); +} + void CIRGenModule::emitGlobal(clang::GlobalDecl gd) { if (const auto *cd = dyn_cast<clang::OpenACCConstructDecl>(gd.getDecl())) { emitGlobalOpenACCDecl(cd); @@ -372,6 +407,35 @@ void CIRGenModule::emitGlobal(clang::GlobalDecl gd) { const auto *global = cast<ValueDecl>(gd.getDecl()); + // If this is CUDA, be selective about which declarations we emit. + // Non-constexpr non-lambda implicit host device functions are not emitted + // unless they are used on device side. + if (langOpts.CUDA) { + assert((isa<FunctionDecl>(global) || isa<VarDecl>(global)) && + "Expected Variable or Function"); + if (const auto *varDecl = dyn_cast<VarDecl>(global)) { + if (!shouldEmitCUDAGlobalVar(varDecl)) + return; + } else if (langOpts.CUDAIsDevice) { + const auto *functionDecl = dyn_cast<FunctionDecl>(global); + if ((!global->hasAttr<CUDADeviceAttr>() || + (langOpts.OffloadImplicitHostDeviceTemplates && + hasImplicitAttr<CUDAHostAttr>(functionDecl) && + hasImplicitAttr<CUDADeviceAttr>(functionDecl) && + !functionDecl->isConstexpr() && + !isLambdaCallOperator(functionDecl) && + !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count( + functionDecl))) && + !global->hasAttr<CUDAGlobalAttr>() && + !(langOpts.HIPStdPar && isa<FunctionDecl>(global) && + !global->hasAttr<CUDAHostAttr>())) + return; + // Device-only functions are the only things we skip. + } else if (!global->hasAttr<CUDAHostAttr>() && + global->hasAttr<CUDADeviceAttr>()) + return; + } + if (const auto *fd = dyn_cast<FunctionDecl>(global)) { // Update deferred annotations with the latest declaration if the function // was already used or defined. @@ -1981,6 +2045,8 @@ bool CIRGenModule::mayBeEmittedEagerly(const ValueDecl *global) { return true; } + + static bool shouldAssumeDSOLocal(const CIRGenModule &cgm, cir::CIRGlobalValueInterface gv) { if (gv.hasLocalLinkage()) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 3c4f35bacc4f9..9503f993f56d3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -557,6 +557,10 @@ class CIRGenModule : public CIRGenTypeCache { static void setInitializer(cir::GlobalOp &op, mlir::Attribute value); + // Whether a global variable should be emitted by CUDA/HIP host/device + // related attributes. + bool shouldEmitCUDAGlobalVar(const VarDecl *global) const; + void replaceUsesOfNonProtoTypeWithRealFunction(mlir::Operation *old, cir::FuncOp newFn); diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index 377c532e492d9..dc29dc0204c19 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -56,6 +56,25 @@ class X8664TargetCIRGenInfo : public TargetCIRGenInfo { } // namespace +namespace { + +class NVPTXABIInfo : public ABIInfo { +public: + NVPTXABIInfo(CIRGenTypes &cgt) : ABIInfo(cgt) {} +}; + +class NVPTXTargetCIRGenInfo : public TargetCIRGenInfo { +public: + NVPTXTargetCIRGenInfo(CIRGenTypes &cgt) + : TargetCIRGenInfo(std::make_unique<NVPTXABIInfo>(cgt)) {} +}; +} // namespace + +std::unique_ptr<TargetCIRGenInfo> +clang::CIRGen::createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt) { + return std::make_unique<NVPTXTargetCIRGenInfo>(cgt); +} + std::unique_ptr<TargetCIRGenInfo> clang::CIRGen::createX8664TargetCIRGenInfo(CIRGenTypes &cgt) { return std::make_unique<X8664TargetCIRGenInfo>(cgt); diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index 9535ba94fb08b..bab838692e215 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -124,6 +124,8 @@ class TargetCIRGenInfo { std::unique_ptr<TargetCIRGenInfo> createX8664TargetCIRGenInfo(CIRGenTypes &cgt); +std::unique_ptr<TargetCIRGenInfo> createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt); + } // namespace clang::CIRGen #endif // LLVM_CLANG_LIB_CIR_TARGETINFO_H diff --git a/clang/test/CIR/CodeGen/CUDA/filter-decl.cu b/clang/test/CIR/CodeGen/CUDA/filter-decl.cu new file mode 100644 index 0000000000000..ac1e7aeb4f1e1 --- /dev/null +++ b/clang/test/CIR/CodeGen/CUDA/filter-decl.cu @@ -0,0 +1,37 @@ +// Based on clang/test/CodeGenCUDA/filter-decl.cu tailored for CIR current capabilities. +// Tests that host/device functions are emitted only on the appropriate side. + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x cuda \ +// RUN: -I%S/../inputs -emit-cir %s -o %t.host.cir +// RUN: FileCheck --input-file=%t.host.cir %s --check-prefix=CHECK-HOST + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -I%S/../inputs -fcuda-is-device -emit-cir %s -o %t.device.cir +// RUN: FileCheck --input-file=%t.device.cir %s --check-prefix=CHECK-DEVICE + +#include "cuda.h" + +// Implicit host function (no attribute) — host only +// CHECK-HOST: cir.func {{.*}} @_Z20implicithostonlyfuncv() +// CHECK-DEVICE-NOT: @_Z20implicithostonlyfuncv +void implicithostonlyfunc(void) {} + +// Explicit __host__ function — host only +// CHECK-HOST: cir.func {{.*}} @_Z20explicithostonlyfuncv() +// CHECK-DEVICE-NOT: @_Z20explicithostonlyfuncv +__host__ void explicithostonlyfunc(void) {} + +// __device__ function — device only +// CHECK-HOST-NOT: @_Z14deviceonlyfuncv +// CHECK-DEVICE: cir.func {{.*}} @_Z14deviceonlyfuncv() +__device__ void deviceonlyfunc(void) {} + +// __host__ __device__ function — both sides +// CHECK-HOST: cir.func {{.*}} @_Z14hostdevicefuncv() +// CHECK-DEVICE: cir.func {{.*}} @_Z14hostdevicefuncv() +__host__ __device__ void hostdevicefunc(void) {} + +// __global__ kernel — both sides (stub on host, kernel on device) +// CHECK-HOST: cir.func {{.*}} @__device_stub__globalfunc() +// CHECK-DEVICE: cir.func {{.*}} @_Z10globalfuncv() +__global__ void globalfunc(void) {} diff --git a/clang/test/CIR/CodeGen/CUDA/nvptx-basic.cu b/clang/test/CIR/CodeGen/CUDA/nvptx-basic.cu new file mode 100644 index 0000000000000..fe2233de4d10c --- /dev/null +++ b/clang/test/CIR/CodeGen/CUDA/nvptx-basic.cu @@ -0,0 +1,30 @@ +// Based on clang/test/CodeGenCUDA/ptx-kernels.cu tailored for CIR current capabilities. +// Tests basic device-side compilation with NVPTX target. + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -I%S/../inputs -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +#include "cuda.h" + +// CHECK: cir.func {{.*}} @device_function() +extern "C" +__device__ void device_function() {} + +// CHECK: cir.func {{.*}} @global_function() +// CHECK: cir.call @device_function() +extern "C" +__global__ void global_function() { + device_function(); +} + +// Template kernel with explicit instantiation +template <typename T> __global__ void templated_kernel(T param) {} +template __global__ void templated_kernel<int>(int); +// CHECK: cir.func {{.*}} @_Z16templated_kernelIiEvT_ + +// Anonymous namespace kernel +namespace { +__global__ void anonymous_ns_kernel() {} +// CHECK: cir.func {{.*}} @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv +} >From 2192fb246d8f35a9581d11d8a4ecac7254a8c3f2 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Sun, 25 Jan 2026 00:08:46 -0500 Subject: [PATCH 2/3] le format monseiur --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 64aee615e39eb..ffe8d1e55056f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -16,8 +16,8 @@ #include "CIRGenFunction.h" #include "clang/AST/ASTContext.h" -#include "clang/AST/DeclBase.h" #include "clang/AST/ASTLambda.h" +#include "clang/AST/DeclBase.h" #include "clang/AST/DeclOpenACC.h" #include "clang/AST/GlobalDecl.h" #include "clang/AST/RecordLayout.h" @@ -376,7 +376,8 @@ template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *decl) { // or a non-generic lambda call operator. inline bool isLambdaCallOperator(const CXXMethodDecl *MD) { const CXXRecordDecl *LambdaClass = MD->getParent(); - if (!LambdaClass || !LambdaClass->isLambda()) return false; + if (!LambdaClass || !LambdaClass->isLambda()) + return false; return MD->getOverloadedOperator() == OO_Call; } @@ -2045,8 +2046,6 @@ bool CIRGenModule::mayBeEmittedEagerly(const ValueDecl *global) { return true; } - - static bool shouldAssumeDSOLocal(const CIRGenModule &cgm, cir::CIRGlobalValueInterface gv) { if (gv.hasLocalLinkage()) >From 701b08e73fc41a12780bdf40c9e5174afcab56dd Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Sun, 25 Jan 2026 00:46:49 -0500 Subject: [PATCH 3/3] fix nit test case --- clang/test/CIR/CodeGen/CUDA/filter-decl.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CIR/CodeGen/CUDA/filter-decl.cu b/clang/test/CIR/CodeGen/CUDA/filter-decl.cu index ac1e7aeb4f1e1..c34117d6e9e71 100644 --- a/clang/test/CIR/CodeGen/CUDA/filter-decl.cu +++ b/clang/test/CIR/CodeGen/CUDA/filter-decl.cu @@ -32,6 +32,6 @@ __device__ void deviceonlyfunc(void) {} __host__ __device__ void hostdevicefunc(void) {} // __global__ kernel — both sides (stub on host, kernel on device) -// CHECK-HOST: cir.func {{.*}} @__device_stub__globalfunc() +// CHECK-HOST: cir.func {{.*}} @_Z25__device_stub__globalfuncv() // CHECK-DEVICE: cir.func {{.*}} @_Z10globalfuncv() __global__ void globalfunc(void) {} _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
