llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clangir Author: David Rivera (RiverDave) <details> <summary>Changes</summary> This patch adds foundational infra for device-side CUDA/HIP compilation by introducing NVPTX target info and implementing the global emission filtering logic. NVPTX Target Info to allows us to compile against that triple: - Add NVPTXABIInfo and NVPTXTargetCIRGenInfo classes - Wire up nvptx and nvptx64 triples in getTargetCIRGenInfo() - Add createNVPTXTargetCIRGenInfo() factory function CUDA/HIP Global Emission Filtering (most of this is boilerplate from the AST) This basically narrows down to: - Skip host-only functions (no `__device__` attribute) when `-fcuda-is-device` - Skip device-only functions (device without host) on host side - Always emit ` __global__` kernels and `__host__` `__device__` functions on both sides - Add `shouldEmitCUDAGlobalVar()` to handle variable emission (device/constant/shared variables) - Handle special cases: implicit host/device templates, lambda call operators --- Full diff: https://github.com/llvm/llvm-project/pull/177827.diff 6 Files Affected: - (modified) clang/lib/CIR/CodeGen/CIRGenModule.cpp (+65) - (modified) clang/lib/CIR/CodeGen/CIRGenModule.h (+4) - (modified) clang/lib/CIR/CodeGen/TargetInfo.cpp (+19) - (modified) clang/lib/CIR/CodeGen/TargetInfo.h (+2) - (added) clang/test/CIR/CodeGen/CUDA/filter-decl.cu (+37) - (added) clang/test/CIR/CodeGen/CUDA/nvptx-basic.cu (+30) ``````````diff diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index b535eab913a5d..ffe8d1e55056f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -16,6 +16,7 @@ #include "CIRGenFunction.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/ASTLambda.h" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclOpenACC.h" #include "clang/AST/GlobalDecl.h" @@ -28,6 +29,7 @@ #include "clang/CIR/MissingFeatures.h" #include "CIRGenFunctionInfo.h" +#include "TargetInfo.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Location.h" #include "mlir/IR/MLIRContext.h" @@ -242,6 +244,10 @@ const TargetCIRGenInfo &CIRGenModule::getTargetCIRGenInfo() { return *theTargetCIRGenInfo; } } + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + theTargetCIRGenInfo = createNVPTXTargetCIRGenInfo(genTypes); + return *theTargetCIRGenInfo; } } @@ -358,6 +364,36 @@ void CIRGenModule::emitDeferred() { } } +template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *decl) { + if (!decl) + return false; + if (auto *attr = decl->getAttr<AttrT>()) + return attr->isImplicit(); + return decl->isImplicit(); +} + +// This function returns true if M is a specialization, a template, +// or a non-generic lambda call operator. +inline bool isLambdaCallOperator(const CXXMethodDecl *MD) { + const CXXRecordDecl *LambdaClass = MD->getParent(); + if (!LambdaClass || !LambdaClass->isLambda()) + return false; + return MD->getOverloadedOperator() == OO_Call; +} + +bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl *global) const { + assert(langOpts.CUDA && "Should not be called by non-CUDA languages"); + // We need to emit host-side 'shadows' for all global + // device-side variables because the CUDA runtime needs their + // size and host-side address in order to provide access to + // their device-side incarnations. + return !langOpts.CUDAIsDevice || global->hasAttr<CUDADeviceAttr>() || + global->hasAttr<CUDAConstantAttr>() || + global->hasAttr<CUDASharedAttr>() || + global->getType()->isCUDADeviceBuiltinSurfaceType() || + global->getType()->isCUDADeviceBuiltinTextureType(); +} + void CIRGenModule::emitGlobal(clang::GlobalDecl gd) { if (const auto *cd = dyn_cast<clang::OpenACCConstructDecl>(gd.getDecl())) { emitGlobalOpenACCDecl(cd); @@ -372,6 +408,35 @@ void CIRGenModule::emitGlobal(clang::GlobalDecl gd) { const auto *global = cast<ValueDecl>(gd.getDecl()); + // If this is CUDA, be selective about which declarations we emit. + // Non-constexpr non-lambda implicit host device functions are not emitted + // unless they are used on device side. + if (langOpts.CUDA) { + assert((isa<FunctionDecl>(global) || isa<VarDecl>(global)) && + "Expected Variable or Function"); + if (const auto *varDecl = dyn_cast<VarDecl>(global)) { + if (!shouldEmitCUDAGlobalVar(varDecl)) + return; + } else if (langOpts.CUDAIsDevice) { + const auto *functionDecl = dyn_cast<FunctionDecl>(global); + if ((!global->hasAttr<CUDADeviceAttr>() || + (langOpts.OffloadImplicitHostDeviceTemplates && + hasImplicitAttr<CUDAHostAttr>(functionDecl) && + hasImplicitAttr<CUDADeviceAttr>(functionDecl) && + !functionDecl->isConstexpr() && + !isLambdaCallOperator(functionDecl) && + !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count( + functionDecl))) && + !global->hasAttr<CUDAGlobalAttr>() && + !(langOpts.HIPStdPar && isa<FunctionDecl>(global) && + !global->hasAttr<CUDAHostAttr>())) + return; + // Device-only functions are the only things we skip. + } else if (!global->hasAttr<CUDAHostAttr>() && + global->hasAttr<CUDADeviceAttr>()) + return; + } + if (const auto *fd = dyn_cast<FunctionDecl>(global)) { // Update deferred annotations with the latest declaration if the function // was already used or defined. diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 3c4f35bacc4f9..9503f993f56d3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -557,6 +557,10 @@ class CIRGenModule : public CIRGenTypeCache { static void setInitializer(cir::GlobalOp &op, mlir::Attribute value); + // Whether a global variable should be emitted by CUDA/HIP host/device + // related attributes. + bool shouldEmitCUDAGlobalVar(const VarDecl *global) const; + void replaceUsesOfNonProtoTypeWithRealFunction(mlir::Operation *old, cir::FuncOp newFn); diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index 377c532e492d9..dc29dc0204c19 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -56,6 +56,25 @@ class X8664TargetCIRGenInfo : public TargetCIRGenInfo { } // namespace +namespace { + +class NVPTXABIInfo : public ABIInfo { +public: + NVPTXABIInfo(CIRGenTypes &cgt) : ABIInfo(cgt) {} +}; + +class NVPTXTargetCIRGenInfo : public TargetCIRGenInfo { +public: + NVPTXTargetCIRGenInfo(CIRGenTypes &cgt) + : TargetCIRGenInfo(std::make_unique<NVPTXABIInfo>(cgt)) {} +}; +} // namespace + +std::unique_ptr<TargetCIRGenInfo> +clang::CIRGen::createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt) { + return std::make_unique<NVPTXTargetCIRGenInfo>(cgt); +} + std::unique_ptr<TargetCIRGenInfo> clang::CIRGen::createX8664TargetCIRGenInfo(CIRGenTypes &cgt) { return std::make_unique<X8664TargetCIRGenInfo>(cgt); diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index 9535ba94fb08b..bab838692e215 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -124,6 +124,8 @@ class TargetCIRGenInfo { std::unique_ptr<TargetCIRGenInfo> createX8664TargetCIRGenInfo(CIRGenTypes &cgt); +std::unique_ptr<TargetCIRGenInfo> createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt); + } // namespace clang::CIRGen #endif // LLVM_CLANG_LIB_CIR_TARGETINFO_H diff --git a/clang/test/CIR/CodeGen/CUDA/filter-decl.cu b/clang/test/CIR/CodeGen/CUDA/filter-decl.cu new file mode 100644 index 0000000000000..ac1e7aeb4f1e1 --- /dev/null +++ b/clang/test/CIR/CodeGen/CUDA/filter-decl.cu @@ -0,0 +1,37 @@ +// Based on clang/test/CodeGenCUDA/filter-decl.cu tailored for CIR current capabilities. +// Tests that host/device functions are emitted only on the appropriate side. + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x cuda \ +// RUN: -I%S/../inputs -emit-cir %s -o %t.host.cir +// RUN: FileCheck --input-file=%t.host.cir %s --check-prefix=CHECK-HOST + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -I%S/../inputs -fcuda-is-device -emit-cir %s -o %t.device.cir +// RUN: FileCheck --input-file=%t.device.cir %s --check-prefix=CHECK-DEVICE + +#include "cuda.h" + +// Implicit host function (no attribute) — host only +// CHECK-HOST: cir.func {{.*}} @_Z20implicithostonlyfuncv() +// CHECK-DEVICE-NOT: @_Z20implicithostonlyfuncv +void implicithostonlyfunc(void) {} + +// Explicit __host__ function — host only +// CHECK-HOST: cir.func {{.*}} @_Z20explicithostonlyfuncv() +// CHECK-DEVICE-NOT: @_Z20explicithostonlyfuncv +__host__ void explicithostonlyfunc(void) {} + +// __device__ function — device only +// CHECK-HOST-NOT: @_Z14deviceonlyfuncv +// CHECK-DEVICE: cir.func {{.*}} @_Z14deviceonlyfuncv() +__device__ void deviceonlyfunc(void) {} + +// __host__ __device__ function — both sides +// CHECK-HOST: cir.func {{.*}} @_Z14hostdevicefuncv() +// CHECK-DEVICE: cir.func {{.*}} @_Z14hostdevicefuncv() +__host__ __device__ void hostdevicefunc(void) {} + +// __global__ kernel — both sides (stub on host, kernel on device) +// CHECK-HOST: cir.func {{.*}} @__device_stub__globalfunc() +// CHECK-DEVICE: cir.func {{.*}} @_Z10globalfuncv() +__global__ void globalfunc(void) {} diff --git a/clang/test/CIR/CodeGen/CUDA/nvptx-basic.cu b/clang/test/CIR/CodeGen/CUDA/nvptx-basic.cu new file mode 100644 index 0000000000000..fe2233de4d10c --- /dev/null +++ b/clang/test/CIR/CodeGen/CUDA/nvptx-basic.cu @@ -0,0 +1,30 @@ +// Based on clang/test/CodeGenCUDA/ptx-kernels.cu tailored for CIR current capabilities. +// Tests basic device-side compilation with NVPTX target. + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \ +// RUN: -I%S/../inputs -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +#include "cuda.h" + +// CHECK: cir.func {{.*}} @device_function() +extern "C" +__device__ void device_function() {} + +// CHECK: cir.func {{.*}} @global_function() +// CHECK: cir.call @device_function() +extern "C" +__global__ void global_function() { + device_function(); +} + +// Template kernel with explicit instantiation +template <typename T> __global__ void templated_kernel(T param) {} +template __global__ void templated_kernel<int>(int); +// CHECK: cir.func {{.*}} @_Z16templated_kernelIiEvT_ + +// Anonymous namespace kernel +namespace { +__global__ void anonymous_ns_kernel() {} +// CHECK: cir.func {{.*}} @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv +} `````````` </details> https://github.com/llvm/llvm-project/pull/177827 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
