https://github.com/ZakyHermawan updated https://github.com/llvm/llvm-project/pull/184248
>From 809121a3d2fe82e146da5cc8177e134b862b2a01 Mon Sep 17 00:00:00 2001 From: ZakyHermawan <[email protected]> Date: Tue, 3 Mar 2026 05:16:23 +0700 Subject: [PATCH 1/3] [CIR][CUDA] Handle __device__ and __shared__ variables Signed-off-by: ZakyHermawan <[email protected]> --- clang/lib/CIR/CodeGen/CIRGenDecl.cpp | 14 ++-- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 53 ++++++++++++- clang/lib/CIR/CodeGen/CIRGenModule.h | 10 +++ clang/lib/CIR/CodeGen/TargetInfo.cpp | 9 +++ clang/lib/CIR/CodeGen/TargetInfo.h | 7 ++ clang/test/CIR/CodeGenCUDA/address-spaces.cu | 78 ++++++++++++++++++++ clang/test/CIR/CodeGenCUDA/global-vars.cu | 47 ++++++++++++ 7 files changed, 206 insertions(+), 12 deletions(-) create mode 100644 clang/test/CIR/CodeGenCUDA/address-spaces.cu create mode 100644 clang/test/CIR/CodeGenCUDA/global-vars.cu diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp index bb3117dfb2c98..b19e48d0f51d4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -433,12 +433,15 @@ CIRGenModule::getOrCreateStaticVarDecl(const VarDecl &d, mlir::Type lty = getTypes().convertTypeForMem(ty); assert(!cir::MissingFeatures::addressSpace()); - if (d.hasAttr<LoaderUninitializedAttr>() || d.hasAttr<CUDASharedAttr>()) + mlir::Attribute init = nullptr; + if (d.hasAttr<LoaderUninitializedAttr>()) errorNYI(d.getSourceRange(), "getOrCreateStaticVarDecl: LoaderUninitializedAttr"); - assert(!cir::MissingFeatures::addressSpace()); + else if (ty.getAddressSpace() != LangAS::opencl_local && + !d.hasAttr<CUDASharedAttr>()) + init = builder.getZeroInitAttr(convertType(ty)); - mlir::Attribute init = builder.getZeroInitAttr(convertType(ty)); + assert(!cir::MissingFeatures::addressSpace()); cir::GlobalOp gv = builder.createVersionedGlobal( getModule(), getLoc(d.getLocation()), name, lty, false, linkage); @@ -665,11 +668,6 @@ void CIRGenFunction::emitStaticVarDecl(const VarDecl &d, var.setAlignment(alignment.getAsAlign().value()); - // There are a lot of attributes that need to be handled here. Until - // we start to support them, we just report an error if there are any. - if (d.hasAttrs()) - cgm.errorNYI(d.getSourceRange(), "static var with attrs"); - if (cgm.getCodeGenOpts().KeepPersistentStorageVariables) cgm.errorNYI(d.getSourceRange(), "static var keep persistent storage"); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 223b53731359a..1517058af8782 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -797,6 +797,22 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, "external const declaration with initializer"); } + // TODO(cir): if this method is used to handle functions we must have + // something closer to GlobalValue::isDeclaration instead of checking for + // initializer. + if (gv.isDeclaration()) { + // TODO(cir): set target attributes + + // External HIP managed variables needed to be recorded for transformation + // in both device and host compilations. + // External HIP managed variables needed to be recorded for transformation + // in both device and host compilations. + if (getLangOpts().CUDA && d && d->hasAttr<HIPManagedAttr>() && + d->hasExternalStorage()) + llvm_unreachable("NYI"); + } + + // TODO(cir): address space cast when needed for DAddrSpace. return gv; } @@ -947,10 +963,6 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd, errorNYI(vd->getSourceRange(), "annotate global variable"); } - if (langOpts.CUDA) { - errorNYI(vd->getSourceRange(), "CUDA global variable"); - } - // Set initializer and finalize emission CIRGenModule::setInitializer(gv, init); if (emitter) @@ -1563,6 +1575,39 @@ CIRGenModule::getAddrOfConstantStringFromLiteral(const StringLiteral *s, return builder.getGlobalViewAttr(ptrTy, gv); } +LangAS CIRGenModule::getGlobalVarAddressSpace(const VarDecl *d) { + if (langOpts.OpenCL) { + LangAS as = d ? d->getType().getAddressSpace() : LangAS::opencl_global; + assert(as == LangAS::opencl_global || as == LangAS::opencl_global_device || + as == LangAS::opencl_global_host || as == LangAS::opencl_constant || + as == LangAS::opencl_local || as >= LangAS::FirstTargetAddressSpace); + return as; + } + + if (langOpts.SYCLIsDevice && + (!d || d->getType().getAddressSpace() == LangAS::Default)) + llvm_unreachable("NYI"); + + if (langOpts.CUDA && langOpts.CUDAIsDevice) { + if (d) { + if (d->hasAttr<CUDAConstantAttr>()) + return LangAS::cuda_constant; + if (d->hasAttr<CUDASharedAttr>()) + return LangAS::cuda_shared; + if (d->hasAttr<CUDADeviceAttr>()) + return LangAS::cuda_device; + if (d->getType().isConstQualified()) + return LangAS::cuda_constant; + } + return LangAS::cuda_device; + } + + if (langOpts.OpenMP) + llvm_unreachable("NYI"); + + return getTargetCIRGenInfo().getGlobalVarAddressSpace(*this, d); +} + // TODO(cir): this could be a common AST helper for both CIR and LLVM codegen. LangAS CIRGenModule::getLangTempAllocaAddressSpace() const { if (getLangOpts().OpenCL) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 52464a8bc30c4..d9173234868ee 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -359,6 +359,16 @@ class CIRGenModule : public CIRGenTypeCache { getAddrOfConstantStringFromLiteral(const StringLiteral *s, llvm::StringRef name = ".str"); + /// Return the AST address space of the underlying global variable for D, as + /// determined by its declaration. Normally this is the same as the address + /// space of D's type, but in CUDA, address spaces are associated with + /// declarations, not types. If D is nullptr, return the default address + /// space for global variable. + /// + /// For languages without explicit address spaces, if D has default address + /// space, target-specific global or constant address space may be returned. + LangAS getGlobalVarAddressSpace(const VarDecl *d); + /// Returns the address space for temporary allocations in the language. This /// ensures that the allocated variable's address space matches the /// expectations of the AST, rather than using the target's allocation address diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index 2f3824d3d47a7..70ffb46050ea1 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -91,3 +91,12 @@ bool TargetCIRGenInfo::isNoProtoCallVariadic( // For everything else, we just prefer false unless we opt out. return false; } + +clang::LangAS +TargetCIRGenInfo::getGlobalVarAddressSpace(CIRGenModule &cgm, + const clang::VarDecl *d) const { + assert(!cgm.getLangOpts().OpenCL && + !(cgm.getLangOpts().CUDA && cgm.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + return d ? d->getType().getAddressSpace() : LangAS::Default; +} diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index f4792d5309e36..8db2cbbce5d23 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -49,6 +49,13 @@ class TargetCIRGenInfo { /// Returns ABI info helper for the target. const ABIInfo &getABIInfo() const { return *info; } + /// Get target favored AST address space of a global variable for languages + /// other than OpenCL and CUDA. + /// If \p d is nullptr, returns the default target favored address space + /// for global variable. + virtual clang::LangAS getGlobalVarAddressSpace(CIRGenModule &cgm, + const clang::VarDecl *d) const; + /// Get the address space for alloca. virtual mlir::ptr::MemorySpaceAttrInterface getCIRAllocaAddressSpace() const { return cir::LangAddressSpaceAttr::get(&info->cgt.getMLIRContext(), diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu b/clang/test/CIR/CodeGenCUDA/address-spaces.cu new file mode 100644 index 0000000000000..68905a6616ca7 --- /dev/null +++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu @@ -0,0 +1,78 @@ +#include "Inputs/cuda.h" + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \ +// RUN: -fcuda-is-device -emit-cir -target-sdk-version=12.3 \ +// RUN: -I%S/Inputs/ %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \ +// RUN: -x cuda -emit-cir -target-sdk-version=12.3 \ +// RUN: -I%S/Inputs/ %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \ +// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \ +// RUN: -I%S/Inputs/ %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \ +// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \ +// RUN: -I%S/Inputs/ %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \ +// RUN: -I%S/Inputs/ %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \ +// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \ +// RUN: -I%S/Inputs/ %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s + +__global__ void fn() { + int i = 0; + __shared__ int j; + j = i; +} + +// CIR-DEVICE: cir.global "private" internal dso_local @_ZZ2fnvE1j : !s32i +// CIR-DEVICE: cir.func {{.*}}@_Z2fnv() {{.*}} { +// CIR-DEVICE: %[[I:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] +// CIR-DEVICE: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR-DEVICE: cir.store {{.*}}%[[ZERO]], %[[I]] : !s32i, !cir.ptr<!s32i> +// CIR-DEVICE: %[[J:.*]] = cir.get_global @_ZZ2fnvE1j : !cir.ptr<!s32i> +// CIR-DEVICE: %[[VAL:.*]] = cir.load {{.*}}%[[I]] : !cir.ptr<!s32i>, !s32i +// CIR-DEVICE: cir.store {{.*}}%[[VAL]], %[[J]] : !s32i, !cir.ptr<!s32i> +// CIR-DEVICE: cir.return + +// CIR-HOST: cir.func private dso_local @__cudaPopCallConfiguration +// CIR-HOST: cir.func private dso_local @cudaLaunchKernel +// CIR-HOST: cir.func {{.*}}@_Z17__device_stub__fnv() + +// LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef, align 4 +// LLVM-DEVICE: define dso_local void @_Z2fnv() +// LLVM-DEVICE: %[[ALLOCA:.*]] = alloca i32, i64 1, align 4 +// LLVM-DEVICE: store i32 0, ptr %[[ALLOCA]], align 4 +// LLVM-DEVICE: %[[VAL:.*]] = load i32, ptr %[[ALLOCA]], align 4 +// LLVM-DEVICE: store i32 %[[VAL]], ptr @_ZZ2fnvE1j, align 4 +// LLVM-DEVICE: ret void + +// LLVM-HOST: %struct.dim3 = type { i32, i32, i32 } +// LLVM-HOST: declare {{.*}}i32 @__cudaPopCallConfiguration(ptr, ptr, ptr, ptr) +// LLVM-HOST: declare {{.*}}i32 @cudaLaunchKernel(ptr, %struct.dim3, %struct.dim3, ptr, i64, ptr) +// LLVM-HOST: define dso_local void @_Z17__device_stub__fnv() + +// OGCG-HOST: define dso_local void @_Z17__device_stub__fnv() +// OGCG-HOST: entry: +// OGCG-HOST: call i32 @__cudaPopCallConfiguration +// OGCG-HOST: call {{.*}}i32 @cudaLaunchKernel + +// OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef, align 4 +// OGCG-DEVICE: define dso_local ptx_kernel void @_Z2fnv() +// OGCG-DEVICE: entry: +// OGCG-DEVICE: %[[I:.*]] = alloca i32, align 4 +// OGCG-DEVICE: store i32 0, ptr %[[I]], align 4 +// OGCG-DEVICE: %[[VAL:.*]] = load i32, ptr %[[I]], align 4 +// OGCG-DEVICE: store i32 %[[VAL]], ptr addrspacecast (ptr addrspace(3) @_ZZ2fnvE1j to ptr), align 4 +// OGCG-DEVICE: ret void diff --git a/clang/test/CIR/CodeGenCUDA/global-vars.cu b/clang/test/CIR/CodeGenCUDA/global-vars.cu new file mode 100644 index 0000000000000..f497d0e7f5f64 --- /dev/null +++ b/clang/test/CIR/CodeGenCUDA/global-vars.cu @@ -0,0 +1,47 @@ +#include "Inputs/cuda.h" + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \ +// RUN: -fcuda-is-device -emit-cir -target-sdk-version=12.3 \ +// RUN: -I%S/Inputs/ %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \ +// RUN: -x cuda -emit-cir -target-sdk-version=12.3 \ +// RUN: -I%S/Inputs/ %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \ +// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \ +// RUN: -I%S/Inputs/ %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \ +// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \ +// RUN: -I%S/Inputs/ %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \ +// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \ +// RUN: -I%S/Inputs/ %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \ +// RUN: -I%S/Inputs/ %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s + +__shared__ int a; +// CIR-DEVICE: cir.global external [[SHARED:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64} +// CIR-HOST: cir.global external [[SHARED_HOST:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64} +// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 0, align 4 +// LLVM-HOST: @[[SHARED_LH:.*]] = global i32 0, align 4 +// OGCG-DEVICE: @[[SHARED_OD:.*]] = addrspace(3) global i32 undef, align 4 +// OGCG-HOST: @[[SHARED_OH:.*]] = internal global i32 undef, align 4 + +__device__ int b; +// CIR-DEVICE: cir.global external [[DEV:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64} +// CIR-HOST: cir.global external [[DEV_HOST:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64} +// LLVM-DEVICE: @[[DEV_LD:.*]] = global i32 0, align 4 +// LLVM-HOST: @[[DEV_LH:.*]] = global i32 0, align 4 +// OGCG-HOST: @[[DEV_OH:.*]] = internal global i32 undef, align 4 +// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global i32 0, align 4 >From 512dd3acf7be7d06505243603fa2af9106a2b58b Mon Sep 17 00:00:00 2001 From: ZakyHermawan <[email protected]> Date: Fri, 6 Mar 2026 02:39:06 +0700 Subject: [PATCH 2/3] [CIR][CUDA] handle __constant__ variable Remove CIR-HOST LLVM-HOST and OGCG-HOST from global-vars.cu because shadow variables did not handled properly, yet Make few changes to handle __device__, __shared__, and __constant__ global variables using reference from OGCG Create and call a hook (setTargetAttributes) if the variable is global and declaration only. Signed-off-by: ZakyHermawan <[email protected]> --- .../clang/CIR/Dialect/IR/CIRCUDAAttrs.td | 14 +++- clang/lib/CIR/CodeGen/CIRGenDecl.cpp | 22 ++++++ clang/lib/CIR/CodeGen/CIRGenModule.cpp | 68 ++++++++++++++----- clang/lib/CIR/CodeGen/TargetInfo.h | 9 +++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 7 ++ clang/test/CIR/CodeGenCUDA/global-vars.cu | 39 ++++------- 6 files changed, 115 insertions(+), 44 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td index cf6635fc893fa..257cf396abce7 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td @@ -36,5 +36,17 @@ def CIR_CUDAKernelNameAttr : CIR_Attr<"CUDAKernelName", "cu.kernel_name"> { let assemblyFormat = "`<` $kernel_name `>`"; } +def CUDAExternallyInitializedAttr : CIR_Attr<"CUDAExternallyInitialized", + "cu.externally_initialized"> { + let summary = "The marked variable is externally initialized."; + let description = + [{ + CUDA __device__ and __constant__ variables, along with surface and + textures, might be initialized by host, hence "externally initialized". + Therefore they must be emitted even if they are not referenced. + + The attribute corresponds to the attribute on LLVM with the same name. + }]; +} -#endif // CLANG_CIR_DIALECT_IR_CIRCUDAATTRS_TD \ No newline at end of file +#endif // CLANG_CIR_DIALECT_IR_CIRCUDAATTRS_TD diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp index b19e48d0f51d4..a636c07876964 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -14,10 +14,12 @@ #include "CIRGenFunction.h" #include "mlir/IR/Location.h" #include "clang/AST/Attr.h" +#include "clang/AST/Attrs.inc" #include "clang/AST/Decl.h" #include "clang/AST/DeclOpenACC.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/Basic/Cuda.h" #include "clang/CIR/MissingFeatures.h" using namespace clang; @@ -668,6 +670,26 @@ void CIRGenFunction::emitStaticVarDecl(const VarDecl &d, var.setAlignment(alignment.getAsAlign().value()); + // There are a lot of attributes that need to be handled here. Until + // we start to support them, we just report an error if there are any. + if (d.hasAttr<AnnotateAttr>()) + cgm.errorNYI(d.getSourceRange(), "Global annotations are NYI"); + if (d.getAttr<PragmaClangBSSSectionAttr>()) + cgm.errorNYI(d.getSourceRange(), "CIR global BSS section attribute is NYI"); + if (d.getAttr<PragmaClangDataSectionAttr>()) + cgm.errorNYI(d.getSourceRange(), + "CIR global Data section attribute is NYI"); + if (d.getAttr<PragmaClangRodataSectionAttr>()) + cgm.errorNYI(d.getSourceRange(), + "CIR global Rodata section attribute is NYI"); + if (d.getAttr<PragmaClangRelroSectionAttr>()) + cgm.errorNYI(d.getSourceRange(), + "CIR global Relro section attribute is NYI"); + + if (d.getAttr<SectionAttr>()) + cgm.errorNYI(d.getSourceRange(), + "CIR global object file section attribute is NYI"); + if (cgm.getCodeGenOpts().KeepPersistentStorageVariables) cgm.errorNYI(d.getSourceRange(), "static var keep persistent storage"); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 1517058af8782..bd4d2d4e5c1a5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -18,6 +18,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/ASTLambda.h" +#include "clang/AST/Attrs.inc" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclOpenACC.h" #include "clang/AST/GlobalDecl.h" @@ -797,22 +798,19 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, "external const declaration with initializer"); } - // TODO(cir): if this method is used to handle functions we must have - // something closer to GlobalValue::isDeclaration instead of checking for - // initializer. - if (gv.isDeclaration()) { + if (d && + d->isThisDeclarationADefinition(astContext) == VarDecl::DeclarationOnly) { + getTargetCIRGenInfo().setTargetAttributes(d, gv, *this); // TODO(cir): set target attributes - - // External HIP managed variables needed to be recorded for transformation - // in both device and host compilations. // External HIP managed variables needed to be recorded for transformation // in both device and host compilations. if (getLangOpts().CUDA && d && d->hasAttr<HIPManagedAttr>() && d->hasExternalStorage()) - llvm_unreachable("NYI"); + errorNYI(d->getSourceRange(), "HIP managed attribute"); } // TODO(cir): address space cast when needed for DAddrSpace. + assert(!cir::MissingFeatures::addressSpace()); return gv; } @@ -896,9 +894,18 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd, assert(!cir::MissingFeatures::cudaSupport()); - if (vd->hasAttr<LoaderUninitializedAttr>()) { + // CUDA E.2.4.1 "__shared__ variables cannot have an initialization + // as part of their declaration." Sema has already checked for + // error cases, so we just need to set Init to UndefValue. + bool isCUDASharedVar = + getLangOpts().CUDAIsDevice && vd->hasAttr<CUDASharedAttr>(); + // TODO(cir): implement isCUDAShadowVar and isCUDADeviceShadowVar, reference: + // OGCG + + if (getLangOpts().CUDA && isCUDASharedVar) { + init = cir::UndefAttr::get(&getMLIRContext(), convertType(vd->getType())); + } else if (vd->hasAttr<LoaderUninitializedAttr>()) { errorNYI(vd->getSourceRange(), "loader uninitialized attribute"); - return; } else if (!initExpr) { // This is a tentative definition; tentative definitions are // implicitly initialized with { 0 }. @@ -963,6 +970,39 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd, errorNYI(vd->getSourceRange(), "annotate global variable"); } + // Set CIR's linkage type as appropriate. + cir::GlobalLinkageKind linkage = + getCIRLinkageVarDefinition(vd, /*IsConstant=*/false); + + // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on + // the device. [...]" + // CUDA B.2.2 "The __constant__ qualifier, optionally used together with + // __device__, declares a variable that: [...] + // Is accessible from all the threads within the grid and from the host + // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize() + // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())." + if (langOpts.CUDA) { + if (langOpts.CUDAIsDevice) { + // __shared__ variables is not marked as externally initialized, + // because they must not be initialized. + if (linkage != cir::GlobalLinkageKind::InternalLinkage && + !vd->isConstexpr() && !vd->getType().isConstQualified() && + (vd->hasAttr<CUDADeviceAttr>() || vd->hasAttr<CUDAConstantAttr>() || + vd->getType()->isCUDADeviceBuiltinSurfaceType() || + vd->getType()->isCUDADeviceBuiltinTextureType())) { + gv->setAttr(cir::CUDAExternallyInitializedAttr::getMnemonic(), + cir::CUDAExternallyInitializedAttr::get(&getMLIRContext())); + } + } else { + // TODO(cir): + // Adjust linkage of shadow variables in host compilation + // getCUDARuntime().internalizeDeviceSideVar(vd, linkage); + } + // TODO(cir): + // Handle variable registration + // getCUDARuntime().handleVarRegistration(vd, gv); + } + // Set initializer and finalize emission CIRGenModule::setInitializer(gv, init); if (emitter) @@ -977,10 +1017,6 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd, /*ExcludeDtor=*/true))); assert(!cir::MissingFeatures::opGlobalSection()); - // Set CIR's linkage type as appropriate. - cir::GlobalLinkageKind linkage = - getCIRLinkageVarDefinition(vd, /*IsConstant=*/false); - // Set CIR linkage and DLL storage class. gv.setLinkage(linkage); // FIXME(cir): setLinkage should likely set MLIR's visibility automatically. @@ -1586,7 +1622,7 @@ LangAS CIRGenModule::getGlobalVarAddressSpace(const VarDecl *d) { if (langOpts.SYCLIsDevice && (!d || d->getType().getAddressSpace() == LangAS::Default)) - llvm_unreachable("NYI"); + errorNYI(d->getSourceRange(), "global as for SYCL device"); if (langOpts.CUDA && langOpts.CUDAIsDevice) { if (d) { @@ -1603,7 +1639,7 @@ LangAS CIRGenModule::getGlobalVarAddressSpace(const VarDecl *d) { } if (langOpts.OpenMP) - llvm_unreachable("NYI"); + errorNYI(d->getSourceRange(), "global as for OpenMP"); return getTargetCIRGenInfo().getGlobalVarAddressSpace(*this, d); } diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index 8db2cbbce5d23..9ba155b220fbc 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -106,6 +106,15 @@ class TargetCIRGenInfo { /// right thing when calling a function with no know signature. virtual bool isNoProtoCallVariadic(const FunctionNoProtoType *fnType) const; + /// Provides a convenient hook to handle extra target-specific attributes + /// for the given global. + /// In OG, the function receives an llvm::GlobalValue. However, functions + /// and global variables are separate types in Clang IR, so we use a general + /// mlir::Operation*. + virtual void setTargetAttributes(const clang::Decl *decl, + mlir::Operation *global, + CIRGenModule &module) const {} + virtual bool isScalarizableAsmOperand(CIRGenFunction &cgf, mlir::Type ty) const { return false; diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 03085ad29ab78..eda07dab4d97b 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -2557,6 +2557,13 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( const StringRef symbol = op.getSymName(); SmallVector<mlir::NamedAttribute> attributes; + // Mark externally_initialized for __device__ and __constant__ + if (auto extInit = + op->getAttr(CUDAExternallyInitializedAttr::getMnemonic())) { + attributes.push_back(rewriter.getNamedAttr("externally_initialized", + rewriter.getUnitAttr())); + } + if (init.has_value()) { if (mlir::isa<cir::FPAttr, cir::IntAttr, cir::BoolAttr>(init.value())) { GlobalInitAttrRewriter initRewriter(llvmType, rewriter); diff --git a/clang/test/CIR/CodeGenCUDA/global-vars.cu b/clang/test/CIR/CodeGenCUDA/global-vars.cu index f497d0e7f5f64..4791f145d1bae 100644 --- a/clang/test/CIR/CodeGenCUDA/global-vars.cu +++ b/clang/test/CIR/CodeGenCUDA/global-vars.cu @@ -5,43 +5,28 @@ // RUN: -I%S/Inputs/ %s -o %t.cir // RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \ -// RUN: -x cuda -emit-cir -target-sdk-version=12.3 \ -// RUN: -I%S/Inputs/ %s -o %t.cir -// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s - // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \ // RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \ // RUN: -I%S/Inputs/ %s -o %t.ll // RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \ -// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \ -// RUN: -I%S/Inputs/ %s -o %t.ll -// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s - // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \ // RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \ // RUN: -I%S/Inputs/ %s -o %t.ll // RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ -// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \ -// RUN: -I%S/Inputs/ %s -o %t.ll -// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s -__shared__ int a; -// CIR-DEVICE: cir.global external [[SHARED:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64} -// CIR-HOST: cir.global external [[SHARED_HOST:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64} -// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 0, align 4 -// LLVM-HOST: @[[SHARED_LH:.*]] = global i32 0, align 4 +__device__ int a; +// CIR-DEVICE: cir.global external @[[DEV:.*]] = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} +// LLVM-DEVICE: @[[DEV_LD:.*]] = externally_initialized global i32 0, align 4 +// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global i32 0, align 4 + +__shared__ int b; +// CIR-DEVICE: cir.global external @[[SHARED:.*]] = #cir.undef : !s32i {alignment = 4 : i64} +// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 undef, align 4 // OGCG-DEVICE: @[[SHARED_OD:.*]] = addrspace(3) global i32 undef, align 4 -// OGCG-HOST: @[[SHARED_OH:.*]] = internal global i32 undef, align 4 -__device__ int b; -// CIR-DEVICE: cir.global external [[DEV:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64} -// CIR-HOST: cir.global external [[DEV_HOST:@.*]] = #cir.int<0> : !s32i {alignment = 4 : i64} -// LLVM-DEVICE: @[[DEV_LD:.*]] = global i32 0, align 4 -// LLVM-HOST: @[[DEV_LH:.*]] = global i32 0, align 4 -// OGCG-HOST: @[[DEV_OH:.*]] = internal global i32 undef, align 4 -// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global i32 0, align 4 +__constant__ int c; +// CIR-DEVICE: cir.global constant external @[[CONST:.*]] = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} +// LLVM-DEVICE: @[[CONST_LL:.*]] = externally_initialized constant i32 0, align 4 +// OGCG-DEVICE: @[[CONST_OD:.*]] = addrspace(4) externally_initialized constant i32 0, align 4 >From 42105b08feed53b5e637b5e04a550dde21b594f4 Mon Sep 17 00:00:00 2001 From: ZakyHermawan <[email protected]> Date: Fri, 6 Mar 2026 04:01:25 +0700 Subject: [PATCH 3/3] [CIR][CUDA][NFC] Remove unnecessary comment Signed-off-by: ZakyHermawan <[email protected]> --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index bd4d2d4e5c1a5..2da4ed7b79da2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -809,7 +809,6 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, errorNYI(d->getSourceRange(), "HIP managed attribute"); } - // TODO(cir): address space cast when needed for DAddrSpace. assert(!cir::MissingFeatures::addressSpace()); return gv; } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
