https://github.com/ZakyHermawan updated 
https://github.com/llvm/llvm-project/pull/184248

>From 809121a3d2fe82e146da5cc8177e134b862b2a01 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <[email protected]>
Date: Tue, 3 Mar 2026 05:16:23 +0700
Subject: [PATCH 1/3] [CIR][CUDA] Handle __device__ and __shared__ variables

Signed-off-by: ZakyHermawan <[email protected]>
---
 clang/lib/CIR/CodeGen/CIRGenDecl.cpp         | 14 ++--
 clang/lib/CIR/CodeGen/CIRGenModule.cpp       | 53 ++++++++++++-
 clang/lib/CIR/CodeGen/CIRGenModule.h         | 10 +++
 clang/lib/CIR/CodeGen/TargetInfo.cpp         |  9 +++
 clang/lib/CIR/CodeGen/TargetInfo.h           |  7 ++
 clang/test/CIR/CodeGenCUDA/address-spaces.cu | 78 ++++++++++++++++++++
 clang/test/CIR/CodeGenCUDA/global-vars.cu    | 47 ++++++++++++
 7 files changed, 206 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenCUDA/address-spaces.cu
 create mode 100644 clang/test/CIR/CodeGenCUDA/global-vars.cu

diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp 
b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
index bb3117dfb2c98..b19e48d0f51d4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
@@ -433,12 +433,15 @@ CIRGenModule::getOrCreateStaticVarDecl(const VarDecl &d,
   mlir::Type lty = getTypes().convertTypeForMem(ty);
   assert(!cir::MissingFeatures::addressSpace());
 
-  if (d.hasAttr<LoaderUninitializedAttr>() || d.hasAttr<CUDASharedAttr>())
+  mlir::Attribute init = nullptr;
+  if (d.hasAttr<LoaderUninitializedAttr>())
     errorNYI(d.getSourceRange(),
              "getOrCreateStaticVarDecl: LoaderUninitializedAttr");
-  assert(!cir::MissingFeatures::addressSpace());
+  else if (ty.getAddressSpace() != LangAS::opencl_local &&
+           !d.hasAttr<CUDASharedAttr>())
+    init = builder.getZeroInitAttr(convertType(ty));
 
-  mlir::Attribute init = builder.getZeroInitAttr(convertType(ty));
+  assert(!cir::MissingFeatures::addressSpace());
 
   cir::GlobalOp gv = builder.createVersionedGlobal(
       getModule(), getLoc(d.getLocation()), name, lty, false, linkage);
@@ -665,11 +668,6 @@ void CIRGenFunction::emitStaticVarDecl(const VarDecl &d,
 
   var.setAlignment(alignment.getAsAlign().value());
 
-  // There are a lot of attributes that need to be handled here. Until
-  // we start to support them, we just report an error if there are any.
-  if (d.hasAttrs())
-    cgm.errorNYI(d.getSourceRange(), "static var with attrs");
-
   if (cgm.getCodeGenOpts().KeepPersistentStorageVariables)
     cgm.errorNYI(d.getSourceRange(), "static var keep persistent storage");
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 223b53731359a..1517058af8782 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -797,6 +797,22 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, 
mlir::Type ty,
                "external const declaration with initializer");
   }
 
+  // TODO(cir): if this method is used to handle functions we must have
+  // something closer to GlobalValue::isDeclaration instead of checking for
+  // initializer.
+  if (gv.isDeclaration()) {
+    // TODO(cir): set target attributes
+
+    // External HIP managed variables needed to be recorded for transformation
+    // in both device and host compilations.
+    // External HIP managed variables needed to be recorded for transformation
+    // in both device and host compilations.
+    if (getLangOpts().CUDA && d && d->hasAttr<HIPManagedAttr>() &&
+        d->hasExternalStorage())
+      llvm_unreachable("NYI");
+  }
+
+  // TODO(cir): address space cast when needed for DAddrSpace.
   return gv;
 }
 
@@ -947,10 +963,6 @@ void CIRGenModule::emitGlobalVarDefinition(const 
clang::VarDecl *vd,
     errorNYI(vd->getSourceRange(), "annotate global variable");
   }
 
-  if (langOpts.CUDA) {
-    errorNYI(vd->getSourceRange(), "CUDA global variable");
-  }
-
   // Set initializer and finalize emission
   CIRGenModule::setInitializer(gv, init);
   if (emitter)
@@ -1563,6 +1575,39 @@ CIRGenModule::getAddrOfConstantStringFromLiteral(const 
StringLiteral *s,
   return builder.getGlobalViewAttr(ptrTy, gv);
 }
 
+LangAS CIRGenModule::getGlobalVarAddressSpace(const VarDecl *d) {
+  if (langOpts.OpenCL) {
+    LangAS as = d ? d->getType().getAddressSpace() : LangAS::opencl_global;
+    assert(as == LangAS::opencl_global || as == LangAS::opencl_global_device ||
+           as == LangAS::opencl_global_host || as == LangAS::opencl_constant ||
+           as == LangAS::opencl_local || as >= 
LangAS::FirstTargetAddressSpace);
+    return as;
+  }
+
+  if (langOpts.SYCLIsDevice &&
+      (!d || d->getType().getAddressSpace() == LangAS::Default))
+    llvm_unreachable("NYI");
+
+  if (langOpts.CUDA && langOpts.CUDAIsDevice) {
+    if (d) {
+      if (d->hasAttr<CUDAConstantAttr>())
+        return LangAS::cuda_constant;
+      if (d->hasAttr<CUDASharedAttr>())
+        return LangAS::cuda_shared;
+      if (d->hasAttr<CUDADeviceAttr>())
+        return LangAS::cuda_device;
+      if (d->getType().isConstQualified())
+        return LangAS::cuda_constant;
+    }
+    return LangAS::cuda_device;
+  }
+
+  if (langOpts.OpenMP)
+    llvm_unreachable("NYI");
+
+  return getTargetCIRGenInfo().getGlobalVarAddressSpace(*this, d);
+}
+
 // TODO(cir): this could be a common AST helper for both CIR and LLVM codegen.
 LangAS CIRGenModule::getLangTempAllocaAddressSpace() const {
   if (getLangOpts().OpenCL)
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h 
b/clang/lib/CIR/CodeGen/CIRGenModule.h
index 52464a8bc30c4..d9173234868ee 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -359,6 +359,16 @@ class CIRGenModule : public CIRGenTypeCache {
   getAddrOfConstantStringFromLiteral(const StringLiteral *s,
                                      llvm::StringRef name = ".str");
 
+  /// Return the AST address space of the underlying global variable for D, as
+  /// determined by its declaration. Normally this is the same as the address
+  /// space of D's type, but in CUDA, address spaces are associated with
+  /// declarations, not types. If D is nullptr, return the default address
+  /// space for global variable.
+  ///
+  /// For languages without explicit address spaces, if D has default address
+  /// space, target-specific global or constant address space may be returned.
+  LangAS getGlobalVarAddressSpace(const VarDecl *d);
+
   /// Returns the address space for temporary allocations in the language. This
   /// ensures that the allocated variable's address space matches the
   /// expectations of the AST, rather than using the target's allocation 
address
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp 
b/clang/lib/CIR/CodeGen/TargetInfo.cpp
index 2f3824d3d47a7..70ffb46050ea1 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp
@@ -91,3 +91,12 @@ bool TargetCIRGenInfo::isNoProtoCallVariadic(
   // For everything else, we just prefer false unless we opt out.
   return false;
 }
+
+clang::LangAS
+TargetCIRGenInfo::getGlobalVarAddressSpace(CIRGenModule &cgm,
+                                           const clang::VarDecl *d) const {
+  assert(!cgm.getLangOpts().OpenCL &&
+         !(cgm.getLangOpts().CUDA && cgm.getLangOpts().CUDAIsDevice) &&
+         "Address space agnostic languages only");
+  return d ? d->getType().getAddressSpace() : LangAS::Default;
+}
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h 
b/clang/lib/CIR/CodeGen/TargetInfo.h
index f4792d5309e36..8db2cbbce5d23 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.h
+++ b/clang/lib/CIR/CodeGen/TargetInfo.h
@@ -49,6 +49,13 @@ class TargetCIRGenInfo {
   /// Returns ABI info helper for the target.
   const ABIInfo &getABIInfo() const { return *info; }
 
+  /// Get target favored AST address space of a global variable for languages
+  /// other than OpenCL and CUDA.
+  /// If \p d is nullptr, returns the default target favored address space
+  /// for global variable.
+  virtual clang::LangAS getGlobalVarAddressSpace(CIRGenModule &cgm,
+                                                 const clang::VarDecl *d) 
const;
+
   /// Get the address space for alloca.
   virtual mlir::ptr::MemorySpaceAttrInterface getCIRAllocaAddressSpace() const 
{
     return cir::LangAddressSpaceAttr::get(&info->cgt.getMLIRContext(),
diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu 
b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
new file mode 100644
index 0000000000000..68905a6616ca7
--- /dev/null
+++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
@@ -0,0 +1,78 @@
+#include "Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+__global__ void fn() {
+  int i = 0;
+  __shared__ int j;
+  j = i;
+}
+
+// CIR-DEVICE: cir.global "private" internal dso_local @_ZZ2fnvE1j : !s32i
+// CIR-DEVICE: cir.func {{.*}}@_Z2fnv() {{.*}} {
+// CIR-DEVICE:   %[[I:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init]
+// CIR-DEVICE:   %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
+// CIR-DEVICE:   cir.store {{.*}}%[[ZERO]], %[[I]] : !s32i, !cir.ptr<!s32i>
+// CIR-DEVICE:   %[[J:.*]] = cir.get_global @_ZZ2fnvE1j : !cir.ptr<!s32i>
+// CIR-DEVICE:   %[[VAL:.*]] = cir.load {{.*}}%[[I]] : !cir.ptr<!s32i>, !s32i
+// CIR-DEVICE:   cir.store {{.*}}%[[VAL]], %[[J]] : !s32i, !cir.ptr<!s32i>
+// CIR-DEVICE:   cir.return
+
+// CIR-HOST: cir.func private dso_local @__cudaPopCallConfiguration
+// CIR-HOST: cir.func private dso_local @cudaLaunchKernel
+// CIR-HOST: cir.func {{.*}}@_Z17__device_stub__fnv()
+
+// LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef, align 4
+// LLVM-DEVICE: define dso_local void @_Z2fnv()
+// LLVM-DEVICE:   %[[ALLOCA:.*]] = alloca i32, i64 1, align 4
+// LLVM-DEVICE:   store i32 0, ptr %[[ALLOCA]], align 4
+// LLVM-DEVICE:   %[[VAL:.*]] = load i32, ptr %[[ALLOCA]], align 4
+// LLVM-DEVICE:   store i32 %[[VAL]], ptr @_ZZ2fnvE1j, align 4
+// LLVM-DEVICE:   ret void
+
+// LLVM-HOST: %struct.dim3 = type { i32, i32, i32 }
+// LLVM-HOST: declare {{.*}}i32 @__cudaPopCallConfiguration(ptr, ptr, ptr, ptr)
+// LLVM-HOST: declare {{.*}}i32 @cudaLaunchKernel(ptr, %struct.dim3, 
%struct.dim3, ptr, i64, ptr)
+// LLVM-HOST: define dso_local void @_Z17__device_stub__fnv()
+
+// OGCG-HOST: define dso_local void @_Z17__device_stub__fnv()
+// OGCG-HOST: entry:
+// OGCG-HOST:   call i32 @__cudaPopCallConfiguration
+// OGCG-HOST:   call {{.*}}i32 @cudaLaunchKernel
+
+// OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef, align 4
+// OGCG-DEVICE: define dso_local ptx_kernel void @_Z2fnv()
+// OGCG-DEVICE: entry:
+// OGCG-DEVICE:   %[[I:.*]] = alloca i32, align 4
+// OGCG-DEVICE:   store i32 0, ptr %[[I]], align 4
+// OGCG-DEVICE:   %[[VAL:.*]] = load i32, ptr %[[I]], align 4
+// OGCG-DEVICE:   store i32 %[[VAL]], ptr addrspacecast (ptr addrspace(3) 
@_ZZ2fnvE1j to ptr), align 4
+// OGCG-DEVICE:   ret void
diff --git a/clang/test/CIR/CodeGenCUDA/global-vars.cu 
b/clang/test/CIR/CodeGenCUDA/global-vars.cu
new file mode 100644
index 0000000000000..f497d0e7f5f64
--- /dev/null
+++ b/clang/test/CIR/CodeGenCUDA/global-vars.cu
@@ -0,0 +1,47 @@
+#include "Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+
+__shared__ int a;
+// CIR-DEVICE: cir.global external [[SHARED:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
+// CIR-HOST: cir.global external [[SHARED_HOST:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
+// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 0, align 4
+// LLVM-HOST: @[[SHARED_LH:.*]] = global i32 0, align 4
+// OGCG-DEVICE: @[[SHARED_OD:.*]] = addrspace(3) global i32 undef, align 4
+// OGCG-HOST: @[[SHARED_OH:.*]] = internal global i32 undef, align 4
+
+__device__ int b;
+// CIR-DEVICE: cir.global external [[DEV:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
+// CIR-HOST: cir.global external [[DEV_HOST:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
+// LLVM-DEVICE: @[[DEV_LD:.*]] = global i32 0, align 4
+// LLVM-HOST: @[[DEV_LH:.*]] = global i32 0, align 4
+// OGCG-HOST: @[[DEV_OH:.*]] = internal global i32 undef, align 4
+// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global 
i32 0, align 4

>From 512dd3acf7be7d06505243603fa2af9106a2b58b Mon Sep 17 00:00:00 2001
From: ZakyHermawan <[email protected]>
Date: Fri, 6 Mar 2026 02:39:06 +0700
Subject: [PATCH 2/3] [CIR][CUDA] handle __constant__ variable Remove CIR-HOST
 LLVM-HOST and OGCG-HOST from global-vars.cu because shadow variables did not
 handled properly, yet Make few changes to handle __device__, __shared__, and
 __constant__ global variables using reference from OGCG Create and call a
 hook (setTargetAttributes) if the variable is global and declaration only.

Signed-off-by: ZakyHermawan <[email protected]>
---
 .../clang/CIR/Dialect/IR/CIRCUDAAttrs.td      | 14 +++-
 clang/lib/CIR/CodeGen/CIRGenDecl.cpp          | 22 ++++++
 clang/lib/CIR/CodeGen/CIRGenModule.cpp        | 68 ++++++++++++++-----
 clang/lib/CIR/CodeGen/TargetInfo.h            |  9 +++
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp |  7 ++
 clang/test/CIR/CodeGenCUDA/global-vars.cu     | 39 ++++-------
 6 files changed, 115 insertions(+), 44 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td 
b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
index cf6635fc893fa..257cf396abce7 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
@@ -36,5 +36,17 @@ def CIR_CUDAKernelNameAttr : CIR_Attr<"CUDAKernelName", 
"cu.kernel_name"> {
   let assemblyFormat = "`<` $kernel_name `>`";
 }
 
+def CUDAExternallyInitializedAttr : CIR_Attr<"CUDAExternallyInitialized",
+                                             "cu.externally_initialized"> {
+  let summary = "The marked variable is externally initialized.";
+  let description =
+  [{
+    CUDA __device__ and __constant__ variables, along with surface and
+    textures, might be initialized by host, hence "externally initialized".
+    Therefore they must be emitted even if they are not referenced.
+
+    The attribute corresponds to the attribute on LLVM with the same name.
+  }];
+}
 
-#endif // CLANG_CIR_DIALECT_IR_CIRCUDAATTRS_TD
\ No newline at end of file
+#endif // CLANG_CIR_DIALECT_IR_CIRCUDAATTRS_TD
diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp 
b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
index b19e48d0f51d4..a636c07876964 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
@@ -14,10 +14,12 @@
 #include "CIRGenFunction.h"
 #include "mlir/IR/Location.h"
 #include "clang/AST/Attr.h"
+#include "clang/AST/Attrs.inc"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclOpenACC.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/Basic/Cuda.h"
 #include "clang/CIR/MissingFeatures.h"
 
 using namespace clang;
@@ -668,6 +670,26 @@ void CIRGenFunction::emitStaticVarDecl(const VarDecl &d,
 
   var.setAlignment(alignment.getAsAlign().value());
 
+  // There are a lot of attributes that need to be handled here. Until
+  // we start to support them, we just report an error if there are any.
+  if (d.hasAttr<AnnotateAttr>())
+    cgm.errorNYI(d.getSourceRange(), "Global annotations are NYI");
+  if (d.getAttr<PragmaClangBSSSectionAttr>())
+    cgm.errorNYI(d.getSourceRange(), "CIR global BSS section attribute is 
NYI");
+  if (d.getAttr<PragmaClangDataSectionAttr>())
+    cgm.errorNYI(d.getSourceRange(),
+                 "CIR global Data section attribute is NYI");
+  if (d.getAttr<PragmaClangRodataSectionAttr>())
+    cgm.errorNYI(d.getSourceRange(),
+                 "CIR global Rodata section attribute is NYI");
+  if (d.getAttr<PragmaClangRelroSectionAttr>())
+    cgm.errorNYI(d.getSourceRange(),
+                 "CIR global Relro section attribute is NYI");
+
+  if (d.getAttr<SectionAttr>())
+    cgm.errorNYI(d.getSourceRange(),
+                 "CIR global object file section attribute is NYI");
+
   if (cgm.getCodeGenOpts().KeepPersistentStorageVariables)
     cgm.errorNYI(d.getSourceRange(), "static var keep persistent storage");
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 1517058af8782..bd4d2d4e5c1a5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -18,6 +18,7 @@
 
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTLambda.h"
+#include "clang/AST/Attrs.inc"
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclOpenACC.h"
 #include "clang/AST/GlobalDecl.h"
@@ -797,22 +798,19 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, 
mlir::Type ty,
                "external const declaration with initializer");
   }
 
-  // TODO(cir): if this method is used to handle functions we must have
-  // something closer to GlobalValue::isDeclaration instead of checking for
-  // initializer.
-  if (gv.isDeclaration()) {
+  if (d &&
+      d->isThisDeclarationADefinition(astContext) == VarDecl::DeclarationOnly) 
{
+    getTargetCIRGenInfo().setTargetAttributes(d, gv, *this);
     // TODO(cir): set target attributes
-
-    // External HIP managed variables needed to be recorded for transformation
-    // in both device and host compilations.
     // External HIP managed variables needed to be recorded for transformation
     // in both device and host compilations.
     if (getLangOpts().CUDA && d && d->hasAttr<HIPManagedAttr>() &&
         d->hasExternalStorage())
-      llvm_unreachable("NYI");
+      errorNYI(d->getSourceRange(), "HIP managed attribute");
   }
 
   // TODO(cir): address space cast when needed for DAddrSpace.
+  assert(!cir::MissingFeatures::addressSpace());
   return gv;
 }
 
@@ -896,9 +894,18 @@ void CIRGenModule::emitGlobalVarDefinition(const 
clang::VarDecl *vd,
 
   assert(!cir::MissingFeatures::cudaSupport());
 
-  if (vd->hasAttr<LoaderUninitializedAttr>()) {
+  // CUDA E.2.4.1 "__shared__ variables cannot have an initialization
+  // as part of their declaration."  Sema has already checked for
+  // error cases, so we just need to set Init to UndefValue.
+  bool isCUDASharedVar =
+      getLangOpts().CUDAIsDevice && vd->hasAttr<CUDASharedAttr>();
+  // TODO(cir): implement isCUDAShadowVar and isCUDADeviceShadowVar, reference:
+  // OGCG
+
+  if (getLangOpts().CUDA && isCUDASharedVar) {
+    init = cir::UndefAttr::get(&getMLIRContext(), convertType(vd->getType()));
+  } else if (vd->hasAttr<LoaderUninitializedAttr>()) {
     errorNYI(vd->getSourceRange(), "loader uninitialized attribute");
-    return;
   } else if (!initExpr) {
     // This is a tentative definition; tentative definitions are
     // implicitly initialized with { 0 }.
@@ -963,6 +970,39 @@ void CIRGenModule::emitGlobalVarDefinition(const 
clang::VarDecl *vd,
     errorNYI(vd->getSourceRange(), "annotate global variable");
   }
 
+  // Set CIR's linkage type as appropriate.
+  cir::GlobalLinkageKind linkage =
+      getCIRLinkageVarDefinition(vd, /*IsConstant=*/false);
+
+  // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
+  // the device. [...]"
+  // CUDA B.2.2 "The __constant__ qualifier, optionally used together with
+  // __device__, declares a variable that: [...]
+  // Is accessible from all the threads within the grid and from the host
+  // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
+  // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
+  if (langOpts.CUDA) {
+    if (langOpts.CUDAIsDevice) {
+      // __shared__ variables is not marked as externally initialized,
+      // because they must not be initialized.
+      if (linkage != cir::GlobalLinkageKind::InternalLinkage &&
+          !vd->isConstexpr() && !vd->getType().isConstQualified() &&
+          (vd->hasAttr<CUDADeviceAttr>() || vd->hasAttr<CUDAConstantAttr>() ||
+           vd->getType()->isCUDADeviceBuiltinSurfaceType() ||
+           vd->getType()->isCUDADeviceBuiltinTextureType())) {
+        gv->setAttr(cir::CUDAExternallyInitializedAttr::getMnemonic(),
+                    
cir::CUDAExternallyInitializedAttr::get(&getMLIRContext()));
+      }
+    } else {
+      // TODO(cir):
+      // Adjust linkage of shadow variables in host compilation
+      // getCUDARuntime().internalizeDeviceSideVar(vd, linkage);
+    }
+    // TODO(cir):
+    // Handle variable registration
+    // getCUDARuntime().handleVarRegistration(vd, gv);
+  }
+
   // Set initializer and finalize emission
   CIRGenModule::setInitializer(gv, init);
   if (emitter)
@@ -977,10 +1017,6 @@ void CIRGenModule::emitGlobalVarDefinition(const 
clang::VarDecl *vd,
                                                   /*ExcludeDtor=*/true)));
   assert(!cir::MissingFeatures::opGlobalSection());
 
-  // Set CIR's linkage type as appropriate.
-  cir::GlobalLinkageKind linkage =
-      getCIRLinkageVarDefinition(vd, /*IsConstant=*/false);
-
   // Set CIR linkage and DLL storage class.
   gv.setLinkage(linkage);
   // FIXME(cir): setLinkage should likely set MLIR's visibility automatically.
@@ -1586,7 +1622,7 @@ LangAS CIRGenModule::getGlobalVarAddressSpace(const 
VarDecl *d) {
 
   if (langOpts.SYCLIsDevice &&
       (!d || d->getType().getAddressSpace() == LangAS::Default))
-    llvm_unreachable("NYI");
+    errorNYI(d->getSourceRange(), "global as for SYCL device");
 
   if (langOpts.CUDA && langOpts.CUDAIsDevice) {
     if (d) {
@@ -1603,7 +1639,7 @@ LangAS CIRGenModule::getGlobalVarAddressSpace(const 
VarDecl *d) {
   }
 
   if (langOpts.OpenMP)
-    llvm_unreachable("NYI");
+    errorNYI(d->getSourceRange(), "global as for OpenMP");
 
   return getTargetCIRGenInfo().getGlobalVarAddressSpace(*this, d);
 }
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h 
b/clang/lib/CIR/CodeGen/TargetInfo.h
index 8db2cbbce5d23..9ba155b220fbc 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.h
+++ b/clang/lib/CIR/CodeGen/TargetInfo.h
@@ -106,6 +106,15 @@ class TargetCIRGenInfo {
   /// right thing when calling a function with no know signature.
   virtual bool isNoProtoCallVariadic(const FunctionNoProtoType *fnType) const;
 
+  /// Provides a convenient hook to handle extra target-specific attributes
+  /// for the given global.
+  /// In OG, the function receives an llvm::GlobalValue. However, functions
+  /// and global variables are separate types in Clang IR, so we use a general
+  /// mlir::Operation*.
+  virtual void setTargetAttributes(const clang::Decl *decl,
+                                   mlir::Operation *global,
+                                   CIRGenModule &module) const {}
+
   virtual bool isScalarizableAsmOperand(CIRGenFunction &cgf,
                                         mlir::Type ty) const {
     return false;
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp 
b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 03085ad29ab78..eda07dab4d97b 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -2557,6 +2557,13 @@ mlir::LogicalResult 
CIRToLLVMGlobalOpLowering::matchAndRewrite(
   const StringRef symbol = op.getSymName();
   SmallVector<mlir::NamedAttribute> attributes;
 
+  // Mark externally_initialized for __device__ and __constant__
+  if (auto extInit =
+          op->getAttr(CUDAExternallyInitializedAttr::getMnemonic())) {
+    attributes.push_back(rewriter.getNamedAttr("externally_initialized",
+                                               rewriter.getUnitAttr()));
+  }
+
   if (init.has_value()) {
     if (mlir::isa<cir::FPAttr, cir::IntAttr, cir::BoolAttr>(init.value())) {
       GlobalInitAttrRewriter initRewriter(llvmType, rewriter);
diff --git a/clang/test/CIR/CodeGenCUDA/global-vars.cu 
b/clang/test/CIR/CodeGenCUDA/global-vars.cu
index f497d0e7f5f64..4791f145d1bae 100644
--- a/clang/test/CIR/CodeGenCUDA/global-vars.cu
+++ b/clang/test/CIR/CodeGenCUDA/global-vars.cu
@@ -5,43 +5,28 @@
 // RUN:            -I%S/Inputs/ %s -o %t.cir
 // RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
 
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
-// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
-// RUN:            -I%S/Inputs/ %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
-
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
 // RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
 // RUN:            -I%S/Inputs/ %s -o %t.ll
 // RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
 
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
-// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
-// RUN:            -I%S/Inputs/ %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
-
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
 // RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
 // RUN:            -I%S/Inputs/ %s -o %t.ll
 // RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
 
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
-// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
-// RUN:            -I%S/Inputs/ %s -o %t.ll
-// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
 
-__shared__ int a;
-// CIR-DEVICE: cir.global external [[SHARED:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
-// CIR-HOST: cir.global external [[SHARED_HOST:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
-// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 0, align 4
-// LLVM-HOST: @[[SHARED_LH:.*]] = global i32 0, align 4
+__device__ int a;
+// CIR-DEVICE: cir.global external @[[DEV:.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64, cu.externally_initialized = 
#cir.cu.externally_initialized}
+// LLVM-DEVICE: @[[DEV_LD:.*]] = externally_initialized global i32 0, align 4
+// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global 
i32 0, align 4
+
+__shared__ int b;
+// CIR-DEVICE: cir.global external @[[SHARED:.*]] = #cir.undef : !s32i 
{alignment = 4 : i64}
+// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 undef, align 4
 // OGCG-DEVICE: @[[SHARED_OD:.*]] = addrspace(3) global i32 undef, align 4
-// OGCG-HOST: @[[SHARED_OH:.*]] = internal global i32 undef, align 4
 
-__device__ int b;
-// CIR-DEVICE: cir.global external [[DEV:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
-// CIR-HOST: cir.global external [[DEV_HOST:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
-// LLVM-DEVICE: @[[DEV_LD:.*]] = global i32 0, align 4
-// LLVM-HOST: @[[DEV_LH:.*]] = global i32 0, align 4
-// OGCG-HOST: @[[DEV_OH:.*]] = internal global i32 undef, align 4
-// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global 
i32 0, align 4
+__constant__ int c;
+// CIR-DEVICE: cir.global constant external @[[CONST:.*]] = #cir.int<0> : 
!s32i {alignment = 4 : i64, cu.externally_initialized = 
#cir.cu.externally_initialized}
+// LLVM-DEVICE: @[[CONST_LL:.*]] = externally_initialized constant i32 0, 
align 4
+// OGCG-DEVICE: @[[CONST_OD:.*]] = addrspace(4) externally_initialized 
constant i32 0, align 4

>From 42105b08feed53b5e637b5e04a550dde21b594f4 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <[email protected]>
Date: Fri, 6 Mar 2026 04:01:25 +0700
Subject: [PATCH 3/3] [CIR][CUDA][NFC] Remove unnecessary comment

Signed-off-by: ZakyHermawan <[email protected]>
---
 clang/lib/CIR/CodeGen/CIRGenModule.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index bd4d2d4e5c1a5..2da4ed7b79da2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -809,7 +809,6 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, 
mlir::Type ty,
       errorNYI(d->getSourceRange(), "HIP managed attribute");
   }
 
-  // TODO(cir): address space cast when needed for DAddrSpace.
   assert(!cir::MissingFeatures::addressSpace());
   return gv;
 }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to