This revision was not accepted when it landed; it landed in state "Needs 
Review".
This revision was automatically updated to reflect the committed changes.
Closed by commit rG45f2a56856e2: [CUDA][HIP] Support accessing static device 
variable in host code for -fno-gpu… (authored by yaxunl).
Herald added a project: clang.

Changed prior to commit:
  https://reviews.llvm.org/D80858?vs=282952&id=283202#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80858/new/

https://reviews.llvm.org/D80858

Files:
  clang/include/clang/AST/ASTContext.h
  clang/lib/AST/ASTContext.cpp
  clang/lib/Sema/SemaExpr.cpp
  clang/test/CodeGenCUDA/constexpr-variables.cu
  clang/test/CodeGenCUDA/static-device-var-no-rdc.cu

Index: clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
@@ -0,0 +1,94 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN:   -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=DEV %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux \
+// RUN:   -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=HOST %s
+
+#include "Inputs/cuda.h"
+
+// Test function scope static device variable, which should not be externalized.
+// DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1
+
+// Check a static device variable referenced by host function is externalized.
+// DEV-DAG: @_ZL1x = addrspace(1) externally_initialized global i32 0
+// HOST-DAG: @_ZL1x = internal global i32 undef
+// HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00"
+
+static __device__ int x;
+
+// Check a static device variables referenced only by device functions and kernels
+// is not externalized.
+// DEV-DAG: @_ZL2x2 = internal addrspace(1) global i32 0
+static __device__ int x2;
+
+// Check a static device variable referenced by host device function is externalized.
+// DEV-DAG: @_ZL2x3 = addrspace(1) externally_initialized global i32 0
+static __device__ int x3;
+
+// Check a static device variable referenced in file scope is externalized.
+// DEV-DAG: @_ZL2x4 = addrspace(1) externally_initialized global i32 0
+static __device__ int x4;
+int& x4_ref = x4;
+
+// Check a static device variable in anonymous namespace.
+// DEV-DAG: @_ZN12_GLOBAL__N_12x5E = addrspace(1) externally_initialized global i32 0
+namespace {
+static __device__ int x5;
+}
+
+// Check a static constant variable referenced by host is externalized.
+// DEV-DAG: @_ZL1y = addrspace(4) externally_initialized global i32 0
+// HOST-DAG: @_ZL1y = internal global i32 undef
+// HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00"
+
+static __constant__ int y;
+
+// Test static host variable, which should not be externalized nor registered.
+// HOST-DAG: @_ZL1z = internal global i32 0
+// DEV-NOT: @_ZL1z
+static int z;
+
+// Test static device variable in inline function, which should not be
+// externalized nor registered.
+// DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat
+
+inline __device__ void devfun(const int ** b) {
+  const static int p = 2;
+  b[0] = &p;
+  b[1] = &x2;
+}
+
+__global__ void kernel(int *a, const int **b) {
+  const static int w = 1;
+  a[0] = x;
+  a[1] = y;
+  a[2] = x2;
+  a[3] = x3;
+  a[4] = x4;
+  a[5] = x5;
+  b[0] = &w;
+  devfun(b);
+}
+
+__host__ __device__ void hdf(int *a) {
+  a[0] = x3;
+}
+
+int* getDeviceSymbol(int *x);
+
+void foo(int *a) {
+  getDeviceSymbol(&x);
+  getDeviceSymbol(&x5);
+  getDeviceSymbol(&y);
+  z = 123;
+}
+
+// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]]
+// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]]
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
Index: clang/test/CodeGenCUDA/constexpr-variables.cu
===================================================================
--- clang/test/CodeGenCUDA/constexpr-variables.cu
+++ clang/test/CodeGenCUDA/constexpr-variables.cu
@@ -19,7 +19,7 @@
   // CXX14: @_ZN1Q2k2E = {{.*}}externally_initialized constant i32 6
   // CXX17: @_ZN1Q2k2E = internal {{.*}}constant i32 6
   // CXX14: @_ZN1Q2k1E = available_externally {{.*}}constant i32 5
-  // CXX17: @_ZN1Q2k1E = linkonce_odr {{.*}}constant i32 5
+  // CXX17: @_ZN1Q2k1E = {{.*}} externally_initialized constant i32 5
   static constexpr int k1 = 5;
   static constexpr int k2 = 6;
 };
@@ -30,14 +30,14 @@
 
 template<typename T> struct X {
   // CXX14: @_ZN1XIiE1aE = available_externally {{.*}}constant i32 123
-  // CXX17: @_ZN1XIiE1aE = linkonce_odr {{.*}}constant i32 123
+  // CXX17: @_ZN1XIiE1aE = {{.*}}externally_initialized constant i32 123
   static constexpr int a = 123;
 };
 __constant__ const int &use_X_a = X<int>::a;
 
 template <typename T, T a, T b> struct A {
   // CXX14: @_ZN1AIiLi1ELi2EE1xE = available_externally {{.*}}constant i32 2
-  // CXX17: @_ZN1AIiLi1ELi2EE1xE = linkonce_odr {{.*}}constant i32 2
+  // CXX17: @_ZN1AIiLi1ELi2EE1xE = {{.*}}externally_initialized constant i32 2
   constexpr static T x = a * b;
 };
 __constant__ const int &y = A<int, 1, 2>::x;
Index: clang/lib/Sema/SemaExpr.cpp
===================================================================
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -17864,6 +17864,25 @@
   if (Var->isInvalidDecl())
     return;
 
+  // Record a CUDA/HIP static device/constant variable if it is referenced
+  // by host code. This is done conservatively, when the variable is referenced
+  // in any of the following contexts:
+  //   - a non-function context
+  //   - a host function
+  //   - a host device function
+  // This also requires the reference of the static device/constant variable by
+  // host code to be visible in the device compilation for the compiler to be
+  // able to externalize the static device/constant variable.
+  if ((Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>()) &&
+      Var->isFileVarDecl() && Var->getStorageClass() == SC_Static) {
+    auto *CurContext = SemaRef.CurContext;
+    if (!CurContext || !isa<FunctionDecl>(CurContext) ||
+        cast<FunctionDecl>(CurContext)->hasAttr<CUDAHostAttr>() ||
+        (!cast<FunctionDecl>(CurContext)->hasAttr<CUDADeviceAttr>() &&
+         !cast<FunctionDecl>(CurContext)->hasAttr<CUDAGlobalAttr>()))
+      SemaRef.getASTContext().CUDAStaticDeviceVarReferencedByHost.insert(Var);
+  }
+
   auto *MSI = Var->getMemberSpecializationInfo();
   TemplateSpecializationKind TSK = MSI ? MSI->getTemplateSpecializationKind()
                                        : Var->getTemplateSpecializationKind();
Index: clang/lib/AST/ASTContext.cpp
===================================================================
--- clang/lib/AST/ASTContext.cpp
+++ clang/lib/AST/ASTContext.cpp
@@ -10325,12 +10325,17 @@
   } else if (D->hasAttr<DLLExportAttr>()) {
     if (L == GVA_DiscardableODR)
       return GVA_StrongODR;
-  } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice &&
-             D->hasAttr<CUDAGlobalAttr>()) {
+  } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) {
     // Device-side functions with __global__ attribute must always be
     // visible externally so they can be launched from host.
-    if (L == GVA_DiscardableODR || L == GVA_Internal)
+    if (D->hasAttr<CUDAGlobalAttr>() &&
+        (L == GVA_DiscardableODR || L == GVA_Internal))
       return GVA_StrongODR;
+    // Single source offloading languages like CUDA/HIP need to be able to
+    // access static device variables from host code of the same compilation
+    // unit. This is done by externalizing the static variable.
+    if (Context.shouldExternalizeStaticVar(D))
+      return GVA_StrongExternal;
   }
   return L;
 }
@@ -11185,3 +11190,11 @@
     return DB << Section.Decl;
   return DB << "a prior #pragma section";
 }
+
+bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
+  return !getLangOpts().GPURelocatableDeviceCode &&
+         (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) &&
+         isa<VarDecl>(D) && cast<VarDecl>(D)->isFileVarDecl() &&
+         cast<VarDecl>(D)->getStorageClass() == SC_Static &&
+         CUDAStaticDeviceVarReferencedByHost.count(cast<VarDecl>(D));
+}
Index: clang/include/clang/AST/ASTContext.h
===================================================================
--- clang/include/clang/AST/ASTContext.h
+++ clang/include/clang/AST/ASTContext.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
@@ -999,6 +1000,9 @@
   // Implicitly-declared type 'struct _GUID'.
   mutable TagDecl *MSGuidTagDecl = nullptr;
 
+  /// Keep track of CUDA/HIP static device variables referenced by host code.
+  llvm::DenseSet<const VarDecl *> CUDAStaticDeviceVarReferencedByHost;
+
   ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents,
              SelectorTable &sels, Builtin::Context &builtins);
   ASTContext(const ASTContext &) = delete;
@@ -3030,6 +3034,9 @@
   /// Return a new OMPTraitInfo object owned by this context.
   OMPTraitInfo &getNewOMPTraitInfo();
 
+  /// Whether a C++ static variable should be externalized.
+  bool shouldExternalizeStaticVar(const Decl *D) const;
+
 private:
   /// All OMPTraitInfo objects live in this collection, one per
   /// `pragma omp [begin] declare variant` directive.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to