yaxunl created this revision.
yaxunl added a reviewer: tra.
yaxunl requested review of this revision.

Recently we added diagnosing ODR-use of host variables
in device functions, which includes ODR-use of const
host variables since they are not really emitted on
device side. This caused regressions since we used
to allow ODR-use of const host variables in device
functions.

This patch allows ODR-use of const variables in device
functions if the const variables can be statically initialized
and have an empty dtor. Such variables are marked with
implicit constant attrs and emitted on device side. This is
in line with what clang does for constexpr variables.


https://reviews.llvm.org/D103108

Files:
  clang/lib/Sema/SemaCUDA.cpp
  clang/lib/Sema/SemaDecl.cpp
  clang/test/CodeGenCUDA/device-use-host-var.cu
  clang/test/SemaCUDA/device-use-host-var.cu

Index: clang/test/SemaCUDA/device-use-host-var.cu
===================================================================
--- clang/test/SemaCUDA/device-use-host-var.cu
+++ clang/test/SemaCUDA/device-use-host-var.cu
@@ -5,6 +5,8 @@
 
 #include "Inputs/cuda.h"
 
+int func();
+
 struct A {
   int x;
   static int host_var;
@@ -16,6 +18,19 @@
   int host_var;
 }
 
+// struct with non-empty ctor.
+struct B1 {
+  int x;
+  B1() { x = 1; }
+};
+
+// struct with non-empty dtor.
+struct B2 {
+  int x;
+  B2() {}
+  ~B2() { x = 0; }
+};
+
 static int static_host_var;
 
 __device__ int global_dev_var;
@@ -34,6 +49,17 @@
 const A global_const_struct_var{1};
 constexpr A global_constexpr_struct_var{1};
 
+// Check const host var initialized with non-empty ctor is not allowed in
+// device function.
+const B1 b1;
+
+// Check const host var having non-empty dtor is not allowed in device function.
+const B2 b2;
+
+// Check const host var initialized by non-constant initializer is not allowed
+// in device function.
+const int b3 = func();
+
 template<typename F>
 __global__ void kernel(F f) { f(); } // dev-note2 {{called by 'kernel<(lambda}}
 
@@ -53,11 +79,14 @@
   *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}}
   *out = global_const_var;
   *out = global_constexpr_var;
+  *out = b1.x; // dev-error {{reference to __host__ variable 'b1' in __device__ function}}
+  *out = b2.x; // dev-error {{reference to __host__ variable 'b2' in __device__ function}}
+  *out = b3; // dev-error {{reference to __host__ variable 'b3' in __device__ function}}
   global_host_var = 1; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}}
 
   // Check reference of non-constexpr host variables are not allowed.
   int &ref_host_var = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}}
-  const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __device__ function}}
+  const int &ref_const_var = global_const_var;
   const int &ref_constexpr_var = global_constexpr_var;
   *out = ref_host_var;
   *out = ref_constexpr_var;
@@ -65,18 +94,18 @@
 
   // Check access member of non-constexpr struct type host variable is not allowed.
   *out = global_host_struct_var.x; // dev-error {{reference to __host__ variable 'global_host_struct_var' in __device__ function}}
-  *out = global_const_struct_var.x; // dev-error {{reference to __host__ variable 'global_const_struct_var' in __device__ function}}
+  *out = global_const_struct_var.x;
   *out = global_constexpr_struct_var.x;
   global_host_struct_var.x = 1; // dev-error {{reference to __host__ variable 'global_host_struct_var' in __device__ function}}
 
   // Check address taking of non-constexpr host variables is not allowed.
   int *p = &global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}}
-  const int *cp = &global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __device__ function}}
+  const int *cp = &global_const_var;
   const int *cp2 = &global_constexpr_var;
 
   // Check access elements of non-constexpr host array is not allowed.
   *out = global_host_array[1]; // dev-error {{reference to __host__ variable 'global_host_array' in __device__ function}}
-  *out = global_const_array[1]; // dev-error {{reference to __host__ variable 'global_const_array' in __device__ function}}
+  *out = global_const_array[1];
   *out = global_constexpr_array[1];
 
   // Check ODR-use of host variables in namespace is not allowed.
@@ -103,7 +132,7 @@
   int &ref_constant_var = global_constant_var;
   int &ref_shared_var = global_shared_var;
   const int &ref_constexpr_var = global_constexpr_var;
-  const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __global__ function}}
+  const int &ref_const_var = global_const_var;
 
   *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __global__ function}}
   *out = global_dev_var;
@@ -126,7 +155,7 @@
   int &ref_constant_var = global_constant_var;
   int &ref_shared_var = global_shared_var;
   const int &ref_constexpr_var = global_constexpr_var;
-  const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __host__ __device__ function}}
+  const int &ref_const_var = global_const_var;
 
   *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}}
   *out = global_dev_var;
@@ -173,7 +202,7 @@
   int &ref_constant_var = global_constant_var;
   int &ref_shared_var = global_shared_var;
   const int &ref_constexpr_var = global_constexpr_var;
-  const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __host__ __device__ function}}
+  const int &ref_const_var = global_const_var;
 
   *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}}
                           // dev-error@-1 {{capture host variable 'out' by reference in device or host device lambda function}}
@@ -199,7 +228,7 @@
   int &ref_constant_var = global_constant_var;
   int &ref_shared_var = global_shared_var;
   const int &ref_constexpr_var = global_constexpr_var;
-  const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __host__ __device__ function}}
+  const int &ref_const_var = global_const_var;
 
   *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}}
   *out = global_dev_var;
Index: clang/test/CodeGenCUDA/device-use-host-var.cu
===================================================================
--- clang/test/CodeGenCUDA/device-use-host-var.cu
+++ clang/test/CodeGenCUDA/device-use-host-var.cu
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa \
 // RUN:   -fcuda-is-device -emit-llvm -o - -x hip %s | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa \
+// RUN:   -fcuda-is-device -emit-llvm -o - -x hip %s | FileCheck -check-prefix=NEG %s
 
 #include "Inputs/cuda.h"
 
@@ -12,29 +14,49 @@
 constexpr A constexpr_array[4] = {0, 0, 0, 3};
 constexpr char constexpr_str[] = "abcd";
 const int const_var = 4;
+const A const_struct{5};
+const A const_array[] = {0, 0, 0, 6};
+const char const_str[] = "xyz";
 
 // CHECK-DAG: @_ZL13constexpr_str.const = private unnamed_addr addrspace(4) constant [5 x i8] c"abcd\00"
 // CHECK-DAG: @_ZL13constexpr_var = internal addrspace(4) constant i32 1
 // CHECK-DAG: @_ZL16constexpr_struct = internal addrspace(4) constant %struct.A { i32 2 }
 // CHECK-DAG: @_ZL15constexpr_array = internal addrspace(4) constant [4 x %struct.A] [%struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A { i32 3 }]
-// CHECK-NOT: external
+// CHECK-DAG: @_ZL9const_var = internal addrspace(4) constant i32 4
+// CHECK-DAG: @_ZL12const_struct = internal addrspace(4) constant %struct.A { i32 5 }
+// CHECK-DAG: @_ZL11const_array = internal addrspace(4) constant [4 x %struct.A] [%struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A { i32 6 }]
+// CHECK-DAG: @_ZL9const_str = internal addrspace(4) constant [4 x i8] c"xyz\00"
+
+// NEG-NOT: external
 
 // CHECK-LABEL: define{{.*}}@_Z7dev_funPiPPKi
 // CHECK: store i32 1
 // CHECK: store i32 2
 // CHECK: store i32 3
-// CHECK: store i32 4
 // CHECK: load i8, i8* getelementptr {{.*}} @_ZL13constexpr_str.const
+// CHECK: store i32 4
+// CHECK: store i32 5
+// CHECK: store i32 6
+// CHECK: load i8, i8* getelementptr {{.*}} @_ZL9const_str
 // CHECK: store i32* {{.*}}@_ZL13constexpr_var
 // CHECK: store i32* getelementptr {{.*}} @_ZL16constexpr_struct
 // CHECK: store i32* getelementptr {{.*}} @_ZL15constexpr_array
+// CHECK: store i32* {{.*}}@_ZL9const_var
+// CHECK: store i32* getelementptr {{.*}} @_ZL12const_struct
+// CHECK: store i32* getelementptr {{.*}} @_ZL11const_array
 __device__ void dev_fun(int *out, const int **out2) {
   *out = constexpr_var;
   *out = constexpr_struct.x;
   *out = constexpr_array[3].x;
-  *out = const_var;
   *out = constexpr_str[3];
+  *out = const_var;
+  *out = const_struct.x;
+  *out = const_array[3].x;
+  *out = const_str[3];
   *out2 = &constexpr_var;
   *out2 = &constexpr_struct.x;
   *out2 = &constexpr_array[3].x;
+  *out2 = &const_var;
+  *out2 = &const_struct.x;
+  *out2 = &const_array[3].x;
 }
Index: clang/lib/Sema/SemaDecl.cpp
===================================================================
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -12957,6 +12957,8 @@
 void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
   if (var->isInvalidDecl()) return;
 
+  MaybeAddCUDAConstantAttr(var);
+
   if (getLangOpts().OpenCL) {
     // OpenCL v2.0 s6.12.5 - Every block variable declaration must have an
     // initialiser
Index: clang/lib/Sema/SemaCUDA.cpp
===================================================================
--- clang/lib/Sema/SemaCUDA.cpp
+++ clang/lib/Sema/SemaCUDA.cpp
@@ -147,6 +147,9 @@
     return CVT_Unified;
   if (Var->isConstexpr() && !hasExplicitAttr<CUDAConstantAttr>(Var))
     return CVT_Both;
+  if (Var->getType().isConstQualified() && Var->hasAttr<CUDAConstantAttr>() &&
+      !hasExplicitAttr<CUDAConstantAttr>(Var))
+    return CVT_Both;
   if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
       Var->hasAttr<CUDASharedAttr>() ||
       Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
@@ -549,47 +552,72 @@
   return true;
 }
 
+namespace {
+enum CUDAInitializerCheckKind {
+  CICK_DeviceOrConstant, // Check initializer for device/constant variable
+  CICK_Shared,           // Check initializer for shared variable
+};
+
+// Check whether a variable has an allowed initializer for a CUDA device side
+// variable with global storage. \p VD may be a host variable to be checked for
+// potential promotion to device side variable.
+//
+// CUDA/HIP allows only empty constructors as initializers for global
+// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
+// __shared__ variables whether they are local or not (they all are implicitly
+// static in CUDA). One exception is that CUDA allows constant initializers
+// for __constant__ and __device__ variables.
+bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD,
+                                           CUDAInitializerCheckKind CheckKind) {
+  assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
+  const Expr *Init = VD->getInit();
+  auto IsEmptyInit = [&](const Expr *Init) {
+    if (!Init)
+      return true;
+    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
+      return S.isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
+    }
+    return false;
+  };
+  // isConstantInitializer cannot be called with dependent value, therefore
+  // we skip checking dependent value here. This is OK since
+  // IsAllowedCUDAStaticInitializer is called again when the template is
+  // instantiated.
+  auto IsDependentOrConstantInit = [&](const Expr *Init) {
+    assert(Init);
+    return VD->getType()->isDependentType() || Init->isValueDependent() ||
+           Init->isConstantInitializer(S.Context,
+                                       VD->getType()->isReferenceType());
+  };
+  auto HasEmptyDtor = [&](VarDecl *VD) {
+    if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
+      return S.isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());
+    return true;
+  };
+  if (CheckKind == CICK_Shared)
+    return IsEmptyInit(Init) && HasEmptyDtor(VD);
+  return S.LangOpts.GPUAllowDeviceInit ||
+         ((IsEmptyInit(Init) || IsDependentOrConstantInit(Init)) &&
+          HasEmptyDtor(VD));
+}
+} // namespace
+
 void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
   if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
     return;
   const Expr *Init = VD->getInit();
-  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
-      VD->hasAttr<CUDASharedAttr>()) {
-    if (LangOpts.GPUAllowDeviceInit)
+  bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
+  bool IsDeviceOrConstantVar =
+      !IsSharedVar &&
+      (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
+  if (IsDeviceOrConstantVar || IsSharedVar) {
+    if (HasAllowedCUDADeviceStaticInitializer(
+            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
       return;
-    bool AllowedInit = false;
-    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
-      AllowedInit =
-          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
-    // We'll allow constant initializers even if it's a non-empty
-    // constructor according to CUDA rules. This deviates from NVCC,
-    // but allows us to handle things like constexpr constructors.
-    if (!AllowedInit &&
-        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>())) {
-      auto *Init = VD->getInit();
-      // isConstantInitializer cannot be called with dependent value, therefore
-      // we skip checking dependent value here. This is OK since
-      // checkAllowedCUDAInitializer is called again when the template is
-      // instantiated.
-      AllowedInit =
-          VD->getType()->isDependentType() || Init->isValueDependent() ||
-          Init->isConstantInitializer(Context,
-                                      VD->getType()->isReferenceType());
-    }
-
-    // Also make sure that destructor, if there is one, is empty.
-    if (AllowedInit)
-      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
-        AllowedInit =
-            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());
-
-    if (!AllowedInit) {
-      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
-                                  ? diag::err_shared_var_init
-                                  : diag::err_dynamic_var_init)
-          << Init->getSourceRange();
-      VD->setInvalidDecl();
-    }
+    Diag(VD->getLocation(),
+         IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
+        << Init->getSourceRange();
+    VD->setInvalidDecl();
   } else {
     // This is a host-side global variable.  Check that the initializer is
     // callable from the host side.
@@ -673,9 +701,12 @@
 }
 
 void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
-  if (getLangOpts().CUDAIsDevice && VD->isConstexpr() &&
+  if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
+      !VD->hasAttr<CUDAConstantAttr>() && !VD->hasAttr<CUDASharedAttr>() &&
       (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
-      !VD->hasAttr<CUDAConstantAttr>()) {
+      (VD->isConstexpr() || (VD->getType().isConstQualified() &&
+                             HasAllowedCUDADeviceStaticInitializer(
+                                 *this, VD, CICK_DeviceOrConstant)))) {
     VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
   }
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to