yaxunl updated this revision to Diff 140110.
yaxunl retitled this revision from "Disable zeroinitializer for CUDA shared 
varirable for amdgcn target" to "Remove initializer for CUDA shared varirable".
yaxunl edited the summary of this revision.
yaxunl added a reviewer: tra.
yaxunl added a comment.

Revised by Artem's comments.


https://reviews.llvm.org/D44985

Files:
  lib/CodeGen/CGDecl.cpp
  test/CodeGenCUDA/address-spaces.cu
  test/CodeGenCUDA/device-var-init.cu

Index: test/CodeGenCUDA/device-var-init.cu
===================================================================
--- test/CodeGenCUDA/device-var-init.cu
+++ test/CodeGenCUDA/device-var-init.cu
@@ -1,10 +1,14 @@
 // REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
 
 // Make sure we don't allow dynamic initialization for device
 // variables, but accept empty constructors allowed by CUDA.
 
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \
-// RUN:     -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck %s
+// RUN:     -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,NVPTX %s
+
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \
+// RUN:     -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s
 
 #ifdef __clang__
 #include "Inputs/cuda.h"
@@ -105,68 +109,114 @@
 __constant__ EC_I_EC c_ec_i_ec;
 // CHECK: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer,
 
+// CHECK: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef
+// CHECK: @_ZZ2dfvE5s_etc = internal addrspace(3) global %struct.ETC undef
+
 // We should not emit global initializers for device-side variables.
 // CHECK-NOT: @__cxx_global_var_init
 
 // Make sure that initialization restrictions do not apply to local
 // variables.
 __device__ void df() {
+  // AMDGCN:  %[[ec:.*]] = addrspacecast %struct.EC addrspace(5)* %ec to %struct.EC*
+  // AMDGCN:  %[[ed:.*]] = addrspacecast %struct.ED addrspace(5)* %ed to %struct.ED*
+  // AMDGCN:  %[[ecd:.*]] = addrspacecast %struct.ECD addrspace(5)* %ecd to %struct.ECD*
+  // AMDGCN:  %[[etc:.*]] = addrspacecast %struct.ETC addrspace(5)* %etc to %struct.ETC*
+  // AMDGCN:  %[[uc:.*]] = addrspacecast %struct.UC addrspace(5)* %uc to %struct.UC*
+  // AMDGCN:  %[[ud:.*]] = addrspacecast %struct.UD addrspace(5)* %ud to %struct.UD*
+  // AMDGCN:  %[[eci:.*]] = addrspacecast %struct.ECI addrspace(5)* %eci to %struct.ECI*
+  // AMDGCN:  %[[nec:.*]] = addrspacecast %struct.NEC addrspace(5)* %nec to %struct.NEC*
+  // AMDGCN:  %[[ned:.*]] = addrspacecast %struct.NED addrspace(5)* %ned to %struct.NED*
+  // AMDGCN:  %[[ncv:.*]] = addrspacecast %struct.NCV addrspace(5)* %ncv to %struct.NCV*
+  // AMDGCN:  %[[vd:.*]] = addrspacecast %struct.VD addrspace(5)* %vd to %struct.VD*
+  // AMDGCN:  %[[ncf:.*]] = addrspacecast %struct.NCF addrspace(5)* %ncf to %struct.NCF*
+  // AMDGCN:  %[[ncfs:.*]] = addrspacecast %struct.NCFS addrspace(5)* %ncfs to %struct.NCFS*
+  // AMDGCN:  %[[utc:.*]] = addrspacecast %struct.UTC addrspace(5)* %utc to %struct.UTC*
+  // AMDGCN:  %[[netc:.*]] = addrspacecast %struct.NETC addrspace(5)* %netc to %struct.NETC*
+  // AMDGCN:  %[[ec_i_ec:.*]] = addrspacecast %struct.EC_I_EC addrspace(5)* %ec_i_ec to %struct.EC_I_EC*
+  // AMDGCN:  %[[ec_i_ec1:.*]] = addrspacecast %struct.EC_I_EC1 addrspace(5)* %ec_i_ec1 to %struct.EC_I_EC1*
+  // AMDGCN:  %[[t_v_t:.*]] = addrspacecast %struct.T_V_T addrspace(5)* %t_v_t to %struct.T_V_T*
+  // AMDGCN:  %[[t_b_nec:.*]] = addrspacecast %struct.T_B_NEC addrspace(5)* %t_b_nec to %struct.T_B_NEC*
+  // AMDGCN:  %[[t_f_nec:.*]] = addrspacecast %struct.T_F_NEC addrspace(5)* %t_f_nec to %struct.T_F_NEC*
+  // AMDGCN:  %[[t_fa_nec:.*]] = addrspacecast %struct.T_FA_NEC addrspace(5)* %t_fa_nec to %struct.T_FA_NEC*
+  // AMDGCN:  %[[t_b_ned:.*]] = addrspacecast %struct.T_B_NED addrspace(5)* %t_b_ned to %struct.T_B_NED*
+  // AMDGCN:  %[[t_f_ned:.*]] = addrspacecast %struct.T_F_NED addrspace(5)* %t_f_ned to %struct.T_F_NED*
+  // AMDGCN:  %[[t_fa_ned:.*]] = addrspacecast %struct.T_FA_NED addrspace(5)* %t_fa_ned to %struct.T_FA_NED*
+
   T t;
   // CHECK-NOT: call
   EC ec;
-  // CHECK:   call void @_ZN2ECC1Ev(%struct.EC* %ec)
+  // NVPTX:   call void @_ZN2ECC1Ev(%struct.EC* %ec)
+  // AMDGCN:  call void @_ZN2ECC1Ev(%struct.EC* %[[ec]])
   ED ed;
   // CHECK-NOT: call
   ECD ecd;
-  // CHECK:   call void @_ZN3ECDC1Ev(%struct.ECD* %ecd)
+  // NVPTX:   call void @_ZN3ECDC1Ev(%struct.ECD* %ecd)
+  // AMDGCN:  call void @_ZN3ECDC1Ev(%struct.ECD* %[[ecd]])
   ETC etc;
-  // CHECK:   call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc)
+  // NVPTX:   call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc)
+  // AMDGCN:  call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %[[etc]])
   UC uc;
   // undefined constructor -- not allowed
-  // CHECK:   call void @_ZN2UCC1Ev(%struct.UC* %uc)
+  // NVPTX:   call void @_ZN2UCC1Ev(%struct.UC* %uc)
+  // AMDGCN:  call void @_ZN2UCC1Ev(%struct.UC* %[[uc]])
   UD ud;
   // undefined destructor -- not allowed
   // CHECK-NOT: call
   ECI eci;
   // empty constructor w/ initializer list -- not allowed
-  // CHECK:   call void @_ZN3ECIC1Ev(%struct.ECI* %eci)
+  // NVPTX:   call void @_ZN3ECIC1Ev(%struct.ECI* %eci)
+  // AMDGCN:  call void @_ZN3ECIC1Ev(%struct.ECI* %[[eci]])
   NEC nec;
   // non-empty constructor -- not allowed
-  // CHECK:   call void @_ZN3NECC1Ev(%struct.NEC* %nec)
+  // NVPTX:   call void @_ZN3NECC1Ev(%struct.NEC* %nec)
+  // AMDGCN:  call void @_ZN3NECC1Ev(%struct.NEC* %[[nec]])
   // non-empty destructor -- not allowed
   NED ned;
   // no-constructor,  virtual method -- not allowed
-  // CHECK:   call void @_ZN3NCVC1Ev(%struct.NCV* %ncv)
+  // NVPTX:   call void @_ZN3NCVC1Ev(%struct.NCV* %ncv)
+  // AMDGCN:  call void @_ZN3NCVC1Ev(%struct.NCV* %[[ncv]])
   NCV ncv;
   // CHECK-NOT: call
   VD vd;
-  // CHECK:   call void @_ZN2VDC1Ev(%struct.VD* %vd)
+  // NVPTX:   call void @_ZN2VDC1Ev(%struct.VD* %vd)
+  // AMDGCN:  call void @_ZN2VDC1Ev(%struct.VD* %[[vd]])
   NCF ncf;
-  // CHECK:   call void @_ZN3NCFC1Ev(%struct.NCF* %ncf)
+  // NVPTX:   call void @_ZN3NCFC1Ev(%struct.NCF* %ncf)
+  // AMDGCN:   call void @_ZN3NCFC1Ev(%struct.NCF* %[[ncf]])
   NCFS ncfs;
-  // CHECK:   call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs)
+  // NVPTX:   call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs)
+  // AMDGCN:  call void @_ZN4NCFSC1Ev(%struct.NCFS* %[[ncfs]])
   UTC utc;
-  // CHECK:   call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc)
+  // NVPTX:   call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc)
+  // AMDGCN:  call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %[[utc]])
   NETC netc;
-  // CHECK:   call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc)
+  // NVPTX:   call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc)
+  // AMDGCN:  call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %[[netc]])
   T_B_T t_b_t;
   // CHECK-NOT: call
   T_F_T t_f_t;
   // CHECK-NOT: call
   T_FA_T t_fa_t;
   // CHECK-NOT: call
   EC_I_EC ec_i_ec;
-  // CHECK:   call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec)
+  // NVPTX:   call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec)
+  // AMDGCN:  call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %[[ec_i_ec]])
   EC_I_EC1 ec_i_ec1;
-  // CHECK:   call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1)
+  // NVPTX:   call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1)
+  // AMDGCN:  call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %[[ec_i_ec1]])
   T_V_T t_v_t;
-  // CHECK:   call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t)
+  // NVPTX:   call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t)
+  // AMDGCN:  call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %[[t_v_t]])
   T_B_NEC t_b_nec;
-  // CHECK:   call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec)
+  // NVPTX:   call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec)
+  // AMDGCN:  call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %[[t_b_nec]])
   T_F_NEC t_f_nec;
-  // CHECK:   call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec)
+  // NVPTX:   call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec)
+  // AMDGCN:  call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %[[t_f_nec]])
   T_FA_NEC t_fa_nec;
-  // CHECK:   call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
+  // NVPTX:   call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
+  // AMDGCN:  call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %[[t_fa_nec]])
   T_B_NED t_b_ned;
   // CHECK-NOT: call
   T_F_NED t_f_ned;
@@ -182,14 +232,23 @@
   df(); // CHECK: call void @_Z2dfv()
 
   // Verify that we only call non-empty destructors
-  // CHECK-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %t_fa_ned)
-  // CHECK-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %t_f_ned)
-  // CHECK-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %t_b_ned)
-  // CHECK-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %vd)
-  // CHECK-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %ned)
-  // CHECK-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %ud)
-  // CHECK-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %ecd)
-  // CHECK-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %ed)
+  // NVPTX-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %t_fa_ned)
+  // NVPTX-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %t_f_ned)
+  // NVPTX-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %t_b_ned)
+  // NVPTX-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %vd)
+  // NVPTX-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %ned)
+  // NVPTX-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %ud)
+  // NVPTX-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %ecd)
+  // NVPTX-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %ed)
+
+  // AMDGCN-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %[[t_fa_ned]])
+  // AMDGCN-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %[[t_f_ned]])
+  // AMDGCN-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %[[t_b_ned]])
+  // AMDGCN-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %[[vd]])
+  // AMDGCN-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %[[ned]])
+  // AMDGCN-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %[[ud]])
+  // AMDGCN-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %[[ecd]])
+  // AMDGCN-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %[[ed]])
 
   // CHECK-NEXT: ret void
 }
Index: test/CodeGenCUDA/address-spaces.cu
===================================================================
--- test/CodeGenCUDA/address-spaces.cu
+++ test/CodeGenCUDA/address-spaces.cu
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple amdgcn | FileCheck %s
 
 // Verifies Clang emits correct address spaces and addrspacecast instructions
 // for CUDA code.
@@ -19,11 +20,11 @@
   int data2;
 };
 
-// CHECK: @_ZZ5func0vE1a = internal addrspace(3) global %struct.MyStruct zeroinitializer
-// CHECK: @_ZZ5func1vE1a = internal addrspace(3) global float 0.000000e+00
-// CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] zeroinitializer
-// CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float 0.000000e+00
-// CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float 0.000000e+00
+// CHECK: @_ZZ5func0vE1a = internal addrspace(3) global %struct.MyStruct undef
+// CHECK: @_ZZ5func1vE1a = internal addrspace(3) global float undef
+// CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] undef
+// CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float undef
+// CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float undef
 // CHECK: @b = addrspace(3) global float undef
 
 __device__ void foo() {
@@ -48,7 +49,7 @@
   ap->data2 = 2;
 }
 // CHECK: define void @_Z5func0v()
-// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %ap
+// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %{{.*}}
 
 __device__ void callee(float *ap) {
   *ap = 1.0f;
@@ -67,23 +68,23 @@
   *ap = 1.0f;
 }
 // CHECK: define void @_Z5func2v()
-// CHECK: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i32 0, i32 128), float** %ap
+// CHECK: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i{{32|64}} 0, i{{32|64}} 128), float** %{{.*}}
 
 __device__ void func3() {
   __shared__ float a;
   float *ap = reinterpret_cast<float *>(&a); // explicit cast
   *ap = 1.0f;
 }
 // CHECK: define void @_Z5func3v()
-// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %ap
+// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %{{.*}}
 
 __device__ void func4() {
   __shared__ float a;
   float *ap = (float *)&a; // explicit c-style cast
   *ap = 1.0f;
 }
 // CHECK: define void @_Z5func4v()
-// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %ap
+// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %{{.*}}
 
 __shared__ float b;
 
Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -405,6 +405,11 @@
   if (D.getInit() && !isCudaSharedVar)
     var = AddInitializerToStaticVarDecl(D, var);
 
+  // CUDA shared variable should not be initialized.
+  if (isCudaSharedVar)
+    var->setInitializer(
+        llvm::UndefValue::get(var->getType()->getElementType()));
+
   var->setAlignment(alignment.getQuantity());
 
   if (D.hasAttr<AnnotateAttr>())
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to