This revision was automatically updated to reflect the committed changes. Closed by commit rL349981: [CUDA] Treat extern global variable shadows same as regular extern vars. (authored by tra, committed by ). Herald added a subscriber: llvm-commits.
Changed prior to commit: https://reviews.llvm.org/D56033?vs=179387&id=179395#toc Repository: rL LLVM CHANGES SINCE LAST ACTION https://reviews.llvm.org/D56033/new/ https://reviews.llvm.org/D56033 Files: cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/test/CodeGenCUDA/device-stub.cu Index: cfe/trunk/lib/CodeGen/CodeGenModule.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp @@ -2188,15 +2188,7 @@ } else { const auto *VD = cast<VarDecl>(Global); assert(VD->isFileVarDecl() && "Cannot emit local var decl as global."); - // We need to emit device-side global CUDA variables even if a - // variable does not have a definition -- we still need to define - // host-side shadow for it. - bool MustEmitForCuda = LangOpts.CUDA && !LangOpts.CUDAIsDevice && - !VD->hasDefinition() && - (VD->hasAttr<CUDAConstantAttr>() || - VD->hasAttr<CUDADeviceAttr>()); - if (!MustEmitForCuda && - VD->isThisDeclarationADefinition() != VarDecl::Definition && + if (VD->isThisDeclarationADefinition() != VarDecl::Definition && !Context.isMSStaticDataMemberInlineDefinition(VD)) { if (LangOpts.OpenMP) { // Emit declaration of the must-be-emitted declare target variable. @@ -3616,7 +3608,10 @@ Flags |= CGCUDARuntime::ExternDeviceVar; if (D->hasAttr<CUDAConstantAttr>()) Flags |= CGCUDARuntime::ConstantDeviceVar; - getCUDARuntime().registerDeviceVar(*GV, Flags); + // Extern global variables will be registered in the TU where they are + // defined. + if (!D->hasExternalStorage()) + getCUDARuntime().registerDeviceVar(*GV, Flags); } else if (D->hasAttr<CUDASharedAttr>()) // __shared__ variables are odd. Shadows do get created, but // they are not registered with the CUDA runtime, so they Index: cfe/trunk/test/CodeGenCUDA/device-stub.cu =================================================================== --- cfe/trunk/test/CodeGenCUDA/device-stub.cu +++ cfe/trunk/test/CodeGenCUDA/device-stub.cu @@ -42,13 +42,20 @@ // ALL-DAG: @ext_host_var = external global i32 extern int ext_host_var; -// Shadows for external device-side variables are *definitions* of -// those variables. -// ALL-DAG: @ext_device_var = internal global i32 +// external device-side variables -> extern references to their shadows. +// ALL-DAG: @ext_device_var = external global i32 extern __device__ int ext_device_var; -// ALL-DAG: @ext_device_var = internal global i32 +// ALL-DAG: @ext_device_var = external global i32 extern __constant__ int ext_constant_var; +// external device-side variables with definitions should generate +// definitions for the shadows. +// ALL-DAG: @ext_device_var_def = internal global i32 undef, +extern __device__ int ext_device_var_def; +__device__ int ext_device_var_def = 1; +// ALL-DAG: @ext_device_var_def = internal global i32 undef, +__constant__ int ext_constant_var_def = 2; + void use_pointers() { int *p; p = &device_var; @@ -114,8 +121,8 @@ // ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc // ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0 // ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0 -// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0 -// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0 +// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{.*}}i32 0, i32 4, i32 0, i32 0 +// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{.*}}i32 0, i32 4, i32 1, i32 0 // ALL: ret void // Test that we've built a constructor.
Index: cfe/trunk/lib/CodeGen/CodeGenModule.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp @@ -2188,15 +2188,7 @@ } else { const auto *VD = cast<VarDecl>(Global); assert(VD->isFileVarDecl() && "Cannot emit local var decl as global."); - // We need to emit device-side global CUDA variables even if a - // variable does not have a definition -- we still need to define - // host-side shadow for it. - bool MustEmitForCuda = LangOpts.CUDA && !LangOpts.CUDAIsDevice && - !VD->hasDefinition() && - (VD->hasAttr<CUDAConstantAttr>() || - VD->hasAttr<CUDADeviceAttr>()); - if (!MustEmitForCuda && - VD->isThisDeclarationADefinition() != VarDecl::Definition && + if (VD->isThisDeclarationADefinition() != VarDecl::Definition && !Context.isMSStaticDataMemberInlineDefinition(VD)) { if (LangOpts.OpenMP) { // Emit declaration of the must-be-emitted declare target variable. @@ -3616,7 +3608,10 @@ Flags |= CGCUDARuntime::ExternDeviceVar; if (D->hasAttr<CUDAConstantAttr>()) Flags |= CGCUDARuntime::ConstantDeviceVar; - getCUDARuntime().registerDeviceVar(*GV, Flags); + // Extern global variables will be registered in the TU where they are + // defined. + if (!D->hasExternalStorage()) + getCUDARuntime().registerDeviceVar(*GV, Flags); } else if (D->hasAttr<CUDASharedAttr>()) // __shared__ variables are odd. Shadows do get created, but // they are not registered with the CUDA runtime, so they Index: cfe/trunk/test/CodeGenCUDA/device-stub.cu =================================================================== --- cfe/trunk/test/CodeGenCUDA/device-stub.cu +++ cfe/trunk/test/CodeGenCUDA/device-stub.cu @@ -42,13 +42,20 @@ // ALL-DAG: @ext_host_var = external global i32 extern int ext_host_var; -// Shadows for external device-side variables are *definitions* of -// those variables. -// ALL-DAG: @ext_device_var = internal global i32 +// external device-side variables -> extern references to their shadows. +// ALL-DAG: @ext_device_var = external global i32 extern __device__ int ext_device_var; -// ALL-DAG: @ext_device_var = internal global i32 +// ALL-DAG: @ext_device_var = external global i32 extern __constant__ int ext_constant_var; +// external device-side variables with definitions should generate +// definitions for the shadows. +// ALL-DAG: @ext_device_var_def = internal global i32 undef, +extern __device__ int ext_device_var_def; +__device__ int ext_device_var_def = 1; +// ALL-DAG: @ext_device_var_def = internal global i32 undef, +__constant__ int ext_constant_var_def = 2; + void use_pointers() { int *p; p = &device_var; @@ -114,8 +121,8 @@ // ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc // ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0 // ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0 -// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0 -// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0 +// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{.*}}i32 0, i32 4, i32 0, i32 0 +// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{.*}}i32 0, i32 4, i32 1, i32 0 // ALL: ret void // Test that we've built a constructor.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits