Author: JonChesterfield
Date: 2020-10-28T14:25:36Z
New Revision: 5d02ca49a294848b533adf7dc1d1275d125ef587
URL: https://github.com/llvm/llvm-project/commit/5d02ca49a294848b533adf7dc1d1275d125ef587
DIFF: https://github.com/llvm/llvm-project/commit/5d02ca49a294848b533adf7dc1d1275d125ef587.diff

LOG: [libomptarget][nvptx] Undef, weak shared variables

[libomptarget][nvptx] Undef, weak shared variables

Shared variables on nvptx, and LDS on amdgcn, are uninitialized at
the start of kernel execution. Therefore create the variables with
undef instead of zeros, motivated in part by the amdgcn back end
rejecting LDS+initializer.

Common is zero initialized, which seems incompatible with shared. Thus
change them to weak, following the direction of
https://reviews.llvm.org/rG7b3eabdcd215

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D90248

Added:

Modified:
    clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
    clang/test/OpenMP/nvptx_data_sharing.cpp
    clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
    clang/test/OpenMP/nvptx_parallel_codegen.cpp
    clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
    clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
    clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
    clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
    clang/test/OpenMP/nvptx_teams_codegen.cpp
    clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp

Removed:

################################################################################
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index bcabc5398127..08903a1444c2 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1102,7 +1102,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
     KernelStaticGlobalized = new llvm::GlobalVariable(
         CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
         llvm::GlobalValue::InternalLinkage,
-        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+        llvm::UndefValue::get(CGM.VoidPtrTy),
         "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
         llvm::GlobalValue::NotThreadLocal,
         CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
@@ -1234,7 +1234,7 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
     KernelStaticGlobalized = new llvm::GlobalVariable(
         CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
         llvm::GlobalValue::InternalLinkage,
-        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+        llvm::UndefValue::get(CGM.VoidPtrTy),
         "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
         llvm::GlobalValue::NotThreadLocal,
         CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
@@ -2855,8 +2855,8 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
     auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
     unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
     TransferMedium = new llvm::GlobalVariable(
-        M, Ty, /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage,
-        llvm::Constant::getNullValue(Ty), TransferMediumName,
+        M, Ty, /*isConstant=*/false, llvm::GlobalVariable::WeakAnyLinkage,
+        llvm::UndefValue::get(Ty), TransferMediumName,
         /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
         SharedAddressSpace);
     CGM.addCompilerUsedGlobal(TransferMedium);
@@ -4791,8 +4791,8 @@ void CGOpenMPRuntimeGPU::clear() {
       llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy);
       auto *GV = new llvm::GlobalVariable(
           CGM.getModule(), LLVMStaticTy,
-          /*isConstant=*/false, llvm::GlobalValue::CommonLinkage,
-          llvm::Constant::getNullValue(LLVMStaticTy),
+          /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage,
+          llvm::UndefValue::get(LLVMStaticTy),
           "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr,
           llvm::GlobalValue::NotThreadLocal,
           C.getTargetAddressSpace(LangAS::cuda_shared));
diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp
index 1372246c7fc8..b6117d738d2b 100644
--- a/clang/test/OpenMP/nvptx_data_sharing.cpp
+++ b/clang/test/OpenMP/nvptx_data_sharing.cpp
@@ -28,8 +28,8 @@ void test_ds(){
   }
 }
 // SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i64 8
 // SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
index 4b194a315e0e..fef5f6f84143 100644
--- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
@@ -35,8 +35,8 @@ int main(int argc, char **argv) {
 }
 
 // SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 40
 // SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 // CHECK-DAG: @__omp_offloading_{{.*}}_main_[[LINE:l.+]]_exec_mode = weak constant i8 0
diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
index bd9c988d46e7..593f7fa49bf4 100644
--- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -76,8 +76,8 @@ int bar(int n){
 }
 
 // SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
index 4ef167de3b8a..839e859b0d3b 100644
--- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
@@ -32,8 +32,8 @@ int bar(int n){
 }
 
 // SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
index 459330d31f66..a69a9f90a839 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
@@ -9,7 +9,7 @@
 #define HEADER
 
 // Check for the data transfer medium in shared memory to transfer the reduction list to the first warp.
-// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = common addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32]
+// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = weak addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32]
 
 // Check that the execution mode of all 3 target regions is set to Spmd Mode.
 // CHECK-DAG: {{@__omp_offloading_.+l27}}_exec_mode = weak constant i8 0
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
index d40aad3dee77..480b34d28ae3 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -93,8 +93,8 @@ int bar(int n){
 }
 
 // SEQ-DAG: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
index e9126fce7020..79af94ac6e6b 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -76,8 +76,8 @@ int bar(int n){
 }
 
 // SEQ-DAG: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp
index 36bb972175a5..ac08ccab2308 100644
--- a/clang/test/OpenMP/nvptx_teams_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp
@@ -30,8 +30,8 @@ int main (int argc, char **argv) {
 }
 
 // SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // SEQ-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} {{8|4}}
 // SEQ-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1
@@ -119,8 +119,8 @@ int main (int argc, char **argv) {
 }
 
 // SEQ2: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ2-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ2-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ2-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ2-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ2-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // SEQ2-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} {{8|4}}
 // SEQ2-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1
diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
index 5aa3e1978b10..507b2f8b1020 100644
--- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
@@ -17,14 +17,14 @@
 // CHECK-DAG: [[TEAMS_REDUCE_UNION_TY:%.+]] = type { [[TEAM1_REDUCE_TY]] }
 
 // SEQ-DAG: [[MAP_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1
 // SEQ-DAG: [[KERNEL_SHARED2:@.+]] = internal unnamed_addr constant i16 1
 // SEQ-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} {{16|8}}
 // SEQ-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} 16
 
 // Check for the data transfer medium in shared memory to transfer the reduction list to the first warp.
-// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = common addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32]
+// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = weak addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32]
 
 // Check that the execution mode of 2 target regions is set to Non-SPMD and the 3rd is in SPMD.
 // CHECK-DAG: {{@__omp_offloading_.+l44}}_exec_mode = weak constant i8 1

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits