Author: abataev Date: Fri Nov 9 08:18:04 2018 New Revision: 346507 URL: http://llvm.org/viewvc/llvm-project?rev=346507&view=rev Log: [OPENMP][NVPTX] Allow the use of shared memory for the target|teams|distribute variables.
If the total size of the variables declared in target|teams|distribute regions does not exceed the maximal size of the shared memory buffer, the buffer is allocated in shared memory. Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=346507&r1=346506&r2=346507&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Fri Nov 9 08:18:04 2018 @@ -176,6 +176,9 @@ enum MachineConfiguration : unsigned { /// Global memory alignment for performance. GlobalMemoryAlignment = 128, + + /// Maximal size of the shared memory buffer. + SharedMemorySize = 128, }; enum NamedBarrier : unsigned { @@ -1143,13 +1146,6 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKe IsInTTDRegion = true; // Reserve place for the globalized memory. 
GlobalizedRecords.emplace_back(); - if (!StaticGlobalized) { - StaticGlobalized = new llvm::GlobalVariable( - CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/true, - llvm::GlobalValue::WeakAnyLinkage, nullptr, - "_openmp_static_glob_rd$ptr"); - StaticGlobalized->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - } if (!KernelStaticGlobalized) { KernelStaticGlobalized = new llvm::GlobalVariable( CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false, @@ -1277,13 +1273,6 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKerne IsInTTDRegion = true; // Reserve place for the globalized memory. GlobalizedRecords.emplace_back(); - if (!StaticGlobalized) { - StaticGlobalized = new llvm::GlobalVariable( - CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/true, - llvm::GlobalValue::WeakAnyLinkage, nullptr, - "_openmp_static_glob_rd$ptr"); - StaticGlobalized->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - } if (!KernelStaticGlobalized) { KernelStaticGlobalized = new llvm::GlobalVariable( CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false, @@ -2138,30 +2127,41 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord); ++GlobalizedRecords.back().RegionCounter; if (GlobalizedRecords.back().Records.size() == 1) { - assert(StaticGlobalized && - "Static pointer must be initialized already."); - Address Buffer = CGF.EmitLoadOfPointer( - Address(StaticGlobalized, CGM.getPointerAlign()), - CGM.getContext() - .getPointerType(CGM.getContext().VoidPtrTy) - .castAs<PointerType>()); + assert(KernelStaticGlobalized && + "Kernel static pointer must be initialized already."); + auto *UseSharedMemory = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int16Ty, /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, nullptr, + "_openmp_static_kernel$is_shared"); + UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth( + /*DestWidth=*/16, /*Signed=*/0); + 
llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar( + Address(UseSharedMemory, + CGM.getContext().getTypeAlignInChars(Int16Ty)), + /*Volatile=*/false, Int16Ty, Loc); + auto *StaticGlobalized = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false, + llvm::GlobalValue::WeakAnyLinkage, nullptr); auto *RecSize = new llvm::GlobalVariable( CGM.getModule(), CGM.SizeTy, /*isConstant=*/true, llvm::GlobalValue::InternalLinkage, nullptr, "_openmp_static_kernel$size"); RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); llvm::Value *Ld = CGF.EmitLoadOfScalar( - Address(RecSize, CGM.getPointerAlign()), /*Volatile=*/false, + Address(RecSize, CGM.getSizeAlign()), /*Volatile=*/false, CGM.getContext().getSizeType(), Loc); llvm::Value *ResAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( KernelStaticGlobalized, CGM.VoidPtrPtrTy); - llvm::Value *GlobalRecordSizeArg[] = { - Buffer.getPointer(), Ld, - llvm::ConstantInt::getNullValue(CGM.Int16Ty), ResAddr}; + llvm::Value *GlobalRecordSizeArg[] = {StaticGlobalized, Ld, + IsInSharedMemory, ResAddr}; CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( OMPRTL_NVPTX__kmpc_get_team_static_memory), GlobalRecordSizeArg); + GlobalizedRecords.back().Buffer = StaticGlobalized; GlobalizedRecords.back().RecSize = RecSize; + GlobalizedRecords.back().UseSharedMemory = UseSharedMemory; + GlobalizedRecords.back().Loc = Loc; } assert(KernelStaticGlobalized && "Global address must be set already."); Address FrameAddr = CGF.EmitLoadOfPointer( @@ -2336,10 +2336,16 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa --GlobalizedRecords.back().RegionCounter; // Emit the restore function only in the target region. 
if (GlobalizedRecords.back().RegionCounter == 0) { + QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth( + /*DestWidth=*/16, /*Signed=*/0); + llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar( + Address(GlobalizedRecords.back().UseSharedMemory, + CGM.getContext().getTypeAlignInChars(Int16Ty)), + /*Volatile=*/false, Int16Ty, GlobalizedRecords.back().Loc); CGF.EmitRuntimeCall( createNVPTXRuntimeFunction( OMPRTL_NVPTX__kmpc_restore_team_static_memory), - llvm::ConstantInt::getNullValue(CGM.Int16Ty)); + IsInSharedMemory); } } else { CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( @@ -4507,21 +4513,24 @@ static std::pair<unsigned, unsigned> get void CGOpenMPRuntimeNVPTX::clear() { if (!GlobalizedRecords.empty()) { ASTContext &C = CGM.getContext(); + llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> GlobalRecs; + llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> SharedRecs; RecordDecl *StaticRD = C.buildImplicitRecord( "_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union); StaticRD->startDefinition(); + RecordDecl *SharedStaticRD = C.buildImplicitRecord( + "_shared_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union); + SharedStaticRD->startDefinition(); for (const GlobalPtrSizeRecsTy &Records : GlobalizedRecords) { if (Records.Records.empty()) continue; unsigned Size = 0; unsigned RecAlignment = 0; for (const RecordDecl *RD : Records.Records) { - QualType RDTy = CGM.getContext().getRecordType(RD); - unsigned Alignment = - CGM.getContext().getTypeAlignInChars(RDTy).getQuantity(); + QualType RDTy = C.getRecordType(RD); + unsigned Alignment = C.getTypeAlignInChars(RDTy).getQuantity(); RecAlignment = std::max(RecAlignment, Alignment); - unsigned RecSize = - CGM.getContext().getTypeSizeInChars(RDTy).getQuantity(); + unsigned RecSize = C.getTypeSizeInChars(RDTy).getQuantity(); Size = llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment); } @@ -4529,32 +4538,67 @@ void CGOpenMPRuntimeNVPTX::clear() { llvm::APInt 
ArySize(/*numBits=*/64, Size); QualType SubTy = C.getConstantArrayType( C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0); - auto *Field = FieldDecl::Create( - C, StaticRD, SourceLocation(), SourceLocation(), nullptr, SubTy, - C.getTrivialTypeSourceInfo(SubTy, SourceLocation()), - /*BW=*/nullptr, /*Mutable=*/false, - /*InitStyle=*/ICIS_NoInit); + const bool UseSharedMemory = Size <= SharedMemorySize; + auto *Field = + FieldDecl::Create(C, UseSharedMemory ? SharedStaticRD : StaticRD, + SourceLocation(), SourceLocation(), nullptr, SubTy, + C.getTrivialTypeSourceInfo(SubTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, + /*InitStyle=*/ICIS_NoInit); Field->setAccess(AS_public); - StaticRD->addDecl(Field); + if (UseSharedMemory) { + SharedStaticRD->addDecl(Field); + SharedRecs.push_back(&Records); + } else { + StaticRD->addDecl(Field); + GlobalRecs.push_back(&Records); + } Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.SizeTy, Size)); + Records.UseSharedMemory->setInitializer( + llvm::ConstantInt::get(CGM.Int16Ty, UseSharedMemory ? 
1 : 0)); + } + SharedStaticRD->completeDefinition(); + if (!SharedStaticRD->field_empty()) { + QualType StaticTy = C.getRecordType(SharedStaticRD); + llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), LLVMStaticTy, + /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage, + llvm::Constant::getNullValue(LLVMStaticTy), + "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr, + llvm::GlobalValue::NotThreadLocal, + C.getTargetAddressSpace(LangAS::cuda_shared)); + auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + GV, CGM.VoidPtrTy); + for (const GlobalPtrSizeRecsTy *Rec : SharedRecs) { + Rec->Buffer->replaceAllUsesWith(Replacement); + Rec->Buffer->eraseFromParent(); + } } StaticRD->completeDefinition(); - QualType StaticTy = C.getRecordType(StaticRD); - std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM); - llvm::APInt Size1(32, SMsBlockPerSM.second); - QualType Arr1Ty = C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal, - /*IndexTypeQuals=*/0); - llvm::APInt Size2(32, SMsBlockPerSM.first); - QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal, - /*IndexTypeQuals=*/0); - llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty); - auto *GV = new llvm::GlobalVariable( - CGM.getModule(), LLVMArr2Ty, - /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage, - llvm::Constant::getNullValue(LLVMArr2Ty), "_openmp_static_glob_rd_$_"); - StaticGlobalized->setInitializer( - llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, - CGM.VoidPtrTy)); + if (!StaticRD->field_empty()) { + QualType StaticTy = C.getRecordType(StaticRD); + std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM); + llvm::APInt Size1(32, SMsBlockPerSM.second); + QualType Arr1Ty = + C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal, + /*IndexTypeQuals=*/0); + llvm::APInt Size2(32, SMsBlockPerSM.first); + QualType Arr2Ty = 
C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal, + /*IndexTypeQuals=*/0); + llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), LLVMArr2Ty, + /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage, + llvm::Constant::getNullValue(LLVMArr2Ty), + "_openmp_static_glob_rd_$_"); + auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + GV, CGM.VoidPtrTy); + for (const GlobalPtrSizeRecsTy *Rec : GlobalRecs) { + Rec->Buffer->replaceAllUsesWith(Replacement); + Rec->Buffer->eraseFromParent(); + } + } } CGOpenMPRuntime::clear(); } Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=346507&r1=346506&r2=346507&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Fri Nov 9 08:18:04 2018 @@ -420,14 +420,14 @@ private: /// union. This resulting union (one per CU) is the entry point for the static /// memory management runtime functions. struct GlobalPtrSizeRecsTy { + llvm::GlobalVariable *UseSharedMemory = nullptr; llvm::GlobalVariable *RecSize = nullptr; + llvm::GlobalVariable *Buffer = nullptr; + SourceLocation Loc; llvm::SmallVector<const RecordDecl *, 2> Records; unsigned RegionCounter = 0; }; llvm::SmallVector<GlobalPtrSizeRecsTy, 8> GlobalizedRecords; - /// Global variable used for staticlly allocated global memoryused for - /// globalization in target/teams/distribute regions. - llvm::GlobalVariable *StaticGlobalized = nullptr; /// Shared pointer for the global memory in the global memory buffer used for /// the given kernel. 
llvm::GlobalVariable *KernelStaticGlobalized = nullptr; Modified: cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp?rev=346507&r1=346506&r2=346507&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp (original) +++ cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp Fri Nov 9 08:18:04 2018 @@ -27,10 +27,10 @@ void test_ds(){ } } // CK1: [[MEM_TY:%.+]] = type { [8 x i8] } -// CK1-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer -// CK1-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0) +// CK1-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer // CK1-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null // CK1-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i64 8 +// CK1-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1 /// ========= In the worker function ========= /// // CK1: {{.*}}define internal void @__omp_offloading{{.*}}test_ds{{.*}}_worker() @@ -44,9 +44,9 @@ void test_ds(){ // CK1: [[SHAREDARGS2:%.+]] = alloca i8** // CK1: call void @__kmpc_kernel_init // CK1: call void @__kmpc_data_sharing_init_stack -// CK1: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]], +// CK1: [[SHARED_MEM_FLAG:%.+]] = load i16, i16* [[KERNEL_SHARED]], // CK1: [[SIZE:%.+]] = load i64, i64* [[KERNEL_SIZE]], -// CK1: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i64 [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) +// CK1: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* 
[[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i64 [[SIZE]], i16 [[SHARED_MEM_FLAG]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) // CK1: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]], // CK1: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i64 0 // CK1: [[GLOBALSTACK2:%.+]] = bitcast i8* [[GLOBALSTACK]] to %struct._globalized_locals_ty* @@ -75,7 +75,8 @@ void test_ds(){ // CK1: call void @llvm.nvvm.barrier0() // CK1: call void @llvm.nvvm.barrier0() // CK1: call void @__kmpc_end_sharing_variables() -// CK1: call void @__kmpc_restore_team_static_memory(i16 0) +// CK1: [[SHARED_MEM_FLAG:%.+]] = load i16, i16* [[KERNEL_SHARED]], +// CK1: call void @__kmpc_restore_team_static_memory(i16 [[SHARED_MEM_FLAG]]) // CK1: call void @__kmpc_kernel_deinit(i16 1) /// ========= In the data sharing wrapper function ========= /// Modified: cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp?rev=346507&r1=346506&r2=346507&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp Fri Nov 9 08:18:04 2018 @@ -22,8 +22,7 @@ int main(int argc, char **argv) { } // CHECK: [[MEM_TY:%.+]] = type { [84 x i8] } -// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer -// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0) +// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer // CHECK-DAG: [[KERNEL_PTR:@.+]] = 
internal addrspace(3) global i8* null // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 84 // CHECK-DAG: @__omp_offloading_{{.*}}_main_l17_exec_mode = weak constant i8 1 @@ -31,8 +30,7 @@ int main(int argc, char **argv) { // CHECK-LABEL: define internal void @__omp_offloading_{{.*}}_main_l17_worker( // CHECK: define weak void @__omp_offloading_{{.*}}_main_l17([10 x i32]* dereferenceable(40) %{{.+}}, [10 x i32]* dereferenceable(40) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i{{64|32}} %{{.+}}, [10 x i32]* dereferenceable(40) %{{.+}}) -// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]], -// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} 84, i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) +// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 84, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) // CHECK: [[PTR:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]], // CHECK: [[STACK:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty* // CHECK: [[ARGC:%.+]] = load i32, i32* %{{.+}}, align @@ -48,7 +46,7 @@ int main(int argc, char **argv) { // CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* @ -// CHECK: call void @__kmpc_restore_team_static_memory(i16 0) +// CHECK: call void @__kmpc_restore_team_static_memory(i16 1) // CHECK: define internal void [[PARALLEL]]( // CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack( Modified: cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp?rev=346507&r1=346506&r2=346507&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp (original) +++ 
cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp Fri Nov 9 08:18:04 2018 @@ -72,10 +72,10 @@ int bar(int n){ } // CHECK: [[MEM_TY:%.+]] = type { [4 x i8] } -// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer -// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0) +// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4 +// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1 // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l17}}_worker() @@ -324,9 +324,9 @@ int bar(int n){ // CHECK-32: [[A_ADDR:%.+]] = alloca i32, // CHECK-64: [[A_ADDR:%.+]] = alloca i64, // CHECK-64: [[CONV:%.+]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]], +// CHECK: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]], // CHECK: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE]], -// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) +// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) // CHECK: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]], // CHECK: [[STACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0 // CHECK: [[BC:%.+]] = bitcast i8* [[STACK]] to 
%struct._globalized_locals_ty* @@ -334,7 +334,8 @@ int bar(int n){ // CHECK-64: [[A:%.+]] = load i32, i32* [[CONV]], // CHECK: [[GLOBAL_A_ADDR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: store i32 [[A]], i32* [[GLOBAL_A_ADDR]], -// CHECK: call void @__kmpc_restore_team_static_memory(i16 0) +// CHECK: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]], +// CHECK: call void @__kmpc_restore_team_static_memory(i16 [[IS_SHARED]]) // CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.*}}) // CHECK: [[CC:%.+]] = alloca i32, Modified: cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp?rev=346507&r1=346506&r2=346507&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp Fri Nov 9 08:18:04 2018 @@ -31,10 +31,10 @@ int bar(int n){ } // CHECK: [[MEM_TY:%.+]] = type { [4 x i8] } -// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer -// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0) +// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4 +// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l12}}_worker() // CHECK: call void 
@llvm.nvvm.barrier0() @@ -45,9 +45,9 @@ int bar(int n){ // CHECK: call void @__omp_offloading_{{.*}}l12_worker() // CHECK: call void @__kmpc_kernel_init( // CHECK: call void @__kmpc_data_sharing_init_stack() -// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]], +// CHECK: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]], // CHECK: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE]], -// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) +// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i64 %7, i16 %6, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) // CHECK: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]], // CHECK: [[STACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0 // CHECK: call void @__kmpc_kernel_prepare_parallel( @@ -55,7 +55,8 @@ int bar(int n){ // CHECK: call void @llvm.nvvm.barrier0() // CHECK: call void @llvm.nvvm.barrier0() // CHECK: call void @__kmpc_end_sharing_variables() -// CHECK: call void @__kmpc_restore_team_static_memory(i16 0) +// CHECK: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]], +// CHECK: call void @__kmpc_restore_team_static_memory(i16 [[IS_SHARED]]) // CHECK: call void @__kmpc_kernel_deinit(i16 1) // CHECK: define internal void @__omp_outlined__( Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp?rev=346507&r1=346506&r2=346507&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp (original) +++ 
cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp Fri Nov 9 08:18:04 2018 @@ -68,23 +68,22 @@ int bar(int n){ } // CHECK-DAG: [[MEM_TY:%.+]] = type { [4 x i8] } -// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer -// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0) +// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4 +// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l32( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1) -// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]], -// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} 4, i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) +// CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) // CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]], // CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]* // CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: {{call|invoke}} void [[OUTL1:@.+]]( // CHECK: call void 
@__kmpc_for_static_fini( -// CHECK: call void @__kmpc_restore_team_static_memory(i16 0) +// CHECK: call void @__kmpc_restore_team_static_memory(i16 1) // CHECK: call void @__kmpc_spmd_kernel_deinit() // CHECK: ret void Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp?rev=346507&r1=346506&r2=346507&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp Fri Nov 9 08:18:04 2018 @@ -63,23 +63,22 @@ int bar(int n){ } // CHECK-DAG: [[MEM_TY:%.+]] = type { [4 x i8] } -// CHECK-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer -// CHECK-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0) +// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer // CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4 +// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 1) -// CHECK: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]], -// CHECK: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} 4, i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) +// CHECK: 
call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) // CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]], // CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]* // CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: {{call|invoke}} void [[OUTL1:@.+]]( // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_restore_team_static_memory(i16 0) +// CHECK: call void @__kmpc_restore_team_static_memory(i16 1) // CHECK: call void @__kmpc_spmd_kernel_deinit() // CHECK: ret void Modified: cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp?rev=346507&r1=346506&r2=346507&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp Fri Nov 9 08:18:04 2018 @@ -28,11 +28,12 @@ int main (int argc, char **argv) { } // CK1: [[MEM_TY:%.+]] = type { [{{4|8}} x i8] } -// CK1-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer -// CK1-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0) +// CK1-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer // CK1-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null // CK1-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} 4 // CK1-DAG: 
[[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} {{8|4}} +// CK1-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1 +// CK1-DAG: [[KERNEL_SHARED2:@.+]] = internal unnamed_addr constant i16 1 // only nvptx side: do not outline teams region and do not call fork_teams // CK1: define {{.*}}void @{{[^,]+}}(i{{[0-9]+}} [[ARGC:%.+]]) @@ -43,9 +44,9 @@ int main (int argc, char **argv) { // CK1: store {{.+}} 0, {{.+}}, // CK1: store i{{[0-9]+}} [[ARGC]], i{{[0-9]+}}* [[ARGCADDR]], // CK1-64: [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ARGCADDR]] to i{{[0-9]+}}* -// CK1: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]], +// CK1: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED1]], // CK1: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE1]], -// CK1: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) +// CK1: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) // CK1: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]], // CK1: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0 // CK1-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]] @@ -64,9 +65,9 @@ int main (int argc, char **argv) { // CK1: [[ARGCADDR_PTR:%.+]] = alloca i{{.+}}***, // CK1: [[ARGCADDR:%.+]] = alloca i{{.+}}**, // CK1: store i{{.+}}** [[ARGC]], i{{.+}}*** [[ARGCADDR]] -// CK1: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]], +// CK1: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED2]], // CK1: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE2]], -// CK1: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* 
addrspace(3)* [[KERNEL_PTR]] to i8**)) +// CK1: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) // CK1: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]], // CK1: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0 // CK1: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]] @@ -114,11 +115,12 @@ int main (int argc, char **argv) { } // CK2: [[MEM_TY:%.+]] = type { [{{4|8}} x i8] } -// CK2-DAG: [[GLOBAL_RD:@.+]] = weak global [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]] zeroinitializer -// CK2-DAG: [[GLOBAL_RD_PTR:@.+]] = weak unnamed_addr constant i8* getelementptr inbounds ([{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]], [{{[0-9]+}} x [{{[0-9]+}} x [[MEM_TY]]]]* [[GLOBAL_RD]], i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0, i{{[0-9]+}} 0) +// CK2-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] zeroinitializer // CK2-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null // CK2-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} 4 // CK2-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} {{8|4}} +// CK2-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1 +// CK2-DAG: [[KERNEL_SHARED2:@.+]] = internal unnamed_addr constant i16 1 // CK2: define {{.*}}void @{{[^,]+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[B_IN:%.+]], i{{[0-9]+}} [[ARGC_IN:.+]]) // CK2: {{.}} = alloca i{{[0-9]+}}*, @@ -133,9 +135,9 @@ int main (int argc, char **argv) { // CK2-64: [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32* // CK2-64: [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32* // CK2-64: [[CONV:%.+]] = bitcast i64* [[ARGCADDR]] to i32* -// CK2: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]], +// CK2: [[IS_SHARED:%.+]] 
= load i16, i16* [[KERNEL_SHARED1]], // CK2: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE1]], -// CK2: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) +// CK2: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) // CK2: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]], // CK2: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0 // CK2-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]] @@ -158,9 +160,9 @@ int main (int argc, char **argv) { // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]], // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]], // CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]], -// CK2: [[GLOBAL_RD:%.+]] = load i8*, i8** [[GLOBAL_RD_PTR]], +// CK2: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED2]], // CK2: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE2]], -// CK2: call void @__kmpc_get_team_static_memory(i8* [[GLOBAL_RD]], i{{64|32}} [[SIZE]], i16 0, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) +// CK2: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[IS_SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) // CK2: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]], // CK2: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i{{64|32}} 0 // CK2: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]] _______________________________________________ cfe-commits mailing list 
cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits