Hello Alexey, It looks like this commit broke tests on one of our builders. The failure did not show up right away because it was masked by other build failures.
Please have a look? Thanks Galina http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/13262 . . . Failing Tests (10): Clang :: OpenMP/declare_target_codegen_globalization.cpp Clang :: OpenMP/nvptx_SPMD_codegen.cpp Clang :: OpenMP/nvptx_data_sharing.cpp Clang :: OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp Clang :: OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp Clang :: OpenMP/nvptx_parallel_codegen.cpp Clang :: OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp Clang :: OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp Clang :: OpenMP/nvptx_teams_codegen.cpp Clang :: OpenMP/nvptx_teams_reduction_codegen.cpp On Fri, Oct 12, 2018 at 9:06 AM Alexey Bataev via cfe-commits < cfe-commits@lists.llvm.org> wrote: > Author: abataev > Date: Fri Oct 12 09:04:20 2018 > New Revision: 344356 > > URL: http://llvm.org/viewvc/llvm-project?rev=344356&view=rev > Log: > [OPENMP][NVPTX]Reduce memory usage in orphaned functions. > > if the function has globalized variables and called in context of > target/teams/distribute regions, it does not need to globalize 32 > copies of the same variables for memory coalescing, it is enough to > have just one copy, because there is parallel region. > Patch does this by adding call for `__kmpc_parallel_level` function and > checking its return value. If the code sees that the parallel level is > 0, then only one variable is allocated, not 32. 
> > Modified: > cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp > cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h > cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp > > Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=344356&r1=344355&r2=344356&view=diff > > ============================================================================== > --- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original) > +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Fri Oct 12 09:04:20 2018 > @@ -1972,6 +1972,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa > return; > if (const RecordDecl *GlobalizedVarsRecord = > I->getSecond().GlobalRecord) { > QualType GlobalRecTy = > CGM.getContext().getRecordType(GlobalizedVarsRecord); > + QualType SecGlobalRecTy; > > // Recover pointer to this function's global record. The runtime will > // handle the specifics of the allocation of the memory. > @@ -1986,11 +1987,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa > llvm::PointerType *GlobalRecPtrTy = > CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo(); > llvm::Value *GlobalRecCastAddr; > + llvm::Value *IsTTD = nullptr; > if (WithSPMDCheck || > getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) { > llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); > llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd"); > llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd"); > + if (I->getSecond().SecondaryGlobalRecord.hasValue()) { > + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); > + llvm::Value *ThreadID = getThreadID(CGF, Loc); > + llvm::Value *PL = CGF.EmitRuntimeCall( > + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), > + {RTLoc, ThreadID}); > + IsTTD = Bld.CreateIsNull(PL); > + } > llvm::Value *IsSPMD = > Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( > > createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); > Bld.CreateCondBr(IsSPMD, SPMDBB, 
NonSPMDBB); > @@ -2003,11 +2013,28 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa > // There is no need to emit line number for unconditional branch. > (void)ApplyDebugLocation::CreateEmpty(CGF); > CGF.EmitBlock(NonSPMDBB); > + llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, > GlobalRecordSize); > + if (const RecordDecl *SecGlobalizedVarsRecord = > + I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) { > + SecGlobalRecTy = > + CGM.getContext().getRecordType(SecGlobalizedVarsRecord); > + > + // Recover pointer to this function's global record. The runtime > will > + // handle the specifics of the allocation of the memory. > + // Use actual memory size of the record including the padding > + // for alignment purposes. > + unsigned Alignment = > + > CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity(); > + unsigned GlobalRecordSize = > + > CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity(); > + GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment); > + Size = Bld.CreateSelect( > + IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), > Size); > + } > // TODO: allow the usage of shared memory to be controlled by > // the user, for now, default to global. > llvm::Value *GlobalRecordSizeArg[] = { > - llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), > - CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; > + Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; > llvm::Value *GlobalRecValue = > CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( > > OMPRTL_NVPTX__kmpc_data_sharing_push_stack), > @@ -2042,6 +2069,17 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa > > // Emit the "global alloca" which is a GEP from the global declaration > // record using the pointer returned by the runtime. 
> + LValue SecBase; > + decltype(I->getSecond().LocalVarData)::const_iterator SecIt; > + if (IsTTD) { > + SecIt = I->getSecond().SecondaryLocalVarData->begin(); > + llvm::PointerType *SecGlobalRecPtrTy = > + CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo(); > + SecBase = CGF.MakeNaturalAlignPointeeAddrLValue( > + Bld.CreatePointerBitCastOrAddrSpaceCast( > + I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy), > + SecGlobalRecTy); > + } > for (auto &Rec : I->getSecond().LocalVarData) { > bool EscapedParam = > I->getSecond().EscapedParameters.count(Rec.first); > llvm::Value *ParValue; > @@ -2055,23 +2093,32 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa > // Emit VarAddr basing on lane-id if required. > QualType VarTy; > if (Rec.second.IsOnePerTeam) { > - Rec.second.PrivateAddr = VarAddr.getAddress(); > VarTy = Rec.second.FD->getType(); > } else { > llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP( > VarAddr.getAddress().getPointer(), > {Bld.getInt32(0), getNVPTXLaneID(CGF)}); > - Rec.second.PrivateAddr = > - Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)); > VarTy = > > Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType(); > - VarAddr = CGF.MakeAddrLValue(Rec.second.PrivateAddr, VarTy, > - AlignmentSource::Decl); > + VarAddr = CGF.MakeAddrLValue( > + Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy, > + AlignmentSource::Decl); > } > + Rec.second.PrivateAddr = VarAddr.getAddress(); > if (WithSPMDCheck || > - getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) { > + getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) { > assert(I->getSecond().IsInSPMDModeFlag && > "Expected unknown execution mode or required SPMD check."); > + if (IsTTD) { > + assert(SecIt->second.IsOnePerTeam && > + "Secondary glob data must be one per team."); > + LValue SecVarAddr = CGF.EmitLValueForField(SecBase, > SecIt->second.FD); > + VarAddr.setAddress( > + Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(), > + VarAddr.getPointer()), > + 
VarAddr.getAlignment())); > + Rec.second.PrivateAddr = VarAddr.getAddress(); > + } > Address GlobalPtr = Rec.second.PrivateAddr; > Address LocalAddr = CGF.CreateMemTemp(VarTy, > Rec.second.FD->getName()); > Rec.second.PrivateAddr = Address( > @@ -2084,6 +2131,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa > CGF.EmitStoreOfScalar(ParValue, VarAddr); > I->getSecond().MappedParams->setVarAddr(CGF, VD, > VarAddr.getAddress()); > } > + ++SecIt; > } > } > for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) { > @@ -4115,6 +4163,21 @@ void CGOpenMPRuntimeNVPTX::emitFunctionP > Data.insert( > std::make_pair(VD, MappedVarData(FD, > IsInTargetMasterThreadRegion))); > } > + if (!IsInTargetMasterThreadRegion && !NeedToDelayGlobalization && > + !IsInParallelRegion) { > + CheckVarsEscapingDeclContext VarChecker(CGF); > + VarChecker.Visit(Body); > + I->getSecond().SecondaryGlobalRecord = > + > VarChecker.getGlobalizedRecord(/*IsInTargetMasterThreadRegion=*/true); > + I->getSecond().SecondaryLocalVarData.emplace(); > + DeclToAddrMapTy &Data = > I->getSecond().SecondaryLocalVarData.getValue(); > + for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { > + assert(VD->isCanonicalDecl() && "Expected canonical declaration"); > + const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD); > + Data.insert(std::make_pair( > + VD, MappedVarData(FD, /*IsInTargetMasterThreadRegion=*/true))); > + } > + } > if (!NeedToDelayGlobalization) { > emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true); > struct GlobalizationScope final : EHScopeStack::Cleanup { > > Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=344356&r1=344355&r2=344356&view=diff > > ============================================================================== > --- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original) > +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Fri Oct 12 09:04:20 
2018 > @@ -376,7 +376,7 @@ private: > /// The data for the single globalized variable. > struct MappedVarData { > /// Corresponding field in the global record. > - const FieldDecl * FD = nullptr; > + const FieldDecl *FD = nullptr; > /// Corresponding address. > Address PrivateAddr = Address::invalid(); > /// true, if only one element is required (for latprivates in SPMD > mode), > @@ -392,10 +392,12 @@ private: > using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>; > struct FunctionData { > DeclToAddrMapTy LocalVarData; > + llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None; > EscapedParamsTy EscapedParameters; > llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls; > llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs; > const RecordDecl *GlobalRecord = nullptr; > + llvm::Optional<const RecordDecl *> SecondaryGlobalRecord = llvm::None; > llvm::Value *GlobalRecordAddr = nullptr; > llvm::Value *IsInSPMDModeFlag = nullptr; > std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams; > > Modified: cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp?rev=344356&r1=344355&r2=344356&view=diff > > ============================================================================== > --- cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp (original) > +++ cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp Fri Oct 12 09:04:20 2018 > @@ -557,20 +557,26 @@ int baz(int f, double &a) { > // CHECK: alloca i32, > // CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32, > // CHECK: [[ZERO_ADDR:%.+]] = alloca i32, > - // CHECK: [[GTID:%.+]] = call i32 > @__kmpc_global_thread_num(%struct.ident_t* > // CHECK: store i32 0, i32* [[ZERO_ADDR]] > + // CHECK: [[GTID:%.+]] = call i32 > @__kmpc_global_thread_num(%struct.ident_t* > + // CHECK: [[PAR_LEVEL:%.+]] = call i16 > @__kmpc_parallel_level(%struct.ident_t* @0, i32 [[GTID]]) > + // CHECK: [[IS_TTD:%.+]] = icmp eq i16 %1, 0 > // 
CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode() > // CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0 > // CHECK: br i1 [[IS_SPMD]], label > // CHECK: br label > - // CHECK: [[PTR:%.+]] = call i8* > @__kmpc_data_sharing_push_stack(i{{64|32}} 128, i16 0) > + // CHECK: [[SIZE:%.+]] = select i1 [[IS_TTD]], i{{64|32}} 4, i{{64|32}} > 128 > + // CHECK: [[PTR:%.+]] = call i8* > @__kmpc_data_sharing_push_stack(i{{64|32}} [[SIZE]], i16 0) > // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST:%.+]]* > // CHECK: br label > // CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ null, {{.+}} ], [ > [[REC_ADDR]], {{.+}} ] > + // CHECK: [[TTD_ITEMS:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to > [[SEC_GLOBAL_ST:%.+]]* > // CHECK: [[F_PTR_ARR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], > [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0 > // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() > // CHECK: [[LID:%.+]] = and i32 [[TID]], 31 > - // CHECK: [[GLOBAL_F_PTR:%.+]] = getelementptr inbounds [32 x i32], [32 > x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]] > + // CHECK: [[GLOBAL_F_PTR_PAR:%.+]] = getelementptr inbounds [32 x i32], > [32 x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]] > + // CHECK: [[GLOBAL_F_PTR_TTD:%.+]] = getelementptr inbounds > [[SEC_GLOBAL_ST]], [[SEC_GLOBAL_ST]]* [[TTD_ITEMS]], i32 0, i32 0 > + // CHECK: [[GLOBAL_F_PTR:%.+]] = select i1 [[IS_TTD]], i32* > [[GLOBAL_F_PTR_TTD]], i32* [[GLOBAL_F_PTR_PAR]] > // CHECK: [[F_PTR:%.+]] = select i1 [[IS_SPMD]], i32* [[LOCAL_F_PTR]], > i32* [[GLOBAL_F_PTR]] > // CHECK: store i32 %{{.+}}, i32* [[F_PTR]], > > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits >
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits