Author: Alexey Bataev Date: 2020-05-14T18:02:12-04:00 New Revision: 0363ae97abb841114e841a963c95eb6a2202716d
URL: https://github.com/llvm/llvm-project/commit/0363ae97abb841114e841a963c95eb6a2202716d DIFF: https://github.com/llvm/llvm-project/commit/0363ae97abb841114e841a963c95eb6a2202716d.diff LOG: [OPENMP50]Codegen for uses_allocators clause. Summary: Predefined allocators should not be mapped at all (they are just enumeric constants). FOr user-defined allocators need to map the traits only as firstprivates, the allocator itself is private. At the beginning of the target region the user-defined allocatores must be created and then destroyed at the end of the target region: ``` omp_allocator_handle_t my_allocator = __kmpc_init_allocator(<gtid>, /*default memhandle*/ 0, <number_of_traits>, &<traits>); ... call void @__kmpc_destroy_allocator(<gtid>, my_allocator); ``` Reviewers: jdoerfert, aaron.ballman Subscribers: jholewinski, yaxunl, guansong, cfe-commits, caomhin Tags: #clang Differential Revision: https://reviews.llvm.org/D79257 Added: clang/test/OpenMP/target_parallel_for_simd_uses_allocators_codegen.cpp clang/test/OpenMP/target_parallel_for_uses_allocators_codegen.cpp clang/test/OpenMP/target_parallel_uses_allocators_codegen.cpp clang/test/OpenMP/target_simd_uses_allocators_codegen.cpp clang/test/OpenMP/target_teams_distribute_parallel_for_simd_uses_allocators_codegen.cpp clang/test/OpenMP/target_teams_distribute_parallel_for_uses_allocators_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_uses_allocators_codegen.cpp clang/test/OpenMP/target_teams_distribute_uses_allocators_codegen.cpp clang/test/OpenMP/target_teams_uses_allocators_codegen.cpp clang/test/OpenMP/target_uses_allocators_codegen.cpp Modified: clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGOpenMPRuntime.h clang/lib/Sema/SemaOpenMP.cpp Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index b7d05ed48e59..250893fef19d 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -720,6 +720,11 @@ enum OpenMPRTLFunction { OMPRTL__kmpc_alloc, // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); OMPRTL__kmpc_free, + // Call to omp_allocator_handle_t __kmpc_init_allocator(int gtid, + // omp_memspace_handle_t, int ntraits, omp_alloctrait_t traits[]); + OMPRTL__kmpc_init_allocator, + // Call to void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al); + OMPRTL__kmpc_destroy_allocator, // // Offloading related calls @@ -2392,6 +2397,26 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); break; } + case OMPRTL__kmpc_init_allocator: { + // Build omp_allocator_handle_t __kmpc_init_allocator(int gtid, + // omp_memspace_handle_t, int ntraits, omp_alloctrait_t traits[]); + // omp_allocator_handle_t type is void*, omp_memspace_handle_t type is + // void*. + auto *FnTy = llvm::FunctionType::get( + CGM.VoidPtrTy, {CGM.IntTy, CGM.VoidPtrTy, CGM.IntTy, CGM.VoidPtrTy}, + /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_init_allocator"); + break; + } + case OMPRTL__kmpc_destroy_allocator: { + // Build void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al); + // omp_allocator_handle_t type is void*. + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, {CGM.IntTy, CGM.VoidPtrTy}, + /*isVarArg=*/false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_destroy_allocator"); + break; + } case OMPRTL__kmpc_push_target_tripcount: { // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 // size); @@ -7085,16 +7110,104 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, } } +namespace { +/// Cleanup action for uses_allocators support. +class OMPUsesAllocatorsActionTy final : public PrePostActionTy { + ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; + +public: + OMPUsesAllocatorsActionTy( + ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) + : Allocators(Allocators) {} + void Enter(CodeGenFunction &CGF) override { + if (!CGF.HaveInsertPoint()) + return; + for (const auto &AllocatorData : Allocators) { + CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( + CGF, AllocatorData.first, AllocatorData.second); + } + } + void Exit(CodeGenFunction &CGF) override { + if (!CGF.HaveInsertPoint()) + return; + for (const auto &AllocatorData : Allocators) { + CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, + AllocatorData.first); + } + } +}; +} // namespace + void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { assert(!ParentName.empty() && "Invalid target region parent name!"); HasEmittedTargetRegion = true; + SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; + for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { + for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { + const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); + if (!D.AllocatorTraits) + continue; + Allocators.emplace_back(D.Allocator, D.AllocatorTraits); + } + } + OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); + CodeGen.setAction(UsesAllocatorAction); emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); } +void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, + const Expr *Allocator, + const Expr *AllocatorTraits) { + llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); + ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); + // Use default memspace handle. + llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + llvm::Value *NumTraits = llvm::ConstantInt::get( + CGF.IntTy, cast<ConstantArrayType>( + AllocatorTraits->getType()->getAsArrayTypeUnsafe()) + ->getSize() + .getLimitedValue()); + LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); + Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); + AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, + AllocatorTraitsLVal.getBaseInfo(), + AllocatorTraitsLVal.getTBAAInfo()); + llvm::Value *Traits = + CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); + + llvm::Value *AllocatorVal = + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_init_allocator), + {ThreadId, MemSpaceHandle, NumTraits, Traits}); + // Store to allocator. + CGF.EmitVarDecl(*cast<VarDecl>( + cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); + LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); + AllocatorVal = + CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, + Allocator->getType(), Allocator->getExprLoc()); + CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); +} + +void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, + const Expr *Allocator) { + llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); + ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); + LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); + llvm::Value *AllocatorVal = + CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); + AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), + CGF.getContext().VoidPtrTy, + Allocator->getExprLoc()); + (void)CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_destroy_allocator), + {ThreadId, AllocatorVal}); +} + void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, @@ -8537,6 +8650,19 @@ class MappableExprsHandler { for (const auto *D : C->varlists()) FirstPrivateDecls.try_emplace( cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); + // Extract implicit firstprivates from uses_allocators clauses. + for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { + for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { + OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); + if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) + FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), + /*Implicit=*/true); + else if (const auto *VD = dyn_cast<VarDecl>( + cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) + ->getDecl())) + FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); + } + } // Extract device pointer clause information. for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) for (auto L : C->component_lists()) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 7a6a06aaf4a6..0b1f81983aa3 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1832,6 +1832,14 @@ class CGOpenMPRuntime { /// \param NewDepKind New dependency kind. void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc); + + /// Initializes user defined allocators specified in the uses_allocators + /// clauses. + void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, + const Expr *AllocatorTraits); + + /// Destroys user defined allocators specified in the uses_allocators clause. + void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator); }; /// Class supports emissionof SIMD-only code. diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index d5fdafd441af..544dc6134387 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -79,6 +79,15 @@ class DSAStackTy { llvm::SmallVector<std::pair<Expr *, OverloadedOperatorKind>, 4>; using DoacrossDependMapTy = llvm::DenseMap<OMPDependClause *, OperatorOffsetTy>; + /// Kind of the declaration used in the uses_allocators clauses. + enum class UsesAllocatorsDeclKind { + /// Predefined allocator + PredefinedAllocator, + /// User-defined allocator + UserDefinedAllocator, + /// The declaration that represent allocator trait + AllocatorTrait, + }; private: struct DSAInfo { @@ -170,7 +179,8 @@ class DSAStackTy { llvm::SmallVector<DeclRefExpr *, 4> DeclareTargetLinkVarDecls; /// List of decls used in inclusive/exclusive clauses of the scan directive. llvm::DenseSet<CanonicalDeclPtr<Decl>> UsedInScanDirective; - llvm::DenseSet<CanonicalDeclPtr<const Decl>> UsesAllocatorsDecls; + llvm::DenseMap<CanonicalDeclPtr<const Decl>, UsesAllocatorsDeclKind> + UsesAllocatorsDecls; SharingMapTy(OpenMPDirectiveKind DKind, DeclarationNameInfo Name, Scope *CurScope, SourceLocation Loc) : Directive(DKind), DirectiveName(Name), CurScope(CurScope), @@ -1019,16 +1029,25 @@ class DSAStackTy { } /// Marks decl as used in uses_allocators clause as the allocator. - void addUsesAllocatorsDecl(const Decl *D) { - getTopOfStack().UsesAllocatorsDecls.insert(D); + void addUsesAllocatorsDecl(const Decl *D, UsesAllocatorsDeclKind Kind) { + getTopOfStack().UsesAllocatorsDecls.try_emplace(D, Kind); } /// Checks if specified decl is used in uses allocator clause as the /// allocator. - bool isUsesAllocatorsDecl(unsigned Level, const Decl *D) const { - return getStackElemAtLevel(Level).UsesAllocatorsDecls.count(D) > 0; + Optional<UsesAllocatorsDeclKind> isUsesAllocatorsDecl(unsigned Level, + const Decl *D) const { + const SharingMapTy &StackElem = getTopOfStack(); + auto I = StackElem.UsesAllocatorsDecls.find(D); + if (I == StackElem.UsesAllocatorsDecls.end()) + return None; + return I->getSecond(); } - bool isUsesAllocatorsDecl(const Decl *D) const { - return getTopOfStack().UsesAllocatorsDecls.count(D) > 0; + Optional<UsesAllocatorsDeclKind> isUsesAllocatorsDecl(const Decl *D) const { + const SharingMapTy &StackElem = getTopOfStack(); + auto I = StackElem.UsesAllocatorsDecls.find(D); + if (I == StackElem.UsesAllocatorsDecls.end()) + return None; + return I->getSecond(); } }; @@ -2234,6 +2253,13 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, D, [](OpenMPClauseKind K) { return K == OMPC_copyin; }, Level)) return OMPC_private; } + // User-defined allocators are private since they must be defined in the + // context of target region. + if (DSAStack->hasExplicitDirective(isOpenMPTargetExecutionDirective, Level) && + DSAStack->isUsesAllocatorsDecl(Level, D).getValueOr( + DSAStackTy::UsesAllocatorsDeclKind::AllocatorTrait) == + DSAStackTy::UsesAllocatorsDeclKind::UserDefinedAllocator) + return OMPC_private; return (DSAStack->hasExplicitDSA( D, [](OpenMPClauseKind K) { return K == OMPC_private; }, Level) || (DSAStack->isClauseParsingMode() && @@ -2556,7 +2582,7 @@ void Sema::EndOpenMPDSABlock(Stmt *CurDirective) { if (!DRE) continue; ValueDecl *VD = DRE->getDecl(); - if (!VD) + if (!VD || !isa<VarDecl>(VD)) continue; DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(VD, /*FromParent=*/false); @@ -3277,7 +3303,7 @@ class DSAAttrChecker final : public StmtVisitor<DSAAttrChecker, void> { !Stack->isImplicitTaskFirstprivate(VD)) return; // Skip allocators in uses_allocators clauses. - if (Stack->isUsesAllocatorsDecl(VD)) + if (Stack->isUsesAllocatorsDecl(VD).hasValue()) return; DSAStackTy::DSAVarData DVar = Stack->getTopDSA(VD, /*FromParent=*/false); @@ -4314,6 +4340,21 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, } } } + if (ThisCaptureRegion == OMPD_target) { + // Capture allocator traits in the target region. They are used implicitly + // and, thus, are not captured by default. + for (OMPClause *C : Clauses) { + if (const auto *UAC = dyn_cast<OMPUsesAllocatorsClause>(C)) { + for (unsigned I = 0, End = UAC->getNumberOfAllocators(); I < End; + ++I) { + OMPUsesAllocatorsClause::Data D = UAC->getAllocatorData(I); + if (Expr *E = D.AllocatorTraits) + MarkDeclarationsReferencedInExpr(E); + } + continue; + } + } + } if (++CompletedRegions == CaptureRegions.size()) DSAStack->setBodyComplete(); SR = ActOnCapturedRegionEnd(SR.get()); @@ -4741,7 +4782,10 @@ class AllocatorChecker final : public ConstStmtVisitor<AllocatorChecker, bool> { public: bool VisitDeclRefExpr(const DeclRefExpr *E) { - return !S->isUsesAllocatorsDecl(E->getDecl()); + return S->isUsesAllocatorsDecl(E->getDecl()) + .getValueOr( + DSAStackTy::UsesAllocatorsDeclKind::AllocatorTrait) == + DSAStackTy::UsesAllocatorsDeclKind::AllocatorTrait; } bool VisitStmt(const Stmt *S) { for (const Stmt *Child : S->children()) { @@ -18632,8 +18676,7 @@ OMPClause *Sema::ActOnOpenMPUsesAllocatorClause( !findOMPAlloctraitT(*this, StartLoc, DSAStack)) return nullptr; llvm::SmallSet<CanonicalDeclPtr<Decl>, 4> PredefinedAllocators; - for (int I = OMPAllocateDeclAttr::OMPDefaultMemAlloc; - I < OMPAllocateDeclAttr::OMPUserDefinedMemAlloc; ++I) { + for (int I = 0; I < OMPAllocateDeclAttr::OMPUserDefinedMemAlloc; ++I) { auto AllocatorKind = static_cast<OMPAllocateDeclAttr::AllocatorTypeTy>(I); StringRef Allocator = OMPAllocateDeclAttr::ConvertAllocatorTypeTyToStr(AllocatorKind); @@ -18693,7 +18736,11 @@ OMPClause *Sema::ActOnOpenMPUsesAllocatorClause( // No allocator traits - just convert it to rvalue. if (!D.AllocatorTraits) AllocatorExpr = DefaultLvalueConversion(AllocatorExpr).get(); - DSAStack->addUsesAllocatorsDecl(DRE->getDecl()); + DSAStack->addUsesAllocatorsDecl( + DRE->getDecl(), + IsPredefinedAllocator + ? DSAStackTy::UsesAllocatorsDeclKind::PredefinedAllocator + : DSAStackTy::UsesAllocatorsDeclKind::UserDefinedAllocator); } Expr *AllocatorTraitsExpr = nullptr; if (D.AllocatorTraits) { @@ -18721,6 +18768,12 @@ OMPClause *Sema::ActOnOpenMPUsesAllocatorClause( << AllocatorTraitsExpr->getType(); continue; } + // Do not map by default allocator traits if it is a standalone + // variable. + if (auto *DRE = dyn_cast<DeclRefExpr>(AllocatorTraitsExpr)) + DSAStack->addUsesAllocatorsDecl( + DRE->getDecl(), + DSAStackTy::UsesAllocatorsDeclKind::AllocatorTrait); } } OMPUsesAllocatorsClause::Data &NewD = NewData.emplace_back(); diff --git a/clang/test/OpenMP/target_parallel_for_simd_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_uses_allocators_codegen.cpp new file mode 100644 index 000000000000..e7c3abee6be4 --- /dev/null +++ b/clang/test/OpenMP/target_parallel_for_simd_uses_allocators_codegen.cpp @@ -0,0 +1,94 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + +typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, + omp_atk_alignment = 2, + omp_atk_access = 3, + omp_atk_pool_size = 4, + omp_atk_fallback = 5, + omp_atk_fb_data = 6, + omp_atk_pinned = 7, + omp_atk_partition = 8 +} omp_alloctrait_key_t; +typedef enum omp_alloctrait_value_t { + omp_atv_false = 0, + omp_atv_true = 1, + omp_atv_default = 2, + omp_atv_contended = 3, + omp_atv_uncontended = 4, + omp_atv_sequential = 5, + omp_atv_private = 6, + omp_atv_all = 7, + omp_atv_thread = 8, + omp_atv_pteam = 9, + omp_atv_cgroup = 10, + omp_atv_default_mem_fb = 11, + omp_atv_null_fb = 12, + omp_atv_abort_fb = 13, + omp_atv_allocator_fb = 14, + omp_atv_environment = 15, + omp_atv_nearest = 16, + omp_atv_blocked = 17, + omp_atv_interleaved = 18 +} omp_alloctrait_value_t; + +typedef struct omp_alloctrait_t { + omp_alloctrait_key_t key; + __UINTPTR_TYPE__ value; +} omp_alloctrait_t; + +// Just map the traits variable as a firstprivate variable. +// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160] +// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673] + +// CHECK: define {{.*}}[[FOO:@.+]]() +void foo() { + omp_alloctrait_t traits[10]; + omp_allocator_handle_t my_allocator; + +// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 1, i32 0) +// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]] +// CHECK: [[FAILED]]: +// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}}) +#pragma omp target parallel for simd uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits)) + for (int i = 0; i < 10; ++i) + ; +} + +// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}}) +// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*, +// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64, +// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]], +// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8** +// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]], +// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]]) +// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64 +// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]], + +// Destroy allocator upon exit from the region. +// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]], +// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8* +// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]]) +// CHECK: ret void + +#endif diff --git a/clang/test/OpenMP/target_parallel_for_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_parallel_for_uses_allocators_codegen.cpp new file mode 100644 index 000000000000..f04b8108cd0e --- /dev/null +++ b/clang/test/OpenMP/target_parallel_for_uses_allocators_codegen.cpp @@ -0,0 +1,94 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + +typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, + omp_atk_alignment = 2, + omp_atk_access = 3, + omp_atk_pool_size = 4, + omp_atk_fallback = 5, + omp_atk_fb_data = 6, + omp_atk_pinned = 7, + omp_atk_partition = 8 +} omp_alloctrait_key_t; +typedef enum omp_alloctrait_value_t { + omp_atv_false = 0, + omp_atv_true = 1, + omp_atv_default = 2, + omp_atv_contended = 3, + omp_atv_uncontended = 4, + omp_atv_sequential = 5, + omp_atv_private = 6, + omp_atv_all = 7, + omp_atv_thread = 8, + omp_atv_pteam = 9, + omp_atv_cgroup = 10, + omp_atv_default_mem_fb = 11, + omp_atv_null_fb = 12, + omp_atv_abort_fb = 13, + omp_atv_allocator_fb = 14, + omp_atv_environment = 15, + omp_atv_nearest = 16, + omp_atv_blocked = 17, + omp_atv_interleaved = 18 +} omp_alloctrait_value_t; + +typedef struct omp_alloctrait_t { + omp_alloctrait_key_t key; + __UINTPTR_TYPE__ value; +} omp_alloctrait_t; + +// Just map the traits variable as a firstprivate variable. +// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160] +// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673] + +// CHECK: define {{.*}}[[FOO:@.+]]() +void foo() { + omp_alloctrait_t traits[10]; + omp_allocator_handle_t my_allocator; + +// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 1, i32 0) +// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]] +// CHECK: [[FAILED]]: +// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}}) +#pragma omp target parallel for uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits)) + for (int i = 0; i < 10; ++i) + ; +} + +// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}}) +// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*, +// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64, +// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]], +// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8** +// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]], +// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]]) +// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64 +// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]], + +// Destroy allocator upon exit from the region. +// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]], +// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8* +// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]]) +// CHECK: ret void + +#endif diff --git a/clang/test/OpenMP/target_parallel_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_parallel_uses_allocators_codegen.cpp new file mode 100644 index 000000000000..d98f76261caf --- /dev/null +++ b/clang/test/OpenMP/target_parallel_uses_allocators_codegen.cpp @@ -0,0 +1,93 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + +typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, + omp_atk_alignment = 2, + omp_atk_access = 3, + omp_atk_pool_size = 4, + omp_atk_fallback = 5, + omp_atk_fb_data = 6, + omp_atk_pinned = 7, + omp_atk_partition = 8 +} omp_alloctrait_key_t; +typedef enum omp_alloctrait_value_t { + omp_atv_false = 0, + omp_atv_true = 1, + omp_atv_default = 2, + omp_atv_contended = 3, + omp_atv_uncontended = 4, + omp_atv_sequential = 5, + omp_atv_private = 6, + omp_atv_all = 7, + omp_atv_thread = 8, + omp_atv_pteam = 9, + omp_atv_cgroup = 10, + omp_atv_default_mem_fb = 11, + omp_atv_null_fb = 12, + omp_atv_abort_fb = 13, + omp_atv_allocator_fb = 14, + omp_atv_environment = 15, + omp_atv_nearest = 16, + omp_atv_blocked = 17, + omp_atv_interleaved = 18 +} omp_alloctrait_value_t; + +typedef struct omp_alloctrait_t { + omp_alloctrait_key_t key; + __UINTPTR_TYPE__ value; +} omp_alloctrait_t; + +// Just map the traits variable as a firstprivate variable. +// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160] +// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673] + +// CHECK: define {{.*}}[[FOO:@.+]]() +void foo() { + omp_alloctrait_t traits[10]; + omp_allocator_handle_t my_allocator; + +// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 1, i32 0) +// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]] +// CHECK: [[FAILED]]: +// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}}) +#pragma omp target parallel uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits)) + ; +} + +// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}}) +// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*, +// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64, +// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]], +// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8** +// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]], +// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]]) +// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64 +// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]], + +// Destroy allocator upon exit from the region. +// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]], +// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8* +// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]]) +// CHECK: ret void + +#endif diff --git a/clang/test/OpenMP/target_simd_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_simd_uses_allocators_codegen.cpp new file mode 100644 index 000000000000..eaade4b9b5cd --- /dev/null +++ b/clang/test/OpenMP/target_simd_uses_allocators_codegen.cpp @@ -0,0 +1,94 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + +typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, + omp_atk_alignment = 2, + omp_atk_access = 3, + omp_atk_pool_size = 4, + omp_atk_fallback = 5, + omp_atk_fb_data = 6, + omp_atk_pinned = 7, + omp_atk_partition = 8 +} omp_alloctrait_key_t; +typedef enum omp_alloctrait_value_t { + omp_atv_false = 0, + omp_atv_true = 1, + omp_atv_default = 2, + omp_atv_contended = 3, + omp_atv_uncontended = 4, + omp_atv_sequential = 5, + omp_atv_private = 6, + omp_atv_all = 7, + omp_atv_thread = 8, + omp_atv_pteam = 9, + omp_atv_cgroup = 10, + omp_atv_default_mem_fb = 11, + omp_atv_null_fb = 12, + omp_atv_abort_fb = 13, + omp_atv_allocator_fb = 14, + omp_atv_environment = 15, + omp_atv_nearest = 16, + omp_atv_blocked = 17, + omp_atv_interleaved = 18 +} omp_alloctrait_value_t; + +typedef struct omp_alloctrait_t { + omp_alloctrait_key_t key; + __UINTPTR_TYPE__ value; +} omp_alloctrait_t; + +// Just map the traits variable as a firstprivate variable. +// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160] +// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673] + +// CHECK: define {{.*}}[[FOO:@.+]]() +void foo() { + omp_alloctrait_t traits[10]; + omp_allocator_handle_t my_allocator; + +// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 1, i32 1) +// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]] +// CHECK: [[FAILED]]: +// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}}) +#pragma omp target simd uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits)) + for (int i = 0; i < 10; ++i) + ; +} + +// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}}) +// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*, +// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64, +// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]], +// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8** +// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]], +// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]]) +// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64 +// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]], + +// Destroy allocator upon exit from the region. +// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]], +// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8* +// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]]) +// CHECK: ret void + +#endif diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_uses_allocators_codegen.cpp new file mode 100644 index 000000000000..30bcdcfa68c5 --- /dev/null +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_uses_allocators_codegen.cpp @@ -0,0 +1,94 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + +typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, + omp_atk_alignment = 2, + omp_atk_access = 3, + omp_atk_pool_size = 4, + omp_atk_fallback = 5, + omp_atk_fb_data = 6, + omp_atk_pinned = 7, + omp_atk_partition = 8 +} omp_alloctrait_key_t; +typedef enum omp_alloctrait_value_t { + omp_atv_false = 0, + omp_atv_true = 1, + omp_atv_default = 2, + omp_atv_contended = 3, + omp_atv_uncontended = 4, + omp_atv_sequential = 5, + omp_atv_private = 6, + omp_atv_all = 7, + omp_atv_thread = 8, + omp_atv_pteam = 9, + omp_atv_cgroup = 10, + omp_atv_default_mem_fb = 11, + omp_atv_null_fb = 12, + omp_atv_abort_fb = 13, + omp_atv_allocator_fb = 14, + omp_atv_environment = 15, + omp_atv_nearest = 16, + omp_atv_blocked = 17, + omp_atv_interleaved = 18 +} omp_alloctrait_value_t; + +typedef struct omp_alloctrait_t { + omp_alloctrait_key_t key; + __UINTPTR_TYPE__ value; +} omp_alloctrait_t; + +// Just map the traits variable as a firstprivate variable. +// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160] +// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673] + +// CHECK: define {{.*}}[[FOO:@.+]]() +void foo() { + omp_alloctrait_t traits[10]; + omp_allocator_handle_t my_allocator; + +// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 0) +// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]] +// CHECK: [[FAILED]]: +// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}}) +#pragma omp target teams distribute parallel for simd uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits)) + for (int i = 0; i < 10; ++i) + ; +} + +// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}}) +// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*, +// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64, +// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]], +// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8** +// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]], +// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]]) +// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64 +// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]], + +// Destroy allocator upon exit from the region. +// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]], +// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8* +// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]]) +// CHECK: ret void + +#endif diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_uses_allocators_codegen.cpp new file mode 100644 index 000000000000..f352b2e1bc51 --- /dev/null +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_uses_allocators_codegen.cpp @@ -0,0 +1,94 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + +typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, + omp_atk_alignment = 2, + omp_atk_access = 3, + omp_atk_pool_size = 4, + omp_atk_fallback = 5, + omp_atk_fb_data = 6, + omp_atk_pinned = 7, + omp_atk_partition = 8 +} omp_alloctrait_key_t; +typedef enum omp_alloctrait_value_t { + omp_atv_false = 0, + omp_atv_true = 1, + omp_atv_default = 2, + omp_atv_contended = 3, + omp_atv_uncontended = 4, + omp_atv_sequential = 5, + omp_atv_private = 6, + omp_atv_all = 7, + omp_atv_thread = 8, + omp_atv_pteam = 9, + omp_atv_cgroup = 10, + omp_atv_default_mem_fb = 11, + omp_atv_null_fb = 12, + omp_atv_abort_fb = 13, + omp_atv_allocator_fb = 14, + omp_atv_environment = 15, + omp_atv_nearest = 16, + omp_atv_blocked = 17, + omp_atv_interleaved = 18 +} omp_alloctrait_value_t; + +typedef struct omp_alloctrait_t { + omp_alloctrait_key_t key; + __UINTPTR_TYPE__ value; +} omp_alloctrait_t; + +// Just map the traits variable as a firstprivate variable. +// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160] +// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673] + +// CHECK: define {{.*}}[[FOO:@.+]]() +void foo() { + omp_alloctrait_t traits[10]; + omp_allocator_handle_t my_allocator; + +// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 0) +// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]] +// CHECK: [[FAILED]]: +// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}}) +#pragma omp target teams distribute parallel for uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits)) + for (int i = 0; i < 10; ++i) + ; +} + +// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}}) +// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*, +// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64, +// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]], +// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8** +// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]], +// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]]) +// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64 +// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]], + +// Destroy allocator upon exit from the region. +// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]], +// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8* +// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]]) +// CHECK: ret void + +#endif diff --git a/clang/test/OpenMP/target_teams_distribute_simd_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_uses_allocators_codegen.cpp new file mode 100644 index 000000000000..6091ae2716b1 --- /dev/null +++ b/clang/test/OpenMP/target_teams_distribute_simd_uses_allocators_codegen.cpp @@ -0,0 +1,94 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + +typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, + omp_atk_alignment = 2, + omp_atk_access = 3, + omp_atk_pool_size = 4, + omp_atk_fallback = 5, + omp_atk_fb_data = 6, + omp_atk_pinned = 7, + omp_atk_partition = 8 +} omp_alloctrait_key_t; +typedef enum omp_alloctrait_value_t { + omp_atv_false = 0, + omp_atv_true = 1, + omp_atv_default = 2, + omp_atv_contended = 3, + omp_atv_uncontended = 4, + omp_atv_sequential = 5, + omp_atv_private = 6, + omp_atv_all = 7, + omp_atv_thread = 8, + omp_atv_pteam = 9, + omp_atv_cgroup = 10, + omp_atv_default_mem_fb = 11, + omp_atv_null_fb = 12, + omp_atv_abort_fb = 13, + omp_atv_allocator_fb = 14, + omp_atv_environment = 15, + omp_atv_nearest = 16, + omp_atv_blocked = 17, + omp_atv_interleaved = 18 +} omp_alloctrait_value_t; + +typedef struct omp_alloctrait_t { + omp_alloctrait_key_t key; + __UINTPTR_TYPE__ value; +} omp_alloctrait_t; + +// Just map the traits variable as a firstprivate variable. +// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160] +// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673] + +// CHECK: define {{.*}}[[FOO:@.+]]() +void foo() { + omp_alloctrait_t traits[10]; + omp_allocator_handle_t my_allocator; + +// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 1) +// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]] +// CHECK: [[FAILED]]: +// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}}) +#pragma omp target teams distribute simd uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits)) + for (int i = 0; i < 10; ++i) + ; +} + +// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}}) +// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*, +// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64, +// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]], +// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8** +// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]], +// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]]) +// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64 +// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]], + +// Destroy allocator upon exit from the region. +// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]], +// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8* +// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]]) +// CHECK: ret void + +#endif diff --git a/clang/test/OpenMP/target_teams_distribute_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_uses_allocators_codegen.cpp new file mode 100644 index 000000000000..376d51ab0067 --- /dev/null +++ b/clang/test/OpenMP/target_teams_distribute_uses_allocators_codegen.cpp @@ -0,0 +1,94 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + +typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, + omp_atk_alignment = 2, + omp_atk_access = 3, + omp_atk_pool_size = 4, + omp_atk_fallback = 5, + omp_atk_fb_data = 6, + omp_atk_pinned = 7, + omp_atk_partition = 8 +} omp_alloctrait_key_t; +typedef enum omp_alloctrait_value_t { + omp_atv_false = 0, + omp_atv_true = 1, + omp_atv_default = 2, + omp_atv_contended = 3, + omp_atv_uncontended = 4, + omp_atv_sequential = 5, + omp_atv_private = 6, + omp_atv_all = 7, + omp_atv_thread = 8, + omp_atv_pteam = 9, + omp_atv_cgroup = 10, + omp_atv_default_mem_fb = 11, + omp_atv_null_fb = 12, + omp_atv_abort_fb = 13, + omp_atv_allocator_fb = 14, + omp_atv_environment = 15, + omp_atv_nearest = 16, + omp_atv_blocked = 17, + omp_atv_interleaved = 18 +} omp_alloctrait_value_t; + +typedef struct omp_alloctrait_t { + omp_alloctrait_key_t key; + __UINTPTR_TYPE__ value; +} omp_alloctrait_t; + +// Just map the traits variable as a firstprivate variable. +// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160] +// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673] + +// CHECK: define {{.*}}[[FOO:@.+]]() +void foo() { + omp_alloctrait_t traits[10]; + omp_allocator_handle_t my_allocator; + +// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 0) +// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]] +// CHECK: [[FAILED]]: +// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}}) +#pragma omp target teams distribute uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits)) + for (int i = 0; i < 10; ++i) + ; +} + +// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}}) +// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*, +// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64, +// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]], +// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8** +// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]], +// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]]) +// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64 +// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]], + +// Destroy allocator upon exit from the region. +// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]], +// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8* +// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]]) +// CHECK: ret void + +#endif diff --git a/clang/test/OpenMP/target_teams_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_uses_allocators_codegen.cpp new file mode 100644 index 000000000000..829a99bd8eb7 --- /dev/null +++ b/clang/test/OpenMP/target_teams_uses_allocators_codegen.cpp @@ -0,0 +1,93 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + +typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, + omp_atk_alignment = 2, + omp_atk_access = 3, + omp_atk_pool_size = 4, + omp_atk_fallback = 5, + omp_atk_fb_data = 6, + omp_atk_pinned = 7, + omp_atk_partition = 8 +} omp_alloctrait_key_t; +typedef enum omp_alloctrait_value_t { + omp_atv_false = 0, + omp_atv_true = 1, + omp_atv_default = 2, + omp_atv_contended = 3, + omp_atv_uncontended = 4, + omp_atv_sequential = 5, + omp_atv_private = 6, + omp_atv_all = 7, + omp_atv_thread = 8, + omp_atv_pteam = 9, + omp_atv_cgroup = 10, + omp_atv_default_mem_fb = 11, + omp_atv_null_fb = 12, + omp_atv_abort_fb = 13, + omp_atv_allocator_fb = 14, + omp_atv_environment = 15, + omp_atv_nearest = 16, + omp_atv_blocked = 17, + omp_atv_interleaved = 18 +} omp_alloctrait_value_t; + +typedef struct omp_alloctrait_t { + omp_alloctrait_key_t key; + __UINTPTR_TYPE__ value; +} omp_alloctrait_t; + +// Just map the traits variable as a firstprivate variable. +// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160] +// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673] + +// CHECK: define {{.*}}[[FOO:@.+]]() +void foo() { + omp_alloctrait_t traits[10]; + omp_allocator_handle_t my_allocator; + +// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 0) +// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]] +// CHECK: [[FAILED]]: +// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}}) +#pragma omp target teams uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits)) + ; +} + +// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}}) +// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*, +// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64, +// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]], +// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8** +// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]], +// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]]) +// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64 +// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]], + +// Destroy allocator upon exit from the region. +// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]], +// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8* +// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]]) +// CHECK: ret void + +#endif diff --git a/clang/test/OpenMP/target_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_uses_allocators_codegen.cpp new file mode 100644 index 000000000000..213e7c9a8778 --- /dev/null +++ b/clang/test/OpenMP/target_uses_allocators_codegen.cpp @@ -0,0 +1,93 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + +typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, + omp_atk_alignment = 2, + omp_atk_access = 3, + omp_atk_pool_size = 4, + omp_atk_fallback = 5, + omp_atk_fb_data = 6, + omp_atk_pinned = 7, + omp_atk_partition = 8 +} omp_alloctrait_key_t; +typedef enum omp_alloctrait_value_t { + omp_atv_false = 0, + omp_atv_true = 1, + omp_atv_default = 2, + omp_atv_contended = 3, + omp_atv_uncontended = 4, + omp_atv_sequential = 5, + omp_atv_private = 6, + omp_atv_all = 7, + omp_atv_thread = 8, + omp_atv_pteam = 9, + omp_atv_cgroup = 10, + omp_atv_default_mem_fb = 11, + omp_atv_null_fb = 12, + omp_atv_abort_fb = 13, + omp_atv_allocator_fb = 14, + omp_atv_environment = 15, + omp_atv_nearest = 16, + omp_atv_blocked = 17, + omp_atv_interleaved = 18 +} omp_alloctrait_value_t; + +typedef struct omp_alloctrait_t { + omp_alloctrait_key_t key; + __UINTPTR_TYPE__ value; +} omp_alloctrait_t; + +// Just map the traits variable as a firstprivate variable. +// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160] +// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673] + +// CHECK: define {{.*}}[[FOO:@.+]]() +void foo() { + omp_alloctrait_t traits[10]; + omp_allocator_handle_t my_allocator; + +// CHECK: [[RES:%.+]] = call i32 @__tgt_target(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0)) +// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]] +// CHECK: [[FAILED]]: +// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}}) +#pragma omp target uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits)) + ; +} + +// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}}) +// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*, +// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64, +// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]], +// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8** +// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]], +// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]]) +// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64 +// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]], + +// Destroy allocator upon exit from the region. +// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]], +// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8* +// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]]) +// CHECK: ret void + +#endif _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits