gtbercea updated this revision to Diff 195644. gtbercea added a comment. - Update tests. - Move error check in sema.
Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D60568/new/ https://reviews.llvm.org/D60568 Files: include/clang/Basic/DiagnosticSemaKinds.td lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp lib/CodeGen/CGOpenMPRuntimeNVPTX.h lib/CodeGen/CodeGenModule.cpp lib/Sema/SemaOpenMP.cpp test/OpenMP/openmp_offload_registration.cpp test/OpenMP/target_codegen.cpp test/OpenMP/target_codegen_registration.cpp
Index: test/OpenMP/target_codegen_registration.cpp =================================================================== --- test/OpenMP/target_codegen_registration.cpp +++ test/OpenMP/target_codegen_registration.cpp @@ -180,10 +180,11 @@ // CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) // We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function. -// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [ +// CHECK: @llvm.global_ctors = appending global [5 x { i32, void ()*, i8* }] [ // CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null }, // CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null }, // CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* @.omp_offloading.requires_reg, i8* null }, // CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* @[[REGFN]], i8* bitcast (void ()* @[[REGFN]] to i8*) }] // CHECK-NTARGET: @llvm.global_ctors = appending global [3 x { i32, void ()*, i8* }] [ @@ -387,6 +388,10 @@ // Check registration and unregistration +//CHECK: define internal void @.omp_offloading.requires_reg() +//CHECK: call void @__tgt_register_requires(i64 1) +//CHECK: ret void + //CHECK: define internal void @[[UNREGFN:.+]](i8*) //CHECK-SAME: comdat($[[REGFN]]) { //CHECK: call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]]) @@ -432,31 +437,31 @@ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 266, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 282, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 288, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 299, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 305, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 427, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 311, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 305, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 311, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 299, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 241, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 217, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 267, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 283, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 289, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 300, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 306, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 432, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 312, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 306, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 312, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 300, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 266, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 282, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 288, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 299, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 305, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 427, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 311, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 305, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 311, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 299, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 241, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 217, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 267, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 283, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 289, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 300, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 306, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 432, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 312, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 306, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 312, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 300, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}} #endif Index: test/OpenMP/target_codegen.cpp =================================================================== --- test/OpenMP/target_codegen.cpp +++ test/OpenMP/target_codegen.cpp @@ -96,7 +96,7 @@ // CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) // Check target registration is registered as a Ctor. -// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @[[REGFN]], i8* bitcast (void ()* @[[REGFN]] to i8*) }] +// CHECK: appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @.omp_offloading.requires_reg, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @[[REGFN]], i8* bitcast (void ()* @[[REGFN]] to i8*) }] template<typename tx, typename ty> Index: test/OpenMP/openmp_offload_registration.cpp =================================================================== --- test/OpenMP/openmp_offload_registration.cpp +++ test/OpenMP/openmp_offload_registration.cpp @@ -26,7 +26,7 @@ // CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 2, [[DEVTY]]* getelementptr inbounds ([2 x [[DEVTY]]], [2 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) // Check target registration is registered as a Ctor. -// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @[[REGFN]], i8* bitcast (void ()* @[[REGFN]] to i8*) }] +// CHECK: appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @.omp_offloading.requires_reg, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @[[REGFN]], i8* bitcast (void ()* @[[REGFN]] to i8*) }] // Check presence of foo() and the outlined target region // CHECK: define void [[FOO:@.+]]() @@ -34,6 +34,11 @@ // Check registration and unregistration code. +// CHECK: define internal void @.omp_offloading.requires_reg() +// CHECK: call void @__tgt_register_requires(i64 1) +// CHECK: ret void +// CHECK: declare void @__tgt_register_requires(i64) + // CHECK: define internal void @[[UNREGFN:.+]](i8*) // CHECK-SAME: comdat($[[REGFN]]) { // CHECK: call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]]) Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -193,6 +193,8 @@ /// Expression for the predefined allocators. Expr *OMPPredefinedAllocators[OMPAllocateDeclAttr::OMPUserDefinedMemAlloc] = { nullptr}; + /// Vector of previously encountered target directives + SmallVector<SourceLocation, 2> TargetLocations; public: explicit DSAStackTy(Sema &S) : SemaRef(S) {} @@ -454,6 +456,34 @@ return IsDuplicate; } + /// Add location of previously encountered target to internal vector + void addTargetDirLocation(SourceLocation LocStart) { + TargetLocations.push_back(LocStart); + } + + /// For target specific clauses, the requires directive cannot be + /// specified after the handling of any of the target regions in the + /// current compilation unit. + void checkEncounteredTargets(SourceLocation Loc, + ArrayRef<OMPClause *> ClauseList) { + for (OMPClause *CNew : ClauseList) { + // Check if any of the requires clauses affect target regions. + if (!TargetLocations.empty() && + (isa<OMPUnifiedSharedMemoryClause>(CNew) || + isa<OMPUnifiedAddressClause>(CNew) || + isa<OMPReverseOffloadClause>(CNew) || + isa<OMPDynamicAllocatorsClause>(CNew))) { + SemaRef.Diag(Loc, + diag::err_omp_target_before_requires) + << getOpenMPClauseName(CNew->getClauseKind()); + for (SourceLocation TargetLoc : TargetLocations) { + SemaRef.Diag(TargetLoc, + diag::note_omp_requires_encountered_target); + } + } + } + } + /// Set default data sharing attribute to none. void setDefaultDSANone(SourceLocation Loc) { assert(!isStackEmpty()); @@ -2418,6 +2448,7 @@ OMPRequiresDecl *Sema::CheckOMPRequiresDecl(SourceLocation Loc, ArrayRef<OMPClause *> ClauseList) { + DSAStack->checkEncounteredTargets(Loc, ClauseList); if (!DSAStack->hasDuplicateRequiresClause(ClauseList)) return OMPRequiresDecl::Create(Context, getCurLexicalContext(), Loc, ClauseList); @@ -4167,6 +4198,16 @@ ->setIsOMPStructuredBlock(true); } + if (isOpenMPTargetExecutionDirective(Kind) && + !(DSAStack->hasRequiresDeclWithClause<OMPUnifiedSharedMemoryClause>() || + DSAStack->hasRequiresDeclWithClause<OMPUnifiedAddressClause>() || + DSAStack->hasRequiresDeclWithClause<OMPReverseOffloadClause>() || + DSAStack->hasRequiresDeclWithClause<OMPDynamicAllocatorsClause>()) && + !CurContext->isDependentContext()) { + // Register target to DSA Stack. + DSAStack->addTargetDirLocation(StartLoc); + } + return Res; } Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -410,6 +410,10 @@ AddGlobalCtor(CudaCtorFunction); } if (OpenMPRuntime) { + if (llvm::Function *OpenMPRequiresDirectiveRegFun = + OpenMPRuntime->emitRequiresDirectiveRegFun()) { + AddGlobalCtor(OpenMPRequiresDirectiveRegFun, 0); + } if (llvm::Function *OpenMPRegistrationFunction = OpenMPRuntime->emitRegistrationFunction()) { auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ? Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.h =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -383,7 +383,7 @@ /// Perform check on requires decl to ensure that target architecture /// supports unified addressing - void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const override; + void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) override; /// Returns default address space for the constant firstprivates, __constant__ /// address space by default. Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -4942,7 +4942,7 @@ /// Check to see if target architecture supports unified addressing which is /// a restriction for OpenMP requires clause "unified_shared_memory". void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( - const OMPRequiresDecl *D) const { + const OMPRequiresDecl *D) { for (const OMPClause *Clause : D->clauselists()) { if (Clause->getClauseKind() == OMPC_unified_shared_memory) { switch (getCudaArch(CGM)) { @@ -4987,6 +4987,7 @@ } } } + CGOpenMPRuntime::checkArchForUnifiedAddressing(D); } /// Get number of SMs and number of blocks per SM. Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -636,6 +636,17 @@ /// must be emitted. llvm::SmallDenseSet<const VarDecl *> DeferredGlobalVariables; + /// Flag for keeping track of weather a requires unified_shared_memory + /// directive is present. + bool HasRequiresUnifiedSharedMemory = false; + + /// Flag for keeping track of weather a target region has been emitted. + bool HasEmittedTargetRegion = false; + + /// Flag for keeping track of weather a device routine has been emitted. + /// Device routines are specific to the + bool HasEmittedDeclareTargetRegion = false; + /// Creates and registers offloading binary descriptor for the current /// compilation unit. The function that does the registration is returned. llvm::Function *createOffloadingBinaryDescriptorRegistration(); @@ -1429,6 +1440,10 @@ /// \param GD Global to scan. virtual bool emitTargetGlobal(GlobalDecl GD); + /// Creates and returns a registration function for when at least one + /// requires directives was used in the current module. + llvm::Function *emitRequiresDirectiveRegFun(); + /// Creates the offloading descriptor in the event any target region /// was emitted in the current module and return the function that registers /// it. @@ -1576,7 +1591,7 @@ /// Emits OpenMP-specific function prolog. /// Required for device constructs. - virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {} + virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D); /// Gets the OpenMP-specific address of the local variable. virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, @@ -1597,7 +1612,7 @@ /// Perform check on requires decl to ensure that target architecture /// supports unified addressing - virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const {} + virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D); /// Checks if the variable has associated OMPAllocateDeclAttr attribute with /// the predefined allocator and translates it into the corresponding address Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -457,6 +457,26 @@ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) }; +namespace { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); +/// Values for bit flags for marking which requires clauses have been used. +enum OpenMPOffloadingRequiresDirFlags : int64_t { + /// flag undefined. + OMP_REQ_UNDEFINED = 0x000, + /// no requires clause present. + OMP_REQ_NONE = 0x001, + /// reverse_offload clause. + OMP_REQ_REVERSE_OFFLOAD = 0x002, + /// unified_address clause. + OMP_REQ_UNIFIED_ADDRESS = 0x004, + /// unified_shared_memory clause. + OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, + /// dynamic_allocators clause. + OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) +}; +} // anonymous namespace + /// Describes ident structure that describes a source location. /// All descriptions are taken from /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h @@ -694,6 +714,8 @@ // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); OMPRTL__tgt_target_teams_nowait, + // Call to void __tgt_register_requires(int64_t flags); + OMPRTL__tgt_register_requires, // Call to void __tgt_register_lib(__tgt_bin_desc *desc); OMPRTL__tgt_register_lib, // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); @@ -2294,6 +2316,14 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); break; } + case OMPRTL__tgt_register_requires: { + // Build void __tgt_register_requires(int64_t flags); + llvm::Type *TypeParams[] = {CGM.Int64Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -6404,6 +6434,7 @@ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { assert(!ParentName.empty() && "Invalid target region parent name!"); + HasEmittedTargetRegion = true; emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); } @@ -7922,7 +7953,7 @@ MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { // Map other list items in the map clause which are not captured variables - // but "declare target link" global variables., + // but "declare target link" global variables. for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { for (const auto &L : C->component_lists()) { if (!L.first) @@ -8935,6 +8966,16 @@ " Expected target-based directive."); } +void CGOpenMPRuntime::checkArchForUnifiedAddressing( + const OMPRequiresDecl *D) { + for (const OMPClause *Clause : D->clauselists()) { + if (Clause->getClauseKind() == OMPC_unified_shared_memory) { + HasRequiresUnifiedSharedMemory = true; + break; + } + } +} + bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) { if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) @@ -8993,6 +9034,41 @@ return !AlreadyEmittedTargetFunctions.insert(Name).second; } +llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { + // If we don't have entries or if we are emitting code for the device, we + // don't need to do anything. + if (CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || + (OffloadEntriesInfoManager.empty() && !HasEmittedDeclareTargetRegion)) + return nullptr; + + // Create and register the function that handles the requires directives. + ASTContext &C = CGM.getContext(); + + llvm::Function *RequiresRegFn; + { + CodeGenFunction CGF(CGM); + const auto &FI = CGM.getTypes().arrangeNullaryFunction(); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + std::string ReqName = getName({"omp_offloading", "requires_reg"}); + RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); + CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); + OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; + // TODO: check for other requires clauses. + // The requires directive takes effect only when a target region is + // present in the compilation unit. Otherwise it is ignored and not + // passed to the runtime. This avoids the runtime from throwing an error + // for mismatching requires clauses across compilation units that don't + // contain at least 1 target region. + if ((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion) && + HasRequiresUnifiedSharedMemory) + Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), + llvm::ConstantInt::get(CGM.Int64Ty, Flags)); + CGF.FinishFunction(); + } + return RequiresRegFn; +} + llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { // If we have offloading in the current module, we need to emit the entries // now and register the offloading descriptor. @@ -9718,6 +9794,14 @@ emitCall(CGF, Loc, OutlinedFn, Args); } +void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D){ + if (const auto *FD = dyn_cast<FunctionDecl>(D)) { + if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) { + HasEmittedDeclareTargetRegion = true; + } + } +} + Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const { Index: include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- include/clang/Basic/DiagnosticSemaKinds.td +++ include/clang/Basic/DiagnosticSemaKinds.td @@ -9132,6 +9132,10 @@ "Only one %0 clause can appear on a requires directive in a single translation unit">; def note_omp_requires_previous_clause : Note < "%0 clause previously used here">; +def err_omp_target_before_requires : Error < + "Target region encountered before requires directive with %0 clause.">; +def note_omp_requires_encountered_target : Note < + "Target previously encountered here">; def err_omp_invalid_scope : Error < "'#pragma omp %0' directive must appear only in file scope">; def note_omp_invalid_length_on_this_ptr_mapping : Note <
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits