Author: Jack Styles Date: 2025-11-27T14:41:59Z New Revision: 1778938460a1bbbb686f8f44bfa41df03b165ca8
URL: https://github.com/llvm/llvm-project/commit/1778938460a1bbbb686f8f44bfa41df03b165ca8 DIFF: https://github.com/llvm/llvm-project/commit/1778938460a1bbbb686f8f44bfa41df03b165ca8.diff LOG: Revert "[MLIR][OpenMP] Add MLIR Lowering Support for dist_schedule (#152736)" This reverts commit 47ae3eaa29f2195429f2ca19cc171a9ebd83c242. Added: Modified: flang/docs/OpenMPSupport.md llvm/include/llvm/Frontend/OpenMP/OMP.td llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir mlir/test/Target/LLVMIR/openmp-todo.mlir Removed: mlir/test/Target/LLVMIR/openmp-dist_schedule.mlir mlir/test/Target/LLVMIR/openmp-dist_schedule_with_wsloop.mlir ################################################################################ diff --git a/flang/docs/OpenMPSupport.md b/flang/docs/OpenMPSupport.md index 8eea39c6ba91b..81f5f9f6dee5b 100644 --- a/flang/docs/OpenMPSupport.md +++ b/flang/docs/OpenMPSupport.md @@ -42,10 +42,10 @@ Note : No distinction is made between the support in Parser/Semantics, MLIR, Low | target update construct | P | device clause not supported | | declare target directive | P | | | teams construct | Y | | -| distribute construct | P | | -| distribute simd construct | P | linear clauses are not supported | -| distribute parallel loop construct | P | | -| distribute parallel loop simd construct | P | linear clauses are not supported | +| distribute construct | P | dist_schedule clause not supported | +| distribute simd construct | P | dist_schedule and linear clauses are not supported | +| distribute parallel loop construct | P | dist_schedule clause not supported | +| distribute parallel loop simd construct | P | dist_schedule and linear clauses are not supported | | depend clause | Y | | | declare reduction construct | N | | | atomic construct extensions | Y | | @@ 
-53,13 +53,13 @@ Note : No distinction is made between the support in Parser/Semantics, MLIR, Low | cancellation point construct | Y | | | parallel do simd construct | P | linear clause not supported | | target teams construct | P | device clause not supported | -| teams distribute construct | P | | -| teams distribute simd construct | P | linear clause is not supported | -| target teams distribute construct | P | device clause is not supported | -| teams distribute parallel loop construct | P | | -| target teams distribute parallel loop construct | P | device clause is not supported | -| teams distribute parallel loop simd construct | P | linear clause is not supported | -| target teams distribute parallel loop simd construct | P | device and linear clauses are not supported | +| teams distribute construct | P | dist_schedule clause not supported | +| teams distribute simd construct | P | dist_schedule and linear clauses are not supported | +| target teams distribute construct | P | device and dist_schedule clauses are not supported | +| teams distribute parallel loop construct | P | dist_schedule clause not supported | +| target teams distribute parallel loop construct | P | device and dist_schedule clauses are not supported | +| teams distribute parallel loop simd construct | P | dist_schedule and linear clauses are not supported | +| target teams distribute parallel loop simd construct | P | device, dist_schedule and linear clauses are not supported | ## Extensions ### ATOMIC construct diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index da70048d28c12..ade00e7ca27d5 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -490,8 +490,7 @@ def OMP_SCHEDULE_Dynamic : EnumVal<"dynamic", 3, 1> {} def OMP_SCHEDULE_Guided : EnumVal<"guided", 4, 1> {} def OMP_SCHEDULE_Auto : EnumVal<"auto", 5, 1> {} def OMP_SCHEDULE_Runtime : EnumVal<"runtime", 6, 1> {} -def 
OMP_SCHEDULE_Distribute : EnumVal<"distribute", 7, 1> {} -def OMP_SCHEDULE_Default : EnumVal<"default", 8, 0> { let isDefault = 1; } +def OMP_SCHEDULE_Default : EnumVal<"default", 7, 0> { let isDefault = 1; } def OMPC_Schedule : Clause<[Spelling<"schedule">]> { let clangClass = "OMPScheduleClause"; let flangClass = "OmpScheduleClause"; @@ -502,7 +501,6 @@ def OMPC_Schedule : Clause<[Spelling<"schedule">]> { OMP_SCHEDULE_Guided, OMP_SCHEDULE_Auto, OMP_SCHEDULE_Runtime, - OMP_SCHEDULE_Distribute, OMP_SCHEDULE_Default ]; } diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 3efbdc4fe17d6..984bfeaaa3fad 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1133,17 +1133,11 @@ class OpenMPIRBuilder { /// \param NeedsBarrier Indicates whether a barrier must be inserted after /// the loop. /// \param LoopType Type of workshare loop. - /// \param HasDistSchedule Defines if the clause being lowered is - /// dist_schedule as this is handled slightly differently - /// \param DistScheduleSchedType Defines the Schedule Type for the Distribute - /// loop. Defaults to None if no Distribute loop is present. /// /// \returns Point where to insert code after the workshare construct. InsertPointOrErrorTy applyStaticWorkshareLoop( DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, - omp::WorksharingLoopType LoopType, bool NeedsBarrier, - bool HasDistSchedule = false, - omp::OMPScheduleType DistScheduleSchedType = omp::OMPScheduleType::None); + omp::WorksharingLoopType LoopType, bool NeedsBarrier); /// Modifies the canonical loop a statically-scheduled workshare loop with a /// user-specified chunk size. @@ -1156,22 +1150,13 @@ class OpenMPIRBuilder { /// \param NeedsBarrier Indicates whether a barrier must be inserted after the /// loop. /// \param ChunkSize The user-specified chunk size. 
- /// \param SchedType Optional type of scheduling to be passed to the init - /// function. - /// \param DistScheduleChunkSize The size of dist_shcedule chunk considered - /// as a unit when - /// scheduling. If \p nullptr, defaults to 1. - /// \param DistScheduleSchedType Defines the Schedule Type for the Distribute - /// loop. Defaults to None if no Distribute loop is present. /// /// \returns Point where to insert code after the workshare construct. - InsertPointOrErrorTy applyStaticChunkedWorkshareLoop( - DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, - bool NeedsBarrier, Value *ChunkSize, - omp::OMPScheduleType SchedType = - omp::OMPScheduleType::UnorderedStaticChunked, - Value *DistScheduleChunkSize = nullptr, - omp::OMPScheduleType DistScheduleSchedType = omp::OMPScheduleType::None); + InsertPointOrErrorTy applyStaticChunkedWorkshareLoop(DebugLoc DL, + CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, + bool NeedsBarrier, + Value *ChunkSize); /// Modifies the canonical loop to be a dynamically-scheduled workshare loop. /// @@ -1250,10 +1235,6 @@ class OpenMPIRBuilder { /// \param LoopType Information about type of loop worksharing. /// It corresponds to type of loop workshare OpenMP pragma. /// \param NoLoop If true, no-loop code is generated. - /// \param HasDistSchedule Defines if the clause being lowered is - /// dist_schedule as this is handled slightly differently - /// - /// \param DistScheduleChunkSize The chunk size for dist_schedule loop /// /// \returns Point where to insert code after the workshare construct. LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop( @@ -1265,8 +1246,7 @@ class OpenMPIRBuilder { bool HasOrderedClause = false, omp::WorksharingLoopType LoopType = omp::WorksharingLoopType::ForStaticLoop, - bool NoLoop = false, bool HasDistSchedule = false, - Value *DistScheduleChunkSize = nullptr); + bool NoLoop = false); /// Tile a loop nest. 
/// diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index be3053c34bc4e..26fdbadafe6a9 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -14,7 +14,6 @@ #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" @@ -137,8 +136,6 @@ static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType) { case OMPScheduleType::NomergeOrderedRuntime: case OMPScheduleType::NomergeOrderedAuto: case OMPScheduleType::NomergeOrderedTrapezoidal: - case OMPScheduleType::OrderedDistributeChunked: - case OMPScheduleType::OrderedDistribute: break; default: return false; @@ -185,7 +182,7 @@ static const omp::GV &getGridValue(const Triple &T, Function *Kernel) { /// arguments. static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, - bool HasSimdModifier, bool HasDistScheduleChunks) { + bool HasSimdModifier) { // Currently, the default schedule it static. switch (ClauseKind) { case OMP_SCHEDULE_Default: @@ -202,9 +199,6 @@ getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, case OMP_SCHEDULE_Runtime: return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd : OMPScheduleType::BaseRuntime; - case OMP_SCHEDULE_Distribute: - return HasDistScheduleChunks ? 
OMPScheduleType::BaseDistributeChunked - : OMPScheduleType::BaseDistribute; } llvm_unreachable("unhandled schedule clause argument"); } @@ -273,10 +267,9 @@ getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, - bool HasNonmonotonicModifier, bool HasOrderedClause, - bool HasDistScheduleChunks) { - OMPScheduleType BaseSchedule = getOpenMPBaseScheduleType( - ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks); + bool HasNonmonotonicModifier, bool HasOrderedClause) { + OMPScheduleType BaseSchedule = + getOpenMPBaseScheduleType(ClauseKind, HasChunks, HasSimdModifier); OMPScheduleType OrderedSchedule = getOpenMPOrderingScheduleType(BaseSchedule, HasOrderedClause); OMPScheduleType Result = getOpenMPMonotonicityScheduleType( @@ -4810,8 +4803,7 @@ static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop( DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, - WorksharingLoopType LoopType, bool NeedsBarrier, bool HasDistSchedule, - OMPScheduleType DistScheduleSchedType) { + WorksharingLoopType LoopType, bool NeedsBarrier) { assert(CLI->isValid() && "Requires a valid canonical loop"); assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) && "Require dedicated allocate IP"); @@ -4867,29 +4859,15 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop( // Call the "init" function and update the trip count of the loop with the // value it produced. 
- auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound, - PUpperBound, IVTy, PStride, One, Zero, StaticInit, - this](Value *SchedulingType, auto &Builder) { - SmallVector<Value *, 10> Args({SrcLoc, ThreadNum, SchedulingType, PLastIter, - PLowerBound, PUpperBound}); - if (LoopType == WorksharingLoopType::DistributeForStaticLoop) { - Value *PDistUpperBound = - Builder.CreateAlloca(IVTy, nullptr, "p.distupperbound"); - Args.push_back(PDistUpperBound); - } - Args.append({PStride, One, Zero}); - createRuntimeFunctionCall(StaticInit, Args); - }; - BuildInitCall(SchedulingType, Builder); - if (HasDistSchedule && - LoopType != WorksharingLoopType::DistributeStaticLoop) { - Constant *DistScheduleSchedType = ConstantInt::get( - I32Type, static_cast<int>(omp::OMPScheduleType::OrderedDistribute)); - // We want to emit a second init function call for the dist_schedule clause - // to the Distribute construct. This should only be done however if a - // Workshare Loop is nested within a Distribute Construct - BuildInitCall(DistScheduleSchedType, Builder); + SmallVector<Value *, 10> Args( + {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound}); + if (LoopType == WorksharingLoopType::DistributeForStaticLoop) { + Value *PDistUpperBound = + Builder.CreateAlloca(IVTy, nullptr, "p.distupperbound"); + Args.push_back(PDistUpperBound); } + Args.append({PStride, One, Zero}); + createRuntimeFunctionCall(StaticInit, Args); Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound); Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound); Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound); @@ -4928,44 +4906,14 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop( return AfterIP; } -static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, - LoopInfo &LI); -static void addLoopMetadata(CanonicalLoopInfo *Loop, - ArrayRef<Metadata *> Properties); - -static void 
applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, - LLVMContext &Ctx, Loop *Loop, - LoopInfo &LoopInfo, - SmallVector<Metadata *> &LoopMDList) { - SmallSet<BasicBlock *, 8> Reachable; - - // Get the basic blocks from the loop in which memref instructions - // can be found. - // TODO: Generalize getting all blocks inside a CanonicalizeLoopInfo, - // preferably without running any passes. - for (BasicBlock *Block : Loop->getBlocks()) { - if (Block == CLI->getCond() || Block == CLI->getHeader()) - continue; - Reachable.insert(Block); - } - - // Add access group metadata to memory-access instructions. - MDNode *AccessGroup = MDNode::getDistinct(Ctx, {}); - for (BasicBlock *BB : Reachable) - addAccessGroupMetadata(BB, AccessGroup, LoopInfo); - // TODO: If the loop has existing parallel access metadata, have - // to combine two lists. - LoopMDList.push_back(MDNode::get( - Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccessGroup})); -} - OpenMPIRBuilder::InsertPointOrErrorTy -OpenMPIRBuilder::applyStaticChunkedWorkshareLoop( - DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, - bool NeedsBarrier, Value *ChunkSize, OMPScheduleType SchedType, - Value *DistScheduleChunkSize, OMPScheduleType DistScheduleSchedType) { +OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(DebugLoc DL, + CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, + bool NeedsBarrier, + Value *ChunkSize) { assert(CLI->isValid() && "Requires a valid canonical loop"); - assert(ChunkSize || DistScheduleChunkSize && "Chunk size is required"); + assert(ChunkSize && "Chunk size is required"); LLVMContext &Ctx = CLI->getFunction()->getContext(); Value *IV = CLI->getIndVar(); @@ -4979,18 +4927,6 @@ OpenMPIRBuilder::applyStaticChunkedWorkshareLoop( Constant *Zero = ConstantInt::get(InternalIVTy, 0); Constant *One = ConstantInt::get(InternalIVTy, 1); - Function *F = CLI->getFunction(); - FunctionAnalysisManager FAM; - FAM.registerPass([]() { return DominatorTreeAnalysis(); }); - 
FAM.registerPass([]() { return PassInstrumentationAnalysis(); }); - LoopAnalysis LIA; - LoopInfo &&LI = LIA.run(*F, FAM); - Loop *L = LI.getLoopFor(CLI->getHeader()); - SmallVector<Metadata *> LoopMDList; - if (ChunkSize || DistScheduleChunkSize) - applyParallelAccessesMetadata(CLI, Ctx, L, LI, LoopMDList); - addLoopMetadata(CLI, LoopMDList); - // Declare useful OpenMP runtime functions. FunctionCallee StaticInit = getKmpcForStaticInitForType(InternalIVTy, M, *this); @@ -5013,18 +4949,13 @@ OpenMPIRBuilder::applyStaticChunkedWorkshareLoop( Builder.SetCurrentDebugLocation(DL); // TODO: Detect overflow in ubsan or max-out with current tripcount. - Value *CastedChunkSize = Builder.CreateZExtOrTrunc( - ChunkSize ? ChunkSize : Zero, InternalIVTy, "chunksize"); - Value *CastedDistScheduleChunkSize = Builder.CreateZExtOrTrunc( - DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy, - "distschedulechunksize"); + Value *CastedChunkSize = + Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy, "chunksize"); Value *CastedTripCount = Builder.CreateZExt(OrigTripCount, InternalIVTy, "tripcount"); - Constant *SchedulingType = - ConstantInt::get(I32Type, static_cast<int>(SchedType)); - Constant *DistSchedulingType = - ConstantInt::get(I32Type, static_cast<int>(DistScheduleSchedType)); + Constant *SchedulingType = ConstantInt::get( + I32Type, static_cast<int>(OMPScheduleType::UnorderedStaticChunked)); Builder.CreateStore(Zero, PLowerBound); Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One); Builder.CreateStore(OrigUpperBound, PUpperBound); @@ -5036,26 +4967,12 @@ OpenMPIRBuilder::applyStaticChunkedWorkshareLoop( Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize); Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadNum = getOrCreateThreadID(SrcLoc); - auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound, - PUpperBound, PStride, One, - this](Value *SchedulingType, Value *ChunkSize, - auto &Builder) { - 
createRuntimeFunctionCall( - StaticInit, {/*loc=*/SrcLoc, /*global_tid=*/ThreadNum, - /*schedtype=*/SchedulingType, /*plastiter=*/PLastIter, - /*plower=*/PLowerBound, /*pupper=*/PUpperBound, - /*pstride=*/PStride, /*incr=*/One, - /*chunk=*/ChunkSize}); - }; - BuildInitCall(SchedulingType, CastedChunkSize, Builder); - if (DistScheduleSchedType != OMPScheduleType::None && - SchedType != OMPScheduleType::OrderedDistributeChunked && - SchedType != OMPScheduleType::OrderedDistribute) { - // We want to emit a second init function call for the dist_schedule clause - // to the Distribute construct. This should only be done however if a - // Workshare Loop is nested within a Distribute Construct - BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize, Builder); - } + createRuntimeFunctionCall( + StaticInit, {/*loc=*/SrcLoc, /*global_tid=*/ThreadNum, + /*schedtype=*/SchedulingType, /*plastiter=*/PLastIter, + /*plower=*/PLowerBound, /*pupper=*/PUpperBound, + /*pstride=*/PStride, /*incr=*/One, + /*chunk=*/CastedChunkSize}); // Load values written by the "init" function. 
Value *FirstChunkStart = @@ -5382,47 +5299,31 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop( bool NeedsBarrier, omp::ScheduleKind SchedKind, Value *ChunkSize, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, - WorksharingLoopType LoopType, bool NoLoop, bool HasDistSchedule, - Value *DistScheduleChunkSize) { + WorksharingLoopType LoopType, bool NoLoop) { if (Config.isTargetDevice()) return applyWorkshareLoopTarget(DL, CLI, AllocaIP, LoopType, NoLoop); OMPScheduleType EffectiveScheduleType = computeOpenMPScheduleType( SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier, - HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize); + HasNonmonotonicModifier, HasOrderedClause); bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) == OMPScheduleType::ModifierOrdered; - OMPScheduleType DistScheduleSchedType = OMPScheduleType::None; - if (HasDistSchedule) { - DistScheduleSchedType = DistScheduleChunkSize - ? OMPScheduleType::OrderedDistributeChunked - : OMPScheduleType::OrderedDistribute; - } switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) { case OMPScheduleType::BaseStatic: - case OMPScheduleType::BaseDistribute: - assert(!ChunkSize || !DistScheduleChunkSize && - "No chunk size with static-chunked schedule"); - if (IsOrdered && !HasDistSchedule) + assert(!ChunkSize && "No chunk size with static-chunked schedule"); + if (IsOrdered) return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType, NeedsBarrier, ChunkSize); // FIXME: Monotonicity ignored? 
- if (DistScheduleChunkSize) - return applyStaticChunkedWorkshareLoop( - DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType, - DistScheduleChunkSize, DistScheduleSchedType); - return applyStaticWorkshareLoop(DL, CLI, AllocaIP, LoopType, NeedsBarrier, - HasDistSchedule); + return applyStaticWorkshareLoop(DL, CLI, AllocaIP, LoopType, NeedsBarrier); case OMPScheduleType::BaseStaticChunked: - case OMPScheduleType::BaseDistributeChunked: - if (IsOrdered && !HasDistSchedule) + if (IsOrdered) return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType, NeedsBarrier, ChunkSize); // FIXME: Monotonicity ignored? - return applyStaticChunkedWorkshareLoop( - DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType, - DistScheduleChunkSize, DistScheduleSchedType); + return applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier, + ChunkSize); case OMPScheduleType::BaseRuntime: case OMPScheduleType::BaseAuto: @@ -6016,8 +5917,8 @@ static void addLoopMetadata(CanonicalLoopInfo *Loop, } /// Attach llvm.access.group metadata to the memref instructions of \p Block -static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, - LoopInfo &LI) { +static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, + LoopInfo &LI) { for (Instruction &I : *Block) { if (I.mayReadOrWriteMemory()) { // TODO: This instruction may already have access group from @@ -6207,8 +6108,16 @@ void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, // dependences of 'safelen' iterations are possible. // If clause order(concurrent) is specified then the memory instructions // are marked parallel even if 'safelen' is finite. - if ((Safelen == nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) - applyParallelAccessesMetadata(CanonicalLoop, Ctx, L, LI, LoopMDList); + if ((Safelen == nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) { + // Add access group metadata to memory-access instructions. 
+ MDNode *AccessGroup = MDNode::getDistinct(Ctx, {}); + for (BasicBlock *BB : Reachable) + addSimdMetadata(BB, AccessGroup, LI); + // TODO: If the loop has existing parallel access metadata, have + // to combine two lists. + LoopMDList.push_back(MDNode::get( + Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccessGroup})); + } // FIXME: the IF clause shares a loop backedge for the SIMD and non-SIMD // versions so we can't add the loop attributes in that case. diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 0d6b2870c625a..1b069c62a8be9 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -617,7 +617,6 @@ parseScheduleClause(OpAsmParser &parser, ClauseScheduleKindAttr &scheduleAttr, break; case ClauseScheduleKind::Auto: case ClauseScheduleKind::Runtime: - case ClauseScheduleKind::Distribute: chunkSize = std::nullopt; } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index cdab9f87a8758..c31edd31e0f72 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -61,8 +61,6 @@ convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) { return llvm::omp::OMP_SCHEDULE_Auto; case omp::ClauseScheduleKind::Runtime: return llvm::omp::OMP_SCHEDULE_Runtime; - case omp::ClauseScheduleKind::Distribute: - return llvm::omp::OMP_SCHEDULE_Distribute; } llvm_unreachable("unhandled schedule clause argument"); } @@ -321,6 +319,10 @@ static LogicalResult checkImplementationStatus(Operation &op) { if (op.getDevice()) result = todo("device"); }; + auto checkDistSchedule = [&todo](auto op, LogicalResult &result) { + if (op.getDistScheduleChunkSize()) + result = todo("dist_schedule with chunk_size"); + }; auto checkHint = [](auto op, LogicalResult &) { 
if (op.getHint()) op.emitWarning("hint clause discarded"); @@ -385,6 +387,7 @@ static LogicalResult checkImplementationStatus(Operation &op) { }) .Case([&](omp::DistributeOp op) { checkAllocate(op, result); + checkDistSchedule(op, result); checkOrder(op, result); }) .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); }) @@ -2545,19 +2548,6 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, chunk = builder.CreateSExtOrTrunc(chunkVar, ivType); } - omp::DistributeOp distributeOp = nullptr; - llvm::Value *distScheduleChunk = nullptr; - bool hasDistSchedule = false; - if (llvm::isa_and_present<omp::DistributeOp>(opInst.getParentOp())) { - distributeOp = cast<omp::DistributeOp>(opInst.getParentOp()); - hasDistSchedule = distributeOp.getDistScheduleStatic(); - if (distributeOp.getDistScheduleChunkSize()) { - llvm::Value *chunkVar = moduleTranslation.lookupValue( - distributeOp.getDistScheduleChunkSize()); - distScheduleChunk = builder.CreateSExtOrTrunc(chunkVar, ivType); - } - } - PrivateVarsInfo privateVarsInfo(wsloopOp); SmallVector<omp::DeclareReductionOp> reductionDecls; @@ -2685,7 +2675,7 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, convertToScheduleKind(schedule), chunk, isSimd, scheduleMod == omp::ScheduleModifier::monotonic, scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, - workshareLoopType, noLoopMode, hasDistSchedule, distScheduleChunk); + workshareLoopType, noLoopMode); if (failed(handleError(wsloopIP, opInst))) return failure(); @@ -5276,18 +5266,15 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, if (!isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper())) { // TODO: Add support for clauses which are valid for DISTRIBUTE // constructs. Static schedule is the default. - bool hasDistSchedule = distributeOp.getDistScheduleStatic(); - auto schedule = hasDistSchedule ? 
omp::ClauseScheduleKind::Distribute - : omp::ClauseScheduleKind::Static; - // dist_schedule clauses are ordered - otherise this should be false - bool isOrdered = hasDistSchedule; + auto schedule = omp::ClauseScheduleKind::Static; + bool isOrdered = false; std::optional<omp::ScheduleModifier> scheduleMod; bool isSimd = false; llvm::omp::WorksharingLoopType workshareLoopType = llvm::omp::WorksharingLoopType::DistributeStaticLoop; bool loopNeedsBarrier = false; - llvm::Value *chunk = moduleTranslation.lookupValue( - distributeOp.getDistScheduleChunkSize()); + llvm::Value *chunk = nullptr; + llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation); llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = @@ -5296,11 +5283,12 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, convertToScheduleKind(schedule), chunk, isSimd, scheduleMod == omp::ScheduleModifier::monotonic, scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, - workshareLoopType, false, hasDistSchedule, chunk); + workshareLoopType); if (!wsloopIP) return wsloopIP.takeError(); } + if (failed(cleanupPrivateVars(builder, moduleTranslation, distributeOp.getLoc(), privVarsInfo.llvmVars, privVarsInfo.privatizers))) diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir index b122f425f0752..f2fbe91a41ecd 100644 --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -615,22 +615,3 @@ omp.declare_mapper @my_mapper : !llvm.struct<"_QFdeclare_mapperTmy_type", (i32)> // CHECK: omp.declare_mapper.info map_entries(%{{.*}}, %{{.*}} : !llvm.ptr, !llvm.ptr) omp.declare_mapper.info map_entries(%3, %2 : !llvm.ptr, !llvm.ptr) } - -// CHECK-LABEL: llvm.func @omp_dist_schedule(%arg0: i32) { -func.func @omp_dist_schedule(%arg0: i32) { - %c1_i32 = arith.constant 1 : i32 - // CHECK: %1 = llvm.mlir.constant(1024 : i32) : i32 - %c1024_i32 
= arith.constant 1024 : i32 - %c16_i32 = arith.constant 16 : i32 - %c8_i32 = arith.constant 8 : i32 - omp.teams num_teams( to %c8_i32 : i32) thread_limit(%c16_i32 : i32) { - // CHECK: omp.distribute dist_schedule_static dist_schedule_chunk_size(%1 : i32) { - omp.distribute dist_schedule_static dist_schedule_chunk_size(%c1024_i32 : i32) { - omp.loop_nest (%arg1) : i32 = (%c1_i32) to (%arg0) inclusive step (%c1_i32) { - omp.terminator - } - } - omp.terminator - } - return -} diff --git a/mlir/test/Target/LLVMIR/openmp-dist_schedule.mlir b/mlir/test/Target/LLVMIR/openmp-dist_schedule.mlir deleted file mode 100644 index a0dd5560a15c4..0000000000000 --- a/mlir/test/Target/LLVMIR/openmp-dist_schedule.mlir +++ /dev/null @@ -1,34 +0,0 @@ -// Test that dist_schedule gets correctly translated with the correct schedule type and chunk size where appropriate - -// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s - -llvm.func @distribute_dist_schedule_chunk_size(%lb : i32, %ub : i32, %step : i32, %x : i32) { - // CHECK: call void @[[RUNTIME_FUNC:__kmpc_for_static_init_4u]](ptr @1, i32 %omp_global_thread_num, i32 91, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 1024) - // We want to make sure that the next call is not another init builder. 
- // CHECK-NOT: call void @[[RUNTIME_FUNC]] - %1 = llvm.mlir.constant(1024: i32) : i32 - omp.distribute dist_schedule_static dist_schedule_chunk_size(%1 : i32) { - omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { - omp.yield - } - } - llvm.return -} - -// When a chunk size is present, we need to make sure the correct parallel accesses metadata is added -// CHECK: !2 = !{!"llvm.loop.parallel_accesses", !3} -// CHECK-NEXT: !3 = distinct !{} - -// ----- - -llvm.func @distribute_dist_schedule(%lb : i32, %ub : i32, %step : i32, %x : i32) { - // CHECK: call void @[[RUNTIME_FUNC:__kmpc_for_static_init_4u]](ptr @1, i32 %omp_global_thread_num, i32 92, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 0) - // We want to make sure that the next call is not another init builder. - // CHECK-NOT: call void @[[RUNTIME_FUNC]] - omp.distribute dist_schedule_static { - omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { - omp.yield - } - } - llvm.return -} diff --git a/mlir/test/Target/LLVMIR/openmp-dist_schedule_with_wsloop.mlir b/mlir/test/Target/LLVMIR/openmp-dist_schedule_with_wsloop.mlir deleted file mode 100644 index dad32b48e5419..0000000000000 --- a/mlir/test/Target/LLVMIR/openmp-dist_schedule_with_wsloop.mlir +++ /dev/null @@ -1,205 +0,0 @@ -// Test that dist_schedule gets correctly translated with the correct schedule type and chunk size where appropriate while using workshare loops. 
- -// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s - -llvm.func @distribute_wsloop_dist_schedule_chunked_schedule_chunked(%n: i32, %teams: i32, %threads: i32, %dcs: i32) { - %0 = llvm.mlir.constant(0 : i32) : i32 - %1 = llvm.mlir.constant(1 : i32) : i32 - %scs = llvm.mlir.constant(64 : i32) : i32 - - omp.teams num_teams(to %teams : i32) thread_limit(%threads : i32) { - omp.parallel { - omp.distribute dist_schedule_static dist_schedule_chunk_size(%dcs : i32) { - omp.wsloop schedule(static = %scs : i32) { - omp.loop_nest (%i) : i32 = (%0) to (%n) step (%1) { - omp.yield - } - } {omp.composite} - } {omp.composite} - omp.terminator - } {omp.composite} - omp.terminator - } - llvm.return -} -// CHECK: define internal void @distribute_wsloop_dist_schedule_chunked_schedule_chunked..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 33, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 64) -// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 91, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 %3) - -llvm.func @distribute_wsloop_dist_schedule_chunked_schedule_chunked_i64(%n: i32, %teams: i32, %threads: i32) { - %0 = llvm.mlir.constant(0 : i64) : i64 - %1 = llvm.mlir.constant(1 : i64) : i64 - %dcs = llvm.mlir.constant(1024 : i64) : i64 - %scs = llvm.mlir.constant(64 : i64) : i64 - %n64 = llvm.zext %n : i32 to i64 - - omp.teams num_teams(to %teams : i32) thread_limit(%threads : i32) { - omp.parallel { - omp.distribute dist_schedule_static dist_schedule_chunk_size(%dcs : i64) { - omp.wsloop schedule(static = %scs : i64) { - omp.loop_nest (%i) : i64 = (%0) to (%n64) step (%1) { - omp.yield - } - } {omp.composite} - } {omp.composite} - omp.terminator - } {omp.composite} - omp.terminator - } - llvm.return -} -// CHECK: define internal void 
@distribute_wsloop_dist_schedule_chunked_schedule_chunked_i64..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 33, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 64) -// call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 91, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 1024) - -// ----- - -llvm.func @distribute_wsloop_dist_schedule_chunked(%n: i32, %teams: i32, %threads: i32) { - %0 = llvm.mlir.constant(0 : i32) : i32 - %1 = llvm.mlir.constant(1 : i32) : i32 - %dcs = llvm.mlir.constant(1024 : i32) : i32 - - omp.teams num_teams(to %teams : i32) thread_limit(%threads : i32) { - omp.parallel { - omp.distribute dist_schedule_static dist_schedule_chunk_size(%dcs : i32) { - omp.wsloop schedule(static) { - omp.loop_nest (%i) : i32 = (%0) to (%n) step (%1) { - omp.yield - } - } {omp.composite} - } {omp.composite} - omp.terminator - } {omp.composite} - omp.terminator - } - llvm.return -} -// CHECK: define internal void @distribute_wsloop_dist_schedule_chunked..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 0) -// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 91, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 1024) - -llvm.func @distribute_wsloop_dist_schedule_chunked_i64(%n: i32, %teams: i32, %threads: i32) { - %0 = llvm.mlir.constant(0 : i64) : i64 - %1 = llvm.mlir.constant(1 : i64) : i64 - %dcs = llvm.mlir.constant(1024 : i64) : i64 - %n64 = llvm.zext %n : i32 to i64 - - omp.teams num_teams(to %teams : i32) thread_limit(%threads : i32) { - omp.parallel { - omp.distribute dist_schedule_static 
dist_schedule_chunk_size(%dcs : i64) { - omp.wsloop schedule(static) { - omp.loop_nest (%i) : i64 = (%0) to (%n64) step (%1) { - omp.yield - } - } {omp.composite} - } {omp.composite} - omp.terminator - } {omp.composite} - omp.terminator - } - llvm.return -} -// CHECK: define internal void @distribute_wsloop_dist_schedule_chunked_i64..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 0) -// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 91, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 1024) - -// ----- - -llvm.func @distribute_wsloop_schedule_chunked(%n: i32, %teams: i32, %threads: i32) { - %0 = llvm.mlir.constant(0 : i32) : i32 - %1 = llvm.mlir.constant(1 : i32) : i32 - %scs = llvm.mlir.constant(64 : i32) : i32 - - omp.teams num_teams(to %teams : i32) thread_limit(%threads : i32) { - omp.parallel { - omp.distribute dist_schedule_static { - omp.wsloop schedule(static = %scs : i32) { - omp.loop_nest (%i) : i32 = (%0) to (%n) step (%1) { - omp.yield - } - } {omp.composite} - } {omp.composite} - omp.terminator - } {omp.composite} - omp.terminator - } - llvm.return -} -// CHECK: define internal void @distribute_wsloop_schedule_chunked..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 33, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 64) -// CHECK: call void @__kmpc_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 92, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 0) - -llvm.func @distribute_wsloop_schedule_chunked_i64(%n: i32, %teams: i32, %threads: i32) { - %0 = llvm.mlir.constant(0 : i64) : i64 - %1 = llvm.mlir.constant(1 
: i64) : i64 - %scs = llvm.mlir.constant(64 : i64) : i64 - %n64 = llvm.zext %n : i32 to i64 - - omp.teams num_teams(to %teams : i32) thread_limit(%threads : i32) { - omp.parallel { - omp.distribute dist_schedule_static { - omp.wsloop schedule(static = %scs : i64) { - omp.loop_nest (%i) : i64 = (%0) to (%n64) step (%1) { - omp.yield - } - } {omp.composite} - } {omp.composite} - omp.terminator - } {omp.composite} - omp.terminator - } - llvm.return -} - -// CHECK: define internal void @distribute_wsloop_schedule_chunked_i64..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 33, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 64) -// CHECK: call void @__kmpc_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 92, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i64 1, i64 0) - -// ----- - -llvm.func @distribute_wsloop_no_chunks(%n: i32, %teams: i32, %threads: i32) { - %0 = llvm.mlir.constant(0 : i32) : i32 - %1 = llvm.mlir.constant(1 : i32) : i32 - - omp.teams num_teams(to %teams : i32) thread_limit(%threads : i32) { - omp.parallel { - omp.distribute dist_schedule_static { - omp.wsloop schedule(static) { - omp.loop_nest (%i) : i32 = (%0) to (%n) step (%1) { - omp.yield - } - } {omp.composite} - } {omp.composite} - omp.terminator - } {omp.composite} - omp.terminator - } - llvm.return -} -// CHECK: define internal void @distribute_wsloop_no_chunks..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_dist_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.distupperbound, ptr %p.stride, i32 1, i32 0) -// CHECK: call void @__kmpc_dist_for_static_init_4u(ptr @1, i32 %omp_global_thread_num9, i32 92, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.distupperbound10, ptr %p.stride, 
i32 1, i32 0) - -llvm.func @distribute_wsloop_no_chunks_i64(%n: i32, %teams: i32, %threads: i32) { - %0 = llvm.mlir.constant(0 : i64) : i64 - %1 = llvm.mlir.constant(1 : i64) : i64 - %n64 = llvm.zext %n : i32 to i64 - - omp.teams num_teams(to %teams : i32) thread_limit(%threads : i32) { - omp.parallel { - omp.distribute dist_schedule_static { - omp.wsloop schedule(static) { - omp.loop_nest (%i) : i64 = (%0) to (%n64) step (%1) { - omp.yield - } - } {omp.composite} - } {omp.composite} - omp.terminator - } {omp.composite} - omp.terminator - } - llvm.return -} -// CHECK: define internal void @distribute_wsloop_no_chunks_i64..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) #0 { -// CHECK: call void @__kmpc_dist_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.distupperbound, ptr %p.stride, i64 1, i64 0) -// CHECK: call void @__kmpc_dist_for_static_init_8u(ptr @1, i32 %omp_global_thread_num9, i32 92, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.distupperbound10, ptr %p.stride, i64 1, i64 0) \ No newline at end of file diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir index 731a6322736d4..af6d254cfd3c3 100644 --- a/mlir/test/Target/LLVMIR/openmp-todo.mlir +++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir @@ -39,6 +39,19 @@ llvm.func @distribute_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr // ----- +llvm.func @distribute_dist_schedule(%lb : i32, %ub : i32, %step : i32, %x : i32) { + // expected-error@below {{not yet implemented: Unhandled clause dist_schedule with chunk_size in omp.distribute operation}} + // expected-error@below {{LLVM Translation failed for operation: omp.distribute}} + omp.distribute dist_schedule_static dist_schedule_chunk_size(%x : i32) { + omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { + omp.yield + } + } + llvm.return +} + +// ----- + llvm.func @distribute_order(%lb : i32, 
%ub : i32, %step : i32) { // expected-error@below {{not yet implemented: Unhandled clause order in omp.distribute operation}} // expected-error@below {{LLVM Translation failed for operation: omp.distribute}} _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
