https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/198783
>From bec04650909a703721c9e9bda35e4f03f98740b7 Mon Sep 17 00:00:00 2001 From: Sergio Afonso <[email protected]> Date: Wed, 20 May 2026 13:06:54 +0100 Subject: [PATCH 1/2] [Flang][OpenMP] Add combined construct information This patch adds the `omp.combined` attribute to OpenMP dialect operations following changes to the `ComposableOpInterface`. This attribute is added to operations representing non-innermost leaf constructs of a combined construct and to standalone block-associated constructs that can be combined with their parent construct. Changes are made to the OpenMP lowering logic, as well as the do-concurrent, workshare and workdistribute transformation passes. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 76 +- .../OpenMP/DoConcurrentConversion.cpp | 8 +- .../Optimizer/OpenMP/LowerWorkdistribute.cpp | 6 +- flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 7 + .../OpenMP/workshare-array-array-assign.f90 | 3 +- .../Integration/OpenMP/workshare-axpy.f90 | 3 +- .../OpenMP/workshare-forall-sliced-array.f90 | 3 +- .../OpenMP/workshare-scalar-array-assign.f90 | 3 +- .../OpenMP/workshare-scalar-array-mul.f90 | 4 + flang/test/Lower/OpenMP/compound.f90 | 1094 +++++++++++++++++ .../Lower/OpenMP/multiple-entry-points.f90 | 1 + flang/test/Lower/OpenMP/workshare.f90 | 6 +- .../Transforms/DoConcurrent/basic_device.f90 | 6 +- .../Transforms/DoConcurrent/basic_host.f90 | 2 +- .../Transforms/DoConcurrent/basic_host.mlir | 2 +- .../Transforms/DoConcurrent/local_device.mlir | 10 +- .../locality_specifiers_simple.mlir | 2 +- .../DoConcurrent/non_const_bounds.f90 | 2 +- .../Transforms/DoConcurrent/reduce_add.mlir | 2 +- .../DoConcurrent/reduce_all_regions.mlir | 2 +- .../DoConcurrent/reduce_device.mlir | 8 +- .../DoConcurrent/reduce_device_min.f90 | 4 +- .../Transforms/DoConcurrent/reduce_local.mlir | 2 +- .../reduction_symbol_resultion.f90 | 4 +- .../DoConcurrent/runtime_sized_array.f90 | 2 +- .../DoConcurrent/skip_all_nested_loops.f90 | 2 +- .../DoConcurrent/use_loop_bounds_in_body.f90 
| 12 +- .../OpenMP/lower-workdistribute-doloop.mlir | 2 +- .../lower-workdistribute-fission-host.mlir | 4 +- .../lower-workdistribute-fission-target.mlir | 4 +- .../OpenMP/lower-workdistribute-fission.mlir | 2 +- ...-workdistribute-runtime-assign-scalar.mlir | 4 +- .../omp-function-filtering-todo.mlir | 4 +- 33 files changed, 1225 insertions(+), 71 deletions(-) create mode 100644 flang/test/Lower/OpenMP/compound.f90 diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index e9a8960298c2e..d0e4acd13be85 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -71,6 +71,25 @@ static void genOMPDispatch(lower::AbstractConverter &converter, const ConstructQueue &queue, ConstructQueue::const_iterator item); +/// Return the directive that is immediately nested inside of the given +/// \c parent evaluation, if it is its only non-end-statement nested evaluation +/// and it represents an OpenMP construct. +lower::pft::Evaluation * +extractOnlyOmpNestedEval(lower::pft::Evaluation &parent) { + if (!parent.hasNestedEvaluations()) + return nullptr; + + auto &nested{parent.getFirstNestedEvaluation()}; + if (!nested.isA<parser::OpenMPConstruct>()) + return nullptr; + + for (auto &sibling : parent.getNestedEvaluations()) + if (&sibling != &nested && !sibling.isEndStmt()) + return nullptr; + + return &nested; +} + static llvm::SmallVector<Object> makeObjects(llvm::ArrayRef<const semantics::Symbol *> syms) { llvm::SmallVector<Object> objects; @@ -386,25 +405,6 @@ class DirectivePatternVisitor { } } - /// Return the directive that is immediately nested inside of the given - /// \c parent evaluation, if it is its only non-end-statement nested - /// evaluation and it represents an OpenMP construct. 
- lower::pft::Evaluation * - extractOnlyOmpNestedEval(lower::pft::Evaluation &parent) { - if (!parent.hasNestedEvaluations()) - return nullptr; - - auto &nested{parent.getFirstNestedEvaluation()}; - if (!nested.isA<parser::OpenMPConstruct>()) - return nullptr; - - for (auto &sibling : parent.getNestedEvaluations()) - if (&sibling != &nested && !sibling.isEndStmt()) - return nullptr; - - return &nested; - } - protected: semantics::SemanticsContext &semaCtx; @@ -4307,7 +4307,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, // Lowered in the enclosing genSectionsOp. break; case llvm::omp::Directive::OMPD_sections: - genSectionsOp(converter, symTable, semaCtx, eval, loc, queue, item); + newOp = genSectionsOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_simd: newOp = @@ -4394,6 +4394,42 @@ static void genOMPDispatch(lower::AbstractConverter &converter, finalizeStmtCtx(); if (loopLeaf) symTable.popScope(); + + // Add the omp.combined attribute to eligible ops. In this case, all + // composable ops that are not loop-associated, except for the ones that can + // only appear as the innermost leaf construct. + if (!loopLeaf && + llvm::isa_and_present<mlir::omp::ComposableOpInterface>(newOp) && + !llvm::isa<mlir::omp::SectionsOp, mlir::omp::WorkshareOp, + mlir::omp::WorkdistributeOp>(newOp)) { + bool isCombined = false; + if (std::next(item) != queue.end()) { + // Non-innermost leafs of a combined construct must always hold the + // attribute. + isCombined = true; + } else if (lower::pft::Evaluation *nestedEval = + extractOnlyOmpNestedEval(eval)) { + // Combinable constructs that are immediately nested with no other + // statements or directives preventing them from being combined need the + // attribute as well. 
+ OmpDirectiveSet combinableDirs = + (llvm::omp::blockConstructSet & + ~OmpDirectiveSet{llvm::omp::Directive::OMPD_ordered, + llvm::omp::Directive::OMPD_scope, + llvm::omp::Directive::OMPD_taskgroup}) | + (llvm::omp::loopConstructSet & ~llvm::omp::loopTransformationSet); + const auto &ompEval = nestedEval->get<parser::OpenMPConstruct>(); + llvm::omp::Directive nestedDir = + parser::omp::GetOmpDirectiveName(ompEval).v; + llvm::omp::Directive firstLeafDir = + llvm::omp::getLeafConstructsOrSelf(nestedDir).front(); + + if (combinableDirs.test(firstLeafDir)) + isCombined = true; + } + if (isCombined) + llvm::cast<mlir::omp::ComposableOpInterface>(newOp).setCombined(true); + } } //===----------------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index 950a64fc13d57..945250daf1dbe 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -326,14 +326,18 @@ class DoConcurrentConversion targetOp = genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns, targetClauseOps, loopNestClauseOps, liveInShapeInfoMap); - genTeamsOp(rewriter, loop, mapper); + auto teamsOp = genTeamsOp(rewriter, loop, mapper); + targetOp.setCombined(true); + teamsOp.setCombined(true); } mlir::omp::ParallelOp parallelOp = genParallelOp(rewriter, loop, ivInfos, mapper); - // Only set as composite when part of `distribute parallel do`. + // Only set as composite when part of `distribute parallel do`, and only set + // as combined when part of `parallel do`. 
parallelOp.setComposite(mapToDevice); + parallelOp.setCombined(!mapToDevice); if (!mapToDevice) genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, loopNestClauseOps); diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp index 3274e04179d33..c2de2499f7d10 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp @@ -274,8 +274,10 @@ fissionWorkdistribute(omp::WorkdistributeOp workdistribute) { } if (parallelize && hoisted.empty() && - parallelize->getNextNode() == terminator) + parallelize->getNextNode() == terminator) { + teams.setCombined(true); break; + } if (parallelize) { auto newTeams = rewriter.cloneWithoutRegions(teams); auto *newTeamsBlock = rewriter.createBlock( @@ -290,6 +292,7 @@ fissionWorkdistribute(omp::WorkdistributeOp workdistribute) { parallelize->replaceAllUsesWith(cloned); parallelize->erase(); omp::TerminatorOp::create(rewriter, loc); + newTeams.setCombined(true); changed = true; } } @@ -1591,6 +1594,7 @@ genIsolatedTargetOp(omp::TargetOp targetOp, SmallVector<Value> &postMapOperands, targetOp.getThreadLimitVars(), targetOp.getPrivateMapsAttr(), omp::TargetExecModeAttr::get(targetOp->getContext(), omp::TargetExecMode::spmd)); + isolatedTargetOp.setCombined(true); auto *isolatedTargetBlock = rewriter.createBlock(&isolatedTargetOp.getRegion(), isolatedTargetOp.getRegion().begin(), {}, {}); diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp index a41d8d8826501..b8231bc35c999 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp @@ -612,6 +612,13 @@ LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, DominanceInfo &di) { term->erase(); newOp->erase(); wsOp->erase(); + + // If this was part of a combined construct (e.g. 
'parallel workshare'), the + // changes we just made to the region can be incompatible with a combined + // construct, such as containing multiple block-associated constructs in it. + if (auto parentOp = + dyn_cast<omp::ComposableOpInterface>(parentBlock->getParentOp())) + parentOp.setCombined(false); } else { // Otherwise just change the operation to an omp.single. diff --git a/flang/test/Integration/OpenMP/workshare-array-array-assign.f90 b/flang/test/Integration/OpenMP/workshare-array-array-assign.f90 index e9ec5d9175beb..3ccb46ebeebea 100644 --- a/flang/test/Integration/OpenMP/workshare-array-array-assign.f90 +++ b/flang/test/Integration/OpenMP/workshare-array-array-assign.f90 @@ -23,7 +23,7 @@ subroutine sb1(x, y) ! HLFIR: omp.terminator ! HLFIR: } ! HLFIR: omp.terminator -! HLFIR: } +! HLFIR: } {omp.combined} ! FIR: omp.parallel { ! FIR: omp.wsloop nowait { @@ -32,3 +32,4 @@ subroutine sb1(x, y) ! FIR: omp.barrier ! FIR: omp.terminator ! FIR: } +! FIR-NOT: omp.combined diff --git a/flang/test/Integration/OpenMP/workshare-axpy.f90 b/flang/test/Integration/OpenMP/workshare-axpy.f90 index 846bef5f5082c..3bbbb159de707 100644 --- a/flang/test/Integration/OpenMP/workshare-axpy.f90 +++ b/flang/test/Integration/OpenMP/workshare-axpy.f90 @@ -32,7 +32,7 @@ subroutine sb1(a, x, y, z) ! HLFIR: } ! HLFIR-NOT: omp.barrier ! HLFIR: omp.terminator -! HLFIR: } +! HLFIR: } {omp.combined} ! HLFIR: return ! HLFIR: } ! HLFIR:} @@ -53,3 +53,4 @@ subroutine sb1(a, x, y, z) ! FIR: omp.barrier ! FIR: omp.terminator ! FIR: } +! FIR-NOT:omp.combined diff --git a/flang/test/Integration/OpenMP/workshare-forall-sliced-array.f90 b/flang/test/Integration/OpenMP/workshare-forall-sliced-array.f90 index 88d1062b091bf..e841213c2f1bf 100644 --- a/flang/test/Integration/OpenMP/workshare-forall-sliced-array.f90 +++ b/flang/test/Integration/OpenMP/workshare-forall-sliced-array.f90 @@ -36,7 +36,7 @@ subroutine workshare_forall_sliced(a1) ! HLFIR: omp.terminator ! HLFIR: } ! HLFIR: omp.terminator -! 
HLFIR: } +! HLFIR: } {omp.combined} ! After workshare lowering, the forall should be in omp.single (since it ! contains operations that are not safe to parallelize across threads). @@ -51,6 +51,7 @@ subroutine workshare_forall_sliced(a1) ! FIR: omp.barrier ! FIR: omp.terminator ! FIR: } +! FIR-NOT: omp.combined ! Verify LLVM IR is generated successfully (the original issue caused crashes) ! LLVM-LABEL: define {{.*}}workshare_forall_sliced diff --git a/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 b/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 index 6c180cd639997..43e6cc4bef7b7 100644 --- a/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 +++ b/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 @@ -24,7 +24,7 @@ subroutine sb1(a, x) ! HLFIR: omp.terminator ! HLFIR: } ! HLFIR: omp.terminator -! HLFIR: } +! HLFIR: } {omp.combined} ! FIR: omp.parallel { ! FIR: %[[SCALAR_ALLOCA:.*]] = fir.alloca i32 @@ -43,3 +43,4 @@ subroutine sb1(a, x) ! FIR: } ! FIR: omp.barrier ! FIR: omp.terminator +! FIR-NOT: omp.combined diff --git a/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 b/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 index 9b8ef66b48f47..1a9c9a031d9c4 100644 --- a/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 +++ b/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 @@ -26,12 +26,14 @@ program test ! HLFIR-O3: hlfir.destroy ! HLFIR-O3: omp.terminator ! HLFIR-O3: omp.terminator +! HLFIR-O3: omp.combined ! FIR-O3: omp.parallel { ! FIR-O3: omp.wsloop nowait { ! FIR-O3: omp.loop_nest ! FIR-O3: omp.barrier ! FIR-O3: omp.terminator +! FIR-O3-NOT:omp.combined ! HLFIR-O0: omp.parallel { ! HLFIR-O0: omp.workshare { @@ -40,6 +42,7 @@ program test ! HLFIR-O0: hlfir.destroy ! HLFIR-O0: omp.terminator ! HLFIR-O0: omp.terminator +! HLFIR-O0: omp.combined ! Check the copyprivate copy function ! 
FIR-O0: func.func private @_workshare_copy_heap_{{.*}}(%[[DST:.*]]: {{.*}}, %[[SRC:.*]]: {{.*}}) @@ -63,3 +66,4 @@ program test ! FIR-O0: omp.terminator ! FIR-O0: omp.barrier ! FIR-O0: omp.terminator +! FIR-O0-NOT:omp.combined diff --git a/flang/test/Lower/OpenMP/compound.f90 b/flang/test/Lower/OpenMP/compound.f90 new file mode 100644 index 0000000000000..d61745345a640 --- /dev/null +++ b/flang/test/Lower/OpenMP/compound.f90 @@ -0,0 +1,1094 @@ +! This test checks lowering of compound (combined and composite) constructs. +! Specifically, it makes sure that the proper ComposableOpInterface attributes +! are set. + +! RUN: bbc -fopenmp -fopenmp-version=60 -emit-hlfir %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 %s -o - | FileCheck %s + +! ------------------------------------------------------------------------------ +! COMPOSITE CONSTRUCTS +! ------------------------------------------------------------------------------ + +subroutine distribute_parallel_do() + implicit none + integer :: i + + !$omp teams + !$omp distribute parallel do + do i=1, 10 + end do + !$omp end teams +end subroutine + +! CHECK-LABEL: func.func @_QPdistribute_parallel_do +! CHECK: omp.parallel +! CHECK: omp.distribute +! CHECK-NEXT: omp.wsloop +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} +! CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} + +subroutine distribute_parallel_do_simd() + implicit none + integer :: i + + !$omp teams + !$omp distribute parallel do simd + do i=1, 10 + end do + !$omp end teams +end subroutine + +! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd +! CHECK: omp.parallel +! CHECK: omp.distribute +! CHECK-NEXT: omp.wsloop +! CHECK-NEXT: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} +! CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} +! 
CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} + +subroutine distribute_simd() + implicit none + integer :: i + + !$omp teams + !$omp distribute simd + do i=1, 10 + end do + !$omp end teams +end subroutine + +! CHECK-LABEL: func.func @_QPdistribute_simd +! CHECK: omp.distribute +! CHECK-NEXT: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} +! CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} + +subroutine do_simd() + implicit none + integer :: i + + !$omp do simd + do i=1, 10 + end do +end subroutine + +! CHECK-LABEL: func.func @_QPdo_simd +! CHECK: omp.wsloop +! CHECK-NEXT: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} +! CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} + +! TODO: Add taskloop simd once supported by lowering. + +! ------------------------------------------------------------------------------ +! COMBINED CONSTRUCTS +! ------------------------------------------------------------------------------ + +subroutine masked_taskloop() + implicit none + integer :: i + + !$omp masked taskloop + do i=1, 10 + end do +end subroutine + +! CHECK-LABEL: func.func @_QPmasked_taskloop +! CHECK: omp.masked +! CHECK: omp.taskloop.context +! CHECK: omp.taskloop.wrapper +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine master_taskloop() + implicit none + integer :: i + + !$omp master taskloop + do i=1, 10 + end do +end subroutine + +! CHECK-LABEL: func.func @_QPmaster_taskloop +! CHECK: omp.master +! CHECK: omp.taskloop.context +! CHECK: omp.taskloop.wrapper +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! 
CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine parallel_do() + implicit none + integer :: i + + !$omp parallel do + do i=1, 10 + end do +end subroutine + +! CHECK-LABEL: func.func @_QPparallel_do +! CHECK: omp.parallel +! CHECK: omp.wsloop +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine parallel_loop() + implicit none + integer :: i + + !$omp parallel loop + do i=1, 10 + end do +end subroutine + +! CHECK-LABEL: func.func @_QPparallel_loop +! CHECK: omp.parallel +! CHECK: omp.wsloop +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine parallel_masked() + implicit none + + !$omp parallel masked + call foo() + !$omp end parallel masked +end subroutine + +! CHECK-LABEL: func.func @_QPparallel_masked +! CHECK: omp.parallel +! CHECK: omp.masked +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine parallel_master() + implicit none + + !$omp parallel master + call foo() + !$omp end parallel master +end subroutine + +! CHECK-LABEL: func.func @_QPparallel_master +! CHECK: omp.parallel +! CHECK: omp.master +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine parallel_sections() + implicit none + + !$omp parallel sections + call foo() + !$omp end parallel sections +end subroutine + +! CHECK-LABEL: func.func @_QPparallel_sections +! CHECK: omp.parallel +! CHECK: omp.sections +! CHECK: omp.section +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! 
CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine parallel_workshare() + implicit none + integer :: x(10) + + !$omp parallel workshare + x = 1 + !$omp end parallel workshare +end subroutine + +! CHECK-LABEL: func.func @_QPparallel_workshare +! CHECK: omp.parallel +! CHECK: omp.workshare +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine target_loop() + implicit none + integer :: i + + !$omp target loop + do i=1, 10 + end do +end subroutine + +! CHECK-LABEL: func.func @_QPtarget_loop +! CHECK: omp.target +! CHECK: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine target_parallel() + implicit none + + !$omp target parallel + call foo() + !$omp end target parallel +end subroutine + +! CHECK-LABEL: func.func @_QPtarget_parallel +! CHECK: omp.target +! CHECK: omp.parallel +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine target_simd() + implicit none + integer :: i + + !$omp target simd + do i=1, 10 + end do +end subroutine + +! CHECK-LABEL: func.func @_QPtarget_simd +! CHECK: omp.target +! CHECK: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine target_teams() + implicit none + + !$omp target teams + call foo() + !$omp end target teams +end subroutine + +! CHECK-LABEL: func.func @_QPtarget_teams +! CHECK: omp.target +! CHECK: omp.teams +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! 
CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine teams_distribute() + implicit none + integer :: i + + !$omp teams distribute + do i=1, 10 + end do +end subroutine + +! CHECK-LABEL: func.func @_QPteams_distribute +! CHECK: omp.teams +! CHECK: omp.distribute +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine teams_loop() + implicit none + integer :: i + + !$omp teams loop + do i=1, 10 + end do +end subroutine + +! CHECK-LABEL: func.func @_QPteams_loop +! CHECK: omp.teams +! CHECK: omp.parallel +! CHECK: omp.distribute +! CHECK-NEXT: omp.wsloop +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} +! CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.composite{{.*}}} +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine teams_workdistribute() + implicit none + integer :: x + + !$omp teams workdistribute + x = 1 + !$omp end teams workdistribute +end subroutine + +! CHECK-LABEL: func.func @_QPteams_workdistribute +! CHECK: omp.teams +! CHECK: omp.workdistribute +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +! ------------------------------------------------------------------------------ +! COMBINED CONSTRUCTS (SPLIT) +! ------------------------------------------------------------------------------ + +subroutine masked_loop() + implicit none + integer :: i + + !$omp masked + !$omp loop + do i=1, 10 + end do + !$omp end masked +end subroutine + +! CHECK-LABEL: func.func @_QPmasked_loop +! CHECK: omp.masked +! CHECK: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! 
CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine masked_parallel() + implicit none + + !$omp masked + !$omp parallel + call foo() + !$omp end parallel + !$omp end masked +end subroutine + +! CHECK-LABEL: func.func @_QPmasked_parallel +! CHECK: omp.masked +! CHECK: omp.parallel +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine masked_simd() + implicit none + integer :: i + + !$omp masked + !$omp simd + do i=1, 10 + end do + !$omp end masked +end subroutine + +! CHECK-LABEL: func.func @_QPmasked_simd +! CHECK: omp.masked +! CHECK: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine masked_target() + implicit none + + !$omp masked + !$omp target + call foo() + !$omp end target + !$omp end masked +end subroutine + +! CHECK-LABEL: func.func @_QPmasked_target +! CHECK: omp.masked +! CHECK: omp.target +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine masked_target_data() + implicit none + integer :: x(10) + + !$omp masked + !$omp target_data map(tofrom: x) + call foo() + !$omp end target_data + !$omp end masked +end subroutine + +! CHECK-LABEL: func.func @_QPmasked_target_data +! CHECK: omp.masked +! CHECK: omp.target_data +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine masked_task() + implicit none + + !$omp masked + !$omp task + call foo() + !$omp end task + !$omp end masked +end subroutine + +! CHECK-LABEL: func.func @_QPmasked_task +! CHECK: omp.masked +! CHECK: omp.task +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! 
CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine master_loop() + implicit none + integer :: i + + !$omp master + !$omp loop + do i=1, 10 + end do + !$omp end master +end subroutine + +! CHECK-LABEL: func.func @_QPmaster_loop +! CHECK: omp.master +! CHECK: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine master_parallel() + implicit none + + !$omp master + !$omp parallel + call foo() + !$omp end parallel + !$omp end master +end subroutine + +! CHECK-LABEL: func.func @_QPmaster_parallel +! CHECK: omp.master +! CHECK: omp.parallel +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine master_simd() + implicit none + integer :: i + + !$omp master + !$omp simd + do i=1, 10 + end do + !$omp end master +end subroutine + +! CHECK-LABEL: func.func @_QPmaster_simd +! CHECK: omp.master +! CHECK: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine master_target() + implicit none + + !$omp master + !$omp target + call foo() + !$omp end target + !$omp end master +end subroutine + +! CHECK-LABEL: func.func @_QPmaster_target +! CHECK: omp.master +! CHECK: omp.target +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine master_target_data() + implicit none + integer :: x(10) + + !$omp master + !$omp target_data map(tofrom: x) + call foo() + !$omp end target_data + !$omp end master +end subroutine + +! CHECK-LABEL: func.func @_QPmaster_target_data +! CHECK: omp.master +! CHECK: omp.target_data +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! 
CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine master_task() + implicit none + + !$omp master + !$omp task + call foo() + !$omp end task + !$omp end master +end subroutine + +! CHECK-LABEL: func.func @_QPmaster_task +! CHECK: omp.master +! CHECK: omp.task +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine parallel_simd() + implicit none + integer :: i + + !$omp parallel + !$omp simd + do i=1, 10 + end do + !$omp end parallel +end subroutine + +! CHECK-LABEL: func.func @_QPparallel_simd +! CHECK: omp.parallel +! CHECK: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine parallel_single() + implicit none + + !$omp parallel + !$omp single + call foo() + !$omp end single + !$omp end parallel +end subroutine + +! CHECK-LABEL: func.func @_QPparallel_single +! CHECK: omp.parallel +! CHECK: omp.single +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine parallel_target() + implicit none + + !$omp parallel + !$omp target + call foo() + !$omp end target + !$omp end parallel +end subroutine + +! CHECK-LABEL: func.func @_QPparallel_target +! CHECK: omp.parallel +! CHECK: omp.target +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine parallel_target_data() + implicit none + integer :: x(10) + + !$omp parallel + !$omp target_data map(tofrom: x) + call foo() + !$omp end target_data + !$omp end parallel +end subroutine + +! CHECK-LABEL: func.func @_QPparallel_target_data +! CHECK: omp.parallel +! CHECK: omp.target_data +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! 
CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine parallel_task() + implicit none + + !$omp parallel + !$omp task + call foo() + !$omp end task + !$omp end parallel +end subroutine + +! CHECK-LABEL: func.func @_QPparallel_task +! CHECK: omp.parallel +! CHECK: omp.task +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine parallel_taskloop() + implicit none + integer :: i + + !$omp parallel + !$omp taskloop + do i=1, 10 + end do + !$omp end parallel +end subroutine + +! CHECK-LABEL: func.func @_QPparallel_taskloop +! CHECK: omp.parallel +! CHECK: omp.taskloop.context +! CHECK: omp.taskloop.wrapper +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine single_loop() + implicit none + integer :: i + + !$omp single + !$omp loop + do i=1, 10 + end do + !$omp end single +end subroutine + +! CHECK-LABEL: func.func @_QPsingle_loop +! CHECK: omp.single +! CHECK: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine single_parallel() + implicit none + + !$omp single + !$omp parallel + call foo() + !$omp end parallel + !$omp end single +end subroutine + +! CHECK-LABEL: func.func @_QPsingle_parallel +! CHECK: omp.single +! CHECK: omp.parallel +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine single_simd() + implicit none + integer :: i + + !$omp single + !$omp simd + do i=1, 10 + end do + !$omp end single +end subroutine + +! CHECK-LABEL: func.func @_QPsingle_simd +! CHECK: omp.single +! CHECK: omp.simd +! 
CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine single_target() + implicit none + + !$omp single + !$omp target + call foo() + !$omp end target + !$omp end single +end subroutine + +! CHECK-LABEL: func.func @_QPsingle_target +! CHECK: omp.single +! CHECK: omp.target +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine single_target_data() + implicit none + integer :: x(10) + + !$omp single + !$omp target_data map(tofrom: x) + call foo() + !$omp end target_data + !$omp end single +end subroutine + +! CHECK-LABEL: func.func @_QPsingle_target_data +! CHECK: omp.single +! CHECK: omp.target_data +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine single_task() + implicit none + + !$omp single + !$omp task + call foo() + !$omp end task + !$omp end single +end subroutine + +! CHECK-LABEL: func.func @_QPsingle_task +! CHECK: omp.single +! CHECK: omp.task +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine single_taskloop() + implicit none + integer :: i + + !$omp single + !$omp taskloop + do i=1, 10 + end do + !$omp end single +end subroutine + +! CHECK-LABEL: func.func @_QPsingle_taskloop +! CHECK: omp.single +! CHECK: omp.taskloop.context +! CHECK: omp.taskloop.wrapper +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} +! CHECK: omp.terminator +! 
CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine target_task() + implicit none + + !$omp target + !$omp task + call foo() + !$omp end task + !$omp end target +end subroutine + +! CHECK-LABEL: func.func @_QPtarget_task +! CHECK: omp.target +! CHECK: omp.task +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine target_taskloop() + implicit none + integer :: i + + !$omp target + !$omp taskloop + do i=1, 10 + end do + !$omp end target +end subroutine + +! CHECK-LABEL: func.func @_QPtarget_taskloop +! CHECK: omp.target +! CHECK: omp.taskloop.context +! CHECK: omp.taskloop.wrapper +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine target_data_loop() + implicit none + integer :: i + integer :: x(10) + + !$omp target_data map(tofrom: x) + !$omp loop + do i=1, 10 + end do + !$omp end target_data +end subroutine + +! CHECK-LABEL: func.func @_QPtarget_data_loop +! CHECK: omp.target_data +! CHECK: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine target_data_parallel() + implicit none + integer :: x(10) + + !$omp target_data map(tofrom: x) + !$omp parallel + call foo() + !$omp end parallel + !$omp end target_data +end subroutine + +! CHECK-LABEL: func.func @_QPtarget_data_parallel +! CHECK: omp.target_data +! CHECK: omp.parallel +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! 
CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine target_data_simd() + implicit none + integer :: i + integer :: x(10) + + !$omp target_data map(tofrom: x) + !$omp simd + do i=1, 10 + end do + !$omp end target_data +end subroutine + +! CHECK-LABEL: func.func @_QPtarget_data_simd +! CHECK: omp.target_data +! CHECK: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine task_loop() + implicit none + integer :: i + + !$omp task + !$omp loop + do i=1, 10 + end do + !$omp end task +end subroutine + +! CHECK-LABEL: func.func @_QPtask_loop +! CHECK: omp.task +! CHECK: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine task_parallel() + implicit none + + !$omp task + !$omp parallel + call foo() + !$omp end parallel + !$omp end task +end subroutine + +! CHECK-LABEL: func.func @_QPtask_parallel +! CHECK: omp.task +! CHECK: omp.parallel +! CHECK: omp.terminator +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine task_simd() + implicit none + integer :: i + + !$omp task + !$omp simd + do i=1, 10 + end do + !$omp end task +end subroutine + +! CHECK-LABEL: func.func @_QPtask_simd +! CHECK: omp.task +! CHECK: omp.simd +! CHECK-NEXT: omp.loop_nest +! CHECK: omp.yield +! CHECK-NEXT: } +! CHECK-NEXT: } +! CHECK-NOT: omp.combined +! CHECK: omp.terminator +! 
CHECK-NEXT: } {{{.*}}omp.combined{{.*}}} + +subroutine teams_parallel() + implicit none + + !$omp teams + !$omp parallel + call foo() + !$omp end parallel + !$omp end teams +end subroutine diff --git a/flang/test/Lower/OpenMP/multiple-entry-points.f90 b/flang/test/Lower/OpenMP/multiple-entry-points.f90 index 604b9cda4af3d..2cea046928cff 100644 --- a/flang/test/Lower/OpenMP/multiple-entry-points.f90 +++ b/flang/test/Lower/OpenMP/multiple-entry-points.f90 @@ -8,6 +8,7 @@ !CHECK: %[[V1:[a-z_0-9]+]] = arith.constant 2.000000e+00 : f32 !CHECK: = arith.mulf %[[V0]], %[[V1]] fastmath<contract> : f32 !CHECK: omp.terminator +!CHECK: {omp.combined} !CHECK-NOT: omp !CHECK: return diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 8e771952f5b6d..bf7cec1f99769 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -12,7 +12,7 @@ subroutine sb1(arr) !$omp end workshare !CHECK: } !$omp end parallel -!CHECK: } +!CHECK: } {omp.combined} end subroutine !CHECK-LABEL: func @_QPsb2 @@ -26,7 +26,7 @@ subroutine sb2(arr) !$omp end workshare nowait !CHECK: } !$omp end parallel -!CHECK: } +!CHECK: } {omp.combined} end subroutine !CHECK-LABEL: func @_QPsb3 @@ -38,5 +38,5 @@ subroutine sb3(arr) arr = 0 !$omp end parallel workshare !CHECK: } -!CHECK: } +!CHECK: } {omp.combined} end subroutine diff --git a/flang/test/Transforms/DoConcurrent/basic_device.f90 b/flang/test/Transforms/DoConcurrent/basic_device.f90 index fd13f9c6babe0..8404e5e3a2cde 100644 --- a/flang/test/Transforms/DoConcurrent/basic_device.f90 +++ b/flang/test/Transforms/DoConcurrent/basic_device.f90 @@ -37,7 +37,7 @@ program do_concurrent_basic ! CHECK: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#1 : {{[^(]+}}) ! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]]) - ! CHECK: omp.target + ! CHECK: omp.target kernel_type(spmd) ! 
CHECK-SAME: host_eval(%[[HOST_LB]] -> %[[LB:[[:alnum:]]+]], %[[HOST_UB]] -> %[[UB:[[:alnum:]]+]], %[[HOST_STEP]] -> %[[STEP:[[:alnum:]]+]] : index, index, index) ! CHECK-SAME: map_entries( ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, @@ -72,9 +72,9 @@ program do_concurrent_basic ! CHECK-NEXT: omp.terminator ! CHECK-NEXT: } {omp.composite} ! CHECK-NEXT: omp.terminator - ! CHECK-NEXT: } + ! CHECK-NEXT: } {omp.combined} ! CHECK-NEXT: omp.terminator - ! CHECK-NEXT: } + ! CHECK-NEXT: } {omp.combined} do concurrent (i=1:10) a(i) = i end do diff --git a/flang/test/Transforms/DoConcurrent/basic_host.f90 b/flang/test/Transforms/DoConcurrent/basic_host.f90 index b4eb15837d0a5..7d0f70add2815 100644 --- a/flang/test/Transforms/DoConcurrent/basic_host.f90 +++ b/flang/test/Transforms/DoConcurrent/basic_host.f90 @@ -40,7 +40,7 @@ program do_concurrent_basic ! CHECK-NEXT: } ! CHECK-NEXT: omp.terminator - ! CHECK-NEXT: } + ! CHECK-NEXT: } {omp.combined} do concurrent (i=1:10) a(i) = i end do diff --git a/flang/test/Transforms/DoConcurrent/basic_host.mlir b/flang/test/Transforms/DoConcurrent/basic_host.mlir index 5425829404d7b..ac7859bc20bc9 100644 --- a/flang/test/Transforms/DoConcurrent/basic_host.mlir +++ b/flang/test/Transforms/DoConcurrent/basic_host.mlir @@ -43,7 +43,7 @@ func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_bas // CHECK-NEXT: } // CHECK-NEXT: omp.terminator - // CHECK-NEXT: } + // CHECK-NEXT: } {omp.combined} fir.do_concurrent { %0 = fir.alloca i32 {bindc_name = "i"} %1:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) diff --git a/flang/test/Transforms/DoConcurrent/local_device.mlir b/flang/test/Transforms/DoConcurrent/local_device.mlir index 6da3db7f51e0b..0c74f4d745e4f 100644 --- a/flang/test/Transforms/DoConcurrent/local_device.mlir +++ b/flang/test/Transforms/DoConcurrent/local_device.mlir @@ -42,8 +42,8 @@ func.func @_QPfoo() { // CHECK: hlfir.assign %{{.*}} to %[[LOCAL_LOOP_DECL]]#0 
// CHECK: omp.yield // CHECK: } -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: } +// CHECK: } {omp.composite} +// CHECK: } {omp.composite} +// CHECK: } {omp.composite} +// CHECK: } {omp.combined} +// CHECK: } {omp.combined} diff --git a/flang/test/Transforms/DoConcurrent/locality_specifiers_simple.mlir b/flang/test/Transforms/DoConcurrent/locality_specifiers_simple.mlir index 160c1df040680..e5064f20835f8 100644 --- a/flang/test/Transforms/DoConcurrent/locality_specifiers_simple.mlir +++ b/flang/test/Transforms/DoConcurrent/locality_specifiers_simple.mlir @@ -45,4 +45,4 @@ func.func @_QPlocal_spec_translation() { // CHECK: } // CHECK: } // CHECK: omp.terminator -// CHECK: } +// CHECK: } {omp.combined} diff --git a/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 b/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 index cd1bd4f98a3f5..05a3aa7650269 100644 --- a/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 +++ b/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 @@ -41,5 +41,5 @@ end program main ! CHECK: } ! CHECK: } ! CHECK: omp.terminator -! CHECK: } +! 
CHECK: } {omp.combined} diff --git a/flang/test/Transforms/DoConcurrent/reduce_add.mlir b/flang/test/Transforms/DoConcurrent/reduce_add.mlir index 1ea3e3e527335..9b70da0131164 100644 --- a/flang/test/Transforms/DoConcurrent/reduce_add.mlir +++ b/flang/test/Transforms/DoConcurrent/reduce_add.mlir @@ -66,7 +66,7 @@ func.func @_QPdo_concurrent_reduce() { // CHECK: } // CHECK: } // CHECK: omp.terminator -// CHECK: } +// CHECK: } {omp.combined} // CHECK: return // CHECK: } diff --git a/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir b/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir index 3d5b8bf22af75..cfd2c78ca91d1 100644 --- a/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir +++ b/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir @@ -65,6 +65,6 @@ func.func @_QPdo_concurrent_reduce() { // CHECK: } // CHECK: } // CHECK: omp.terminator -// CHECK: } +// CHECK: } {omp.combined} // CHECK: return // CHECK: } diff --git a/flang/test/Transforms/DoConcurrent/reduce_device.mlir b/flang/test/Transforms/DoConcurrent/reduce_device.mlir index a66243375297e..81cb680b58cf6 100644 --- a/flang/test/Transforms/DoConcurrent/reduce_device.mlir +++ b/flang/test/Transforms/DoConcurrent/reduce_device.mlir @@ -48,7 +48,7 @@ func.func @_QPfoo() { // CHECK: %[[S_VAL:.*]] = fir.load %[[S_WS_DECL]]#0 // CHECK: %[[RED_RES:.*]] = arith.addf %[[S_VAL]], %{{.*}} fastmath<contract> : f32 // CHECK: hlfir.assign %[[RED_RES]] to %[[S_WS_DECL]]#0 -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: } +// CHECK: } {omp.composite} +// CHECK: } {omp.composite} +// CHECK: } {omp.composite} +// CHECK: } {omp.combined} diff --git a/flang/test/Transforms/DoConcurrent/reduce_device_min.f90 b/flang/test/Transforms/DoConcurrent/reduce_device_min.f90 index 509207c1db2a8..b3fa5636a3077 100644 --- a/flang/test/Transforms/DoConcurrent/reduce_device_min.f90 +++ b/flang/test/Transforms/DoConcurrent/reduce_device_min.f90 @@ -30,7 +30,7 @@ end subroutine min_reduce ! 
CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) ! CHECK-SAME: -> !fir.ref<f32> {name = "_QFmin_reduceEmin_val"} -! CHECK: omp.target +! CHECK: omp.target kernel_type(spmd) ! CHECK-SAME: map_entries({{.*}}%[[MIN_VAL_MAP]] -> %[[MIN_VAL_ARG:[[:alnum:]]+]]{{.*}}) ! CHECK: %[[MIN_VAL_DEV:.*]]:2 = hlfir.declare %[[MIN_VAL_ARG]] {{.*}} "_QFmin_reduceEmin_val" @@ -42,4 +42,4 @@ end subroutine min_reduce ! CHECK: } {omp.composite} ! CHECK: } {omp.composite} ! CHECK: } {omp.composite} -! CHECK: } +! CHECK: } {omp.combined} diff --git a/flang/test/Transforms/DoConcurrent/reduce_local.mlir b/flang/test/Transforms/DoConcurrent/reduce_local.mlir index 0f667109e6e83..8089637f54029 100644 --- a/flang/test/Transforms/DoConcurrent/reduce_local.mlir +++ b/flang/test/Transforms/DoConcurrent/reduce_local.mlir @@ -77,7 +77,7 @@ fir.declare_reduction @add_reduction_i32 : i32 init { // CHECK: } // CHECK: } // CHECK: omp.terminator -// CHECK: } +// CHECK: } {omp.combined} // CHECK: return // CHECK: } diff --git a/flang/test/Transforms/DoConcurrent/reduction_symbol_resultion.f90 b/flang/test/Transforms/DoConcurrent/reduction_symbol_resultion.f90 index ab56a4f6c7e70..7278a68b73260 100644 --- a/flang/test/Transforms/DoConcurrent/reduction_symbol_resultion.f90 +++ b/flang/test/Transforms/DoConcurrent/reduction_symbol_resultion.f90 @@ -23,10 +23,10 @@ end subroutine test2 ! CHECK: omp.parallel { ! CHECK: omp.wsloop reduction(@[[RED_SYM]] {{.*}} : !fir.ref<f32>) { ! CHECK: } -! CHECK: } +! CHECK: } {omp.combined} ! CHECK-LABEL: func.func @_QPtest2 ! CHECK: omp.parallel { ! CHECK: omp.wsloop reduction(@[[RED_SYM]] {{.*}} : !fir.ref<f32>) { ! CHECK: } -! CHECK: } +! 
CHECK: } {omp.combined} diff --git a/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 index e38474a68747f..9cf49ce3d4a2f 100644 --- a/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 +++ b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 @@ -24,7 +24,7 @@ subroutine foo(n) ! CHECK-DAG: %[[A_MAP:.*]] = omp.map.info var_ptr(%[[A_DECL]]#1 : {{.*}}) {{.*}} {name = "_QFfooEa"} ! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}) {{.*}} {name = "_QFfooEa.extent.dim0"} -! CHECK: omp.target +! CHECK: omp.target kernel_type(spmd) ! CHECK-SAME: map_entries( ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, diff --git a/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 index 2dada05396ad6..ebacd0353b90b 100644 --- a/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 +++ b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 @@ -26,7 +26,7 @@ program main ! HOST: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j", {{.*}}} ! HOST: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]] -! DEVICE: omp.target {{.*}}map_entries( +! DEVICE: omp.target kernel_type(spmd) {{.*}}map_entries( ! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, ! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, ! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, diff --git a/flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90 b/flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90 index 07a3b5b62b5a5..a86f4759b02b9 100644 --- a/flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90 +++ b/flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90 @@ -17,7 +17,7 @@ subroutine foo(a, n) end subroutine ! CHECK-LABEL: func.func @_QPfoo -! CHECK: omp.target +! CHECK: omp.target kernel_type(spmd) ! 
CHECK-SAME: host_eval(%{{.*}} -> %{{.*}}, %{{.*}} -> %[[N_HOST_EVAL:.*]], %{{.*}} -> %{{.*}} : index, index, index) ! CHECK-SAME: map_entries({{[^[:space:]]*}} -> {{[^[:space:]]*}}, ! CHECK-SAME: {{[^[:space:]]*}} -> {{[^[:space:]]*}}, {{[^[:space:]]*}} -> {{[^[:space:]]*}}, @@ -33,8 +33,8 @@ subroutine foo(a, n) ! CHECK: hlfir.assign %[[N_VAL_CVT]] to {{.*}} ! CHECK-NEXT: omp.yield ! CHECK: } -! CHECK: } -! CHECK: } -! CHECK: } -! CHECK: } -! CHECK: } +! CHECK: } {omp.composite} +! CHECK: } {omp.composite} +! CHECK: } {omp.composite} +! CHECK: } {omp.combined} +! CHECK: } {omp.combined} diff --git a/flang/test/Transforms/OpenMP/lower-workdistribute-doloop.mlir b/flang/test/Transforms/OpenMP/lower-workdistribute-doloop.mlir index 00d10d6264ec9..320124ebab21d 100644 --- a/flang/test/Transforms/OpenMP/lower-workdistribute-doloop.mlir +++ b/flang/test/Transforms/OpenMP/lower-workdistribute-doloop.mlir @@ -15,7 +15,7 @@ // CHECK: omp.terminator // CHECK: } {omp.composite} // CHECK: omp.terminator -// CHECK: } +// CHECK: } {omp.combined} // CHECK: return // CHECK: } func.func @x(%lb : index, %ub : index, %step : index, %b : i1, %addr : !fir.ref<index>) { diff --git a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir index 936a487d27249..5ae81c31ba4f8 100644 --- a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir +++ b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir @@ -60,9 +60,9 @@ // CHECK: omp.terminator // CHECK: } {omp.composite} // CHECK: omp.terminator -// CHECK: } +// CHECK: } {omp.combined} // CHECK: omp.terminator -// CHECK: } +// CHECK: } {omp.combined} // CHECK: %[[VAL_48:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %[[VAL_49:.*]] = fir.load %[[VAL_11]] : !fir.ref<index> // CHECK: %[[VAL_50:.*]] = fir.load %[[VAL_14]] : !fir.ref<index> diff --git a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir 
b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir index 832fec201bca3..95722f101efd7 100644 --- a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir +++ b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir @@ -60,9 +60,9 @@ // CHECK: omp.terminator // CHECK: } {omp.composite} // CHECK: omp.terminator -// CHECK: } +// CHECK: } {omp.combined} // CHECK: omp.terminator -// CHECK: } +// CHECK: } {omp.combined} // CHECK: %[[VAL_45:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %[[VAL_46:.*]] = fir.load %[[VAL_11]] : !fir.ref<index> // CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_14]] : !fir.ref<index> diff --git a/flang/test/Transforms/OpenMP/lower-workdistribute-fission.mlir b/flang/test/Transforms/OpenMP/lower-workdistribute-fission.mlir index c562b7009664d..e1c08bbf8bb82 100644 --- a/flang/test/Transforms/OpenMP/lower-workdistribute-fission.mlir +++ b/flang/test/Transforms/OpenMP/lower-workdistribute-fission.mlir @@ -22,7 +22,7 @@ // CHECK: omp.terminator // CHECK: } {omp.composite} // CHECK: omp.terminator -// CHECK: } +// CHECK: } {omp.combined} // CHECK: fir.call @regular_side_effect_func(%[[ARG2:.*]]) : (!fir.ref<f32>) -> () // CHECK: fir.call @my_fir_parallel_runtime_func(%[[ARG3:.*]]) : (!fir.ref<f32>) -> () // CHECK: fir.do_loop %[[VAL_8:.*]] = %[[VAL_0]] to %[[VAL_2]] step %[[VAL_1]] { diff --git a/flang/test/Transforms/OpenMP/lower-workdistribute-runtime-assign-scalar.mlir b/flang/test/Transforms/OpenMP/lower-workdistribute-runtime-assign-scalar.mlir index 04e95c27e3c5c..ea48fcc7183ca 100644 --- a/flang/test/Transforms/OpenMP/lower-workdistribute-runtime-assign-scalar.mlir +++ b/flang/test/Transforms/OpenMP/lower-workdistribute-runtime-assign-scalar.mlir @@ -35,9 +35,9 @@ // CHECK: omp.terminator // CHECK: } {omp.composite} // CHECK: omp.terminator -// CHECK: } +// CHECK: } {omp.combined} // CHECK: omp.terminator -// CHECK: } +// CHECK: } {omp.combined} // CHECK: omp.terminator // CHECK: } // CHECK: 
return diff --git a/flang/test/Transforms/omp-function-filtering-todo.mlir b/flang/test/Transforms/omp-function-filtering-todo.mlir index cd45de88edfa9..280671ff774bc 100644 --- a/flang/test/Transforms/omp-function-filtering-todo.mlir +++ b/flang/test/Transforms/omp-function-filtering-todo.mlir @@ -25,9 +25,9 @@ module attributes {omp.is_gpu = true, omp.is_target_device = true} { } } omp.terminator - } + } {omp.combined} omp.terminator - } + } {omp.combined} return } } >From a07a6ffaa00dddac500e2744dca594b2e7fe2048 Mon Sep 17 00:00:00 2001 From: Sergio Afonso <[email protected]> Date: Fri, 12 Jun 2026 12:41:16 +0100 Subject: [PATCH 2/2] address review comments --- flang/lib/Lower/OpenMP/OpenMP.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index d0e4acd13be85..fa584aa9425f8 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -74,12 +74,12 @@ static void genOMPDispatch(lower::AbstractConverter &converter, /// Return the directive that is immediately nested inside of the given /// \c parent evaluation, if it is its only non-end-statement nested evaluation /// and it represents an OpenMP construct. -lower::pft::Evaluation * +static lower::pft::Evaluation * extractOnlyOmpNestedEval(lower::pft::Evaluation &parent) { if (!parent.hasNestedEvaluations()) return nullptr; - auto &nested{parent.getFirstNestedEvaluation()}; + auto &nested = parent.getFirstNestedEvaluation(); if (!nested.isA<parser::OpenMPConstruct>()) return nullptr; @@ -4395,9 +4395,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, if (loopLeaf) symTable.popScope(); - // Add the omp.combined attribute to eligible ops. In this case, all - // composable ops that are not loop-associated, except for the ones that can - // only appear as the innermost leaf construct. 
+ // Add the omp.combined attribute to eligible ops, including non-innermost + // leafs of a combined construct and immediately nested block-associated + // combinable constructs. SECTIONS, WORKSHARE and WORKDISTRIBUTE are skipped + // because they can only appear as the innermost leaf of a combined construct. if (!loopLeaf && llvm::isa_and_present<mlir::omp::ComposableOpInterface>(newOp) && !llvm::isa<mlir::omp::SectionsOp, mlir::omp::WorkshareOp, @@ -4411,7 +4412,8 @@ static void genOMPDispatch(lower::AbstractConverter &converter, extractOnlyOmpNestedEval(eval)) { // Combinable constructs that are immediately nested with no other // statements or directives preventing them from being combined need the - // attribute as well. + // attribute as well. Disallow block constructs that can only be outermost + // leafs and loop transformation constructs. OmpDirectiveSet combinableDirs = (llvm::omp::blockConstructSet & ~OmpDirectiveSet{llvm::omp::Directive::OMPD_ordered, _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
