https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/97718
>From 27490fbac8e08ed6c11437ef28efd8298fb120b1 Mon Sep 17 00:00:00 2001 From: Sergio Afonso <safon...@amd.com> Date: Thu, 4 Jul 2024 12:56:43 +0100 Subject: [PATCH 1/2] [Flang][OpenMP] Add lowering support for DO SIMD This patch adds support for lowering 'DO SIMD' constructs to MLIR. SIMD information is now stored in an `omp.simd` loop wrapper, which is currently ignored by the OpenMP dialect to LLVM IR translation stage. The end result is that runtime behavior of compiled 'DO SIMD' constructs does not change after this patch, so 'DO SIMD' still runs like 'DO' (i.e. SIMD width = 1). However, all of the required information is now present in the resulting MLIR representation. To avoid confusion, the previous wsloop-simd.f90 lit test is renamed to wsloop-schedule.f90 and a new wsloop-simd.f90 test is created to check the addition of SIMD clauses to the `omp.simd` operation produced when a 'DO SIMD' construct is lowered to MLIR. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 49 ++++++++---- .../Lower/OpenMP/Todo/omp-do-simd-aligned.f90 | 16 ---- .../Lower/OpenMP/Todo/omp-do-simd-linear.f90 | 2 +- .../Lower/OpenMP/Todo/omp-do-simd-safelen.f90 | 14 ---- .../Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 | 14 ---- flang/test/Lower/OpenMP/if-clause.f90 | 31 ++++++++ flang/test/Lower/OpenMP/loop-compound.f90 | 3 + flang/test/Lower/OpenMP/wsloop-schedule.f90 | 37 ++++++++++ flang/test/Lower/OpenMP/wsloop-simd.f90 | 74 +++++++++++-------- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 3 + 10 files changed, 153 insertions(+), 90 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90 delete mode 100644 flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90 delete mode 100644 flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 create mode 100644 flang/test/Lower/OpenMP/wsloop-schedule.f90 diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index f29d3517bb39bc..67bfdb150a7f22 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1986,19 +1986,42 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter, const ConstructQueue &queue, ConstructQueue::iterator item, DataSharingProcessor &dsp) { - ClauseProcessor cp(converter, semaCtx, item->clauses); - cp.processTODO<clause::Aligned, clause::Allocate, clause::Linear, - clause::Safelen, clause::Simdlen>(loc, - llvm::omp::OMPD_do_simd); - // TODO: Add support for vectorization - add vectorization hints inside loop - // body. - // OpenMP standard does not specify the length of vector instructions. - // Currently we safely assume that for !$omp do simd pragma the SIMD length - // is equal to 1 (i.e. we generate standard workshare loop). - // When support for vectorization is enabled, then we need to add handling of - // if clause. Currently if clause can be skipped because we always assume - // SIMD length = 1. - genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item, dsp); + lower::StatementContext stmtCtx; + + // Clause processing. + mlir::omp::WsloopClauseOps wsloopClauseOps; + llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms; + llvm::SmallVector<mlir::Type> wsloopReductionTypes; + genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, + wsloopClauseOps, wsloopReductionTypes, wsloopReductionSyms); + + mlir::omp::SimdClauseOps simdClauseOps; + genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); + + mlir::omp::LoopNestClauseOps loopNestClauseOps; + llvm::SmallVector<const semantics::Symbol *> iv; + genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, + loopNestClauseOps, iv); + + // Operation creation. + auto wsloopOp = + genWsloopWrapperOp(converter, semaCtx, eval, loc, wsloopClauseOps, + wsloopReductionSyms, wsloopReductionTypes); + + auto simdOp = genSimdWrapperOp(converter, semaCtx, eval, loc, simdClauseOps); + + // Construct wrapper entry block list and associated symbols. It is important + // that the symbol and block argument order match, so that the symbol-value + // bindings created are correct. + // TODO: Add omp.wsloop private and omp.simd private and reduction args. + auto wrapperArgs = llvm::to_vector(llvm::concat<mlir::BlockArgument>( + wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments())); + + assert(wsloopReductionSyms.size() == wrapperArgs.size() && + "Number of symbols and wrapper block arguments must match"); + genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, + loopNestClauseOps, iv, wsloopReductionSyms, wrapperArgs, + llvm::omp::Directive::OMPD_do_simd, dsp); } static void genCompositeTaskloopSimd( diff --git a/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90 b/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90 deleted file mode 100644 index b62c54182442ac..00000000000000 --- a/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90 +++ /dev/null @@ -1,16 +0,0 @@ -! This test checks lowering of OpenMP do simd aligned() pragma - -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -subroutine testDoSimdAligned(int_array) - use iso_c_binding - type(c_ptr) :: int_array -!CHECK: not yet implemented: Unhandled clause ALIGNED in DO SIMD construct -!$omp do simd aligned(int_array) - do index_ = 1, 10 - call c_test_call(int_array) - end do -!$omp end do simd - -end subroutine testDoSimdAligned - diff --git a/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 b/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 index a9e0446ec8c34e..2f5366c2a5b368 100644 --- a/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 +++ b/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 @@ -4,7 +4,7 @@ ! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s subroutine testDoSimdLinear(int_array) integer :: int_array(*) -!CHECK: not yet implemented: Unhandled clause LINEAR in DO SIMD construct +!CHECK: not yet implemented: Unhandled clause LINEAR in DO construct !$omp do simd linear(int_array) do index_ = 1, 10 end do diff --git a/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90 b/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90 deleted file mode 100644 index 054eb52ea170ac..00000000000000 --- a/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90 +++ /dev/null @@ -1,14 +0,0 @@ -! This test checks lowering of OpenMP do simd safelen() pragma - -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -subroutine testDoSimdSafelen(int_array) - integer :: int_array(*) -!CHECK: not yet implemented: Unhandled clause SAFELEN in DO SIMD construct -!$omp do simd safelen(4) - do index_ = 1, 10 - end do -!$omp end do simd - -end subroutine testDoSimdSafelen - diff --git a/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 b/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 deleted file mode 100644 index bd00b6f336c931..00000000000000 --- a/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 +++ /dev/null @@ -1,14 +0,0 @@ -! This test checks lowering of OpenMP do simd simdlen() pragma - -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -subroutine testDoSimdSimdlen(int_array) - integer :: int_array(*) -!CHECK: not yet implemented: Unhandled clause SIMDLEN in DO SIMD construct -!$omp do simd simdlen(4) - do index_ = 1, 10 - end do -!$omp end do simd - -end subroutine testDoSimdSimdlen - diff --git a/flang/test/Lower/OpenMP/if-clause.f90 b/flang/test/Lower/OpenMP/if-clause.f90 index 2c9a66e7bc11ea..ea730b5f1d9db4 100644 --- a/flang/test/Lower/OpenMP/if-clause.f90 +++ b/flang/test/Lower/OpenMP/if-clause.f90 @@ -30,6 +30,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK-NEXT: omp.loop_nest !$omp do simd do i = 1, 10 @@ -39,6 +42,8 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-SAME: if({{.*}}) ! CHECK-NEXT: omp.loop_nest !$omp do simd if(.true.) do i = 1, 10 @@ -48,6 +53,8 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-SAME: if({{.*}}) ! CHECK-NEXT: omp.loop_nest !$omp do simd if(simd: .true.) do i = 1, 10 @@ -122,6 +129,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK-NEXT: omp.loop_nest !$omp parallel do simd do i = 1, 10 @@ -133,6 +143,8 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-SAME: if({{.*}}) ! CHECK-NEXT: omp.loop_nest !$omp parallel do simd if(.true.) do i = 1, 10 @@ -144,6 +156,8 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-SAME: if({{.*}}) ! CHECK-NEXT: omp.loop_nest !$omp parallel do simd if(parallel: .true.) if(simd: .false.) do i = 1, 10 @@ -155,6 +169,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK-NEXT: omp.loop_nest !$omp parallel do simd if(parallel: .true.) do i = 1, 10 @@ -167,6 +184,8 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-SAME: if({{.*}}) ! CHECK-NEXT: omp.loop_nest !$omp parallel do simd if(simd: .true.) do i = 1, 10 @@ -355,6 +374,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK-NEXT: omp.loop_nest !$omp target parallel do simd do i = 1, 10 @@ -368,6 +390,8 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-SAME: if({{.*}}) ! CHECK-NEXT: omp.loop_nest !$omp target parallel do simd if(.true.) do i = 1, 10 @@ -381,6 +405,8 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-SAME: if({{.*}}) ! CHECK-NEXT: omp.loop_nest !$omp target parallel do simd if(target: .true.) if(parallel: .false.) & !$omp& if(simd: .true.) @@ -396,6 +422,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK-NEXT: omp.loop_nest !$omp target parallel do simd if(target: .true.) do i = 1, 10 @@ -410,6 +439,8 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-SAME: if({{.*}}) ! CHECK-NEXT: omp.loop_nest !$omp target parallel do simd if(parallel: .true.) if(simd: .false.) do i = 1, 10 diff --git a/flang/test/Lower/OpenMP/loop-compound.f90 b/flang/test/Lower/OpenMP/loop-compound.f90 index 5012008b076714..383a3716a94395 100644 --- a/flang/test/Lower/OpenMP/loop-compound.f90 +++ b/flang/test/Lower/OpenMP/loop-compound.f90 @@ -23,6 +23,7 @@ program main ! DO SIMD ! ---------------------------------------------------------------------------- ! CHECK: omp.wsloop + ! CHECK-NEXT: omp.simd ! CHECK-NEXT: omp.loop_nest !$omp do simd do i = 1, 10 @@ -34,6 +35,7 @@ program main ! ---------------------------------------------------------------------------- ! CHECK: omp.parallel ! CHECK: omp.wsloop + ! CHECK-NEXT: omp.simd ! CHECK-NEXT: omp.loop_nest !$omp parallel do simd do i = 1, 10 @@ -57,6 +59,7 @@ program main ! CHECK: omp.target ! CHECK: omp.parallel ! CHECK: omp.wsloop + ! CHECK-NEXT: omp.simd ! CHECK-NEXT: omp.loop_nest !$omp target parallel do simd do i = 1, 10 diff --git a/flang/test/Lower/OpenMP/wsloop-schedule.f90 b/flang/test/Lower/OpenMP/wsloop-schedule.f90 new file mode 100644 index 00000000000000..1df67474d65e3b --- /dev/null +++ b/flang/test/Lower/OpenMP/wsloop-schedule.f90 @@ -0,0 +1,37 @@ +! This test checks lowering of OpenMP DO Directive(Worksharing) with +! simd schedule modifier. + +! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s + +program wsloop_dynamic + integer :: i +!CHECK-LABEL: func @_QQmain() + +!$OMP PARALLEL +!CHECK: omp.parallel { + +!$OMP DO SCHEDULE(simd: runtime) +!CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 +!CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 +!CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 +!CHECK: omp.wsloop schedule(runtime, simd) nowait { +!CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) { +!CHECK: fir.store %[[I]] to %[[STORE:.*]]#1 : !fir.ref<i32> + + do i=1, 9 + print*, i +!CHECK: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput +!CHECK: %[[LOAD:.*]] = fir.load %[[STORE]]#0 : !fir.ref<i32> +!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1 +!CHECK: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32 + end do +!CHECK: omp.yield +!CHECK: } +!CHECK: omp.terminator +!CHECK: } +!CHECK: omp.terminator +!CHECK: } + +!$OMP END DO NOWAIT +!$OMP END PARALLEL +end diff --git a/flang/test/Lower/OpenMP/wsloop-simd.f90 b/flang/test/Lower/OpenMP/wsloop-simd.f90 index 1df67474d65e3b..662ae56e0b76ee 100644 --- a/flang/test/Lower/OpenMP/wsloop-simd.f90 +++ b/flang/test/Lower/OpenMP/wsloop-simd.f90 @@ -1,37 +1,47 @@ -! This test checks lowering of OpenMP DO Directive(Worksharing) with -! simd schedule modifier. +! This test checks lowering of OpenMP DO SIMD composite constructs. ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir %openmp_flags %s -o - | FileCheck %s -program wsloop_dynamic - integer :: i -!CHECK-LABEL: func @_QQmain() +! CHECK-LABEL: func.func @_QPdo_simd_aligned( +subroutine do_simd_aligned(A) + use iso_c_binding + type(c_ptr) :: A + + ! CHECK: omp.wsloop + ! CHECK-NOT: aligned( + ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-SAME: aligned( + !$omp do simd aligned(A) + do index_ = 1, 10 + call c_test_call(A) + end do + !$omp end do simd +end subroutine do_simd_aligned -!$OMP PARALLEL -!CHECK: omp.parallel { +! CHECK-LABEL: func.func @_QPdo_simd_safelen( +subroutine do_simd_safelen() + ! CHECK: omp.wsloop + ! CHECK-NOT: safelen( + ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-SAME: safelen( + !$omp do simd safelen(4) + do index_ = 1, 10 + end do + !$omp end do simd +end subroutine do_simd_safelen -!$OMP DO SCHEDULE(simd: runtime) -!CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 -!CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 -!CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(runtime, simd) nowait { -!CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) { -!CHECK: fir.store %[[I]] to %[[STORE:.*]]#1 : !fir.ref<i32> - - do i=1, 9 - print*, i -!CHECK: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput -!CHECK: %[[LOAD:.*]] = fir.load %[[STORE]]#0 : !fir.ref<i32> -!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1 -!CHECK: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32 - end do -!CHECK: omp.yield -!CHECK: } -!CHECK: omp.terminator -!CHECK: } -!CHECK: omp.terminator -!CHECK: } - -!$OMP END DO NOWAIT -!$OMP END PARALLEL -end +! CHECK-LABEL: func.func @_QPdo_simd_simdlen( +subroutine do_simd_simdlen() + ! CHECK: omp.wsloop + ! CHECK-NOT: simdlen( + ! CHECK-SAME: { + ! CHECK-NEXT: omp.simd + ! CHECK-SAME: simdlen( + !$omp do simd simdlen(4) + do index_ = 1, 10 + end do + !$omp end do simd +end subroutine do_simd_simdlen diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 35971fbacbf91d..0c9c699a1f390b 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -899,6 +899,9 @@ static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { auto wsloopOp = cast<omp::WsloopOp>(opInst); + // FIXME: Here any other nested wrappers (e.g. omp.simd) are skipped, so + // codegen for composite constructs like 'DO/FOR SIMD' will be the same as for + // 'DO/FOR'. auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop()); llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionVarsByref()); >From 7c208f6d4ec126c6ab3a1ab171c4552450726cdf Mon Sep 17 00:00:00 2001 From: Sergio Afonso <safon...@amd.com> Date: Fri, 5 Jul 2024 11:30:19 +0100 Subject: [PATCH 2/2] Fix lit test --- flang/test/Lower/OpenMP/wsloop-simd.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/test/Lower/OpenMP/wsloop-simd.f90 b/flang/test/Lower/OpenMP/wsloop-simd.f90 index 662ae56e0b76ee..e0ecaf425d5810 100644 --- a/flang/test/Lower/OpenMP/wsloop-simd.f90 +++ b/flang/test/Lower/OpenMP/wsloop-simd.f90 @@ -1,7 +1,7 @@ ! This test checks lowering of OpenMP DO SIMD composite constructs. ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-hlfir %openmp_flags %s -o - | FileCheck %s +! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s ! CHECK-LABEL: func.func @_QPdo_simd_aligned( subroutine do_simd_aligned(A) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits