llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-llvm
Author: Sairudra More (Saieiei)
<details>
<summary>Changes</summary>
This is stacked on the existing taskgroup/taskloop reduction work.
This patch teaches Flang lowering and MLIR OpenMP translation to carry
`in_reduction` through `omp.target`.
The translation looks up the task's reduction-private storage by calling
`__kmpc_task_reduction_get_th_data` inside the target region and binds the
`in_reduction` region argument to the returned private pointer, so uses inside
the region do not continue referring to the original variable.
The patch also fixes the `omp::TargetOp::build(TargetOperands)` path to
preserve the `in_reduction` operands instead of dropping them.
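For Flang lowering, `target in_reduction` list items that are not already
covered by an explicit `map`, `has_device_addr` or `is_device_ptr` clause are
implicitly added to the target map entries, even when they are never referenced
inside the region, so the translation always has a matching mapped value to
rewrite.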
Fixes #<!-- -->199904
---
Patch is 21.39 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/199967.diff
8 Files Affected:
- (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+17-4)
- (removed) flang/test/Lower/OpenMP/Todo/target-inreduction.f90 (-15)
- (added) flang/test/Lower/OpenMP/target-inreduction-unused.f90 (+27)
- (added) flang/test/Lower/OpenMP/target-inreduction.f90 (+28)
- (modified) mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp (+5-5)
- (modified)
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+90-1)
- (added) mlir/test/Target/LLVMIR/openmp-target-in-reduction.mlir (+50)
- (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (+83-3)
``````````diff
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 7cb7e379eb503..099220acf8102 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1873,6 +1873,7 @@ genTargetClauses(lower::AbstractConverter &converter,
mlir::omp::TargetOperands &clauseOps,
DefaultMapsTy &defaultMaps,
llvm::SmallVectorImpl<Object> &hasDeviceAddrObjects,
+ llvm::SmallVectorImpl<Object> &inReductionObjects,
llvm::SmallVectorImpl<Object> &isDevicePtrObjects,
llvm::SmallVectorImpl<Object> &mapObjects) {
ClauseProcessor cp(converter, semaCtx, clauses);
@@ -1887,13 +1888,14 @@ genTargetClauses(lower::AbstractConverter &converter,
hostEvalInfo->collectValues(clauseOps.hostEvalVars);
}
cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps);
+ cp.processInReduction(loc, clauseOps, inReductionObjects);
cp.processIsDevicePtr(stmtCtx, clauseOps, isDevicePtrObjects);
cp.processMap(loc, stmtCtx, clauseOps, llvm::omp::Directive::OMPD_unknown,
&mapObjects);
cp.processNowait(clauseOps);
cp.processThreadLimit(stmtCtx, clauseOps);
- cp.processTODO<clause::Allocate, clause::InReduction,
clause::UsesAllocators>(
+ cp.processTODO<clause::Allocate, clause::UsesAllocators>(
loc, llvm::omp::Directive::OMPD_target);
// `target private(..)` is only supported in delayed privatization mode.
@@ -2932,10 +2934,10 @@ genTargetOp(lower::AbstractConverter &converter,
lower::SymMap &symTable,
mlir::omp::TargetOperands clauseOps;
DefaultMapsTy defaultMaps;
llvm::SmallVector<Object> mapObjects, hasDeviceAddrObjects,
- isDevicePtrObjects;
+ inReductionObjects, isDevicePtrObjects;
genTargetClauses(converter, semaCtx, symTable, stmtCtx, eval, item->clauses,
loc, clauseOps, defaultMaps, hasDeviceAddrObjects,
- isDevicePtrObjects, mapObjects);
+ inReductionObjects, isDevicePtrObjects, mapObjects);
if (!isDevicePtrObjects.empty()) {
// is_device_ptr maps get duplicated so the clause and synthesized
@@ -3110,6 +3112,16 @@ genTargetOp(lower::AbstractConverter &converter,
lower::SymMap &symTable,
};
lower::pft::visitAllSymbols(eval, captureImplicitMap);
+ // OpenMP requires `in_reduction` list items on `target` to be implicitly
+ // data-mapped. The body-symbol walk above only catches list items that are
+ // referenced inside the target region; force-capture the rest so the MLIR
+ // -> LLVM IR translation can rely on every in_reduction operand being
+ // present in `map_entries`. `captureImplicitMap` is a no-op for symbols
+ // already covered by an explicit map/has_device_addr/is_device_ptr.
+ for (const Object &object : inReductionObjects)
+ if (const semantics::Symbol *sym = object.sym())
+ captureImplicitMap(*sym);
+
auto targetOp = mlir::omp::TargetOp::create(firOpBuilder, loc, clauseOps);
llvm::SmallVector<mlir::Value> hasDeviceAddrBaseValues, mapBaseValues;
@@ -3120,7 +3132,8 @@ genTargetOp(lower::AbstractConverter &converter,
lower::SymMap &symTable,
args.hasDeviceAddr.objects = hasDeviceAddrObjects;
args.hasDeviceAddr.vars = hasDeviceAddrBaseValues;
args.hostEvalVars = clauseOps.hostEvalVars;
- // TODO: Add in_reduction syms and vars.
+ args.inReduction.objects = inReductionObjects;
+ args.inReduction.vars = clauseOps.inReductionVars;
args.map.objects = mapObjects;
args.map.vars = mapBaseValues;
args.priv.objects = makeObjects(dsp.getDelayedPrivSymbols());
diff --git a/flang/test/Lower/OpenMP/Todo/target-inreduction.f90
b/flang/test/Lower/OpenMP/Todo/target-inreduction.f90
deleted file mode 100644
index e5a9cffac5a11..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/target-inreduction.f90
+++ /dev/null
@@ -1,15 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -fopenmp-version=50 -o - %s 2>&1 |
FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -fopenmp-version=50 -o - %s
2>&1 | FileCheck %s
-
-!===============================================================================
-! `mergeable` clause
-!===============================================================================
-
-! CHECK: not yet implemented: Unhandled clause IN_REDUCTION in TARGET construct
-subroutine omp_target_inreduction()
- integer i
- i = 0
- !$omp target in_reduction(+:i)
- i = i + 1
- !$omp end target
-end subroutine omp_target_inreduction
diff --git a/flang/test/Lower/OpenMP/target-inreduction-unused.f90
b/flang/test/Lower/OpenMP/target-inreduction-unused.f90
new file mode 100644
index 0000000000000..6831136307a59
--- /dev/null
+++ b/flang/test/Lower/OpenMP/target-inreduction-unused.f90
@@ -0,0 +1,27 @@
+! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 |
FileCheck %s
+
+! Per the OpenMP spec, an in_reduction list item on a target construct is
+! implicitly data-mapped. The lowering must not rely on the variable being
+! referenced inside the target body to discover that map: here `i` only
+! appears in the in_reduction clause and is never read or written inside
+! the region. Verify that an omp.map.info for `i` is still emitted and
+! flows into the omp.target's map_entries.
+
+!CHECK-LABEL: func.func @_QPomp_target_in_reduction_unused()
+!CHECK: %[[IDECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name =
"_QFomp_target_in_reduction_unusedEi"}
+!CHECK: %[[IMAP:.*]] = omp.map.info var_ptr(%[[IDECL]]#1 :
!fir.ref<i32>, i32) {{.*}} {name = "i"}
+!CHECK: omp.target in_reduction(@{{[^ ]+}} %[[IDECL]]#0 -> %{{[^ ]+}} :
!fir.ref<i32>)
+!CHECK-SAME: map_entries(%[[IMAP]] -> %{{[^ ]+}} : !fir.ref<i32>)
+
+subroutine omp_target_in_reduction_unused()
+ interface
+ subroutine sub()
+ end subroutine
+ end interface
+ integer i
+ i = 0
+ !$omp target in_reduction(+:i)
+ call sub()
+ !$omp end target
+end subroutine omp_target_in_reduction_unused
diff --git a/flang/test/Lower/OpenMP/target-inreduction.f90
b/flang/test/Lower/OpenMP/target-inreduction.f90
new file mode 100644
index 0000000000000..0576e9099e19e
--- /dev/null
+++ b/flang/test/Lower/OpenMP/target-inreduction.f90
@@ -0,0 +1,28 @@
+! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 |
FileCheck %s
+
+! Verify that in_reduction on a target construct is lowered to an
+! omp.target with both an in_reduction clause and an implicit map_entries
+! entry for the same variable. The implicit map captures the original
+! pointer into the target region so the MLIR -> LLVM IR translation can
+! pass it to __kmpc_task_reduction_get_th_data.
+
+!CHECK-LABEL: omp.declare_reduction
+!CHECK-SAME: @[[RED_I32_NAME:.*]] : i32 init {
+
+!CHECK-LABEL: func.func @_QPomp_target_in_reduction()
+!CHECK: %[[IDECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name =
"_QFomp_target_in_reductionEi"}
+!CHECK: %[[IMAP:.*]] = omp.map.info var_ptr(%[[IDECL]]#1 :
!fir.ref<i32>, i32) {{.*}} {name = "i"}
+!CHECK: omp.target in_reduction(@[[RED_I32_NAME]] %[[IDECL]]#0 ->
%[[INARG:[^ ]+]] : !fir.ref<i32>)
+!CHECK-SAME: map_entries(%[[IMAP]] -> %{{[^ ]+}} : !fir.ref<i32>)
+!CHECK: hlfir.declare %[[INARG]]
+!CHECK: omp.terminator
+!CHECK: }
+
+subroutine omp_target_in_reduction()
+ integer i
+ i = 0
+ !$omp target in_reduction(+:i)
+ i = i + 1
+ !$omp end target
+end subroutine omp_target_in_reduction
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 7cef23bdfef18..8836ebce03349 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -2545,8 +2545,7 @@ LogicalResult TargetUpdateOp::verify() {
void TargetOp::build(OpBuilder &builder, OperationState &state,
const TargetOperands &clauses) {
MLIRContext *ctx = builder.getContext();
- // TODO Store clauses in op: allocateVars, allocatorVars, inReductionVars,
- // inReductionByref, inReductionSyms.
+ // TODO Store clauses in op: allocateVars, allocatorVars.
TargetOp::build(
builder, state, /*allocate_vars=*/{}, /*allocator_vars=*/{},
clauses.bare,
makeArrayAttr(ctx, clauses.dependKinds), clauses.dependVars,
@@ -2554,9 +2553,10 @@ void TargetOp::build(OpBuilder &builder, OperationState
&state,
clauses.device, clauses.dynGroupprivateAccessGroup,
clauses.dynGroupprivateFallback, clauses.dynGroupprivateSize,
clauses.hasDeviceAddrVars, clauses.hostEvalVars, clauses.ifExpr,
- /*in_reduction_vars=*/{}, /*in_reduction_byref=*/nullptr,
- /*in_reduction_syms=*/nullptr, clauses.isDevicePtrVars, clauses.mapVars,
- clauses.nowait, clauses.privateVars,
+ clauses.inReductionVars,
+ makeDenseBoolArrayAttr(ctx, clauses.inReductionByref),
+ makeArrayAttr(ctx, clauses.inReductionSyms), clauses.isDevicePtrVars,
+ clauses.mapVars, clauses.nowait, clauses.privateVars,
makeArrayAttr(ctx, clauses.privateSyms), clauses.privateNeedsBarrier,
clauses.threadLimitVars,
/*private_maps=*/nullptr);
diff --git
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 1120d9fc38d0a..2ef23a80577d8 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -490,7 +490,11 @@ static LogicalResult checkImplementationStatus(Operation
&op) {
.Case([&](omp::TargetOp op) {
checkAllocate(op, result);
checkBare(op, result);
- checkInReduction(op, result);
+ // in_reduction(byref(...)) on target is not implemented yet. Other
+ // unsupported in_reduction shapes (cleanup region, two-argument
+ // initializer, missing combiner) and the device-side / offload-entry
+ // cases are diagnosed inline in convertOmpTarget.
+ checkInReductionByref(op, result);
checkThreadLimit(op, result);
})
.Default([](Operation &) {
@@ -8208,6 +8212,61 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase
&builder,
bool isOffloadEntry =
isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
+ // Validate and resolve in_reduction clauses on omp.target. We currently
+ // only support the non-offload host-fallback path: the per-task private
+ // pointer is obtained by calling __kmpc_task_reduction_get_th_data inside
+ // the to-be-outlined target task body. Threading that pointer through the
+ // device kernel argument list is left as follow-up work.
+ SmallVector<llvm::Value *> inRedOrigPtrs;
+ if (!targetOp.getInReductionVars().empty()) {
+ if (isTargetDevice || isOffloadEntry)
+ return opInst.emitError(
+ "not yet implemented: in_reduction clause on omp.target with "
+ "offload / target-device compilation");
+ if (auto inRedSyms = targetOp.getInReductionSyms()) {
+ for (auto sym : inRedSyms->getAsRange<SymbolRefAttr>()) {
+ auto decl =
+ SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
+ targetOp, sym);
+ if (!decl)
+ return targetOp.emitError()
+ << "failed to resolve in_reduction declare_reduction symbol "
+ << sym.getRootReference() << " on omp.target";
+ if (decl.getInitializerRegion().front().getNumArguments() != 1)
+ return targetOp.emitError()
+ << "not yet implemented: in_reduction with two-argument "
+ "initializer on omp.target";
+ if (!decl.getCleanupRegion().empty())
+ return targetOp.emitError()
+ << "not yet implemented: in_reduction with cleanup region "
+ "on omp.target";
+ // The reduction combiner region is intentionally not required here:
+ // the in_reduction lowering on omp.target only locates the per-task
+ // private storage via __kmpc_task_reduction_get_th_data. The combiner
+ // is owned by the enclosing taskgroup's task_reduction registration.
+ }
+ }
+ // Each in_reduction variable must also be captured by the target via a
+ // map_entries entry referring to the same outer SSA value. OMPIRBuilder
+ // outlines the target body and only rewires uses of values that enter
+ // the kernel through the map-derived input set. The runtime call below
+ // uses that same outer SSA value as its `orig` argument, so without a
+ // matching map entry the outlined kernel would reference a value defined
+ // in the host function and fail IR verification.
+ llvm::SmallPtrSet<Value, 4> mappedVarPtrs;
+ for (Value mapV : targetOp.getMapVars())
+ if (auto mapInfo = mapV.getDefiningOp<omp::MapInfoOp>())
+ mappedVarPtrs.insert(mapInfo.getVarPtr());
+ inRedOrigPtrs.reserve(targetOp.getInReductionVars().size());
+ for (Value v : targetOp.getInReductionVars()) {
+ if (!mappedVarPtrs.contains(v))
+ return targetOp.emitError()
+ << "not yet implemented: in_reduction variable on omp.target "
+ "must also be captured by a matching map_entries entry";
+ inRedOrigPtrs.push_back(moduleTranslation.lookupValue(v));
+ }
+ }
+
// For some private variables, the MapsForPrivatizedVariablesPass
// creates MapInfoOp instances. Go through the private variables and
// the mapped variables so that during codegeneration we are able
@@ -8320,6 +8379,36 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase
&builder,
targetOp.getPrivateNeedsBarrier(), &mappedPrivateVars)))
return llvm::make_error<PreviouslyReportedError>();
+ // Map in_reduction block arguments to the per-task private storage
+ // returned by __kmpc_task_reduction_get_th_data. The lookup must run
+ // inside the target task body so the gtid corresponds to the executing
+ // thread. The descriptor argument is NULL: the runtime walks enclosing
+ // taskgroups to locate the matching task_reduction registration for
+ // `origPtr`. Mirrors the in_reduction handling on omp.taskloop.context.
+ ArrayRef<BlockArgument> inRedBlockArgs =
argIface.getInReductionBlockArgs();
+ if (!inRedBlockArgs.empty()) {
+ llvm::OpenMPIRBuilder &ompB = *moduleTranslation.getOpenMPBuilder();
+ llvm::Module *m = moduleTranslation.getLLVMModule();
+ llvm::LLVMContext &llvmCtx = m->getContext();
+ uint32_t srcLocSize;
+ llvm::Constant *srcLocStr = ompB.getOrCreateDefaultSrcLocStr(srcLocSize);
+ llvm::Value *bodyIdent = ompB.getOrCreateIdent(srcLocStr, srcLocSize);
+ llvm::Function *gtidFn = ompB.getOrCreateRuntimeFunctionPtr(
+ llvm::omp::OMPRTL___kmpc_global_thread_num);
+ llvm::Value *bodyGtid =
+ builder.CreateCall(gtidFn, {bodyIdent}, "omp_global_thread_num");
+ llvm::FunctionCallee getThData = ompB.getOrCreateRuntimeFunction(
+ *m, llvm::omp::OMPRTL___kmpc_task_reduction_get_th_data);
+ llvm::Type *ptrTy = llvm::PointerType::getUnqual(llvmCtx);
+ llvm::Value *nullDesc = llvm::ConstantPointerNull::get(ptrTy);
+ for (auto [blockArg, origPtr] :
+ llvm::zip_equal(inRedBlockArgs, inRedOrigPtrs)) {
+ llvm::Value *priv = builder.CreateCall(
+ getThData, {bodyGtid, nullDesc, origPtr}, "omp.inred.priv");
+ moduleTranslation.mapValue(blockArg, priv);
+ }
+ }
+
LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
moduleTranslation, allocaIP, deallocBlocks);
llvm::Expected<llvm::BasicBlock *> exitBlock = convertOmpOpRegions(
diff --git a/mlir/test/Target/LLVMIR/openmp-target-in-reduction.mlir
b/mlir/test/Target/LLVMIR/openmp-target-in-reduction.mlir
new file mode 100644
index 0000000000000..2b3cfd514d82e
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-target-in-reduction.mlir
@@ -0,0 +1,50 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// in_reduction on omp.target: the in_reduction variable is also captured
+// into the target region as a map entry (the Flang front-end emits this
+// implicit map). Inside the outlined target body the captured pointer is
+// passed to __kmpc_task_reduction_get_th_data with a NULL descriptor;
+// the runtime walks enclosing taskgroups to locate the matching
+// task_reduction registration. The returned pointer is bound to the
+// in_reduction region block argument so subsequent loads/stores inside
+// the region use the private copy.
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+ %c0 = llvm.mlir.constant(0 : i32) : i32
+ omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+ %s = llvm.add %arg0, %arg1 : i32
+ omp.yield(%s : i32)
+}
+
+llvm.func @target_inreduction(%x : !llvm.ptr) {
+ %m = omp.map.info var_ptr(%x : !llvm.ptr, i32) map_clauses(tofrom)
capture(ByRef) -> !llvm.ptr
+ omp.target in_reduction(@add_i32 %x -> %prv : !llvm.ptr) map_entries(%m ->
%marg : !llvm.ptr) {
+ %v = llvm.load %prv : !llvm.ptr -> i32
+ %c1 = llvm.mlir.constant(1 : i32) : i32
+ %s = llvm.add %v, %c1 : i32
+ llvm.store %s, %prv : i32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+}
+
+// The host stub forwards the captured pointer into the outlined target
+// kernel.
+// CHECK-LABEL: define void @target_inreduction(
+// CHECK: call void
@__omp_offloading_{{.*}}_target_inreduction_{{.*}}(ptr %{{.+}}, ptr null)
+
+// In the outlined target body the in_reduction private pointer is
+// obtained from the runtime using the captured original pointer; that
+// pointer is then the base of the load and store inside the region.
+// CHECK-LABEL: define internal void
@__omp_offloading_{{.*}}_target_inreduction_
+// CHECK-SAME: (ptr %[[CAPT:.+]], ptr %{{.+}})
+// CHECK: %[[GTID:.+]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[PRIV:.+]] = call ptr
@__kmpc_task_reduction_get_th_data(i32 %[[GTID]], ptr null, ptr %[[CAPT]])
+// CHECK: %[[LOADED:.+]] = load i32, ptr %[[PRIV]]
+// CHECK: %[[SUM:.+]] = add i32 %[[LOADED]], 1
+// CHECK: store i32 %[[SUM]], ptr %[[PRIV]]
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir
b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index a84da99458fd1..926ab48503acc 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -190,10 +190,90 @@ atomic {
llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
omp.yield
}
-llvm.func @target_in_reduction(%x : !llvm.ptr) {
- // expected-error@below {{not yet implemented: Unhandled clause in_reduction
in omp.target operation}}
+llvm.func @target_in_reduction_byref(%x : !llvm.ptr) {
+ // expected-error@below {{not yet implemented: Unhandled clause in_reduction
with byref modifier in omp.target operation}}
// expected-error@below {{LLVM Translation failed for operation: omp.target}}
- omp.target in_reduction(@add_f32 %x -> %prv : !llvm.ptr) {
+ omp.target in_reduction(byref @add_f32 %x -> %prv : !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+}
+
+// -----
+
+omp.declare_reduction @add_cleanup_f32 : f32
+init {
+^bb0(%arg: f32):
+ %0 = llvm.mlir.constant(0.0 : f32) : f32
+ omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+ %1 = llvm.fadd %arg0, %arg1 : f32
+ omp.yield (%1 : f32)
+}
+cleanup {
+^bb2(%arg2: f32):
+ omp.yield
+}
+llvm.func @target_in_reduction_cleanup(%x : !llvm.ptr) {
+ // expected-error@below {{not yet implemented: in_reduction with cleanup
region on omp.target}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.target}}
+ omp.target in_reduction(@add_cleanup_f32 %x -> %prv : !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+}
+
+// -----
+
+omp.declare_reduction @add_two_arg_init_i32 : !llvm.ptr alloc {
+^bb0(%arg: !llvm.ptr):
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr
+ omp.yield(%1 : !llvm.ptr)
+} init {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %0 = llvm.mlir.constant(0 : i32) : i32
+ llvm.store %0, %arg1 : i32, !llvm.ptr
+ omp.yield(%arg1 : !llvm.ptr)
+} combiner {
+^bb1(%arg0: !llvm.ptr, %arg1: !llvm.pt...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/199967
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits