https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/174588
>From 0867ea76200431b0962f6a874c4f980b3aa7f282 Mon Sep 17 00:00:00 2001 From: Tom Eccles <[email protected]> Date: Tue, 6 Jan 2026 13:29:30 +0000 Subject: [PATCH] [mlir][OpenMP] Don't allocate task context structure if not needed Don't allocate a task context structure if none of the private variables needed it. This was already skipped when there were no private variables at all. --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 11 +++-- .../LLVMIR/openmp-task-no-context-struct.mlir | 48 +++++++++++++++++++ .../openmp-taskloop-no-context-struct.mlir | 46 ++++++------------ 3 files changed, 70 insertions(+), 35 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index eef71a17fb41d..a4c1d1e7219d4 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2262,6 +2262,9 @@ void TaskContextStructManager::generateTaskContextStruct() { privateVarTypes.push_back(moduleTranslation.convertType(mlirType)); } + if (privateVarTypes.empty()) + return; + structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(), privateVarTypes); @@ -2299,10 +2302,10 @@ SmallVector<llvm::Value *> TaskContextStructManager::createGEPsToPrivateVars( } void TaskContextStructManager::createGEPsToPrivateVars() { - if (!structPtr) { + if (!structPtr) assert(privateVarTypes.empty()); - return; - } + // Still need to run createGEPsToPrivateVars to populate llvmPrivateVarGEPs + // with null values for skipped private decls llvmPrivateVarGEPs = createGEPsToPrivateVars(structPtr); } @@ -2751,7 +2754,7 @@ convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder, } llvm::OpenMPIRBuilder::TaskDupCallbackTy taskDupOrNull = nullptr; - if (!taskStructMgr.getLLVMPrivateVarGEPs().empty()) + if (taskStructMgr.getStructPtr()) taskDupOrNull = taskDupCB; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); diff --git a/mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir new file mode 100644 index 0000000000000..32ccac8296696 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir @@ -0,0 +1,48 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// Check that we don't allocate a task context structure when none of the private +// vars need it. + +omp.private {type = private} @_QFtestEp_private_i32 : i32 +llvm.func @_QPtest() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "p"} : (i64) -> !llvm.ptr + omp.task private(@_QFtestEp_private_i32 %1 -> %arg0 : !llvm.ptr) { + llvm.call @_QPdo_something(%arg0) {fastmathFlags = #llvm.fastmath<contract>} : (!llvm.ptr) -> () + omp.terminator + } + llvm.return +} +llvm.func @_QPdo_something(!llvm.ptr) attributes {sym_visibility = "private"} + +// CHECK-LABEL: define void @_QPtest() +// CHECK: %[[VAL_0:.*]] = alloca i32, i64 1, align 4 +// CHECK: br label %[[VAL_1:.*]] +// CHECK: entry: ; preds = %[[VAL_2:.*]] +// CHECK: br label %[[VAL_3:.*]] +// CHECK: omp.private.init: ; preds = %[[VAL_1]] +// CHECK-NOT: @malloc +// CHECK: br label %[[VAL_4:.*]] +// CHECK: omp.private.copy: ; preds = %[[VAL_3]] +// CHECK: br label %[[VAL_5:.*]] +// CHECK: omp.task.start: ; preds = %[[VAL_4]] +// CHECK: br label %[[VAL_6:.*]] +// CHECK: codeRepl: ; preds = %[[VAL_5]] +// CHECK: %[[VAL_7:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: %[[VAL_8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[VAL_7]], i32 1, i64 40, i64 0, ptr @_QPtest..omp_par) +// CHECK: %[[VAL_9:.*]] = call i32 @__kmpc_omp_task(ptr @1, i32 %[[VAL_7]], ptr %[[VAL_8]]) +// CHECK: br label %[[VAL_10:.*]] +// CHECK: task.exit: ; preds = %[[VAL_6]] +// CHECK: ret void + +// CHECK-LABEL: define internal void @_QPtest..omp_par +// CHECK: task.alloca: +// CHECK: %[[VAL_11:.*]] = alloca i32, align 4 +// CHECK: br label %[[VAL_12:.*]] +// CHECK: task.body: ; preds = %[[VAL_13:.*]] +// CHECK: br label %[[VAL_14:.*]] +// CHECK: omp.task.region: ; preds = %[[VAL_12]] +// CHECK: call void @_QPdo_something(ptr %[[VAL_11]]) +// CHECK: br label %[[VAL_15:.*]] +// CHECK: omp.region.cont: ; preds = %[[VAL_14]] +// CHECK-NOT: @free diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir index 3355a14e38ffd..6eda756280643 100644 --- a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir +++ b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir @@ -22,7 +22,8 @@ llvm.func @_QPtest() { llvm.return } // CHECK-LABEL: define void @_QPtest() { -// CHECK: %[[STRUCTARG:.*]] = alloca { i64, i64, i64, ptr }, align 8 +// No task context structure: +// CHECK: %[[STRUCTARG:.*]] = alloca { i64, i64, i64 }, align 8 // CHECK: %[[VAL_0:.*]] = alloca i32, i64 1, align 4 // CHECK: %[[VAL_1:.*]] = alloca i32, i64 1, align 4 // CHECK: %[[VAL_2:.*]] = alloca i32, i64 1, align 4 @@ -30,31 +31,28 @@ llvm.func @_QPtest() { // CHECK: entry: ; preds = %[[VAL_4:.*]] // CHECK: br label %[[VAL_5:.*]] // CHECK: omp.private.init: ; preds = %[[VAL_3]] -// CHECK: %[[VAL_6:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({}, ptr null, i32 1) to i64)) // CHECK: br label %[[VAL_7:.*]] // CHECK: omp.private.copy: ; preds = %[[VAL_5]] // CHECK: br label %[[VAL_8:.*]] // CHECK: omp.taskloop.start: ; preds = %[[VAL_7]] // CHECK: br label %[[VAL_9:.*]] // CHECK: codeRepl: ; preds = %[[VAL_8]] -// CHECK: %[[VAL_10:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[STRUCTARG]], i32 0, i32 0 +// CHECK: %[[VAL_10:.*]] = getelementptr { i64, i64, i64 }, ptr %[[STRUCTARG]], i32 0, i32 0 // CHECK: store i64 1, ptr %[[VAL_10]], align 4 -// CHECK: %[[VAL_11:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[STRUCTARG]], i32 0, i32 1 +// CHECK: %[[VAL_11:.*]] = getelementptr { i64, i64, i64 }, ptr %[[STRUCTARG]], i32 0, i32 1 // CHECK: store i64 20, ptr %[[VAL_11]], align 4 -// CHECK: %[[VAL_12:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[STRUCTARG]], i32 0, i32 2 +// CHECK: %[[VAL_12:.*]] = getelementptr { i64, i64, i64 }, ptr %[[STRUCTARG]], i32 0, i32 2 // CHECK: store i64 1, ptr %[[VAL_12]], align 4 -// CHECK: %[[VAL_13:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[STRUCTARG]], i32 0, i32 3 -// CHECK: store ptr %[[VAL_6]], ptr %[[VAL_13]], align 8 // CHECK: %[[VAL_14:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: call void @__kmpc_taskgroup(ptr @1, i32 %[[VAL_14]]) -// CHECK: %[[VAL_15:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[VAL_14]], i32 1, i64 40, i64 32, ptr @_QPtest..omp_par) +// CHECK: %[[VAL_15:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[VAL_14]], i32 1, i64 40, i64 24, ptr @_QPtest..omp_par) // CHECK: %[[VAL_16:.*]] = load ptr, ptr %[[VAL_15]], align 8 -// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_16]], ptr align 1 %[[STRUCTARG]], i64 32, i1 false) -// CHECK: %[[VAL_17:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_16]], i32 0, i32 0 -// CHECK: %[[VAL_18:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_16]], i32 0, i32 1 -// CHECK: %[[VAL_19:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_16]], i32 0, i32 2 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_16]], ptr align 1 %[[STRUCTARG]], i64 24, i1 false) +// CHECK: %[[VAL_17:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_16]], i32 0, i32 0 +// CHECK: %[[VAL_18:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_16]], i32 0, i32 1 +// CHECK: %[[VAL_19:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_16]], i32 0, i32 2 // CHECK: %[[VAL_20:.*]] = load i64, ptr %[[VAL_19]], align 4 -// CHECK: call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_14]], ptr %[[VAL_15]], i32 1, ptr %[[VAL_17]], ptr %[[VAL_18]], i64 %[[VAL_20]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup) +// CHECK: call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_14]], ptr %[[VAL_15]], i32 1, ptr %[[VAL_17]], ptr %[[VAL_18]], i64 %[[VAL_20]], i32 0, i32 0, i64 0, ptr null) // CHECK: call void @__kmpc_end_taskgroup(ptr @1, i32 %[[VAL_14]]) // CHECK: br label %[[VAL_21:.*]] // CHECK: taskloop.exit: ; preds = %[[VAL_9]] @@ -63,14 +61,12 @@ llvm.func @_QPtest() { // CHECK-LABEL: define internal void @_QPtest..omp_par // CHECK: taskloop.alloca: // CHECK: %[[VAL_22:.*]] = load ptr, ptr %[[VAL_23:.*]], align 8 -// CHECK: %[[VAL_24:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_22]], i32 0, i32 0 +// CHECK: %[[VAL_24:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_22]], i32 0, i32 0 // CHECK: %[[VAL_25:.*]] = load i64, ptr %[[VAL_24]], align 4 -// CHECK: %[[VAL_26:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_22]], i32 0, i32 1 +// CHECK: %[[VAL_26:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_22]], i32 0, i32 1 // CHECK: %[[VAL_27:.*]] = load i64, ptr %[[VAL_26]], align 4 -// CHECK: %[[VAL_28:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_22]], i32 0, i32 2 +// CHECK: %[[VAL_28:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_22]], i32 0, i32 2 // CHECK: %[[VAL_29:.*]] = load i64, ptr %[[VAL_28]], align 4 -// CHECK: %[[VAL_30:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_22]], i32 0, i32 3 -// CHECK: %[[VAL_31:.*]] = load ptr, ptr %[[VAL_30]], align 8, !align !1 // CHECK: %[[VAL_32:.*]] = alloca i32, align 4 // CHECK: %[[VAL_33:.*]] = alloca i32, align 4 // CHECK: %[[VAL_34:.*]] = alloca i32, align 4 @@ -97,7 +93,6 @@ llvm.func @_QPtest() { // CHECK: omp_loop.after: ; preds = %[[VAL_51]] // CHECK: br label %[[VAL_53:.*]] // CHECK: omp.region.cont: ; preds = %[[VAL_52]] -// CHECK: tail call void @free(ptr %[[VAL_31]]) // CHECK: br label %[[VAL_54:.*]] // CHECK: omp_loop.body: ; preds = %[[VAL_48]] // CHECK: %[[VAL_55:.*]] = mul i32 %[[VAL_46]], 1 @@ -114,15 +109,4 @@ llvm.func @_QPtest() { // CHECK: taskloop.exit.exitStub: ; preds = %[[VAL_53]] // CHECK: ret void -// CHECK-LABEL: define internal void @omp_taskloop_dup( -// CHECK: entry: -// CHECK: %[[VAL_59:.*]] = getelementptr { %[[VAL_60:.*]], { i64, i64, i64, ptr } }, ptr %[[VAL_61:.*]], i32 0, i32 1 -// CHECK: %[[VAL_62:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_59]], i32 0, i32 3 -// CHECK: %[[VAL_63:.*]] = getelementptr { %[[VAL_60]], { i64, i64, i64, ptr } }, ptr %[[VAL_64:.*]], i32 0, i32 1 -// CHECK: %[[VAL_65:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_63]], i32 0, i32 3 -// CHECK: %[[VAL_66:.*]] = load ptr, ptr %[[VAL_65]], align 8 -// TODO: don't generate allocation for empty task context struct (for later patch) -// CHECK: %[[VAL_67:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({}, ptr null, i32 1) to i64)) -// CHECK: store ptr %[[VAL_67]], ptr %[[VAL_62]], align 8 -// CHECK: ret void - +// CHECK-NOT: define internal void @omp_taskloop_dup _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
