[PATCH] D53636: Do not always request an implicit taskgroup region inside the kmpc_taskloop function
This revision was automatically updated to reflect the committed changes. Closed by commit rC345180: Do not always request an implicit taskgroup region inside the kmpc_taskloop… (authored by ABataev, committed by ). Repository: rC Clang https://reviews.llvm.org/D53636 Files: lib/CodeGen/CGOpenMPRuntime.cpp test/OpenMP/taskloop_codegen.cpp test/OpenMP/taskloop_firstprivate_codegen.cpp test/OpenMP/taskloop_lastprivate_codegen.cpp test/OpenMP/taskloop_private_codegen.cpp test/OpenMP/taskloop_reduction_codegen.cpp test/OpenMP/taskloop_simd_codegen.cpp test/OpenMP/taskloop_simd_firstprivate_codegen.cpp test/OpenMP/taskloop_simd_lastprivate_codegen.cpp test/OpenMP/taskloop_simd_private_codegen.cpp test/OpenMP/taskloop_simd_reduction_codegen.cpp Index: test/OpenMP/taskloop_simd_private_codegen.cpp === --- test/OpenMP/taskloop_simd_private_codegen.cpp +++ test/OpenMP/taskloop_simd_private_codegen.cpp @@ -65,7 +65,7 @@ // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 -// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* null) +// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) // LAMBDA: ret #pragma omp taskloop simd private(g, sivar) for (int i = 0; i < 10; ++i) { @@ -101,7 +101,7 @@ // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // 
BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 - // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* null) + // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) // BLOCKS: ret #pragma omp taskloop simd private(g, sivar) for (int i = 0; i < 10; ++i) { @@ -193,7 +193,7 @@ // CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], // Start task. -// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) +// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) // CHECK: call i32 @__kmpc_omp_task([[LOC]], i32 [[GTID]], i8* // CHECK: = call i{{.+}} [[TMAIN_INT:@.+]]() @@ -324,7 +324,7 @@ // CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], // Start task. 
-// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*)) +// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*)) // No destructors must be called for private copies of s_arr and var. // CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 Index: test/OpenMP/taskloop_private_codegen.cpp === --- test/OpenMP/taskloop_private_codegen.cpp +++ test/OpenMP/taskloop_private_codegen.cpp @@ -65,7 +65,7 @@ // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 -// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}
[PATCH] D53636: Do not always request an implicit taskgroup region inside the kmpc_taskloop function
ABataev added a comment. In https://reviews.llvm.org/D53636#1274673, @smateo wrote: > Hi Alexey, > > Thanks for the prompt review! > > I don't have commit access yet; would you mind committing it for me? > > Thanks! Sure, no problem, thanks for the patch! Repository: rC Clang https://reviews.llvm.org/D53636 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53636: Do not always request an implicit taskgroup region inside the kmpc_taskloop function
smateo added a comment. Hi Alexey, Thanks for the prompt review! I don't have commit access yet; would you mind committing it for me? Thanks! Repository: rC Clang https://reviews.llvm.org/D53636 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53636: Do not always request an implicit taskgroup region inside the kmpc_taskloop function
ABataev accepted this revision. ABataev added a comment. This revision is now accepted and ready to land. LG Repository: rC Clang https://reviews.llvm.org/D53636 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53636: Do not always request an implicit taskgroup region inside the kmpc_taskloop function
smateo created this revision. smateo added a reviewer: ABataev. Herald added a subscriber: cfe-commits. For the following code: int i; #pragma omp taskloop for (i = 0; i < 100; ++i) {} #pragma omp taskloop nogroup for (i = 0; i < 100; ++i) {} Clang emits the following LLVM IR: ... call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 %0) %2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) ... call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %2, i32 1, i64* %8, i64* %9, i64 %13, i32 0, i32 0, i64 0, i8* null) call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 %0) ... %15 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..2 to i32 (i32, i8*)*)) ... call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %15, i32 1, i64* %21, i64* %22, i64 %26, i32 0, i32 0, i64 0, i8* null) The first set of instructions corresponds to the first taskloop construct. It is important to note that the implicit taskgroup region associated with the taskloop construct has been materialized in our IR: the `__kmpc_taskloop` call occurs inside a taskgroup region. Note also that this taskgroup region does not exist in our second taskloop because we are using the `nogroup` clause. The issue here is that the 4th argument of the `__kmpc_taskloop` call, counting from the end, is always zero. Checking the LLVM OpenMP RT implementation, we see that this argument corresponds to the nogroup parameter: void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup); So basically we always tell the RT to create another taskgroup region. For the first taskloop, this means that we create two taskgroup regions. 
For the second example, it means that despite the `nogroup` clause we still end up with a taskgroup region, so we unnecessarily wait until all descendant tasks have been executed. Repository: rC Clang https://reviews.llvm.org/D53636 Files: lib/CodeGen/CGOpenMPRuntime.cpp test/OpenMP/taskloop_codegen.cpp test/OpenMP/taskloop_firstprivate_codegen.cpp test/OpenMP/taskloop_lastprivate_codegen.cpp test/OpenMP/taskloop_private_codegen.cpp test/OpenMP/taskloop_reduction_codegen.cpp test/OpenMP/taskloop_simd_codegen.cpp test/OpenMP/taskloop_simd_firstprivate_codegen.cpp test/OpenMP/taskloop_simd_lastprivate_codegen.cpp test/OpenMP/taskloop_simd_private_codegen.cpp test/OpenMP/taskloop_simd_reduction_codegen.cpp Index: test/OpenMP/taskloop_simd_reduction_codegen.cpp === --- test/OpenMP/taskloop_simd_reduction_codegen.cpp +++ test/OpenMP/taskloop_simd_reduction_codegen.cpp @@ -148,7 +148,7 @@ // CHECK:[[SUB12:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK:store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], // CHECK:[[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 72, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @{{.+}} to i32 (i32, i8*)*)) -// CHECK:call void @__kmpc_taskloop(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* null) +// CHECK:call void @__kmpc_taskloop(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) // CHECK:call void @__kmpc_end_taskgroup(%struct.ident_t* // CHECK:ret i32 Index: test/OpenMP/taskloop_simd_private_codegen.cpp === --- test/OpenMP/taskloop_simd_private_codegen.cpp +++ test/OpenMP/taskloop_simd_private_codegen.cpp @@ -65,7 +65,7 @@ // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, 
i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) // LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 -// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* null) +// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) // LAMBDA: ret #pragma omp taskloop simd private(g, sivar) for (int i = 0; i < 10; ++i) { @@ -101,7 +101,7 @@ // BLOCKS: define{{.*}} internal{{.*}} vo