[PATCH] D53636: Do not always request an implicit taskgroup region inside the kmpc_taskloop function

2018-10-24 Thread Alexey Bataev via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC345180: Do not always request an implicit taskgroup region 
inside the kmpc_taskloop… (authored by ABataev, committed by ).

Repository:
  rC Clang

https://reviews.llvm.org/D53636

Files:
  lib/CodeGen/CGOpenMPRuntime.cpp
  test/OpenMP/taskloop_codegen.cpp
  test/OpenMP/taskloop_firstprivate_codegen.cpp
  test/OpenMP/taskloop_lastprivate_codegen.cpp
  test/OpenMP/taskloop_private_codegen.cpp
  test/OpenMP/taskloop_reduction_codegen.cpp
  test/OpenMP/taskloop_simd_codegen.cpp
  test/OpenMP/taskloop_simd_firstprivate_codegen.cpp
  test/OpenMP/taskloop_simd_lastprivate_codegen.cpp
  test/OpenMP/taskloop_simd_private_codegen.cpp
  test/OpenMP/taskloop_simd_reduction_codegen.cpp

Index: test/OpenMP/taskloop_simd_private_codegen.cpp
===
--- test/OpenMP/taskloop_simd_private_codegen.cpp
+++ test/OpenMP/taskloop_simd_private_codegen.cpp
@@ -65,7 +65,7 @@
   // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
   // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
 // LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
-// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* null)
+// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null)
 // LAMBDA: ret
 #pragma omp taskloop simd private(g, sivar)
   for (int i = 0; i < 10; ++i) {
@@ -101,7 +101,7 @@
   // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
   // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
   // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
-  // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* null)
+  // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null)
   // BLOCKS: ret
 #pragma omp taskloop simd private(g, sivar)
   for (int i = 0; i < 10; ++i) {
@@ -193,7 +193,7 @@
 // CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]],
 
 // Start task.
-// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*))
+// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*))
 // CHECK: call i32 @__kmpc_omp_task([[LOC]], i32 [[GTID]], i8*
 
 // CHECK: = call i{{.+}} [[TMAIN_INT:@.+]]()
@@ -324,7 +324,7 @@
 // CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]],
 
 // Start task.
-// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*))
+// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*))
 
 // No destructors must be called for private copies of s_arr and var.
 // CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
Index: test/OpenMP/taskloop_private_codegen.cpp
===
--- test/OpenMP/taskloop_private_codegen.cpp
+++ test/OpenMP/taskloop_private_codegen.cpp
@@ -65,7 +65,7 @@
   // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
   // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
 // LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
-// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}

[PATCH] D53636: Do not always request an implicit taskgroup region inside the kmpc_taskloop function

2018-10-24 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev added a comment.

In https://reviews.llvm.org/D53636#1274673, @smateo wrote:

> Hi Alexey,
>
> Thanks for the prompt review!
>
> I don't have commit access yet; would you mind committing it for me?
>
> Thanks!


Sure, no problems, thanks for the patch!


Repository:
  rC Clang

https://reviews.llvm.org/D53636



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53636: Do not always request an implicit taskgroup region inside the kmpc_taskloop function

2018-10-24 Thread Sergi Mateo via Phabricator via cfe-commits
smateo added a comment.

Hi Alexey,

Thanks for the prompt review!

I don't have commit access yet; would you mind committing it for me?

Thanks!


Repository:
  rC Clang

https://reviews.llvm.org/D53636



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53636: Do not always request an implicit taskgroup region inside the kmpc_taskloop function

2018-10-24 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev accepted this revision.
ABataev added a comment.
This revision is now accepted and ready to land.

LG


Repository:
  rC Clang

https://reviews.llvm.org/D53636



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53636: Do not always request an implicit taskgroup region inside the kmpc_taskloop function

2018-10-24 Thread Sergi Mateo via Phabricator via cfe-commits
smateo created this revision.
smateo added a reviewer: ABataev.
Herald added a subscriber: cfe-commits.

For the following code:

  int i;
  #pragma omp taskloop
  for (i = 0; i < 100; ++i)
  {}
  
  #pragma omp taskloop nogroup
  for (i = 0; i < 100; ++i)
  {}

Clang emits the following LLVM IR:

  ...
   call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 %0)
   %2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
   ...
   call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %2, i32 1, i64* %8, i64* %9, i64 %13, i32 0, i32 0, i64 0, i8* null)
   call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 %0)
  
  
   ...
   %15 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 %0, i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..2 to i32 (i32, i8*)*))
   ...
   call void @__kmpc_taskloop(%struct.ident_t* @0, i32 %0, i8* %15, i32 1, i64* %21, i64* %22, i64 %26, i32 0, i32 0, i64 0, i8* null)

The first set of instructions corresponds to the first taskloop construct. It 
is important to note that the implicit taskgroup region associated with the 
taskloop construct has been materialized in our IR:  the `__kmpc_taskloop` 
occurs inside a taskgroup region. Note also that this taskgroup region does not 
exist in our second taskloop because we are using the `nogroup` clause.

The issue here is that the 4th argument from the end of the `__kmpc_taskloop` 
call is always zero. Checking the LLVM OpenMP RT implementation, we see that 
this argument corresponds to the `nogroup` parameter:

  void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
   kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
   int sched, kmp_uint64 grainsize, void *task_dup);

So basically we always tell the RT to create another taskgroup region. For the 
first taskloop, this means that we create two taskgroup regions. For the second 
example, it means that despite the `nogroup` clause we still get a taskgroup 
region, so we unnecessarily wait until all descendant tasks have been 
executed.


Repository:
  rC Clang

https://reviews.llvm.org/D53636

Files:
  lib/CodeGen/CGOpenMPRuntime.cpp
  test/OpenMP/taskloop_codegen.cpp
  test/OpenMP/taskloop_firstprivate_codegen.cpp
  test/OpenMP/taskloop_lastprivate_codegen.cpp
  test/OpenMP/taskloop_private_codegen.cpp
  test/OpenMP/taskloop_reduction_codegen.cpp
  test/OpenMP/taskloop_simd_codegen.cpp
  test/OpenMP/taskloop_simd_firstprivate_codegen.cpp
  test/OpenMP/taskloop_simd_lastprivate_codegen.cpp
  test/OpenMP/taskloop_simd_private_codegen.cpp
  test/OpenMP/taskloop_simd_reduction_codegen.cpp

Index: test/OpenMP/taskloop_simd_reduction_codegen.cpp
===
--- test/OpenMP/taskloop_simd_reduction_codegen.cpp
+++ test/OpenMP/taskloop_simd_reduction_codegen.cpp
@@ -148,7 +148,7 @@
 // CHECK:[[SUB12:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK:store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]],
 // CHECK:[[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 72, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @{{.+}} to i32 (i32, i8*)*))
-// CHECK:call void @__kmpc_taskloop(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* null)
+// CHECK:call void @__kmpc_taskloop(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null)
 // CHECK:call void @__kmpc_end_taskgroup(%struct.ident_t*
 
 // CHECK:ret i32
Index: test/OpenMP/taskloop_simd_private_codegen.cpp
===
--- test/OpenMP/taskloop_simd_private_codegen.cpp
+++ test/OpenMP/taskloop_simd_private_codegen.cpp
@@ -65,7 +65,7 @@
   // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
   // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
 // LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
-// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* null)
+// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null)
 // LAMBDA: ret
 #pragma omp taskloop simd private(g, sivar)
   for (int i = 0; i < 10; ++i) {
@@ -101,7 +101,7 @@
   // BLOCKS: define{{.*}} internal{{.*}} vo