[PATCH] D86558: [OPENMP]Add support for allocate vars in untied tasks.
This revision was automatically updated to reflect the committed changes. Closed by commit rG738bab743b5c: [OPENMP]Add support for allocate vars in untied tasks. (authored by ABataev). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D86558/new/ https://reviews.llvm.org/D86558 Files: clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGOpenMPRuntime.h clang/lib/CodeGen/CGStmtOpenMP.cpp clang/test/OpenMP/allocate_codegen.cpp clang/test/OpenMP/for_lastprivate_codegen.cpp clang/test/OpenMP/for_linear_codegen.cpp clang/test/OpenMP/for_reduction_codegen_UDR.cpp clang/test/OpenMP/parallel_firstprivate_codegen.cpp clang/test/OpenMP/parallel_private_codegen.cpp clang/test/OpenMP/task_codegen.cpp Index: clang/test/OpenMP/task_codegen.cpp === --- clang/test/OpenMP/task_codegen.cpp +++ clang/test/OpenMP/task_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -DUNTIEDRT | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -DUNTIEDRT +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT // // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s @@ -14,6 +14,19 @@ #ifndef HEADER #define HEADER +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + // CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [2 x [[STRUCT_S:%.+]]]* } // CHECK-DAG: [[STRUCT_SHAREDS1:%.+]] = type { [2 x [[STRUCT_S:%.+]]]* } @@ -258,21 +271,26 @@ a = 4; c = 5; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]]) -#pragma omp task untied +#pragma omp task untied firstprivate(c) allocate(omp_pteam_mem_alloc:c) { -S s1; +S s1, s2; +#ifdef UNTIEDRT +#pragma omp allocate(s2) allocator(omp_pteam_mem_alloc) +#endif +s2.a = 0; #pragma omp task -a = 4; +a = c = 4; #pragma omp taskyield s1 = S(); +s2.a = 10; #pragma omp taskwait } return a; } // CHECK: define internal i32 [[TASK_ENTRY1]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) -// CHECK: store i32 15, i32* [[A_PTR:@.+]] +// CHECK: store i32 15, i32* [[A_PTR:@.+]], // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PTR]] // CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8 // CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}} @@ -294,10 +312,13 @@ // CHECK: define internal i32 // CHECK: store i32 4, i32* [[A_PTR]] -// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) +// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %{{.+}}) // UNTIEDRT: [[S1_ADDR_PTR:%.+]] = alloca %struct.S*, -// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]]) -// UNTIEDRT: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]], +// UNTIEDRT: [[S2_ADDR_PTR_REF:%.+]] = alloca %struct.S**, +// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]], %struct.S*** [[S2_ADDR_PTR_REF]]) +// UNTIEDRT-DAG: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]], +// UNTIEDRT-DAG: [[S2_ADDR_PTR:%.+]] = load %struct.S**, %struct.S*** [[S2_ADDR_PTR_REF]], +// UNTIEDRT-DAG: [[S2_ADDR:%.+]] = load %struct.S*, %struct.S** [[S2_ADDR_PTR]], // CHECK: switch i32 %{{.+}}, label %[[DONE:.+]] [ // CHECK: [[DONE]]: @@ -309,16 +330,25 @@ // UNTIEDRT: br label %[[EXIT:[^,]+]] //
[PATCH] D86558: [OPENMP]Add support for allocate vars in untied tasks.
jdoerfert accepted this revision. jdoerfert added a comment. This revision is now accepted and ready to land. LGTM Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D86558/new/ https://reviews.llvm.org/D86558 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D86558: [OPENMP]Add support for allocate vars in untied tasks.
ABataev updated this revision to Diff 288711. ABataev added a comment. Herald added a subscriber: danielkiss. Rebase Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D86558/new/ https://reviews.llvm.org/D86558 Files: clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGOpenMPRuntime.h clang/lib/CodeGen/CGStmtOpenMP.cpp clang/test/OpenMP/allocate_codegen.cpp clang/test/OpenMP/for_lastprivate_codegen.cpp clang/test/OpenMP/for_linear_codegen.cpp clang/test/OpenMP/for_reduction_codegen_UDR.cpp clang/test/OpenMP/parallel_firstprivate_codegen.cpp clang/test/OpenMP/parallel_private_codegen.cpp clang/test/OpenMP/task_codegen.cpp Index: clang/test/OpenMP/task_codegen.cpp === --- clang/test/OpenMP/task_codegen.cpp +++ clang/test/OpenMP/task_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -DUNTIEDRT | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -DUNTIEDRT +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT // // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s @@ -14,6 +14,19 @@ #ifndef HEADER #define HEADER +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + // CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [2 x [[STRUCT_S:%.+]]]* } // CHECK-DAG: [[STRUCT_SHAREDS1:%.+]] = type { [2 x [[STRUCT_S:%.+]]]* } @@ -258,21 +271,26 @@ a = 4; c = 5; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]]) -#pragma omp task untied +#pragma omp task untied firstprivate(c) allocate(omp_pteam_mem_alloc:c) { -S s1; +S s1, s2; +#ifdef UNTIEDRT +#pragma omp allocate(s2) allocator(omp_pteam_mem_alloc) +#endif +s2.a = 0; #pragma omp task -a = 4; +a = c = 4; #pragma omp taskyield s1 = S(); +s2.a = 10; #pragma omp taskwait } return a; } // CHECK: define internal i32 [[TASK_ENTRY1]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) -// CHECK: store i32 15, i32* [[A_PTR:@.+]] +// CHECK: store i32 15, i32* [[A_PTR:@.+]], // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PTR]] // CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8 // CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}} @@ -294,10 +312,13 @@ // CHECK: define internal i32 // CHECK: store i32 4, i32* [[A_PTR]] -// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) +// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %{{.+}}) // UNTIEDRT: [[S1_ADDR_PTR:%.+]] = alloca %struct.S*, -// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]]) -// UNTIEDRT: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]], +// UNTIEDRT: [[S2_ADDR_PTR_REF:%.+]] = alloca %struct.S**, +// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]], %struct.S*** [[S2_ADDR_PTR_REF]]) +// UNTIEDRT-DAG: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]], +// UNTIEDRT-DAG: [[S2_ADDR_PTR:%.+]] = load %struct.S**, %struct.S*** [[S2_ADDR_PTR_REF]], +// UNTIEDRT-DAG: [[S2_ADDR:%.+]] = load %struct.S*, %struct.S** [[S2_ADDR_PTR]], // CHECK: switch i32 %{{.+}}, label %[[DONE:.+]] [ // CHECK: [[DONE]]: @@ -309,16 +330,25 @@ // UNTIEDRT: br label %[[EXIT:[^,]+]] // UNTIEDRT: call void [[CONSTR:@.+]](%struct.S* [[S1_ADDR]]) +// UNTI
[PATCH] D86558: [OPENMP]Add support for allocate vars in untied tasks.
ABataev created this revision. ABataev added a reviewer: jdoerfert. Herald added subscribers: guansong, yaxunl. Herald added a project: clang. ABataev requested review of this revision. Herald added a subscriber: sstefan1. Local vars, marked with pragma allocate, mustbe allocate by the call of the runtime function and cannot be allocated as other local variables. Instead, we allocate a space for the pointer in private record and store the address, returned by kmpc_alloc call in this pointer. So, for untied tasks #pragma omp task untied { S s; #pragma omp allocate(s) allocator(allocator) s = x; } compiler generates something like this: struct task_with_privates { S *ptr; }; void entry(task_with_privates *p) { S *s = p->s; switch(partid) { case 1: p->s = (S*)kmpc_alloc(); kmpc_omp_task(); br exit; case 2: *s = x; kmpc_omp_task(); br exit; case 2: ~S(s); kmpc_free((void*)s); br exit; } exit: } Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D86558 Files: clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGOpenMPRuntime.h clang/lib/CodeGen/CGStmtOpenMP.cpp clang/test/OpenMP/allocate_codegen.cpp clang/test/OpenMP/for_lastprivate_codegen.cpp clang/test/OpenMP/for_linear_codegen.cpp clang/test/OpenMP/for_reduction_codegen_UDR.cpp clang/test/OpenMP/parallel_firstprivate_codegen.cpp clang/test/OpenMP/parallel_private_codegen.cpp clang/test/OpenMP/task_codegen.cpp Index: clang/test/OpenMP/task_codegen.cpp === --- clang/test/OpenMP/task_codegen.cpp +++ clang/test/OpenMP/task_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -DUNTIEDRT | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -DUNTIEDRT +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT // // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s @@ -14,6 +14,19 @@ #ifndef HEADER #define HEADER +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + // CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [2 x [[STRUCT_S:%.+]]]* } // CHECK-DAG: [[STRUCT_SHAREDS1:%.+]] = type { [2 x [[STRUCT_S:%.+]]]* } @@ -258,21 +271,26 @@ a = 4; c = 5; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]]) -#pragma omp task untied +#pragma omp task untied firstprivate(c) allocate(omp_pteam_mem_alloc:c) { -S s1; +S s1, s2; +#ifdef UNTIEDRT +#pragma omp allocate(s2) allocator(omp_pteam_mem_alloc) +#endif +s2.a = 0; #pragma omp task -a = 4; +a = c = 4; #pragma omp taskyield s1 = S(); +s2.a = 10; #pragma omp taskwait } return a; } // CHECK: define internal i32 [[TASK_ENTRY1]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) -// CHECK: store i32 15, i32* [[A_PTR:@.+]] +// CHECK: store i32 15, i32* [[A_PTR:@.+]], // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PTR]] // CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8 // CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}} @@ -294,10 +312,13 @@ // CHECK: define internal i32 // CHECK: store i32 4, i32* [[A_PTR]] -// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) +// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %{{.+}}) // UNTIEDRT: [[S1_ADDR_