ABataev updated this revision to Diff 264020.
ABataev added a comment.

Rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79257/new/

https://reviews.llvm.org/D79257

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/lib/CodeGen/CGOpenMPRuntime.h
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/OpenMP/target_parallel_for_simd_uses_allocators_codegen.cpp
  clang/test/OpenMP/target_parallel_for_uses_allocators_codegen.cpp
  clang/test/OpenMP/target_parallel_uses_allocators_codegen.cpp
  clang/test/OpenMP/target_simd_uses_allocators_codegen.cpp
  
clang/test/OpenMP/target_teams_distribute_parallel_for_simd_uses_allocators_codegen.cpp
  
clang/test/OpenMP/target_teams_distribute_parallel_for_uses_allocators_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_uses_allocators_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_uses_allocators_codegen.cpp
  clang/test/OpenMP/target_teams_uses_allocators_codegen.cpp
  clang/test/OpenMP/target_uses_allocators_codegen.cpp

Index: clang/test/OpenMP/target_uses_allocators_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_uses_allocators_codegen.cpp
@@ -0,0 +1,93 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+};
+
+typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1,
+                                    omp_atk_alignment = 2,
+                                    omp_atk_access = 3,
+                                    omp_atk_pool_size = 4,
+                                    omp_atk_fallback = 5,
+                                    omp_atk_fb_data = 6,
+                                    omp_atk_pinned = 7,
+                                    omp_atk_partition = 8
+} omp_alloctrait_key_t;
+typedef enum omp_alloctrait_value_t {
+  omp_atv_false = 0,
+  omp_atv_true = 1,
+  omp_atv_default = 2,
+  omp_atv_contended = 3,
+  omp_atv_uncontended = 4,
+  omp_atv_sequential = 5,
+  omp_atv_private = 6,
+  omp_atv_all = 7,
+  omp_atv_thread = 8,
+  omp_atv_pteam = 9,
+  omp_atv_cgroup = 10,
+  omp_atv_default_mem_fb = 11,
+  omp_atv_null_fb = 12,
+  omp_atv_abort_fb = 13,
+  omp_atv_allocator_fb = 14,
+  omp_atv_environment = 15,
+  omp_atv_nearest = 16,
+  omp_atv_blocked = 17,
+  omp_atv_interleaved = 18
+} omp_alloctrait_value_t;
+
+typedef struct omp_alloctrait_t {
+  omp_alloctrait_key_t key;
+  __UINTPTR_TYPE__ value;
+} omp_alloctrait_t;
+
+// Just map the traits variable as a firstprivate variable.
+// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160]
+// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673]
+
+// CHECK: define {{.*}}[[FOO:@.+]]()
+void foo() {
+  omp_alloctrait_t traits[10];
+  omp_allocator_handle_t my_allocator;
+
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0))
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
+// CHECK: [[FAILED]]:
+// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}})
+#pragma omp target uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
+  ;
+}
+
+// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}})
+// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*,
+// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64,
+// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]],
+// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8**
+// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]],
+// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]])
+// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64
+// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]],
+
+// Destroy allocator upon exit from the region.
+// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]],
+// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8*
+// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]])
+// CHECK: ret void
+
+#endif
Index: clang/test/OpenMP/target_teams_uses_allocators_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_teams_uses_allocators_codegen.cpp
@@ -0,0 +1,93 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+};
+
+typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1,
+                                    omp_atk_alignment = 2,
+                                    omp_atk_access = 3,
+                                    omp_atk_pool_size = 4,
+                                    omp_atk_fallback = 5,
+                                    omp_atk_fb_data = 6,
+                                    omp_atk_pinned = 7,
+                                    omp_atk_partition = 8
+} omp_alloctrait_key_t;
+typedef enum omp_alloctrait_value_t {
+  omp_atv_false = 0,
+  omp_atv_true = 1,
+  omp_atv_default = 2,
+  omp_atv_contended = 3,
+  omp_atv_uncontended = 4,
+  omp_atv_sequential = 5,
+  omp_atv_private = 6,
+  omp_atv_all = 7,
+  omp_atv_thread = 8,
+  omp_atv_pteam = 9,
+  omp_atv_cgroup = 10,
+  omp_atv_default_mem_fb = 11,
+  omp_atv_null_fb = 12,
+  omp_atv_abort_fb = 13,
+  omp_atv_allocator_fb = 14,
+  omp_atv_environment = 15,
+  omp_atv_nearest = 16,
+  omp_atv_blocked = 17,
+  omp_atv_interleaved = 18
+} omp_alloctrait_value_t;
+
+typedef struct omp_alloctrait_t {
+  omp_alloctrait_key_t key;
+  __UINTPTR_TYPE__ value;
+} omp_alloctrait_t;
+
+// Just map the traits variable as a firstprivate variable.
+// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160]
+// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673]
+
+// CHECK: define {{.*}}[[FOO:@.+]]()
+void foo() {
+  omp_alloctrait_t traits[10];
+  omp_allocator_handle_t my_allocator;
+
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 0)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
+// CHECK: [[FAILED]]:
+// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}})
+#pragma omp target teams uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
+  ;
+}
+
+// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}})
+// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*,
+// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64,
+// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]],
+// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8**
+// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]],
+// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]])
+// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64
+// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]],
+
+// Destroy allocator upon exit from the region.
+// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]],
+// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8*
+// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]])
+// CHECK: ret void
+
+#endif
Index: clang/test/OpenMP/target_teams_distribute_uses_allocators_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_teams_distribute_uses_allocators_codegen.cpp
@@ -0,0 +1,94 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+};
+
+typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1,
+                                    omp_atk_alignment = 2,
+                                    omp_atk_access = 3,
+                                    omp_atk_pool_size = 4,
+                                    omp_atk_fallback = 5,
+                                    omp_atk_fb_data = 6,
+                                    omp_atk_pinned = 7,
+                                    omp_atk_partition = 8
+} omp_alloctrait_key_t;
+typedef enum omp_alloctrait_value_t {
+  omp_atv_false = 0,
+  omp_atv_true = 1,
+  omp_atv_default = 2,
+  omp_atv_contended = 3,
+  omp_atv_uncontended = 4,
+  omp_atv_sequential = 5,
+  omp_atv_private = 6,
+  omp_atv_all = 7,
+  omp_atv_thread = 8,
+  omp_atv_pteam = 9,
+  omp_atv_cgroup = 10,
+  omp_atv_default_mem_fb = 11,
+  omp_atv_null_fb = 12,
+  omp_atv_abort_fb = 13,
+  omp_atv_allocator_fb = 14,
+  omp_atv_environment = 15,
+  omp_atv_nearest = 16,
+  omp_atv_blocked = 17,
+  omp_atv_interleaved = 18
+} omp_alloctrait_value_t;
+
+typedef struct omp_alloctrait_t {
+  omp_alloctrait_key_t key;
+  __UINTPTR_TYPE__ value;
+} omp_alloctrait_t;
+
+// Just map the traits variable as a firstprivate variable.
+// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160]
+// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673]
+
+// CHECK: define {{.*}}[[FOO:@.+]]()
+void foo() {
+  omp_alloctrait_t traits[10];
+  omp_allocator_handle_t my_allocator;
+
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 0)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
+// CHECK: [[FAILED]]:
+// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}})
+#pragma omp target teams distribute uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
+// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}})
+// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*,
+// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64,
+// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]],
+// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8**
+// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]],
+// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]])
+// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64
+// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]],
+
+// Destroy allocator upon exit from the region.
+// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]],
+// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8*
+// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]])
+// CHECK: ret void
+
+#endif
Index: clang/test/OpenMP/target_teams_distribute_simd_uses_allocators_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_teams_distribute_simd_uses_allocators_codegen.cpp
@@ -0,0 +1,94 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+};
+
+typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1,
+                                    omp_atk_alignment = 2,
+                                    omp_atk_access = 3,
+                                    omp_atk_pool_size = 4,
+                                    omp_atk_fallback = 5,
+                                    omp_atk_fb_data = 6,
+                                    omp_atk_pinned = 7,
+                                    omp_atk_partition = 8
+} omp_alloctrait_key_t;
+typedef enum omp_alloctrait_value_t {
+  omp_atv_false = 0,
+  omp_atv_true = 1,
+  omp_atv_default = 2,
+  omp_atv_contended = 3,
+  omp_atv_uncontended = 4,
+  omp_atv_sequential = 5,
+  omp_atv_private = 6,
+  omp_atv_all = 7,
+  omp_atv_thread = 8,
+  omp_atv_pteam = 9,
+  omp_atv_cgroup = 10,
+  omp_atv_default_mem_fb = 11,
+  omp_atv_null_fb = 12,
+  omp_atv_abort_fb = 13,
+  omp_atv_allocator_fb = 14,
+  omp_atv_environment = 15,
+  omp_atv_nearest = 16,
+  omp_atv_blocked = 17,
+  omp_atv_interleaved = 18
+} omp_alloctrait_value_t;
+
+typedef struct omp_alloctrait_t {
+  omp_alloctrait_key_t key;
+  __UINTPTR_TYPE__ value;
+} omp_alloctrait_t;
+
+// Just map the traits variable as a firstprivate variable.
+// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160]
+// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673]
+
+// CHECK: define {{.*}}[[FOO:@.+]]()
+void foo() {
+  omp_alloctrait_t traits[10];
+  omp_allocator_handle_t my_allocator;
+
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 1)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
+// CHECK: [[FAILED]]:
+// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}})
+#pragma omp target teams distribute simd uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
+// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}})
+// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*,
+// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64,
+// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]],
+// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8**
+// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]],
+// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]])
+// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64
+// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]],
+
+// Destroy allocator upon exit from the region.
+// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]],
+// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8*
+// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]])
+// CHECK: ret void
+
+#endif
Index: clang/test/OpenMP/target_teams_distribute_parallel_for_uses_allocators_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_teams_distribute_parallel_for_uses_allocators_codegen.cpp
@@ -0,0 +1,94 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+};
+
+typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1,
+                                    omp_atk_alignment = 2,
+                                    omp_atk_access = 3,
+                                    omp_atk_pool_size = 4,
+                                    omp_atk_fallback = 5,
+                                    omp_atk_fb_data = 6,
+                                    omp_atk_pinned = 7,
+                                    omp_atk_partition = 8
+} omp_alloctrait_key_t;
+typedef enum omp_alloctrait_value_t {
+  omp_atv_false = 0,
+  omp_atv_true = 1,
+  omp_atv_default = 2,
+  omp_atv_contended = 3,
+  omp_atv_uncontended = 4,
+  omp_atv_sequential = 5,
+  omp_atv_private = 6,
+  omp_atv_all = 7,
+  omp_atv_thread = 8,
+  omp_atv_pteam = 9,
+  omp_atv_cgroup = 10,
+  omp_atv_default_mem_fb = 11,
+  omp_atv_null_fb = 12,
+  omp_atv_abort_fb = 13,
+  omp_atv_allocator_fb = 14,
+  omp_atv_environment = 15,
+  omp_atv_nearest = 16,
+  omp_atv_blocked = 17,
+  omp_atv_interleaved = 18
+} omp_alloctrait_value_t;
+
+typedef struct omp_alloctrait_t {
+  omp_alloctrait_key_t key;
+  __UINTPTR_TYPE__ value;
+} omp_alloctrait_t;
+
+// Just map the traits variable as a firstprivate variable.
+// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160]
+// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673]
+
+// CHECK: define {{.*}}[[FOO:@.+]]()
+void foo() {
+  omp_alloctrait_t traits[10];
+  omp_allocator_handle_t my_allocator;
+
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 0)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
+// CHECK: [[FAILED]]:
+// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}})
+#pragma omp target teams distribute parallel for uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
+// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}})
+// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*,
+// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64,
+// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]],
+// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8**
+// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]],
+// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]])
+// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64
+// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]],
+
+// Destroy allocator upon exit from the region.
+// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]],
+// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8*
+// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]])
+// CHECK: ret void
+
+#endif
Index: clang/test/OpenMP/target_teams_distribute_parallel_for_simd_uses_allocators_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_teams_distribute_parallel_for_simd_uses_allocators_codegen.cpp
@@ -0,0 +1,94 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+};
+
+typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1,
+                                    omp_atk_alignment = 2,
+                                    omp_atk_access = 3,
+                                    omp_atk_pool_size = 4,
+                                    omp_atk_fallback = 5,
+                                    omp_atk_fb_data = 6,
+                                    omp_atk_pinned = 7,
+                                    omp_atk_partition = 8
+} omp_alloctrait_key_t;
+typedef enum omp_alloctrait_value_t {
+  omp_atv_false = 0,
+  omp_atv_true = 1,
+  omp_atv_default = 2,
+  omp_atv_contended = 3,
+  omp_atv_uncontended = 4,
+  omp_atv_sequential = 5,
+  omp_atv_private = 6,
+  omp_atv_all = 7,
+  omp_atv_thread = 8,
+  omp_atv_pteam = 9,
+  omp_atv_cgroup = 10,
+  omp_atv_default_mem_fb = 11,
+  omp_atv_null_fb = 12,
+  omp_atv_abort_fb = 13,
+  omp_atv_allocator_fb = 14,
+  omp_atv_environment = 15,
+  omp_atv_nearest = 16,
+  omp_atv_blocked = 17,
+  omp_atv_interleaved = 18
+} omp_alloctrait_value_t;
+
+typedef struct omp_alloctrait_t {
+  omp_alloctrait_key_t key;
+  __UINTPTR_TYPE__ value;
+} omp_alloctrait_t;
+
+// Just map the traits variable as a firstprivate variable.
+// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160]
+// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673]
+
+// CHECK: define {{.*}}[[FOO:@.+]]()
+void foo() {
+  omp_alloctrait_t traits[10];
+  omp_allocator_handle_t my_allocator;
+
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 0, i32 0)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
+// CHECK: [[FAILED]]:
+// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}})
+#pragma omp target teams distribute parallel for simd uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
+// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}})
+// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*,
+// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64,
+// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]],
+// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8**
+// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]],
+// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]])
+// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64
+// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]],
+
+// Destroy allocator upon exit from the region.
+// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]],
+// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8*
+// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]])
+// CHECK: ret void
+
+#endif
Index: clang/test/OpenMP/target_simd_uses_allocators_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_simd_uses_allocators_codegen.cpp
@@ -0,0 +1,94 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+};
+
+typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1,
+                                    omp_atk_alignment = 2,
+                                    omp_atk_access = 3,
+                                    omp_atk_pool_size = 4,
+                                    omp_atk_fallback = 5,
+                                    omp_atk_fb_data = 6,
+                                    omp_atk_pinned = 7,
+                                    omp_atk_partition = 8
+} omp_alloctrait_key_t;
+typedef enum omp_alloctrait_value_t {
+  omp_atv_false = 0,
+  omp_atv_true = 1,
+  omp_atv_default = 2,
+  omp_atv_contended = 3,
+  omp_atv_uncontended = 4,
+  omp_atv_sequential = 5,
+  omp_atv_private = 6,
+  omp_atv_all = 7,
+  omp_atv_thread = 8,
+  omp_atv_pteam = 9,
+  omp_atv_cgroup = 10,
+  omp_atv_default_mem_fb = 11,
+  omp_atv_null_fb = 12,
+  omp_atv_abort_fb = 13,
+  omp_atv_allocator_fb = 14,
+  omp_atv_environment = 15,
+  omp_atv_nearest = 16,
+  omp_atv_blocked = 17,
+  omp_atv_interleaved = 18
+} omp_alloctrait_value_t;
+
+typedef struct omp_alloctrait_t {
+  omp_alloctrait_key_t key;
+  __UINTPTR_TYPE__ value;
+} omp_alloctrait_t;
+
+// Just map the traits variable as a firstprivate variable.
+// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160]
+// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673]
+
+// CHECK: define {{.*}}[[FOO:@.+]]()
+void foo() {
+  omp_alloctrait_t traits[10];
+  omp_allocator_handle_t my_allocator;
+
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 1, i32 1)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
+// CHECK: [[FAILED]]:
+// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}})
+#pragma omp target simd uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
+// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}})
+// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*,
+// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64,
+// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]],
+// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8**
+// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]],
+// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]])
+// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64
+// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]],
+
+// Destroy allocator upon exit from the region.
+// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]],
+// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8*
+// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]])
+// CHECK: ret void
+
+#endif
Index: clang/test/OpenMP/target_parallel_uses_allocators_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_parallel_uses_allocators_codegen.cpp
@@ -0,0 +1,93 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+};
+
+typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1,
+                                    omp_atk_alignment = 2,
+                                    omp_atk_access = 3,
+                                    omp_atk_pool_size = 4,
+                                    omp_atk_fallback = 5,
+                                    omp_atk_fb_data = 6,
+                                    omp_atk_pinned = 7,
+                                    omp_atk_partition = 8
+} omp_alloctrait_key_t;
+typedef enum omp_alloctrait_value_t {
+  omp_atv_false = 0,
+  omp_atv_true = 1,
+  omp_atv_default = 2,
+  omp_atv_contended = 3,
+  omp_atv_uncontended = 4,
+  omp_atv_sequential = 5,
+  omp_atv_private = 6,
+  omp_atv_all = 7,
+  omp_atv_thread = 8,
+  omp_atv_pteam = 9,
+  omp_atv_cgroup = 10,
+  omp_atv_default_mem_fb = 11,
+  omp_atv_null_fb = 12,
+  omp_atv_abort_fb = 13,
+  omp_atv_allocator_fb = 14,
+  omp_atv_environment = 15,
+  omp_atv_nearest = 16,
+  omp_atv_blocked = 17,
+  omp_atv_interleaved = 18
+} omp_alloctrait_value_t;
+
+typedef struct omp_alloctrait_t {
+  omp_alloctrait_key_t key;
+  __UINTPTR_TYPE__ value;
+} omp_alloctrait_t;
+
+// Just map the traits variable as a firstprivate variable.
+// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160]
+// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673]
+
+// CHECK: define {{.*}}[[FOO:@.+]]()
+void foo() {
+  omp_alloctrait_t traits[10];
+  omp_allocator_handle_t my_allocator;
+
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 1, i32 0)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
+// CHECK: [[FAILED]]:
+// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}})
+#pragma omp target parallel uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
+  ;
+}
+
+// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}})
+// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*,
+// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64,
+// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]],
+// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8**
+// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]],
+// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]])
+// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64
+// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]],
+
+// Destroy allocator upon exit from the region.
+// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]],
+// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8*
+// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]])
+// CHECK: ret void
+
+#endif
Index: clang/test/OpenMP/target_parallel_for_uses_allocators_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_parallel_for_uses_allocators_codegen.cpp
@@ -0,0 +1,94 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+};
+
+typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1,
+                                    omp_atk_alignment = 2,
+                                    omp_atk_access = 3,
+                                    omp_atk_pool_size = 4,
+                                    omp_atk_fallback = 5,
+                                    omp_atk_fb_data = 6,
+                                    omp_atk_pinned = 7,
+                                    omp_atk_partition = 8
+} omp_alloctrait_key_t;
+typedef enum omp_alloctrait_value_t {
+  omp_atv_false = 0,
+  omp_atv_true = 1,
+  omp_atv_default = 2,
+  omp_atv_contended = 3,
+  omp_atv_uncontended = 4,
+  omp_atv_sequential = 5,
+  omp_atv_private = 6,
+  omp_atv_all = 7,
+  omp_atv_thread = 8,
+  omp_atv_pteam = 9,
+  omp_atv_cgroup = 10,
+  omp_atv_default_mem_fb = 11,
+  omp_atv_null_fb = 12,
+  omp_atv_abort_fb = 13,
+  omp_atv_allocator_fb = 14,
+  omp_atv_environment = 15,
+  omp_atv_nearest = 16,
+  omp_atv_blocked = 17,
+  omp_atv_interleaved = 18
+} omp_alloctrait_value_t;
+
+typedef struct omp_alloctrait_t {
+  omp_alloctrait_key_t key;
+  __UINTPTR_TYPE__ value;
+} omp_alloctrait_t;
+
+// Just map the traits variable as a firstprivate variable.
+// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160]
+// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673]
+
+// CHECK: define {{.*}}[[FOO:@.+]]()
+void foo() {
+  omp_alloctrait_t traits[10];
+  omp_allocator_handle_t my_allocator;
+
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 1, i32 0)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
+// CHECK: [[FAILED]]:
+// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}})
+#pragma omp target parallel for uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
+// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}})
+// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*,
+// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64,
+// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]],
+// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8**
+// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]],
+// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]])
+// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64
+// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]],
+
+// Destroy allocator upon exit from the region.
+// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]],
+// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8*
+// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]])
+// CHECK: ret void
+
+#endif
Index: clang/test/OpenMP/target_parallel_for_simd_uses_allocators_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_parallel_for_simd_uses_allocators_codegen.cpp
@@ -0,0 +1,94 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+};
+
+typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1,
+                                    omp_atk_alignment = 2,
+                                    omp_atk_access = 3,
+                                    omp_atk_pool_size = 4,
+                                    omp_atk_fallback = 5,
+                                    omp_atk_fb_data = 6,
+                                    omp_atk_pinned = 7,
+                                    omp_atk_partition = 8
+} omp_alloctrait_key_t;
+typedef enum omp_alloctrait_value_t {
+  omp_atv_false = 0,
+  omp_atv_true = 1,
+  omp_atv_default = 2,
+  omp_atv_contended = 3,
+  omp_atv_uncontended = 4,
+  omp_atv_sequential = 5,
+  omp_atv_private = 6,
+  omp_atv_all = 7,
+  omp_atv_thread = 8,
+  omp_atv_pteam = 9,
+  omp_atv_cgroup = 10,
+  omp_atv_default_mem_fb = 11,
+  omp_atv_null_fb = 12,
+  omp_atv_abort_fb = 13,
+  omp_atv_allocator_fb = 14,
+  omp_atv_environment = 15,
+  omp_atv_nearest = 16,
+  omp_atv_blocked = 17,
+  omp_atv_interleaved = 18
+} omp_alloctrait_value_t;
+
+typedef struct omp_alloctrait_t {
+  omp_alloctrait_key_t key;
+  __UINTPTR_TYPE__ value;
+} omp_alloctrait_t;
+
+// Just map the traits variable as a firstprivate variable.
+// CHECK-DAG: [[SIZES:@.+]] = private unnamed_addr constant [1 x i64] [i64 160]
+// CHECK-DAG: [[MAPTYPES:@.+]] = private unnamed_addr constant [1 x i64] [i64 673]
+
+// CHECK: define {{.*}}[[FOO:@.+]]()
+void foo() {
+  omp_alloctrait_t traits[10];
+  omp_allocator_handle_t my_allocator;
+
+// CHECK: [[RES:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @.[[TGT_REGION:.+]].region_id, i32 1, i8** %{{.+}}, i8** %{{.+}}, i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPTYPES]], i32 0, i32 0), i32 1, i32 0)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[FAILED:.+]], label %[[DONE:.+]]
+// CHECK: [[FAILED]]:
+// CHECK: call void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* %{{[^,]+}})
+#pragma omp target parallel for simd uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits))
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
+// CHECK: define internal void @[[TGT_REGION]]([10 x %struct.omp_alloctrait_t]* {{.+}})
+// CHECK: [[TRAITS_ADDR_REF:%.+]] = alloca [10 x %struct.omp_alloctrait_t]*,
+// CHECK: [[MY_ALLOCATOR_ADDR:%.+]] = alloca i64,
+// CHECK: [[TRAITS_ADDR:%.+]] = load [10 x %struct.omp_alloctrait_t]*, [10 x %struct.omp_alloctrait_t]** [[TRAITS_ADDR_REF]],
+// CHECK: [[TRAITS_ADDR_VOIDPTR:%.+]] = bitcast [10 x %struct.omp_alloctrait_t]* [[TRAITS_ADDR]] to i8**
+// CHECK: [[TRAITS:%.+]] = load i8*, i8** [[TRAITS_ADDR_VOIDPTR]],
+// CHECK: [[ALLOCATOR:%.+]] = call i8* @__kmpc_init_allocator(i32 %{{.+}}, i8* null, i32 10, i8* [[TRAITS]])
+// CHECK: [[CONV:%.+]] = ptrtoint i8* [[ALLOCATOR]] to i64
+// CHECK: store i64 [[CONV]], i64* [[MY_ALLOCATOR_ADDR]],
+
+// Destroy allocator upon exit from the region.
+// CHECK: [[ALLOCATOR:%.+]] = load i64, i64* [[MY_ALLOCATOR_ADDR]],
+// CHECK: [[CONV:%.+]] = inttoptr i64 [[ALLOCATOR]] to i8*
+// CHECK: call void @__kmpc_destroy_allocator(i32 %{{.+}}, i8* [[CONV]])
+// CHECK: ret void
+
+#endif
Index: clang/lib/Sema/SemaOpenMP.cpp
===================================================================
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -79,6 +79,15 @@
       llvm::SmallVector<std::pair<Expr *, OverloadedOperatorKind>, 4>;
   using DoacrossDependMapTy =
       llvm::DenseMap<OMPDependClause *, OperatorOffsetTy>;
+  /// Kind of the declaration used in the uses_allocators clauses.
+  enum class UsesAllocatorsDeclKind {
+    /// Predefined allocator
+    PredefinedAllocator,
+    /// User-defined allocator
+    UserDefinedAllocator,
+    /// The declaration that represent allocator trait
+    AllocatorTrait,
+  };
 
 private:
   struct DSAInfo {
@@ -170,7 +179,8 @@
     llvm::SmallVector<DeclRefExpr *, 4> DeclareTargetLinkVarDecls;
     /// List of decls used in inclusive/exclusive clauses of the scan directive.
     llvm::DenseSet<CanonicalDeclPtr<Decl>> UsedInScanDirective;
-    llvm::DenseSet<CanonicalDeclPtr<const Decl>> UsesAllocatorsDecls;
+    llvm::DenseMap<CanonicalDeclPtr<const Decl>, UsesAllocatorsDeclKind>
+        UsesAllocatorsDecls;
     SharingMapTy(OpenMPDirectiveKind DKind, DeclarationNameInfo Name,
                  Scope *CurScope, SourceLocation Loc)
         : Directive(DKind), DirectiveName(Name), CurScope(CurScope),
@@ -1019,16 +1029,25 @@
   }
 
   /// Marks decl as used in uses_allocators clause as the allocator.
-  void addUsesAllocatorsDecl(const Decl *D) {
-    getTopOfStack().UsesAllocatorsDecls.insert(D);
+  void addUsesAllocatorsDecl(const Decl *D, UsesAllocatorsDeclKind Kind) {
+    getTopOfStack().UsesAllocatorsDecls.try_emplace(D, Kind);
   }
   /// Checks if specified decl is used in uses allocator clause as the
   /// allocator.
-  bool isUsesAllocatorsDecl(unsigned Level, const Decl *D) const {
-    return getStackElemAtLevel(Level).UsesAllocatorsDecls.count(D) > 0;
+  Optional<UsesAllocatorsDeclKind> isUsesAllocatorsDecl(unsigned Level,
+                                                        const Decl *D) const {
+    const SharingMapTy &StackElem = getTopOfStack();
+    auto I = StackElem.UsesAllocatorsDecls.find(D);
+    if (I == StackElem.UsesAllocatorsDecls.end())
+      return None;
+    return I->getSecond();
   }
-  bool isUsesAllocatorsDecl(const Decl *D) const {
-    return getTopOfStack().UsesAllocatorsDecls.count(D) > 0;
+  Optional<UsesAllocatorsDeclKind> isUsesAllocatorsDecl(const Decl *D) const {
+    const SharingMapTy &StackElem = getTopOfStack();
+    auto I = StackElem.UsesAllocatorsDecls.find(D);
+    if (I == StackElem.UsesAllocatorsDecls.end())
+      return None;
+    return I->getSecond();
   }
 };
 
@@ -2234,6 +2253,13 @@
             D, [](OpenMPClauseKind K) { return K == OMPC_copyin; }, Level))
       return OMPC_private;
   }
+  // User-defined allocators are private since they must be defined in the
+  // context of target region.
+  if (DSAStack->hasExplicitDirective(isOpenMPTargetExecutionDirective, Level) &&
+      DSAStack->isUsesAllocatorsDecl(Level, D).getValueOr(
+          DSAStackTy::UsesAllocatorsDeclKind::AllocatorTrait) ==
+          DSAStackTy::UsesAllocatorsDeclKind::UserDefinedAllocator)
+    return OMPC_private;
   return (DSAStack->hasExplicitDSA(
               D, [](OpenMPClauseKind K) { return K == OMPC_private; }, Level) ||
           (DSAStack->isClauseParsingMode() &&
@@ -2556,7 +2582,7 @@
           if (!DRE)
             continue;
           ValueDecl *VD = DRE->getDecl();
-          if (!VD)
+          if (!VD || !isa<VarDecl>(VD))
             continue;
           DSAStackTy::DSAVarData DVar =
               DSAStack->getTopDSA(VD, /*FromParent=*/false);
@@ -3277,7 +3303,7 @@
           !Stack->isImplicitTaskFirstprivate(VD))
         return;
       // Skip allocators in uses_allocators clauses.
-      if (Stack->isUsesAllocatorsDecl(VD))
+      if (Stack->isUsesAllocatorsDecl(VD).hasValue())
         return;
 
       DSAStackTy::DSAVarData DVar = Stack->getTopDSA(VD, /*FromParent=*/false);
@@ -4314,6 +4340,21 @@
         }
       }
     }
+    if (ThisCaptureRegion == OMPD_target) {
+      // Capture allocator traits in the target region. They are used implicitly
+      // and, thus, are not captured by default.
+      for (OMPClause *C : Clauses) {
+        if (const auto *UAC = dyn_cast<OMPUsesAllocatorsClause>(C)) {
+          for (unsigned I = 0, End = UAC->getNumberOfAllocators(); I < End;
+               ++I) {
+            OMPUsesAllocatorsClause::Data D = UAC->getAllocatorData(I);
+            if (Expr *E = D.AllocatorTraits)
+              MarkDeclarationsReferencedInExpr(E);
+          }
+          continue;
+        }
+      }
+    }
     if (++CompletedRegions == CaptureRegions.size())
       DSAStack->setBodyComplete();
     SR = ActOnCapturedRegionEnd(SR.get());
@@ -4741,7 +4782,10 @@
 
 public:
   bool VisitDeclRefExpr(const DeclRefExpr *E) {
-    return !S->isUsesAllocatorsDecl(E->getDecl());
+    return S->isUsesAllocatorsDecl(E->getDecl())
+               .getValueOr(
+                   DSAStackTy::UsesAllocatorsDeclKind::AllocatorTrait) ==
+           DSAStackTy::UsesAllocatorsDeclKind::AllocatorTrait;
   }
   bool VisitStmt(const Stmt *S) {
     for (const Stmt *Child : S->children()) {
@@ -18632,8 +18676,7 @@
       !findOMPAlloctraitT(*this, StartLoc, DSAStack))
     return nullptr;
   llvm::SmallSet<CanonicalDeclPtr<Decl>, 4> PredefinedAllocators;
-  for (int I = OMPAllocateDeclAttr::OMPDefaultMemAlloc;
-       I < OMPAllocateDeclAttr::OMPUserDefinedMemAlloc; ++I) {
+  for (int I = 0; I < OMPAllocateDeclAttr::OMPUserDefinedMemAlloc; ++I) {
     auto AllocatorKind = static_cast<OMPAllocateDeclAttr::AllocatorTypeTy>(I);
     StringRef Allocator =
         OMPAllocateDeclAttr::ConvertAllocatorTypeTyToStr(AllocatorKind);
@@ -18693,7 +18736,11 @@
       // No allocator traits - just convert it to rvalue.
       if (!D.AllocatorTraits)
         AllocatorExpr = DefaultLvalueConversion(AllocatorExpr).get();
-      DSAStack->addUsesAllocatorsDecl(DRE->getDecl());
+      DSAStack->addUsesAllocatorsDecl(
+          DRE->getDecl(),
+          IsPredefinedAllocator
+              ? DSAStackTy::UsesAllocatorsDeclKind::PredefinedAllocator
+              : DSAStackTy::UsesAllocatorsDeclKind::UserDefinedAllocator);
     }
     Expr *AllocatorTraitsExpr = nullptr;
     if (D.AllocatorTraits) {
@@ -18721,6 +18768,12 @@
               << AllocatorTraitsExpr->getType();
           continue;
         }
+        // Do not map by default allocator traits if it is a standalone
+        // variable.
+        if (auto *DRE = dyn_cast<DeclRefExpr>(AllocatorTraitsExpr))
+          DSAStack->addUsesAllocatorsDecl(
+              DRE->getDecl(),
+              DSAStackTy::UsesAllocatorsDeclKind::AllocatorTrait);
       }
     }
     OMPUsesAllocatorsClause::Data &NewD = NewData.emplace_back();
Index: clang/lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.h
+++ clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -1832,6 +1832,14 @@
   /// \param NewDepKind New dependency kind.
   void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                         OpenMPDependClauseKind NewDepKind, SourceLocation Loc);
+
+  /// Initializes user defined allocators specified in the uses_allocators
+  /// clauses.
+  void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator,
+                              const Expr *AllocatorTraits);
+
+  /// Destroys user defined allocators specified in the uses_allocators clause.
+  void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator);
 };
 
 /// Class supports emissionof SIMD-only code.
Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -720,6 +720,11 @@
   OMPRTL__kmpc_alloc,
   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
   OMPRTL__kmpc_free,
+  // Call to omp_allocator_handle_t __kmpc_init_allocator(int gtid,
+  // omp_memspace_handle_t, int ntraits, omp_alloctrait_t traits[]);
+  OMPRTL__kmpc_init_allocator,
+  // Call to void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
+  OMPRTL__kmpc_destroy_allocator,
 
   //
   // Offloading related calls
@@ -2392,6 +2397,26 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
     break;
   }
+  case OMPRTL__kmpc_init_allocator: {
+    // Build omp_allocator_handle_t __kmpc_init_allocator(int gtid,
+    // omp_memspace_handle_t, int ntraits, omp_alloctrait_t traits[]);
+    // omp_allocator_handle_t type is void*, omp_memspace_handle_t type is
+    // void*.
+    auto *FnTy = llvm::FunctionType::get(
+        CGM.VoidPtrTy, {CGM.IntTy, CGM.VoidPtrTy, CGM.IntTy, CGM.VoidPtrTy},
+        /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_init_allocator");
+    break;
+  }
+  case OMPRTL__kmpc_destroy_allocator: {
+    // Build void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
+    // omp_allocator_handle_t type is void*.
+    auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, {CGM.IntTy, CGM.VoidPtrTy},
+                                         /*isVarArg=*/false);
+    RTLFn =
+        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_destroy_allocator");
+    break;
+  }
   case OMPRTL__kmpc_push_target_tripcount: {
     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
     // size);
@@ -7085,16 +7110,104 @@
   }
 }
 
+namespace {
+/// Cleanup action for uses_allocators support.
+class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
+  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
+
+public:
+  OMPUsesAllocatorsActionTy(
+      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
+      : Allocators(Allocators) {}
+  void Enter(CodeGenFunction &CGF) override {
+    if (!CGF.HaveInsertPoint())
+      return;
+    for (const auto &AllocatorData : Allocators) {
+      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
+          CGF, AllocatorData.first, AllocatorData.second);
+    }
+  }
+  void Exit(CodeGenFunction &CGF) override {
+    if (!CGF.HaveInsertPoint())
+      return;
+    for (const auto &AllocatorData : Allocators) {
+      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
+                                                        AllocatorData.first);
+    }
+  }
+};
+} // namespace
+
 void CGOpenMPRuntime::emitTargetOutlinedFunction(
     const OMPExecutableDirective &D, StringRef ParentName,
     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
   assert(!ParentName.empty() && "Invalid target region parent name!");
   HasEmittedTargetRegion = true;
+  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
+  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
+    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
+      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
+      if (!D.AllocatorTraits)
+        continue;
+      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
+    }
+  }
+  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
+  CodeGen.setAction(UsesAllocatorAction);
   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                    IsOffloadEntry, CodeGen);
 }
 
+void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
+                                             const Expr *Allocator,
+                                             const Expr *AllocatorTraits) {
+  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
+  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
+  // Use default memspace handle.
+  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+  llvm::Value *NumTraits = llvm::ConstantInt::get(
+      CGF.IntTy, cast<ConstantArrayType>(
+                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
+                     ->getSize()
+                     .getLimitedValue());
+  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
+  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
+  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
+                                           AllocatorTraitsLVal.getBaseInfo(),
+                                           AllocatorTraitsLVal.getTBAAInfo());
+  llvm::Value *Traits =
+      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
+
+  llvm::Value *AllocatorVal =
+      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_init_allocator),
+                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
+  // Store to allocator.
+  CGF.EmitVarDecl(*cast<VarDecl>(
+      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
+  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
+  AllocatorVal =
+      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
+                               Allocator->getType(), Allocator->getExprLoc());
+  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
+}
+
+void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
+                                             const Expr *Allocator) {
+  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
+  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
+  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
+  llvm::Value *AllocatorVal =
+      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
+  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
+                                          CGF.getContext().VoidPtrTy,
+                                          Allocator->getExprLoc());
+  (void)CGF.EmitRuntimeCall(
+      createRuntimeFunction(OMPRTL__kmpc_destroy_allocator),
+      {ThreadId, AllocatorVal});
+}
+
 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
     const OMPExecutableDirective &D, StringRef ParentName,
     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
@@ -8537,6 +8650,19 @@
       for (const auto *D : C->varlists())
         FirstPrivateDecls.try_emplace(
             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
+    // Extract implicit firstprivates from uses_allocators clauses.
+    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
+      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
+        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
+        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
+          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
+                                        /*Implicit=*/true);
+        else if (const auto *VD = dyn_cast<VarDecl>(
+                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
+                         ->getDecl()))
+          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
+      }
+    }
     // Extract device pointer clause information.
     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
       for (auto L : C->component_lists())
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to