sfantao updated this revision to Diff 40992.
sfantao added a comment.

Rebase.


http://reviews.llvm.org/D12614

Files:
  include/clang/Basic/DiagnosticDriverKinds.td
  include/clang/Basic/LangOptions.def
  include/clang/Basic/LangOptions.h
  include/clang/Driver/CC1Options.td
  include/clang/Driver/Options.td
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGOpenMPRuntime.h
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/Frontend/CompilerInvocation.cpp
  lib/Serialization/ASTReader.cpp
  lib/Serialization/ASTWriter.cpp
  test/OpenMP/target_codegen.cpp
  test/OpenMP/target_codegen_global_capture.cpp
  test/OpenMP/target_codegen_registration.cpp
  test/OpenMP/target_codegen_registration_naming.cpp
  test/OpenMP/target_messages.cpp

Index: test/OpenMP/target_messages.cpp
===================================================================
--- test/OpenMP/target_messages.cpp
+++ test/OpenMP/target_messages.cpp
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s
+// RUN: not %clang_cc1 -fopenmp -std=c++11 -omptargets=aaa-bbb-ccc-ddd -o - %s 2>&1 | FileCheck %s
+// CHECK: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd'
 
 void foo() {
 }
Index: test/OpenMP/target_codegen_registration_naming.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/target_codegen_registration_naming.cpp
@@ -0,0 +1,66 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[CA:%.+]] = type { i32* }
+
+// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}})
+int nested(int a){
+  // CHECK: call void @.omp_offloading.[[FILEID:[0-9a-f]+\.[0-9a-f]+]].[[NNAME]].l[[T1L:[0-9]+]].c[[T1C:[0-9]+]](
+  #pragma omp target
+    ++a;
+
+  // CHECK: call void @"[[LNAME:.+]]"([[CA]]*
+  auto F = [&](){
+    #pragma omp parallel
+    {
+      #pragma omp target
+      ++a;
+    }
+  };
+
+  F();
+
+  return a;
+}
+
+// CHECK: define {{.*}}void @.omp_offloading.[[FILEID]].[[NNAME]].l[[T1L]].c[[T1C]](
+// TCHECK: define {{.*}}void @.omp_offloading.[[FILEID:[0-9a-f]+\.[0-9a-f]+]].[[NNAME:.+]].l[[T1L:[0-9]+]].c[[T1C:[0-9]+]](
+
+// CHECK: define {{.*}}void @"[[LNAME]]"(
+// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to
+
+// CHECK: define {{.*}}void [[PNAME]](
+// CHECK: call void @.omp_offloading.[[FILEID]].[[NNAME]].l[[T2L:[0-9]+]].c[[T2C:[0-9]+]](
+
+// CHECK: define {{.*}}void @.omp_offloading.[[FILEID]].[[NNAME]].l[[T2L]].c[[T2C]](
+// TCHECK: define {{.*}}void @.omp_offloading.[[FILEID]].[[NNAME:.+]].l[[T2L:[0-9]+]].c[[T2C:[0-9]+]](
+
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{[0-9]+}}, i32 {{[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 [[T1C]], i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{[0-9]+}}, i32 {{[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 [[T2C]], i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{[0-9]+}}, i32 {{[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 [[T1C]], i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{[0-9]+}}, i32 {{[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 [[T2C]], i32 {{[0-9]+}}}
+#endif
Index: test/OpenMP/target_codegen_registration.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/target_codegen_registration.cpp
@@ -0,0 +1,437 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// Check that no target code is emmitted if no omptests flag was provided.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK:    [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] }
+
+// CHECK-DAG: [[A1:@.+]] = internal global [[SA]]
+// CHECK-DAG: [[A2:@.+]] = global [[SA]]
+// CHECK-DAG: [[B1:@.+]] = global [[SB]]
+// CHECK-DAG: [[B2:@.+]] = global [[SB]]
+// CHECK-DAG: [[C1:@.+]] = internal global [[SC]]
+// CHECK-DAG: [[D1:@.+]] = global [[SD]]
+// CHECK-DAG: [[E1:@.+]] = global [[SE]]
+// CHECK-DAG: [[T1:@.+]] = global [[ST1]]
+// CHECK-DAG: [[T2:@.+]] = global [[ST2]]
+
+// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-NTARGET-NOT: type { i8*,
+// CHECK-NTARGET-NOT: type { i32,
+
+// We have 7 target regions
+
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// TCHECK-NOT: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 3]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 3]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 3]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 3]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 3]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 3]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 3]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 3]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 3]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 3]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 3]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 3]
+
+// CHECK-NTARGET-NOT: private constant i8 0
+// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i
+
+// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:\.omp_offloading\.[0-9a-f]+\.[0-9a-f]+\._Z.+\.l[0-9]+\.c[0-9]+]]\00"
+// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+
+// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:\.omp_offloading\.[0-9a-f]+\.[0-9a-f]+\._Z.+\.l[0-9]+\.c[0-9]+]]\00"
+// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i32*)* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i32*)* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i32*)* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i32*)* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i32*)* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i32*)* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i32*)* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i32*)* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i32*)* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i32*)* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i32*)* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i32*)* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }]
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }
+
+// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function.
+// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [
+// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* [[REGFN:@.+]] to void ()*), i8* null }]
+
+// CHECK-NTARGET: @llvm.global_ctors = appending global [3   x { i32, void ()*, i8* }] [
+
+extern int *R;
+
+struct SA {
+  int arr[4];
+  void foo() {
+    int a = *R;
+    a += 1;
+    *R = a;
+  }
+  SA() {
+    int a = *R;
+    a += 2;
+    *R = a;
+  }
+  ~SA() {
+    int a = *R;
+    a += 3;
+    *R = a;
+  }
+};
+
+struct SB {
+  int arr[8];
+  void foo() {
+    int a = *R;
+    #pragma omp target
+    a += 4;
+    *R = a;
+  }
+  SB() {
+    int a = *R;
+    a += 5;
+    *R = a;
+  }
+  ~SB() {
+    int a = *R;
+    a += 6;
+    *R = a;
+  }
+};
+
+struct SC {
+  int arr[16];
+  void foo() {
+    int a = *R;
+    a += 7;
+    *R = a;
+  }
+  SC() {
+    int a = *R;
+    #pragma omp target
+    a += 8;
+    *R = a;
+  }
+  ~SC() {
+    int a = *R;
+    a += 9;
+    *R = a;
+  }
+};
+
+struct SD {
+  int arr[32];
+  void foo() {
+    int a = *R;
+    a += 10;
+    *R = a;
+  }
+  SD() {
+    int a = *R;
+    a += 11;
+    *R = a;
+  }
+  ~SD() {
+    int a = *R;
+    #pragma omp target
+    a += 12;
+    *R = a;
+  }
+};
+
+struct SE {
+  int arr[64];
+  void foo() {
+    int a = *R;
+    #pragma omp target if(0)
+    a += 13;
+    *R = a;
+  }
+  SE() {
+    int a = *R;
+    #pragma omp target
+    a += 14;
+    *R = a;
+  }
+  ~SE() {
+    int a = *R;
+    #pragma omp target
+    a += 15;
+    *R = a;
+  }
+};
+
+template <int x>
+struct ST {
+  int arr[128 + x];
+  void foo() {
+    int a = *R;
+    #pragma omp target
+    a += 16 + x;
+    *R = a;
+  }
+  ST() {
+    int a = *R;
+    #pragma omp target
+    a += 17 + x;
+    *R = a;
+  }
+  ~ST() {
+    int a = *R;
+    #pragma omp target
+    a += 18 + x;
+    *R = a;
+  }
+};
+
+// We have to make sure we us all the target regions:
+//CHECK-DAG: define internal void @[[NAME1]](
+//CHECK-DAG: call void @[[NAME1]](
+//CHECK-DAG: define internal void @[[NAME2]](
+//CHECK-DAG: call void @[[NAME2]](
+//CHECK-DAG: define internal void @[[NAME3]](
+//CHECK-DAG: call void @[[NAME3]](
+//CHECK-DAG: define internal void @[[NAME4]](
+//CHECK-DAG: call void @[[NAME4]](
+//CHECK-DAG: define internal void @[[NAME5]](
+//CHECK-DAG: call void @[[NAME5]](
+//CHECK-DAG: define internal void @[[NAME6]](
+//CHECK-DAG: call void @[[NAME6]](
+//CHECK-DAG: define internal void @[[NAME7]](
+//CHECK-DAG: call void @[[NAME7]](
+//CHECK-DAG: define internal void @[[NAME8]](
+//CHECK-DAG: call void @[[NAME8]](
+//CHECK-DAG: define internal void @[[NAME9]](
+//CHECK-DAG: call void @[[NAME9]](
+//CHECK-DAG: define internal void @[[NAME10]](
+//CHECK-DAG: call void @[[NAME10]](
+//CHECK-DAG: define internal void @[[NAME11]](
+//CHECK-DAG: call void @[[NAME11]](
+//CHECK-DAG: define internal void @[[NAME12]](
+//CHECK-DAG: call void @[[NAME12]](
+
+//TCHECK-DAG: define void @[[NAME1]](
+//TCHECK-DAG: define void @[[NAME2]](
+//TCHECK-DAG: define void @[[NAME3]](
+//TCHECK-DAG: define void @[[NAME4]](
+//TCHECK-DAG: define void @[[NAME5]](
+//TCHECK-DAG: define void @[[NAME6]](
+//TCHECK-DAG: define void @[[NAME7]](
+//TCHECK-DAG: define void @[[NAME8]](
+//TCHECK-DAG: define void @[[NAME9]](
+//TCHECK-DAG: define void @[[NAME10]](
+//TCHECK-DAG: define void @[[NAME11]](
+//TCHECK-DAG: define void @[[NAME12]](
+
+// CHECK-NTARGET-NOT: __tgt_target
+// CHECK-NTARGET-NOT: __tgt_register_lib
+// CHECK-NTARGET-NOT: __tgt_unregister_lib
+
+// TCHECK-NOT: __tgt_target
+// TCHECK-NOT: __tgt_register_lib
+// TCHECK-NOT: __tgt_unregister_lib
+
+// We have 2 initializers with priority 500
+//CHECK: define internal void [[P500]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 1 initializers with priority 501
+//CHECK: define internal void [[P501]](
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 6 initializers with default priority
+//CHECK: define internal void [[PMAX]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// Check registration and unregistration
+
+//CHECK:     define internal void [[UNREGFN:@.+]](i8*)
+//CHECK:     call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]])
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_unregister_lib([[DSCTY]]*)
+
+//CHECK:     define internal void [[REGFN]](i8*)
+//CHECK:     call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]])
+//CHECK:     call i32 @__cxa_atexit(void (i8*)* [[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*),
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_register_lib([[DSCTY]]*)
+
+static __attribute__((init_priority(500))) SA a1;
+SA a2;
+SB __attribute__((init_priority(500))) b1;
+SB __attribute__((init_priority(501))) b2;
+static SC c1;
+SD d1;
+SE e1;
+ST<100> t1;
+ST<1000> t2;
+
+
+int bar(int a){
+  int r = a;
+
+  a1.foo();
+  a2.foo();
+  b1.foo();
+  b2.foo();
+  c1.foo();
+  d1.foo();
+  e1.foo();
+  t1.foo();
+  t2.foo();
+
+  #pragma omp target
+  ++r;
+
+  return r + *R;
+}
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG = !{i32 0, i32 [[DEVID:[0-9]+]], i32 [[FILEID:[0-9]+]], !"_ZN2SB3fooEv", i32 160, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SDD2Ev", i32 210, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SEC2Ev", i32 226, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SED2Ev", i32 232, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi1000EE3fooEv", i32 243, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi100EEC2Ev", i32 249, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_Z3bari", i32 352, i32 11, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi100EED2Ev", i32 255, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi1000EEC2Ev", i32 249, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi1000EED2Ev", i32 255, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi100EE3fooEv", i32 243, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SCC2Ev", i32 185, i32 13, i32 {{[0-9]}}+}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID:[0-9]+]], i32 [[FILEID:[0-9]+]], !"_ZN2SB3fooEv", i32 160, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SDD2Ev", i32 210, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SEC2Ev", i32 226, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SED2Ev", i32 232, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi1000EE3fooEv", i32 243, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi100EEC2Ev", i32 249, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_Z3bari", i32 352, i32 11, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi100EED2Ev", i32 255, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi1000EEC2Ev", i32 249, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi1000EED2Ev", i32 255, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi100EE3fooEv", i32 243, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SCC2Ev", i32 185, i32 13, i32 {{[0-9]}}+}
+
+#endif
Index: test/OpenMP/target_codegen_global_capture.cpp
===================================================================
--- test/OpenMP/target_codegen_global_capture.cpp
+++ test/OpenMP/target_codegen_global_capture.cpp
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
Index: test/OpenMP/target_codegen.cpp
===================================================================
--- test/OpenMP/target_codegen.cpp
+++ test/OpenMP/target_codegen.cpp
@@ -1,15 +1,32 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
 
 // CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
 // CHECK-DAG: [[S1:%.+]] = type { double }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}} }
 
 // We have 8 target regions, but only 7 that actually will generate offloading
 // code, only 6 will have mapped arguments, and only 4 have all-constant map
@@ -33,6 +50,27 @@
 // CHECK-DAG: @{{.*}} = private constant i8 0
 // CHECK-DAG: @{{.*}} = private constant i8 0
 
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }]
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* [[REGFN:@.+]] to void ()*), i8* null }]
+
+
 template<typename tx, typename ty>
 struct TT{
   tx X;
Index: lib/Serialization/ASTWriter.cpp
===================================================================
--- lib/Serialization/ASTWriter.cpp
+++ lib/Serialization/ASTWriter.cpp
@@ -1327,6 +1327,13 @@
   }
   Record.push_back(LangOpts.CommentOpts.ParseAllComments);
 
+  // OpenMP offloading options.
+  Record.push_back(LangOpts.OMPTargetTriples.size());
+  for (auto &T : LangOpts.OMPTargetTriples)
+    AddString(T.getTriple(), Record);
+
+  AddString(LangOpts.OMPHostIRFile, Record);
+
   Stream.EmitRecord(LANGUAGE_OPTIONS, Record);
 
   // Target options.
Index: lib/Serialization/ASTReader.cpp
===================================================================
--- lib/Serialization/ASTReader.cpp
+++ lib/Serialization/ASTReader.cpp
@@ -4697,6 +4697,13 @@
   }
   LangOpts.CommentOpts.ParseAllComments = Record[Idx++];
 
+  // OpenMP offloading options.
+  for (unsigned N = Record[Idx++]; N; --N) {
+    LangOpts.OMPTargetTriples.push_back(llvm::Triple(ReadString(Record, Idx)));
+  }
+
+  LangOpts.OMPHostIRFile = ReadString(Record, Idx);
+
   return Listener.ReadLanguageOptions(LangOpts, Complain,
                                       AllowCompatibleDifferences);
 }
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -1765,6 +1765,30 @@
   Opts.OpenMP = Args.hasArg(options::OPT_fopenmp);
   Opts.OpenMPUseTLS =
       Opts.OpenMP && !Args.hasArg(options::OPT_fnoopenmp_use_tls);
+  Opts.OpenMPIsDevice =
+      Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_is_device);
+
+  // Get the OpenMP target triples if any.
+  if (Arg *A = Args.getLastArg(options::OPT_omptargets_EQ)) {
+
+    for (unsigned i = 0; i < A->getNumValues(); ++i) {
+      llvm::Triple TT(A->getValue(i));
+
+      if (TT.getArch() == llvm::Triple::UnknownArch)
+        Diags.Report(clang::diag::err_drv_invalid_omp_target) << A->getValue(i);
+      else
+        Opts.OMPTargetTriples.push_back(TT);
+    }
+  }
+
+  // Get OpenMP host file path if any and report if a non existent file is
+  // found
+  if (Arg *A = Args.getLastArg(options::OPT_omp_host_ir_file_path)) {
+    Opts.OMPHostIRFile = A->getValue();
+    if (!llvm::sys::fs::exists(Opts.OMPHostIRFile))
+      Diags.Report(clang::diag::err_drv_omp_host_ir_file_not_found)
+          << Opts.OMPHostIRFile;
+  }
 
   // Record whether the __DEPRECATED define was requested.
   Opts.Deprecated = Args.hasFlag(OPT_fdeprecated_macro,
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -374,6 +374,10 @@
     if (llvm::Function *CudaDtorFunction = CUDARuntime->makeModuleDtorFunction())
       AddGlobalDtor(CudaDtorFunction);
   }
+  if (OpenMPRuntime)
+    if (llvm::Function *OpenMPRegistrationFunction =
+            OpenMPRuntime->emitRegistrationFunction())
+      AddGlobalCtor(OpenMPRegistrationFunction, 0);
   if (PGOReader && PGOStats.hasDiagnostics())
     PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName);
   EmitCtorList(GlobalCtors, "llvm.global_ctors");
@@ -1435,6 +1439,11 @@
     }
   }
 
+  // If this is OpenMP device, check if it is legal to emit this global
+  // normally.
+  if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD))
+    return;
+
   // Ignore declarations, they will be emitted on their first use.
   if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
     // Forward declarations are emitted lazily on first use.
@@ -3533,6 +3542,9 @@
     // File-scope asm is ignored during device-side CUDA compilation.
     if (LangOpts.CUDA && LangOpts.CUDAIsDevice)
       break;
+    // File-scope asm is ignored during device-side OpenMP compilation.
+    if (LangOpts.OpenMPIsDevice)
+      break;
     auto *AD = cast<FileScopeAsmDecl>(D);
     getModule().appendModuleInlineAsm(AD->getAsmString()->getString());
     break;
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -2479,14 +2479,8 @@
   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
   GenerateOpenMPCapturedVars(CS, CapturedVars, /*UseOnlyReferences=*/true);
 
-  // Emit target region as a standalone region.
-  auto &&CodeGen = [&CS](CodeGenFunction &CGF) {
-    CGF.EmitStmt(CS.getCapturedStmt());
-  };
-
-  // Obtain the target region outlined function.
-  llvm::Value *Fn =
-      CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, CodeGen);
+  llvm::Function *Fn = nullptr;
+  llvm::Constant *FnID = nullptr;
 
   // Check if we have any if clause associated with the directive.
   const Expr *IfCond = nullptr;
@@ -2501,7 +2495,34 @@
     Device = C->getDevice();
   }
 
-  CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, IfCond, Device,
+  // Check if we have an if clause whose conditional always evaluates to false
+  // or if we do not have any targets specified. If so the target region is not
+  // an offload entry point.
+  bool IsOffloadEntry = true;
+  if (IfCond) {
+    bool Val;
+    if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
+      IsOffloadEntry = false;
+  }
+  if (CGM.getLangOpts().OMPTargetTriples.empty())
+    IsOffloadEntry = false;
+
+  assert(CurFuncDecl && "No parent declaration for target region!");
+  StringRef ParentName;
+  // In case we have Ctors/Dtors we use the complete type variant to produce
+  // the mangling of the device outlined kernel.
+  if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl))
+    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
+  else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl))
+    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
+  else
+    ParentName =
+        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
+
+  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
+                                                    IsOffloadEntry);
+
+  CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
                                         CapturedVars);
 }
 
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -35,6 +35,7 @@
 
 namespace clang {
 class Expr;
+class GlobalDecl;
 class OMPExecutableDirective;
 class VarDecl;
 
@@ -162,6 +163,10 @@
     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
     // *arg_types);
     OMPRTL__tgt_target,
+    // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
+    OMPRTL__tgt_register_lib,
+    // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
+    OMPRTL__tgt_unregister_lib,
   };
 
   /// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -285,7 +290,181 @@
   ///    } flags;
   /// } kmp_depend_info_t;
   QualType KmpDependInfoTy;
+  /// \brief Type struct __tgt_offload_entry{
+  ///   void      *addr;       // Pointer to the offload entry info.
+  ///                          // (function or global)
+  ///   char      *name;       // Name of the function or global.
+  ///   size_t     size;       // Size of the entry info (0 if it a function).
+  /// };
+  QualType TgtOffloadEntryQTy;
+  /// struct __tgt_device_image{
+  /// void   *ImageStart;       // Pointer to the target code start.
+  /// void   *ImageEnd;         // Pointer to the target code end.
+  /// // We also add the host entries to the device image, as it may be useful
+  /// // for the target runtime to have access to that information.
+  /// __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
+  ///                                       // the entries.
+  /// __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
+  ///                                       // entries (non inclusive).
+  /// };
+  QualType TgtDeviceImageQTy;
+  /// struct __tgt_bin_desc{
+  ///   int32_t              NumDevices;      // Number of devices supported.
+  ///   __tgt_device_image   *DeviceImages;   // Arrays of device images
+  ///                                         // (one per device).
+  ///   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
+  ///                                         // entries.
+  ///   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
+  ///                                         // entries (non inclusive).
+  /// };
+  QualType TgtBinaryDescriptorQTy;
+  /// \brief Entity that registers the offloading constants that were emitted so
+  /// far.
+  class OffloadEntriesInfoManagerTy {
+    CodeGenModule &CGM;
+
+    /// \brief Number of entries registered so far.
+    unsigned OffloadingEntriesNum;
+
+  public:
+    /// \brief Base class of the entries info.
+    class OffloadEntryInfo {
+    public:
+      /// \brief Kind of a given entry. Currently, only target regions are
+      /// supported.
+      enum OffloadingEntryInfoKinds {
+        // Entry is a target region.
+        OFFLOAD_ENTRY_INFO_TARGET_REGION = 0,
+        // Invalid entry info.
+        OFFLOAD_ENTRY_INFO_INVALID = ~0u
+      };
+
+      OffloadEntryInfo() : Order(~0u), Kind(OFFLOAD_ENTRY_INFO_INVALID) {}
+      explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order)
+          : Order(Order), Kind(Kind) {}
+
+      bool isValid() const { return Order != ~0u; }
+      unsigned getOrder() const { return Order; }
+      OffloadingEntryInfoKinds getKind() const { return Kind; }
+      static bool classof(const OffloadEntryInfo *Info) { return true; }
+
+    protected:
+      // \brief Order this entry was emitted.
+      unsigned Order;
+
+      OffloadingEntryInfoKinds Kind;
+    };
+
+    /// \brief Return true if a there are no entries defined.
+    bool empty() const;
+    /// \brief Return number of entries defined so far.
+    unsigned size() const { return OffloadingEntriesNum; }
+    OffloadEntriesInfoManagerTy(CodeGenModule &CGM)
+        : CGM(CGM), OffloadingEntriesNum(0) {}
+
+    ///
+    /// Target region entries related.
+    ///
+    /// \brief Target region entries info.
+    class OffloadEntryInfoTargetRegion : public OffloadEntryInfo {
+      // \brief Address of the entity that has to be mapped for offloading.
+      llvm::Constant *Addr;
+      // \brief Address that can be used as the ID of the entry.
+      llvm::Constant *ID;
+
+    public:
+      OffloadEntryInfoTargetRegion()
+          : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, ~0u),
+            Addr(nullptr), ID(nullptr) {}
+      explicit OffloadEntryInfoTargetRegion(unsigned Order,
+                                            llvm::Constant *Addr,
+                                            llvm::Constant *ID)
+          : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, Order),
+            Addr(Addr), ID(ID) {}
+
+      llvm::Constant *getAddress() const { return Addr; }
+      llvm::Constant *getID() const { return ID; }
+      void setAddress(llvm::Constant *V) {
+        assert(!Addr && "Address as been set before!");
+        Addr = V;
+      }
+      void setID(llvm::Constant *V) {
+        assert(!ID && "ID as been set before!");
+        ID = V;
+      }
+      static bool classof(const OffloadEntryInfo *Info) {
+        return Info->getKind() == OFFLOAD_ENTRY_INFO_TARGET_REGION;
+      }
+    };
+    /// \brief Initialize target region entry.
+    void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
+                                         StringRef ParentName, unsigned LineNum,
+                                         unsigned ColNum, unsigned Order);
+    /// \brief Register target region entry.
+    void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
+                                       StringRef ParentName, unsigned LineNum,
+                                       unsigned ColNum, llvm::Constant *Addr,
+                                       llvm::Constant *ID);
+    /// \brief Return true if a target region entry with the provided
+    /// information exists.
+    bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
+                                  StringRef ParentName, unsigned LineNum,
+                                  unsigned ColNum) const;
+    /// brief Applies action \a Action on all registered entries.
+    typedef llvm::function_ref<void(unsigned, unsigned, StringRef, unsigned,
+                                    unsigned, OffloadEntryInfoTargetRegion &)>
+        OffloadTargetRegionEntryInfoActTy;
+    void actOnTargetRegionEntriesInfo(
+        const OffloadTargetRegionEntryInfoActTy &Action);
+
+  private:
+    // Storage for target region entries kind. The storage is to be indexed by
+    // file ID, device ID, parent function name, lane number, and column number.
+    typedef llvm::DenseMap<unsigned, OffloadEntryInfoTargetRegion>
+        OffloadEntriesTargetRegionPerColumn;
+    typedef llvm::DenseMap<unsigned, OffloadEntriesTargetRegionPerColumn>
+        OffloadEntriesTargetRegionPerLine;
+    typedef llvm::StringMap<OffloadEntriesTargetRegionPerLine>
+        OffloadEntriesTargetRegionPerParentName;
+    typedef llvm::DenseMap<unsigned, OffloadEntriesTargetRegionPerParentName>
+        OffloadEntriesTargetRegionPerFile;
+    typedef llvm::DenseMap<unsigned, OffloadEntriesTargetRegionPerFile>
+        OffloadEntriesTargetRegionPerDevice;
+    typedef OffloadEntriesTargetRegionPerDevice OffloadEntriesTargetRegionTy;
+    OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
+  };
+  OffloadEntriesInfoManagerTy OffloadEntriesInfoManager;
+
+  /// \brief Creates and registers offloading binary descriptor for the current
+  /// compilation unit. The function that does the registration is returned.
+  llvm::Function *createOffloadingBinaryDescriptorRegistration();
+
+  /// \brief Creates offloading entry for the provided address \a Addr,
+  /// name \a Name and size \a Size.
+  void createOffloadEntry(llvm::Constant *Addr, StringRef Name, uint64_t Size);
+
+  /// \brief Creates all the offload entries in the current compilation unit
+  /// along with the associated metadata.
+  void createOffloadEntriesAndInfoMetadata();
+
+  /// \brief Loads all the offload entries information from the host IR
+  /// metadata.
+  void loadOffloadInfoMetadata();
 
+  /// \brief Returns __tgt_offload_entry type.
+  QualType getTgtOffloadEntryQTy();
+
+  /// \brief Returns __tgt_device_image type.
+  QualType getTgtDeviceImageQTy();
+
+  /// \brief Returns __tgt_bin_desc type.
+  QualType getTgtBinaryDescriptorQTy();
+
+  /// \brief Start scanning from statement \a S and and emit all target regions
+  /// found along the way.
+  /// \param S Starting statement.
+  /// \param ParentName Name of the function declaration that is being scanned.
+  void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName);
 
   /// \brief Build type kmp_routine_entry_t (if not built yet).
   void emitKmpRoutineEntryT(QualType KmpInt32Ty);
@@ -738,26 +917,56 @@
 
   /// \brief Emit outilined function for 'target' directive.
   /// \param D Directive to emit.
-  /// \param CodeGen Code generation sequence for the \a D directive.
-  virtual llvm::Value *
-  emitTargetOutlinedFunction(const OMPExecutableDirective &D,
-                             const RegionCodeGenTy &CodeGen);
+  /// \param ParentName Name of the function that encloses the target region.
+  /// \param OutlinedFn Outlined function value to be defined by this call.
+  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+  /// \param IsOffloadEntry True if the outlined function is an offload entry.
+  /// An oulined function may not be an entry if, e.g. the if clause always
+  /// evaluates to false.
+  virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
+                                          StringRef ParentName,
+                                          llvm::Function *&OutlinedFn,
+                                          llvm::Constant *&OutlinedFnID,
+                                          bool IsOffloadEntry);
 
   /// \brief Emit the target offloading code associated with \a D. The emitted
   /// code attempts offloading the execution to the device, an the event of
   /// a failure it executes the host version outlined in \a OutlinedFn.
   /// \param D Directive to emit.
   /// \param OutlinedFn Host version of the code to be offloaded.
+  /// \param OutlinedFnID ID of host version of the code to be offloaded.
   /// \param IfCond Expression evaluated in if clause associated with the target
   /// directive, or null if no if clause is used.
   /// \param Device Expression evaluated in device clause associated with the
   /// target directive, or null if no device clause is used.
   /// \param CapturedVars Values captured in the current region.
   virtual void emitTargetCall(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D,
-                              llvm::Value *OutlinedFn, const Expr *IfCond,
+                              llvm::Value *OutlinedFn,
+                              llvm::Value *OutlinedFnID, const Expr *IfCond,
                               const Expr *Device,
                               ArrayRef<llvm::Value *> CapturedVars);
+
+  /// \brief Emit the target regions enclosed in \a GD function definition or
+  /// the function itself in case it is a valid device function. Returns true if
+  /// \a GD was dealt with successfully.
+  /// \param FD Function to scan.
+  virtual bool emitTargetFunctions(GlobalDecl GD);
+
+  /// \brief Emit the global variable if it is a valid device global variable.
+  /// Returns true if \a GD was dealt with successfully.
+  /// \param GD Variable declaration to emit.
+  virtual bool emitTargetGlobalVariable(GlobalDecl GD);
+
+  /// \brief Emit the global \a GD if it is meaningful for the target. Returns
+  /// if it was emitted succesfully.
+  /// \param GD Global to scan.
+  virtual bool emitTargetGlobal(GlobalDecl GD);
+
+  /// \brief Creates the offloading descriptor in the event any target region
+  /// was emitted in the current module and return the function that registers
+  /// it.
+  virtual llvm::Function *emitRegistrationFunction();
 };
 
 } // namespace CodeGen
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -11,16 +11,19 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "CGCXXABI.h"
 #include "CGOpenMPRuntime.h"
 #include "CodeGenFunction.h"
 #include "CGCleanup.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/StmtOpenMP.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Value.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 
@@ -215,25 +218,31 @@
 
 /// \brief API for captured statement code generation in OpenMP target
 /// constructs. For this captures, implicit parameters are used instead of the
-/// captured fields.
+/// captured fields. The name of the target region has to be unique in a given
+/// application so it is provided by the client, because only the client has
+/// the information to generate that.
 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
 public:
   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
-                           const RegionCodeGenTy &CodeGen)
+                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
-                           /*HasCancel = */ false) {}
+                           /*HasCancel=*/false),
+        HelperName(HelperName) {}
 
   /// \brief This is unused for target regions because each starts executing
   /// with a single thread.
   const VarDecl *getThreadIDVariable() const override { return nullptr; }
 
   /// \brief Get the name of the capture helper.
-  StringRef getHelperName() const override { return ".omp_offloading."; }
+  StringRef getHelperName() const override { return HelperName; }
 
   static bool classof(const CGCapturedStmtInfo *Info) {
     return CGOpenMPRegionInfo::classof(Info) &&
            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
   }
+
+private:
+  StringRef HelperName;
 };
 
 /// \brief RAII for emitting code of OpenMP constructs.
@@ -299,7 +308,8 @@
 }
 
 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
-    : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
+    : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr),
+      OffloadEntriesInfoManager(CGM) {
   IdentTy = llvm::StructType::create(
       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
@@ -309,6 +319,8 @@
                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
+
+  loadOffloadInfoMetadata();
 }
 
 void CGOpenMPRuntime::clear() {
@@ -918,6 +930,26 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
     break;
   }
+  case OMPRTL__tgt_register_lib: {
+    // Build void __tgt_register_lib(__tgt_bin_desc *desc);
+    QualType ParamTy =
+        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
+    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
+    break;
+  }
+  case OMPRTL__tgt_unregister_lib: {
+    // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
+    QualType ParamTy =
+        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
+    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
+    break;
+  }
   }
   return RTLFn;
 }
@@ -1917,6 +1949,382 @@
 };
 } // anonymous namespace
 
+bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
+  // FIXME: Add other entries type when they become supported.
+  return OffloadEntriesTargetRegion.empty();
+}
+
+/// \brief Initialize target region entry.
+void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
+    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
+                                    StringRef ParentName, unsigned LineNum,
+                                    unsigned ColNum, unsigned Order) {
+  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
+                                             "only required for the device "
+                                             "code generation.");
+  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
+      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
+  ++OffloadingEntriesNum;
+}
+
+void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
+    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
+                                  StringRef ParentName, unsigned LineNum,
+                                  unsigned ColNum, llvm::Constant *Addr,
+                                  llvm::Constant *ID) {
+  // If we are emitting code for a target, the entry is already initialized,
+  // only has to be registered.
+  if (CGM.getLangOpts().OpenMPIsDevice) {
+    assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
+                                    ColNum) &&
+           "Entry must exist.");
+    auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName]
+                                            [LineNum][ColNum];
+    assert(Entry.isValid() && "Entry not initialized!");
+    Entry.setAddress(Addr);
+    Entry.setID(ID);
+    return;
+  } else {
+    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
+    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
+        Entry;
+  }
+}
+
+bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
+    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
+    unsigned ColNum) const {
+  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
+  if (PerDevice == OffloadEntriesTargetRegion.end())
+    return false;
+  auto PerFile = PerDevice->second.find(FileID);
+  if (PerFile == PerDevice->second.end())
+    return false;
+  auto PerParentName = PerFile->second.find(ParentName);
+  if (PerParentName == PerFile->second.end())
+    return false;
+  auto PerLine = PerParentName->second.find(LineNum);
+  if (PerLine == PerParentName->second.end())
+    return false;
+  auto PerColumn = PerLine->second.find(ColNum);
+  if (PerColumn == PerLine->second.end())
+    return false;
+  // Fail if this entry is already registered.
+  if (PerColumn->second.getAddress() || PerColumn->second.getID())
+    return false;
+  return true;
+}
+
+void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
+    const OffloadTargetRegionEntryInfoActTy &Action) {
+  // Scan all target region entries and perform the provided action.
+  for (auto &D : OffloadEntriesTargetRegion)
+    for (auto &F : D.second)
+      for (auto &P : F.second)
+        for (auto &L : P.second)
+          for (auto &C : L.second)
+            Action(D.first, F.first, P.first(), L.first, C.first, C.second);
+}
+
+/// \brief Create a Ctor/Dtor-like function whose body is emitted through
+/// \a Codegen. This is used to emit the two functions that register and
+/// unregister the descriptor of the current compilation unit.
+static llvm::Function *
+createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
+                                         const RegionCodeGenTy &Codegen) {
+  auto &C = CGM.getContext();
+  FunctionArgList Args;
+  ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
+                             /*Id=*/nullptr, C.VoidPtrTy);
+  Args.push_back(&DummyPtr);
+
+  CodeGenFunction CGF(CGM);
+  GlobalDecl();
+  auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
+      C.VoidTy, Args, FunctionType::ExtInfo(),
+      /*isVariadic=*/false);
+  auto FTy = CGM.getTypes().GetFunctionType(FI);
+  auto *Fn =
+      CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
+  Codegen(CGF);
+  CGF.FinishFunction();
+  return Fn;
+}
+
+llvm::Function *
+CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
+
+  // If we don't have entries or if we are emitting code for the device, we
+  // don't need to do anything.
+  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
+    return nullptr;
+
+  auto &M = CGM.getModule();
+  auto &C = CGM.getContext();
+
+  // Get list of devices we care about
+  auto &Devices = CGM.getLangOpts().OMPTargetTriples;
+
+  // We should be creating an offloading descriptor only if there are devices
+  // specified.
+  assert(!Devices.empty() && "No OpenMP offloading devices??");
+
+  // Create the external variables that will point to the begin and end of the
+  // host entries section. These will be defined by the linker.
+  auto *OffloadEntryTy =
+      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
+  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
+      M, OffloadEntryTy, /*isConstant=*/true,
+      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0,
+      ".omp_offloading.entries_begin");
+  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
+      M, OffloadEntryTy, /*isConstant=*/true,
+      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0,
+      ".omp_offloading.entries_end");
+
+  // Create all device images
+  llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires;
+  auto *DeviceImageTy = cast<llvm::StructType>(
+      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
+
+  for (unsigned i = 0; i < Devices.size(); ++i) {
+    StringRef T = Devices[i].getTriple();
+    auto *ImgBegin = new llvm::GlobalVariable(
+        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
+        /*Initializer=*/0, Twine(".omp_offloading.img_start.") + Twine(T));
+    auto *ImgEnd = new llvm::GlobalVariable(
+        M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
+        /*Initializer=*/0, Twine(".omp_offloading.img_end.") + Twine(T));
+
+    llvm::Constant *Dev =
+        llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd,
+                                  HostEntriesBegin, HostEntriesEnd, nullptr);
+    DeviceImagesEntires.push_back(Dev);
+  }
+
+  // Create device images global array.
+  llvm::ArrayType *DeviceImagesInitTy =
+      llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size());
+  llvm::Constant *DeviceImagesInit =
+      llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires);
+
+  llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable(
+      M, DeviceImagesInitTy, /*isConstant=*/true,
+      llvm::GlobalValue::InternalLinkage, DeviceImagesInit,
+      ".omp_offloading.device_images");
+  DeviceImages->setUnnamedAddr(true);
+
+  // This is a Zero array to be used in the creation of the constant expressions
+  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
+                             llvm::Constant::getNullValue(CGM.Int32Ty)};
+
+  // Create the target region descriptor.
+  auto *BinaryDescriptorTy = cast<llvm::StructType>(
+      CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
+  llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get(
+      BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
+      llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages,
+                                           Index),
+      HostEntriesBegin, HostEntriesEnd, nullptr);
+
+  auto *Desc = new llvm::GlobalVariable(
+      M, BinaryDescriptorTy, /*isConstant=*/true,
+      llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit,
+      ".omp_offloading.descriptor");
+
+  // Emit code to register or unregister the descriptor at execution
+  // startup or closing, respectively.
+
+  // Create a variable to drive the registration and unregistration of the
+  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
+  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
+  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
+                                IdentInfo, C.CharTy);
+
+  auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
+      CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) {
+        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
+                             Desc);
+      });
+  auto *RegFn = createOffloadingBinaryDescriptorFunction(
+      CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) {
+        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
+                             Desc);
+        CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
+      });
+  return RegFn;
+}
+
+void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name,
+                                         uint64_t Size) {
+  auto *TgtOffloadEntryType = cast<llvm::StructType>(
+      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
+  llvm::LLVMContext &C = CGM.getModule().getContext();
+  llvm::Module &M = CGM.getModule();
+
+  // Make sure the address has the right type.
+  llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(Addr, CGM.VoidPtrTy);
+
+  // Create constant string with the name.
+  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
+
+  llvm::GlobalVariable *Str =
+      new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
+                               llvm::GlobalValue::InternalLinkage, StrPtrInit,
+                               ".omp_offloading.entry_name");
+  Str->setUnnamedAddr(true);
+  llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
+
+  // Create the entry struct.
+  llvm::Constant *EntryInit = llvm::ConstantStruct::get(
+      TgtOffloadEntryType, AddrPtr, StrPtr,
+      llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr);
+  llvm::GlobalVariable *Entry = new llvm::GlobalVariable(
+      M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage,
+      EntryInit, ".omp_offloading.entry");
+
+  // The entry has to be created in the section the linker expects it to be.
+  Entry->setSection(".omp_offloading.entries");
+  // We can't have any padding between symbols, so we need to have 1-byte
+  // alignment.
+  Entry->setAlignment(1);
+  return;
+}
+
+void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
+  // Emit the offloading entries and metadata so that the device codegen side
+  // can
+  // easily figure out what to emit. The produced metadata looks like this:
+  //
+  // !omp_offload.info = !{!1, ...}
+  //
+  // Right now we only generate metadata for function that contain target
+  // regions.
+
+  // If we do not have entries, we dont need to do anything.
+  if (OffloadEntriesInfoManager.empty())
+    return;
+
+  llvm::Module &M = CGM.getModule();
+  llvm::LLVMContext &C = M.getContext();
+  SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
+      OrderedEntries(OffloadEntriesInfoManager.size());
+
+  // Create the offloading info metadata node.
+  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
+
+  // Auxiliar methods to create metadata values and strings.
+  auto getMDInt = [&](unsigned v) {
+    return llvm::ConstantAsMetadata::get(
+        llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
+  };
+
+  auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
+
+  // Create function that emits metadata for each target region entry;
+  auto &&TargetRegionMetadataEmitter = [&](
+      unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
+      unsigned Column,
+      OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
+    llvm::SmallVector<llvm::Metadata *, 32> Ops;
+    // Generate metadata for target regions. Each entry of this metadata
+    // contains:
+    // - Entry 0 -> Kind of this type of metadata (0).
+    // - Entry 1 -> Device ID of the file where the entry was identified.
+    // - Entry 2 -> File ID of the file where the entry was identified.
+    // - Entry 3 -> Mangled name of the function where the entry was identified.
+    // - Entry 4 -> Line in the file where the entry was identified.
+    // - Entry 5 -> Column in the file where the entry was identified.
+    // - Entry 6 -> Order the entry was created.
+    // The first element of the metadata node is the kind.
+    Ops.push_back(getMDInt(E.getKind()));
+    Ops.push_back(getMDInt(DeviceID));
+    Ops.push_back(getMDInt(FileID));
+    Ops.push_back(getMDString(ParentName));
+    Ops.push_back(getMDInt(Line));
+    Ops.push_back(getMDInt(Column));
+    Ops.push_back(getMDInt(E.getOrder()));
+
+    // Save this entry in the right position of the ordered entries array.
+    OrderedEntries[E.getOrder()] = &E;
+
+    // Add metadata to the named metadata node.
+    MD->addOperand(llvm::MDNode::get(C, Ops));
+  };
+
+  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
+      TargetRegionMetadataEmitter);
+
+  for (auto *E : OrderedEntries) {
+    assert(E && "All ordered entries must exist!");
+    if (auto *CE =
+            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
+                E)) {
+      assert(CE->getID() && CE->getAddress() &&
+             "Entry ID and Addr are invalid!");
+      createOffloadEntry(CE->getID(), CE->getAddress()->getName(), /*Size=*/0);
+    } else
+      llvm_unreachable("Unsupported entry kind.");
+  }
+}
+
+/// \brief Loads all the offload entries information from the host IR
+/// metadata.
+void CGOpenMPRuntime::loadOffloadInfoMetadata() {
+  // If we are in target mode, load the metadata from the host IR. This code has
+  // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
+
+  if (!CGM.getLangOpts().OpenMPIsDevice)
+    return;
+
+  if (CGM.getLangOpts().OMPHostIRFile.empty())
+    return;
+
+  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
+  if (Buf.getError())
+    return;
+
+  llvm::LLVMContext C;
+  auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C);
+
+  if (ME.getError())
+    return;
+
+  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
+  if (!MD)
+    return;
+
+  for (auto I : MD->operands()) {
+    llvm::MDNode *MN = cast<llvm::MDNode>(I);
+    unsigned Idx = 0;
+
+    auto getMDInt = [&]() {
+      llvm::ConstantAsMetadata *V =
+          cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx++));
+      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
+    };
+
+    auto getMDString = [&]() {
+      llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx++));
+      return V->getString();
+    };
+
+    switch (getMDInt()) {
+    default:
+      llvm_unreachable("Unexpected metadata!");
+      break;
+    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
+        OFFLOAD_ENTRY_INFO_TARGET_REGION:
+      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
+          /*DeviceID=*/getMDInt(), /*FileID=*/getMDInt(),
+          /*ParentName=*/getMDString(), /*Line=*/getMDInt(),
+          /*Column=*/getMDInt(), /*Order=*/getMDInt());
+      break;
+    }
+  }
+}
+
 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
   if (!KmpRoutineEntryPtrTy) {
     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
@@ -1940,6 +2348,80 @@
   return Field;
 }
 
+QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
+
+  // Make sure the type of the entry is already created. This is the type we
+  // have to create:
+  // struct __tgt_offload_entry{
+  //   void      *addr;       // Pointer to the offload entry info.
+  //                          // (function or global)
+  //   char      *name;       // Name of the function or global.
+  //   size_t     size;       // Size of the entry info (0 if it a function).
+  // };
+  if (TgtOffloadEntryQTy.isNull()) {
+    ASTContext &C = CGM.getContext();
+    auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
+    RD->startDefinition();
+    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
+    addFieldToRecordDecl(C, RD, C.getSizeType());
+    RD->completeDefinition();
+    TgtOffloadEntryQTy = C.getRecordType(RD);
+  }
+  return TgtOffloadEntryQTy;
+}
+
+QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
+  // These are the types we need to build:
+  // struct __tgt_device_image{
+  // void   *ImageStart;       // Pointer to the target code start.
+  // void   *ImageEnd;         // Pointer to the target code end.
+  // // We also add the host entries to the device image, as it may be useful
+  // // for the target runtime to have access to that information.
+  // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
+  //                                       // the entries.
+  // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
+  //                                       // entries (non inclusive).
+  // };
+  if (TgtDeviceImageQTy.isNull()) {
+    ASTContext &C = CGM.getContext();
+    auto *RD = C.buildImplicitRecord("__tgt_device_image");
+    RD->startDefinition();
+    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
+    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
+    RD->completeDefinition();
+    TgtDeviceImageQTy = C.getRecordType(RD);
+  }
+  return TgtDeviceImageQTy;
+}
+
+QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
+  // struct __tgt_bin_desc{
+  //   int32_t              NumDevices;      // Number of devices supported.
+  //   __tgt_device_image   *DeviceImages;   // Arrays of device images
+  //                                         // (one per device).
+  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
+  //                                         // entries.
+  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
+  //                                         // entries (non inclusive).
+  // };
+  if (TgtBinaryDescriptorQTy.isNull()) {
+    ASTContext &C = CGM.getContext();
+    auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
+    RD->startDefinition();
+    addFieldToRecordDecl(
+        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
+    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
+    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
+    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
+    RD->completeDefinition();
+    TgtBinaryDescriptorQTy = C.getRecordType(RD);
+  }
+  return TgtBinaryDescriptorQTy;
+}
+
 namespace {
 struct PrivateHelpersTy {
   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
@@ -3172,20 +3654,116 @@
   }
 }
 
-llvm::Value *
-CGOpenMPRuntime::emitTargetOutlinedFunction(const OMPExecutableDirective &D,
-                                            const RegionCodeGenTy &CodeGen) {
+/// \brief Obtain information that uniquely identifies a target entry. This
+/// consists of the file and device IDs as well as line and column numbers
+/// associated with the relevant entry source location.
+static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
+                                     unsigned &DeviceID, unsigned &FileID,
+                                     unsigned &LineNum, unsigned &ColumnNum) {
+
+  auto &SM = C.getSourceManager();
+
+  // The loc should be always valid and have a file ID (the user cannot use
+  // #pragma directives in macros)
+
+  assert(Loc.isValid() && "Source location is expected to be always valid.");
+  assert(Loc.isFileID() && "Source location is expected to refer to a file.");
+
+  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
+  assert(PLoc.isValid() && "Source location is expected to be always valid.");
+
+  llvm::sys::fs::UniqueID ID;
+  if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
+    llvm_unreachable("Source file with target region no longer exists!");
+
+  DeviceID = ID.getDevice();
+  FileID = ID.getFile();
+  LineNum = PLoc.getLine();
+  ColumnNum = PLoc.getColumn();
+  return;
+}
+
+void CGOpenMPRuntime::emitTargetOutlinedFunction(
+    const OMPExecutableDirective &D, StringRef ParentName,
+    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+    bool IsOffloadEntry) {
+
+  assert(!ParentName.empty() && "Invalid target region parent name!");
+
   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
 
+  // Emit target region as a standalone region.
+  auto &&CodeGen = [&CS](CodeGenFunction &CGF) {
+    CGF.EmitStmt(CS.getCapturedStmt());
+  };
+
+  // Create a unique name for the proxy/entry function that using the source
+  // location information of the current target region. The name will be
+  // something like:
+  //
+  // .omp_offloading.DD_FFFF.PP.lBB.cCC
+  //
+  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
+  // mangled name of the function that encloses the target region, BB is the
+  // line number of the target region, and CC is the column number of the target
+  // region.
+
+  unsigned DeviceID;
+  unsigned FileID;
+  unsigned Line;
+  unsigned Column;
+  getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
+                           Line, Column);
+  SmallString<64> EntryFnName;
+  {
+    llvm::raw_svector_ostream OS(EntryFnName);
+    OS << ".omp_offloading" << llvm::format(".%llx", DeviceID)
+       << llvm::format(".%llx.", FileID) << ParentName << ".l" << Line << ".c"
+       << Column;
+  }
+
   CodeGenFunction CGF(CGM, true);
-  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen);
+  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
-  return CGF.GenerateOpenMPCapturedStmtFunction(CS, /*UseOnlyReferences=*/true);
+
+  OutlinedFn =
+      CGF.GenerateOpenMPCapturedStmtFunction(CS, /*UseOnlyReferences=*/true);
+
+  // If this target outline function is not an offload entry, we don't need to
+  // register it.
+  if (!IsOffloadEntry)
+    return;
+
+  // The target region ID is used by the runtime library to identify the current
+  // target region, so it only has to be unique and not necessarily point to
+  // anything. It could be the pointer to the outlined function that implements
+  // the target region, but we aren't using that so that the compiler doesn't
+  // need to keep that, and could therefore inline the host function if proven
+  // worthwhile during optimization. In the other hand, if emitting code for the
+  // device, the ID has to be the function address so that it can retrieved from
+  // the offloading entry and launched by the runtime library. We also mark the
+  // outlined function to have external linkage in case we are emitting code for
+  // the device, because these functions will be entry points to the device.
+
+  if (CGM.getLangOpts().OpenMPIsDevice) {
+    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
+    OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
+  } else
+    OutlinedFnID = new llvm::GlobalVariable(
+        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+        llvm::GlobalValue::PrivateLinkage,
+        llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
+
+  // Register the information for the entry associated with this target region.
+  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
+      DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID);
+  return;
 }
 
 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                      const OMPExecutableDirective &D,
                                      llvm::Value *OutlinedFn,
+                                     llvm::Value *OutlinedFnID,
                                      const Expr *IfCond, const Expr *Device,
                                      ArrayRef<llvm::Value *> CapturedVars) {
   /// \brief Values for bit flags used to specify the mapping type for
@@ -3203,6 +3781,8 @@
     OMP_DEVICEID_UNDEF = -1,
   };
 
+  assert(OutlinedFn && "Invalid outlined function!");
+
   // Fill up the arrays with the all the captured variables.
   SmallVector<llvm::Value *, 16> BasePointers;
   SmallVector<llvm::Value *, 16> Pointers;
@@ -3265,7 +3845,7 @@
 
   // Fill up the pointer arrays and transfer execution to the device.
   auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes,
-                    hasVLACaptures, Device, OffloadError,
+                    hasVLACaptures, Device, OutlinedFnID, OffloadError,
                     OffloadErrorQType](CodeGenFunction &CGF) {
     unsigned PointerNumVal = BasePointers.size();
     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
@@ -3384,10 +3964,8 @@
     // compiler doesn't need to keep that, and could therefore inline the host
     // function if proven worthwhile during optimization.
 
-    llvm::Value *HostPtr = new llvm::GlobalVariable(
-        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
-        llvm::GlobalValue::PrivateLinkage,
-        llvm::Constant::getNullValue(CGM.Int8Ty), ".offload_hstptr");
+    // From this point on, we need to have an ID of the target region defined.
+    assert(OutlinedFnID && "Invalid outlined function ID!");
 
     // Emit device ID if any.
     llvm::Value *DeviceID;
@@ -3398,25 +3976,35 @@
       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
 
     llvm::Value *OffloadingArgs[] = {
-        DeviceID,      HostPtr,    PointerNum,   BasePointersArray,
-        PointersArray, SizesArray, MapTypesArray};
+        DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
+        PointersArray, SizesArray,   MapTypesArray};
     auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
                                       OffloadingArgs);
 
     CGF.EmitStoreOfScalar(Return, OffloadError);
   };
 
-  if (IfCond) {
-    // Notify that the host version must be executed.
-    auto &&ElseGen = [this, OffloadError,
-                      OffloadErrorQType](CodeGenFunction &CGF) {
-      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
-                            OffloadError);
-    };
-    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
+  // Notify that the host version must be executed.
+  auto &&ElseGen = [this, OffloadError,
+                    OffloadErrorQType](CodeGenFunction &CGF) {
+    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
+                          OffloadError);
+  };
+
+  // If we have a target function ID it means that we need to support
+  // offloading, otherwise, just execute on the host. We need to execute on host
+  // regardless of the conditional in the if clause if, e.g., the user do not
+  // specify target triples.
+  if (OutlinedFnID) {
+    if (IfCond) {
+      emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
+    } else {
+      CodeGenFunction::RunCleanupsScope Scope(CGF);
+      ThenGen(CGF);
+    }
   } else {
     CodeGenFunction::RunCleanupsScope Scope(CGF);
-    ThenGen(CGF);
+    ElseGen(CGF);
   }
 
   // Check the error code and execute the host version if required.
@@ -3433,3 +4021,120 @@
   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
   return;
 }
+
+void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
+                                                    StringRef ParentName) {
+  if (!S)
+    return;
+
+  // If we find a OMP target directive, codegen the outline function and
+  // register the result.
+  // FIXME: Add other directives with target when they become supported.
+  bool isTargetDirective = isa<OMPTargetDirective>(S);
+
+  if (isTargetDirective) {
+    auto *E = cast<OMPExecutableDirective>(S);
+    unsigned DeviceID;
+    unsigned FileID;
+    unsigned Line;
+    unsigned Column;
+    getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
+                             FileID, Line, Column);
+
+    // Is this a target region that should not be emitted as an entry point? If
+    // so just signal we are done with this target region.
+    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(
+            DeviceID, FileID, ParentName, Line, Column))
+      return;
+
+    llvm::Function *Fn;
+    llvm::Constant *Addr;
+    emitTargetOutlinedFunction(*E, ParentName, Fn, Addr,
+                               /*isOffloadEntry=*/true);
+    assert(Fn && Addr && "Target region emission failed.");
+    return;
+  }
+
+  if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
+    if (!E->getAssociatedStmt())
+      return;
+
+    scanForTargetRegionsFunctions(
+        cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
+        ParentName);
+    return;
+  }
+
+  // If this is a lambda function, look into its body.
+  if (auto *L = dyn_cast<LambdaExpr>(S))
+    S = L->getBody();
+
+  // Keep looking for target regions recursively.
+  for (auto *II : S->children())
+    scanForTargetRegionsFunctions(II, ParentName);
+
+  return;
+}
+
+bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
+  auto &FD = *cast<FunctionDecl>(GD.getDecl());
+
+  // If emitting code for the host, we do not process FD here. Instead we do
+  // the normal code generation.
+  if (!CGM.getLangOpts().OpenMPIsDevice)
+    return false;
+
+  // Try to detect target regions in the function.
+  scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
+
+  // We should not emit any function othen that the ones created during the
+  // scanning. Therefore, we signal that this function is completely dealt
+  // with.
+  return true;
+}
+
+bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
+  if (!CGM.getLangOpts().OpenMPIsDevice)
+    return false;
+
+  // Check if there are Ctors/Dtors in this declaration and look for target
+  // regions in it. We use the complete variant to produce the kernel name
+  // mangling.
+  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
+  if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
+    for (auto *Ctor : RD->ctors()) {
+      StringRef ParentName =
+          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
+      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
+    }
+    auto *Dtor = RD->getDestructor();
+    if (Dtor) {
+      StringRef ParentName =
+          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
+      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
+    }
+  }
+
+  // If we are in target mode we do not emit any global (declare target is not
+  // implemented yet). Therefore we signal that GD was processed in this case.
+  return true;
+}
+
+bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
+  auto *VD = GD.getDecl();
+  if (isa<FunctionDecl>(VD))
+    return emitTargetFunctions(GD);
+
+  return emitTargetGlobalVariable(GD);
+}
+
+llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
+  // If we have offloading in the current module, we need to emit the entries
+  // now and register the offloading descriptor.
+  createOffloadEntriesAndInfoMetadata();
+
+  // Create and register the offloading binary descriptors. This is the main
+  // entity that captures all the information about offloading in the current
+  // compilation unit.
+  return createOffloadingBinaryDescriptorRegistration();
+}
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -1614,6 +1614,8 @@
 def object : Flag<["-"], "object">;
 def o : JoinedOrSeparate<["-"], "o">, Flags<[DriverOption, RenderAsInput, CC1Option, CC1AsOption]>,
   HelpText<"Write output to <file>">, MetaVarName<"<file>">;
+def omptargets_EQ : CommaJoined<["-"], "omptargets=">, Flags<[DriverOption, CC1Option]>,
+  HelpText<"Specify comma-separated list of triples OpenMP offloading targets to be supported">;
 def pagezero__size : JoinedOrSeparate<["-"], "pagezero_size">;
 def pass_exit_codes : Flag<["-", "--"], "pass-exit-codes">, Flags<[Unsupported]>;
 def pedantic_errors : Flag<["-", "--"], "pedantic-errors">, Group<pedantic_Group>, Flags<[CC1Option]>;
Index: include/clang/Driver/CC1Options.td
===================================================================
--- include/clang/Driver/CC1Options.td
+++ include/clang/Driver/CC1Options.td
@@ -673,6 +673,15 @@
 def fcuda_target_overloads : Flag<["-"], "fcuda-target-overloads">,
   HelpText<"Enable function overloads based on CUDA target attributes.">;
 
+//===----------------------------------------------------------------------===//
+// OpenMP Options
+//===----------------------------------------------------------------------===//
+
+def fopenmp_is_device : Flag<["-"], "fopenmp-is-device">,
+  HelpText<"Generate code only for an OpenMP target device.">;
+def omp_host_ir_file_path : Separate<["-"], "omp-host-ir-file-path">,
+  HelpText<"Path to the IR file produced by the frontend for the host.">;
+  
 } // let Flags = [CC1Option]
 
 
Index: include/clang/Basic/LangOptions.h
===================================================================
--- include/clang/Basic/LangOptions.h
+++ include/clang/Basic/LangOptions.h
@@ -108,7 +108,15 @@
 
   /// \brief Options for parsing comments.
   CommentOptions CommentOpts;
-  
+
+  /// \brief Triples of the OpenMP targets that the host code codegen should
+  /// take into account in order to generate accurate offloading descriptors.
+  std::vector<llvm::Triple> OMPTargetTriples;
+
+  /// \brief Name of the IR file that contains the result of the OpenMP target
+  /// host code generation.
+  std::string OMPHostIRFile;
+
   LangOptions();
 
   // Define accessors/mutators for language options of enumeration type.
Index: include/clang/Basic/LangOptions.def
===================================================================
--- include/clang/Basic/LangOptions.def
+++ include/clang/Basic/LangOptions.def
@@ -165,6 +165,8 @@
 LANGOPT(CUDA              , 1, 0, "CUDA")
 LANGOPT(OpenMP            , 1, 0, "OpenMP support")
 LANGOPT(OpenMPUseTLS      , 1, 0, "Use TLS for threadprivates or runtime calls")
+LANGOPT(OpenMPIsDevice    , 1, 0, "Generate code only for OpenMP target device")
+
 LANGOPT(CUDAIsDevice      , 1, 0, "Compiling for CUDA device")
 LANGOPT(CUDAAllowHostCallsFromHostDevice, 1, 0, "Allow host device functions to call host functions")
 LANGOPT(CUDADisableTargetCallChecks, 1, 0, "Disable checks for call targets (host, device, etc.)")
Index: include/clang/Basic/DiagnosticDriverKinds.td
===================================================================
--- include/clang/Basic/DiagnosticDriverKinds.td
+++ include/clang/Basic/DiagnosticDriverKinds.td
@@ -123,6 +123,9 @@
 def err_drv_optimization_remark_pattern : Error<
   "%0 in '%1'">;
 def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">;
+def err_drv_invalid_omp_target : Error<"OpenMP target is invalid: '%0'">;
+def err_drv_omp_host_ir_file_not_found : Error<
+  "The provided host compiler IR file '%0' is required to generate code for OpenMP target regions but cannot be found.">;
 
 def warn_O4_is_O3 : Warning<"-O4 is equivalent to -O3">, InGroup<Deprecated>;
 def warn_drv_lto_libpath : Warning<"libLTO.dylib relative to clang installed dir not found; using 'ld' default search path instead">,
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to