jdoerfert updated this revision to Diff 277635.
jdoerfert added a comment.

Rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82822/new/

https://reviews.llvm.org/D82822

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/test/OpenMP/cancel_codegen.cpp
  clang/test/OpenMP/irbuilder_nested_parallel_for.c
  clang/test/OpenMP/task_codegen.cpp

Index: clang/test/OpenMP/task_codegen.cpp
===================================================================
--- clang/test/OpenMP/task_codegen.cpp
+++ clang/test/OpenMP/task_codegen.cpp
@@ -33,12 +33,11 @@
   char b;
   S s[2];
   int arr[10][a];
-// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* @{{.+}})
 // CHECK: [[B_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES:%.+]], i32 0, i32 0
 // CHECK: store i8* [[B]], i8** [[B_REF]]
 // CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES]], i32 0, i32 1
 // CHECK: store [2 x [[STRUCT_S]]]* [[S]], [2 x [[STRUCT_S]]]** [[S_REF]]
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 33, i64 40, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 33, i64 40, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*))
 // CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]]
 // CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS]]* [[CAPTURES]] to i8*
@@ -46,7 +45,7 @@
 // CHECK: [[PRIORITY_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 4
 // CHECK: [[PRIORITY:%.+]] = bitcast %union{{.+}}* [[PRIORITY_REF_PTR]] to i32*
 // CHECK: store i32 {{.+}}, i32* [[PRIORITY]]
-// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
 #pragma omp task shared(a, b, s) priority(b)
   {
     a = 15;
@@ -55,7 +54,7 @@
   }
 // CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS1]], [[STRUCT_SHAREDS1]]* [[CAPTURES:%.+]], i32 0, i32 0
 // CHECK: store [2 x [[STRUCT_S]]]* [[S]], [2 x [[STRUCT_S]]]** [[S_REF]]
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 [[GTID]], i32 1, i64 40, i64 8,
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 {{%.*}}, i32 1, i64 40, i64 8,
 // CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]]
 // CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS1]]* [[CAPTURES]] to i8*
@@ -101,20 +100,20 @@
 // CHECK: [[T0:%.*]] = getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
 // CHECK: store i8 1, i8* [[T0]]
 // CHECK: bitcast [[KMP_DEPEND_INFO]]* [[DEP_BASE]] to i8*
-// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 4, i8* %{{[^,]+}}, i32 0, i8* null)
+// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]], i32 4, i8* %{{[^,]+}}, i32 0, i8* null)
 #pragma omp task shared(a, s) depend(in : a, b, s, arr[:])
   {
     a = 15;
     s[1].a = 10;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
-// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
 #pragma omp task untied
   {
 #pragma omp critical
     a = 1;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1,
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1,
 // CHECK: getelementptr inbounds [2 x [[STRUCT_S]]], [2 x [[STRUCT_S]]]* [[S]], i64 0, i64 0
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0
 // CHECK: ptrtoint [[STRUCT_S]]* %{{.+}} to i64
@@ -146,12 +145,12 @@
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
 // CHECK: store i8 3, i8*
 // CHECK: bitcast [[KMP_DEPEND_INFO]]* %{{.+}} to i8*
-// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 2, i8* %{{[^,]+}}, i32 0, i8* null)
+// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]], i32 2, i8* %{{[^,]+}}, i32 0, i8* null)
 #pragma omp task untied depend(out : s[0], arr[4:][b])
   {
     a = 1;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1,
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1,
 // CHECK: getelementptr inbounds [2 x [[STRUCT_S]]], [2 x [[STRUCT_S]]]* [[S]], i64 0, i64 0
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0
 // CHECK: ptrtoint [[STRUCT_S]]* %{{.+}} to i64
@@ -183,12 +182,12 @@
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
 // CHECK: store i8 4, i8*
 // CHECK: bitcast [[KMP_DEPEND_INFO]]* %{{.+}} to i8*
-// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 2, i8* %{{[^,]+}}, i32 0, i8* null)
+// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]], i32 2, i8* %{{[^,]+}}, i32 0, i8* null)
 #pragma omp task untied depend(mutexinoutset: s[0], arr[4:][b])
   {
     a = 1;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 40, i64 1,
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 3, i64 40, i64 1,
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0
 // CHECK: store i64 ptrtoint (i32* @{{.+}} to i64), i64*
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 1
@@ -229,38 +228,38 @@
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
 // CHECK: store i8 3, i8*
 // CHECK: bitcast [[KMP_DEPEND_INFO]]* %{{.+}} to i8*
-// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 3, i8* %{{[^,]+}}, i32 0, i8* null)
+// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]], i32 3, i8* %{{[^,]+}}, i32 0, i8* null)
 #pragma omp task final(true) depend(inout: a, s[1], arr[:a][3:])
   {
     a = 2;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
-// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 3, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
 #pragma omp task final(true)
   {
     a = 2;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
-// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+  // CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
+  // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
   const bool flag = false;
 #pragma omp task final(flag)
   {
     a = 3;
   }
-// CHECK: [[B_VAL:%.+]] = load i8, i8* [[B]]
-// CHECK: [[CMP:%.+]] = icmp ne i8 [[B_VAL]], 0
-// CHECK: [[FINAL:%.+]] = select i1 [[CMP]], i32 2, i32 0
-// CHECK: [[FLAGS:%.+]] = or i32 [[FINAL]], 1
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 [[FLAGS]], i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY5:@.+]] to i32 (i32, i8*)*))
-// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+  // CHECK: [[B_VAL:%.+]] = load i8, i8* [[B]]
+  // CHECK: [[CMP:%.+]] = icmp ne i8 [[B_VAL]], 0
+  // CHECK: [[FINAL:%.+]] = select i1 [[CMP]], i32 2, i32 0
+  // CHECK: [[FLAGS:%.+]] = or i32 [[FINAL]], 1
+  // CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 [[FLAGS]], i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY5:@.+]] to i32 (i32, i8*)*))
+  // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
   int c __attribute__((aligned(128)));
 #pragma omp task final(b) shared(c)
   {
     a = 4;
     c = 5;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*))
-// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*))
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
 #pragma omp task untied
   {
     S s1;
Index: clang/test/OpenMP/irbuilder_nested_parallel_for.c
===================================================================
--- /dev/null
+++ clang/test/OpenMP/irbuilder_nested_parallel_for.c
@@ -0,0 +1,299 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefixes=CHECK %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK-DEBUG %s
+
+// expected-no-diagnostics
+
+// TODO: Teach the update script to check new functions too.
+
+#ifndef HEADER
+#define HEADER
+
+// CHECK-LABEL: @_Z14parallel_for_0v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// CHECK:       omp_parallel:
+// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*))
+// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+// CHECK:       omp.par.outlined.exit:
+// CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// CHECK:       omp.par.exit.split:
+// CHECK-NEXT:    ret void
+//
+// CHECK-DEBUG-LABEL: @_Z14parallel_for_0v(
+// CHECK-DEBUG-NEXT:  entry:
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// CHECK-DEBUG:       omp_parallel:
+// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*)), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+// CHECK-DEBUG:       omp.par.outlined.exit:
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// CHECK-DEBUG:       omp.par.exit.split:
+// CHECK-DEBUG-NEXT:    ret void, !dbg !{{[0-9]*}}
+//
+void parallel_for_0(void) { // Empty `omp for` loop in one parallel region.
+#pragma omp parallel
+  {
+#pragma omp for
+    for (int i = 0; i < 100; ++i) {
+    } // Loop body is intentionally empty; no arguments are captured.
+  }
+}
+
+// CHECK-LABEL: @_Z14parallel_for_1Pfid(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+// CHECK-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// CHECK:       omp_parallel:
+// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.1 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT19:%.*]]
+// CHECK:       omp.par.outlined.exit19:
+// CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// CHECK:       omp.par.exit.split:
+// CHECK-NEXT:    ret void
+//
+// CHECK-DEBUG-LABEL: @_Z14parallel_for_1Pfid(
+// CHECK-DEBUG-NEXT:  entry:
+// CHECK-DEBUG-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
+// CHECK-DEBUG-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-DEBUG-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @12), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// CHECK-DEBUG:       omp_parallel:
+// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @12, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.1 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT19:%.*]]
+// CHECK-DEBUG:       omp.par.outlined.exit19:
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// CHECK-DEBUG:       omp.par.exit.split:
+// CHECK-DEBUG-NEXT:    ret void, !dbg !{{[0-9]*}}
+//
+void parallel_for_1(float *r, int a, double b) {
+#pragma omp parallel
+  { // Outer parallel region.
+#pragma omp parallel
+    { // Nested parallel region containing the worksharing loop.
+#pragma omp for
+      for (int i = 0; i < 100; ++i) {
+        *r = a + b; // Same store on every iteration; uses all three params.
+      }
+    }
+  }
+}
+
+// CHECK-LABEL: @_Z14parallel_for_2Pfid(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    [[DOTOMP_IV212:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP213:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_LB214:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_UB215:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_STRIDE216:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_IS_LAST217:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[I218:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+// CHECK-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// CHECK:       omp_parallel:
+// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT211:%.*]]
+// CHECK:       omp.par.outlined.exit211:
+// CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// CHECK:       omp.par.exit.split:
+// CHECK-NEXT:    store i32 0, i32* [[DOTOMP_LB214]], align 4
+// CHECK-NEXT:    store i32 99, i32* [[DOTOMP_UB215]], align 4
+// CHECK-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE216]], align 4
+// CHECK-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST217]], align 4
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM219:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @41)
+// CHECK-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @2, i32 [[OMP_GLOBAL_THREAD_NUM219]], i32 34, i32* [[DOTOMP_IS_LAST217]], i32* [[DOTOMP_LB214]], i32* [[DOTOMP_UB215]], i32* [[DOTOMP_STRIDE216]], i32 1, i32 1)
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4
+// CHECK-NEXT:    [[CMP220:%.*]] = icmp sgt i32 [[TMP0]], 99
+// CHECK-NEXT:    br i1 [[CMP220]], label [[COND_TRUE221:%.*]], label [[COND_FALSE222:%.*]]
+// CHECK:       cond.true221:
+// CHECK-NEXT:    br label [[COND_END223:%.*]]
+// CHECK:       cond.false222:
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4
+// CHECK-NEXT:    br label [[COND_END223]]
+// CHECK:       cond.end223:
+// CHECK-NEXT:    [[COND224:%.*]] = phi i32 [ 99, [[COND_TRUE221]] ], [ [[TMP1]], [[COND_FALSE222]] ]
+// CHECK-NEXT:    store i32 [[COND224]], i32* [[DOTOMP_UB215]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB214]], align 4
+// CHECK-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_IV212]], align 4
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND225:%.*]]
+// CHECK:       omp.inner.for.cond225:
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4
+// CHECK-NEXT:    [[CMP226:%.*]] = icmp sle i32 [[TMP3]], [[TMP4]]
+// CHECK-NEXT:    br i1 [[CMP226]], label [[OMP_INNER_FOR_BODY227:%.*]], label [[OMP_INNER_FOR_END236:%.*]]
+// CHECK:       omp.inner.for.body227:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4
+// CHECK-NEXT:    [[MUL228:%.*]] = mul nsw i32 [[TMP5]], 1
+// CHECK-NEXT:    [[ADD229:%.*]] = add nsw i32 0, [[MUL228]]
+// CHECK-NEXT:    store i32 [[ADD229]], i32* [[I218]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-NEXT:    [[CONV230:%.*]] = sitofp i32 [[TMP6]] to double
+// CHECK-NEXT:    [[TMP7:%.*]] = load double, double* [[B_ADDR]], align 8
+// CHECK-NEXT:    [[ADD231:%.*]] = fadd double [[CONV230]], [[TMP7]]
+// CHECK-NEXT:    [[CONV232:%.*]] = fptrunc double [[ADD231]] to float
+// CHECK-NEXT:    [[TMP8:%.*]] = load float*, float** [[R_ADDR]], align 8
+// CHECK-NEXT:    store float [[CONV232]], float* [[TMP8]], align 4
+// CHECK-NEXT:    br label [[OMP_BODY_CONTINUE233:%.*]]
+// CHECK:       omp.body.continue233:
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_INC234:%.*]]
+// CHECK:       omp.inner.for.inc234:
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4
+// CHECK-NEXT:    [[ADD235:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK-NEXT:    store i32 [[ADD235]], i32* [[DOTOMP_IV212]], align 4
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND225]]
+// CHECK:       omp.inner.for.end236:
+// CHECK-NEXT:    br label [[OMP_LOOP_EXIT237:%.*]]
+// CHECK:       omp.loop.exit237:
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM238:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @43)
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @2, i32 [[OMP_GLOBAL_THREAD_NUM238]])
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM239:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @7, i32 [[OMP_GLOBAL_THREAD_NUM239]])
+// CHECK-NEXT:    ret void
+//
+// CHECK-DEBUG-LABEL: @_Z14parallel_for_2Pfid(
+// CHECK-DEBUG-NEXT:  entry:
+// CHECK-DEBUG-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
+// CHECK-DEBUG-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-DEBUG-NEXT:    [[DOTOMP_IV212:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[TMP213:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[DOTOMP_LB214:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[DOTOMP_UB215:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[DOTOMP_STRIDE216:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[DOTOMP_IS_LAST217:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[I218:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @25), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// CHECK-DEBUG:       omp_parallel:
+// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @25, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT211:%.*]]
+// CHECK-DEBUG:       omp.par.outlined.exit211:
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// CHECK-DEBUG:       omp.par.exit.split:
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IV212]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[DOTOMP_LB214]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 0, i32* [[DOTOMP_LB214]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[DOTOMP_UB215]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 99, i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[DOTOMP_STRIDE216]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE216]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IS_LAST217]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST217]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[I218]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM219:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @97)
+// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @96, i32 [[OMP_GLOBAL_THREAD_NUM219]], i32 34, i32* [[DOTOMP_IS_LAST217]], i32* [[DOTOMP_LB214]], i32* [[DOTOMP_UB215]], i32* [[DOTOMP_STRIDE216]], i32 1, i32 1), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[CMP220:%.*]] = icmp sgt i32 [[TMP0]], 99, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br i1 [[CMP220]], label [[COND_TRUE221:%.*]], label [[COND_FALSE222:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       cond.true221:
+// CHECK-DEBUG-NEXT:    br label [[COND_END223:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       cond.false222:
+// CHECK-DEBUG-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[COND_END223]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       cond.end223:
+// CHECK-DEBUG-NEXT:    [[COND224:%.*]] = phi i32 [ 99, [[COND_TRUE221]] ], [ [[TMP1]], [[COND_FALSE222]] ], !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 [[COND224]], i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB214]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_INNER_FOR_COND225:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       omp.inner.for.cond225:
+// CHECK-DEBUG-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[CMP226:%.*]] = icmp sle i32 [[TMP3]], [[TMP4]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br i1 [[CMP226]], label [[OMP_INNER_FOR_BODY227:%.*]], label [[OMP_INNER_FOR_END236:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       omp.inner.for.body227:
+// CHECK-DEBUG-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[MUL228:%.*]] = mul nsw i32 [[TMP5]], 1, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[ADD229:%.*]] = add nsw i32 0, [[MUL228]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 [[ADD229]], i32* [[I218]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[CONV230:%.*]] = sitofp i32 [[TMP6]] to double, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[TMP7:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[ADD231:%.*]] = fadd double [[CONV230]], [[TMP7]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[CONV232:%.*]] = fptrunc double [[ADD231]] to float, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[TMP8:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store float [[CONV232]], float* [[TMP8]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_BODY_CONTINUE233:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       omp.body.continue233:
+// CHECK-DEBUG-NEXT:    br label [[OMP_INNER_FOR_INC234:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       omp.inner.for.inc234:
+// CHECK-DEBUG-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[ADD235:%.*]] = add nsw i32 [[TMP9]], 1, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 [[ADD235]], i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_INNER_FOR_COND225]], !dbg !{{[0-9]*}}, !llvm.loop !{{[0-9]*}}
+// CHECK-DEBUG:       omp.inner.for.end236:
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_EXIT237:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       omp.loop.exit237:
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM238:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @100)
+// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @99, i32 [[OMP_GLOBAL_THREAD_NUM238]]), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM239:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @103), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @102, i32 [[OMP_GLOBAL_THREAD_NUM239]]), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    ret void, !dbg !{{[0-9]*}}
+//
+void parallel_for_2(float *r, int a, double b) {
+#pragma omp parallel
+  { // Outermost parallel region.
+#pragma omp for
+    for (int i = 0; i < 100; ++i)
+      *r = a + b;
+#pragma omp parallel
+    { // Second-level parallel region.
+#pragma omp for
+      for (int i = 0; i < 100; ++i)
+        *r = a + b;
+#pragma omp parallel
+      { // First third-level parallel region.
+#pragma omp for
+        for (int i = 0; i < 100; ++i)
+          *r = a + b;
+      }
+#pragma omp for
+      for (int i = 0; i < 100; ++i)
+        *r = a + b;
+#pragma omp parallel
+      { // Second third-level parallel region.
+#pragma omp for
+        for (int i = 0; i < 100; ++i)
+          *r = a + b;
+      }
+#pragma omp for
+      for (int i = 0; i < 100; ++i)
+        *r = a + b;
+    }
+#pragma omp for
+    for (int i = 0; i < 100; ++i)
+      *r = a + b;
+  }
+#pragma omp for
+  for (int i = 0; i < 100; ++i) // This loop is outside every parallel region.
+    *r = a + b;
+}
+
+#endif
Index: clang/test/OpenMP/cancel_codegen.cpp
===================================================================
--- clang/test/OpenMP/cancel_codegen.cpp
+++ clang/test/OpenMP/cancel_codegen.cpp
@@ -16,7 +16,6 @@
 
 float flag;
 int main (int argc, char **argv) {
-// ALL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
 #pragma omp parallel
 {
 #pragma omp cancel parallel if(flag)
@@ -42,14 +41,14 @@
   }
 }
 // ALL: call void @__kmpc_for_static_init_4(
-// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
+// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3)
 // ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
 // ALL: [[EXIT]]
 // ALL: br label
 // ALL: [[CONTINUE]]
 // ALL: br label
-// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
+// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3)
 // ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
 // ALL: [[EXIT]]
@@ -66,7 +65,7 @@
 // ALL: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
 // ALL: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
 // ALL: [[THEN]]
-// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 2)
+// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 2)
 // ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
 // ALL: [[EXIT]]
@@ -148,7 +147,7 @@
 // CHECK: br label
 // CHECK: [[CONTINUE]]
 // CHECK: br label
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
+// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3)
 // CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
 // CHECK: [[EXIT]]
Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1452,6 +1452,19 @@
   }
 }
 
+static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
+                                                  SourceLocation Loc,
+                                                  SmallString<128> &Buffer) {
+  llvm::raw_svector_ostream OS(Buffer);
+  // Build the debug location string: ";<File>;<Function>;<Line>;<Column>;;".
+  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
+  OS << ";" << PLoc.getFilename() << ";";
+  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
+    OS << FD->getQualifiedNameAsString();
+  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
+  return OS.str();
+}
+
 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  unsigned Flags) {
@@ -1461,6 +1474,16 @@
       Loc.isInvalid())
     return getOrCreateDefaultLocation(Flags).getPointer();
 
+  // If the OpenMPIRBuilder is used, we need to use it for all location
+  // handling, as the clang invariants used below might be broken.
+  if (CGM.getLangOpts().OpenMPIRBuilder) {
+    SmallString<128> Buffer;
+    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
+    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
+        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
+    return OMPBuilder.getOrCreateIdent(SrcLocStr, IdentFlag(Flags));
+  }
+
   assert(CGF.CurFn && "No function in current CodeGenFunction.");
 
   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
@@ -1494,15 +1517,9 @@
 
   llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
   if (OMPDebugLoc == nullptr) {
-    SmallString<128> Buffer2;
-    llvm::raw_svector_ostream OS2(Buffer2);
-    // Build debug location
-    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
-    OS2 << ";" << PLoc.getFilename() << ";";
-    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
-      OS2 << FD->getQualifiedNameAsString();
-    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
-    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
+    SmallString<128> Buffer;
+    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(
+        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
   }
   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
@@ -1516,6 +1533,16 @@
 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
   assert(CGF.CurFn && "No function in current CodeGenFunction.");
+  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls,
+  // as the clang invariants used below might be broken.
+  if (CGM.getLangOpts().OpenMPIRBuilder) {
+    SmallString<128> Buffer;
+    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
+    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
+        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
+    return OMPBuilder.getOrCreateThreadID(
+        OMPBuilder.getOrCreateIdent(SrcLocStr));
+  }
 
   llvm::Value *ThreadID = nullptr;
   // Check whether we've already cached a load of the thread id in this
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to