scott.linder updated this revision to Diff 158618.
scott.linder added a comment.

Update test


https://reviews.llvm.org/D50104

Files:
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGenOpenCL/cl20-device-side-enqueue.cl
  test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl

Index: test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn < %s | FileCheck %s --check-prefixes=COMMON,AMDGPU
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple "spir-unknown-unknown" < %s | FileCheck %s --check-prefixes=COMMON,SPIR32
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" < %s | FileCheck %s --check-prefixes=COMMON,SPIR64
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -debug-info-kind=limited -emit-llvm -o - -triple amdgcn < %s | FileCheck %s --check-prefixes=CHECK-DEBUG
+
+// Check that the enqueue_kernel array temporary is in the entry block to avoid
+// a dynamic alloca
+
+typedef struct {int a;} ndrange_t;
+
+kernel void test(int i) {
+// COMMON-LABEL: define {{.*}} void @test
+// COMMON-LABEL: entry:
+// AMDGPU: %block_sizes = alloca [1 x i64]
+// SPIR32: %block_sizes = alloca [1 x i32]
+// SPIR64: %block_sizes = alloca [1 x i64]
+// COMMON-LABEL: if.then:
+// COMMON-NOT: alloca
+// CHECK-DEBUG: getelementptr {{.*}} %block_sizes, {{.*}} !dbg !34
+// COMMON-LABEL: if.end
+  queue_t default_queue;
+  unsigned flags = 0;
+  ndrange_t ndrange;
+  if (i)
+    enqueue_kernel(default_queue, flags, ndrange, ^(local void *a) { }, 32);
+}
+
+// Check that the temporary is scoped to the `if`
+
+// CHECK-DEBUG: !32 = distinct !DILexicalBlock(scope: !7, file: !1, line: 24)
+// CHECK-DEBUG: !34 = !DILocation(line: 25, scope: !32)
Index: test/CodeGenOpenCL/cl20-device-side-enqueue.cl
===================================================================
--- test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -46,8 +46,24 @@
   // COMMON: %event_wait_list2 = alloca [1 x %opencl.clk_event_t*]
   clk_event_t event_wait_list2[] = {clk_event};
 
-  // Emits block literal on stack and block kernel [[INVLK1]].
   // COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4
+
+  // B32: %[[BLOCK_SIZES1:.*]] = alloca [1 x i32]
+  // B64: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64]
+  // B32: %[[BLOCK_SIZES2:.*]] = alloca [1 x i32]
+  // B64: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64]
+  // B32: %[[BLOCK_SIZES3:.*]] = alloca [1 x i32]
+  // B64: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64]
+  // B32: %[[BLOCK_SIZES4:.*]] = alloca [1 x i32]
+  // B64: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64]
+  // B32: %[[BLOCK_SIZES5:.*]] = alloca [1 x i32]
+  // B64: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64]
+  // B32: %[[BLOCK_SIZES6:.*]] = alloca [3 x i32]
+  // B64: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64]
+  // B32: %[[BLOCK_SIZES7:.*]] = alloca [1 x i32]
+  // B64: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64]
+
+  // Emits block literal on stack and block kernel [[INVLK1]].
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
   // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
@@ -73,48 +89,44 @@
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
-
   enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event,
                  ^(void) {
                    a[i] = b[i];
                  });
 
   // Emits global block literal [[BLG1]] and block kernel [[INVGK1]].
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // B32: %[[TMP:.*]] = alloca [1 x i32]
-  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
-  // B32: store i32 256, i32* %[[TMP1]], align 4
-  // B64: %[[TMP:.*]] = alloca [1 x i64]
-  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
-  // B64: store i64 256, i64* %[[TMP1]], align 8
+  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES1]], i32 0, i32 0
+  // B32: store i32 256, i32* %[[TMP]], align 4
+  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES1]], i32 0, i32 0
+  // B64: store i64 256, i64* %[[TMP]], align 8
   // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
-  // B32-SAME: i32* %[[TMP1]])
-  // B64-SAME: i64* %[[TMP1]])
+  // B32-SAME: i32* %[[TMP]])
+  // B64-SAME: i64* %[[TMP]])
   enqueue_kernel(default_queue, flags, ndrange,
                  ^(local void *p) {
                    return;
                  },
                  256);
+
   char c;
   // Emits global block literal [[BLG2]] and block kernel [[INVGK2]].
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // B32: %[[TMP:.*]] = alloca [1 x i32]
-  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
-  // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
-  // B64: %[[TMP:.*]] = alloca [1 x i64]
-  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
-  // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
+  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES2]], i32 0, i32 0
+  // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
+  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES2]], i32 0, i32 0
+  // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
   // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
-  // B32-SAME: i32* %[[TMP1]])
-  // B64-SAME: i64* %[[TMP1]])
+  // B32-SAME: i32* %[[TMP]])
+  // B64-SAME: i64* %[[TMP]])
   enqueue_kernel(default_queue, flags, ndrange,
                  ^(local void *p) {
                    return;
@@ -127,18 +139,16 @@
   // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
   // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*
   // COMMON: [[EVNT:%[0-9]+]]  = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
-  // B32: %[[TMP:.*]] = alloca [1 x i32]
-  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
-  // B32: store i32 256, i32* %[[TMP1]], align 4
-  // B64: %[[TMP:.*]] = alloca [1 x i64]
-  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
-  // B64: store i64 256, i64* %[[TMP1]], align 8
+  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES3]], i32 0, i32 0
+  // B32: store i32 256, i32* %[[TMP]], align 4
+  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES3]], i32 0, i32 0
+  // B64: store i64 256, i64* %[[TMP]], align 8
   // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]],
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
-  // B32-SAME: i32* %[[TMP1]])
-  // B64-SAME: i64* %[[TMP1]])
+  // B32-SAME: i32* %[[TMP]])
+  // B64-SAME: i64* %[[TMP]])
   enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
                  ^(local void *p) {
                    return;
@@ -151,18 +161,16 @@
   // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
   // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*
   // COMMON: [[EVNT:%[0-9]+]]  = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
-  // B32: %[[TMP:.*]] = alloca [1 x i32]
-  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
-  // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
-  // B64: %[[TMP:.*]] = alloca [1 x i64]
-  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
-  // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
+  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES4]], i32 0, i32 0
+  // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
+  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES4]], i32 0, i32 0
+  // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
   // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK4:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
-  // B32-SAME: i32* %[[TMP1]])
-  // B64-SAME: i64* %[[TMP1]])
+  // B32-SAME: i32* %[[TMP]])
+  // B64-SAME: i64* %[[TMP]])
   enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
                  ^(local void *p) {
                    return;
@@ -173,18 +181,16 @@
   // Emits global block literal [[BLG5]] and block kernel [[INVGK5]].
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // B32: %[[TMP:.*]] = alloca [1 x i32]
-  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
-  // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
-  // B64: %[[TMP:.*]] = alloca [1 x i64]
-  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
-  // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
+  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES5]], i32 0, i32 0
+  // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
+  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES5]], i32 0, i32 0
+  // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
   // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK5:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
-  // B32-SAME: i32* %[[TMP1]])
-  // B64-SAME: i64* %[[TMP1]])
+  // B32-SAME: i32* %[[TMP]])
+  // B64-SAME: i64* %[[TMP]])
   enqueue_kernel(default_queue, flags, ndrange,
                  ^(local void *p) {
                    return;
@@ -194,26 +200,24 @@
   // Emits global block literal [[BLG6]] and block kernel [[INVGK6]].
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // B32: %[[TMP:.*]] = alloca [3 x i32]
-  // B32: %[[TMP1:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], i32 0, i32 0
-  // B32: store i32 1, i32* %[[TMP1]], align 4
-  // B32: %[[TMP2:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], i32 0, i32 1
-  // B32: store i32 2, i32* %[[TMP2]], align 4
-  // B32: %[[TMP3:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], i32 0, i32 2
-  // B32: store i32 4, i32* %[[TMP3]], align 4
-  // B64: %[[TMP:.*]] = alloca [3 x i64]
-  // B64: %[[TMP1:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 0
-  // B64: store i64 1, i64* %[[TMP1]], align 8
-  // B64: %[[TMP2:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 1
-  // B64: store i64 2, i64* %[[TMP2]], align 8
-  // B64: %[[TMP3:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 2
-  // B64: store i64 4, i64* %[[TMP3]], align 8
+  // B32: %[[TMP:.*]] = getelementptr [3 x i32], [3 x i32]* %[[BLOCK_SIZES6]], i32 0, i32 0
+  // B32: store i32 1, i32* %[[TMP]], align 4
+  // B32: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i32], [3 x i32]* %[[BLOCK_SIZES6]], i32 0, i32 1
+  // B32: store i32 2, i32* %[[BLOCK_SIZES62]], align 4
+  // B32: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i32], [3 x i32]* %[[BLOCK_SIZES6]], i32 0, i32 2
+  // B32: store i32 4, i32* %[[BLOCK_SIZES63]], align 4
+  // B64: %[[TMP:.*]] = getelementptr [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i32 0, i32 0
+  // B64: store i64 1, i64* %[[TMP]], align 8
+  // B64: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i32 0, i32 1
+  // B64: store i64 2, i64* %[[BLOCK_SIZES62]], align 8
+  // B64: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i32 0, i32 2
+  // B64: store i64 4, i64* %[[BLOCK_SIZES63]], align 8
   // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK6:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3,
-  // B32-SAME: i32* %[[TMP1]])
-  // B64-SAME: i64* %[[TMP1]])
+  // B32-SAME: i32* %[[TMP]])
+  // B64-SAME: i64* %[[TMP]])
   enqueue_kernel(default_queue, flags, ndrange,
                  ^(local void *p1, local void *p2, local void *p3) {
                    return;
@@ -223,18 +227,16 @@
   // Emits global block literal [[BLG7]] and block kernel [[INVGK7]].
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
-  // B32: %[[TMP:.*]] = alloca [1 x i32]
-  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
-  // B32: store i32 0, i32* %[[TMP1]], align 4
-  // B64: %[[TMP:.*]] = alloca [1 x i64]
-  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
-  // B64: store i64 4294967296, i64* %[[TMP1]], align 8
+  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES7]], i32 0, i32 0
+  // B32: store i32 0, i32* %[[TMP]], align 4
+  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES7]], i32 0, i32 0
+  // B64: store i64 4294967296, i64* %[[TMP]], align 8
   // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK7:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
-  // B32-SAME: i32* %[[TMP1]])
-  // B64-SAME: i64* %[[TMP1]])
+  // B32-SAME: i32* %[[TMP]])
+  // B64-SAME: i64* %[[TMP]])
   enqueue_kernel(default_queue, flags, ndrange,
                  ^(local void *p) {
                    return;
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -3338,15 +3338,18 @@
     // Create a temporary array to hold the sizes of local pointer arguments
     // for the block. \p First is the position of the first size argument.
     auto CreateArrayForSizeVar = [=](unsigned First) {
-      auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
-      auto *Arr = Builder.CreateAlloca(AT);
+      llvm::APInt ArraySize(32, NumArgs - First);
+      QualType SizeArrayTy = getContext().getConstantArrayType(
+          getContext().getSizeType(), ArraySize, ArrayType::Normal,
+          /*IndexTypeQuals=*/0);
+      auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
       llvm::Value *Ptr;
       // Each of the following arguments specifies the size of the corresponding
       // argument passed to the enqueued block.
       auto *Zero = llvm::ConstantInt::get(IntTy, 0);
       for (unsigned I = First; I < NumArgs; ++I) {
         auto *Index = llvm::ConstantInt::get(IntTy, I - First);
-        auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
+        auto *GEP = Builder.CreateGEP(Tmp.getPointer(), {Zero, Index});
         if (I == First)
           Ptr = GEP;
         auto *V =
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to