[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks

2019-02-21 Thread Andrew Savonichev via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC354568: [OpenCL] Simplify LLVM IR generated for OpenCL 
blocks (authored by asavonic, committed by ).

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58388/new/

https://reviews.llvm.org/D58388

Files:
  lib/CodeGen/CGBlocks.cpp
  lib/CodeGen/CGOpenCLRuntime.cpp
  lib/CodeGen/CGOpenCLRuntime.h
  test/CodeGenOpenCL/blocks.cl
  test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Index: lib/CodeGen/CGOpenCLRuntime.h
===
--- lib/CodeGen/CGOpenCLRuntime.h
+++ lib/CodeGen/CGOpenCLRuntime.h
@@ -91,6 +91,10 @@
   /// \param Block block literal emitted for the block expression.
   void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
llvm::Value *Block);
+
+  /// \return LLVM block invoke function emitted for an expression derived from
+  /// the block expression.
+  llvm::Function *getInvokeFunction(const Expr *E);
 };
 
 }
Index: lib/CodeGen/CGBlocks.cpp
===
--- lib/CodeGen/CGBlocks.cpp
+++ lib/CodeGen/CGBlocks.cpp
@@ -1253,52 +1253,49 @@
   ReturnValueSlot ReturnValue) {
   const BlockPointerType *BPT =
 E->getCallee()->getType()->getAs<BlockPointerType>();
-
   llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
-
-  // Get a pointer to the generic block literal.
-  // For OpenCL we generate generic AS void ptr to be able to reuse the same
-  // block definition for blocks with captures generated as private AS local
-  // variables and without captures generated as global AS program scope
-  // variables.
-  unsigned AddrSpace = 0;
-  if (getLangOpts().OpenCL)
-AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
-
-  llvm::Type *BlockLiteralTy =
-  llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
-
-  // Bitcast the callee to a block literal.
-  BlockPtr =
-  Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
-
-  // Get the function pointer from the literal.
-  llvm::Value *FuncPtr =
-  Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
-  CGM.getLangOpts().OpenCL ? 2 : 3);
-
-  // Add the block literal.
+  llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType();
+  llvm::Value *Func = nullptr;
+  QualType FnType = BPT->getPointeeType();
+  ASTContext &Ctx = getContext();
   CallArgList Args;
 
-  QualType VoidPtrQualTy = getContext().VoidPtrTy;
-  llvm::Type *GenericVoidPtrTy = VoidPtrTy;
   if (getLangOpts().OpenCL) {
-GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType();
-VoidPtrQualTy =
-getContext().getPointerType(getContext().getAddrSpaceQualType(
-getContext().VoidTy, LangAS::opencl_generic));
-  }
-
-  BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy);
-  Args.add(RValue::get(BlockPtr), VoidPtrQualTy);
-
-  QualType FnType = BPT->getPointeeType();
+// For OpenCL, BlockPtr is already casted to generic block literal.
 
-  // And the rest of the arguments.
-  EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
+// First argument of a block call is a generic block literal casted to
+// generic void pointer, i.e. i8 addrspace(4)*
+llvm::Value *BlockDescriptor = Builder.CreatePointerCast(
+BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType());
+QualType VoidPtrQualTy = Ctx.getPointerType(
+Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic));
+Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy);
+// And the rest of the arguments.
+EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
+
+// We *can* call the block directly unless it is a function argument.
+if (!isa<ParmVarDecl>(E->getCalleeDecl()))
+  Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
+else {
+  llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2);
+  Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+}
+  } else {
+// Bitcast the block literal to a generic block literal.
+BlockPtr = Builder.CreatePointerCast(
+BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal");
+// Get pointer to the block invoke function
+llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3);
+
+// First argument is a block literal casted to a void pointer
+BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy);
+Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy);
+// And the rest of the arguments.
+EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
 
-  // Load the function.
-  llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+// Load the function.
+Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+  }
 
   const FunctionType *FuncTy = 

[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks

2019-02-20 Thread Anastasia Stulova via Phabricator via cfe-commits
Anastasia accepted this revision.
Anastasia added a comment.
This revision is now accepted and ready to land.

LGTM! Great! Thanks!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58388/new/

https://reviews.llvm.org/D58388



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks

2019-02-20 Thread Alexey Sotkin via Phabricator via cfe-commits
AlexeySotkin marked an inline comment as done.
AlexeySotkin added inline comments.



Comment at: lib/CodeGen/CGOpenCLRuntime.cpp:131
+static const BlockExpr *getBlockExpr(const Expr *E) {
+  if (auto Cast = dyn_cast<CastExpr>(E)) {
+E = Cast->getSubExpr();

Anastasia wrote:
> Btw, does this handle the case when we assign a variable multiple times? I was 
> just wondering if we need a loop somewhere?
> 
> I.e. does something like this work now:
> 
> ```
> typedef void (^bl_t)(local void *);
> 
> bl_t a = ...;
> bl_t b = a;
> bl_t c = b;
> 
> c();
> enqueue_kernel(... c, ...);
> ```
> 
> 
You are right, we need a loop. Now it works.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58388/new/

https://reviews.llvm.org/D58388



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks

2019-02-20 Thread Alexey Sotkin via Phabricator via cfe-commits
AlexeySotkin updated this revision to Diff 187575.
AlexeySotkin added a comment.

Fix resolving of block invoke function in case of sequence of assignments.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58388/new/

https://reviews.llvm.org/D58388

Files:
  lib/CodeGen/CGBlocks.cpp
  lib/CodeGen/CGOpenCLRuntime.cpp
  lib/CodeGen/CGOpenCLRuntime.h
  test/CodeGenOpenCL/blocks.cl
  test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Index: test/CodeGenOpenCL/cl20-device-side-enqueue.cl
===
--- test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -312,9 +312,7 @@
   };
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
-  // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
-  // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
-  // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   block_A();
 
   // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
@@ -333,15 +331,35 @@
   unsigned size = get_kernel_work_group_size(block_A);
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
-  // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
-  // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
-  // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   block_A();
 
+  // Make sure that block invoke function is resolved correctly after sequence of assignments.
+  // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
+  // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
+  // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
+  // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
+  // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1,
+  bl_t b1 = block_G;
+  // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
+  // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
+  // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
+  // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
+  // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2,
+  bl_t b2 = b1;
+  // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
+  // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*)
+  // COMMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null)
+  b2(0);
+  // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
+  // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+  // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  size = get_kernel_preferred_work_group_size_multiple(b2);
+
   void (^block_C)(void) = ^{
 

[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks

2019-02-20 Thread Anastasia Stulova via Phabricator via cfe-commits
Anastasia added inline comments.



Comment at: lib/CodeGen/CGBlocks.cpp:1275
+// We *can* call the block directly unless it is a function argument.
+if (!isa<ParmVarDecl>(E->getCalleeDecl()))
+  Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());

I think it's reasonable enough... if we restrict blocks as parameters in the 
spec later it should be easy enough to modify this code. 



Comment at: lib/CodeGen/CGOpenCLRuntime.cpp:131
+static const BlockExpr *getBlockExpr(const Expr *E) {
+  if (auto Cast = dyn_cast<CastExpr>(E)) {
+E = Cast->getSubExpr();

Btw, does this handle the case when we assign a variable multiple times? I was 
just wondering if we need a loop somewhere?

I.e. does something like this work now:

```
typedef void (^bl_t)(local void *);

bl_t a = ...;
bl_t b = a;
bl_t c = b;

c();
enqueue_kernel(... c, ...);
```




CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58388/new/

https://reviews.llvm.org/D58388



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks

2019-02-19 Thread Alexey Sotkin via Phabricator via cfe-commits
AlexeySotkin updated this revision to Diff 187399.
AlexeySotkin added a comment.

Fix ObjC lit tests failure


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58388/new/

https://reviews.llvm.org/D58388

Files:
  lib/CodeGen/CGBlocks.cpp
  lib/CodeGen/CGOpenCLRuntime.cpp
  lib/CodeGen/CGOpenCLRuntime.h
  test/CodeGenOpenCL/blocks.cl
  test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Index: test/CodeGenOpenCL/cl20-device-side-enqueue.cl
===
--- test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -312,9 +312,7 @@
   };
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
-  // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
-  // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
-  // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   block_A();
 
   // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
@@ -333,9 +331,7 @@
   unsigned size = get_kernel_work_group_size(block_A);
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
-  // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
-  // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
-  // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   block_A();
 
   void (^block_C)(void) = ^{
Index: test/CodeGenOpenCL/blocks.cl
===
--- test/CodeGenOpenCL/blocks.cl
+++ test/CodeGenOpenCL/blocks.cl
@@ -39,11 +39,8 @@
   // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
   // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
   // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
-  // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
   // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
-  // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
-  // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
-  // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
+  // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
   // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
   // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
   // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
@@ -53,11 +50,8 @@
   // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to 

[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks

2019-02-19 Thread Alexey Sotkin via Phabricator via cfe-commits
AlexeySotkin created this revision.
AlexeySotkin added reviewers: Anastasia, yaxunl, svenvh.
AlexeySotkin added a project: clang.

Emit direct calls to block invoke functions when possible, i.e. when the
block is not passed as a function argument.
This change also refactors `CodeGenFunction::EmitBlockCallExpr()`.


Repository:
  rC Clang

https://reviews.llvm.org/D58388

Files:
  lib/CodeGen/CGBlocks.cpp
  lib/CodeGen/CGOpenCLRuntime.cpp
  lib/CodeGen/CGOpenCLRuntime.h
  test/CodeGenOpenCL/blocks.cl
  test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Index: test/CodeGenOpenCL/cl20-device-side-enqueue.cl
===
--- test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -312,9 +312,7 @@
   };
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
-  // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
-  // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
-  // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   block_A();
 
   // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
@@ -333,9 +331,7 @@
   unsigned size = get_kernel_work_group_size(block_A);
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
-  // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
-  // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
-  // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   block_A();
 
   void (^block_C)(void) = ^{
Index: test/CodeGenOpenCL/blocks.cl
===
--- test/CodeGenOpenCL/blocks.cl
+++ test/CodeGenOpenCL/blocks.cl
@@ -39,11 +39,8 @@
   // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
   // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
   // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
-  // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
   // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
-  // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
-  // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
-  // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
+  // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
   // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
   // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
   // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3