[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks
This revision was automatically updated to reflect the committed changes. Closed by commit rC354568: [OpenCL] Simplify LLVM IR generated for OpenCL blocks (authored by asavonic, committed by ). Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58388/new/ https://reviews.llvm.org/D58388 Files: lib/CodeGen/CGBlocks.cpp lib/CodeGen/CGOpenCLRuntime.cpp lib/CodeGen/CGOpenCLRuntime.h test/CodeGenOpenCL/blocks.cl test/CodeGenOpenCL/cl20-device-side-enqueue.cl Index: lib/CodeGen/CGOpenCLRuntime.h === --- lib/CodeGen/CGOpenCLRuntime.h +++ lib/CodeGen/CGOpenCLRuntime.h @@ -91,6 +91,10 @@ /// \param Block block literal emitted for the block expression. void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, llvm::Value *Block); + + /// \return LLVM block invoke function emitted for an expression derived from + /// the block expression. + llvm::Function *getInvokeFunction(const Expr *E); }; } Index: lib/CodeGen/CGBlocks.cpp === --- lib/CodeGen/CGBlocks.cpp +++ lib/CodeGen/CGBlocks.cpp @@ -1253,52 +1253,49 @@ ReturnValueSlot ReturnValue) { const BlockPointerType *BPT = E->getCallee()->getType()->getAs(); - llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); - - // Get a pointer to the generic block literal. - // For OpenCL we generate generic AS void ptr to be able to reuse the same - // block definition for blocks with captures generated as private AS local - // variables and without captures generated as global AS program scope - // variables. - unsigned AddrSpace = 0; - if (getLangOpts().OpenCL) -AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); - - llvm::Type *BlockLiteralTy = - llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); - - // Bitcast the callee to a block literal. - BlockPtr = - Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); - - // Get the function pointer from the literal. 
- llvm::Value *FuncPtr = - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, - CGM.getLangOpts().OpenCL ? 2 : 3); - - // Add the block literal. + llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType(); + llvm::Value *Func = nullptr; + QualType FnType = BPT->getPointeeType(); + ASTContext = getContext(); CallArgList Args; - QualType VoidPtrQualTy = getContext().VoidPtrTy; - llvm::Type *GenericVoidPtrTy = VoidPtrTy; if (getLangOpts().OpenCL) { -GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType(); -VoidPtrQualTy = -getContext().getPointerType(getContext().getAddrSpaceQualType( -getContext().VoidTy, LangAS::opencl_generic)); - } - - BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy); - Args.add(RValue::get(BlockPtr), VoidPtrQualTy); - - QualType FnType = BPT->getPointeeType(); +// For OpenCL, BlockPtr is already casted to generic block literal. - // And the rest of the arguments. - EmitCallArgs(Args, FnType->getAs(), E->arguments()); +// First argument of a block call is a generic block literal casted to +// generic void pointer, i.e. i8 addrspace(4)* +llvm::Value *BlockDescriptor = Builder.CreatePointerCast( +BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType()); +QualType VoidPtrQualTy = Ctx.getPointerType( +Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic)); +Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy); +// And the rest of the arguments. +EmitCallArgs(Args, FnType->getAs(), E->arguments()); + +// We *can* call the block directly unless it is a function argument. +if (!isa(E->getCalleeDecl())) + Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); +else { + llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2); + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); +} + } else { +// Bitcast the block literal to a generic block literal. 
+BlockPtr = Builder.CreatePointerCast( +BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal"); +// Get pointer to the block invoke function +llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3); + +// First argument is a block literal casted to a void pointer +BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy); +Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy); +// And the rest of the arguments. +EmitCallArgs(Args, FnType->getAs(), E->arguments()); - // Load the function. - llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); +// Load the function. +Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + } const FunctionType *FuncTy =
[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks
Anastasia accepted this revision. Anastasia added a comment. This revision is now accepted and ready to land. LGTM! Great! Thanks! CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58388/new/ https://reviews.llvm.org/D58388 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks
AlexeySotkin marked an inline comment as done. AlexeySotkin added inline comments. Comment at: lib/CodeGen/CGOpenCLRuntime.cpp:131 +static const BlockExpr *getBlockExpr(const Expr *E) { + if (auto Cast = dyn_cast<CastExpr>(E)) { +E = Cast->getSubExpr(); Anastasia wrote: > Btw, does this handle the case when we assign a variable multiple times? I was > just wondering if we need a loop somewhere? > > I.e. does something like this work now: > > ``` > typedef void (^bl_t)(local void *); > > bl_t a = ...; > bl_t b = a; > bl_t c = b; > > c(); > enqueue_kernel(... c, ...); > ``` > > You are right, we need a loop. Now it works. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58388/new/ https://reviews.llvm.org/D58388 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks
AlexeySotkin updated this revision to Diff 187575. AlexeySotkin added a comment. Fix resolving of block invoke function in case of sequence of assignments. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58388/new/ https://reviews.llvm.org/D58388 Files: lib/CodeGen/CGBlocks.cpp lib/CodeGen/CGOpenCLRuntime.cpp lib/CodeGen/CGOpenCLRuntime.h test/CodeGenOpenCL/blocks.cl test/CodeGenOpenCL/cl20-device-side-enqueue.cl Index: test/CodeGenOpenCL/cl20-device-side-enqueue.cl === --- test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -312,9 +312,7 @@ }; // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. - // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) block_A(); // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]]. @@ -333,15 +331,35 @@ unsigned size = get_kernel_work_group_size(block_A); // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted. 
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) block_A(); + // Make sure that block invoke function is resolved correctly after sequence of assignements. 
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* + // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) + // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*), + // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1, + bl_t b1 = block_G; + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* + // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) + // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*), + // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2, + bl_t b2 = b1; + // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) + // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null) + b2(0); + // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]]. + // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl( + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + size = get_kernel_preferred_work_group_size_multiple(b2); + void (^block_C)(void) = ^{
[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks
Anastasia added inline comments. Comment at: lib/CodeGen/CGBlocks.cpp:1275 +// We *can* call the block directly unless it is a function argument. +if (!isa<ParmVarDecl>(E->getCalleeDecl())) + Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); I think it's reasonable enough... if we restrict blocks as parameters in the spec later it should be easy enough to modify this code. Comment at: lib/CodeGen/CGOpenCLRuntime.cpp:131 +static const BlockExpr *getBlockExpr(const Expr *E) { + if (auto Cast = dyn_cast<CastExpr>(E)) { +E = Cast->getSubExpr(); Btw, does this handle the case when we assign a variable multiple times? I was just wondering if we need a loop somewhere? I.e. does something like this work now: ``` typedef void (^bl_t)(local void *); bl_t a = ...; bl_t b = a; bl_t c = b; c(); enqueue_kernel(... c, ...); ``` CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58388/new/ https://reviews.llvm.org/D58388 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks
AlexeySotkin updated this revision to Diff 187399. AlexeySotkin added a comment. Fix ObjC lit tests failure CHANGES SINCE LAST ACTION https://reviews.llvm.org/D58388/new/ https://reviews.llvm.org/D58388 Files: lib/CodeGen/CGBlocks.cpp lib/CodeGen/CGOpenCLRuntime.cpp lib/CodeGen/CGOpenCLRuntime.h test/CodeGenOpenCL/blocks.cl test/CodeGenOpenCL/cl20-device-side-enqueue.cl Index: test/CodeGenOpenCL/cl20-device-side-enqueue.cl === --- test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -312,9 +312,7 @@ }; // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. - // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) block_A(); // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]]. @@ -333,9 +331,7 @@ unsigned size = get_kernel_work_group_size(block_A); // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted. 
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) block_A(); void (^block_C)(void) = ^{ Index: test/CodeGenOpenCL/blocks.cl === --- test/CodeGenOpenCL/blocks.cl +++ test/CodeGenOpenCL/blocks.cl @@ -39,11 +39,8 @@ // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]], // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]] - // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2 // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)* - // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* 
addrspace(4)* %[[invoke_addr]] - // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)* - // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]]) + // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]]) // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2 // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]] // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3 @@ -53,11 +50,8 @@ // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to
[PATCH] D58388: [OpenCL] Simplify LLVM IR generated for OpenCL blocks
AlexeySotkin created this revision. AlexeySotkin added reviewers: Anastasia, yaxunl, svenvh. AlexeySotkin added a project: clang. Emit direct call of block invoke functions when possible, i.e. in case the block is not passed as a function argument. Also doing some refactoring of `CodeGenFunction::EmitBlockCallExpr()` Repository: rC Clang https://reviews.llvm.org/D58388 Files: lib/CodeGen/CGBlocks.cpp lib/CodeGen/CGOpenCLRuntime.cpp lib/CodeGen/CGOpenCLRuntime.h test/CodeGenOpenCL/blocks.cl test/CodeGenOpenCL/cl20-device-side-enqueue.cl Index: test/CodeGenOpenCL/cl20-device-side-enqueue.cl === --- test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -312,9 +312,7 @@ }; // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. - // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) block_A(); // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]]. 
@@ -333,9 +331,7 @@ unsigned size = get_kernel_work_group_size(block_A); // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted. - // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) block_A(); void (^block_C)(void) = ^{ Index: test/CodeGenOpenCL/blocks.cl === --- test/CodeGenOpenCL/blocks.cl +++ test/CodeGenOpenCL/blocks.cl @@ -39,11 +39,8 @@ // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]], // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]] - // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2 
// SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)* - // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]] - // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)* - // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]]) + // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]]) // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2 // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]] // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3