Date: Saturday, August 17, 2019 @ 00:04:46 Author: dbermond Revision: 499892
upgpkg: spirv-llvm-translator 8.0.1.2-1 Updated to version 8.0.1.2 Modified: spirv-llvm-translator/trunk/PKGBUILD Deleted: spirv-llvm-translator/trunk/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch -----------------------------------------------------------------+ 0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch | 1116 ---------- PKGBUILD | 14 2 files changed, 9 insertions(+), 1121 deletions(-) Deleted: 0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch =================================================================== --- 0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch 2019-08-16 23:33:02 UTC (rev 499891) +++ 0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch 2019-08-17 00:04:46 UTC (rev 499892) @@ -1,1116 +0,0 @@ -From 9ce0fe02fd6cda5fb29fbb0d5037a1798a810b8a Mon Sep 17 00:00:00 2001 -From: Alexey Sotkin <alexey.sot...@intel.com> -Date: Thu, 21 Feb 2019 17:14:36 +0300 -Subject: [PATCH 1/3] Update LowerOpenCL pass to handle new blocks - represntation in LLVM IR - ---- - lib/SPIRV/SPIRVLowerOCLBlocks.cpp | 413 ++++++++---------------------- - test/global_block.ll | 71 ++--- - test/literal-struct.ll | 31 ++- - test/transcoding/block_w_struct_return.ll | 47 ++-- - test/transcoding/enqueue_kernel.ll | 237 ++++++++++------- - 5 files changed, 317 insertions(+), 482 deletions(-) - -diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp -index 50e1838..b42a4ec 100644 ---- a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp -+++ b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp -@@ -1,303 +1,110 @@ --//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===// --// --// The LLVM/SPIRV Translator --// --// This file is distributed under the University of Illinois Open Source --// License. See LICENSE.TXT for details. --// --// Copyright (c) 2018 Intel Corporation. All rights reserved. --// --// Permission is hereby granted, free of charge, to any person obtaining a --// copy of this software and associated documentation files (the "Software"), --// to deal with the Software without restriction, including without limitation --// the rights to use, copy, modify, merge, publish, distribute, sublicense, --// and/or sell copies of the Software, and to permit persons to whom the --// Software is furnished to do so, subject to the following conditions: --// --// Redistributions of source code must retain the above copyright notice, --// this list of conditions and the following disclaimers. --// Redistributions in binary form must reproduce the above copyright notice, --// this list of conditions and the following disclaimers in the documentation --// and/or other materials provided with the distribution. --// Neither the names of Intel Corporation, nor the names of its --// contributors may be used to endorse or promote products derived from this --// Software without specific prior written permission. --// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR --// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, --// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE --// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER --// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, --// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH --// THE SOFTWARE. --// --//===----------------------------------------------------------------------===// --// --// SPIR-V specification doesn't allow function pointers, so SPIR-V translator --// is designed to fail if a value with function type (except calls) is occured. --// Currently there is only two cases, when function pointers are generating in --// LLVM IR in OpenCL - block calls and device side enqueue built-in calls. --// --// In both cases values with function type used as intermediate representation --// for block literal structure. --// --// This pass is designed to find such cases and simplify them to avoid any --// function pointer types occurrences in LLVM IR in 4 steps. --// --// 1. Find all function pointer allocas, like --// %block = alloca void () * --// --// Then find a single store to that alloca: --// %blockLit = alloca <{ i32, i32, ...}>, align 4 --// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()* --// > store void ()* %0, void ()** %block, align 4 --// --// And replace the alloca users by new instructions which used stored value --// %blockLit itself instead of function pointer alloca %block. --// --// 2. Find consecutive casts from block literal type to i8 addrspace(4)* --// used function pointers as an intermediate type: --// %0 = bitcast <{ i32, i32 }> %block to void() * --// %1 = addrspacecast void() * %0 to i8 addrspace(4)* --// And simplify them: --// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)* --// --// 3. Find all unused instructions with function pointer type occured after --// pp.1-2 and remove them. --// --// 4. Find unused globals with function pointer type, like --// @block = constant void ()* --// bitcast ({ i32, i32 }* @__block_literal_global to void ()* --// --// And remove them. --// --//===----------------------------------------------------------------------===// --#define DEBUG_TYPE "spv-lower-ocl-blocks" -- --#include "OCLUtil.h" --#include "SPIRVInternal.h" -- --#include "llvm/ADT/SetVector.h" --#include "llvm/Analysis/ValueTracking.h" --#include "llvm/IR/GlobalVariable.h" --#include "llvm/IR/InstIterator.h" --#include "llvm/IR/Module.h" --#include "llvm/Pass.h" --#include "llvm/PassSupport.h" --#include "llvm/Support/Casting.h" -- --using namespace llvm; -- --namespace { -- --static void --removeUnusedFunctionPtrInst(Instruction *I, -- SmallSetVector<Instruction *, 16> &FuncPtrInsts) { -- for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) { -- Instruction *OpI = dyn_cast<Instruction>(I->getOperand(OpIdx)); -- I->setOperand(OpIdx, nullptr); -- if (OpI && OpI != I && OpI->user_empty()) -- FuncPtrInsts.insert(OpI); -- } -- I->eraseFromParent(); --} -- --static bool isFuncPtrAlloca(const AllocaInst *AI) { -- auto *ET = dyn_cast<PointerType>(AI->getAllocatedType()); -- return ET && ET->getElementType()->isFunctionTy(); --} -- --static bool hasFuncPtrType(const Value *V) { -- auto *PT = dyn_cast<PointerType>(V->getType()); -- return PT && PT->getElementType()->isFunctionTy(); --} -- --static bool isFuncPtrInst(const Instruction *I) { -- if (auto *AI = dyn_cast<AllocaInst>(I)) -- return isFuncPtrAlloca(AI); -- -- for (auto &Op : I->operands()) { -- if (auto *AI = dyn_cast<AllocaInst>(Op)) -- return isFuncPtrAlloca(AI); -- -- auto *OpI = dyn_cast<Instruction>(&Op); -- if (OpI && OpI != I && hasFuncPtrType(OpI)) -- return true; -- } -- return false; --} -- --static StoreInst *findSingleStore(AllocaInst *AI) { -- StoreInst *Store = nullptr; -- for (auto *U : AI->users()) { -- if (!isa<StoreInst>(U)) -- continue; // not a store -- if (Store) -- return nullptr; // there are more than one stores -- Store = dyn_cast<StoreInst>(U); -- } -- return Store; --} -- --static void fixFunctionPtrAllocaUsers(AllocaInst *AI) { -- // Find and remove a single store to alloca -- auto *SingleStore = findSingleStore(AI); -- assert(SingleStore && "More than one store to the function pointer alloca"); -- auto *StoredVal = SingleStore->getValueOperand(); -- SingleStore->eraseFromParent(); -- -- // Find loads from the alloca and replace thier users -- for (auto *U : AI->users()) { -- auto *LI = dyn_cast<LoadInst>(U); -- if (!LI) -- continue; -- -- for (auto *U : LI->users()) { -- auto *UInst = cast<Instruction>(U); -- auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast( -- StoredVal, UInst->getType(), "", UInst); -- UInst->replaceAllUsesWith(Cast); -- } -- } --} -- --static int getBlockLiteralIdx(const Function &F) { -- StringRef FName = F.getName(); -- if (isEnqueueKernelBI(FName)) -- return FName.contains("events") ? 7 : 4; -- if (isKernelQueryBI(FName)) -- return FName.contains("for_ndrange") ? 2 : 1; -- if (FName.startswith("__") && FName.contains("_block_invoke")) -- return F.hasStructRetAttr() ? 1 : 0; -- -- return -1; // No block literal argument --} -- --static bool hasBlockLiteralArg(const Function &F) { -- return getBlockLiteralIdx(F) != -1; --} -- --static bool simplifyFunctionPtrCasts(Function &F) { -- bool Changed = false; -- int BlockLiteralIdx = getBlockLiteralIdx(F); -- for (auto *U : F.users()) { -- auto *Call = dyn_cast<CallInst>(U); -- if (!Call) -- continue; -- if (Call->getFunction()->getName() == F.getName().str() + "_kernel") -- continue; // Skip block invoke function calls inside block invoke kernels -- -- const DataLayout &DL = F.getParent()->getDataLayout(); -- auto *BlockLiteral = Call->getOperand(BlockLiteralIdx); -- auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL); -- if (isa<GlobalVariable>(BlockLiteralVal)) -- continue; // nothing to do with globals -- -- auto *BlockLiteralAlloca = cast<AllocaInst>(BlockLiteralVal); -- assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() && -- "Function type shouldn't be there"); -- -- auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast( -- BlockLiteralAlloca, BlockLiteral->getType(), "", Call); -- BlockLiteral->replaceAllUsesWith(NewBlockLiteral); -- Changed |= true; -- } -- return Changed; --} -- --static void --findFunctionPtrAllocas(Module &M, -- SmallVectorImpl<AllocaInst *> &FuncPtrAllocas) { -- for (auto &F : M) { -- if (F.isDeclaration()) -- continue; -- for (auto &I : instructions(F)) { -- auto *AI = dyn_cast<AllocaInst>(&I); -- if (!AI || !isFuncPtrAlloca(AI)) -- continue; -- FuncPtrAllocas.push_back(AI); -- } -- } --} -- --static void --findUnusedFunctionPtrInsts(Module &M, -- SmallSetVector<Instruction *, 16> &FuncPtrInsts) { -- for (auto &F : M) { -- if (F.isDeclaration()) -- continue; -- for (auto &I : instructions(F)) -- if (I.user_empty() && isFuncPtrInst(&I)) -- FuncPtrInsts.insert(&I); -- } --} -- --static void --findUnusedFunctionPtrGlbs(Module &M, -- SmallVectorImpl<GlobalVariable *> &FuncPtrGlbs) { -- for (auto &GV : M.globals()) { -- if (!GV.user_empty()) -- continue; -- auto *GVType = dyn_cast<PointerType>(GV.getType()->getElementType()); -- if (GVType && GVType->getElementType()->isFunctionTy()) -- FuncPtrGlbs.push_back(&GV); -- } --} -- --class SPIRVLowerOCLBlocks : public ModulePass { -- --public: -- SPIRVLowerOCLBlocks() : ModulePass(ID) {} -- -- bool runOnModule(Module &M) { -- bool Changed = false; -- -- // 1. Find function pointer allocas and fix their users -- SmallVector<AllocaInst *, 16> FuncPtrAllocas; -- findFunctionPtrAllocas(M, FuncPtrAllocas); -- -- Changed |= !FuncPtrAllocas.empty(); -- for (auto *AI : FuncPtrAllocas) -- fixFunctionPtrAllocaUsers(AI); -- -- // 2. Simplify consecutive casts which use function pointer types -- for (auto &F : M) -- if (hasBlockLiteralArg(F)) -- Changed |= simplifyFunctionPtrCasts(F); -- -- // 3. Cleanup unused instructions with function pointer type -- // which are occured after pp. 1-2 -- SmallSetVector<Instruction *, 16> FuncPtrInsts; -- findUnusedFunctionPtrInsts(M, FuncPtrInsts); -- -- Changed |= !FuncPtrInsts.empty(); -- while (!FuncPtrInsts.empty()) { -- Instruction *I = FuncPtrInsts.pop_back_val(); -- removeUnusedFunctionPtrInst(I, FuncPtrInsts); -- } -- -- // 4. Find and remove unused global variables with function pointer type -- SmallVector<GlobalVariable *, 16> FuncPtrGlbs; -- findUnusedFunctionPtrGlbs(M, FuncPtrGlbs); -- -- Changed |= !FuncPtrGlbs.empty(); -- for (auto *GV : FuncPtrGlbs) -- GV->eraseFromParent(); -- -- return Changed; -- } -- -- static char ID; --}; // class SPIRVLowerOCLBlocks -- --char SPIRVLowerOCLBlocks::ID = 0; -- --} // namespace -- --INITIALIZE_PASS( -- SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks", -- "Remove function pointers occured in case of using OpenCL blocks", false, -- false) -- --llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() { -- return new SPIRVLowerOCLBlocks(); --} -+//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===// -+// -+// The LLVM/SPIRV Translator -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+// Copyright (c) 2018 Intel Corporation. All rights reserved. -+// -+// Permission is hereby granted, free of charge, to any person obtaining a -+// copy of this software and associated documentation files (the "Software"), -+// to deal with the Software without restriction, including without limitation -+// the rights to use, copy, modify, merge, publish, distribute, sublicense, -+// and/or sell copies of the Software, and to permit persons to whom the -+// Software is furnished to do so, subject to the following conditions: -+// -+// Redistributions of source code must retain the above copyright notice, -+// this list of conditions and the following disclaimers. -+// Redistributions in binary form must reproduce the above copyright notice, -+// this list of conditions and the following disclaimers in the documentation -+// and/or other materials provided with the distribution. -+// Neither the names of Intel Corporation, nor the names of its -+// contributors may be used to endorse or promote products derived from this -+// Software without specific prior written permission. -+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH -+// THE SOFTWARE. -+// -+//===----------------------------------------------------------------------===// -+// -+// SPIR-V specification doesn't allow function pointers, so SPIR-V translator -+// is designed to fail if a value with function type (except calls) is occured. -+// Currently there is only two cases, when function pointers are generating in -+// LLVM IR in OpenCL - block calls and device side enqueue built-in calls. -+// -+// In both cases values with function type used as intermediate representation -+// for block literal structure. -+// -+// In LLVM IR produced by clang, blocks are represented with the following -+// structure: -+// %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } -+// Pointers to block invoke functions are stored in the third field. Clang -+// replaces inderect function calls in all cases except if block is passed as a -+// function argument. Note that it is somewhat unclear if the OpenCL C spec -+// should allow passing blocks as function argumernts. This pass is not supposed -+// to work correctly with such functions. -+// Clang though has to store function pointers to this structure. Purpose of -+// this pass is to replace store of function pointers(not allowed in SPIR-V) -+// with null pointers. -+// -+//===----------------------------------------------------------------------===// -+#define DEBUG_TYPE "spv-lower-ocl-blocks" -+ -+#include "SPIRVInternal.h" -+ -+#include "llvm/IR/Module.h" -+#include "llvm/Pass.h" -+#include "llvm/Support/Regex.h" -+ -+using namespace llvm; -+ -+namespace { -+ -+static bool isBlockInvoke(Function &F) { -+ static Regex BlockInvokeRegex("_block_invoke_?[0-9]*$"); -+ return BlockInvokeRegex.match(F.getName()); -+} -+ -+class SPIRVLowerOCLBlocks : public ModulePass { -+ -+public: -+ SPIRVLowerOCLBlocks() : ModulePass(ID) {} -+ -+ bool runOnModule(Module &M) { -+ bool Changed = false; -+ for (Function &F : M) { -+ if (!isBlockInvoke(F)) -+ continue; -+ for (User *U : F.users()) { -+ if (!isa<Constant>(U)) -+ continue; -+ Constant *Null = Constant::getNullValue(U->getType()); -+ if (U != Null) { -+ U->replaceAllUsesWith(Null); -+ Changed = true; -+ } -+ } -+ } -+ return Changed; -+ } -+ -+ static char ID; -+}; -+ -+char SPIRVLowerOCLBlocks::ID = 0; -+ -+} // namespace -+ -+INITIALIZE_PASS( -+ SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks", -+ "Remove function pointers occured in case of using OpenCL blocks", false, -+ false) -+ -+llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() { -+ return new SPIRVLowerOCLBlocks(); -+} -diff --git a/test/global_block.ll b/test/global_block.ll -index a9267d8..efb4cf3 100644 ---- a/test/global_block.ll -+++ b/test/global_block.ll -@@ -16,7 +16,7 @@ - ; RUN: llvm-spirv %t.bc -o %t.spv - ; RUN: llvm-spirv -r %t.spv -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM - --target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" - target triple = "spir-unknown-unknown" - - ; CHECK-SPIRV: Name [[block_invoke:[0-9]+]] "_block_invoke" -@@ -26,71 +26,56 @@ target triple = "spir-unknown-unknown" - ; CHECK-SPIRV: TypePointer [[int8Ptr:[0-9]+]] 8 [[int8]] - ; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int8Ptr]] [[int]] - --;; This variable is not needed in SPIRV --; CHECK-SPIRV-NOT: Name {{[0-9]+}} block_kernel.b1 --; CHECK-LLVM-NOT: @block_kernel.b1 --@block_kernel.b1 = internal addrspace(2) constant i32 (i32) addrspace(4)* addrspacecast (i32 (i32) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i32 (i32) addrspace(1)*) to i32 (i32) addrspace(4)*), align 8 -+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } - --@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 -+@block_kernel.b1 = internal addrspace(2) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), align 4 -+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 - --; Function Attrs: convergent nounwind --define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { -+; Function Attrs: convergent noinline nounwind optnone -+define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { - entry: -- %res.addr = alloca i32 addrspace(1)*, align 8 -- store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8, !tbaa !10 -- -+ %res.addr = alloca i32 addrspace(1)*, align 4 -+ store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4 - ; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] {{[0-9]+}} [[five]] - ; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 5) -- %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2 -- -- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8, !tbaa !10 -- store i32 %call, i32 addrspace(1)* %0, align 4, !tbaa !14 -+ %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2 -+ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4 -+ store i32 %call, i32 addrspace(1)* %0, align 4 - ret void - } - --; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 0 [[block_invoke_type]] -+; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 2 [[block_invoke_type]] - ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int8Ptr]] {{[0-9]+}} - ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int]] {{[0-9]+}} - ; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 %{{.*}}) --; Function Attrs: convergent nounwind -+; Function Attrs: convergent noinline nounwind optnone - define internal spir_func i32 @_block_invoke(i8 addrspace(4)* %.block_descriptor, i32 %i) #1 { - entry: -- %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8 -+ %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 - %i.addr = alloca i32, align 4 -- store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8 -- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* -- store i32 %i, i32* %i.addr, align 4, !tbaa !14 -- %0 = load i32, i32* %i.addr, align 4, !tbaa !14 -+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 -+ store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 -+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* -+ store i32 %i, i32* %i.addr, align 4 -+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 -+ %0 = load i32, i32* %i.addr, align 4 - %add = add nsw i32 %0, 1 - ret i32 %add - } - --attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } --attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #2 = { convergent } - - !llvm.module.flags = !{!0} --!opencl.enable.FP_CONTRACT = !{} - !opencl.ocl.version = !{!1} - !opencl.spir.version = !{!1} --!opencl.used.extensions = !{!2} --!opencl.used.optional.core.features = !{!2} --!opencl.compiler.options = !{!2} --!llvm.ident = !{!3} -+!llvm.ident = !{!2} - - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 2, i32 0} --!2 = !{} --!3 = !{!"clang version 7.0.0"} --!4 = !{i32 1} --!5 = !{!"none"} --!6 = !{!"int*"} --!7 = !{!""} --!8 = !{i1 false} --!9 = !{i32 0} --!10 = !{!11, !11, i64 0} --!11 = !{!"any pointer", !12, i64 0} --!12 = !{!"omnipotent char", !13, i64 0} --!13 = !{!"Simple C/C++ TBAA"} --!14 = !{!15, !15, i64 0} --!15 = !{!"int", !12, i64 0} -+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} -+!3 = !{i32 1} -+!4 = !{!"none"} -+!5 = !{!"int*"} -+!6 = !{!""} -diff --git a/test/literal-struct.ll b/test/literal-struct.ll -index c52170a..52a731a 100644 ---- a/test/literal-struct.ll -+++ b/test/literal-struct.ll -@@ -2,7 +2,7 @@ - ; structs, i.e. structs whose type has no name. Typicaly clang generate such - ; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with - ; the following command: --; clang -cc1 -triple spir -cl-std=cl2.0 -O0 -finclude-default-header literal-struct.cl -emit-llvm -o test/literal-struct.ll -+; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll - - ; literal-struct.cl: - ; void foo() -@@ -14,25 +14,28 @@ - ; RUN: llvm-as < %s | llvm-spirv -spirv-text -o %t - ; RUN: FileCheck < %t %s - --; CHECK-DAG: TypeInt [[Int:[0-9]+]] 32 0 --; CHECK-DAG: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] {{$}} -+; CHECK: TypeInt [[Int:[0-9]+]] 32 0 -+; CHECK: TypeInt [[Int8:[0-9]+]] 8 0 -+; CHECK: TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8]] -+; CHECK: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] [[Int8Ptr]] - - target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" - target triple = "spir" - --@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 -+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } -+ -+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 - ; CHECK: ConstantComposite [[StructType]] - --; This is artificial case is added to cover ConstantNull instrucitions with TypeStruct. --@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } zeroinitializer, align 4 -+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } zeroinitializer, align 4 - ; CHECK: ConstantNull [[StructType]] - - ; Function Attrs: convergent noinline nounwind optnone - define spir_func void @foo() #0 { - entry: -- %myBlock = alloca void () addrspace(4)*, align 4 -- store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %myBlock, align 4 -- call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1 -+ %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4 -+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4 -+ call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1 - ret void - } - -@@ -40,14 +43,14 @@ entry: - define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) #0 { - entry: - %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 -- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 -+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 - store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 -- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* -- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 -+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* -+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 - ret void - } - --attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #1 = { convergent } - - !llvm.module.flags = !{!0} -@@ -57,4 +60,4 @@ attributes #1 = { convergent } - - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 2, i32 0} --!2 = !{!"clang version 8.0.0 "} -+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} -diff --git a/test/transcoding/block_w_struct_return.ll b/test/transcoding/block_w_struct_return.ll -index 76e29f0..df89b13 100644 ---- a/test/transcoding/block_w_struct_return.ll -+++ b/test/transcoding/block_w_struct_return.ll -@@ -16,6 +16,8 @@ - ; res[tid] = kernelBlock(aa).a - 6; - ; } - -+; clang -cc1 -triple spir -cl-std=cl2.0 -disable-llvm-passes -finclude-default-header block_w_struct_return.cl -emit-llvm -o test/transcoding/block_w_struct_return.ll -+ - ; RUN: llvm-as %s -o %t.bc - ; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt - ; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV -@@ -27,12 +29,14 @@ - ; CHECK-SPIRV: Name [[BlockInv:[0-9]+]] "__block_ret_struct_block_invoke" - - ; CHECK-SPIRV: 4 TypeInt [[IntTy:[0-9]+]] 32 -+; CHECK-SPIRV: 4 TypeInt [[Int8Ty:[0-9]+]] 8 -+; CHECK-SPIRV: 4 TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8Ty]] - ; CHECK-SPIRV: 3 TypeStruct [[StructTy:[0-9]+]] [[IntTy]] - ; CHECK-SPIRV: 4 TypePointer [[StructPtrTy:[0-9]+]] 7 [[StructTy]] - - ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructArg:[0-9]+]] 7 - ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructRet:[0-9]+]] 7 --; CHECK-SPIRV: 4 PtrCastToGeneric {{[0-9]+}} [[BlockLit:[0-9]+]] {{[0-9]+}} -+; CHECK-SPIRV: 4 PtrCastToGeneric [[Int8Ptr]] [[BlockLit:[0-9]+]] {{[0-9]+}} - ; CHECK-SPIRV: 7 FunctionCall {{[0-9]+}} {{[0-9]+}} [[BlockInv]] [[StructRet]] [[BlockLit]] [[StructArg]] - - ; CHECK-LLVM: %[[StructA:.*]] = type { i32 } -@@ -41,20 +45,21 @@ - target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" - target triple = "spir64-unknown-unknown" - -+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } - %struct.A = type { i32 } - --@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 -+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (%struct.A*, i8 addrspace(4)*, %struct.A*)* @__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 8 - - ; Function Attrs: convergent noinline nounwind optnone --define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_host_accessible !8 !kernel_arg_pipe_depth !9 !kernel_arg_pipe_io !7 !kernel_arg_buffer_location !7 { -+define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { - entry: - %res.addr = alloca i32 addrspace(1)*, align 8 -- %kernelBlock = alloca void (%struct.A*, %struct.A*) addrspace(4)*, align 8 -+ %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 8 - %tid = alloca i64, align 8 - %aa = alloca %struct.A, align 4 - %tmp = alloca %struct.A, align 4 - store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8 -- store void (%struct.A*, %struct.A*) addrspace(4)* addrspacecast (void (%struct.A*, %struct.A*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void (%struct.A*, %struct.A*) addrspace(1)*) to void (%struct.A*, %struct.A*) addrspace(4)*), void (%struct.A*, %struct.A*) addrspace(4)** %kernelBlock, align 8 -+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 8 - %call = call spir_func i64 @_Z13get_global_idj(i32 0) #4 - store i64 %call, i64* %tid, align 8 - %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8 -@@ -63,7 +68,7 @@ entry: - store i32 -1, i32 addrspace(1)* %arrayidx, align 4 - %a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0 - store i32 5, i32* %a, align 4 -- call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5 -+ call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5 - %a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0 - %2 = load i32, i32* %a1, align 4 - %sub = sub nsw i32 %2, 6 -@@ -78,10 +83,10 @@ entry: - define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* noalias sret %agg.result, i8 addrspace(4)* %.block_descriptor, %struct.A* byval align 4 %a) #1 { - entry: - %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8 -- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 8 -+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 8 - store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8 -- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* -- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 8 -+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* -+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 8 - %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0 - store i32 6, i32* %a1, align 4 - %0 = bitcast %struct.A* %agg.result to i8* -@@ -96,30 +101,22 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r - ; Function Attrs: convergent nounwind readnone - declare spir_func i64 @_Z13get_global_idj(i32) #3 - --attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } --attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #2 = { argmemonly nounwind } - attributes #3 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #4 = { convergent nounwind readnone } - attributes #5 = { convergent } - - !llvm.module.flags = !{!0} --!opencl.enable.FP_CONTRACT = !{} - !opencl.ocl.version = !{!1} - !opencl.spir.version = !{!1} --!opencl.used.extensions = !{!2} --!opencl.used.optional.core.features = !{!2} --!opencl.compiler.options = !{!2} --!llvm.ident = !{!3} -+!llvm.ident = !{!2} - - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 2, i32 0} --!2 = !{} --!3 = !{!"clang version 7.0.0"} --!4 = !{i32 1} --!5 = !{!"none"} --!6 = !{!"int*"} --!7 = !{!""} --!8 = !{i1 false} --!9 = !{i32 0} -- -+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} -+!3 = !{i32 1} -+!4 = !{!"none"} -+!5 = !{!"int*"} -+!6 = !{!""} -diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll -index 0d29c71..435871d 100644 ---- a/test/transcoding/enqueue_kernel.ll -+++ b/test/transcoding/enqueue_kernel.ll -@@ -51,11 +51,12 @@ - ; ModuleID = 'enqueue_kernel.cl' - source_filename = "enqueue_kernel.cl" - target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" --target triple = "spir-unknown-unknown" -+target triple = "spir" - - %opencl.queue_t = type opaque - %struct.ndrange_t = type { i32 } - %opencl.clk_event_t = type opaque -+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } - - ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel" - ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel" -@@ -66,89 +67,123 @@ target triple = "spir-unknown-unknown" - - ; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32 - ; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8 --; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8 - ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt0:[0-9]+]] 0 --; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 17 -+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 21 - ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt2:[0-9]+]] 2 --; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 20 --; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]] -+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8 -+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 24 - - ; CHECK-SPIRV: TypePointer {{[0-9]+}} 7 {{[0-9]+}} -+; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]] -+; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]] - ; CHECK-SPIRV: TypePointer [[Int32LocPtrTy:[0-9]+]] 7 [[Int32Ty]] - ; CHECK-SPIRV: TypeDeviceEvent [[EventTy:[0-9]+]] --; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]] - ; CHECK-SPIRV: TypePointer [[EventPtrTy:[0-9]+]] 8 [[EventTy]] - ; CHECK-SPIRV: TypeFunction [[BlockTy1:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] - ; CHECK-SPIRV: TypeFunction [[BlockTy2:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] - ; CHECK-SPIRV: TypeFunction [[BlockTy3:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] - ; CHECK-SPIRV: ConstantNull [[EventPtrTy]] [[EventNull:[0-9]+]] - --; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32 } --; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }> --; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> --; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32 }> -+; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32, i8 addrspace(4)* } -+; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> -+; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> -+; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)* }> - --; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4 --; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4 -+; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4 -+; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4 - --@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 --@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 -+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4 -+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4 - - ; Function Attrs: convergent noinline nounwind optnone --define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { -+define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { - entry: -+ %a.addr = alloca i32 addrspace(1)*, align 4 -+ %b.addr = alloca i32 addrspace(1)*, align 4 -+ %i.addr = alloca i32, align 4 -+ %c0.addr = alloca i8, align 1 - %default_queue = alloca %opencl.queue_t*, align 4 - %flags = alloca i32, align 4 - %ndrange = alloca %struct.ndrange_t, align 4 - %clk_event = alloca %opencl.clk_event_t*, align 4 - %event_wait_list = alloca %opencl.clk_event_t*, align 4 - %event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4 -- %block = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, align 4 -- %block3 = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4 -+ %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, align 4 -+ %tmp = alloca %struct.ndrange_t, align 4 -+ %block3 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4 -+ %tmp4 = alloca %struct.ndrange_t, align 4 - %c = alloca i8, align 1 -+ %tmp11 = alloca %struct.ndrange_t, align 4 -+ %block_sizes = alloca [1 x i32], align 4 -+ %tmp12 = alloca %struct.ndrange_t, align 4 -+ %block_sizes13 = alloca [3 x i32], align 4 -+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4 -+ store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4 -+ store i32 %i, i32* %i.addr, align 4 -+ store i8 %c0, i8* %c0.addr, align 1 - store i32 0, i32* %flags, align 4 - %arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 - %0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4 - store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, align 4 - %1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 - %2 = load i32, i32* %flags, align 4 -- %block.size = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0 -- store i32 17, i32* %block.size, align 4 -- %block.align = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1 -+ %3 = bitcast %struct.ndrange_t* %tmp to i8* -+ %4 = bitcast %struct.ndrange_t* %ndrange to i8* -+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, i1 false) -+ %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0 -+ store i32 21, i32* %block.size, align 4 -+ %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1 - store i32 4, i32* %block.align, align 4 -- %block.captured = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2 -- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured, align 4 -- %block.captured1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3 -- store i32 %i, i32* %block.captured1, align 4 -- %block.captured2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4 -- store i8 %c0, i8* %block.captured2, align 4 -- %3 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block to void ()* -- %4 = addrspacecast void ()* %3 to i8 addrspace(4)* -+ %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2 -+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 4 -+ %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3 -+ %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4 -+ store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4 -+ %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4 -+ %6 = load i32, i32* %i.addr, align 4 -+ store i32 %6, i32* %block.captured1, align 4 -+ %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5 -+ %7 = load i8, i8* %c0.addr, align 1 -+ store i8 %7, i8* %block.captured2, align 4 -+ %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block to %struct.__opencl_block_literal_generic* -+ %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 addrspace(4)* - - ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]] - ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} - ; [[ConstInt0]] [[EventNull]] [[EventNull]] - ; [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]] - --; CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast [[BlockTy2]]* %block to i8 addrspace(4)* -+; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic* -+; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)* - ; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)* --; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]]) -- -- %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4) -- %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)* -- %7 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* -- %block.size5 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0 -- store i32 20, i32* %block.size5, align 4 -- %block.align6 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1 -+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]]) -+ -+ %10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9) -+ %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 -+ %12 = load i32, i32* %flags, align 4 -+ %13 = bitcast %struct.ndrange_t* %tmp4 to i8* -+ %14 = bitcast %struct.ndrange_t* %ndrange to i8* -+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 4, i1 false) -+ %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)* -+ %16 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* -+ %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0 -+ store i32 24, i32* %block.size5, align 4 -+ %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1 - store i32 4, i32* %block.align6, align 4 -- %block.captured7 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2 -- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured7, align 4 -- %block.captured8 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3 -- store i32 %i, i32* %block.captured8, align 4 -- %block.captured9 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4 -- store i32 addrspace(1)* %b, i32 addrspace(1)** %block.captured9, align 4 -- %8 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to void ()* -- %9 = addrspacecast void ()* %8 to i8 addrspace(4)* -+ %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2 -+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke7, align 4 -+ %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3 -+ %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4 -+ store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4 -+ %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4 -+ %18 = load i32, i32* %i.addr, align 4 -+ store i32 %18, i32* %block.captured9, align 4 -+ %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 5 -+ %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4 -+ store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4 -+ %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to %struct.__opencl_block_literal_generic* -+ %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 addrspace(4)* -+ - - ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]] - ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]] -@@ -158,16 +193,24 @@ entry: - ; [[ConstInt2]] [[Event1]] [[Event2]] - ; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]] - --; CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast [[BlockTy3]]* %block3 to i8 addrspace(4)* -+; CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block3 to %struct.__opencl_block_literal_generic* -+; CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4) - ; CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)* --; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3]]) -- -- %10 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9) -- %11 = alloca [1 x i32] -- %12 = getelementptr [1 x i32], [1 x i32]* %11, i32 0, i32 0 -- %13 = load i8, i8* %c, align 1 -- %14 = zext i8 %13 to i32 -- store i32 %14, i32* %12, align 4 -+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]]) -+ -+ %22 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 %12, %struct.ndrange_t* %tmp4, i32 2, %opencl.clk_event_t* addrspace(4)* %15, %opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21) -+ %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 -+ %24 = load i32, i32* %flags, align 4 -+ %25 = bitcast %struct.ndrange_t* %tmp11 to i8* -+ %26 = bitcast %struct.ndrange_t* %ndrange to i8* -+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 4, i1 false) -+ %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 -+ %27 = addrspacecast %opencl.clk_event_t** %arraydecay to %opencl.clk_event_t* addrspace(4)* -+ %28 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* -+ %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0 -+ %30 = load i8, i8* %c, align 1 -+ %31 = zext i8 %30 to i32 -+ store i32 %31, i32* %29, align 4 - - ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]] - ; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]] -@@ -182,14 +225,18 @@ entry: - ; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)* - ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}}) - -- %15 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %12) -- %16 = alloca [3 x i32] -- %17 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 0 -- store i32 1, i32* %17, align 4 -- %18 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 1 -- store i32 2, i32* %18, align 4 -- %19 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 2 -- store i32 4, i32* %19, align 4 -+ %32 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 %24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, %opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %29) -+ %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 -+ %34 = load i32, i32* %flags, align 4 -+ %35 = bitcast %struct.ndrange_t* %tmp12 to i8* -+ %36 = bitcast %struct.ndrange_t* %ndrange to i8* -+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 4, i1 false) -+ %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0 -+ store i32 1, i32* %37, align 4 -+ %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1 -+ store i32 2, i32* %38, align 4 -+ %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2 -+ store i32 4, i32* %39, align 4 - - ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]] - ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]] -@@ -206,24 +253,27 @@ entry: - ; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)* - ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}}) - -- %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17) -+ %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37) - ret void - } - -+; Function Attrs: argmemonly nounwind -+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1 -+ - ; Function Attrs: convergent noinline nounwind optnone - define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #2 { - entry: - %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 -- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4 -+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4 - store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 -- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* -- store <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4 -- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4 -+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* -+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4 -+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5 - %0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4 - %conv = sext i8 %0 to i32 -- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 2 -+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3 - %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr1, align 4 -- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3 -+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4 - %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4 - %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2 - store i32 %conv, i32 addrspace(1)* %arrayidx, align 4 -@@ -243,19 +293,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i - define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 { - entry: - %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 -- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4 -+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4 - store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 -- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* -- store <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4 -- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 -+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* -+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4 -+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5 - %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4 -- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 -+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 - %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4 - %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1 - %2 = load i32, i32 addrspace(1)* %arrayidx, align 4 -- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 2 -+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 - %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4 -- %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 -+ %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 - %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4 - %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4 - store i32 %2, i32 addrspace(1)* %arrayidx4, align 4 -@@ -276,11 +326,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac - entry: - %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 - %p.addr = alloca i8 addrspace(3)*, align 4 -- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 -+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 - store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 -- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* -+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* - store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4 -- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 -+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 - ret void - } - -@@ -300,13 +350,13 @@ entry: - %p1.addr = alloca i8 addrspace(3)*, align 4 - %p2.addr = alloca i8 addrspace(3)*, align 4 - %p3.addr = alloca i8 addrspace(3)*, align 4 -- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 -+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 - store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 -- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* -+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* - store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4 - store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4 - store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4 -- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 -+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 - ret void - } - -@@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*, - ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*) - ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*) - --attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #1 = { argmemonly nounwind } --attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -+attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #3 = { nounwind } - - !llvm.module.flags = !{!0} --!opencl.enable.FP_CONTRACT = !{} - !opencl.ocl.version = !{!1} - !opencl.spir.version = !{!1} --!opencl.used.extensions = !{!2} --!opencl.used.optional.core.features = !{!2} --!opencl.compiler.options = !{!2} --!llvm.ident = !{!3} -+!llvm.ident = !{!2} - - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 2, i32 0} --!2 = !{} --!3 = !{!"clang version 7.0.0"} --!4 = !{i32 1, i32 1, i32 0, i32 0} --!5 = !{!"none", !"none", !"none", !"none"} --!6 = !{!"int*", !"int*", !"int", !"char"} --!7 = !{!"", !"", !"", !""} --!8 = !{i1 false, i1 false, i1 false, i1 false} --!9 = !{i32 0, i32 0, i32 0, i32 0} -+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} -+!3 = !{i32 1, i32 1, i32 0, i32 0} -+!4 = !{!"none", !"none", !"none", !"none"} -+!5 = !{!"int*", !"int*", !"int", !"char"} -+!6 = !{!"", !"", !"", !""} --- -1.8.3.1 - Modified: PKGBUILD =================================================================== --- PKGBUILD 2019-08-16 23:33:02 UTC (rev 499891) +++ PKGBUILD 2019-08-17 00:04:46 UTC (rev 499892) @@ -2,7 +2,7 @@ _srcname=SPIRV-LLVM-Translator pkgname=${_srcname,,} -_build=1 +_build=2 pkgver=8.0.1.${_build} pkgrel=1 pkgdesc="Tool and a library for bi-directional translation between SPIR-V and LLVM IR" @@ -10,15 +10,19 @@ url="https://github.com/KhronosGroup/SPIRV-LLVM-Translator/" license=(custom) makedepends=(cmake llvm) +_commit=94af090661d7c953c516c97a25ed053c744a0737 source=("${url}/archive/v${pkgver%.*}-${_build}/${pkgname}-${pkgver}.tar.gz" - 0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch) -sha256sums=('3947761445e93a7049f75d133ea6ea4aa620feb9b09f0bfd006ec2ffcb0b00ab' - 'd3f477a6e7f4ab5ae2af8e50ec8cfc36f184392c119160f281bb66da758f3e25') + "${pkgname}-0001-Update-LowerOpenCL-pass.patch"::"https://raw.githubusercontent.com/intel/opencl-clang/${_commit}/patches/spirv/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch" + "${pkgname}-0002-Remove-extra-semicolon.patch"::"https://raw.githubusercontent.com/intel/opencl-clang/${_commit}/patches/spirv/0002-Remove-extra-semicolon.patch") +sha256sums=('f145292872419cc101eee89ce1904be06ead2f5761816a22cb13788e5d394e19' + '42ac9214fceec9b207201488c9eac899138cdbbd02e3fc56fe815e9bc3455046' + 'af9c341d4a2554e421965b40b0c7f87f4fc60b0522bbc6f0d6f2907d1ddd7ddb') prepare() { mkdir -p build cd ${_srcname}-${pkgver%.*}-${_build} - patch -Np1 -i ../0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch + patch -Np1 -i ../${pkgname}-0001-Update-LowerOpenCL-pass.patch + patch -Np1 -i ../${pkgname}-0002-Remove-extra-semicolon.patch } build() {