[arch-commits] Commit in spirv-llvm-translator/trunk (2 files)

Daniel Bermond via arch-commits Fri, 16 Aug 2019 17:05:33 -0700

    Date: Saturday, August 17, 2019 @ 00:04:46
  Author: dbermond
Revision: 499892


upgpkg: spirv-llvm-translator 8.0.1.2-1

Updated to version 8.0.1.2

Modified:
  spirv-llvm-translator/trunk/PKGBUILD
Deleted:
  spirv-llvm-translator/trunk/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch

-----------------------------------------------------------------+
 0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch | 1116 ----------
 PKGBUILD                                                        |   14 
 2 files changed, 9 insertions(+), 1121 deletions(-)

Deleted: 0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch
===================================================================
--- 0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch     2019-08-16 23:33:02 UTC (rev 499891)
+++ 0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch     2019-08-17 00:04:46 UTC (rev 499892)
@@ -1,1116 +0,0 @@
-From 9ce0fe02fd6cda5fb29fbb0d5037a1798a810b8a Mon Sep 17 00:00:00 2001
-From: Alexey Sotkin <alexey.sot...@intel.com>
-Date: Thu, 21 Feb 2019 17:14:36 +0300
-Subject: [PATCH 1/3] Update LowerOpenCL pass to handle new blocks
- represntation in LLVM IR
-
----
- lib/SPIRV/SPIRVLowerOCLBlocks.cpp         | 413 ++++++++----------------------
- test/global_block.ll                      |  71 ++---
- test/literal-struct.ll                    |  31 ++-
- test/transcoding/block_w_struct_return.ll |  47 ++--
- test/transcoding/enqueue_kernel.ll        | 237 ++++++++++-------
- 5 files changed, 317 insertions(+), 482 deletions(-)
-
-diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
-index 50e1838..b42a4ec 100644
---- a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
-+++ b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
-@@ -1,303 +1,110 @@
--//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===//
--//
--//                     The LLVM/SPIRV Translator
--//
--// This file is distributed under the University of Illinois Open Source
--// License. See LICENSE.TXT for details.
--//
--// Copyright (c) 2018 Intel Corporation. All rights reserved.
--//
--// Permission is hereby granted, free of charge, to any person obtaining a
--// copy of this software and associated documentation files (the "Software"),
--// to deal with the Software without restriction, including without limitation
--// the rights to use, copy, modify, merge, publish, distribute, sublicense,
--// and/or sell copies of the Software, and to permit persons to whom the
--// Software is furnished to do so, subject to the following conditions:
--//
--// Redistributions of source code must retain the above copyright notice,
--// this list of conditions and the following disclaimers.
--// Redistributions in binary form must reproduce the above copyright notice,
--// this list of conditions and the following disclaimers in the documentation
--// and/or other materials provided with the distribution.
--// Neither the names of Intel Corporation, nor the names of its
--// contributors may be used to endorse or promote products derived from this
--// Software without specific prior written permission.
--// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
--// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
--// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
--// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
--// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
--// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
--// THE SOFTWARE.
--//
--//===----------------------------------------------------------------------===//
--//
--// SPIR-V specification doesn't allow function pointers, so SPIR-V translator
--// is designed to fail if a value with function type (except calls) is occured.
--// Currently there is only two cases, when function pointers are generating in
--// LLVM IR in OpenCL - block calls and device side enqueue built-in calls.
--//
--// In both cases values with function type used as intermediate representation
--// for block literal structure.
--//
--// This pass is designed to find such cases and simplify them to avoid any
--// function pointer types occurrences in LLVM IR in 4 steps.
--//
--// 1. Find all function pointer allocas, like
--//      %block = alloca void () *
--//
--//    Then find a single store to that alloca:
--//      %blockLit = alloca <{ i32, i32, ...}>, align 4
--//      %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()*
--//    > store void ()* %0, void ()** %block, align 4
--//
--//    And replace the alloca users by new instructions which used stored value
--//    %blockLit itself instead of function pointer alloca %block.
--//
--// 2. Find consecutive casts from block literal type to i8 addrspace(4)*
--//    used function pointers as an intermediate type:
--//      %0 = bitcast <{ i32, i32 }> %block to void() *
--//      %1 = addrspacecast void() * %0 to i8 addrspace(4)*
--//    And simplify them:
--//      %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)*
--//
--// 3. Find all unused instructions with function pointer type occured after
--//    pp.1-2 and remove them.
--//
--// 4. Find unused globals with function pointer type, like
--//    @block = constant void ()*
--//             bitcast ({ i32, i32 }* @__block_literal_global to void ()*
--//
--//    And remove them.
--//
--//===----------------------------------------------------------------------===//
--#define DEBUG_TYPE "spv-lower-ocl-blocks"
--
--#include "OCLUtil.h"
--#include "SPIRVInternal.h"
--
--#include "llvm/ADT/SetVector.h"
--#include "llvm/Analysis/ValueTracking.h"
--#include "llvm/IR/GlobalVariable.h"
--#include "llvm/IR/InstIterator.h"
--#include "llvm/IR/Module.h"
--#include "llvm/Pass.h"
--#include "llvm/PassSupport.h"
--#include "llvm/Support/Casting.h"
--
--using namespace llvm;
--
--namespace {
--
--static void
--removeUnusedFunctionPtrInst(Instruction *I,
--                            SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
--  for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) {
--    Instruction *OpI = dyn_cast<Instruction>(I->getOperand(OpIdx));
--    I->setOperand(OpIdx, nullptr);
--    if (OpI && OpI != I && OpI->user_empty())
--      FuncPtrInsts.insert(OpI);
--  }
--  I->eraseFromParent();
--}
--
--static bool isFuncPtrAlloca(const AllocaInst *AI) {
--  auto *ET = dyn_cast<PointerType>(AI->getAllocatedType());
--  return ET && ET->getElementType()->isFunctionTy();
--}
--
--static bool hasFuncPtrType(const Value *V) {
--  auto *PT = dyn_cast<PointerType>(V->getType());
--  return PT && PT->getElementType()->isFunctionTy();
--}
--
--static bool isFuncPtrInst(const Instruction *I) {
--  if (auto *AI = dyn_cast<AllocaInst>(I))
--    return isFuncPtrAlloca(AI);
--
--  for (auto &Op : I->operands()) {
--    if (auto *AI = dyn_cast<AllocaInst>(Op))
--      return isFuncPtrAlloca(AI);
--
--    auto *OpI = dyn_cast<Instruction>(&Op);
--    if (OpI && OpI != I && hasFuncPtrType(OpI))
--      return true;
--  }
--  return false;
--}
--
--static StoreInst *findSingleStore(AllocaInst *AI) {
--  StoreInst *Store = nullptr;
--  for (auto *U : AI->users()) {
--    if (!isa<StoreInst>(U))
--      continue; // not a store
--    if (Store)
--      return nullptr; // there are more than one stores
--    Store = dyn_cast<StoreInst>(U);
--  }
--  return Store;
--}
--
--static void fixFunctionPtrAllocaUsers(AllocaInst *AI) {
--  // Find and remove a single store to alloca
--  auto *SingleStore = findSingleStore(AI);
--  assert(SingleStore && "More than one store to the function pointer alloca");
--  auto *StoredVal = SingleStore->getValueOperand();
--  SingleStore->eraseFromParent();
--
--  // Find loads from the alloca and replace thier users
--  for (auto *U : AI->users()) {
--    auto *LI = dyn_cast<LoadInst>(U);
--    if (!LI)
--      continue;
--
--    for (auto *U : LI->users()) {
--      auto *UInst = cast<Instruction>(U);
--      auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast(
--          StoredVal, UInst->getType(), "", UInst);
--      UInst->replaceAllUsesWith(Cast);
--    }
--  }
--}
--
--static int getBlockLiteralIdx(const Function &F) {
--  StringRef FName = F.getName();
--  if (isEnqueueKernelBI(FName))
--    return FName.contains("events") ? 7 : 4;
--  if (isKernelQueryBI(FName))
--    return FName.contains("for_ndrange") ? 2 : 1;
--  if (FName.startswith("__") && FName.contains("_block_invoke"))
--    return F.hasStructRetAttr() ? 1 : 0;
--
--  return -1; // No block literal argument
--}
--
--static bool hasBlockLiteralArg(const Function &F) {
--  return getBlockLiteralIdx(F) != -1;
--}
--
--static bool simplifyFunctionPtrCasts(Function &F) {
--  bool Changed = false;
--  int BlockLiteralIdx = getBlockLiteralIdx(F);
--  for (auto *U : F.users()) {
--    auto *Call = dyn_cast<CallInst>(U);
--    if (!Call)
--      continue;
--    if (Call->getFunction()->getName() == F.getName().str() + "_kernel")
--      continue; // Skip block invoke function calls inside block invoke kernels
--
--    const DataLayout &DL = F.getParent()->getDataLayout();
--    auto *BlockLiteral = Call->getOperand(BlockLiteralIdx);
--    auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL);
--    if (isa<GlobalVariable>(BlockLiteralVal))
--      continue; // nothing to do with globals
--
--    auto *BlockLiteralAlloca = cast<AllocaInst>(BlockLiteralVal);
--    assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() &&
--           "Function type shouldn't be there");
--
--    auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast(
--        BlockLiteralAlloca, BlockLiteral->getType(), "", Call);
--    BlockLiteral->replaceAllUsesWith(NewBlockLiteral);
--    Changed |= true;
--  }
--  return Changed;
--}
--
--static void
--findFunctionPtrAllocas(Module &M,
--                       SmallVectorImpl<AllocaInst *> &FuncPtrAllocas) {
--  for (auto &F : M) {
--    if (F.isDeclaration())
--      continue;
--    for (auto &I : instructions(F)) {
--      auto *AI = dyn_cast<AllocaInst>(&I);
--      if (!AI || !isFuncPtrAlloca(AI))
--        continue;
--      FuncPtrAllocas.push_back(AI);
--    }
--  }
--}
--
--static void
--findUnusedFunctionPtrInsts(Module &M,
--                           SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
--  for (auto &F : M) {
--    if (F.isDeclaration())
--      continue;
--    for (auto &I : instructions(F))
--      if (I.user_empty() && isFuncPtrInst(&I))
--        FuncPtrInsts.insert(&I);
--  }
--}
--
--static void
--findUnusedFunctionPtrGlbs(Module &M,
--                          SmallVectorImpl<GlobalVariable *> &FuncPtrGlbs) {
--  for (auto &GV : M.globals()) {
--    if (!GV.user_empty())
--      continue;
--    auto *GVType = dyn_cast<PointerType>(GV.getType()->getElementType());
--    if (GVType && GVType->getElementType()->isFunctionTy())
--      FuncPtrGlbs.push_back(&GV);
--  }
--}
--
--class SPIRVLowerOCLBlocks : public ModulePass {
--
--public:
--  SPIRVLowerOCLBlocks() : ModulePass(ID) {}
--
--  bool runOnModule(Module &M) {
--    bool Changed = false;
--
--    // 1. Find function pointer allocas and fix their users
--    SmallVector<AllocaInst *, 16> FuncPtrAllocas;
--    findFunctionPtrAllocas(M, FuncPtrAllocas);
--
--    Changed |= !FuncPtrAllocas.empty();
--    for (auto *AI : FuncPtrAllocas)
--      fixFunctionPtrAllocaUsers(AI);
--
--    // 2. Simplify consecutive casts which use function pointer types
--    for (auto &F : M)
--      if (hasBlockLiteralArg(F))
--        Changed |= simplifyFunctionPtrCasts(F);
--
--    // 3. Cleanup unused instructions with function pointer type
--    // which are occured after pp. 1-2
--    SmallSetVector<Instruction *, 16> FuncPtrInsts;
--    findUnusedFunctionPtrInsts(M, FuncPtrInsts);
--
--    Changed |= !FuncPtrInsts.empty();
--    while (!FuncPtrInsts.empty()) {
--      Instruction *I = FuncPtrInsts.pop_back_val();
--      removeUnusedFunctionPtrInst(I, FuncPtrInsts);
--    }
--
--    // 4. Find and remove unused global variables with function pointer type
--    SmallVector<GlobalVariable *, 16> FuncPtrGlbs;
--    findUnusedFunctionPtrGlbs(M, FuncPtrGlbs);
--
--    Changed |= !FuncPtrGlbs.empty();
--    for (auto *GV : FuncPtrGlbs)
--      GV->eraseFromParent();
--
--    return Changed;
--  }
--
--  static char ID;
--}; // class SPIRVLowerOCLBlocks
--
--char SPIRVLowerOCLBlocks::ID = 0;
--
--} // namespace
--
--INITIALIZE_PASS(
--    SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks",
--    "Remove function pointers occured in case of using OpenCL blocks", false,
--    false)
--
--llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() {
--  return new SPIRVLowerOCLBlocks();
--}
-+//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===//
-+//
-+//                     The LLVM/SPIRV Translator
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+// Copyright (c) 2018 Intel Corporation. All rights reserved.
-+//
-+// Permission is hereby granted, free of charge, to any person obtaining a
-+// copy of this software and associated documentation files (the "Software"),
-+// to deal with the Software without restriction, including without limitation
-+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+// and/or sell copies of the Software, and to permit persons to whom the
-+// Software is furnished to do so, subject to the following conditions:
-+//
-+// Redistributions of source code must retain the above copyright notice,
-+// this list of conditions and the following disclaimers.
-+// Redistributions in binary form must reproduce the above copyright notice,
-+// this list of conditions and the following disclaimers in the documentation
-+// and/or other materials provided with the distribution.
-+// Neither the names of Intel Corporation, nor the names of its
-+// contributors may be used to endorse or promote products derived from this
-+// Software without specific prior written permission.
-+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
-+// THE SOFTWARE.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// SPIR-V specification doesn't allow function pointers, so SPIR-V translator
-+// is designed to fail if a value with function type (except calls) is occured.
-+// Currently there is only two cases, when function pointers are generating in
-+// LLVM IR in OpenCL - block calls and device side enqueue built-in calls.
-+//
-+// In both cases values with function type used as intermediate representation
-+// for block literal structure.
-+//
-+// In LLVM IR produced by clang, blocks are represented with the following
-+// structure:
-+// %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
-+// Pointers to block invoke functions are stored in the third field. Clang
-+// replaces inderect function calls in all cases except if block is passed as a
-+// function argument. Note that it is somewhat unclear if the OpenCL C spec
-+// should allow passing blocks as function argumernts. This pass is not supposed
-+// to work correctly with such functions.
-+// Clang though has to store function pointers to this structure. Purpose of
-+// this pass is to replace store of function pointers(not allowed in SPIR-V)
-+// with null pointers.
-+//
-+//===----------------------------------------------------------------------===//
-+#define DEBUG_TYPE "spv-lower-ocl-blocks"
-+
-+#include "SPIRVInternal.h"
-+
-+#include "llvm/IR/Module.h"
-+#include "llvm/Pass.h"
-+#include "llvm/Support/Regex.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+static bool isBlockInvoke(Function &F) {
-+  static Regex BlockInvokeRegex("_block_invoke_?[0-9]*$");
-+  return BlockInvokeRegex.match(F.getName());
-+}
-+
-+class SPIRVLowerOCLBlocks : public ModulePass {
-+
-+public:
-+  SPIRVLowerOCLBlocks() : ModulePass(ID) {}
-+
-+  bool runOnModule(Module &M) {
-+    bool Changed = false;
-+    for (Function &F : M) {
-+      if (!isBlockInvoke(F))
-+        continue;
-+      for (User *U : F.users()) {
-+        if (!isa<Constant>(U))
-+          continue;
-+        Constant *Null = Constant::getNullValue(U->getType());
-+        if (U != Null) {
-+          U->replaceAllUsesWith(Null);
-+          Changed = true;
-+        }
-+      }
-+    }
-+    return Changed;
-+  }
-+
-+  static char ID;
-+};
-+
-+char SPIRVLowerOCLBlocks::ID = 0;
-+
-+} // namespace
-+
-+INITIALIZE_PASS(
-+    SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks",
-+    "Remove function pointers occured in case of using OpenCL blocks", false,
-+    false)
-+
-+llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() {
-+  return new SPIRVLowerOCLBlocks();
-+}
-diff --git a/test/global_block.ll b/test/global_block.ll
-index a9267d8..efb4cf3 100644
---- a/test/global_block.ll
-+++ b/test/global_block.ll
-@@ -16,7 +16,7 @@
- ; RUN: llvm-spirv %t.bc -o %t.spv
- ; RUN: llvm-spirv -r %t.spv -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM
- 
--target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
-+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
- target triple = "spir-unknown-unknown"
- 
- ; CHECK-SPIRV: Name [[block_invoke:[0-9]+]] "_block_invoke"
-@@ -26,71 +26,56 @@ target triple = "spir-unknown-unknown"
- ; CHECK-SPIRV: TypePointer [[int8Ptr:[0-9]+]] 8 [[int8]]
- ; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int8Ptr]] 
[[int]]
- 
--;; This variable is not needed in SPIRV
--; CHECK-SPIRV-NOT: Name {{[0-9]+}} block_kernel.b1
--; CHECK-LLVM-NOT: @block_kernel.b1
--@block_kernel.b1 = internal addrspace(2) constant i32 (i32) addrspace(4)* 
addrspacecast (i32 (i32) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* 
@__block_literal_global to i32 (i32) addrspace(1)*) to i32 (i32) 
addrspace(4)*), align 8
-+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
- 
--@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 
8, i32 4 }, align 4
-+@block_kernel.b1 = internal addrspace(2) constant 
%struct.__opencl_block_literal_generic addrspace(4)* addrspacecast 
(%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 
addrspace(4)* } addrspace(1)* @__block_literal_global to 
%struct.__opencl_block_literal_generic addrspace(1)*) to 
%struct.__opencl_block_literal_generic addrspace(4)*), align 4
-+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 
addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast 
(i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, 
align 4
- 
--; Function Attrs: convergent nounwind
--define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 
!kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 
!kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
-+; Function Attrs: convergent noinline nounwind optnone
-+define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 
!kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 
!kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
- entry:
--  %res.addr = alloca i32 addrspace(1)*, align 8
--  store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8, !tbaa 
!10
--
-+  %res.addr = alloca i32 addrspace(1)*, align 4
-+  store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4
- ; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] {{[0-9]+}} 
[[five]]
- ; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* 
{{.*}}, i32 5)
--  %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast 
(i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global 
to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
--
--  %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8, !tbaa 
!10
--  store i32 %call, i32 addrspace(1)* %0, align 4, !tbaa !14
-+  %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast 
(i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* 
@__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
-+  %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4
-+  store i32 %call, i32 addrspace(1)* %0, align 4
-   ret void
- }
- 
--; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 0 [[block_invoke_type]]
-+; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 2 [[block_invoke_type]]
- ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int8Ptr]] {{[0-9]+}}
- ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int]] {{[0-9]+}}
- ; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i8 addrspace(4)* 
{{.*}}, i32 %{{.*}})
--; Function Attrs: convergent nounwind
-+; Function Attrs: convergent noinline nounwind optnone
- define internal spir_func i32 @_block_invoke(i8 addrspace(4)* 
%.block_descriptor, i32 %i) #1 {
- entry:
--  %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
-+  %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
-   %i.addr = alloca i32, align 4
--  store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** 
%.block_descriptor.addr, align 8
--  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> 
addrspace(4)*
--  store i32 %i, i32* %i.addr, align 4, !tbaa !14
--  %0 = load i32, i32* %i.addr, align 4, !tbaa !14
-+  %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
-+  store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** 
%.block_descriptor.addr, align 4
-+  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 
addrspace(4)* }> addrspace(4)*
-+  store i32 %i, i32* %i.addr, align 4
-+  store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, 
i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
-+  %0 = load i32, i32* %i.addr, align 4
-   %add = add nsw i32 %0, 1
-   ret i32 %add
- }
- 
--attributes #0 = { convergent nounwind 
"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "uniform-work-group-size"="false" 
"unsafe-fp-math"="false" "use-soft-float"="false" }
--attributes #1 = { convergent nounwind 
"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" 
"use-soft-float"="false" }
-+attributes #0 = { convergent noinline nounwind optnone 
"correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"min-legal-vector-width"="0" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "uniform-work-group-size"="false" 
"unsafe-fp-math"="false" "use-soft-float"="false" }
-+attributes #1 = { convergent noinline nounwind optnone 
"correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"min-legal-vector-width"="0" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" 
"use-soft-float"="false" }
- attributes #2 = { convergent }
- 
- !llvm.module.flags = !{!0}
--!opencl.enable.FP_CONTRACT = !{}
- !opencl.ocl.version = !{!1}
- !opencl.spir.version = !{!1}
--!opencl.used.extensions = !{!2}
--!opencl.used.optional.core.features = !{!2}
--!opencl.compiler.options = !{!2}
--!llvm.ident = !{!3}
-+!llvm.ident = !{!2}
- 
- !0 = !{i32 1, !"wchar_size", i32 4}
- !1 = !{i32 2, i32 0}
--!2 = !{}
--!3 = !{!"clang version 7.0.0"}
--!4 = !{i32 1}
--!5 = !{!"none"}
--!6 = !{!"int*"}
--!7 = !{!""}
--!8 = !{i1 false}
--!9 = !{i32 0}
--!10 = !{!11, !11, i64 0}
--!11 = !{!"any pointer", !12, i64 0}
--!12 = !{!"omnipotent char", !13, i64 0}
--!13 = !{!"Simple C/C++ TBAA"}
--!14 = !{!15, !15, i64 0}
--!15 = !{!"int", !12, i64 0}
-+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 
04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 
384f64397f6ad95a361b72d62c07d7bac9f24163)"}
-+!3 = !{i32 1}
-+!4 = !{!"none"}
-+!5 = !{!"int*"}
-+!6 = !{!""}
-diff --git a/test/literal-struct.ll b/test/literal-struct.ll
-index c52170a..52a731a 100644
---- a/test/literal-struct.ll
-+++ b/test/literal-struct.ll
-@@ -2,7 +2,7 @@
- ; structs, i.e. structs whose type has no name. Typicaly clang generate such
- ; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with
- ; the following command:
--; clang -cc1 -triple spir -cl-std=cl2.0 -O0 -finclude-default-header literal-struct.cl -emit-llvm -o test/literal-struct.ll
-+; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll
- 
- ; literal-struct.cl:
- ; void foo()
-@@ -14,25 +14,28 @@
- ; RUN: llvm-as < %s | llvm-spirv -spirv-text -o %t
- ; RUN: FileCheck < %t %s
- 
--; CHECK-DAG: TypeInt [[Int:[0-9]+]] 32 0
--; CHECK-DAG: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] {{$}}
-+; CHECK: TypeInt [[Int:[0-9]+]] 32 0
-+; CHECK: TypeInt [[Int8:[0-9]+]] 8 0
-+; CHECK: TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8]]
-+; CHECK: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] [[Int8Ptr]]
- 
- target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
- target triple = "spir"
- 
--@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 
8, i32 4 }, align 4
-+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
-+
-+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 
addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast 
(void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, 
align 4
- ; CHECK: ConstantComposite [[StructType]]
- 
--; This is artificial case is added to cover ConstantNull instrucitions with 
TypeStruct.
--@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } 
zeroinitializer, align 4
-+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 
addrspace(4)* } zeroinitializer, align 4
- ; CHECK: ConstantNull [[StructType]]
- 
- ; Function Attrs: convergent noinline nounwind optnone
- define spir_func void @foo() #0 {
- entry:
--  %myBlock = alloca void () addrspace(4)*, align 4
--  store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ 
i32, i32 } addrspace(1)* @__block_literal_global to void () addrspace(1)*) to 
void () addrspace(4)*), void () addrspace(4)** %myBlock, align 4
--  call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 
addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 
addrspace(1)*) to i8 addrspace(4)*)) #1
-+  %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, 
align 4
-+  store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast 
(%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 
addrspace(4)* } addrspace(1)* @__block_literal_global to 
%struct.__opencl_block_literal_generic addrspace(1)*) to 
%struct.__opencl_block_literal_generic addrspace(4)*), 
%struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4
-+  call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 
addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* 
@__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1
-   ret void
- }
- 
-@@ -40,14 +43,14 @@ entry:
- define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* 
%.block_descriptor) #0 {
- entry:
-   %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
--  %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
-+  %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
-   store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** 
%.block_descriptor.addr, align 4
--  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> 
addrspace(4)*
--  store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** 
%block.addr, align 4
-+  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 
addrspace(4)* }> addrspace(4)*
-+  store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, 
i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
-   ret void
- }
- 
--attributes #0 = { convergent noinline nounwind optnone 
"correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="false" "no-infs-fp-math"="false" 
"no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" 
"use-soft-float"="false" }
-+attributes #0 = { convergent noinline nounwind optnone 
"correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"min-legal-vector-width"="0" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" 
"use-soft-float"="false" }
- attributes #1 = { convergent }
- 
- !llvm.module.flags = !{!0}
-@@ -57,4 +60,4 @@ attributes #1 = { convergent }
- 
- !0 = !{i32 1, !"wchar_size", i32 4}
- !1 = !{i32 2, i32 0}
--!2 = !{!"clang version 8.0.0 "}
-+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 
04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 
384f64397f6ad95a361b72d62c07d7bac9f24163)"}
-diff --git a/test/transcoding/block_w_struct_return.ll b/test/transcoding/block_w_struct_return.ll
-index 76e29f0..df89b13 100644
---- a/test/transcoding/block_w_struct_return.ll
-+++ b/test/transcoding/block_w_struct_return.ll
-@@ -16,6 +16,8 @@
- ;   res[tid] = kernelBlock(aa).a - 6;
- ; }
- 
-+; clang -cc1 -triple spir -cl-std=cl2.0 -disable-llvm-passes -finclude-default-header block_w_struct_return.cl -emit-llvm -o test/transcoding/block_w_struct_return.ll
-+
- ; RUN: llvm-as %s -o %t.bc
- ; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt
- ; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV
-@@ -27,12 +29,14 @@
- ; CHECK-SPIRV: Name [[BlockInv:[0-9]+]] "__block_ret_struct_block_invoke"
- 
- ; CHECK-SPIRV: 4 TypeInt [[IntTy:[0-9]+]] 32
-+; CHECK-SPIRV: 4 TypeInt [[Int8Ty:[0-9]+]] 8
-+; CHECK-SPIRV: 4 TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8Ty]]
- ; CHECK-SPIRV: 3 TypeStruct [[StructTy:[0-9]+]] [[IntTy]]
- ; CHECK-SPIRV: 4 TypePointer [[StructPtrTy:[0-9]+]] 7 [[StructTy]]
- 
- ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructArg:[0-9]+]] 7
- ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructRet:[0-9]+]] 7
--; CHECK-SPIRV: 4 PtrCastToGeneric {{[0-9]+}} [[BlockLit:[0-9]+]] {{[0-9]+}}
-+; CHECK-SPIRV: 4 PtrCastToGeneric [[Int8Ptr]] [[BlockLit:[0-9]+]] {{[0-9]+}}
- ; CHECK-SPIRV: 7 FunctionCall {{[0-9]+}} {{[0-9]+}} [[BlockInv]] 
[[StructRet]] [[BlockLit]] [[StructArg]]
- 
- ; CHECK-LLVM: %[[StructA:.*]] = type { i32 }
-@@ -41,20 +45,21 @@
- target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
- target triple = "spir64-unknown-unknown"
- 
-+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
- %struct.A = type { i32 }
- 
--@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 
8, i32 4 }, align 4
-+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 
addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast 
(void (%struct.A*, i8 addrspace(4)*, %struct.A*)* 
@__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 8
- 
- ; Function Attrs: convergent noinline nounwind optnone
--define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 
!kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 
!kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_host_accessible 
!8 !kernel_arg_pipe_depth !9 !kernel_arg_pipe_io !7 !kernel_arg_buffer_location 
!7 {
-+define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 
!kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 
!kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
- entry:
-   %res.addr = alloca i32 addrspace(1)*, align 8
--  %kernelBlock = alloca void (%struct.A*, %struct.A*) addrspace(4)*, align 8
-+  %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, 
align 8
-   %tid = alloca i64, align 8
-   %aa = alloca %struct.A, align 4
-   %tmp = alloca %struct.A, align 4
-   store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8
--  store void (%struct.A*, %struct.A*) addrspace(4)* addrspacecast (void 
(%struct.A*, %struct.A*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* 
@__block_literal_global to void (%struct.A*, %struct.A*) addrspace(1)*) to void 
(%struct.A*, %struct.A*) addrspace(4)*), void (%struct.A*, %struct.A*) 
addrspace(4)** %kernelBlock, align 8
-+  store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast 
(%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 
addrspace(4)* } addrspace(1)* @__block_literal_global to 
%struct.__opencl_block_literal_generic addrspace(1)*) to 
%struct.__opencl_block_literal_generic addrspace(4)*), 
%struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 8
-   %call = call spir_func i64 @_Z13get_global_idj(i32 0) #4
-   store i64 %call, i64* %tid, align 8
-   %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8
-@@ -63,7 +68,7 @@ entry:
-   store i32 -1, i32 addrspace(1)* %arrayidx, align 4
-   %a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0
-   store i32 5, i32* %a, align 4
--  call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, 
i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } 
addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 
addrspace(4)*), %struct.A* byval align 4 %aa) #5
-+  call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, 
i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 
addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to 
i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5
-   %a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0
-   %2 = load i32, i32* %a1, align 4
-   %sub = sub nsw i32 %2, 6
-@@ -78,10 +83,10 @@ entry:
- define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* 
noalias sret %agg.result, i8 addrspace(4)* %.block_descriptor, %struct.A* byval 
align 4 %a) #1 {
- entry:
-   %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
--  %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 8
-+  %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 8
-   store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** 
%.block_descriptor.addr, align 8
--  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> 
addrspace(4)*
--  store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** 
%block.addr, align 8
-+  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 
addrspace(4)* }> addrspace(4)*
-+  store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, 
i8 addrspace(4)* }> addrspace(4)** %block.addr, align 8
-   %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0
-   store i32 6, i32* %a1, align 4
-   %0 = bitcast %struct.A* %agg.result to i8*
-@@ -96,30 +101,22 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture 
writeonly, i8* nocapture r
- ; Function Attrs: convergent nounwind readnone
- declare spir_func i64 @_Z13get_global_idj(i32) #3
- 
--attributes #0 = { convergent noinline nounwind optnone 
"correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="false" "no-infs-fp-math"="false" 
"no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "uniform-work-group-size"="false" 
"unsafe-fp-math"="false" "use-soft-float"="false" }
--attributes #1 = { convergent noinline nounwind optnone 
"correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="false" "no-infs-fp-math"="false" 
"no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" 
"use-soft-float"="false" }
-+attributes #0 = { convergent noinline nounwind optnone 
"correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"min-legal-vector-width"="0" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "uniform-work-group-size"="false" 
"unsafe-fp-math"="false" "use-soft-float"="false" }
-+attributes #1 = { convergent noinline nounwind optnone 
"correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"min-legal-vector-width"="0" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" 
"use-soft-float"="false" }
- attributes #2 = { argmemonly nounwind }
- attributes #3 = { convergent nounwind readnone 
"correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="false" "no-infs-fp-math"="false" 
"no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" 
"no-trapping-math"="false" "stack-protector-buffer-size"="8" 
"unsafe-fp-math"="false" "use-soft-float"="false" }
- attributes #4 = { convergent nounwind readnone }
- attributes #5 = { convergent }
- 
- !llvm.module.flags = !{!0}
--!opencl.enable.FP_CONTRACT = !{}
- !opencl.ocl.version = !{!1}
- !opencl.spir.version = !{!1}
--!opencl.used.extensions = !{!2}
--!opencl.used.optional.core.features = !{!2}
--!opencl.compiler.options = !{!2}
--!llvm.ident = !{!3}
-+!llvm.ident = !{!2}
- 
- !0 = !{i32 1, !"wchar_size", i32 4}
- !1 = !{i32 2, i32 0}
--!2 = !{}
--!3 = !{!"clang version 7.0.0"}
--!4 = !{i32 1}
--!5 = !{!"none"}
--!6 = !{!"int*"}
--!7 = !{!""}
--!8 = !{i1 false}
--!9 = !{i32 0}
--
-+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 
04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 
384f64397f6ad95a361b72d62c07d7bac9f24163)"}
-+!3 = !{i32 1}
-+!4 = !{!"none"}
-+!5 = !{!"int*"}
-+!6 = !{!""}
-diff --git a/test/transcoding/enqueue_kernel.ll 
b/test/transcoding/enqueue_kernel.ll
-index 0d29c71..435871d 100644
---- a/test/transcoding/enqueue_kernel.ll
-+++ b/test/transcoding/enqueue_kernel.ll
-@@ -51,11 +51,12 @@
- ; ModuleID = 'enqueue_kernel.cl'
- source_filename = "enqueue_kernel.cl"
- target datalayout = 
"e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
--target triple = "spir-unknown-unknown"
-+target triple = "spir"
- 
- %opencl.queue_t = type opaque
- %struct.ndrange_t = type { i32 }
- %opencl.clk_event_t = type opaque
-+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
- 
- ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] 
"__device_side_enqueue_block_invoke_kernel"
- ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] 
"__device_side_enqueue_block_invoke_2_kernel"
-@@ -66,89 +67,123 @@ target triple = "spir-unknown-unknown"
- 
- ; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32
- ; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8
--; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
- ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt0:[0-9]+]] 0
--; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 17
-+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 21
- ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt2:[0-9]+]] 2
--; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 20
--; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
-+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
-+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 24
- 
- ; CHECK-SPIRV: TypePointer {{[0-9]+}} 7 {{[0-9]+}}
-+; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
-+; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
- ; CHECK-SPIRV: TypePointer [[Int32LocPtrTy:[0-9]+]] 7 [[Int32Ty]]
- ; CHECK-SPIRV: TypeDeviceEvent [[EventTy:[0-9]+]]
--; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
- ; CHECK-SPIRV: TypePointer [[EventPtrTy:[0-9]+]] 8 [[EventTy]]
- ; CHECK-SPIRV: TypeFunction [[BlockTy1:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
- ; CHECK-SPIRV: TypeFunction [[BlockTy2:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
- ; CHECK-SPIRV: TypeFunction [[BlockTy3:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
- ; CHECK-SPIRV: ConstantNull [[EventPtrTy]] [[EventNull:[0-9]+]]
- 
--; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32 }
--; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, 
i32, i8 }>
--; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, 
i32, i32 addrspace(1)* }>
--; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32 }>
-+; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32, i8 addrspace(4)* }
-+; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i8 }>
-+; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i32 addrspace(1)* }>
-+; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)* }>
- 
--; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant 
[[BlockTy1]] { i32 8, i32 4 }, align 4
--; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant 
[[BlockTy1]] { i32 8, i32 4 }, align 4
-+; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant 
[[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 
addrspace(4)*) }, align 4
-+; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant 
[[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 
addrspace(4)*) }, align 4
- 
--@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 
8, i32 4 }, align 4
--@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } { i32 
8, i32 4 }, align 4
-+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 
addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast 
(void (i8 addrspace(4)*, i8 addrspace(3)*)* 
@__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4
-+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 
addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast 
(void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* 
@__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4
- 
- ; Function Attrs: convergent noinline nounwind optnone
--define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 
addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !4 
!kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 
!kernel_arg_type_qual !7 {
-+define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 
addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !3 
!kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 
!kernel_arg_type_qual !6 {
- entry:
-+  %a.addr = alloca i32 addrspace(1)*, align 4
-+  %b.addr = alloca i32 addrspace(1)*, align 4
-+  %i.addr = alloca i32, align 4
-+  %c0.addr = alloca i8, align 1
-   %default_queue = alloca %opencl.queue_t*, align 4
-   %flags = alloca i32, align 4
-   %ndrange = alloca %struct.ndrange_t, align 4
-   %clk_event = alloca %opencl.clk_event_t*, align 4
-   %event_wait_list = alloca %opencl.clk_event_t*, align 4
-   %event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4
--  %block = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, align 4
--  %block3 = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, 
align 4
-+  %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 
}>, align 4
-+  %tmp = alloca %struct.ndrange_t, align 4
-+  %block3 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 
addrspace(1)* }>, align 4
-+  %tmp4 = alloca %struct.ndrange_t, align 4
-   %c = alloca i8, align 1
-+  %tmp11 = alloca %struct.ndrange_t, align 4
-+  %block_sizes = alloca [1 x i32], align 4
-+  %tmp12 = alloca %struct.ndrange_t, align 4
-+  %block_sizes13 = alloca [3 x i32], align 4
-+  store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
-+  store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4
-+  store i32 %i, i32* %i.addr, align 4
-+  store i8 %c0, i8* %c0.addr, align 1
-   store i32 0, i32* %flags, align 4
-   %arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x 
%opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
-   %0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4
-   store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, 
align 4
-   %1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
-   %2 = load i32, i32* %flags, align 4
--  %block.size = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, 
i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
--  store i32 17, i32* %block.size, align 4
--  %block.align = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, 
i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
-+  %3 = bitcast %struct.ndrange_t* %tmp to i8*
-+  %4 = bitcast %struct.ndrange_t* %ndrange to i8*
-+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, 
i1 false)
-+  %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, 
i32, i8 }>* %block, i32 0, i32 0
-+  store i32 21, i32* %block.size, align 4
-+  %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, 
i32, i8 }>* %block, i32 0, i32 1
-   store i32 4, i32* %block.align, align 4
--  %block.captured = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, 
i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
--  store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured, align 4
--  %block.captured1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, 
i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
--  store i32 %i, i32* %block.captured1, align 4
--  %block.captured2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, 
i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
--  store i8 %c0, i8* %block.captured2, align 4
--  %3 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block to void ()*
--  %4 = addrspacecast void ()* %3 to i8 addrspace(4)*
-+  %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, 
i32, i8 }>* %block, i32 0, i32 2
-+  store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* 
@__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 
addrspace(4)** %block.invoke, align 4
-+  %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, 
i32, i8 }>* %block, i32 0, i32 3
-+  %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
-+  store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4
-+  %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, 
i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
-+  %6 = load i32, i32* %i.addr, align 4
-+  store i32 %6, i32* %block.captured1, align 4
-+  %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, 
i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5
-+  %7 = load i8, i8* %c0.addr, align 1
-+  store i8 %7, i8* %block.captured2, align 4
-+  %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* 
%block to %struct.__opencl_block_literal_generic*
-+  %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 
addrspace(4)*
- 
- ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]]
- ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} 
{{[0-9]+}}
- ;                            [[ConstInt0]] [[EventNull]] [[EventNull]]
- ;                            [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] 
[[ConstInt8]]
- 
--; CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast [[BlockTy2]]* %block to i8 
addrspace(4)*
-+; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to 
%struct.__opencl_block_literal_generic*
-+; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast 
%struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)*
- ; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* 
@__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)*
--; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* 
{{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* 
addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* 
[[BlockInv2]], i8 addrspace(4)* [[Block2]])
--
--  %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, 
%struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast 
(void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to 
i8 addrspace(4)*), i8 addrspace(4)* %4)
--  %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to 
%opencl.clk_event_t* addrspace(4)*
--  %7 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* 
addrspace(4)*
--  %block.size5 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, 
i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* 
}>* %block3, i32 0, i32 0
--  store i32 20, i32* %block.size5, align 4
--  %block.align6 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, 
i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* 
}>* %block3, i32 0, i32 1
-+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* 
{{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* 
addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* 
[[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]])
-+
-+  %10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, 
%struct.ndrange_t* byval %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast 
(void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to 
i8 addrspace(4)*), i8 addrspace(4)* %9)
-+  %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
-+  %12 = load i32, i32* %flags, align 4
-+  %13 = bitcast %struct.ndrange_t* %tmp4 to i8*
-+  %14 = bitcast %struct.ndrange_t* %ndrange to i8*
-+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 
4, i1 false)
-+  %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to 
%opencl.clk_event_t* addrspace(4)*
-+  %16 = addrspacecast %opencl.clk_event_t** %clk_event to 
%opencl.clk_event_t* addrspace(4)*
-+  %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0
-+  store i32 24, i32* %block.size5, align 4
-+  %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1
-   store i32 4, i32* %block.align6, align 4
--  %block.captured7 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, 
i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 
addrspace(1)* }>* %block3, i32 0, i32 2
--  store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured7, align 4
--  %block.captured8 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, 
i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 
addrspace(1)* }>* %block3, i32 0, i32 3
--  store i32 %i, i32* %block.captured8, align 4
--  %block.captured9 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, 
i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 
addrspace(1)* }>* %block3, i32 0, i32 4
--  store i32 addrspace(1)* %b, i32 addrspace(1)** %block.captured9, align 4
--  %8 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* 
%block3 to void ()*
--  %9 = addrspacecast void ()* %8 to i8 addrspace(4)*
-+  %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2
-+  store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* 
@__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 
addrspace(4)** %block.invoke7, align 4
-+  %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, 
i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, 
i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3
-+  %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
-+  store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4
-+  %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, 
i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, 
i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4
-+  %18 = load i32, i32* %i.addr, align 4
-+  store i32 %18, i32* %block.captured9, align 4
-+  %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, 
i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, 
i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 5
-+  %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4
-+  store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4
-+  %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 
addrspace(1)* }>* %block3 to %struct.__opencl_block_literal_generic*
-+  %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 
addrspace(4)*
-+
- 
- ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]]
- ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]]
-@@ -158,16 +193,24 @@ entry:
- ;                            [[ConstInt2]] [[Event1]] [[Event2]]
- ;                            [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] 
[[ConstInt8]]
- 
--; CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast [[BlockTy3]]* %block3 to i8 
addrspace(4)*
-+; CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block3 to 
%struct.__opencl_block_literal_generic*
-+; CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast 
%struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4)
- ; CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* 
@__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)*
--; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* 
{{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* 
addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 
addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3]])
--
--  %10 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, 
%struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, 
%opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* 
bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel 
to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
--  %11 = alloca [1 x i32]
--  %12 = getelementptr [1 x i32], [1 x i32]* %11, i32 0, i32 0
--  %13 = load i8, i8* %c, align 1
--  %14 = zext i8 %13 to i32
--  store i32 %14, i32* %12, align 4
-+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* 
{{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* 
addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 
addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]])
-+
-+  %22 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 
%12, %struct.ndrange_t* %tmp4, i32 2, %opencl.clk_event_t* addrspace(4)* %15, 
%opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* 
bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel 
to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21)
-+  %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
-+  %24 = load i32, i32* %flags, align 4
-+  %25 = bitcast %struct.ndrange_t* %tmp11 to i8*
-+  %26 = bitcast %struct.ndrange_t* %ndrange to i8*
-+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 
4, i1 false)
-+  %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x 
%opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
-+  %27 = addrspacecast %opencl.clk_event_t** %arraydecay to 
%opencl.clk_event_t* addrspace(4)*
-+  %28 = addrspacecast %opencl.clk_event_t** %clk_event to 
%opencl.clk_event_t* addrspace(4)*
-+  %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0
-+  %30 = load i8, i8* %c, align 1
-+  %31 = zext i8 %30 to i32
-+  store i32 %31, i32* %29, align 4
- 
- ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]]
- ; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]]
-@@ -182,14 +225,18 @@ entry:
- ; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, 
i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 
addrspace(4)*
- ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* 
{{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* 
addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 
addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}})
- 
--  %15 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %1, i32 
%2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, 
%opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* 
bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* 
@__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 
addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } 
addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 
addrspace(4)*), i32 1, i32* %12)
--  %16 = alloca [3 x i32]
--  %17 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 0
--  store i32 1, i32* %17, align 4
--  %18 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 1
--  store i32 2, i32* %18, align 4
--  %19 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 2
--  store i32 4, i32* %19, align 4
-+  %32 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 
%24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, 
%opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* 
bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* 
@__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 
addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 
addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to 
i8 addrspace(4)*), i32 1, i32* %29)
-+  %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
-+  %34 = load i32, i32* %flags, align 4
-+  %35 = bitcast %struct.ndrange_t* %tmp12 to i8*
-+  %36 = bitcast %struct.ndrange_t* %ndrange to i8*
-+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 
4, i1 false)
-+  %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0
-+  store i32 1, i32* %37, align 4
-+  %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1
-+  store i32 2, i32* %38, align 4
-+  %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2
-+  store i32 4, i32* %39, align 4
- 
- ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]]
- ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]]
-@@ -206,24 +253,27 @@ entry:
- ; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, 
i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* 
@__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)*
- ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* 
{{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* 
addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* 
[[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}})
- 
--  %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, 
%struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void 
(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* 
@__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 
addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } 
addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 
addrspace(4)*), i32 3, i32* %17)
-+  %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, 
%struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void 
(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* 
@__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 
addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 
addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to 
i8 addrspace(4)*), i32 3, i32* %37)
-   ret void
- }
- 
-+; Function Attrs: argmemonly nounwind
-+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* 
nocapture readonly, i32, i1) #1
-+
- ; Function Attrs: convergent noinline nounwind optnone
- define internal spir_func void @__device_side_enqueue_block_invoke(i8 
addrspace(4)* %.block_descriptor) #2 {
- entry:
-   %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
--  %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }> 
addrspace(4)*, align 4
-+  %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, 
i8 }> addrspace(4)*, align 4
-   store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** 
%.block_descriptor.addr, align 4
--  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 
addrspace(1)*, i32, i8 }> addrspace(4)*
--  store <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ 
i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
--  %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 
addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> 
addrspace(4)* %block, i32 0, i32 4
-+  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 
addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
-+  store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> 
addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 
}> addrspace(4)** %block.addr, align 4
-+  %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, 
i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5
-   %0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4
-   %conv = sext i8 %0 to i32
--  %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 
addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> 
addrspace(4)* %block, i32 0, i32 2
-+  %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 
addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, 
i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
-   %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* 
%block.capture.addr1, align 4
--  %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 
addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> 
addrspace(4)* %block, i32 0, i32 3
-+  %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 
addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, 
i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
-   %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4
-   %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2
-   store i32 %conv, i32 addrspace(1)* %arrayidx, align 4
-@@ -243,19 +293,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, 
i32, %struct.ndrange_t*, i
- define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 
addrspace(4)* %.block_descriptor) #2 {
- entry:
-   %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
--  %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* 
}> addrspace(4)*, align 4
-+  %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, 
i32 addrspace(1)* }> addrspace(4)*, align 4
-   store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** 
%.block_descriptor.addr, align 4
--  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 
addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
--  store <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> 
addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> 
addrspace(4)** %block.addr, align 4
--  %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 
addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, 
i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
-+  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 
addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
-+  store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 
addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 
addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
-+  %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, 
i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, 
i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5
-   %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* 
%block.capture.addr, align 4
--  %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 
addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, 
i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
-+  %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 
addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 
addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* 
%block, i32 0, i32 4
-   %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4
-   %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1
-   %2 = load i32, i32 addrspace(1)* %arrayidx, align 4
--  %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 
addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, 
i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 2
-+  %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 
addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 
addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* 
%block, i32 0, i32 3
-   %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* 
%block.capture.addr2, align 4
--  %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i32 
addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, 
i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
-+  %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 
addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 
addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* 
%block, i32 0, i32 4
-   %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4
-   %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4
-   store i32 %2, i32 addrspace(1)* %arrayidx4, align 4
-@@ -276,11 +326,11 @@ define internal spir_func void 
@__device_side_enqueue_block_invoke_3(i8 addrspac
- entry:
-   %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
-   %p.addr = alloca i8 addrspace(3)*, align 4
--  %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
-+  %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
-   store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** 
%.block_descriptor.addr, align 4
--  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> 
addrspace(4)*
-+  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 
addrspace(4)* }> addrspace(4)*
-   store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4
--  store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** 
%block.addr, align 4
-+  store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, 
i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
-   ret void
- }
- 
-@@ -300,13 +350,13 @@ entry:
-   %p1.addr = alloca i8 addrspace(3)*, align 4
-   %p2.addr = alloca i8 addrspace(3)*, align 4
-   %p3.addr = alloca i8 addrspace(3)*, align 4
--  %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
-+  %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
-   store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** 
%.block_descriptor.addr, align 4
--  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> 
addrspace(4)*
-+  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 
addrspace(4)* }> addrspace(4)*
-   store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4
-   store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4
-   store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4
--  store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** 
%block.addr, align 4
-+  store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, 
i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
-   ret void
- }
- 
-@@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, 
i32, %struct.ndrange_t*,
- ; CHECK-LLVM-DAG: define spir_kernel void 
@__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*)
- ; CHECK-LLVM-DAG: define spir_kernel void 
@__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 
addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)
- 
--attributes #0 = { convergent noinline nounwind optnone 
"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "uniform-work-group-size"="false" 
"unsafe-fp-math"="false" "use-soft-float"="false" }
-+attributes #0 = { convergent noinline nounwind optnone 
"correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"min-legal-vector-width"="0" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "uniform-work-group-size"="false" 
"unsafe-fp-math"="false" "use-soft-float"="false" }
- attributes #1 = { argmemonly nounwind }
--attributes #2 = { convergent noinline nounwind optnone 
"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" 
"use-soft-float"="false" }
-+attributes #2 = { convergent noinline nounwind optnone 
"correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" 
"disable-tail-calls"="false" "less-precise-fpmad"="false" 
"min-legal-vector-width"="0" "no-frame-pointer-elim"="false" 
"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" 
"use-soft-float"="false" }
- attributes #3 = { nounwind }
- 
- !llvm.module.flags = !{!0}
--!opencl.enable.FP_CONTRACT = !{}
- !opencl.ocl.version = !{!1}
- !opencl.spir.version = !{!1}
--!opencl.used.extensions = !{!2}
--!opencl.used.optional.core.features = !{!2}
--!opencl.compiler.options = !{!2}
--!llvm.ident = !{!3}
-+!llvm.ident = !{!2}
- 
- !0 = !{i32 1, !"wchar_size", i32 4}
- !1 = !{i32 2, i32 0}
--!2 = !{}
--!3 = !{!"clang version 7.0.0"}
--!4 = !{i32 1, i32 1, i32 0, i32 0}
--!5 = !{!"none", !"none", !"none", !"none"}
--!6 = !{!"int*", !"int*", !"int", !"char"}
--!7 = !{!"", !"", !"", !""}
--!8 = !{i1 false, i1 false, i1 false, i1 false}
--!9 = !{i32 0, i32 0, i32 0, i32 0}
-+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 
04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 
384f64397f6ad95a361b72d62c07d7bac9f24163)"}
-+!3 = !{i32 1, i32 1, i32 0, i32 0}
-+!4 = !{!"none", !"none", !"none", !"none"}
-+!5 = !{!"int*", !"int*", !"int", !"char"}
-+!6 = !{!"", !"", !"", !""}
--- 
-1.8.3.1
-

Modified: PKGBUILD
===================================================================
--- PKGBUILD    2019-08-16 23:33:02 UTC (rev 499891)
+++ PKGBUILD    2019-08-17 00:04:46 UTC (rev 499892)
@@ -2,7 +2,7 @@
 
 _srcname=SPIRV-LLVM-Translator
 pkgname=${_srcname,,}
-_build=1
+_build=2
 pkgver=8.0.1.${_build}
 pkgrel=1
 pkgdesc="Tool and a library for bi-directional translation between SPIR-V and LLVM IR"
@@ -10,15 +10,19 @@
 url="https://github.com/KhronosGroup/SPIRV-LLVM-Translator/"
 license=(custom)
 makedepends=(cmake llvm)
+_commit=94af090661d7c953c516c97a25ed053c744a0737
 source=("${url}/archive/v${pkgver%.*}-${_build}/${pkgname}-${pkgver}.tar.gz"
-        0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch)
-sha256sums=('3947761445e93a7049f75d133ea6ea4aa620feb9b09f0bfd006ec2ffcb0b00ab'
-            'd3f477a6e7f4ab5ae2af8e50ec8cfc36f184392c119160f281bb66da758f3e25')
+        "${pkgname}-0001-Update-LowerOpenCL-pass.patch"::"https://raw.githubusercontent.com/intel/opencl-clang/${_commit}/patches/spirv/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch"
+        "${pkgname}-0002-Remove-extra-semicolon.patch"::"https://raw.githubusercontent.com/intel/opencl-clang/${_commit}/patches/spirv/0002-Remove-extra-semicolon.patch")
+sha256sums=('f145292872419cc101eee89ce1904be06ead2f5761816a22cb13788e5d394e19'
+            '42ac9214fceec9b207201488c9eac899138cdbbd02e3fc56fe815e9bc3455046'
+            'af9c341d4a2554e421965b40b0c7f87f4fc60b0522bbc6f0d6f2907d1ddd7ddb')
 
 prepare() {
     mkdir -p build
     cd ${_srcname}-${pkgver%.*}-${_build}
-    patch -Np1 -i ../0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch
+    patch -Np1 -i ../${pkgname}-0001-Update-LowerOpenCL-pass.patch
+    patch -Np1 -i ../${pkgname}-0002-Remove-extra-semicolon.patch
 }
 
 build() {

[arch-commits] Commit in spirv-llvm-translator/trunk (2 files)

Reply via email to