https://github.com/AlexVlx created https://github.com/llvm/llvm-project/pull/110897
Although not currently enabled, the InferAddressSpaces pass is desirable and profitable for SPIR-V, as it can exploit information that might subsequently be lost as transforms are applied to the IR and the resulting SPIR-V. This patch enables the pass for all SPIR-V targets; it is modelled after the AMDGPU implementation.

From 9f3cac44dde7d0adcf6cd090c0b91f57cb1c4dca Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Wed, 2 Oct 2024 11:18:36 +0100 Subject: [PATCH] Enable `InferAddressSpaces` for SPIR-V. --- .../amdgpu-kernel-arg-pointer-type.cu | 62 ++--- llvm/lib/Target/SPIRV/CMakeLists.txt | 2 + llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 92 +++++++ llvm/lib/Target/SPIRV/SPIRVTargetMachine.h | 7 + .../Target/SPIRV/SPIRVTargetTransformInfo.h | 4 + .../SPIRV/assumed-addrspace.ll | 31 +++ .../InferAddressSpaces/SPIRV/basic.ll | 236 ++++++++++++++++++ .../SPIRV/infer-address-space.ll | 211 ++++++++++++++++ .../SPIRV/infer-addrspacecast.ll | 65 +++++ .../SPIRV/infer-getelementptr.ll | 108 ++++++++ .../SPIRV/insert-pos-assert.ll | 158 ++++++++++++ .../InferAddressSpaces/SPIRV/is.constant.ll | 57 +++++ .../InferAddressSpaces/SPIRV/lit.local.cfg | 2 + .../SPIRV/mem-intrinsics.ll | 145 +++++++++++ .../SPIRV/multiple-uses-of-val.ll | 70 ++++++ .../InferAddressSpaces/SPIRV/prefetch.ll | 60 +++++ .../preserving-debugloc-addrspacecast.ll | 48 ++++ .../SPIRV/redundant-addrspacecast.ll | 28 +++ .../InferAddressSpaces/SPIRV/self-phi.ll | 29 +++ .../InferAddressSpaces/SPIRV/volatile.ll | 187 ++++++++++++++ 20 files changed, 1567 insertions(+), 35 deletions(-) create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-address-space.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/lit.local.cfg create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu index b295bbbdaaf955..15c8b46d278ea1 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -58,13 +58,11 @@ // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi( -// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] { +// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: 
[[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 -// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel1Pi( @@ -126,13 +124,11 @@ __global__ void kernel1(int *x) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri( -// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 -// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel2Ri( @@ -195,7 +191,7 @@ __global__ void kernel2(int &x) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i( -// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] { +// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4 // OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4 @@ -261,7 +257,7 @@ __global__ void kernel3(__attribute__((address_space(2))) int *x, // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_func void @_Z4funcPi( -// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] { +// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[X]], align 4 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 @@ -343,7 +339,7 @@ struct S { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S( -// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue 
[[STRUCT_S]] [[S_COERCE]], 1 @@ -446,19 +442,17 @@ __global__ void kernel4(struct S s) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S( -// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[TMP1]], align 8 -// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4 -// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 -// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP2]], align 4 -// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8 -// OPT-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8 -// OPT-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[TMP4]], align 4 -// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00 -// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP4]], align 4 +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[S_COERCE]], align 8 +// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4 +// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[S_COERCE]], i64 8 +// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[Y]], align 8 +// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4 +// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 +// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel5P1S( @@ -551,7 +545,7 @@ struct T { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T( -// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0 // OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0 @@ -631,13 +625,11 @@ __global__ void kernel6(struct T t) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi( -// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: ptr addrspace(1) noalias nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 -// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 +// 
OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel7Pi( @@ -700,7 +692,7 @@ struct SS { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS( -// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4 diff --git a/llvm/lib/Target/SPIRV/CMakeLists.txt b/llvm/lib/Target/SPIRV/CMakeLists.txt index 326343ae278148..0ae292498e4636 100644 --- a/llvm/lib/Target/SPIRV/CMakeLists.txt +++ b/llvm/lib/Target/SPIRV/CMakeLists.txt @@ -52,6 +52,8 @@ add_llvm_target(SPIRVCodeGen Core Demangle GlobalISel + Passes + Scalar SPIRVAnalysis MC SPIRVDesc diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index e5384b2eb2c2c1..91bcd68813fc55 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -26,9 +26,15 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/InitializePasses.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" +#include "llvm/Passes/OptimizationLevel.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/InferAddressSpaces.h" #include "llvm/Transforms/Utils.h" #include <optional> @@ -91,6 +97,89 @@ SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT, setRequiresStructuredCFG(false); } +namespace { + enum AddressSpace { + Function = storageClassToAddressSpace(SPIRV::StorageClass::Function), + CrossWorkgroup = + storageClassToAddressSpace(SPIRV::StorageClass::CrossWorkgroup), + UniformConstant = + storageClassToAddressSpace(SPIRV::StorageClass::UniformConstant), + Workgroup = storageClassToAddressSpace(SPIRV::StorageClass::Workgroup), + Generic = storageClassToAddressSpace(SPIRV::StorageClass::Generic) + }; +} + +unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const { + const auto *LD = dyn_cast<LoadInst>(V); + if (!LD) + return UINT32_MAX; + + // It must be a load from a pointer to Generic. + assert(V->getType()->isPointerTy() && + V->getType()->getPointerAddressSpace() == AddressSpace::Generic); + + const auto *Ptr = LD->getPointerOperand(); + if (Ptr->getType()->getPointerAddressSpace() != AddressSpace::UniformConstant) + return UINT32_MAX; + // For a loaded from a pointer to UniformConstant, we can infer CrossWorkgroup + // storage, as this could only have been legally initialised with a + // CrossWorkgroup (aka device) constant pointer. 
+ return AddressSpace::CrossWorkgroup; +} + +std::pair<const Value *, unsigned> +SPIRVTargetMachine::getPredicatedAddrSpace(const Value *V) const { + using namespace PatternMatch; + + if (auto *II = dyn_cast<IntrinsicInst>(V)) { + switch (II->getIntrinsicID()) { + case Intrinsic::amdgcn_is_shared: + return std::pair(II->getArgOperand(0), AddressSpace::Workgroup); + case Intrinsic::amdgcn_is_private: + return std::pair(II->getArgOperand(0), AddressSpace::Function); + default: + break; + } + return std::pair(nullptr, UINT32_MAX); + } + // Check the global pointer predication based on + // (!is_share(p) && !is_private(p)). Note that logic 'and' is commutative and + // the order of 'is_shared' and 'is_private' is not significant. + Value *Ptr; + if (getTargetTriple().getVendor() == Triple::VendorType::AMD && + match( + const_cast<Value *>(V), + m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))), + m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(m_Deferred(Ptr)))))) + return std::pair(Ptr, AddressSpace::CrossWorkgroup); + + return std::pair(nullptr, UINT32_MAX); +} + +bool SPIRVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, + unsigned DestAS) const { + if (SrcAS != AddressSpace::Generic && SrcAS != AddressSpace::CrossWorkgroup) + return false; + return DestAS == AddressSpace::Generic || + DestAS == AddressSpace::CrossWorkgroup; +} + +void SPIRVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { + PB.registerCGSCCOptimizerLateEPCallback([](CGSCCPassManager &PM, + OptimizationLevel Level) { + if (Level == OptimizationLevel::O0) + return; + + FunctionPassManager FPM; + + // Add infer address spaces pass to the opt pipeline after inlining + // but before SROA to increase SROA opportunities. + FPM.addPass(InferAddressSpacesPass(AddressSpace::Generic)); + + PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); + }); +} + namespace { // SPIR-V Code Generator Pass Configuration Options. 
class SPIRVPassConfig : public TargetPassConfig { @@ -178,6 +267,9 @@ void SPIRVPassConfig::addIRPasses() { addPass(createSPIRVStructurizerPass()); } + if (TM.getOptLevel() > CodeGenOptLevel::None) + addPass(createInferAddressSpacesPass(AddressSpace::Generic)); + addPass(createSPIRVRegularizerPass()); addPass(createSPIRVPrepareFunctionsPass(TM)); addPass(createSPIRVStripConvergenceIntrinsicsPass()); diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h index a1a9f26846153b..24b09febb9d184 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h @@ -43,6 +43,13 @@ class SPIRVTargetMachine : public LLVMTargetMachine { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + unsigned getAssumedAddrSpace(const Value *V) const override; + std::pair<const Value *, unsigned> + getPredicatedAddrSpace(const Value *V) const override; + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DstAS) const override; + + void registerPassBuilderCallbacks(PassBuilder &PB) override; }; } // namespace llvm diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h index 24047f31fab290..295c0ceeade839 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h +++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h @@ -39,6 +39,10 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> { : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + unsigned getFlatAddressSpace() const { + return storageClassToAddressSpace(SPIRV::StorageClass::Generic); + } + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) { // SPIR-V natively supports OpBitcount, per 3.53.14 in the spec, as such it // is reasonable to assume the Op is fast / preferable to the expanded loop. 
diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll new file mode 100644 index 00000000000000..9b65ff44f288f2 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces -o - %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces -o - %s | FileCheck %s + +@c0 = addrspace(2) global ptr undef + +; CHECK-LABEL: @generic_ptr_from_constant +; CHECK: addrspacecast ptr addrspace(4) %p to ptr addrspace(1) +; CHECK-NEXT: load float, ptr addrspace(1) +define spir_func float @generic_ptr_from_constant() { + %p = load ptr addrspace(4), ptr addrspace(2) @c0 + %v = load float, ptr addrspace(4) %p + ret float %v +} + +%struct.S = type { ptr addrspace(4), ptr addrspace(4) } + +; CHECK-LABEL: @generic_ptr_from_aggregate_argument +; CHECK: addrspacecast ptr addrspace(4) %p0 to ptr addrspace(1) +; CHECK: addrspacecast ptr addrspace(4) %p1 to ptr addrspace(1) +; CHECK: load i32, ptr addrspace(1) +; CHECK: store float %v1, ptr addrspace(1) +; CHECK: ret +define spir_kernel void @generic_ptr_from_aggregate_argument(ptr addrspace(2) byval(%struct.S) align 8 %0) { + %p0 = load ptr addrspace(4), ptr addrspace(2) %0 + %f1 = getelementptr inbounds %struct.S, ptr addrspace(2) %0, i64 0, i32 1 + %p1 = load ptr addrspace(4), ptr addrspace(2) %f1 + %v0 = load i32, ptr addrspace(4) %p0 + %v1 = sitofp i32 %v0 to float + store float %v1, ptr addrspace(4) %p1 + ret void +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll new file mode 100644 index 00000000000000..75b23aa30349af --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll @@ -0,0 +1,236 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +; Trivial optimization of generic addressing + +define float @load_global_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define float @load_global_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[TMP0]], align 4 +; CHECK-NEXT: ret float [[TMP1]] +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(1) + %tmp1 = load float, ptr addrspace(1) %tmp0 + ret float %tmp1 +} + +define float @load_group_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define float @load_group_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(3) [[TMP0]], align 4 +; CHECK-NEXT: ret float [[TMP1]] +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(3) + %tmp1 = load float, ptr addrspace(3) %tmp0 + ret float %tmp1 +} + +define float @load_private_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define float @load_private_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr 
addrspace(4) [[GENERIC_SCALAR]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4 +; CHECK-NEXT: ret float [[TMP1]] +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr + %tmp1 = load float, ptr %tmp0 + ret float %tmp1 +} + +define spir_kernel void @store_global_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define spir_kernel void @store_global_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[TMP0]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(1) + store float 0.0, ptr addrspace(1) %tmp0 + ret void +} + +define spir_kernel void @store_group_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define spir_kernel void @store_group_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(3) [[TMP0]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(3) + store float 0.0, ptr addrspace(3) %tmp0 + ret void +} + +define spir_kernel void @store_private_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define spir_kernel void @store_private_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr +; CHECK-NEXT: store float 0.000000e+00, ptr [[TMP0]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr + store float 0.0, ptr %tmp0 + ret void +} + +define spir_kernel void @load_store_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @load_store_global( +; CHECK-SAME: ptr addrspace(1) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[INPUT]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr addrspace(1) %output to ptr addrspace(4) + %val = load i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @load_store_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @load_store_group( +; CHECK-SAME: ptr addrspace(3) nocapture [[INPUT:%.*]], ptr addrspace(3) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(3) [[INPUT]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(3) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(3) %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr addrspace(3) %output to ptr addrspace(4) + %val = load i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @load_store_private(ptr nocapture %input, ptr nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @load_store_private( +; CHECK-SAME: ptr nocapture [[INPUT:%.*]], ptr nocapture [[OUTPUT:%.*]]) 
#[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[INPUT]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr %output to ptr addrspace(4) + %val = load i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @load_store_flat(ptr addrspace(4) nocapture %input, ptr addrspace(4) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @load_store_flat( +; CHECK-SAME: ptr addrspace(4) nocapture [[INPUT:%.*]], ptr addrspace(4) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) [[INPUT]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(4) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %val = load i32, ptr addrspace(4) %input, align 4 + store i32 %val, ptr addrspace(4) %output, align 4 + ret void +} + +define spir_kernel void @store_addrspacecast_ptr_value(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @store_addrspacecast_ptr_value( +; CHECK-SAME: ptr addrspace(1) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[INPUT]] to ptr addrspace(4) +; CHECK-NEXT: store ptr addrspace(4) [[CAST]], ptr addrspace(1) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) + store ptr addrspace(4) %cast, ptr addrspace(1) %output, align 4 + ret void +} + +define i32 @atomicrmw_add_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 { +; CHECK-LABEL: define i32 @atomicrmw_add_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(1) [[GLOBAL_PTR]], i32 [[Y]] seq_cst, align 4 +; CHECK-NEXT: ret i32 [[RET]] +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + %ret = atomicrmw add ptr addrspace(4) %cast, i32 %y seq_cst + ret i32 %ret +} + +define i32 @atomicrmw_add_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 { +; CHECK-LABEL: define i32 @atomicrmw_add_group_to_flat( +; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(3) [[GROUP_PTR]], i32 [[Y]] seq_cst, align 4 +; CHECK-NEXT: ret i32 [[RET]] +; + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + %ret = atomicrmw add ptr addrspace(4) %cast, i32 %y seq_cst + ret i32 %ret +} + +define { i32, i1 } @cmpxchg_global_to_flat(ptr addrspace(1) %global.ptr, i32 %cmp, i32 %val) #0 { +; CHECK-LABEL: define { i32, i1 } @cmpxchg_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = cmpxchg ptr addrspace(1) [[GLOBAL_PTR]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 +; CHECK-NEXT: ret { i32, i1 } [[RET]] +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + %ret = cmpxchg ptr addrspace(4) %cast, i32 %cmp, i32 %val seq_cst monotonic + ret { i32, i1 } %ret +} + +define { i32, i1 } @cmpxchg_group_to_flat(ptr addrspace(3) %group.ptr, i32 %cmp, i32 %val) #0 { +; CHECK-LABEL: define { i32, i1 } @cmpxchg_group_to_flat( +; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = 
cmpxchg ptr addrspace(3) [[GROUP_PTR]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 +; CHECK-NEXT: ret { i32, i1 } [[RET]] +; + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + %ret = cmpxchg ptr addrspace(4) %cast, i32 %cmp, i32 %val seq_cst monotonic + ret { i32, i1 } %ret +} + +define { ptr addrspace(4), i1 } @cmpxchg_group_to_flat_wrong_operand(ptr addrspace(3) %cas.ptr, ptr addrspace(3) %cmp.ptr, ptr addrspace(4) %val) #0 { + %cast.cmp = addrspacecast ptr addrspace(3) %cmp.ptr to ptr addrspace(4) + %ret = cmpxchg ptr addrspace(3) %cas.ptr, ptr addrspace(4) %cast.cmp, ptr addrspace(4) %val seq_cst monotonic + ret { ptr addrspace(4), i1 } %ret +} + +define void @local_nullptr(ptr addrspace(1) nocapture %results, ptr addrspace(3) %a) { +; CHECK-LABEL: define void @local_nullptr( +; CHECK-SAME: ptr addrspace(1) nocapture [[RESULTS:%.*]], ptr addrspace(3) [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne ptr addrspace(3) [[A]], addrspacecast (ptr null to ptr addrspace(3)) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32 +; CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[RESULTS]], align 4 +; CHECK-NEXT: ret void +; +entry: + %tobool = icmp ne ptr addrspace(3) %a, addrspacecast (ptr null to ptr addrspace(3)) + %conv = zext i1 %tobool to i32 + store i32 %conv, ptr addrspace(1) %results, align 4 + ret void +} + +define i32 @atomicrmw_add_global_to_flat_preserve_amdgpu_md(ptr addrspace(1) %global.ptr, i32 %y) #0 { +; CHECK-LABEL: define i32 @atomicrmw_add_global_to_flat_preserve_amdgpu_md( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(1) [[GLOBAL_PTR]], i32 [[Y]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]] +; CHECK-NEXT: ret i32 [[RET]] +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + %ret = atomicrmw add ptr addrspace(4) %cast, i32 %y seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 + ret i32 %ret +} + +define ptr addrspace(4) @try_infer_getelementptr_constant_null() { +; CHECK-LABEL: define ptr addrspace(4) @try_infer_getelementptr_constant_null() { +; CHECK-NEXT: [[CE:%.*]] = getelementptr i8, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) null, i64 8), i64 0 +; CHECK-NEXT: ret ptr addrspace(4) [[CE]] +; + %ce = getelementptr i8, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) null, i64 8), i64 0 + ret ptr addrspace(4) %ce +} + +attributes #0 = { nounwind } + +!0 = !{} +;. +; CHECK: [[META0]] = !{} +;. 
diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-address-space.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-address-space.ll new file mode 100644 index 00000000000000..7de9557a9ee902 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-address-space.ll @@ -0,0 +1,211 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv64-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s +; Ports of most of test/CodeGen/NVPTX/access-non-generic.ll + +@scalar = internal addrspace(3) global float 0.0, align 4 +@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4 + +define spir_kernel void @load_store_lds_f32(i32 %i, float %v) #0 { +; CHECK-LABEL: define spir_kernel void @load_store_lds_f32( +; CHECK-SAME: i32 [[I:%.*]], float [[V:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[TMP:%.*]] = load float, ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[TMP]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: call addrspace(4) void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[TMP2]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: call addrspace(4) void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[TMP3]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4 +; CHECK-NEXT: call addrspace(4) void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 5 +; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(3) [[TMP4]], align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[TMP5]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) [[TMP4]], align 4 +; CHECK-NEXT: call addrspace(4) void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 [[I]] +; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr addrspace(3) [[TMP7]], align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[TMP8]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) [[TMP7]], align 4 +; CHECK-NEXT: call addrspace(4) void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: ret void +; +bb: + %tmp = load float, ptr addrspace(4) addrspacecast (ptr addrspace(3) @scalar to ptr addrspace(4)), align 4 + call void @use(float %tmp) + store float %v, ptr addrspace(4) addrspacecast (ptr addrspace(3) @scalar to ptr addrspace(4)), align 4 + call void @llvm.amdgcn.s.barrier() + %tmp1 = addrspacecast ptr addrspace(3) @scalar to ptr addrspace(4) + %tmp2 = load float, ptr addrspace(4) %tmp1, align 4 + call void @use(float %tmp2) + store float %v, ptr addrspace(4) %tmp1, align 4 + call void @llvm.amdgcn.s.barrier() + %tmp3 = load float, ptr addrspace(4) getelementptr inbounds ([10 x float], ptr addrspace(4) addrspacecast (ptr addrspace(3) @array to ptr addrspace(4)), i32 0, i32 5), align 4 + call void @use(float %tmp3) + store float %v, ptr addrspace(4) getelementptr inbounds ([10 x float], ptr addrspace(4) addrspacecast (ptr addrspace(3) @array to ptr addrspace(4)), i32 0, i32 5), 
align 4 + call void @llvm.amdgcn.s.barrier() + %tmp4 = getelementptr inbounds [10 x float], ptr addrspace(4) addrspacecast (ptr addrspace(3) @array to ptr addrspace(4)), i32 0, i32 5 + %tmp5 = load float, ptr addrspace(4) %tmp4, align 4 + call void @use(float %tmp5) + store float %v, ptr addrspace(4) %tmp4, align 4 + call void @llvm.amdgcn.s.barrier() + %tmp6 = addrspacecast ptr addrspace(3) @array to ptr addrspace(4) + %tmp7 = getelementptr inbounds [10 x float], ptr addrspace(4) %tmp6, i32 0, i32 %i + %tmp8 = load float, ptr addrspace(4) %tmp7, align 4 + call void @use(float %tmp8) + store float %v, ptr addrspace(4) %tmp7, align 4 + call void @llvm.amdgcn.s.barrier() + ret void +} + +define i32 @constexpr_load_int_from_float_lds() #0 { +; CHECK-LABEL: define i32 @constexpr_load_int_from_float_lds( +; CHECK-SAME: ) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: ret i32 [[TMP]] +; +bb: + %tmp = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @scalar to ptr addrspace(4)), align 4 + ret i32 %tmp +} + +define i32 @load_int_from_global_float(ptr addrspace(1) %input, i32 %i, i32 %j) #0 { +; CHECK-LABEL: define i32 @load_int_from_global_float( +; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i32 [[I]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr addrspace(1) [[TMP1]], i32 [[J]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: ret i32 [[TMP4]] +; +bb: + %tmp = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) + %tmp1 = getelementptr float, ptr addrspace(4) %tmp, i32 %i + %tmp2 = getelementptr float, ptr addrspace(4) %tmp1, i32 %j + %tmp4 = load i32, ptr addrspace(4) %tmp2 + ret i32 %tmp4 +} + +define spir_kernel void @nested_const_expr() #0 { +; CHECK-LABEL: define spir_kernel void @nested_const_expr( +; CHECK-SAME: ) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: store i32 1, ptr addrspace(3) getelementptr ([10 x float], ptr addrspace(3) @array, i64 0, i64 1), align 4 +; CHECK-NEXT: ret void +; + store i32 1, ptr addrspace(4) bitcast (ptr addrspace(4) getelementptr ([10 x float], ptr addrspace(4) addrspacecast (ptr addrspace(3) @array to ptr addrspace(4)), i64 0, i64 1) to ptr addrspace(4)), align 4 + + ret void +} + +define spir_kernel void @rauw(ptr addrspace(1) %input) #0 { +; CHECK-LABEL: define spir_kernel void @rauw( +; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]]) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i64 10 +; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(1) [[ADDR]], align 4 +; CHECK-NEXT: store float [[V]], ptr addrspace(1) [[ADDR]], align 4 +; CHECK-NEXT: ret void +; +bb: + %generic_input = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) + %addr = getelementptr float, ptr addrspace(4) %generic_input, i64 10 + %v = load float, ptr addrspace(4) %addr + store float %v, ptr addrspace(4) %addr + ret void +} + +; FIXME: Should be able to eliminate the cast inside the loop +define spir_kernel void @loop() #0 { +; CHECK-LABEL: define spir_kernel void @loop( +; CHECK-SAME: ) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[END:%.*]] = getelementptr float, ptr addrspace(3) @array, i64 10 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: 
[[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[V]]) +; CHECK-NEXT: [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq ptr addrspace(3) [[I2]], [[END]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %p = addrspacecast ptr addrspace(3) @array to ptr addrspace(4) + %end = getelementptr float, ptr addrspace(4) %p, i64 10 + br label %loop + +loop: ; preds = %loop, %entry + %i = phi ptr addrspace(4) [ %p, %entry ], [ %i2, %loop ] + %v = load float, ptr addrspace(4) %i + call void @use(float %v) + %i2 = getelementptr float, ptr addrspace(4) %i, i64 1 + %exit_cond = icmp eq ptr addrspace(4) %i2, %end + br i1 %exit_cond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +@generic_end = external addrspace(1) global ptr addrspace(4) + +define spir_kernel void @loop_with_generic_bound() #0 { +; CHECK-LABEL: define spir_kernel void @loop_with_generic_bound( +; CHECK-SAME: ) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[END:%.*]] = load ptr addrspace(4), ptr addrspace(1) @generic_end, align 8 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[V]]) +; CHECK-NEXT: [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr addrspace(4) +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq ptr addrspace(4) [[TMP0]], [[END]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %p = addrspacecast ptr addrspace(3) @array to ptr addrspace(4) + %end = load ptr addrspace(4), ptr addrspace(1) @generic_end + br label %loop + +loop: ; preds = %loop, %entry + %i = phi ptr addrspace(4) [ %p, %entry ], [ %i2, %loop ] + %v = load float, ptr addrspace(4) %i + call void @use(float %v) + %i2 = getelementptr float, ptr addrspace(4) %i, i64 1 + %exit_cond = icmp eq ptr addrspace(4) %i2, %end + br i1 %exit_cond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +define void @select_bug() #0 { +; CHECK-LABEL: define void @select_bug( +; CHECK-SAME: ) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr addrspace(4) inttoptr (i64 4873 to ptr addrspace(4)), null +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 73, i64 93 +; CHECK-NEXT: [[ADD_PTR157:%.*]] = getelementptr inbounds i64, ptr addrspace(4) undef, i64 [[SEL]] +; CHECK-NEXT: [[CMP169:%.*]] = icmp uge ptr addrspace(4) undef, [[ADD_PTR157]] +; CHECK-NEXT: unreachable +; + %cmp = icmp ne ptr addrspace(4) inttoptr (i64 4873 to ptr addrspace(4)), null + %sel = select i1 %cmp, i64 73, i64 93 + %add.ptr157 = getelementptr inbounds i64, ptr addrspace(4) undef, i64 %sel + %cmp169 = icmp uge ptr addrspace(4) undef, %add.ptr157 + unreachable +} + +declare void @llvm.amdgcn.s.barrier() #1 +declare void @use(float) #0 + +attributes #0 = { nounwind } +attributes #1 = { convergent nounwind } diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll new file mode 100644 
index 00000000000000..4e64ec7174017d --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +; Test that pure addrspacecast instructions not directly connected to +; a memory operation are inferred. + +define void @addrspacecast_gep_addrspacecast(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @addrspacecast_gep_addrspacecast( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(3) [[PTR]], i64 9 +; CHECK-NEXT: store i32 8, ptr addrspace(3) [[GEP0]], align 8 +; CHECK-NEXT: ret void +; + %asc0 = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) + %gep0 = getelementptr i32, ptr addrspace(4) %asc0, i64 9 + %asc1 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store i32 8, ptr addrspace(3) %asc1, align 8 + ret void +} + +define void @addrspacecast_different_pointee_type(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @addrspacecast_different_pointee_type( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(3) [[PTR]], i64 9 +; CHECK-NEXT: store i8 8, ptr addrspace(3) [[GEP0]], align 8 +; CHECK-NEXT: ret void +; + %asc0 = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) + %gep0 = getelementptr i32, ptr addrspace(4) %asc0, i64 9 + %asc1 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store i8 8, ptr addrspace(3) %asc1, align 8 + ret void +} + +define void @addrspacecast_to_memory(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @addrspacecast_to_memory( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(3) [[PTR]], i64 9 +; CHECK-NEXT: store volatile ptr addrspace(3) [[GEP0]], ptr addrspace(1) undef, align +; CHECK-NEXT: ret void +; + %asc0 = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) + %gep0 = getelementptr i32, ptr addrspace(4) %asc0, i64 9 + %asc1 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store volatile ptr addrspace(3) %asc1, ptr addrspace(1) undef + ret void +} + +define void @multiuse_addrspacecast_gep_addrspacecast(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @multiuse_addrspacecast_gep_addrspacecast( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[ASC0:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4) +; CHECK-NEXT: store volatile ptr addrspace(4) [[ASC0]], ptr addrspace(1) undef, align +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(3) [[PTR]], i64 9 +; CHECK-NEXT: store i32 8, ptr addrspace(3) [[GEP0]], align 8 +; CHECK-NEXT: ret void +; + %asc0 = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) + store volatile ptr addrspace(4) %asc0, ptr addrspace(1) undef + %gep0 = getelementptr i32, ptr addrspace(4) %asc0, i64 9 + %asc1 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store i32 8, ptr addrspace(3) %asc1, align 8 + ret void +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll new file mode 100644 index 00000000000000..56412e50ed5d2c --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll @@ -0,0 +1,108 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +; Test that pure GetElementPtr instructions not directly connected to +; a memory operation are inferred. + +@lds = internal unnamed_addr addrspace(3) global [648 x double] undef, align 8 + +define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { +; CHECK-LABEL: @simplified_constexpr_gep_addrspacecast( +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr inbounds ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX0:%.*]] +; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP0]], align 8 +; CHECK-NEXT: ret void +; + %gep0 = getelementptr inbounds double, ptr addrspace(4) addrspacecast (ptr addrspace(3) getelementptr inbounds ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384) to ptr addrspace(4)), i64 %idx0 + %asc = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store double 1.000000e+00, ptr addrspace(3) %asc, align 8 + ret void +} + +define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { +; CHECK-LABEL: @constexpr_gep_addrspacecast( +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX0:%.*]] +; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP0]], align 8 +; CHECK-NEXT: ret void +; + %gep0 = getelementptr inbounds double, ptr addrspace(4) getelementptr ([648 x double], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i64 0, i64 384), i64 %idx0 + %asc = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store double 1.0, ptr addrspace(3) %asc, align 8 + ret void +} + +define void @constexpr_gep_gep_addrspacecast(i64 %idx0, i64 %idx1) { +; CHECK-LABEL: @constexpr_gep_gep_addrspacecast( +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX0:%.*]] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds double, ptr addrspace(3) [[GEP0]], i64 [[IDX1:%.*]] +; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP1]], align 8 +; CHECK-NEXT: ret void +; + %gep0 = getelementptr inbounds double, ptr addrspace(4) getelementptr ([648 x double], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i64 0, i64 384), i64 %idx0 + %gep1 = getelementptr inbounds double, ptr addrspace(4) %gep0, i64 %idx1 + %asc = addrspacecast ptr addrspace(4) %gep1 to ptr addrspace(3) + store double 1.0, ptr addrspace(3) %asc, align 8 + ret void +} + +; Don't crash +define spir_kernel void @vector_gep(<4 x ptr addrspace(3)> %array) nounwind { +; CHECK-LABEL: @vector_gep( +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast <4 x ptr addrspace(3)> [[ARRAY:%.*]] to <4 x ptr addrspace(4)> +; CHECK-NEXT: [[P:%.*]] = getelementptr [1024 x i32], <4 x ptr addrspace(4)> [[CAST]], <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16> +; CHECK-NEXT: [[P0:%.*]] = extractelement <4 x ptr addrspace(4)> [[P]], i32 0 +; CHECK-NEXT: [[P1:%.*]] = extractelement <4 x ptr addrspace(4)> [[P]], i32 1 +; CHECK-NEXT: [[P2:%.*]] = extractelement <4 x ptr addrspace(4)> [[P]], i32 2 +; CHECK-NEXT: [[P3:%.*]] = extractelement <4 x ptr addrspace(4)> [[P]], i32 3 +; CHECK-NEXT: store i32 99, ptr addrspace(4) [[P0]], align 4 +; 
CHECK-NEXT: store i32 99, ptr addrspace(4) [[P1]], align 4 +; CHECK-NEXT: store i32 99, ptr addrspace(4) [[P2]], align 4 +; CHECK-NEXT: store i32 99, ptr addrspace(4) [[P3]], align 4 +; CHECK-NEXT: ret void +; + %cast = addrspacecast <4 x ptr addrspace(3)> %array to <4 x ptr addrspace(4)> + %p = getelementptr [1024 x i32], <4 x ptr addrspace(4)> %cast, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16> + %p0 = extractelement <4 x ptr addrspace(4)> %p, i32 0 + %p1 = extractelement <4 x ptr addrspace(4)> %p, i32 1 + %p2 = extractelement <4 x ptr addrspace(4)> %p, i32 2 + %p3 = extractelement <4 x ptr addrspace(4)> %p, i32 3 + store i32 99, ptr addrspace(4) %p0 + store i32 99, ptr addrspace(4) %p1 + store i32 99, ptr addrspace(4) %p2 + store i32 99, ptr addrspace(4) %p3 + ret void +} + +define void @repeated_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { +; CHECK-LABEL: @repeated_constexpr_gep_addrspacecast( +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX0:%.*]] +; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP0]], align 8 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX1:%.*]] +; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP1]], align 8 +; CHECK-NEXT: ret void +; + %gep0 = getelementptr inbounds double, ptr addrspace(4) getelementptr ([648 x double], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i64 0, i64 384), i64 %idx0 + %asc0 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store double 1.0, ptr addrspace(3) %asc0, align 8 + + %gep1 = getelementptr inbounds double, ptr addrspace(4) getelementptr ([648 x double], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i64 0, i64 384), i64 %idx1 + %asc1 = addrspacecast ptr addrspace(4) %gep1 to ptr addrspace(3) + store double 1.0, ptr addrspace(3) %asc1, align 8 + + ret void +} + +define void @unorder_constexpr_gep_bitcast() { +; CHECK-LABEL: @unorder_constexpr_gep_bitcast( +; CHECK-NEXT: [[X0:%.*]] = load i32, ptr addrspace(3) @lds, align 4 +; CHECK-NEXT: [[X1:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @lds, i32 1), align 4 +; CHECK-NEXT: call void @use(i32 [[X0]], i32 [[X1]]) +; CHECK-NEXT: ret void +; + %x0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), align 4 + %x1 = load i32, ptr addrspace(4) getelementptr (i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i32 1), align 4 + call void @use(i32 %x0, i32 %x1) + ret void +} + +declare void @use(i32, i32) diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll new file mode 100644 index 00000000000000..f736579c1765fe --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll @@ -0,0 +1,158 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s --check-prefix=SPIRV32 +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s --check-prefix=SPIRV64 + +; Addrspacecasts or bitcasts must be inserted after the instructions that define their uses. 
+ +%struct.s0 = type { ptr addrspace(4), i32 } +%struct.s1 = type { %struct.s0 } + +@global0 = protected addrspace(2) externally_initialized global %struct.s1 zeroinitializer + +declare i32 @func(ptr %arg) + +define i32 @addrspacecast_insert_pos_assert() { +; CHECK-LABEL: @addrspacecast_insert_pos_assert( +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(2) @global0, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[LOAD0]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr addrspace(4) +; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ALLOCA]], align 4 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD1]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[SEXT]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func(ptr addrspace(4) [[GEP]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; SPIRV32-LABEL: @addrspacecast_insert_pos_assert( +; SPIRV32-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4 +; SPIRV32-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(2) @global0, align 4 +; SPIRV32-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[LOAD0]] to ptr addrspace(1) +; SPIRV32-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr addrspace(4) +; SPIRV32-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ALLOCA]], align 4 +; SPIRV32-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD1]] to i64 +; SPIRV32-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[SEXT]] +; SPIRV32-NEXT: [[CALL:%.*]] = call i32 @func(ptr addrspace(4) [[GEP]]) +; SPIRV32-NEXT: ret i32 [[CALL]] +; +; SPIRV64-LABEL: @addrspacecast_insert_pos_assert( +; SPIRV64-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4 +; SPIRV64-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(2) @global0, align 8 +; SPIRV64-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[LOAD0]] to ptr addrspace(1) +; SPIRV64-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr addrspace(4) +; SPIRV64-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ALLOCA]], align 4 +; SPIRV64-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD1]] to i64 +; SPIRV64-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[SEXT]] +; SPIRV64-NEXT: [[CALL:%.*]] = call i32 @func(ptr addrspace(4) [[GEP]]) +; SPIRV64-NEXT: ret i32 [[CALL]] +; + %alloca = alloca i32, align 4 + %cast = addrspacecast ptr %alloca to ptr addrspace(4) + %load0 = load ptr addrspace(4), ptr addrspace(2) @global0 + %load1 = load i32, ptr addrspace(4) %cast + %sext = sext i32 %load1 to i64 + %gep = getelementptr inbounds i32, ptr addrspace(4) %load0, i64 %sext + %call = call i32 @func(ptr addrspace(4) %gep) + ret i32 %call +} + +define void @bitcast_insert_pos_assert_1() { +; CHECK-LABEL: @bitcast_insert_pos_assert_1( +; CHECK-NEXT: bb.0: +; CHECK-NEXT: [[ASC0:%.*]] = addrspacecast ptr undef to ptr addrspace(4) +; CHECK-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; CHECK-NEXT: br label [[BB_1:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: br i1 undef, label [[BB_2:%.*]], label [[BB_3:%.*]] +; CHECK: bb.2: +; CHECK-NEXT: [[PTI1:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; CHECK-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI1]] to ptr addrspace(4) +; CHECK-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITP0]], align 8 +; CHECK-NEXT: br label [[BB_3]] +; CHECK: bb.3: +; CHECK-NEXT: ret void +; +; SPIRV32-LABEL: @bitcast_insert_pos_assert_1( +; SPIRV32-NEXT: bb.0: +; SPIRV32-NEXT: 
[[ASC0:%.*]] = addrspacecast ptr undef to ptr addrspace(4) +; SPIRV32-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; SPIRV32-NEXT: br label [[BB_1:%.*]] +; SPIRV32: bb.1: +; SPIRV32-NEXT: br i1 undef, label [[BB_2:%.*]], label [[BB_3:%.*]] +; SPIRV32: bb.2: +; SPIRV32-NEXT: [[PTI1:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; SPIRV32-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI1]] to ptr addrspace(4) +; SPIRV32-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITP0]], align 8 +; SPIRV32-NEXT: br label [[BB_3]] +; SPIRV32: bb.3: +; SPIRV32-NEXT: ret void +; +; SPIRV64-LABEL: @bitcast_insert_pos_assert_1( +; SPIRV64-NEXT: bb.0: +; SPIRV64-NEXT: [[ASC0:%.*]] = addrspacecast ptr undef to ptr addrspace(4) +; SPIRV64-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; SPIRV64-NEXT: br label [[BB_1:%.*]] +; SPIRV64: bb.1: +; SPIRV64-NEXT: br i1 undef, label [[BB_2:%.*]], label [[BB_3:%.*]] +; SPIRV64: bb.2: +; SPIRV64-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr undef, align 8 +; SPIRV64-NEXT: br label [[BB_3]] +; SPIRV64: bb.3: +; SPIRV64-NEXT: ret void +; +bb.0: + %asc0 = addrspacecast ptr undef to ptr addrspace(4) + %pti0 = ptrtoint ptr addrspace(4) %asc0 to i64 + br label %bb.1 + +bb.1: + br i1 undef, label %bb.2, label %bb.3 + +bb.2: + %pti1 = ptrtoint ptr addrspace(4) %asc0 to i64 + %itp0 = inttoptr i64 %pti1 to ptr addrspace(4) + %load0 = load ptr addrspace(4), ptr addrspace(4) %itp0, align 8 + br label %bb.3 + +bb.3: + ret void +} + +define void @bitcast_insert_pos_assert_2() { +; CHECK-LABEL: @bitcast_insert_pos_assert_2( +; CHECK-NEXT: [[ALLOCA0:%.*]] = alloca [[STRUCT_S1:%.*]], align 16 +; CHECK-NEXT: [[ASC0:%.*]] = addrspacecast ptr [[ALLOCA0]] to ptr addrspace(4) +; CHECK-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; CHECK-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI0]] to ptr addrspace(4) +; CHECK-NEXT: [[ITP1:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; CHECK-NEXT: [[ITP2:%.*]] = inttoptr i64 [[ITP1]] to ptr addrspace(4) +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i64, ptr addrspace(4) [[ITP2]], i64 1 +; CHECK-NEXT: ret void +; +; SPIRV32-LABEL: @bitcast_insert_pos_assert_2( +; SPIRV32-NEXT: [[ALLOCA0:%.*]] = alloca [[STRUCT_S1:%.*]], align 16 +; SPIRV32-NEXT: [[ASC0:%.*]] = addrspacecast ptr [[ALLOCA0]] to ptr addrspace(4) +; SPIRV32-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; SPIRV32-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI0]] to ptr addrspace(4) +; SPIRV32-NEXT: [[ITP1:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; SPIRV32-NEXT: [[ITP2:%.*]] = inttoptr i64 [[ITP1]] to ptr addrspace(4) +; SPIRV32-NEXT: [[GEP0:%.*]] = getelementptr i64, ptr addrspace(4) [[ITP2]], i64 1 +; SPIRV32-NEXT: ret void +; +; SPIRV64-LABEL: @bitcast_insert_pos_assert_2( +; SPIRV64-NEXT: [[ALLOCA0:%.*]] = alloca [[STRUCT_S1:%.*]], align 16 +; SPIRV64-NEXT: [[ASC0:%.*]] = addrspacecast ptr [[ALLOCA0]] to ptr addrspace(4) +; SPIRV64-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; SPIRV64-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI0]] to ptr addrspace(4) +; SPIRV64-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ALLOCA0]] to ptr addrspace(4) +; SPIRV64-NEXT: [[GEP0:%.*]] = getelementptr i64, ptr addrspace(4) [[TMP1]], i64 1 +; SPIRV64-NEXT: ret void +; + %alloca0 = alloca %struct.s1, align 16 + %asc0 = addrspacecast ptr %alloca0 to ptr addrspace(4) + %pti0 = ptrtoint ptr addrspace(4) %asc0 to i64 + %itp0 = inttoptr i64 %pti0 to ptr addrspace(4) + %itp1 = ptrtoint ptr addrspace(4) %asc0 to i64 + %itp2 = 
inttoptr i64 %itp1 to ptr addrspace(4) + %gep0 = getelementptr i64, ptr addrspace(4) %itp2, i64 1 + ret void +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll new file mode 100644 index 00000000000000..d6a58d2fccde06 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +define i1 @is_constant_global_to_flat(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define i1 @is_constant_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.p1(ptr addrspace(1) [[PTR]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %cast = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(4) + %ret = call i1 @llvm.is.constant.p4(ptr addrspace(4) %cast) + ret i1 %ret +} + +define i1 @is_constant_local_to_flat(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define i1 @is_constant_local_to_flat( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.p3(ptr addrspace(3) [[PTR]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %cast = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) + %ret = call i1 @llvm.is.constant.p4(ptr addrspace(4) %cast) + ret i1 %ret +} + +define i1 @is_constant_private_to_flat(ptr %ptr) { +; CHECK-LABEL: define i1 @is_constant_private_to_flat( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.p0(ptr [[PTR]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %cast = addrspacecast ptr %ptr to ptr addrspace(4) + %ret = call i1 @llvm.is.constant.p4(ptr addrspace(4) %cast) + ret i1 %ret +} + +define i1 @is_constant_private_to_flat_v2(<2 x ptr> %ptr) { +; CHECK-LABEL: define i1 @is_constant_private_to_flat_v2( +; CHECK-SAME: <2 x ptr> [[PTR:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.v2p0(<2 x ptr> [[PTR]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(4)> + %ret = call i1 @llvm.is.constant.v2p4(<2 x ptr addrspace(4)> %cast) + ret i1 %ret +} + +define i1 @is_constant_i32(i32 %val) { +; CHECK-LABEL: define i1 @is_constant_i32( +; CHECK-SAME: i32 [[VAL:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.i32(i32 [[VAL]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %ret = call i1 @llvm.is.constant.i32(i32 %val) + ret i1 %ret +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/lit.local.cfg b/llvm/test/Transforms/InferAddressSpaces/SPIRV/lit.local.cfg new file mode 100644 index 00000000000000..78dd74cd6dc634 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/lit.local.cfg @@ -0,0 +1,2 @@ +if not "SPIRV" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll new file mode 100644 index 00000000000000..fd60c307a35fca --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll @@ -0,0 +1,145 @@ +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +; CHECK-LABEL: @memset_group_to_flat( +; CHECK: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 
%group.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 { + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memset_global_to_flat( +; CHECK: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 %global.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memset_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 { + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memset_group_to_flat_no_md( +; CHECK: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 %group.ptr, i8 4, i64 %size, i1 false){{$}} +define spir_kernel void @memset_group_to_flat_no_md(ptr addrspace(3) %group.ptr, i64 %size) #0 { + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 %size, i1 false) + ret void +} + +; CHECK-LABEL: @memset_global_to_flat_no_md( +; CHECK: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 %global.ptr, i8 4, i64 %size, i1 false){{$}} +define spir_kernel void @memset_global_to_flat_no_md(ptr addrspace(1) %global.ptr, i64 %size) #0 { + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 %size, i1 false) + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group( +; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memcpy_inline_flat_to_flat_replace_src_with_group( +; CHECK: call void @llvm.memcpy.inline.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 42, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.inline.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 42, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group( +; CHECK: call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) align 4 %dest.group.ptr, ptr addrspace(4) align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memcpy_flat_to_flat_replace_dest_with_group(ptr addrspace(3) %dest.group.ptr, ptr addrspace(4) %src.ptr, i64 %size) #0 { + %cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) 
align 4 %cast.dest, ptr addrspace(4) align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group( +; CHECK: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 %src.group.ptr, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(ptr addrspace(3) %dest.group.ptr, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + %cast.dest = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %cast.dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global( +; CHECK: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 %dest.group.ptr, ptr addrspace(1) align 4 %src.global.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(ptr addrspace(3) %dest.group.ptr, ptr addrspace(1) %src.global.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(1) %src.global.ptr to ptr addrspace(4) + %cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %cast.dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global( +; CHECK: call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %dest.global.ptr, ptr addrspace(3) align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memcpy_group_to_flat_replace_dest_global(ptr addrspace(1) %dest.global.ptr, ptr addrspace(3) %src.group.ptr, i32 %size) #0 { + %cast.dest = addrspacecast ptr addrspace(1) %dest.global.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p3.i32(ptr addrspace(4) align 4 %cast.dest, ptr addrspace(3) align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct( +; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa.struct !8 +define spir_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa.struct !8 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md( +; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}} +define spir_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false) + ret void +} + +; CHECK-LABEL: 
@multiple_memcpy_flat_to_flat_replace_src_with_group_no_md( +; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest0, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}} +; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest1, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}} +define spir_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(ptr addrspace(4) %dest0, ptr addrspace(4) %dest1, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest0, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest1, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false) + ret void +} + +; Check for iterator problems if the pointer has 2 uses in the same call +; CHECK-LABEL: @memcpy_group_flat_to_flat_self( +; CHECK: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 %group.ptr, ptr addrspace(3) align 4 %group.ptr, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memcpy_group_flat_to_flat_self(ptr addrspace(3) %group.ptr) #0 { + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %cast, ptr addrspace(4) align 4 %cast, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} +; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group( +; CHECK: call void @llvm.memmove.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memmove_flat_to_flat_replace_src_with_group(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memmove.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +declare void @llvm.memset.p4.i64(ptr addrspace(4) nocapture writeonly, i8, i64, i1) #1 +declare void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1) #1 +declare void @llvm.memcpy.inline.p4.p4.i64(ptr addrspace(4) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1) #1 +declare void @llvm.memcpy.p4.p3.i32(ptr addrspace(4) nocapture writeonly, ptr addrspace(3) nocapture readonly, i32, i1) #1 +declare void @llvm.memmove.p4.p4.i64(ptr addrspace(4) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1) #1 + +attributes #0 = { nounwind } +attributes #1 = { argmemonly nounwind } + +!0 = !{!1, !1, i64 0} +!1 = !{!"A", !2} +!2 = !{!"tbaa root"} +!3 = !{!4} +!4 = distinct !{!4, !5, !"some scope 1"} +!5 = distinct !{!5, !"some domain"} +!6 = !{!7} +!7 = distinct !{!7, !5, !"some scope 2"} +!8 = !{i64 0, i64 8, null} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll new file mode 100644 index 00000000000000..83725d22df3124 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=spirv32-- -S -passes=infer-address-spaces --verify-each %s | FileCheck %s +; RUN: 
opt -mtriple=spirv64-- -S -passes=infer-address-spaces --verify-each %s | FileCheck %s + +; An instruction can use a value multiple times. When we're inserting an addrspacecast to flat, +; it's important that all the identical uses get an identical replacement, especially +; for PHIs. + +define spir_kernel void @test_phi() { +; CHECK-LABEL: @test_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOADED_PTR:%.*]] = load ptr addrspace(4), ptr addrspace(2) null, align 8 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[LOADED_PTR]] to ptr addrspace(1) +; CHECK-NEXT: br label [[BB0:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr addrspace(1) [[TMP0]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[GEP]] to ptr addrspace(4) +; CHECK-NEXT: switch i32 0, label [[END:%.*]] [ +; CHECK-NEXT: i32 1, label [[END]] +; CHECK-NEXT: i32 4, label [[END]] +; CHECK-NEXT: i32 5, label [[BB1:%.*]] +; CHECK-NEXT: ] +; CHECK: bb1: +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(1) [[GEP]], align 16 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[RETVAL_SROA_0_0_I569_PH:%.*]] = phi ptr addrspace(4) [ null, [[BB1]] ], [ [[TMP1]], [[BB0]] ], [ [[TMP1]], [[BB0]] ], [ [[TMP1]], [[BB0]] ] +; CHECK-NEXT: ret void +; +entry: + %loaded.ptr = load ptr addrspace(4), ptr addrspace(2) null, align 8 + br label %bb0 + +bb0: + %gep = getelementptr i64, ptr addrspace(4) %loaded.ptr, i64 3 + switch i32 0, label %end [ + i32 1, label %end + i32 4, label %end + i32 5, label %bb1 + ] + +bb1: + %0 = load double, ptr addrspace(4) %gep, align 16 + br label %end + +end: + %retval.sroa.0.0.i569.ph = phi ptr addrspace(4) [ null, %bb1 ], [ %gep, %bb0 ], [ %gep, %bb0 ], [ %gep, %bb0 ] + ret void +} + +declare void @uses_ptrs(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4)) + +; We shouldn't treat PHIs differently; other users should get the same treatment. +; All occurrences of %gep are replaced with an identical value. 
+define spir_kernel void @test_other() { +; CHECK-LABEL: @test_other( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOADED_PTR:%.*]] = load ptr addrspace(4), ptr addrspace(2) null, align 8 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[LOADED_PTR]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr addrspace(4) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr addrspace(4) [[TMP1]], i64 3 +; CHECK-NEXT: call void @uses_ptrs(ptr addrspace(4) [[GEP]], ptr addrspace(4) [[GEP]], ptr addrspace(4) [[GEP]]) +; CHECK-NEXT: ret void +; +entry: + %loaded.ptr = load ptr addrspace(4), ptr addrspace(2) null, align 8 + %gep = getelementptr i64, ptr addrspace(4) %loaded.ptr, i64 3 + call void @uses_ptrs(ptr addrspace(4) %gep, ptr addrspace(4) %gep, ptr addrspace(4) %gep) + ret void +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll new file mode 100644 index 00000000000000..b7c773e92cb2f5 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +define void @prefetch_shared_to_flat(ptr addrspace(3) %group.ptr) { +; CHECK-LABEL: define void @prefetch_shared_to_flat( +; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]]) { +; CHECK-NEXT: tail call void @llvm.prefetch.p3(ptr addrspace(3) [[GROUP_PTR]], i32 0, i32 0, i32 1) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + tail call void @llvm.prefetch.p4(ptr addrspace(4) %cast, i32 0, i32 0, i32 1) + ret void +} + +define void @prefetch_global_to_flat(ptr addrspace(1) %global.ptr) { +; CHECK-LABEL: define void @prefetch_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]]) { +; CHECK-NEXT: tail call void @llvm.prefetch.p1(ptr addrspace(1) [[GLOBAL_PTR]], i32 0, i32 0, i32 1) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + tail call void @llvm.prefetch.p4(ptr addrspace(4) %cast, i32 0, i32 0, i32 1) + ret void +} + +define void @prefetch_constant_to_flat(ptr addrspace(2) %const.ptr) { +; CHECK-LABEL: define void @prefetch_constant_to_flat( +; CHECK-SAME: ptr addrspace(2) [[CONST_PTR:%.*]]) { +; CHECK-NEXT: tail call void @llvm.prefetch.p2(ptr addrspace(2) [[CONST_PTR]], i32 0, i32 0, i32 1) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(2) %const.ptr to ptr addrspace(4) + tail call void @llvm.prefetch.p4(ptr addrspace(4) %cast, i32 0, i32 0, i32 1) + ret void +} + +define void @prefetch_flat_to_shared(ptr addrspace(4) %flat.ptr) { +; CHECK-LABEL: define void @prefetch_flat_to_shared( +; CHECK-SAME: ptr addrspace(4) [[FLAT_PTR:%.*]]) { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(4) [[FLAT_PTR]] to ptr addrspace(3) +; CHECK-NEXT: tail call void @llvm.prefetch.p3(ptr addrspace(3) [[CAST]], i32 0, i32 0, i32 1) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(4) %flat.ptr to ptr addrspace(3) + tail call void @llvm.prefetch.p3(ptr addrspace(3) %cast, i32 0, i32 0, i32 1) + ret void +} + +define void @prefetch_flat_to_global(ptr addrspace(4) %flat.ptr) { +; CHECK-LABEL: define void @prefetch_flat_to_global( +; CHECK-SAME: ptr addrspace(4) [[FLAT_PTR:%.*]]) { +; CHECK-NEXT: 
[[CAST:%.*]] = addrspacecast ptr addrspace(4) [[FLAT_PTR]] to ptr addrspace(1) +; CHECK-NEXT: tail call void @llvm.prefetch.p1(ptr addrspace(1) [[CAST]], i32 0, i32 0, i32 1) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(4) %flat.ptr to ptr addrspace(1) + tail call void @llvm.prefetch.p1(ptr addrspace(1) %cast, i32 0, i32 0, i32 1) + ret void +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll new file mode 100644 index 00000000000000..296e3af86647e2 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces -o - %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces -o - %s | FileCheck %s + +; Check that InferAddressSpaces's cloneInstructionWithNewAddressSpace() propagates +; the debug location to new addrspacecast instruction which casts `%p` in the following test. + +@c0 = addrspace(2) global ptr poison + +define float @generic_ptr_from_constant() !dbg !5 { +; CHECK-LABEL: define float @generic_ptr_from_constant( +; CHECK-SAME: ) !dbg [[DBG5:![0-9]+]] { +; CHECK-NEXT: [[P:%.*]] = load ptr addrspace(4), ptr addrspace(2) @c0, align 8, !dbg [[DBG8:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(1), !dbg [[DBG8]] +; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(1) [[TMP1]], align 4, !dbg [[DBG9:![0-9]+]] +; CHECK-NEXT: ret float [[V]], !dbg [[DBG10:![0-9]+]] +; + %p = load ptr addrspace(4), ptr addrspace(2) @c0, align 8, !dbg !8 + %v = load float, ptr addrspace(4) %p, align 4, !dbg !9 + ret float %v, !dbg !10 +} + +!llvm.dbg.cu = !{!0} +!llvm.debugify = !{!2, !3} +!llvm.module.flags = !{!4} + +; +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "temp.ll", directory: "/") +!2 = !{i32 3} +!3 = !{i32 0} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = distinct !DISubprogram(name: "generic_ptr_from_constant", linkageName: "generic_ptr_from_constant", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!6 = !DISubroutineType(types: !7) +!7 = !{} +!8 = !DILocation(line: 1, column: 1, scope: !5) +!9 = !DILocation(line: 2, column: 1, scope: !5) +!10 = !DILocation(line: 3, column: 1, scope: !5) +;. +; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: [[META1:![0-9]+]], producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +; CHECK: [[META1]] = !DIFile(filename: "temp.ll", directory: {{.*}}) +; CHECK: [[DBG5]] = distinct !DISubprogram(name: "generic_ptr_from_constant", linkageName: "generic_ptr_from_constant", scope: null, file: [[META1]], line: 1, type: [[META6:![0-9]+]], scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]]) +; CHECK: [[META6]] = !DISubroutineType(types: [[META7:![0-9]+]]) +; CHECK: [[META7]] = !{} +; CHECK: [[DBG8]] = !DILocation(line: 1, column: 1, scope: [[DBG5]]) +; CHECK: [[DBG9]] = !DILocation(line: 2, column: 1, scope: [[DBG5]]) +; CHECK: [[DBG10]] = !DILocation(line: 3, column: 1, scope: [[DBG5]]) +;. 
diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll new file mode 100644 index 00000000000000..3b5d4b7adc3a7d --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +%0 = type { i8, i8, i8 } + +; Make sure there is only one addrspacecast. The original cast should +; not be cloned to satisfy the second user. +define void @bar(ptr addrspace(1) %orig.ptr) { +; CHECK-LABEL: @bar( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[ORIG_CAST:%.*]] = addrspacecast ptr addrspace(1) [[ORIG_PTR:%.*]] to ptr addrspace(4) +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr addrspace(4) [[ORIG_CAST]], i64 0, i32 1 +; CHECK-NEXT: call void @foo(ptr addrspace(4) [[GEP0]]) +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[TMP0]], ptr addrspace(4) [[ORIG_CAST]], i64 0, i32 2 +; CHECK-NEXT: call void @foo(ptr addrspace(4) [[GEP1]]) +; CHECK-NEXT: ret void +; +bb: + %orig.cast = addrspacecast ptr addrspace(1) %orig.ptr to ptr addrspace(4) + %gep0 = getelementptr inbounds %0, ptr addrspace(4) %orig.cast, i64 0, i32 1 + call void @foo(ptr addrspace(4) %gep0) + %gep1 = getelementptr inbounds %0, ptr addrspace(4) %orig.cast, i64 0, i32 2 + call void @foo(ptr addrspace(4) %gep1) + ret void +} + +declare void @foo(ptr addrspace(4)) diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll new file mode 100644 index 00000000000000..ec5c31f32d513b --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=spirv32-- -S -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -mtriple=spirv64-- -S -passes=infer-address-spaces %s | FileCheck %s + +define spir_kernel void @phi_self(ptr addrspace(1) %arg) { +; CHECK-LABEL: @phi_self( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi ptr addrspace(1) [ [[I]], [[LOOP]] ], [ [[ARG:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[I1:%.*]] = load i8, ptr addrspace(1) [[I]], align 1 +; CHECK-NEXT: [[I2:%.*]] = icmp eq i8 [[I1]], 0 +; CHECK-NEXT: br i1 [[I2]], label [[LOOP]], label [[RET:%.*]] +; CHECK: ret: +; CHECK-NEXT: ret void +; +entry: + %cast = addrspacecast ptr addrspace(1) %arg to ptr addrspace(4) + br label %loop + +loop: + %i = phi ptr addrspace(4) [%i, %loop], [%cast, %entry] + %i1 = load i8, ptr addrspace(4) %i, align 1 + %i2 = icmp eq i8 %i1, 0 + br i1 %i2, label %loop, label %ret + +ret: + ret void +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll new file mode 100644 index 00000000000000..b835a008a91e0e --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll @@ -0,0 +1,187 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +; Check that volatile users of addrspacecast are not replaced. 
+ +define spir_kernel void @volatile_load_flat_from_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_load_flat_from_global( +; CHECK-SAME: ptr addrspace(1) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture [[OUTPUT:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[INPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(4) [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr addrspace(1) %output to ptr addrspace(4) + %val = load volatile i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @volatile_load_flat_from_constant(ptr addrspace(2) nocapture %input, ptr addrspace(1) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_load_flat_from_constant( +; CHECK-SAME: ptr addrspace(2) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(2) [[INPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(4) [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(2) %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr addrspace(1) %output to ptr addrspace(4) + %val = load volatile i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @volatile_load_flat_from_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_load_flat_from_group( +; CHECK-SAME: ptr addrspace(3) nocapture [[INPUT:%.*]], ptr addrspace(3) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[INPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(4) [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(3) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(3) %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr addrspace(3) %output to ptr addrspace(4) + %val = load volatile i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @volatile_load_flat_from_private(ptr nocapture %input, ptr nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_load_flat_from_private( +; CHECK-SAME: ptr nocapture [[INPUT:%.*]], ptr nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(4) [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr %output to ptr addrspace(4) + %val = load volatile i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @volatile_store_flat_to_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_store_flat_to_global( +; CHECK-SAME: ptr 
addrspace(1) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[INPUT]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL]], ptr addrspace(4) [[TMP1]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr addrspace(1) %output to ptr addrspace(4) + %val = load i32, ptr addrspace(4) %tmp0, align 4 + store volatile i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @volatile_store_flat_to_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_store_flat_to_group( +; CHECK-SAME: ptr addrspace(3) nocapture [[INPUT:%.*]], ptr addrspace(3) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(3) [[OUTPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(3) [[INPUT]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL]], ptr addrspace(4) [[TMP1]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(3) %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr addrspace(3) %output to ptr addrspace(4) + %val = load i32, ptr addrspace(4) %tmp0, align 4 + store volatile i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @volatile_store_flat_to_private(ptr nocapture %input, ptr nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_store_flat_to_private( +; CHECK-SAME: ptr nocapture [[INPUT:%.*]], ptr nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[INPUT]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL]], ptr addrspace(4) [[TMP1]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr %output to ptr addrspace(4) + %val = load i32, ptr addrspace(4) %tmp0, align 4 + store volatile i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define i32 @volatile_atomicrmw_add_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 { +; CHECK-LABEL: define i32 @volatile_atomicrmw_add_group_to_flat( +; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[GROUP_PTR]] to ptr addrspace(4) +; CHECK-NEXT: [[RET:%.*]] = atomicrmw volatile add ptr addrspace(4) [[CAST]], i32 [[Y]] seq_cst, align 4 +; CHECK-NEXT: ret i32 [[RET]] +; + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + %ret = atomicrmw volatile add ptr addrspace(4) %cast, i32 %y seq_cst + ret i32 %ret +} + +define i32 @volatile_atomicrmw_add_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 { +; CHECK-LABEL: define i32 @volatile_atomicrmw_add_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL_PTR]] to ptr addrspace(4) +; CHECK-NEXT: [[RET:%.*]] = atomicrmw volatile add ptr addrspace(4) [[CAST]], i32 [[Y]] seq_cst, align 4 +; CHECK-NEXT: ret i32 [[RET]] +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + %ret = atomicrmw volatile add ptr addrspace(4) %cast, i32 %y seq_cst + ret i32 %ret +} + +define { i32, i1 } 
@volatile_cmpxchg_global_to_flat(ptr addrspace(1) %global.ptr, i32 %cmp, i32 %val) #0 { +; CHECK-LABEL: define { i32, i1 } @volatile_cmpxchg_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL_PTR]] to ptr addrspace(4) +; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr addrspace(4) [[CAST]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 +; CHECK-NEXT: ret { i32, i1 } [[RET]] +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + %ret = cmpxchg volatile ptr addrspace(4) %cast, i32 %cmp, i32 %val seq_cst monotonic + ret { i32, i1 } %ret +} + +define { i32, i1 } @volatile_cmpxchg_group_to_flat(ptr addrspace(3) %group.ptr, i32 %cmp, i32 %val) #0 { +; CHECK-LABEL: define { i32, i1 } @volatile_cmpxchg_group_to_flat( +; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[GROUP_PTR]] to ptr addrspace(4) +; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr addrspace(4) [[CAST]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 +; CHECK-NEXT: ret { i32, i1 } [[RET]] +; + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + %ret = cmpxchg volatile ptr addrspace(4) %cast, i32 %cmp, i32 %val seq_cst monotonic + ret { i32, i1 } %ret +} + +define spir_kernel void @volatile_memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_memset_group_to_flat( +; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[GROUP_PTR]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 [[CAST]], i8 4, i64 32, i1 true) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + call void @llvm.memset.p0.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 32, i1 true) + ret void +} + +define spir_kernel void @volatile_memset_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_memset_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL_PTR]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 [[CAST]], i8 4, i64 32, i1 true) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 32, i1 true) + ret void +} + +declare void @llvm.memset.p4.i64(ptr addrspace(4) nocapture writeonly, i8, i64, i1) #1 + +attributes #0 = { nounwind } +attributes #1 = { argmemonly nounwind } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits